Files
beaver_project/app-instance/backend/beaver/skills/authoring/format.py
steven_li 8aeb97a5fc feat(app): 移除内置agents并添加CORS支持和技能上传优化
移除了agents/registry.json中的所有内置agents配置,将agents数组清空。
为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。
重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。
新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。
更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。
修改了意图路由技能的说明,改进任务状态管理逻辑。
2026-06-12 13:25:20 +08:00

251 lines
8.7 KiB
Python

"""Canonical Beaver skill authoring format."""
from __future__ import annotations
import json
import re
from typing import Any
from beaver.skills.catalog.utils import extract_required_tool_names
# H2 headings that make up a canonical skill body, in their required order.
# The order matters: is_canonical_skill_body() looks for each heading after
# the previous one, and canonical_skill_format_instructions() lists them in
# this order.
CANONICAL_SKILL_SECTION_HEADINGS: tuple[str, ...] = (
"## Overview",
"## When to Use",
"## Required Tools",
"## Workflow",
"## Validation",
"## Boundaries",
"## Anti-Patterns",
)
def canonical_skill_format_instructions() -> str:
    """Return the numbered, human-readable rules for the canonical SKILL.md format.

    The required H2 headings are rendered as a bullet list taken from
    ``CANONICAL_SKILL_SECTION_HEADINGS`` and inserted after rule 4.
    """
    heading_bullets = "\n".join(
        "- " + section for section in CANONICAL_SKILL_SECTION_HEADINGS
    )
    rules = [
        "Canonical Beaver SKILL.md format:",
        "1. Return a frontmatter object with `name`, `description`, and `tools`.",
        "2. `name` must be lowercase kebab-case. `description` must explain when the skill should be used.",
        "3. `tools` must be an explicit JSON array of exact runtime tool names. Use [] only if no tool is required.",
        "4. The Markdown content must start with one H1 title and include these H2 sections in this exact order:",
        heading_bullets,
        "5. Write concrete operational guidance, not a story about a past task.",
        "6. Include validation steps and anti-patterns so future runs know how to avoid false completion.",
    ]
    # One rule per line, no trailing newline.
    return "\n".join(rules)
def normalize_skill_frontmatter(frontmatter: dict[str, Any] | None, *, skill_name: str) -> dict[str, Any]:
    """Return a normalized copy of a skill's frontmatter.

    The result always starts with the three core keys:

    * ``name`` - slug built from the frontmatter ``name``; falls back to
      ``skill_name`` when the frontmatter value is missing, falsy or blank.
    * ``description`` - stripped text; falls back to a generic
      "Use when <name> guidance is needed." sentence when missing or blank.
    * ``tools`` - de-duplicated list of tool names.

    Every other key is carried over unchanged, except that string values of
    ``always`` and ``internal`` are converted to booleans ("1", "true", "yes"
    and "on", case-insensitive, mean ``True``).

    Args:
        frontmatter: Raw frontmatter mapping, or ``None``.
        skill_name: Fallback used to derive the name.

    Returns:
        A new dict; the input mapping is not modified.
    """
    raw = dict(frontmatter or {})

    # Strip *before* applying the fallbacks: a whitespace-only value is truthy,
    # so `value or fallback` alone would let it through and produce an empty
    # description or the generic "generated-skill" slug.
    declared_name = str(raw.get("name") or "").strip()
    name = _slug(declared_name or str(skill_name))
    description = (
        str(raw.get("description") or "").strip()
        or f"Use when {name} guidance is needed."
    )
    tools = _coerce_string_list(raw.get("tools"))

    extras: dict[str, Any] = {}
    for key, value in raw.items():
        if key in {"name", "description", "tools"}:
            continue  # already handled above
        if key in {"always", "internal"} and isinstance(value, str):
            # Flags may arrive as text; coerce them to real booleans.
            extras[key] = value.strip().lower() in {"1", "true", "yes", "on"}
            continue
        extras[key] = value

    return {
        "name": name,
        "description": description,
        "tools": tools,
        **extras,
    }
def is_canonical_skill_body(body: str) -> bool:
    """Tell whether ``body`` already follows the canonical section layout.

    A body is canonical when it contains an H1 line (``# Title``) and every
    heading of ``CANONICAL_SKILL_SECTION_HEADINGS`` appears, in order, as a
    line of its own.

    Headings are matched as whole lines rather than substrings, so a deeper
    heading such as ``### Overview`` or a heading mentioned inside a sentence
    does not count as the required H2 section.

    Args:
        body: Markdown body of the skill (without frontmatter).

    Returns:
        ``True`` when the layout is canonical, ``False`` otherwise.
    """
    text = body.strip()
    if not re.search(r"^#\s+\S", text, flags=re.MULTILINE):
        return False

    position = 0
    for heading in CANONICAL_SKILL_SECTION_HEADINGS:
        # Anchor to a full line; tolerate trailing blanks and a CR from CRLF input.
        pattern = re.compile(rf"^{re.escape(heading)}[ \t\r]*$", re.MULTILINE)
        found = pattern.search(text, position)
        if found is None:
            return False
        # Continue after this heading so the required order is enforced.
        position = found.end()
    return True
def ensure_canonical_skill_body(
    body: str,
    *,
    title: str,
    description: str = "",
    tools: list[str] | None = None,
) -> str:
    """Return ``body`` in canonical form, wrapping it in a template if needed.

    If the body is already canonical it is returned stripped with a single
    trailing newline; when ``tools`` is non-empty its "Required Tools" section
    is replaced with those tools. Otherwise a canonical body is generated from
    generic workflow/validation/boundary/anti-pattern bullets, and the
    compacted original text is embedded as source guidance.

    Args:
        body: Markdown body to check or wrap.
        title: Title passed to the generated template.
        description: Preferred overview text for the generated template.
        tools: Tool names for the "Required Tools" section.

    Returns:
        The canonical Markdown body.
    """
    if not is_canonical_skill_body(body):
        guidance = _compact_source_guidance(body)
        return canonicalize_skill_body(
            title=title,
            # Prefer the explicit description, then the source text itself.
            overview=description or guidance or f"Use this skill for {title}.",
            tools=list(tools or []),
            workflow=[
                "Identify whether the user's request matches the skill's trigger conditions.",
                "Read the relevant source guidance below and apply only the steps that fit the current task.",
                "Use the required tools deliberately and keep tool output tied to the user's goal.",
            ],
            validation=[
                "Verify the requested outcome with the most direct available check.",
                "Report any skipped step, unavailable dependency, or remaining uncertainty explicitly.",
            ],
            boundaries=[
                "Do not broaden the task beyond the user's request.",
                "Do not use tools that are not listed or clearly available in the current runtime.",
            ],
            anti_patterns=[
                "Do not summarize the skill instead of applying it.",
                "Do not claim completion without validation evidence.",
            ],
            source_guidance=guidance,
        )

    canonical = body.strip()
    if tools:
        canonical = _replace_required_tools_section(canonical, tools)
    return f"{canonical}\n"
def canonicalize_skill_body(
    *,
    title: str,
    overview: str,
    tools: list[str] | None = None,
    workflow: list[str] | None = None,
    validation: list[str] | None = None,
    boundaries: list[str] | None = None,
    anti_patterns: list[str] | None = None,
    when_to_use: list[str] | None = None,
    source_guidance: str = "",
) -> str:
    """Render a complete canonical skill body from its parts.

    Each list argument becomes a bullet list under its H2 section; a missing
    or empty list is replaced by a generic default bullet. Non-blank
    ``source_guidance`` is appended to the Workflow section under a
    "### Source Guidance" sub-heading.

    Returns:
        Markdown text starting with the H1 title and ending with a single
        newline.
    """
    heading = _title(title)

    workflow_block = _bullet_lines(
        workflow or ["Follow the workflow described by the current task and evidence."]
    )
    guidance = source_guidance.strip()
    if guidance:
        workflow_block += f"\n\n### Source Guidance\n\n{guidance}"

    # Title, then alternating section headings and section contents.
    parts = [
        f"# {heading}",
        "## Overview",
        overview.strip() or f"Use this skill for {heading}.",
        "## When to Use",
        _bullet_lines(when_to_use or [f"Use when the task requires {heading} guidance."]),
        "## Required Tools",
        _tool_lines(tools or []),
        "## Workflow",
        workflow_block,
        "## Validation",
        _bullet_lines(validation or ["Validate the result before reporting completion."]),
        "## Boundaries",
        _bullet_lines(boundaries or ["Stay within the current task and workspace boundaries."]),
        "## Anti-Patterns",
        _bullet_lines(anti_patterns or ["Do not skip validation."]),
    ]
    # Parts are separated by one blank line; the document ends with a newline.
    return "\n\n".join(parts) + "\n"
def parse_skill_rewrite_json(content: str, *, skill_name: str) -> dict[str, Any] | None:
    """Parse a JSON skill-rewrite payload into normalized frontmatter and body.

    The text may be wrapped in a Markdown code fence, which is removed first.
    The payload must be a JSON object whose ``frontmatter`` is an object and
    whose ``content`` is a string.

    Args:
        content: Raw text expected to contain the JSON payload.
        skill_name: Fallback name used when normalizing the frontmatter.

    Returns:
        A dict with ``frontmatter``, ``content`` and ``change_reason`` keys,
        or ``None`` when the text is not valid JSON or has the wrong shape.
    """
    text = content.strip()
    if text.startswith("```"):
        fenced = text.splitlines()
        # Only unwrap when there is an opening line, a closing fence and
        # at least one line in between.
        if len(fenced) >= 3 and fenced[-1].startswith("```"):
            text = "\n".join(fenced[1:-1]).strip()

    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        return None
    if not isinstance(payload, dict):
        return None

    meta = payload.get("frontmatter")
    markdown = payload.get("content")
    if not (isinstance(meta, dict) and isinstance(markdown, str)):
        return None

    result_meta = normalize_skill_frontmatter(meta, skill_name=skill_name)
    # Combine declared tools with the ones named in the body itself.
    result_meta["tools"] = _merge_string_lists(
        result_meta.get("tools"),
        extract_required_tool_names(markdown),
    )
    result_body = ensure_canonical_skill_body(
        markdown,
        title=result_meta["name"],
        description=result_meta["description"],
        tools=result_meta["tools"],
    )
    return {
        "frontmatter": result_meta,
        "content": result_body,
        "change_reason": str(payload.get("change_reason") or ""),
    }
def _compact_source_guidance(body: str, *, max_chars: int = 20000) -> str:
text = body.strip()
if not text:
return ""
text = re.sub(r"^---\n.*?\n---\n?", "", text, flags=re.DOTALL).strip()
text = re.sub(r"\n{3,}", "\n\n", text)
text = re.sub(r"^(#{1,4})\s+", r"##\1 ", text, flags=re.MULTILINE)
return text[:max_chars].rstrip()
def _tool_lines(tools: list[str]) -> str:
if not tools:
return "- No dedicated tools are required."
return "\n".join(f"- `{tool}`" for tool in tools)
def _bullet_lines(items: list[str]) -> str:
cleaned = [str(item).strip() for item in items if str(item).strip()]
if not cleaned:
return "- No additional guidance."
return "\n".join(f"- {item}" for item in cleaned)
def _coerce_string_list(value: Any) -> list[str]:
if isinstance(value, list):
raw_items = value
elif isinstance(value, str):
raw_items = value.split(",")
else:
raw_items = []
result: list[str] = []
for item in raw_items:
cleaned = str(item).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result
def _merge_string_lists(*values: Any) -> list[str]:
    """Merge several list-like values into one ordered list without duplicates.

    Each argument is first normalized with ``_coerce_string_list``; the first
    occurrence of every string determines its position in the result.
    """
    # A dict keeps insertion order and ignores repeated keys.
    ordered: dict[str, None] = {}
    for candidate in values:
        ordered.update(dict.fromkeys(_coerce_string_list(candidate)))
    return list(ordered)
def _replace_required_tools_section(body: str, tools: list[str]) -> str:
    """Replace the first "## Required Tools" section of ``body`` with ``tools``.

    The section spans from its heading line up to the next H2 heading or the
    end of the text.

    Args:
        body: Markdown body containing the section.
        tools: Tool names to render in the new section.

    Returns:
        The stripped body with the section replaced, or the stripped body
        unchanged when no such section exists.
    """
    replacement = "## Required Tools\n\n" + _tool_lines(tools) + "\n\n"
    updated, count = re.subn(
        r"(?ms)^##\s+Required\s+Tools\s*\n.*?(?=^##\s+|\Z)",
        # Use a callable so the text is inserted literally. A plain string is
        # treated as a template, where a backslash in a tool name would be
        # read as an escape or group reference and raise or corrupt output.
        lambda _match: replacement,
        body.strip(),
        count=1,
    )
    return updated.strip() if count else body.strip()
def _slug(value: str) -> str:
text = value.strip().lower()
text = re.sub(r"[^a-z0-9-]+", "-", text)
text = re.sub(r"-{2,}", "-", text).strip("-")
return text or "generated-skill"
def _title(value: str) -> str:
cleaned = str(value or "").strip().replace("-", " ")
return cleaned.title() if cleaned else "Generated Skill"