Files
beaver_project/app-instance/backend/beaver/skills/learning/synthesizer.py
steven_li 8aeb97a5fc feat(app): 移除内置agents并添加CORS支持和技能上传优化
移除了agents/registry.json中的所有内置agents配置,将agents数组清空。
为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。
重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。
新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。
更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。
修改了意图路由技能的说明,改进任务状态管理逻辑。
2026-06-12 13:25:20 +08:00

213 lines
9.1 KiB
Python

"""LLM-backed draft synthesis for skill learning."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers.base import LLMProvider
from beaver.skills.authoring import canonical_skill_format_instructions, ensure_canonical_skill_body, normalize_skill_frontmatter
from beaver.skills.learning.evidence import EvidencePacket
from beaver.memory.skills.models import SkillLearningCandidate
class SkillDraftSynthesizer:
    """Produce skill drafts from execution evidence by prompting an LLM.

    The three public coroutines differ only in the action label ("revise",
    "new" or "merge") and in whether a base skill snapshot is forwarded;
    all of them delegate to :meth:`_synthesize`. Every draft is a dict with
    the keys ``frontmatter``, ``content``, ``change_reason``,
    ``preserved_sections``, ``changed_sections`` and ``dropped_sections``.
    """

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"revise"``, optionally from *base_skill*."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "revise", base_skill=base_skill)

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"new"``; no base skill is used."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "new", base_skill=None)

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"merge"``, optionally from *base_skill*."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "merge", base_skill=base_skill)

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
        *,
        base_skill: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Run one chat completion and convert the reply into a draft payload.

        The reply is parsed with :meth:`_parse_payload`. A usable reply is
        passed through :meth:`_normalize_payload`; otherwise the draft is
        built locally by :meth:`_fallback_payload`. Exceptions raised by
        ``provider.chat`` are not caught here.
        """
        prompt = self._build_prompt(candidate, evidence_packet, action, base_skill=base_skill)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You synthesize Beaver skill drafts from execution evidence. "
                        "Return only JSON with keys: frontmatter, content, change_reason, "
                        "preserved_sections, changed_sections, dropped_sections. "
                        "The content must follow the Canonical Beaver SKILL.md format."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0,
        )
        payload = self._parse_payload(response.content or "")
        if payload:
            return self._normalize_payload(payload, evidence_packet)
        return self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        action: str,
        base_skill: dict[str, Any] | None = None,
    ) -> str:
        """Assemble the user prompt from the candidate, evidence and base skill.

        Tool names come from the ``tool_names`` and ``selected_tool_names``
        entries of ``evidence_packet.metadata``. The base skill snapshot is
        appended only when *base_skill* is truthy.
        """
        tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        tool_section = ", ".join(tool_names) if tool_names else "none observed"
        selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names"))
        selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
        base_section = ""
        if base_skill:
            base_section = (
                "\n\nBase skill snapshot:\n"
                f"- skill_name: {base_skill.get('skill_name')}\n"
                f"- version: {base_skill.get('version')}\n"
                f"- frontmatter: {json.dumps(base_skill.get('frontmatter') or {}, ensure_ascii=False, sort_keys=True)}\n"
                f"- tool_hints: {base_skill.get('tool_hints') or []}\n"
                f"- summary: {base_skill.get('summary') or ''}\n"
                "Base skill content:\n"
                f"{base_skill.get('content') or ''}\n"
                "Preserve existing instructions unless the evidence requires a change. "
                "If any section is changed or dropped, explain it in changed_sections or dropped_sections."
            )
        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
            f"Reason: {candidate.reason}\n"
            f"Related skills: {candidate.related_skill_names}\n"
            f"Called tool names: {tool_section}\n"
            f"Run-selected tool names: {selected_tool_section}\n"
            "Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
            + "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
            + base_section
            + "\n\nReturn JSON only. The frontmatter object must include:"
            + "\n- description: a concise skill description"
            + "\n- tools: an explicit JSON array of exact tool names this skill needs. "
            + "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
            + "Use [] only when no tool is required."
            + "\n\n" + canonical_skill_format_instructions()
            + "\nThe JSON may include preserved_sections, changed_sections, and dropped_sections arrays."
        )

    @staticmethod
    def _extract_json_object(text: str) -> Any:
        """Return the first JSON object that can be decoded from inside *text*.

        Decoding is attempted at each ``{`` from left to right, so prose or
        code-fence markers around the object are ignored. Returns ``None``
        when no position yields valid JSON.
        """
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                value, _end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                start = text.find("{", start + 1)
                continue
            return value
        return None

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Parse the model reply into a draft payload, or ``{}`` if unusable.

        A reply wrapped in a single Markdown code fence is unwrapped first.
        If the remaining text is not valid JSON as a whole, the first JSON
        object embedded in it is used instead, so a reply with explanatory
        text around the JSON is not discarded. The payload is accepted only
        when ``frontmatter`` is a dict and ``content`` is a string.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models often add prose before/after the JSON or emit a
            # single-line fence; salvage the embedded object if there is one.
            payload = SkillDraftSynthesizer._extract_json_object(cleaned)
        if not isinstance(payload, dict):
            return {}
        frontmatter = payload.get("frontmatter")
        content_value = payload.get("content")
        if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
            return {}
        return {
            "frontmatter": frontmatter,
            "content": content_value.strip(),
            "change_reason": str(payload.get("change_reason") or ""),
            "preserved_sections": _coerce_string_list(payload.get("preserved_sections")),
            "changed_sections": _coerce_string_list(payload.get("changed_sections")),
            "dropped_sections": _coerce_string_list(payload.get("dropped_sections")),
        }

    @staticmethod
    def _normalize_payload(payload: dict[str, Any], evidence_packet: EvidencePacket) -> dict[str, Any]:
        """Normalize a parsed payload's frontmatter, tool list and body.

        The frontmatter goes through ``normalize_skill_frontmatter`` and the
        body through ``ensure_canonical_skill_body``. The skill name falls
        back to ``"generated-skill"`` when the model supplied none.
        """
        raw_frontmatter = payload.get("frontmatter") or {}
        frontmatter = normalize_skill_frontmatter(
            dict(raw_frontmatter),
            skill_name=str(raw_frontmatter.get("name") or "generated-skill"),
        )
        tool_hints = _coerce_string_list(frontmatter.get("tools"))
        if not tool_hints:
            # NOTE(review): an explicit empty "tools" list from the model is
            # also replaced by the observed tool names, although the prompt
            # says [] means "no tool is required" - confirm this is intended.
            tool_hints = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        frontmatter["tools"] = tool_hints
        content = ensure_canonical_skill_body(
            str(payload.get("content") or "").strip(),
            title=str(frontmatter.get("name") or "generated-skill"),
            description=str(frontmatter.get("description") or ""),
            tools=tool_hints,
        )
        return {
            "frontmatter": frontmatter,
            "content": content,
            "change_reason": str(payload.get("change_reason") or ""),
            "preserved_sections": _coerce_string_list(payload.get("preserved_sections")),
            "changed_sections": _coerce_string_list(payload.get("changed_sections")),
            "dropped_sections": _coerce_string_list(payload.get("dropped_sections")),
        }

    @staticmethod
    def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
        """Build a draft without the model when its reply could not be parsed.

        The title is the first related skill name with underscores replaced
        by hyphens (or ``"generated-skill"``), the body lists at most the
        first five task summaries, and the section lists are empty.
        """
        related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
        title = related.replace("_", "-")
        tools = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        # Used for both the frontmatter and the canonical body.
        description = candidate.reason or f"Auto-generated {action} draft for {title}."
        content = ensure_canonical_skill_body(
            "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5]) or "- No evidence captured.",
            title=title,
            description=description,
            tools=tools,
        )
        return {
            "frontmatter": {
                "name": title,
                "description": description,
                "tools": tools,
            },
            "content": content,
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
            "preserved_sections": [],
            "changed_sections": [],
            "dropped_sections": [],
        }
def _coerce_string_list(value: Any) -> list[str]:
raw_items: list[Any]
if isinstance(value, list):
raw_items = value
elif isinstance(value, str):
raw_items = value.split(",")
else:
raw_items = []
result: list[str] = []
for item in raw_items:
cleaned = str(item).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result