- 将所有环境变量前缀从NANO_改为BEAVER_ - 更新README.md文档内容,包括项目介绍、组件说明和快速开始指南 - 修改.gitignore文件,添加auth-portal运行时路径排除规则 - 更新app-instance镜像标签从nano/app-instance改为beaver/app-instance - 增强技能安全检查器,支持工具前缀白名单功能 - 添加技能草稿重新检查安全性API端点 - 扩展证据选择器,收集工具调用名称用于技能学习 - 改进技能合成器,基于实际调用的工具生成工具提示 - 优化路由超时处理机制,增加重试逻辑 - 更新后端架构文档,添加可视化入口和基础概念说明 - 实现在WebSocket消息中传递工具迭代次数信息
159 lines
6.3 KiB
Python
159 lines
6.3 KiB
Python
"""LLM-backed draft synthesis for skill learning."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
from beaver.engine.providers.base import LLMProvider
|
|
from beaver.skills.learning.evidence import EvidencePacket
|
|
from beaver.memory.skills.models import SkillLearningCandidate
|
|
|
|
|
|
class SkillDraftSynthesizer:
    """Ask an LLM provider to turn execution evidence into a skill draft.

    Each public ``synthesize_*`` coroutine returns a dict with the keys
    ``frontmatter`` (dict), ``content`` (str) and ``change_reason`` (str).
    When the model reply cannot be parsed into that shape, a deterministic
    fallback draft built from the evidence packet is returned instead.
    """

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``"revise"`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "revise")

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``"new"`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "new")

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``"merge"`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "merge")

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
    ) -> dict[str, Any]:
        """Run one chat completion and convert the reply into a draft payload.

        Args:
            candidate: Learning candidate; its kind, reason and related skill
                names are embedded in the prompt.
            evidence_packet: Evidence whose task summaries, session excerpts
                and tool-name metadata are embedded in the prompt.
            provider: Provider whose ``chat`` coroutine is awaited.
            model: Model identifier forwarded to ``provider.chat``.
            action: Label placed in the prompt and in fallback text.

        Returns:
            The normalized model payload, or the fallback payload when the
            reply could not be parsed.
        """
        prompt = self._build_prompt(candidate, evidence_packet, action)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You synthesize Beaver skill drafts from execution evidence. "
                        "Return only JSON with keys: frontmatter, content, change_reason."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0,
        )
        # A missing/empty reply is parsed as "" and therefore falls through
        # to the fallback payload.
        payload = self._parse_payload(response.content or "")
        if payload:
            return self._normalize_payload(payload, evidence_packet)
        return self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
        """Render the user prompt describing the candidate and its evidence.

        Tool names are read from ``evidence_packet.metadata`` under the keys
        ``tool_names`` and ``selected_tool_names``; when a list is empty a
        placeholder phrase is used instead.
        """
        tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        tool_section = ", ".join(tool_names) if tool_names else "none observed"
        selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names"))
        selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
            f"Reason: {candidate.reason}\n"
            f"Related skills: {candidate.related_skill_names}\n"
            f"Called tool names: {tool_section}\n"
            f"Run-selected tool names: {selected_tool_section}\n"
            f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
            + "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
            + "\n\nReturn JSON only. The frontmatter object must include:"
            + "\n- description: a concise skill description"
            + "\n- tools: an explicit JSON array of exact tool names this skill needs. "
            + "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
            + "Use [] only when no tool is required."
        )

    @staticmethod
    def _extract_embedded_object(text: str) -> Any:
        """Decode the outermost ``{...}`` span of *text*, or return ``None``."""
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return None

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Parse a model reply into a draft payload.

        Accepts bare JSON, JSON wrapped in a single Markdown code fence, and
        JSON surrounded by extra prose or fence markers (the outermost
        ``{...}`` span is tried when the direct parse fails).

        Returns:
            A dict with ``frontmatter`` (dict), ``content`` (stripped str) and
            ``change_reason`` (str, ``""`` when absent or falsy), or ``{}``
            when the reply is not a JSON object of the expected shape.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            # Only unwrap when both the opening and the closing fence are present.
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models frequently add an introduction, a trailing remark or an
            # unterminated fence around the JSON; salvage the object instead
            # of discarding an otherwise valid draft.
            payload = SkillDraftSynthesizer._extract_embedded_object(cleaned)
        if not isinstance(payload, dict):
            return {}
        frontmatter = payload.get("frontmatter")
        content_value = payload.get("content")
        if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
            return {}
        return {
            "frontmatter": frontmatter,
            "content": content_value.strip(),
            "change_reason": str(payload.get("change_reason") or ""),
        }

    @staticmethod
    def _normalize_payload(payload: dict[str, Any], evidence_packet: EvidencePacket) -> dict[str, Any]:
        """Return a copy of *payload* whose ``frontmatter["tools"]`` is a clean list.

        The model-provided ``tools`` value is coerced to a list of unique
        strings; when that list is empty, the ``tool_names`` recorded in the
        evidence metadata are used instead.
        """
        # Shallow-copy so the caller's frontmatter dict is not mutated.
        frontmatter = dict(payload.get("frontmatter") or {})
        tool_hints = _coerce_string_list(frontmatter.get("tools"))
        # NOTE(review): an explicit [] from the model is also replaced by the
        # observed tool names here, although the prompt tells the model to use
        # [] when no tool is required -- confirm this is intended.
        if not tool_hints:
            tool_hints = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        frontmatter["tools"] = tool_hints
        return {
            "frontmatter": frontmatter,
            "content": str(payload.get("content") or "").strip(),
            "change_reason": str(payload.get("change_reason") or ""),
        }

    @staticmethod
    def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
        """Build a draft directly from the evidence when the model reply is unusable.

        The title is the first related skill name (underscores replaced by
        hyphens) or ``"generated-skill"``; the body lists at most the first
        five task summaries.
        """
        related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
        title = related.replace("_", "-")
        content = "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5]) or "- No evidence captured."
        return {
            "frontmatter": {
                "description": candidate.reason or f"Auto-generated {action} draft for {title}.",
                "tools": _coerce_string_list(evidence_packet.metadata.get("tool_names")),
            },
            "content": f"# {title}\n\n## Evidence\n\n{content}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
        }
|
|
|
|
|
|
def _coerce_string_list(value: Any) -> list[str]:
|
|
raw_items: list[Any]
|
|
if isinstance(value, list):
|
|
raw_items = value
|
|
elif isinstance(value, str):
|
|
raw_items = value.split(",")
|
|
else:
|
|
raw_items = []
|
|
|
|
result: list[str] = []
|
|
for item in raw_items:
|
|
cleaned = str(item).strip()
|
|
if cleaned and cleaned not in result:
|
|
result.append(cleaned)
|
|
return result
|