集成新的Beaver后端服务到应用实例中,替换原有的nanobot实现。 主要变更包括: - 在Dockerfile和环境配置中添加Beaver相关路径和配置变量 - 更新工作目录结构从.nanobot到.beaver - 实现Beaver引擎加载器,支持配置文件加载和工具组装 - 添加内置工具如ListDirectoryTool、ReadFileTool、SearchFilesTool - 更新消息处理流程,支持通道适配器和网关模式 - 重构技能系统,支持显式工具提示和嵌入式检索 - 改进错误处理和生命周期管理 此变更使应用实例能够使用统一的Beaver后端进行AI代理运行时管理。
174 lines
5.9 KiB
Python
174 lines
5.9 KiB
Python
"""LLM-driven skill assembler.
|
|
|
|
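This layer no longer scores skills with its own rules. Instead it hands

1. the task description, and
2. the summaries of the candidate skills recalled by embedding retrieval

to a model, which decides which skills to activate for the current run.

The current goals are deliberately modest:

- keep the input as simple as possible
- output only skill names
- return an empty skill list when nothing matches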
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
import json
|
|
from typing import Any
|
|
|
|
from beaver.engine.context import SkillContext
|
|
from beaver.engine.providers.base import LLMProvider
|
|
from beaver.engine.providers.runtime import ProviderRuntime
|
|
from beaver.skills.catalog.loader import SkillsLoader
|
|
from beaver.skills.catalog.utils import strip_frontmatter
|
|
from .embedding_retriever import SkillEmbeddingRetriever
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillAssemblyResult:
|
|
"""一次装配后真正要注入当前 run 的 skills。"""
|
|
|
|
activated_skills: list[SkillContext] = field(default_factory=list)
|
|
|
|
|
|
class SkillAssembler:
    """Choose the skills for the current run by asking an LLM.

    The assembler takes the loader's selection candidates, narrows them with
    the embedding retriever, asks the chat model which of the remaining
    candidates to activate for the task description, and finally loads the
    content of every chosen skill.
    """

    def __init__(
        self,
        loader: SkillsLoader,
        retriever: SkillEmbeddingRetriever | None = None,
    ) -> None:
        """Store the collaborators used during assembly.

        Args:
            loader: Provides the selection candidates and the skill content.
            retriever: Embedding retriever used to narrow the candidates. A
                default ``SkillEmbeddingRetriever`` is created when omitted.
        """
        self.loader = loader
        # Compare against None explicitly so that a caller-supplied retriever
        # that happens to be falsy (for example one defining ``__len__``) is
        # not silently swapped for the default implementation.
        if retriever is None:
            retriever = SkillEmbeddingRetriever()
        self.retriever = retriever

    async def assemble(
        self,
        *,
        task_description: str,
        provider: LLMProvider,
        model: str,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 12,
    ) -> SkillAssemblyResult:
        """Pick and load the skills to activate for ``task_description``.

        Args:
            task_description: Text describing the task of the current run.
            provider: Chat provider used to make the selection.
            model: Model name forwarded to ``provider.chat``.
            embedding_runtime: Optional runtime whose ``api_key``,
                ``api_base``, ``model``, ``extra_headers`` and
                ``request_timeout_seconds`` are forwarded to the retriever;
                ``None`` is forwarded for each of them when it is omitted.
            top_k: Forwarded to the retriever as ``top_k``.

        Returns:
            A ``SkillAssemblyResult`` holding the activated skills in the
            order the model returned them. The result is empty when there are
            no candidates, the retriever returns none, or the model selects
            nothing.
        """
        candidates = self.loader.build_selection_candidates()
        if not candidates:
            return SkillAssemblyResult()

        has_runtime = embedding_runtime is not None
        candidates = await self.retriever.retrieve(
            query=task_description,
            candidates=candidates,
            top_k=top_k,
            api_key=embedding_runtime.api_key if has_runtime else None,
            api_base=embedding_runtime.api_base if has_runtime else None,
            model=embedding_runtime.model if has_runtime else None,
            extra_headers=embedding_runtime.extra_headers if has_runtime else None,
            timeout_seconds=(
                embedding_runtime.request_timeout_seconds if has_runtime else None
            ),
            fallback_top_k=None,
        )
        if not candidates:
            return SkillAssemblyResult()

        selected_names = await self._select_skill_names(
            task_description=task_description,
            candidates=candidates,
            provider=provider,
            model=model,
        )
        if not selected_names:
            return SkillAssemblyResult()

        activated_skills: list[SkillContext] = []
        for name in selected_names:
            raw_content = self.loader.load_skill(name)
            content = strip_frontmatter(raw_content).strip() if raw_content else ""
            # Skip skills that have no content left once the frontmatter is
            # stripped.
            if not content:
                continue
            activated_skills.append(SkillContext(name=name, content=content))

        return SkillAssemblyResult(activated_skills=activated_skills)

    async def _select_skill_names(
        self,
        *,
        task_description: str,
        candidates: list[dict[str, str]],
        provider: LLMProvider,
        model: str,
    ) -> list[str]:
        """Ask the model which candidate skills to activate.

        Args:
            task_description: Text describing the task of the current run.
            candidates: Candidate entries; each must provide ``"name"`` and
                ``"description"`` keys.
            provider: Chat provider that receives the selection prompt.
            model: Model name forwarded to ``provider.chat``.

        Returns:
            The selected skill names, restricted to names present in
            ``candidates``, de-duplicated, and in the order the model listed
            them. Empty when the response has ``finish_reason == "error"``,
            has no content, or cannot be parsed.
        """
        candidate_summary = self._render_candidates(candidates)
        candidate_names = {item["name"] for item in candidates}
        messages = [
            {
                "role": "system",
                "content": (
                    "You select Beaver skills for a single run. "
                    "Given a task description and candidate skill summaries, "
                    "return only a JSON array of skill names to activate. "
                    "Do not invent names. If nothing matches, return []."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Task description:\n{task_description}\n\n"
                    f"Candidate skills:\n{candidate_summary}\n\n"
                    "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
                ),
            },
        ]
        response = await provider.chat(
            messages=messages,
            tools=None,
            model=model,
            max_tokens=512,
            temperature=0,
        )
        if response.finish_reason == "error" or not response.content:
            return []

        parsed = self._parse_selected_names(response.content)
        if not parsed:
            return []

        # Keep only names that really exist in the current candidate set,
        # preserving the order of the model output and dropping repeats.
        seen: set[str] = set()
        filtered: list[str] = []
        for name in parsed:
            if name in candidate_names and name not in seen:
                seen.add(name)
                filtered.append(name)
        return filtered

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render the candidates as one ``- name: description`` line each.

        Args:
            candidates: Candidate entries with ``"name"`` and
                ``"description"`` keys.

        Returns:
            The lines joined by newlines; an empty string for no candidates.

        Raises:
            KeyError: If an entry lacks ``"name"`` or ``"description"``.
        """
        return "\n".join(
            f"- {item['name']}: {item['description']}" for item in candidates
        )

    @staticmethod
    def _parse_selected_names(content: str) -> list[str]:
        """Extract skill names from the raw model response.

        Accepted shapes are a JSON array of strings, or a JSON object holding
        such an array under ``skills``, ``selected_skills``,
        ``activated_skills`` or ``selected``. A surrounding Markdown code
        fence is removed first. If the response as a whole is not valid JSON,
        the outermost ``[...]`` span is tried so that an answer wrapped in
        extra prose is still understood.

        Args:
            content: Text returned by the model.

        Returns:
            The non-empty string items with surrounding whitespace stripped;
            an empty list when nothing usable can be parsed.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()

        try:
            payload: Any = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models sometimes add prose around the array despite the prompt;
            # fall back to the outermost bracketed span before giving up.
            start = cleaned.find("[")
            end = cleaned.rfind("]")
            if start == -1 or end <= start:
                return []
            try:
                payload = json.loads(cleaned[start : end + 1])
            except json.JSONDecodeError:
                return []

        if isinstance(payload, dict):
            # Tolerate an object wrapper: use the first known key that holds
            # a list.
            for key in ("skills", "selected_skills", "activated_skills", "selected"):
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break

        if not isinstance(payload, list):
            return []
        return [item.strip() for item in payload if isinstance(item, str) and item.strip()]
|