Files
beaver_project/app-instance/backend/beaver/skills/assembler/task_assembler.py
steven_li 5ba5c7e4c1 feat(app-instance): 集成Beaver后端并更新配置管理
集成新的Beaver后端服务到应用实例中,替换原有的nanobot实现。

主要变更包括:
- 在Dockerfile和环境配置中添加Beaver相关路径和配置变量
- 更新工作目录结构从.nanobot到.beaver
- 实现Beaver引擎加载器,支持配置文件加载和工具组装
- 添加内置工具如ListDirectoryTool、ReadFileTool、SearchFilesTool
- 更新消息处理流程,支持通道适配器和网关模式
- 重构技能系统,支持显式工具提示和嵌入式检索
- 改进错误处理和生命周期管理

此变更使应用实例能够使用统一的Beaver后端进行AI代理运行时管理。
2026-04-27 17:37:40 +08:00

174 lines
5.9 KiB
Python

"""LLM-driven skill assembler.
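This layer no longer does its own rule-based scoring. Instead it passes:
1. the task description
2. summaries of the candidate skills recalled by embedding retrieval
to a model, which decides which skills to activate for this run.
The current goal is deliberately modest:
- keep the input as simple as possible
- output only skill names
- return an empty skills list when nothing matches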
"""
from __future__ import annotations
from dataclasses import dataclass, field
import json
from typing import Any
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider
from beaver.engine.providers.runtime import ProviderRuntime
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.catalog.utils import strip_frontmatter
from .embedding_retriever import SkillEmbeddingRetriever
@dataclass(slots=True)
class SkillAssemblyResult:
"""一次装配后真正要注入当前 run 的 skills。"""
activated_skills: list[SkillContext] = field(default_factory=list)
class SkillAssembler:
    """Pick the skills for the current run by asking an LLM.

    The pipeline is: load all selection candidates from the loader, narrow
    them with the retriever, let the model choose names among the remaining
    candidates, then load the content of every chosen skill.
    """

    # Keys under which a JSON *object* reply may carry the list of names.
    _NAME_LIST_KEYS = ("skills", "selected_skills", "activated_skills", "selected")

    def __init__(
        self,
        loader: SkillsLoader,
        retriever: SkillEmbeddingRetriever | None = None,
    ) -> None:
        """Store the skill loader and the retriever used to narrow candidates.

        Args:
            loader: Source of selection candidates and of skill content.
            retriever: Candidate retriever; a default
                ``SkillEmbeddingRetriever`` is created when omitted.
        """
        self.loader = loader
        self.retriever = retriever or SkillEmbeddingRetriever()

    async def assemble(
        self,
        *,
        task_description: str,
        provider: LLMProvider,
        model: str,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 12,
    ) -> SkillAssemblyResult:
        """Assemble the skills to activate for ``task_description``.

        Args:
            task_description: Text describing the task of the current run.
            provider: LLM provider used to choose among the candidates.
            model: Model name passed to ``provider.chat``.
            embedding_runtime: Optional runtime whose credentials and settings
                are forwarded to the retriever; ``None`` forwards ``None`` for
                each of them.
            top_k: Forwarded to the retriever as ``top_k``.

        Returns:
            A ``SkillAssemblyResult`` holding the activated skills in the
            order the model named them. It is empty when there are no
            candidates, when the retriever returns none, or when the model
            selects none.
        """
        candidates = self.loader.build_selection_candidates()
        if not candidates:
            return SkillAssemblyResult()

        # Resolve the embedding settings once instead of re-testing
        # ``embedding_runtime`` for every keyword argument.
        if embedding_runtime is None:
            api_key = api_base = embedding_model = extra_headers = timeout_seconds = None
        else:
            api_key = embedding_runtime.api_key
            api_base = embedding_runtime.api_base
            embedding_model = embedding_runtime.model
            extra_headers = embedding_runtime.extra_headers
            timeout_seconds = embedding_runtime.request_timeout_seconds

        candidates = await self.retriever.retrieve(
            query=task_description,
            candidates=candidates,
            top_k=top_k,
            api_key=api_key,
            api_base=api_base,
            model=embedding_model,
            extra_headers=extra_headers,
            timeout_seconds=timeout_seconds,
            # NOTE(review): the meaning of ``fallback_top_k=None`` is defined
            # by the retriever, which is not visible here.
            fallback_top_k=None,
        )
        if not candidates:
            return SkillAssemblyResult()

        selected_names = await self._select_skill_names(
            task_description=task_description,
            candidates=candidates,
            provider=provider,
            model=model,
        )
        if not selected_names:
            return SkillAssemblyResult()

        activated_skills: list[SkillContext] = []
        for name in selected_names:
            raw_content = self.loader.load_skill(name)
            content = strip_frontmatter(raw_content).strip() if raw_content else ""
            if not content:
                # Nothing usable to inject for this skill; skip it.
                continue
            activated_skills.append(SkillContext(name=name, content=content))
        return SkillAssemblyResult(activated_skills=activated_skills)

    async def _select_skill_names(
        self,
        *,
        task_description: str,
        candidates: list[dict[str, str]],
        provider: LLMProvider,
        model: str,
    ) -> list[str]:
        """Ask the model which candidate skills to activate.

        Args:
            task_description: Text describing the task of the current run.
            candidates: Candidate entries; each is read for its ``"name"``
                and ``"description"`` keys.
            provider: LLM provider whose ``chat`` coroutine is awaited.
            model: Model name passed to ``provider.chat``.

        Returns:
            De-duplicated skill names, in the model's output order, limited
            to names present in ``candidates``. Empty when the response
            reports an error, has no content, or cannot be parsed.
        """
        candidate_summary = self._render_candidates(candidates)
        candidate_names = {item["name"] for item in candidates}
        messages = [
            {
                "role": "system",
                "content": (
                    "You select Beaver skills for a single run. "
                    "Given a task description and candidate skill summaries, "
                    "return only a JSON array of skill names to activate. "
                    "Do not invent names. If nothing matches, return []."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Task description:\n{task_description}\n\n"
                    f"Candidate skills:\n{candidate_summary}\n\n"
                    "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
                ),
            },
        ]
        response = await provider.chat(
            messages=messages,
            tools=None,
            model=model,
            max_tokens=512,
            temperature=0,
        )
        if response.finish_reason == "error" or not response.content:
            return []

        parsed = self._parse_selected_names(response.content)
        # Keep only names that really exist in the current candidate set,
        # dropping duplicates while preserving the model's output order.
        return list(dict.fromkeys(name for name in parsed if name in candidate_names))

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as one ``- name: description`` line each."""
        return "\n".join(
            f"- {item['name']}: {item['description']}" for item in candidates
        )

    @staticmethod
    def _extract_embedded_json(text: str) -> Any:
        """Return the first JSON array/object embedded in ``text``, else ``None``."""
        decoder = json.JSONDecoder()
        for index, char in enumerate(text):
            if char not in "[{":
                continue
            try:
                value, _end = decoder.raw_decode(text, index)
            except json.JSONDecodeError:
                # Not valid JSON from this bracket; try the next one.
                continue
            return value
        return None

    @staticmethod
    def _parse_selected_names(content: str) -> list[str]:
        """Extract skill names from the model's raw reply.

        Accepted shapes are a JSON array of strings, or a JSON object holding
        such an array under one of ``_NAME_LIST_KEYS``. A surrounding Markdown
        code fence is removed first. If the reply as a whole is not valid
        JSON (prose around the JSON, a one-line fence, ...), the first
        embedded JSON array/object is used instead of discarding the reply.

        Args:
            content: Raw text returned by the model.

        Returns:
            Stripped, non-empty string items in reply order; non-string items
            are ignored. Empty when no usable list is found.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            # Only unwrap a fence that has its own opening and closing lines.
            if len(lines) >= 3 and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()

        try:
            payload: Any = json.loads(cleaned)
        except json.JSONDecodeError:
            payload = SkillAssembler._extract_embedded_json(cleaned)

        if isinstance(payload, dict):
            for key in SkillAssembler._NAME_LIST_KEYS:
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break
        if not isinstance(payload, list):
            return []
        return [item.strip() for item in payload if isinstance(item, str) and item.strip()]