修改了nanobot,往Hermes agent的风格走,进度1/3
This commit is contained in:
168
app-instance/backend/beaver/skills/assembler/task_assembler.py
Normal file
168
app-instance/backend/beaver/skills/assembler/task_assembler.py
Normal file
@ -0,0 +1,168 @@
|
||||
"""LLM-driven skill assembler.
|
||||
|
||||
这层现在不再自己做规则打分,而是直接把:
|
||||
1. task description
|
||||
2. embedding 召回后的候选 skill 摘要
|
||||
|
||||
交给一个模型来决定本轮要激活哪些 skill。
|
||||
|
||||
当前目标非常克制:
|
||||
- 输入尽量简单
|
||||
- 输出只要 skill 名称
|
||||
- 没有命中就返回空 skills
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers.base import LLMProvider
|
||||
from beaver.engine.providers.runtime import ProviderRuntime
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
from .embedding_retriever import SkillEmbeddingRetriever
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillAssemblyResult:
|
||||
"""一次装配后真正要注入当前 run 的 skills。"""
|
||||
|
||||
activated_skills: list[SkillContext] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillAssembler:
    """Use an LLM to choose the skills for the current run from a task description."""

    def __init__(
        self,
        loader: SkillsLoader,
        retriever: SkillEmbeddingRetriever | None = None,
    ) -> None:
        """Store the collaborators used during assembly.

        Args:
            loader: Provides the selection candidates and the skill bodies.
            retriever: Narrows the candidate list before the LLM is asked;
                a default ``SkillEmbeddingRetriever()`` is created when omitted.
        """
        self.loader = loader
        self.retriever = retriever or SkillEmbeddingRetriever()

    async def assemble(
        self,
        *,
        task_description: str,
        provider: LLMProvider,
        model: str,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 12,
    ) -> SkillAssemblyResult:
        """Select and load the skills to activate for one run.

        Steps: build candidates from the loader, narrow them with the
        retriever, let the LLM pick names, then load each picked skill's
        content with its frontmatter stripped.

        Args:
            task_description: Text describing the task; used both as the
                retrieval query and in the LLM prompt.
            provider: LLM provider whose ``chat`` performs the selection.
            model: Model name passed to ``provider.chat``.
            embedding_runtime: Optional source of ``api_key`` / ``api_base`` /
                ``model`` for the retriever; ``None`` is passed for each of
                them when it is absent.
            top_k: Forwarded to the retriever as ``top_k``.

        Returns:
            A ``SkillAssemblyResult``; it is empty when there are no
            candidates, the retriever returns nothing, or the model selects
            nothing. Selected skills whose content is empty are skipped.
        """
        candidates = self.loader.build_selection_candidates()
        if not candidates:
            return SkillAssemblyResult()

        candidates = await self.retriever.retrieve(
            query=task_description,
            candidates=candidates,
            top_k=top_k,
            api_key=embedding_runtime.api_key if embedding_runtime is not None else None,
            api_base=embedding_runtime.api_base if embedding_runtime is not None else None,
            model=embedding_runtime.model if embedding_runtime is not None else None,
        )
        if not candidates:
            return SkillAssemblyResult()

        selected_names = await self._select_skill_names(
            task_description=task_description,
            candidates=candidates,
            provider=provider,
            model=model,
        )
        if not selected_names:
            return SkillAssemblyResult()

        activated_skills: list[SkillContext] = []
        for name in selected_names:
            raw_content = self.loader.load_skill(name)
            content = strip_frontmatter(raw_content).strip() if raw_content else ""
            if not content:
                # Nothing usable to inject for this skill; drop it.
                continue
            activated_skills.append(SkillContext(name=name, content=content))

        return SkillAssemblyResult(activated_skills=activated_skills)

    async def _select_skill_names(
        self,
        *,
        task_description: str,
        candidates: list[dict[str, str]],
        provider: LLMProvider,
        model: str,
    ) -> list[str]:
        """Ask the model which candidate skills to activate.

        Args:
            task_description: Task text placed in the user message.
            candidates: Candidate entries; each must carry ``"name"`` and
                ``"description"`` keys.
            provider: LLM provider whose ``chat`` is awaited.
            model: Model name passed to ``provider.chat``.

        Returns:
            Skill names in the order the model produced them, restricted to
            names present in ``candidates`` and without duplicates. Empty when
            the response has ``finish_reason == "error"``, has no content, or
            contains no parseable names.
        """
        candidate_summary = self._render_candidates(candidates)
        candidate_names = {item["name"] for item in candidates}
        messages = [
            {
                "role": "system",
                "content": (
                    "You select Beaver skills for a single run. "
                    "Given a task description and candidate skill summaries, "
                    "return only a JSON array of skill names to activate. "
                    "Do not invent names. If nothing matches, return []."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Task description:\n{task_description}\n\n"
                    f"Candidate skills:\n{candidate_summary}\n\n"
                    "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
                ),
            },
        ]
        response = await provider.chat(
            messages=messages,
            tools=None,
            model=model,
            max_tokens=512,
            temperature=0,
        )
        if response.finish_reason == "error" or not response.content:
            return []

        parsed = self._parse_selected_names(response.content)
        if not parsed:
            return []

        # Keep only names that really exist in the current candidate set,
        # preserving the model's output order; dict.fromkeys drops repeats
        # while keeping first-seen order.
        return list(dict.fromkeys(name for name in parsed if name in candidate_names))

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as one ``- name: description`` line each."""
        return "\n".join(
            f"- {item['name']}: {item['description']}" for item in candidates
        )

    @staticmethod
    def _decode_embedded_json(text: str) -> Any:
        """Return the first JSON array or object found inside *text*, or ``None``.

        Used as a fallback when the model surrounds its JSON with prose or
        leaves a Markdown fence unclosed.
        """
        decoder = json.JSONDecoder()
        for index, char in enumerate(text):
            if char not in "[{":
                continue
            try:
                value, _ = decoder.raw_decode(text, index)
            except json.JSONDecodeError:
                # Not valid JSON from this bracket; try the next one.
                continue
            return value
        return None

    @staticmethod
    def _parse_selected_names(content: str) -> list[str]:
        """Extract skill names from the model's raw reply.

        Accepts a JSON array of strings, or a JSON object that holds such an
        array under ``skills``, ``selected_skills``, ``activated_skills`` or
        ``selected``. A surrounding Markdown code fence is removed first. If
        the whole reply is not valid JSON, the first JSON array/object
        embedded in it is used instead.

        Args:
            content: Raw text returned by the model.

        Returns:
            Stripped, non-empty string items of the decoded list; ``[]`` when
            nothing usable can be decoded.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()

        try:
            payload: Any = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models often add explanations or get cut off mid-fence; salvage
            # the JSON value instead of discarding the whole answer.
            payload = SkillAssembler._decode_embedded_json(cleaned)
            if payload is None:
                return []

        if isinstance(payload, dict):
            for key in ("skills", "selected_skills", "activated_skills", "selected"):
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break

        if not isinstance(payload, list):
            return []
        return [item.strip() for item in payload if isinstance(item, str) and item.strip()]
|
||||
Reference in New Issue
Block a user