修改了nanobot，往Hermes agent的风格走，进度1/3

2026-04-20 18:11:14 +08:00
parent cdfc222c9f
commit 36882a7d7b
261 changed files with 12659 additions and 604 deletions
--- a/app-instance/backend/beaver/skills/assembler/task_assembler.py
+++ b/app-instance/backend/beaver/skills/assembler/task_assembler.py
@ -0,0 +1,168 @@
+"""LLM-driven skill assembler.
+
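+This layer no longer does its own rule-based scoring; it simply hands:
+1. the task description
+2. summaries of the candidate skills recalled by embedding retrieval
+
+to a model, which decides which skills to activate for this run.
+
+The current goals are deliberately restrained:
+- keep the input as simple as possible
+- output only skill names
+- return an empty skill list when nothing matches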
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+import json
+from typing import Any
+
+from beaver.engine.context import SkillContext
+from beaver.engine.providers.base import LLMProvider
+from beaver.engine.providers.runtime import ProviderRuntime
+from beaver.skills.catalog.loader import SkillsLoader
+from beaver.skills.catalog.utils import strip_frontmatter
+from .embedding_retriever import SkillEmbeddingRetriever
+
+
+@dataclass(slots=True)
+class SkillAssemblyResult:
+    """Skills that one assembly pass actually injects into the current run."""
+
+    # Skills chosen for the run; each instance gets its own fresh empty list.
+    activated_skills: list[SkillContext] = field(default_factory=list)
+
+
+class SkillAssembler:
+    """Pick the skills for the current run by asking an LLM.
+
+    The loader supplies the selection candidates, the retriever narrows them
+    for the task description, and a chat model returns the names to activate.
+    """
+
+    def __init__(
+        self,
+        loader: SkillsLoader,
+        retriever: SkillEmbeddingRetriever | None = None,
+    ) -> None:
+        """Store the collaborators used by :meth:`assemble`.
+
+        Args:
+            loader: Source of selection candidates and of skill bodies.
+            retriever: Candidate retriever. When omitted (or falsy), a default
+                ``SkillEmbeddingRetriever()`` is created.
+        """
+        self.loader = loader
+        self.retriever = retriever or SkillEmbeddingRetriever()
+
+    async def assemble(
+        self,
+        *,
+        task_description: str,
+        provider: LLMProvider,
+        model: str,
+        embedding_runtime: ProviderRuntime | None = None,
+        top_k: int = 12,
+    ) -> SkillAssemblyResult:
+        """Select and load the skills to inject into the current run.
+
+        Args:
+            task_description: Used as the retriever query and shown to the
+                selecting model.
+            provider: Chat provider that performs the selection.
+            model: Model name forwarded to ``provider.chat``.
+            embedding_runtime: When given, its ``api_key``, ``api_base`` and
+                ``model`` are forwarded to the retriever; otherwise ``None``
+                is passed for each of them.
+            top_k: Forwarded to the retriever unchanged.
+
+        Returns:
+            A ``SkillAssemblyResult``. It is empty when the loader or the
+            retriever yields no candidates, when the model selects nothing,
+            or when every selected skill has no content left after its
+            frontmatter is stripped.
+        """
+        candidates = self.loader.build_selection_candidates()
+        if not candidates:
+            return SkillAssemblyResult()
+
+        candidates = await self.retriever.retrieve(
+            query=task_description,
+            candidates=candidates,
+            top_k=top_k,
+            api_key=embedding_runtime.api_key if embedding_runtime is not None else None,
+            api_base=embedding_runtime.api_base if embedding_runtime is not None else None,
+            model=embedding_runtime.model if embedding_runtime is not None else None,
+        )
+        if not candidates:
+            return SkillAssemblyResult()
+
+        selected_names = await self._select_skill_names(
+            task_description=task_description,
+            candidates=candidates,
+            provider=provider,
+            model=model,
+        )
+        if not selected_names:
+            return SkillAssemblyResult()
+
+        activated_skills: list[SkillContext] = []
+        for name in selected_names:
+            raw_content = self.loader.load_skill(name)
+            content = strip_frontmatter(raw_content).strip() if raw_content else ""
+            if not content:
+                # Skills that fail to load, or are empty once the frontmatter
+                # is removed, are skipped rather than injected as blanks.
+                continue
+            activated_skills.append(SkillContext(name=name, content=content))
+
+        return SkillAssemblyResult(activated_skills=activated_skills)
+
+    async def _select_skill_names(
+        self,
+        *,
+        task_description: str,
+        candidates: list[dict[str, str]],
+        provider: LLMProvider,
+        model: str,
+    ) -> list[str]:
+        """Ask the model which candidate skills to activate.
+
+        Args:
+            task_description: Task text embedded in the user message.
+            candidates: Candidate dicts; each must provide ``"name"`` and
+                ``"description"``.
+            provider: Chat provider to call.
+            model: Model name forwarded to ``provider.chat``.
+
+        Returns:
+            Candidate names chosen by the model, de-duplicated and in the
+            order the model listed them. Empty when the response reports
+            ``finish_reason == "error"``, has no content, or cannot be parsed.
+        """
+        candidate_summary = self._render_candidates(candidates)
+        candidate_names = {item["name"] for item in candidates}
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You select Beaver skills for a single run. "
+                    "Given a task description and candidate skill summaries, "
+                    "return only a JSON array of skill names to activate. "
+                    "Do not invent names. If nothing matches, return []."
+                ),
+            },
+            {
+                "role": "user",
+                "content": (
+                    f"Task description:\n{task_description}\n\n"
+                    f"Candidate skills:\n{candidate_summary}\n\n"
+                    "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
+                ),
+            },
+        ]
+        response = await provider.chat(
+            messages=messages,
+            tools=None,
+            model=model,
+            max_tokens=512,
+            temperature=0,
+        )
+        if response.finish_reason == "error" or not response.content:
+            return []
+
+        parsed = self._parse_selected_names(response.content)
+
+        # Keep only names that really exist in the current candidate set.
+        # dict.fromkeys de-duplicates while preserving the model's order.
+        return list(dict.fromkeys(name for name in parsed if name in candidate_names))
+
+    @staticmethod
+    def _render_candidates(candidates: list[dict[str, str]]) -> str:
+        """Render candidates as one ``- name: description`` line each."""
+        return "\n".join(
+            f"- {item['name']}: {item['description']}" for item in candidates
+        )
+
+    @staticmethod
+    def _parse_selected_names(content: str) -> list[str]:
+        """Extract skill names from the model's raw reply.
+
+        Accepted shapes:
+        - a bare JSON array of strings;
+        - a JSON object holding that array under ``skills``,
+          ``selected_skills``, ``activated_skills`` or ``selected``;
+        - either of the above wrapped in a multi-line ``` code fence;
+        - a JSON array embedded in surrounding text or in a single-line
+          fence (fallback: the outermost ``[...]`` span is parsed).
+
+        Returns:
+            The stripped, non-empty string items of the array. Non-string
+            items are dropped. An empty list is returned when nothing can be
+            parsed.
+        """
+        cleaned = content.strip()
+        if cleaned.startswith("```"):
+            lines = cleaned.splitlines()
+            if len(lines) >= 3 and lines[-1].startswith("```"):
+                cleaned = "\n".join(lines[1:-1]).strip()
+
+        try:
+            payload: Any = json.loads(cleaned)
+        except json.JSONDecodeError:
+            # Models often add prose around the array despite the prompt.
+            # Retry on the outermost bracketed span instead of silently
+            # dropping an otherwise valid selection.
+            start = cleaned.find("[")
+            end = cleaned.rfind("]")
+            if start == -1 or end <= start:
+                return []
+            try:
+                payload = json.loads(cleaned[start : end + 1])
+            except json.JSONDecodeError:
+                return []
+
+        if isinstance(payload, dict):
+            for key in ("skills", "selected_skills", "activated_skills", "selected"):
+                value = payload.get(key)
+                if isinstance(value, list):
+                    payload = value
+                    break
+
+        if not isinstance(payload, list):
+            return []
+        return [item.strip() for item in payload if isinstance(item, str) and item.strip()]