修改了nanobot,往Hermes agent的风格走,进度1/3

This commit is contained in:
2026-04-20 18:11:14 +08:00
parent cdfc222c9f
commit 36882a7d7b
261 changed files with 12659 additions and 604 deletions

View File

@ -0,0 +1,12 @@
"""Skill system for Beaver."""
from .assembler import SkillAssembler, SkillAssemblyResult, SkillEmbeddingRetriever
from .catalog import SkillRecord, SkillsLoader
__all__ = [
"SkillAssembler",
"SkillAssemblyResult",
"SkillEmbeddingRetriever",
"SkillRecord",
"SkillsLoader",
]

View File

@ -0,0 +1,6 @@
"""Skill assembly for Beaver."""
from .embedding_retriever import SkillEmbeddingRetriever
from .task_assembler import SkillAssemblyResult, SkillAssembler
__all__ = ["SkillAssemblyResult", "SkillAssembler", "SkillEmbeddingRetriever"]

View File

@ -0,0 +1,188 @@
"""Embedding-based skill candidate retrieval.
当前实现使用 OpenAI-compatible `/v1/embeddings` 接口调用
阿里云百炼 `text-embedding-v4` 做最小语义召回:
1. 复用当前 provider 的 `api_key/api_base`
2. 先用 embedding 相似度召回一小批候选
3. 再交给上层 LLM selector 做最终技能选择
"""
from __future__ import annotations

import asyncio
import json
import logging
import math
import os
from typing import Any
from urllib import request
class SkillEmbeddingRetriever:
"""用 OpenAI-compatible embeddings API 为 skill 选择做候选召回。"""
def __init__(
self,
*,
api_key_env: str = "OPENAI_API_KEY",
api_base_env: str = "OPENAI_API_BASE",
model: str = "text-embedding-v4",
timeout_seconds: float = 20.0,
) -> None:
self.api_key_env = api_key_env
self.api_base_env = api_base_env
self.model = model
self.timeout_seconds = timeout_seconds
async def retrieve(
self,
*,
query: str,
candidates: list[dict[str, str]],
top_k: int = 12,
api_key: str | None = None,
api_base: str | None = None,
model: str | None = None,
) -> list[dict[str, str]]:
"""按 embedding 相似度召回 top-k 候选。
如果没有可用的 API Key / base URL或者 embedding 调用失败,
当前阶段先退回到“全部候选交给 LLM selector”。
"""
if not candidates:
return []
resolved_api_key = api_key or os.getenv(self.api_key_env)
resolved_api_base = api_base or os.getenv(self.api_base_env)
if not resolved_api_key or not resolved_api_base:
return candidates
try:
query_embedding = await self._embed_texts(
api_key=resolved_api_key,
api_base=resolved_api_base,
texts=[query],
model=model or self.model,
)
candidate_texts = [self._candidate_text(item) for item in candidates]
candidate_embeddings = await self._embed_texts(
api_key=resolved_api_key,
api_base=resolved_api_base,
texts=candidate_texts,
model=model or self.model,
)
except Exception:
return candidates
if not query_embedding or not query_embedding[0] or len(candidate_embeddings) != len(candidates):
return candidates
query_vector = query_embedding[0]
scored: list[tuple[float, dict[str, str]]] = []
for candidate, vector in zip(candidates, candidate_embeddings, strict=False):
if not vector:
continue
scored.append((self._cosine_similarity(query_vector, vector), candidate))
scored.sort(key=lambda item: item[0], reverse=True)
return [item[1] for item in scored[:top_k]]
async def _embed_texts(
self,
*,
api_key: str,
api_base: str,
texts: list[str],
model: str,
) -> list[list[float]]:
"""调用 OpenAI-compatible embeddings 接口。
当前对齐的是你们实际在用的网关配置:
- `POST {api_base}/embeddings`
- `model=text-embedding-v4`
- `encoding_format=float`
"""
all_vectors: list[list[float]] = []
endpoint = self._normalize_embeddings_endpoint(api_base)
for start in range(0, len(texts), 10):
batch = texts[start:start + 10]
payload = await self._post_embeddings(
endpoint=endpoint,
api_key=api_key,
model=model,
texts=batch,
)
embeddings = payload.get("data") or []
embeddings = sorted(embeddings, key=lambda item: item.get("index", 0))
all_vectors.extend([list(item.get("embedding") or []) for item in embeddings])
return all_vectors
async def _post_embeddings(
self,
*,
endpoint: str,
api_key: str,
model: str,
texts: list[str],
) -> dict[str, Any]:
return await asyncio.to_thread(
self._post_embeddings_sync,
endpoint=endpoint,
api_key=api_key,
model=model,
texts=texts,
)
def _post_embeddings_sync(
self,
*,
endpoint: str,
api_key: str,
model: str,
texts: list[str],
) -> dict[str, Any]:
body = json.dumps(
{
"model": model,
"input": texts if len(texts) > 1 else texts[0],
"encoding_format": "float",
}
).encode("utf-8")
req = request.Request(
endpoint,
data=body,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
method="POST",
)
with request.urlopen(req, timeout=self.timeout_seconds) as response:
return json.loads(response.read().decode("utf-8"))
@staticmethod
def _candidate_text(candidate: dict[str, str]) -> str:
name = (candidate.get("name") or "").strip()
description = (candidate.get("description") or "").strip()
return f"{name}\n{description}".strip()
@staticmethod
def _normalize_embeddings_endpoint(api_base: str) -> str:
base = api_base.rstrip("/")
if base.endswith("/embeddings"):
return base
if base.endswith("/v1"):
return f"{base}/embeddings"
return f"{base}/v1/embeddings"
@staticmethod
def _cosine_similarity(left: list[float], right: list[float]) -> float:
if not left or not right or len(left) != len(right):
return -1.0
dot = sum(a * b for a, b in zip(left, right, strict=False))
left_norm = math.sqrt(sum(a * a for a in left))
right_norm = math.sqrt(sum(b * b for b in right))
if left_norm == 0 or right_norm == 0:
return -1.0
return dot / (left_norm * right_norm)

View File

@ -0,0 +1,168 @@
"""LLM-driven skill assembler.
这层现在不再自己做规则打分,而是直接把:
1. task description
2. embedding 召回后的候选 skill 摘要
交给一个模型来决定本轮要激活哪些 skill。
当前目标非常克制:
- 输入尽量简单
- 输出只要 skill 名称
- 没有命中就返回空 skills
"""
from __future__ import annotations
from dataclasses import dataclass, field
import json
from typing import Any
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider
from beaver.engine.providers.runtime import ProviderRuntime
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.catalog.utils import strip_frontmatter
from .embedding_retriever import SkillEmbeddingRetriever
@dataclass(slots=True)
class SkillAssemblyResult:
"""一次装配后真正要注入当前 run 的 skills。"""
activated_skills: list[SkillContext] = field(default_factory=list)
class SkillAssembler:
    """Choose the skills for the current run by asking an LLM.

    Candidates come from the loader, are narrowed by the embedding
    retriever, and the model then returns the names to activate.
    """

    def __init__(
        self,
        loader: SkillsLoader,
        retriever: SkillEmbeddingRetriever | None = None,
    ) -> None:
        """Keep the loader and use a default retriever when none is given."""
        self.loader = loader
        self.retriever = retriever or SkillEmbeddingRetriever()

    async def assemble(
        self,
        *,
        task_description: str,
        provider: LLMProvider,
        model: str,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 12,
    ) -> SkillAssemblyResult:
        """Select and load the skills to activate for ``task_description``.

        Args:
            task_description: Description of the task for this run.
            provider: Provider whose ``chat`` method performs the selection.
            model: Model name passed to ``provider.chat``.
            embedding_runtime: Optional runtime supplying ``api_key``,
                ``api_base`` and ``model`` for the embedding recall step.
            top_k: Maximum number of candidates kept after recall.

        Returns:
            A result holding one ``SkillContext`` per selected skill with a
            non-empty body; empty when nothing was selected.
        """
        candidates = self.loader.build_selection_candidates()
        if not candidates:
            return SkillAssemblyResult()

        candidates = await self.retriever.retrieve(
            query=task_description,
            candidates=candidates,
            top_k=top_k,
            api_key=embedding_runtime.api_key if embedding_runtime is not None else None,
            api_base=embedding_runtime.api_base if embedding_runtime is not None else None,
            model=embedding_runtime.model if embedding_runtime is not None else None,
        )
        if not candidates:
            return SkillAssemblyResult()

        selected_names = await self._select_skill_names(
            task_description=task_description,
            candidates=candidates,
            provider=provider,
            model=model,
        )
        if not selected_names:
            return SkillAssemblyResult()

        activated_skills: list[SkillContext] = []
        for name in selected_names:
            raw_content = self.loader.load_skill(name)
            content = strip_frontmatter(raw_content).strip() if raw_content else ""
            if not content:
                continue
            activated_skills.append(SkillContext(name=name, content=content))
        return SkillAssemblyResult(activated_skills=activated_skills)

    async def _select_skill_names(
        self,
        *,
        task_description: str,
        candidates: list[dict[str, str]],
        provider: LLMProvider,
        model: str,
    ) -> list[str]:
        """Ask the model which candidate names to activate.

        Returns the names in the model's order, without duplicates and
        restricted to names present in ``candidates``.  Returns an empty
        list when the response is an error, empty, or unparseable.
        """
        candidate_summary = self._render_candidates(candidates)
        candidate_names = {item["name"] for item in candidates}
        messages = [
            {
                "role": "system",
                "content": (
                    "You select Beaver skills for a single run. "
                    "Given a task description and candidate skill summaries, "
                    "return only a JSON array of skill names to activate. "
                    "Do not invent names. If nothing matches, return []."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Task description:\n{task_description}\n\n"
                    f"Candidate skills:\n{candidate_summary}\n\n"
                    "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
                ),
            },
        ]
        response = await provider.chat(
            messages=messages,
            tools=None,
            model=model,
            max_tokens=512,
            temperature=0,
        )
        if response.finish_reason == "error" or not response.content:
            return []

        # Keep only names from the candidate set, first occurrence wins.
        filtered: list[str] = []
        seen: set[str] = set()
        for name in self._parse_selected_names(response.content):
            if name in candidate_names and name not in seen:
                seen.add(name)
                filtered.append(name)
        return filtered

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as one ``- name: description`` line each."""
        return "\n".join(
            f"- {item['name']}: {item['description']}" for item in candidates
        )

    @staticmethod
    def _parse_selected_names(content: str) -> list[str]:
        """Extract skill names from the model's reply.

        Accepts a JSON array, a JSON object carrying the array under
        ``skills`` / ``selected_skills`` / ``activated_skills`` /
        ``selected``, either of those inside a Markdown code fence, or a
        JSON array embedded in surrounding prose.  Non-string and blank
        entries are dropped; anything else yields an empty list.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()

        try:
            payload: Any = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models often add a sentence around the array; retry on the
            # outermost [...] span before giving up.
            start, end = cleaned.find("["), cleaned.rfind("]")
            if start == -1 or end <= start:
                return []
            try:
                payload = json.loads(cleaned[start:end + 1])
            except json.JSONDecodeError:
                return []

        if isinstance(payload, dict):
            for key in ("skills", "selected_skills", "activated_skills", "selected"):
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break
        if not isinstance(payload, list):
            return []
        return [item.strip() for item in payload if isinstance(item, str) and item.strip()]

View File

@ -0,0 +1,2 @@
"""Built-in skill payloads."""

View File

@ -0,0 +1,5 @@
"""Skill catalog and indexing."""
from .loader import SkillRecord, SkillsLoader
__all__ = ["SkillRecord", "SkillsLoader"]

View File

@ -0,0 +1,281 @@
"""Beaver skills catalog loader。
第一版目标非常明确:
1. 扫描技能目录
2. 读取 `SKILL.md`
3. 解析前置元数据
4. 生成可注入上下文的正文与索引
这层不负责:
1. 动态选择本轮应该启用哪些 skill
2. skill review / publishing
3. skill 自动学习
这些决策属于 resolver 或更高层工作流。
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from .utils import (
check_requirements,
escape_xml,
get_missing_requirements,
parse_frontmatter,
parse_skill_metadata_blob,
strip_frontmatter,
)
@dataclass(slots=True)
class SkillRecord:
"""单个 skill 的目录级元数据。"""
name: str
path: Path
source: str
class SkillsLoader:
    """Discover and read skills from the workspace, plugin and builtin roots.

    A skill is a directory containing a ``SKILL.md`` file.  When the same
    name exists in several roots, the root with the higher priority
    (workspace, then extra/plugin directories, then builtin) wins.
    """

    def __init__(
        self,
        workspace: str | Path,
        *,
        builtin_skills_dir: str | Path | None = None,
        extra_dirs: list[str | Path] | None = None,
    ) -> None:
        """Record the directories that are scanned for skills.

        Args:
            workspace: Workspace root; its ``skills`` subdirectory is scanned.
            builtin_skills_dir: Builtin skills directory; defaults to the
                ``builtin`` directory next to this package.
            extra_dirs: Additional (plugin) skill directories.
        """
        self.workspace = Path(workspace)
        self.workspace_skills = self.workspace / "skills"
        if builtin_skills_dir is not None:
            self.builtin_skills = Path(builtin_skills_dir)
        else:
            self.builtin_skills = Path(__file__).resolve().parent.parent / "builtin"
        self.extra_dirs = [Path(item) for item in (extra_dirs or [])]

    def list_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
        """List the currently visible skills.

        Roots are visited in priority order (workspace, extra/plugin,
        builtin) and entries inside a root in sorted order.  For a
        duplicated name only the highest-priority skill is considered; if
        that one is filtered out as unavailable, lower-priority copies stay
        hidden too, so the listing always agrees with ``load_skill``.

        Args:
            filter_unavailable: Drop skills whose requirements are not met.
        """
        ordered_roots: list[tuple[str, Path]] = [
            ("workspace", self.workspace_skills),
            *[("plugin", path) for path in self.extra_dirs],
            ("builtin", self.builtin_skills),
        ]
        found: dict[str, SkillRecord] = {}
        seen: set[str] = set()
        for source, root in ordered_roots:
            if not root.is_dir():
                continue
            for skill_dir in sorted(root.iterdir()):
                skill_file = skill_dir / "SKILL.md"
                if not skill_dir.is_dir() or not skill_file.is_file():
                    continue
                name = skill_dir.name
                if name in seen:
                    continue
                # Claim the name before the availability check so an
                # unavailable override still shadows lower-priority copies.
                seen.add(name)
                record = SkillRecord(name=name, path=skill_file, source=source)
                if filter_unavailable and not self._record_available(record):
                    continue
                found[name] = record
        return list(found.values())

    def load_skill(self, name: str) -> str | None:
        """Return the raw ``SKILL.md`` text for ``name``, or ``None`` if unknown."""
        record = self._find_record(name)
        if record is None:
            return None
        return record.path.read_text(encoding="utf-8")

    def get_skill_record(self, name: str) -> SkillRecord | None:
        """Return the record for ``name``, or ``None`` if unknown."""
        return self._find_record(name)

    def get_skill_metadata(self, name: str) -> dict[str, Any] | None:
        """Return the parsed frontmatter for ``name``, or ``None`` if unknown."""
        content = self.load_skill(name)
        if content is None:
            return None
        metadata, _ = parse_frontmatter(content)
        return metadata

    def load_skills_for_context(self, skill_names: list[str]) -> str:
        """Join the bodies of the named skills into one context block.

        Each skill becomes a ``## name`` section; unknown skills and skills
        with an empty body are skipped.
        """
        sections: list[str] = []
        for name in skill_names:
            content = self.load_skill(name)
            if not content:
                continue
            body = strip_frontmatter(content).strip()
            if not body:
                continue
            sections.append(f"## {name}\n\n{body}")
        return "\n\n".join(sections)

    def build_skills_summary(self) -> str:
        """Build the ``<skills>`` index for injection into the system prompt.

        The index lists every skill (available or not) with its
        description, a load hint, up to 12 supporting files and, for
        unavailable skills, the missing requirements.  Skill bodies are not
        included.  Returns an empty string when there are no skills.
        """
        skills = self.list_skills(filter_unavailable=False)
        if not skills:
            return ""

        # NOTE(review): the leading whitespace inside these literals is
        # reproduced as found; confirm it matches the intended nesting.
        lines = ["<skills>"]
        for record in skills:
            # Read through the record we already hold instead of looking the
            # name up again, which would rescan every root per skill.
            frontmatter, _ = self._read_record(record)
            meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
            available = check_requirements(meta_blob)
            description = frontmatter.get("description") or record.name
            load_hint = f'Use skill_view(name="{record.name}") to load the full skill.'
            lines.append(f' <skill available="{str(available).lower()}">')
            lines.append(f" <name>{escape_xml(record.name)}</name>")
            lines.append(f" <description>{escape_xml(description)}</description>")
            lines.append(f" <load_hint>{escape_xml(load_hint)}</load_hint>")
            support_files = self._supporting_files(record)
            if support_files:
                lines.append(" <supporting_files>")
                for file_path in support_files[:12]:
                    lines.append(f" <file>{escape_xml(file_path)}</file>")
                if len(support_files) > 12:
                    lines.append(" <file>...additional files omitted...</file>")
                lines.append(" </supporting_files>")
            if not available:
                missing = get_missing_requirements(meta_blob)
                if missing:
                    lines.append(f" <requires>{escape_xml(missing)}</requires>")
            lines.append(" </skill>")
        lines.append("</skills>")
        return "\n".join(lines)

    def build_selection_candidates(self) -> list[dict[str, str]]:
        """Build the ``name`` / ``description`` candidates for the selector.

        Only available skills are included.  When the frontmatter has no
        description, the first three body lines (at most 240 characters)
        are used; failing that, the skill name.
        """
        candidates: list[dict[str, str]] = []
        for record in self.list_skills(filter_unavailable=True):
            frontmatter, body = self._read_record(record)
            description = str(frontmatter.get("description") or "").strip()
            if not description:
                body = body.strip()
                if body:
                    description = " ".join(body.splitlines()[:3])[:240].strip()
            candidates.append(
                {
                    "name": record.name,
                    "description": description or record.name,
                }
            )
        return candidates

    def list_skill_supporting_files(self, name: str) -> list[str]:
        """List a skill's supporting files as paths relative to its directory.

        Returns an empty list for an unknown skill.
        """
        record = self._find_record(name)
        if record is None:
            return []
        return self._supporting_files(record)

    def view_skill(self, name: str, file_path: str | None = None) -> tuple[str, str] | None:
        """Read a skill's ``SKILL.md`` or one of its supporting files.

        Args:
            name: Skill name.
            file_path: Path relative to the skill directory; ``SKILL.md`` is
                read when omitted or empty.

        Returns:
            ``(display_name, content)``, or ``None`` for an unknown skill.

        Raises:
            ValueError: If the skill is unavailable, the path escapes the
                skill directory, or the file is not UTF-8 text.
            FileNotFoundError: If the requested file does not exist.
        """
        record = self._find_record(name)
        if record is None:
            return None
        if not self._record_available(record):
            frontmatter, _ = self._read_record(record)
            meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
            missing = get_missing_requirements(meta_blob)
            detail = f" Missing requirements: {missing}." if missing else ""
            raise ValueError(f"Skill '{name}' is currently unavailable.{detail}")

        if not file_path:
            return ("SKILL.md", self._read_text_file(record.path, display_name="SKILL.md"))

        # Compare resolved paths on both sides: the resolved candidate is
        # never relative to an unresolved (relative or symlinked) directory.
        base_dir = record.path.parent.resolve()
        candidate = (base_dir / file_path).resolve()
        try:
            relative = candidate.relative_to(base_dir)
        except ValueError as exc:
            raise ValueError("Requested skill file must stay within the skill directory") from exc
        if not candidate.is_file():
            raise FileNotFoundError(f"Skill file '{file_path}' does not exist")
        display_name = str(relative)
        return (display_name, self._read_text_file(candidate, display_name=display_name))

    def get_always_skills(self) -> list[str]:
        """Return the names of available skills flagged as ``always``.

        The flag is read from the metadata blob or from a top-level
        ``always: true`` frontmatter entry.
        """
        result: list[str] = []
        for record in self.list_skills(filter_unavailable=True):
            frontmatter, _ = self._read_record(record)
            meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
            if meta_blob.get("always") or str(frontmatter.get("always", "")).lower() == "true":
                result.append(record.name)
        return result

    def _find_record(self, name: str) -> SkillRecord | None:
        """Return the record named ``name`` regardless of availability."""
        for record in self.list_skills(filter_unavailable=False):
            if record.name == name:
                return record
        return None

    def _record_available(self, record: SkillRecord) -> bool:
        """Tell whether the record's declared requirements are satisfied."""
        frontmatter, _ = self._read_record(record)
        meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
        return check_requirements(meta_blob)

    @staticmethod
    def _read_record(record: SkillRecord) -> tuple[dict[str, Any], str]:
        """Read the record's ``SKILL.md`` and split it into (frontmatter, body)."""
        return parse_frontmatter(record.path.read_text(encoding="utf-8"))

    @staticmethod
    def _supporting_files(record: SkillRecord) -> list[str]:
        """Collect regular, non-symlink files below the supporting subdirs."""
        skill_dir = record.path.parent
        results: list[str] = []
        for subdir in ("references", "templates", "scripts", "assets"):
            root = skill_dir / subdir
            if not root.exists():
                continue
            for file in sorted(root.rglob("*")):
                if file.is_file() and not file.is_symlink():
                    results.append(str(file.relative_to(skill_dir)))
        return results

    @staticmethod
    def _read_text_file(path: Path, *, display_name: str) -> str:
        """Read ``path`` as UTF-8, raising ``ValueError`` for non-text files."""
        try:
            return path.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            raise ValueError(
                f"Skill file '{display_name}' is not UTF-8 text and cannot be viewed with skill_view."
            ) from exc

    def _skill_available(self, name: str) -> bool:
        """Tell whether the named skill exists and meets its requirements."""
        record = self._find_record(name)
        if record is None:
            return False
        return self._record_available(record)

View File

@ -0,0 +1,122 @@
"""Skills catalog 的公共辅助函数。
这里专门放“解析和校验 skill 文件”的纯函数,避免 `loader.py` 里同时承担:
1. 目录扫描
2. frontmatter 解析
3. requirements 校验
4. 文本裁剪/格式化
把这些细节拆出来之后,skills catalog 的边界会更清楚;后面无论是 reviews、publisher
还是 runtime resolver,都可以复用同一套元数据解析规则。
"""
from __future__ import annotations
import json
import os
import re
import shutil
from typing import Any
def parse_frontmatter(content: str) -> tuple[dict[str, str], str]:
    """Parse the minimal ``key: value`` frontmatter at the top of a Markdown file.

    Only the simplest form is supported, so no YAML parser is needed::

        ---
        key: value
        key2: value2
        ---
        body...

    Both LF and CRLF line endings are accepted, and a leading UTF-8 byte
    order mark is ignored.  Values are stripped of surrounding whitespace
    and quotes; lines without a colon are skipped.

    Returns:
        ``(metadata, body)``.  When there is no well-formed frontmatter the
        metadata is empty and ``content`` is returned unchanged.
    """
    # A BOM left in by the decoder would otherwise hide the opening "---".
    text = content.removeprefix("\ufeff")
    if not text.startswith("---"):
        return {}, content

    match = re.match(r"^---\r?\n(.*?)\r?\n---(?:\r?\n)?", text, re.DOTALL)
    if match is None:
        return {}, content

    metadata: dict[str, str] = {}
    for line in match.group(1).splitlines():
        key, separator, value = line.partition(":")
        if not separator:
            continue
        metadata[key.strip()] = value.strip().strip('"\'')
    return metadata, text[match.end():].strip()
def strip_frontmatter(content: str) -> str:
    """Return only the skill body, with any frontmatter removed."""
    return parse_frontmatter(content)[1]
def parse_skill_metadata_blob(raw: str) -> dict[str, Any]:
    """Decode the JSON extension config stored in the ``metadata`` field.

    For compatibility with older layouts the config may be nested under a
    ``nanobot`` key or, failing that, an ``openclaw`` key; otherwise the
    top-level object itself is used.  The fields of interest in this first
    version are ``always`` and ``requires``.

    Returns an empty dict when ``raw`` is not valid JSON, is not a JSON
    object, or the selected section is not an object.
    """
    try:
        decoded = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return {}
    if not isinstance(decoded, dict):
        return {}

    # Key presence (not truthiness) decides which section is selected.
    if "nanobot" in decoded:
        section = decoded["nanobot"]
    elif "openclaw" in decoded:
        section = decoded["openclaw"]
    else:
        section = decoded

    if isinstance(section, dict):
        return section
    return {}
def check_requirements(metadata: dict[str, Any]) -> bool:
    """Tell whether a skill's minimal requirements are satisfied.

    ``metadata["requires"]`` may list executables under ``bins`` (each must
    be found on ``PATH``) and environment variables under ``env`` (each
    must be set to a non-empty value).  Either entry may be a list, a
    single string, or missing/``null``.

    Returns ``True`` when everything is present, and also when
    ``requires`` is absent or not a mapping.
    """
    requires = metadata.get("requires", {})
    if not isinstance(requires, dict):
        return True

    def names(key: str) -> list[str]:
        # Accept a bare string as one name (iterating it would test each
        # character) and treat null/empty as "nothing required".
        value = requires.get(key)
        if not value:
            return []
        if isinstance(value, (list, tuple, set)):
            return [str(item) for item in value]
        return [str(value)]

    if any(not shutil.which(binary) for binary in names("bins")):
        return False
    return all(os.environ.get(env_name) for env_name in names("env"))
def get_missing_requirements(metadata: dict[str, Any]) -> str:
    """Describe which requirements of a skill are missing.

    Uses the same ``requires`` layout as the availability check: ``bins``
    are looked up on ``PATH`` and ``env`` names in the environment; either
    entry may be a list, a single string, or missing/``null``.

    Returns:
        A comma-separated string such as ``"CLI: git, ENV: TOKEN"``; empty
        when nothing is missing or ``requires`` is not a mapping.
    """
    requires = metadata.get("requires", {})
    if not isinstance(requires, dict):
        return ""

    def names(key: str) -> list[str]:
        # Accept a bare string as one name (iterating it would test each
        # character) and treat null/empty as "nothing required".
        value = requires.get(key)
        if not value:
            return []
        if isinstance(value, (list, tuple, set)):
            return [str(item) for item in value]
        return [str(value)]

    missing = [f"CLI: {binary}" for binary in names("bins") if not shutil.which(binary)]
    missing.extend(
        f"ENV: {env_name}" for env_name in names("env") if not os.environ.get(env_name)
    )
    return ", ".join(missing)
def escape_xml(value: str) -> str:
    """Apply minimal XML escaping (``&``, ``<``, ``>``) for the skills summary."""
    # One pass over the text; replacement output is never escaped again.
    replacements = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
    return value.translate(replacements)

View File

@ -0,0 +1,2 @@
"""Draft skills generated before review."""

View File

@ -0,0 +1,2 @@
"""Skill publishing and version switching."""

View File

@ -0,0 +1,5 @@
"""Runtime skill resolution."""
from .runtime import ResolvedSkillSet, RuntimeSkillResolver
__all__ = ["ResolvedSkillSet", "RuntimeSkillResolver"]

View File

@ -0,0 +1,50 @@
"""Runtime skill resolver。
这层负责回答一个运行时问题:
“这一次调用,哪些 skill 要被激活,并以什么形式注入上下文?”
第一版保持保守,目前只接入一类来源:
1. `always` skills
不在这里做复杂的语义匹配或自动推荐。
"""
from __future__ import annotations
from dataclasses import dataclass, field
from beaver.engine.context import SkillContext
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.catalog.utils import strip_frontmatter
@dataclass(slots=True)
class ResolvedSkillSet:
"""一次运行最终解析出的 skills 结果。"""
activated_skills: list[SkillContext] = field(default_factory=list)
class RuntimeSkillResolver:
    """Work out which skills are actually activated for the current turn."""

    def __init__(self, loader: SkillsLoader) -> None:
        """Keep the loader used to look up and read skills."""
        self.loader = loader

    def resolve(self) -> ResolvedSkillSet:
        """Activate every ``always`` skill that has a non-empty body.

        Names are taken from the loader's ``get_always_skills`` in order,
        with duplicates dropped; skills that cannot be loaded or whose body
        is empty after removing the frontmatter are skipped.
        """
        # dict.fromkeys keeps first-seen order while removing duplicates.
        ordered_names = list(dict.fromkeys(self.loader.get_always_skills()))

        activated: list[SkillContext] = []
        for skill_name in ordered_names:
            raw = self.loader.load_skill(skill_name)
            if not raw:
                continue
            body = strip_frontmatter(raw).strip()
            if body:
                activated.append(SkillContext(name=skill_name, content=body))
        return ResolvedSkillSet(activated_skills=activated)

View File

@ -0,0 +1,2 @@
"""Skill review workflow."""