修改了nanobot,往Hermes agent的风格走,进度1/3
This commit is contained in:
12
app-instance/backend/beaver/skills/__init__.py
Normal file
12
app-instance/backend/beaver/skills/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""Skill system for Beaver."""
|
||||
|
||||
from .assembler import SkillAssembler, SkillAssemblyResult, SkillEmbeddingRetriever
|
||||
from .catalog import SkillRecord, SkillsLoader
|
||||
|
||||
__all__ = [
|
||||
"SkillAssembler",
|
||||
"SkillAssemblyResult",
|
||||
"SkillEmbeddingRetriever",
|
||||
"SkillRecord",
|
||||
"SkillsLoader",
|
||||
]
|
||||
6
app-instance/backend/beaver/skills/assembler/__init__.py
Normal file
6
app-instance/backend/beaver/skills/assembler/__init__.py
Normal file
@ -0,0 +1,6 @@
|
||||
"""Skill assembly for Beaver."""
|
||||
|
||||
from .embedding_retriever import SkillEmbeddingRetriever
|
||||
from .task_assembler import SkillAssemblyResult, SkillAssembler
|
||||
|
||||
__all__ = ["SkillAssemblyResult", "SkillAssembler", "SkillEmbeddingRetriever"]
|
||||
@ -0,0 +1,188 @@
|
||||
"""Embedding-based skill candidate retrieval.
|
||||
|
||||
当前实现使用 OpenAI-compatible `/v1/embeddings` 接口调用
|
||||
阿里云百炼 `text-embedding-v4` 做最小语义召回:
|
||||
1. 复用当前 provider 的 `api_key/api_base`
|
||||
2. 先用 embedding 相似度召回一小批候选
|
||||
3. 再交给上层 LLM selector 做最终技能选择
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import os
|
||||
import json
|
||||
from urllib import request
|
||||
from typing import Any
|
||||
|
||||
|
||||
class SkillEmbeddingRetriever:
|
||||
"""用 OpenAI-compatible embeddings API 为 skill 选择做候选召回。"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key_env: str = "OPENAI_API_KEY",
|
||||
api_base_env: str = "OPENAI_API_BASE",
|
||||
model: str = "text-embedding-v4",
|
||||
timeout_seconds: float = 20.0,
|
||||
) -> None:
|
||||
self.api_key_env = api_key_env
|
||||
self.api_base_env = api_base_env
|
||||
self.model = model
|
||||
self.timeout_seconds = timeout_seconds
|
||||
|
||||
async def retrieve(
|
||||
self,
|
||||
*,
|
||||
query: str,
|
||||
candidates: list[dict[str, str]],
|
||||
top_k: int = 12,
|
||||
api_key: str | None = None,
|
||||
api_base: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> list[dict[str, str]]:
|
||||
"""按 embedding 相似度召回 top-k 候选。
|
||||
|
||||
如果没有可用的 API Key / base URL,或者 embedding 调用失败,
|
||||
当前阶段先退回到“全部候选交给 LLM selector”。
|
||||
"""
|
||||
|
||||
if not candidates:
|
||||
return []
|
||||
|
||||
resolved_api_key = api_key or os.getenv(self.api_key_env)
|
||||
resolved_api_base = api_base or os.getenv(self.api_base_env)
|
||||
if not resolved_api_key or not resolved_api_base:
|
||||
return candidates
|
||||
|
||||
try:
|
||||
query_embedding = await self._embed_texts(
|
||||
api_key=resolved_api_key,
|
||||
api_base=resolved_api_base,
|
||||
texts=[query],
|
||||
model=model or self.model,
|
||||
)
|
||||
candidate_texts = [self._candidate_text(item) for item in candidates]
|
||||
candidate_embeddings = await self._embed_texts(
|
||||
api_key=resolved_api_key,
|
||||
api_base=resolved_api_base,
|
||||
texts=candidate_texts,
|
||||
model=model or self.model,
|
||||
)
|
||||
except Exception:
|
||||
return candidates
|
||||
|
||||
if not query_embedding or not query_embedding[0] or len(candidate_embeddings) != len(candidates):
|
||||
return candidates
|
||||
|
||||
query_vector = query_embedding[0]
|
||||
scored: list[tuple[float, dict[str, str]]] = []
|
||||
for candidate, vector in zip(candidates, candidate_embeddings, strict=False):
|
||||
if not vector:
|
||||
continue
|
||||
scored.append((self._cosine_similarity(query_vector, vector), candidate))
|
||||
|
||||
scored.sort(key=lambda item: item[0], reverse=True)
|
||||
return [item[1] for item in scored[:top_k]]
|
||||
|
||||
async def _embed_texts(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
api_base: str,
|
||||
texts: list[str],
|
||||
model: str,
|
||||
) -> list[list[float]]:
|
||||
"""调用 OpenAI-compatible embeddings 接口。
|
||||
|
||||
当前对齐的是你们实际在用的网关配置:
|
||||
- `POST {api_base}/embeddings`
|
||||
- `model=text-embedding-v4`
|
||||
- `encoding_format=float`
|
||||
"""
|
||||
|
||||
all_vectors: list[list[float]] = []
|
||||
endpoint = self._normalize_embeddings_endpoint(api_base)
|
||||
for start in range(0, len(texts), 10):
|
||||
batch = texts[start:start + 10]
|
||||
payload = await self._post_embeddings(
|
||||
endpoint=endpoint,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
texts=batch,
|
||||
)
|
||||
embeddings = payload.get("data") or []
|
||||
embeddings = sorted(embeddings, key=lambda item: item.get("index", 0))
|
||||
all_vectors.extend([list(item.get("embedding") or []) for item in embeddings])
|
||||
return all_vectors
|
||||
|
||||
async def _post_embeddings(
|
||||
self,
|
||||
*,
|
||||
endpoint: str,
|
||||
api_key: str,
|
||||
model: str,
|
||||
texts: list[str],
|
||||
) -> dict[str, Any]:
|
||||
return await asyncio.to_thread(
|
||||
self._post_embeddings_sync,
|
||||
endpoint=endpoint,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
texts=texts,
|
||||
)
|
||||
|
||||
def _post_embeddings_sync(
|
||||
self,
|
||||
*,
|
||||
endpoint: str,
|
||||
api_key: str,
|
||||
model: str,
|
||||
texts: list[str],
|
||||
) -> dict[str, Any]:
|
||||
body = json.dumps(
|
||||
{
|
||||
"model": model,
|
||||
"input": texts if len(texts) > 1 else texts[0],
|
||||
"encoding_format": "float",
|
||||
}
|
||||
).encode("utf-8")
|
||||
req = request.Request(
|
||||
endpoint,
|
||||
data=body,
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
method="POST",
|
||||
)
|
||||
with request.urlopen(req, timeout=self.timeout_seconds) as response:
|
||||
return json.loads(response.read().decode("utf-8"))
|
||||
|
||||
@staticmethod
|
||||
def _candidate_text(candidate: dict[str, str]) -> str:
|
||||
name = (candidate.get("name") or "").strip()
|
||||
description = (candidate.get("description") or "").strip()
|
||||
return f"{name}\n{description}".strip()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_embeddings_endpoint(api_base: str) -> str:
|
||||
base = api_base.rstrip("/")
|
||||
if base.endswith("/embeddings"):
|
||||
return base
|
||||
if base.endswith("/v1"):
|
||||
return f"{base}/embeddings"
|
||||
return f"{base}/v1/embeddings"
|
||||
|
||||
@staticmethod
|
||||
def _cosine_similarity(left: list[float], right: list[float]) -> float:
|
||||
if not left or not right or len(left) != len(right):
|
||||
return -1.0
|
||||
dot = sum(a * b for a, b in zip(left, right, strict=False))
|
||||
left_norm = math.sqrt(sum(a * a for a in left))
|
||||
right_norm = math.sqrt(sum(b * b for b in right))
|
||||
if left_norm == 0 or right_norm == 0:
|
||||
return -1.0
|
||||
return dot / (left_norm * right_norm)
|
||||
168
app-instance/backend/beaver/skills/assembler/task_assembler.py
Normal file
168
app-instance/backend/beaver/skills/assembler/task_assembler.py
Normal file
@ -0,0 +1,168 @@
|
||||
"""LLM-driven skill assembler.
|
||||
|
||||
这层现在不再自己做规则打分,而是直接把:
|
||||
1. task description
|
||||
2. embedding 召回后的候选 skill 摘要
|
||||
|
||||
交给一个模型来决定本轮要激活哪些 skill。
|
||||
|
||||
当前目标非常克制:
|
||||
- 输入尽量简单
|
||||
- 输出只要 skill 名称
|
||||
- 没有命中就返回空 skills
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers.base import LLMProvider
|
||||
from beaver.engine.providers.runtime import ProviderRuntime
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
from .embedding_retriever import SkillEmbeddingRetriever
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillAssemblyResult:
|
||||
"""一次装配后真正要注入当前 run 的 skills。"""
|
||||
|
||||
activated_skills: list[SkillContext] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillAssembler:
    """Choose the skills for one run by letting an LLM pick among candidates."""

    def __init__(
        self,
        loader: SkillsLoader,
        retriever: SkillEmbeddingRetriever | None = None,
    ) -> None:
        """Keep the catalog loader and the retriever (a default one if omitted)."""
        self.loader = loader
        self.retriever = retriever if retriever else SkillEmbeddingRetriever()

    async def assemble(
        self,
        *,
        task_description: str,
        provider: LLMProvider,
        model: str,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 12,
    ) -> SkillAssemblyResult:
        """Select and load the skills to activate for ``task_description``.

        The loader's candidates are first narrowed by the retriever, then the
        provider is asked which names to activate, and finally the body of
        each chosen skill (frontmatter removed) is loaded. Skills whose body
        is empty are left out.
        """
        pool = self.loader.build_selection_candidates()
        if not pool:
            return SkillAssemblyResult()

        # Explicit embedding credentials are forwarded only when a runtime is given.
        if embedding_runtime is None:
            overrides = {"api_key": None, "api_base": None, "model": None}
        else:
            overrides = {
                "api_key": embedding_runtime.api_key,
                "api_base": embedding_runtime.api_base,
                "model": embedding_runtime.model,
            }
        shortlist = await self.retriever.retrieve(
            query=task_description,
            candidates=pool,
            top_k=top_k,
            **overrides,
        )
        if not shortlist:
            return SkillAssemblyResult()

        chosen = await self._select_skill_names(
            task_description=task_description,
            candidates=shortlist,
            provider=provider,
            model=model,
        )
        if not chosen:
            return SkillAssemblyResult()

        activated: list[SkillContext] = []
        for skill_name in chosen:
            raw = self.loader.load_skill(skill_name)
            if not raw:
                continue
            body = strip_frontmatter(raw).strip()
            if body:
                activated.append(SkillContext(name=skill_name, content=body))
        return SkillAssemblyResult(activated_skills=activated)

    async def _select_skill_names(
        self,
        *,
        task_description: str,
        candidates: list[dict[str, str]],
        provider: LLMProvider,
        model: str,
    ) -> list[str]:
        """Ask the provider which candidate names to activate.

        Returns the names from the model's answer that exist in ``candidates``,
        de-duplicated and in the order the model produced them. An error
        response, empty content or unparsable content yields an empty list.
        """
        known_names = {entry["name"] for entry in candidates}
        summary = self._render_candidates(candidates)
        system_prompt = (
            "You select Beaver skills for a single run. "
            "Given a task description and candidate skill summaries, "
            "return only a JSON array of skill names to activate. "
            "Do not invent names. If nothing matches, return []."
        )
        user_prompt = (
            f"Task description:\n{task_description}\n\n"
            f"Candidate skills:\n{summary}\n\n"
            "Return only JSON, for example: [\"skill-a\", \"skill-b\"]"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            tools=None,
            model=model,
            max_tokens=512,
            temperature=0,
        )
        if response.finish_reason == "error" or not response.content:
            return []

        answered = self._parse_selected_names(response.content)
        # dict.fromkeys keeps first occurrences in order, dropping repeats.
        return list(dict.fromkeys(name for name in answered if name in known_names))

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as one ``- name: description`` line each."""
        return "\n".join(f"- {entry['name']}: {entry['description']}" for entry in candidates)

    @staticmethod
    def _parse_selected_names(content: str) -> list[str]:
        """Extract skill names from the model's reply.

        Accepts a JSON array, optionally wrapped in a Markdown code fence, or
        a JSON object holding the array under ``skills``, ``selected_skills``,
        ``activated_skills`` or ``selected``. Anything else yields ``[]``.
        """
        text = content.strip()
        if text.startswith("```"):
            fence_lines = text.splitlines()
            closes_fence = fence_lines[-1].startswith("```")
            if len(fence_lines) >= 3 and closes_fence:
                text = "\n".join(fence_lines[1:-1]).strip()

        try:
            decoded: Any = json.loads(text)
        except json.JSONDecodeError:
            return []

        if isinstance(decoded, dict):
            wrapper_keys = ("skills", "selected_skills", "activated_skills", "selected")
            decoded = next(
                (decoded[key] for key in wrapper_keys if isinstance(decoded.get(key), list)),
                decoded,
            )

        if not isinstance(decoded, list):
            return []
        names: list[str] = []
        for entry in decoded:
            if isinstance(entry, str) and entry.strip():
                names.append(entry.strip())
        return names
|
||||
2
app-instance/backend/beaver/skills/builtin/__init__.py
Normal file
2
app-instance/backend/beaver/skills/builtin/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Built-in skill payloads."""
|
||||
|
||||
5
app-instance/backend/beaver/skills/catalog/__init__.py
Normal file
5
app-instance/backend/beaver/skills/catalog/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Skill catalog and indexing."""
|
||||
|
||||
from .loader import SkillRecord, SkillsLoader
|
||||
|
||||
__all__ = ["SkillRecord", "SkillsLoader"]
|
||||
281
app-instance/backend/beaver/skills/catalog/loader.py
Normal file
281
app-instance/backend/beaver/skills/catalog/loader.py
Normal file
@ -0,0 +1,281 @@
|
||||
"""Beaver skills catalog loader。
|
||||
|
||||
第一版目标非常明确:
|
||||
|
||||
1. 扫描技能目录
|
||||
2. 读取 `SKILL.md`
|
||||
3. 解析前置元数据
|
||||
4. 生成可注入上下文的正文与索引
|
||||
|
||||
这层不负责:
|
||||
1. 动态选择本轮应该启用哪些 skill
|
||||
2. skill review / publishing
|
||||
3. skill 自动学习
|
||||
|
||||
这些决策属于 resolver 或更高层工作流。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .utils import (
|
||||
check_requirements,
|
||||
escape_xml,
|
||||
get_missing_requirements,
|
||||
parse_frontmatter,
|
||||
parse_skill_metadata_blob,
|
||||
strip_frontmatter,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillRecord:
|
||||
"""单个 skill 的目录级元数据。"""
|
||||
|
||||
name: str
|
||||
path: Path
|
||||
source: str
|
||||
|
||||
|
||||
class SkillsLoader:
    """Discover and read skills from the workspace, plugin and builtin roots.

    A skill is a directory that contains a ``SKILL.md`` file; the directory
    name is the skill name.
    """

    def __init__(
        self,
        workspace: str | Path,
        *,
        builtin_skills_dir: str | Path | None = None,
        extra_dirs: list[str | Path] | None = None,
    ) -> None:
        """Record the skill roots.

        Args:
            workspace: Workspace directory; its ``skills`` sub-directory is scanned.
            builtin_skills_dir: Builtin root. Defaults to the ``builtin``
                directory two levels above this module's file.
            extra_dirs: Additional roots, reported with source ``"plugin"``.
        """
        self.workspace = Path(workspace)
        self.workspace_skills = self.workspace / "skills"
        if builtin_skills_dir is not None:
            self.builtin_skills = Path(builtin_skills_dir)
        else:
            self.builtin_skills = Path(__file__).resolve().parent.parent / "builtin"
        self.extra_dirs = [Path(item) for item in (extra_dirs or [])]

    def list_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
        """List the visible skills.

        Roots are scanned in priority order: workspace, then extra/plugin
        directories, then builtin. For a duplicated name only the
        highest-priority skill is considered.

        Args:
            filter_unavailable: Drop skills whose requirements are not met.

        Returns:
            Records ordered by root priority, then by directory path.
        """
        ordered_roots: list[tuple[str, Path]] = [
            ("workspace", self.workspace_skills),
            *[("plugin", path) for path in self.extra_dirs],
            ("builtin", self.builtin_skills),
        ]
        seen: set[str] = set()
        found: list[SkillRecord] = []

        for source, root in ordered_roots:
            if not root.is_dir():
                continue
            # Sorted so the listing (and any prompt built from it) is deterministic.
            for skill_dir in sorted(root.iterdir()):
                skill_file = skill_dir / "SKILL.md"
                if not skill_dir.is_dir() or not skill_file.is_file():
                    continue
                name = skill_dir.name
                if name in seen:
                    continue
                # Claim the name before the availability check: an unavailable
                # high-priority skill must still shadow lower-priority ones,
                # because name-based lookups always resolve to it.
                seen.add(name)
                record = SkillRecord(name=name, path=skill_file, source=source)
                if filter_unavailable and not self._record_available(record):
                    continue
                found.append(record)
        return found

    def load_skill(self, name: str) -> str | None:
        """Return the raw ``SKILL.md`` text of ``name``, or ``None`` if unknown."""
        record = self._find_record(name)
        if record is None:
            return None
        return record.path.read_text(encoding="utf-8")

    def get_skill_record(self, name: str) -> SkillRecord | None:
        """Return the record of ``name``, or ``None`` if unknown."""
        return self._find_record(name)

    def get_skill_metadata(self, name: str) -> dict[str, Any] | None:
        """Return the frontmatter of ``name``, or ``None`` if unknown."""
        content = self.load_skill(name)
        if content is None:
            return None
        metadata, _ = parse_frontmatter(content)
        return metadata

    def load_skills_for_context(self, skill_names: list[str]) -> str:
        """Join the bodies of the named skills into one context block.

        Each skill becomes a ``## name`` section; unknown skills and skills
        with an empty body are skipped.
        """
        sections: list[str] = []
        for name in skill_names:
            content = self.load_skill(name)
            if not content:
                continue
            body = strip_frontmatter(content).strip()
            if body:
                sections.append(f"## {name}\n\n{body}")
        return "\n\n".join(sections)

    def build_skills_summary(self) -> str:
        """Build the skills index intended for the system prompt.

        The index lists every skill (available or not) with its description,
        a load hint, up to twelve supporting files and, for unavailable
        skills, the missing requirements. Skill bodies are not included.

        Returns:
            The ``<skills>`` block, or ``""`` when there are no skills.
        """
        skills = self.list_skills(filter_unavailable=False)
        if not skills:
            return ""

        lines = ["<skills>"]
        for record in skills:
            # Read through the record directly; looking it up again by name
            # would rescan every root for every skill.
            frontmatter = self._record_metadata(record)
            meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
            available = check_requirements(meta_blob)
            description = frontmatter.get("description") or record.name
            load_hint = f'Use skill_view(name="{record.name}") to load the full skill.'
            lines.append(f' <skill available="{str(available).lower()}">')
            lines.append(f" <name>{escape_xml(record.name)}</name>")
            lines.append(f" <description>{escape_xml(description)}</description>")
            lines.append(f" <load_hint>{escape_xml(load_hint)}</load_hint>")
            support_files = self._supporting_files(record)
            if support_files:
                lines.append(" <supporting_files>")
                for file_path in support_files[:12]:
                    lines.append(f" <file>{escape_xml(file_path)}</file>")
                if len(support_files) > 12:
                    lines.append(" <file>...additional files omitted...</file>")
                lines.append(" </supporting_files>")
            if not available:
                missing = get_missing_requirements(meta_blob)
                if missing:
                    lines.append(f" <requires>{escape_xml(missing)}</requires>")
            lines.append(" </skill>")
        lines.append("</skills>")
        return "\n".join(lines)

    def build_selection_candidates(self) -> list[dict[str, str]]:
        """Build the ``name``/``description`` summaries given to the selector.

        Only available skills are included. When the frontmatter has no
        description, the first three body lines (at most 240 characters) are
        used; when the body is empty too, the skill name is used.
        """
        candidates: list[dict[str, str]] = []
        for record in self.list_skills(filter_unavailable=True):
            frontmatter, body = parse_frontmatter(record.path.read_text(encoding="utf-8"))
            description = str(frontmatter.get("description") or "").strip()
            if not description:
                body = body.strip()
                if body:
                    description = " ".join(body.splitlines()[:3])[:240].strip()
            candidates.append(
                {
                    "name": record.name,
                    "description": description or record.name,
                }
            )
        return candidates

    def list_skill_supporting_files(self, name: str) -> list[str]:
        """List the supporting files of ``name`` relative to its directory.

        Returns an empty list for an unknown skill.
        """
        record = self._find_record(name)
        if record is None:
            return []
        return self._supporting_files(record)

    def view_skill(self, name: str, file_path: str | None = None) -> tuple[str, str] | None:
        """Read a skill's ``SKILL.md`` or one of its files.

        Args:
            name: Skill name.
            file_path: Path relative to the skill directory; ``None`` or empty
                selects ``SKILL.md``.

        Returns:
            ``(display_name, content)``, or ``None`` when the skill is unknown.

        Raises:
            ValueError: The skill is unavailable, ``file_path`` leaves the
                skill directory, or the file is not UTF-8 text.
            FileNotFoundError: ``file_path`` does not name an existing file.
        """
        record = self._find_record(name)
        if record is None:
            return None
        if not self._record_available(record):
            meta_blob = parse_skill_metadata_blob(self._record_metadata(record).get("metadata", ""))
            missing = get_missing_requirements(meta_blob)
            detail = f" Missing requirements: {missing}." if missing else ""
            raise ValueError(f"Skill '{name}' is currently unavailable.{detail}")

        if not file_path:
            return ("SKILL.md", self._read_text_file(record.path, display_name="SKILL.md"))

        # Resolve the directory once and use it for both the containment check
        # and the display name; mixing a resolved candidate with an unresolved
        # directory makes relative_to() fail for relative or symlinked roots.
        skill_dir = record.path.parent.resolve()
        candidate = (skill_dir / file_path).resolve()
        try:
            relative = candidate.relative_to(skill_dir)
        except ValueError as exc:
            raise ValueError("Requested skill file must stay within the skill directory") from exc
        if not candidate.exists() or not candidate.is_file():
            raise FileNotFoundError(f"Skill file '{file_path}' does not exist")
        display_name = str(relative)
        return (display_name, self._read_text_file(candidate, display_name=display_name))

    def get_always_skills(self) -> list[str]:
        """Return the names of available skills flagged ``always``.

        The flag is read from the metadata blob or from a top-level
        ``always: true`` frontmatter entry.
        """
        result: list[str] = []
        for record in self.list_skills(filter_unavailable=True):
            frontmatter = self._record_metadata(record)
            meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
            if meta_blob.get("always") or str(frontmatter.get("always", "")).lower() == "true":
                result.append(record.name)
        return result

    def _find_record(self, name: str) -> SkillRecord | None:
        """Return the record named ``name`` regardless of availability."""
        for record in self.list_skills(filter_unavailable=False):
            if record.name == name:
                return record
        return None

    @staticmethod
    def _record_metadata(record: SkillRecord) -> dict[str, Any]:
        """Parse the frontmatter of the file behind ``record``."""
        metadata, _ = parse_frontmatter(record.path.read_text(encoding="utf-8"))
        return metadata

    @staticmethod
    def _supporting_files(record: SkillRecord) -> list[str]:
        """Collect regular, non-symlink files under the support sub-directories."""
        skill_dir = record.path.parent
        results: list[str] = []
        for subdir in ("references", "templates", "scripts", "assets"):
            root = skill_dir / subdir
            if not root.exists():
                continue
            for file in sorted(root.rglob("*")):
                if file.is_file() and not file.is_symlink():
                    results.append(str(file.relative_to(skill_dir)))
        return results

    def _record_available(self, record: SkillRecord) -> bool:
        """Tell whether the requirements declared by ``record`` are satisfied."""
        meta_blob = parse_skill_metadata_blob(self._record_metadata(record).get("metadata", ""))
        return check_requirements(meta_blob)

    @staticmethod
    def _read_text_file(path: Path, *, display_name: str) -> str:
        """Read ``path`` as UTF-8, raising ``ValueError`` for non-text files."""
        try:
            return path.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            raise ValueError(
                f"Skill file '{display_name}' is not UTF-8 text and cannot be viewed with skill_view."
            ) from exc

    def _skill_available(self, name: str) -> bool:
        """Tell whether ``name`` exists and has its requirements satisfied."""
        record = self._find_record(name)
        if record is None:
            return False
        return self._record_available(record)
|
||||
122
app-instance/backend/beaver/skills/catalog/utils.py
Normal file
122
app-instance/backend/beaver/skills/catalog/utils.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""Skills catalog 的公共辅助函数。
|
||||
|
||||
这里专门放“解析和校验 skill 文件”的纯函数,避免 `loader.py` 里同时承担:
|
||||
|
||||
1. 目录扫描
|
||||
2. frontmatter 解析
|
||||
3. requirements 校验
|
||||
4. 文本裁剪/格式化
|
||||
|
||||
把这些细节拆出来之后,skills catalog 的边界会更清楚,后面无论是 reviews、publisher
|
||||
还是 runtime resolver,都可以复用同一套元数据解析规则。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_frontmatter(content: str) -> tuple[dict[str, str], str]:
    """Parse the minimal frontmatter at the top of a Markdown file.

    Only flat ``key: value`` lines between two ``---`` delimiter lines are
    supported; no YAML parser is involved. Both LF and CRLF line endings are
    accepted, and an empty block (``---`` directly followed by ``---``) is
    recognised as frontmatter with no entries.

    Args:
        content: Full text of the Markdown file.

    Returns:
        ``(metadata, body)``. Without a frontmatter block, ``metadata`` is
        empty and ``body`` is ``content`` unchanged; otherwise ``body`` is the
        text after the closing delimiter with surrounding whitespace removed.
    """
    if not content.startswith("---"):
        return {}, content

    # The key/value block is optional and tried last (lazy ``??``) so that an
    # empty frontmatter does not swallow a later ``---`` rule in the body.
    match = re.match(
        r"^---[ \t]*\r?\n(?:(.*?)\r?\n)??---[ \t]*(?:\r?\n|\Z)",
        content,
        re.DOTALL,
    )
    if match is None:
        return {}, content

    metadata: dict[str, str] = {}
    for line in (match.group(1) or "").splitlines():
        key, separator, value = line.partition(":")
        if not separator:
            continue
        # Values may be wrapped in single or double quotes; drop them.
        metadata[key.strip()] = value.strip().strip('"\'')
    return metadata, content[match.end():].strip()
|
||||
|
||||
|
||||
def strip_frontmatter(content: str) -> str:
    """Return the skill text with its frontmatter block removed."""
    return parse_frontmatter(content)[1]
|
||||
|
||||
|
||||
def parse_skill_metadata_blob(raw: str) -> dict[str, Any]:
    """Decode the JSON stored in a skill's ``metadata`` frontmatter field.

    The settings may sit under a ``nanobot`` key, under an ``openclaw`` key,
    or at the top level; ``nanobot`` wins when both keys are present.

    Returns:
        The settings mapping, or ``{}`` when ``raw`` is not valid JSON or the
        selected value is not a JSON object.
    """
    try:
        decoded = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return {}
    if not isinstance(decoded, dict):
        return {}

    if "nanobot" in decoded:
        section = decoded["nanobot"]
    elif "openclaw" in decoded:
        section = decoded["openclaw"]
    else:
        section = decoded
    if isinstance(section, dict):
        return section
    return {}
|
||||
|
||||
|
||||
def check_requirements(metadata: dict[str, Any]) -> bool:
    """Tell whether every requirement declared in ``metadata`` is satisfied.

    ``requires.bins`` entries must be found by ``shutil.which`` and
    ``requires.env`` entries must be non-empty environment variables. A
    ``requires`` value that is not a mapping counts as "no requirements".
    """
    requires = metadata.get("requires", {})
    if not isinstance(requires, dict):
        return True

    binaries_present = all(shutil.which(str(binary)) for binary in requires.get("bins", []))
    if not binaries_present:
        return False
    return all(os.environ.get(str(env_name)) for env_name in requires.get("env", []))
|
||||
|
||||
|
||||
def get_missing_requirements(metadata: dict[str, Any]) -> str:
    """Describe the unmet requirements as a short comma-separated string.

    Missing binaries are reported as ``CLI: <name>`` and missing environment
    variables as ``ENV: <name>``, binaries first. Returns ``""`` when nothing
    is missing or ``requires`` is not a mapping.
    """
    requires = metadata.get("requires", {})
    if not isinstance(requires, dict):
        return ""

    missing = [
        f"CLI: {binary}"
        for binary in requires.get("bins", [])
        if not shutil.which(str(binary))
    ]
    missing += [
        f"ENV: {env_name}"
        for env_name in requires.get("env", [])
        if not os.environ.get(str(env_name))
    ]
    return ", ".join(missing)
|
||||
|
||||
|
||||
def escape_xml(value: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so ``value`` is safe as XML element text.

    Quotes are left untouched, so the result is not suitable for use inside
    an attribute value.
    """
    # "&" must be replaced first; otherwise the ampersands introduced by the
    # other two replacements would be escaped a second time.
    return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
||||
2
app-instance/backend/beaver/skills/drafts/__init__.py
Normal file
2
app-instance/backend/beaver/skills/drafts/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Draft skills generated before review."""
|
||||
|
||||
2
app-instance/backend/beaver/skills/publisher/__init__.py
Normal file
2
app-instance/backend/beaver/skills/publisher/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Skill publishing and version switching."""
|
||||
|
||||
5
app-instance/backend/beaver/skills/resolver/__init__.py
Normal file
5
app-instance/backend/beaver/skills/resolver/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Runtime skill resolution."""
|
||||
|
||||
from .runtime import ResolvedSkillSet, RuntimeSkillResolver
|
||||
|
||||
__all__ = ["ResolvedSkillSet", "RuntimeSkillResolver"]
|
||||
50
app-instance/backend/beaver/skills/resolver/runtime.py
Normal file
50
app-instance/backend/beaver/skills/resolver/runtime.py
Normal file
@ -0,0 +1,50 @@
|
||||
"""Runtime skill resolver。
|
||||
|
||||
这层负责回答一个运行时问题:
|
||||
“这一次调用,哪些 skill 要被激活,并以什么形式注入上下文?”
|
||||
|
||||
第一版保持保守,目前只使用一类来源:
|
||||
1. `always` skills
|
||||
|
||||
不在这里做复杂的语义匹配或自动推荐。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ResolvedSkillSet:
|
||||
"""一次运行最终解析出的 skills 结果。"""
|
||||
|
||||
activated_skills: list[SkillContext] = field(default_factory=list)
|
||||
|
||||
|
||||
class RuntimeSkillResolver:
    """Resolve which skills are active for the current run."""

    def __init__(self, loader: SkillsLoader) -> None:
        """Keep the catalog loader used to find and read skills."""
        self.loader = loader

    def resolve(
        self,
    ) -> ResolvedSkillSet:
        """Load every ``always`` skill reported by the loader.

        Names are de-duplicated in first-seen order; skills that cannot be
        loaded or whose body is empty after removing frontmatter are skipped.
        """
        # dict.fromkeys keeps the first occurrence of each name, in order.
        names = list(dict.fromkeys(self.loader.get_always_skills()))

        activated: list[SkillContext] = []
        for skill_name in names:
            raw = self.loader.load_skill(skill_name)
            if not raw:
                continue
            body = strip_frontmatter(raw).strip()
            if body:
                activated.append(SkillContext(name=skill_name, content=body))
        return ResolvedSkillSet(activated_skills=activated)
|
||||
2
app-instance/backend/beaver/skills/reviews/__init__.py
Normal file
2
app-instance/backend/beaver/skills/reviews/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Skill review workflow."""
|
||||
|
||||
Reference in New Issue
Block a user