Files
beaver_project/app-instance/backend/beaver/services/skill_migration.py
steven_li 30ab74ffb2 feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
2026-05-14 09:43:48 +08:00

209 lines
8.8 KiB
Python

"""Import legacy and staged skills into the Beaver SkillSpecStore."""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
import io
import json
import re
import zipfile
from pathlib import Path
from typing import Any
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillSpec, SkillSpecStore, SkillVersion
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
@dataclass(slots=True)
class SkillMigrationService:
store: SkillSpecStore
repo_root: Path | None = None
def migrate_all(self) -> dict[str, Any]:
included: list[dict[str, Any]] = []
skipped: list[dict[str, Any]] = []
for path in self._backend_old_skills():
self._migrate_skill_file(path, "backend-old", included, skipped)
for path in self._staged_skills():
self._migrate_skill_file(path, "stevenli-staged", included, skipped)
for path in self._skill_zips():
self._migrate_zip(path, included, skipped)
manifest = {
"generated_at": _now(),
"workspace": str(self.store.workspace),
"included": included,
"skipped": skipped,
}
manifest_path = self.store.workspace / "skill_migration_manifest.json"
manifest_path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
return manifest
def _backend_old_skills(self) -> list[Path]:
root = self._repo_root() / "app-instance" / "backend-old" / "nanobot" / "skills"
if not root.exists():
return []
return sorted(root.glob("*/SKILL.md"))
def _staged_skills(self) -> list[Path]:
root = self.store.workspace / "state" / "skill-reviews"
if not root.exists():
return []
return sorted(root.glob("*/staged/*/SKILL.md"))
def _skill_zips(self) -> list[Path]:
root = self.store.workspace / "skills"
if not root.exists():
return []
return sorted(root.glob("*.zip"))
def _repo_root(self) -> Path:
if self.repo_root is not None:
return self.repo_root
return Path(__file__).resolve().parents[4]
def _migrate_skill_file(self, path: Path, source: str, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None:
try:
content = path.read_text(encoding="utf-8")
result = self._publish_content(content, source=source, source_path=str(path))
included.append(result)
except Exception as exc:
skipped.append({"source": source, "source_path": str(path), "reason": str(exc)})
def _migrate_zip(self, path: Path, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None:
try:
with zipfile.ZipFile(io.BytesIO(path.read_bytes()), "r") as archive:
entries = [info for info in archive.infolist() if not info.is_dir()]
skill_entry = _find_skill_entry(entries)
content = archive.read(skill_entry).decode("utf-8", errors="replace")
result = self._publish_content(content, source="stevenli-zip", source_path=str(path))
skill_name = result["skill_name"]
version = result["version"]
top = Path(skill_entry).parts[0] if len(Path(skill_entry).parts) == 2 else ""
for info in entries:
raw = info.filename.replace("\\", "/")
if raw == skill_entry or raw.startswith("/") or "__MACOSX" in Path(raw).parts:
continue
parts = Path(raw).parts
rel_parts = parts[1:] if top and parts and parts[0] == top else parts
if not rel_parts or any(part in {"", ".", ".."} for part in rel_parts):
continue
target = self.store.root / skill_name / "versions" / version / "/".join(rel_parts)
target.parent.mkdir(parents=True, exist_ok=True)
target.write_bytes(archive.read(info))
included.append(result)
except Exception as exc:
skipped.append({"source": "stevenli-zip", "source_path": str(path), "reason": str(exc)})
def _publish_content(self, content: str, *, source: str, source_path: str) -> dict[str, Any]:
frontmatter, body = parse_frontmatter(content)
skill_name = _safe_name(str(frontmatter.get("name") or Path(source_path).parent.name))
if not skill_name:
raise ValueError("unsafe or missing skill name")
normalized = normalize_frontmatter(
{
**frontmatter,
"name": skill_name,
"description": frontmatter.get("description") or skill_name,
}
)
rendered = _render_skill_content(normalized, body)
content_hash = canonical_hash(rendered)
existing = self.store.read_published_skill(skill_name)
if existing is not None and existing.version.content_hash == content_hash:
return {
"status": "unchanged",
"skill_name": skill_name,
"version": existing.version.version,
"source": source,
"source_path": source_path,
}
version_id = self._next_version(skill_name)
now = _now()
skill_version = SkillVersion(
skill_name=skill_name,
version=version_id,
content_hash=content_hash,
summary_hash=canonical_hash(strip_frontmatter(rendered).strip()),
created_at=now,
created_by="migration",
change_reason=f"Import skill from {source}",
parent_version=existing.version.version if existing is not None else None,
review_state="published",
frontmatter=normalized,
summary=summarize_skill_content(body),
tool_hints=self.store._extract_tool_hints(normalized),
provenance={"source": source, "source_path": source_path, "imported_at": now},
)
self.store.write_skill_version(skill_version, rendered)
spec = self.store.get_skill_spec(skill_name) or SkillSpec(
name=skill_name,
display_name=skill_name,
description=str(normalized.get("description") or skill_name),
created_at=now,
updated_at=now,
current_version=version_id,
status="active",
tags=[],
owners=["migration"],
source_kind=source,
lineage=[],
)
spec.current_version = version_id
spec.updated_at = now
spec.status = "active"
spec.source_kind = source
if "migration" not in spec.owners:
spec.owners.append("migration")
self.store.write_skill_spec(spec)
self.store.set_current_version(skill_name, version_id)
published = self.store.read_index("published")
if skill_name not in published:
published.append(skill_name)
self.store.update_index("published", published)
return {"status": "included", "skill_name": skill_name, "version": version_id, "source": source, "source_path": source_path}
def _next_version(self, skill_name: str) -> str:
versions = [item for item in self.store.list_versions(skill_name) if item.startswith("v")]
numbers = [int(item[1:]) for item in versions if item[1:].isdigit()]
return f"v{(max(numbers) if numbers else 0) + 1:04d}"
def _find_skill_entry(entries: list[zipfile.ZipInfo]) -> str:
    """Return the archive name of the skill's ``SKILL.md``.

    Entry names are compared with backslashes converted to forward slashes.
    A ``SKILL.md`` qualifies when it sits at the archive root or exactly one
    folder deep; the first qualifying entry in archive order wins.

    Raises:
        ValueError: If any entry is absolute or contains an unsafe path
            segment, or if no qualifying ``SKILL.md`` exists.
    """
    forbidden_segments = {"", ".", ".."}
    matches: list[str] = []
    for entry in entries:
        normalized = entry.filename.replace("\\", "/")
        segments = Path(normalized).parts
        is_unsafe = normalized.startswith("/") or not forbidden_segments.isdisjoint(segments)
        if is_unsafe:
            # One bad entry rejects the whole archive.
            raise ValueError(f"unsafe archive entry: {entry.filename}")
        if len(segments) in (1, 2) and segments[-1] == "SKILL.md":
            matches.append(normalized)
    if matches:
        return matches[0]
    raise ValueError("zip has no root SKILL.md")
def _safe_name(value: str) -> str:
    """Return ``value`` as a skill name, or ``""`` when it is unusable.

    Surrounding whitespace is trimmed and inner spaces become hyphens. The
    result must consist solely of ASCII letters, digits, ``_``, ``.`` and
    ``-``, and must not be ``.`` or ``..``.
    """
    candidate = value.strip().replace(" ", "-")
    if candidate in {"", ".", ".."}:
        return ""
    if any(separator in candidate for separator in ("/", "\\")):
        return ""
    if re.fullmatch(r"[A-Za-z0-9_.-]+", candidate) is None:
        return ""
    return candidate
def _render_skill_content(frontmatter: dict[str, Any], body: str) -> str:
    """Render frontmatter and body as a single ``SKILL.md`` document.

    The frontmatter is passed through ``normalize_frontmatter`` and written
    between two ``---`` lines: list values as a key line followed by one
    `` - item`` line per element, everything else as ``key: value``. The
    stripped body follows after a blank line, and the document ends with
    exactly one newline.
    """
    rendered: list[str] = ["---"]
    for field_name, field_value in normalize_frontmatter(frontmatter).items():
        if not isinstance(field_value, list):
            rendered.append(f"{field_name}: {field_value}")
            continue
        rendered.append(f"{field_name}:")
        rendered.extend(f" - {entry}" for entry in field_value)
    rendered += ["---", "", body.strip()]
    document = "\n".join(rendered)
    return document.rstrip() + "\n"
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()