"""Import legacy and staged skills into the Beaver SkillSpecStore.""" from __future__ import annotations from dataclasses import dataclass from datetime import datetime, timezone import io import json import re import zipfile from pathlib import Path from typing import Any from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter from beaver.skills.specs import SkillSpec, SkillSpecStore, SkillVersion from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content @dataclass(slots=True) class SkillMigrationService: store: SkillSpecStore repo_root: Path | None = None def migrate_all(self) -> dict[str, Any]: included: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] for path in self._backend_old_skills(): self._migrate_skill_file(path, "backend-old", included, skipped) for path in self._staged_skills(): self._migrate_skill_file(path, "stevenli-staged", included, skipped) for path in self._skill_zips(): self._migrate_zip(path, included, skipped) manifest = { "generated_at": _now(), "workspace": str(self.store.workspace), "included": included, "skipped": skipped, } manifest_path = self.store.workspace / "skill_migration_manifest.json" manifest_path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") return manifest def _backend_old_skills(self) -> list[Path]: root = self._repo_root() / "app-instance" / "backend-old" / "nanobot" / "skills" if not root.exists(): return [] return sorted(root.glob("*/SKILL.md")) def _staged_skills(self) -> list[Path]: root = self.store.workspace / "state" / "skill-reviews" if not root.exists(): return [] return sorted(root.glob("*/staged/*/SKILL.md")) def _skill_zips(self) -> list[Path]: root = self.store.workspace / "skills" if not root.exists(): return [] return sorted(root.glob("*.zip")) def _repo_root(self) -> Path: if self.repo_root is not None: return self.repo_root return Path(__file__).resolve().parents[4] def _migrate_skill_file(self, path: Path, source: str, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None: try: content = path.read_text(encoding="utf-8") result = self._publish_content(content, source=source, source_path=str(path)) included.append(result) except Exception as exc: skipped.append({"source": source, "source_path": str(path), "reason": str(exc)}) def _migrate_zip(self, path: Path, included: list[dict[str, Any]], skipped: list[dict[str, Any]]) -> None: try: with zipfile.ZipFile(io.BytesIO(path.read_bytes()), "r") as archive: entries = [info for info in archive.infolist() if not info.is_dir()] skill_entry = _find_skill_entry(entries) content = archive.read(skill_entry).decode("utf-8", errors="replace") result = self._publish_content(content, source="stevenli-zip", source_path=str(path)) skill_name = result["skill_name"] version = result["version"] top = Path(skill_entry).parts[0] if len(Path(skill_entry).parts) == 2 else "" for info in entries: raw = info.filename.replace("\\", "/") if raw == skill_entry or raw.startswith("/") or "__MACOSX" in Path(raw).parts: continue parts = Path(raw).parts rel_parts = parts[1:] if top and parts and parts[0] == top else parts if not rel_parts or any(part in {"", ".", ".."} for part in rel_parts): continue target = self.store.root / skill_name / "versions" / version / "/".join(rel_parts) target.parent.mkdir(parents=True, exist_ok=True) target.write_bytes(archive.read(info)) included.append(result) except Exception as exc: skipped.append({"source": "stevenli-zip", "source_path": str(path), "reason": str(exc)}) def _publish_content(self, content: str, *, source: str, source_path: str) -> dict[str, Any]: frontmatter, body = parse_frontmatter(content) skill_name = _safe_name(str(frontmatter.get("name") or Path(source_path).parent.name)) if not skill_name: raise ValueError("unsafe or missing skill name") normalized = normalize_frontmatter( { **frontmatter, "name": skill_name, "description": frontmatter.get("description") or skill_name, } ) rendered = _render_skill_content(normalized, body) content_hash = canonical_hash(rendered) existing = self.store.read_published_skill(skill_name) if existing is not None and existing.version.content_hash == content_hash: return { "status": "unchanged", "skill_name": skill_name, "version": existing.version.version, "source": source, "source_path": source_path, } version_id = self._next_version(skill_name) now = _now() skill_version = SkillVersion( skill_name=skill_name, version=version_id, content_hash=content_hash, summary_hash=canonical_hash(strip_frontmatter(rendered).strip()), created_at=now, created_by="migration", change_reason=f"Import skill from {source}", parent_version=existing.version.version if existing is not None else None, review_state="published", frontmatter=normalized, summary=summarize_skill_content(body), tool_hints=self.store._extract_tool_hints(normalized), provenance={"source": source, "source_path": source_path, "imported_at": now}, ) self.store.write_skill_version(skill_version, rendered) spec = self.store.get_skill_spec(skill_name) or SkillSpec( name=skill_name, display_name=skill_name, description=str(normalized.get("description") or skill_name), created_at=now, updated_at=now, current_version=version_id, status="active", tags=[], owners=["migration"], source_kind=source, lineage=[], ) spec.current_version = version_id spec.updated_at = now spec.status = "active" spec.source_kind = source if "migration" not in spec.owners: spec.owners.append("migration") self.store.write_skill_spec(spec) self.store.set_current_version(skill_name, version_id) published = self.store.read_index("published") if skill_name not in published: published.append(skill_name) self.store.update_index("published", published) return {"status": "included", "skill_name": skill_name, "version": version_id, "source": source, "source_path": source_path} def _next_version(self, skill_name: str) -> str: versions = [item for item in self.store.list_versions(skill_name) if item.startswith("v")] numbers = [int(item[1:]) for item in versions if item[1:].isdigit()] return f"v{(max(numbers) if numbers else 0) + 1:04d}" def _find_skill_entry(entries: list[zipfile.ZipInfo]) -> str: candidates = [] for info in entries: raw = info.filename.replace("\\", "/") parts = Path(raw).parts if raw.startswith("/") or any(part in {"", ".", ".."} for part in parts): raise ValueError(f"unsafe archive entry: {info.filename}") if parts and parts[-1] == "SKILL.md" and len(parts) in (1, 2): candidates.append(raw) if not candidates: raise ValueError("zip has no root SKILL.md") return candidates[0] def _safe_name(value: str) -> str: cleaned = value.strip().replace(" ", "-") if not cleaned or cleaned in {".", ".."} or "/" in cleaned or "\\" in cleaned: return "" return cleaned if re.fullmatch(r"[A-Za-z0-9_.-]+", cleaned) else "" def _render_skill_content(frontmatter: dict[str, Any], body: str) -> str: lines = ["---"] for key, value in normalize_frontmatter(frontmatter).items(): if isinstance(value, list): lines.append(f"{key}:") for item in value: lines.append(f" - {item}") else: lines.append(f"{key}: {value}") lines.extend(["---", "", body.strip()]) return "\n".join(lines).rstrip() + "\n" def _now() -> str: return datetime.now(timezone.utc).isoformat()