diff --git a/app-instance/backend/beaver/plugins/transaction.py b/app-instance/backend/beaver/plugins/transaction.py new file mode 100644 index 0000000..b1fff59 --- /dev/null +++ b/app-instance/backend/beaver/plugins/transaction.py @@ -0,0 +1,48 @@ +"""Same-filesystem staging for plugin skill writes.""" + +from __future__ import annotations + +import filecmp +import os +from pathlib import Path +import shutil +from uuid import uuid4 + + +class PluginSkillTransaction: + def __init__(self, workspace: str | Path) -> None: + self.workspace = Path(workspace) + self.transaction_id = uuid4().hex + self.root = self.workspace / ".beaver" / "staging" / "plugin-skills" / self.transaction_id + self.root.mkdir(parents=True, exist_ok=True) + + def stage_upstream_snapshot(self, skill_name: str, source_id: str, tree_hash: str) -> Path: + path = self.root / "upstreams" / skill_name / source_id / tree_hash + path.mkdir(parents=True, exist_ok=True) + return path + + def stage_skill_version(self, skill_name: str, version: str) -> Path: + path = self.root / "versions" / skill_name / version + path.mkdir(parents=True, exist_ok=True) + return path + + def promote_directory(self, staged: Path, final: Path) -> None: + if final.exists(): + if _directories_identical(staged, final): + return + raise ValueError(f"Immutable directory already exists with different content: {final}") + final.parent.mkdir(parents=True, exist_ok=True) + os.replace(staged, final) + + def cleanup(self) -> None: + shutil.rmtree(self.root, ignore_errors=True) + + +def _directories_identical(left: Path, right: Path) -> bool: + comparison = filecmp.dircmp(left, right) + if comparison.left_only or comparison.right_only or comparison.funny_files: + return False + for filename in comparison.common_files: + if not filecmp.cmp(left / filename, right / filename, shallow=False): + return False + return all(_directories_identical(left / name, right / name) for name in comparison.common_dirs) diff --git a/app-instance/backend/beaver/skills/specs/__init__.py b/app-instance/backend/beaver/skills/specs/__init__.py index 45c6331..0919984 100644 --- a/app-instance/backend/beaver/skills/specs/__init__.py +++ b/app-instance/backend/beaver/skills/specs/__init__.py @@ -7,9 +7,10 @@ from .models import ( SkillReviewState, SkillSpec, SkillStatus, + SkillUpstreamSnapshot, SkillVersion, ) -from .storage import SkillSpecStore +from .storage import LoadedSkillUpstreamSnapshot, SkillSpecStore __all__ = [ "SkillActivationReceipt", @@ -19,5 +20,7 @@ __all__ = [ "SkillSpec", "SkillSpecStore", "SkillStatus", + "SkillUpstreamSnapshot", "SkillVersion", + "LoadedSkillUpstreamSnapshot", ] diff --git a/app-instance/backend/beaver/skills/specs/models.py b/app-instance/backend/beaver/skills/specs/models.py index 34bcd3d..1172b55 100644 --- a/app-instance/backend/beaver/skills/specs/models.py +++ b/app-instance/backend/beaver/skills/specs/models.py @@ -84,6 +84,7 @@ class SkillVersion: summary: str = "" tool_hints: list[str] = field(default_factory=list) provenance: dict[str, Any] = field(default_factory=dict) + tree_hash: str = "" def to_dict(self) -> dict[str, Any]: return { @@ -100,6 +101,7 @@ class SkillVersion: "summary": self.summary, "tool_hints": list(self.tool_hints), "provenance": dict(self.provenance), + "tree_hash": self.tree_hash, } @classmethod @@ -118,6 +120,48 @@ class SkillVersion: summary=str(payload.get("summary") or ""), tool_hints=_coerce_string_list(payload.get("tool_hints")), provenance=dict(payload.get("provenance") or {}), + tree_hash=str(payload.get("tree_hash") or ""), + ) + + +@dataclass(slots=True) +class SkillUpstreamSnapshot: + skill_name: str + source_kind: str + source_id: str + source_version: str + source_path: str + skill_content_hash: str + skill_tree_hash: str + created_at: str + frontmatter: dict[str, Any] = field(default_factory=dict) + staged_root: Any | None = field(default=None, repr=False, compare=False) + + def to_dict(self) -> dict[str, Any]: + return { + "skill_name": self.skill_name, + "source_kind": self.source_kind, + "source_id": self.source_id, + "source_version": self.source_version, + "source_path": self.source_path, + "skill_content_hash": self.skill_content_hash, + "skill_tree_hash": self.skill_tree_hash, + "created_at": self.created_at, + "frontmatter": dict(self.frontmatter), + } + + @classmethod + def from_dict(cls, payload: dict[str, Any]) -> "SkillUpstreamSnapshot": + return cls( + skill_name=str(payload["skill_name"]), + source_kind=str(payload.get("source_kind") or ""), + source_id=str(payload.get("source_id") or ""), + source_version=str(payload.get("source_version") or ""), + source_path=str(payload.get("source_path") or ""), + skill_content_hash=str(payload.get("skill_content_hash") or ""), + skill_tree_hash=str(payload.get("skill_tree_hash") or ""), + created_at=str(payload.get("created_at") or ""), + frontmatter=dict(payload.get("frontmatter") or {}), ) diff --git a/app-instance/backend/beaver/skills/specs/storage.py b/app-instance/backend/beaver/skills/specs/storage.py index 4530a49..2539b1c 100644 --- a/app-instance/backend/beaver/skills/specs/storage.py +++ b/app-instance/backend/beaver/skills/specs/storage.py @@ -4,12 +4,16 @@ from __future__ import annotations from dataclasses import dataclass import json +import os from pathlib import Path +import shutil from typing import Any +from beaver.plugins.hashing import hash_plugin_skill_tree +from beaver.plugins.transaction import PluginSkillTransaction from beaver.skills.catalog.utils import parse_frontmatter -from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillVersion +from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillUpstreamSnapshot, SkillVersion from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content @@ -19,6 +23,13 @@ class LoadedSkillVersion: content: str +@dataclass(slots=True) +class LoadedSkillUpstreamSnapshot: + snapshot: SkillUpstreamSnapshot + content: str + root: Path + + class SkillSpecStore: """Manage structured skill lifecycle state inside the workspace.""" @@ -155,13 +166,80 @@ class SkillSpecStore: payload = self._read_json(version_file) loaded = SkillVersion.from_dict(payload) content = skill_file.read_text(encoding="utf-8") + if not loaded.tree_hash: + loaded.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash return LoadedSkillVersion(version=loaded, content=content) def write_skill_version(self, version: SkillVersion, content: str) -> None: version_dir = self._skill_dir(version.skill_name) / "versions" / version.version version_dir.mkdir(parents=True, exist_ok=True) - self._write_json(version_dir / "version.json", version.to_dict()) self._write_text(version_dir / "SKILL.md", content) + if not version.tree_hash: + version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash + self._write_json(version_dir / "version.json", version.to_dict()) + + def stage_upstream_snapshot( + self, + transaction: PluginSkillTransaction, + *, + skill_name: str, + source_kind: str, + source_id: str, + source_version: str, + source_path: str, + source_root: str | Path, + ) -> SkillUpstreamSnapshot: + source = Path(source_root) + digest = hash_plugin_skill_tree(source) + staged_root = transaction.stage_upstream_snapshot(skill_name, source_id, digest.skill_tree_hash) + self._copy_regular_tree(source, staged_root) + content = (staged_root / "SKILL.md").read_text(encoding="utf-8") + frontmatter, _body = parse_frontmatter(content) + snapshot = SkillUpstreamSnapshot( + skill_name=skill_name, + source_kind=source_kind, + source_id=source_id, + source_version=source_version, + source_path=source_path, + skill_content_hash=digest.skill_content_hash, + skill_tree_hash=digest.skill_tree_hash, + created_at=_utc_now(), + frontmatter=normalize_frontmatter(frontmatter), + staged_root=staged_root, + ) + self._write_json(staged_root / "upstream.json", snapshot.to_dict()) + return snapshot + + def promote_upstream_snapshot( + self, + transaction: PluginSkillTransaction, + snapshot: SkillUpstreamSnapshot, + ) -> None: + staged_root = Path(snapshot.staged_root) if snapshot.staged_root is not None else None + final_root = self._upstream_snapshot_dir(snapshot.skill_name, snapshot.source_id, snapshot.skill_tree_hash) + if final_root.exists(): + return + if staged_root is None or not staged_root.exists(): + raise ValueError("Staged upstream snapshot is missing") + transaction.promote_directory(staged_root, final_root) + + def read_upstream_snapshot( + self, + skill_name: str, + source_id: str, + skill_tree_hash: str, + ) -> LoadedSkillUpstreamSnapshot | None: + root = self._upstream_snapshot_dir(skill_name, source_id, skill_tree_hash) + metadata = root / "upstream.json" + skill_file = root / "SKILL.md" + if not metadata.exists() or not skill_file.exists(): + return None + snapshot = SkillUpstreamSnapshot.from_dict(self._read_json(metadata)) + return LoadedSkillUpstreamSnapshot( + snapshot=snapshot, + content=skill_file.read_text(encoding="utf-8"), + root=root, + ) def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]: results: list[SkillDraft] = [] @@ -259,6 +337,9 @@ class SkillSpecStore: def _skill_dir(self, name: str) -> Path: return self.root / name + def _upstream_snapshot_dir(self, skill_name: str, source_id: str, skill_tree_hash: str) -> Path: + return self._skill_dir(skill_name) / "upstreams" / source_id / skill_tree_hash + def _iter_skill_dirs(self) -> list[Path]: return [ child @@ -285,9 +366,41 @@ class SkillSpecStore: @staticmethod def _write_json(path: Path, payload: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json_dumps(payload) + "\n", encoding="utf-8") + tmp_path = path.with_name(f"{path.name}.tmp") + with tmp_path.open("w", encoding="utf-8") as handle: + handle.write(json_dumps(payload) + "\n") + handle.flush() + os.fsync(handle.fileno()) + os.replace(tmp_path, path) @staticmethod def _write_text(path: Path, content: str) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(content, encoding="utf-8") + + @staticmethod + def _copy_regular_tree(source_root: Path, target_root: Path) -> None: + source_root = Path(source_root) + target_root = Path(target_root) + for source in sorted(source_root.rglob("*"), key=lambda item: item.relative_to(source_root).as_posix()): + relative = source.relative_to(source_root) + if any(part in {"", ".", ".."} for part in relative.parts): + raise ValueError(f"Invalid path in skill tree: {relative.as_posix()}") + if source.is_symlink(): + raise ValueError(f"Skill tree contains a symlink: {relative.as_posix()}") + target = target_root / relative + if not target.resolve().is_relative_to(target_root.resolve()): + raise ValueError(f"Skill tree copy target escapes root: {relative.as_posix()}") + if source.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + if not source.is_file(): + raise ValueError(f"Skill tree contains a non-regular file: {relative.as_posix()}") + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, target) + + +def _utc_now() -> str: + from datetime import datetime, timezone + + return datetime.now(timezone.utc).isoformat() diff --git a/app-instance/backend/tests/unit/test_plugin_skill_storage.py b/app-instance/backend/tests/unit/test_plugin_skill_storage.py new file mode 100644 index 0000000..b78e11f --- /dev/null +++ b/app-instance/backend/tests/unit/test_plugin_skill_storage.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from beaver.plugins.transaction import PluginSkillTransaction +from beaver.skills.specs import SkillSpecStore, SkillVersion + + +def _create_source_skill(root: Path, *, template_text: str = "panel") -> Path: + source = root / "plugin" / "skills" / "comic" + source.mkdir(parents=True) + (source / "SKILL.md").write_text("# Comic\n\nOriginal.\n", encoding="utf-8") + (source / "templates").mkdir() + (source / "templates" / "panel.txt").write_text(template_text, encoding="utf-8") + return source + + +def test_write_upstream_snapshot_copies_skill_without_mutating_source(tmp_path: Path) -> None: + source = _create_source_skill(tmp_path) + store = SkillSpecStore(tmp_path / "workspace") + transaction = PluginSkillTransaction(tmp_path / "workspace") + + snapshot = store.stage_upstream_snapshot( + transaction, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.0", + source_path="skills/comic", + source_root=source, + ) + store.promote_upstream_snapshot(transaction, snapshot) + + loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash) + assert loaded is not None + assert loaded.content == "# Comic\n\nOriginal.\n" + assert (loaded.root / "templates" / "panel.txt").read_text(encoding="utf-8") == "panel" + assert (source / "SKILL.md").read_text(encoding="utf-8") == "# Comic\n\nOriginal.\n" + + +def test_upstream_snapshot_tree_hash_tracks_supporting_files(tmp_path: Path) -> None: + source = _create_source_skill(tmp_path, template_text="v1") + store = SkillSpecStore(tmp_path / "workspace") + first_tx = PluginSkillTransaction(tmp_path / "workspace") + first = store.stage_upstream_snapshot( + first_tx, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.0", + source_path="skills/comic", + source_root=source, + ) + store.promote_upstream_snapshot(first_tx, first) + + (source / "templates" / "panel.txt").write_text("v2", encoding="utf-8") + second_tx = PluginSkillTransaction(tmp_path / "workspace") + second = store.stage_upstream_snapshot( + second_tx, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.1", + source_path="skills/comic", + source_root=source, + ) + + assert first.skill_content_hash == second.skill_content_hash + assert first.skill_tree_hash != second.skill_tree_hash + + +def test_staged_upstream_snapshot_is_not_visible_until_promoted(tmp_path: Path) -> None: + source = _create_source_skill(tmp_path) + store = SkillSpecStore(tmp_path / "workspace") + transaction = PluginSkillTransaction(tmp_path / "workspace") + + snapshot = store.stage_upstream_snapshot( + transaction, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.0", + source_path="skills/comic", + source_root=source, + ) + + assert store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash) is None + + +def test_promote_upstream_snapshot_is_idempotent_for_identical_snapshot(tmp_path: Path) -> None: + source = _create_source_skill(tmp_path) + store = SkillSpecStore(tmp_path / "workspace") + transaction = PluginSkillTransaction(tmp_path / "workspace") + snapshot = store.stage_upstream_snapshot( + transaction, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.0", + source_path="skills/comic", + source_root=source, + ) + + store.promote_upstream_snapshot(transaction, snapshot) + store.promote_upstream_snapshot(transaction, snapshot) + + loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash) + assert loaded is not None + assert loaded.snapshot.skill_tree_hash == snapshot.skill_tree_hash + + +def test_stage_upstream_snapshot_rejects_symlinks(tmp_path: Path) -> None: + source = _create_source_skill(tmp_path) + (source / "linked").symlink_to(source / "SKILL.md") + store = SkillSpecStore(tmp_path / "workspace") + transaction = PluginSkillTransaction(tmp_path / "workspace") + + with pytest.raises(ValueError, match="symlink"): + store.stage_upstream_snapshot( + transaction, + skill_name="baoyu-comic", + source_kind="plugin", + source_id="baoyu-comic", + source_version="1.0.0", + source_path="skills/comic", + source_root=source, + ) + + +def test_legacy_skill_version_without_tree_hash_derives_tree_hash_on_read(tmp_path: Path) -> None: + store = SkillSpecStore(tmp_path / "workspace") + version_dir = store.root / "debug" / "versions" / "v0001" + version_dir.mkdir(parents=True) + (version_dir / "SKILL.md").write_text("# Debug\n", encoding="utf-8") + (version_dir / "version.json").write_text( + json.dumps( + { + "skill_name": "debug", + "version": "v0001", + "content_hash": "old", + "summary_hash": "old-summary", + "created_at": "now", + "created_by": "tester", + "change_reason": "legacy", + } + ), + encoding="utf-8", + ) + store.set_current_version("debug", "v0001") + + loaded = store.read_published_skill("debug") + + assert loaded is not None + assert loaded.version.tree_hash.startswith("sha256:") + + +def test_atomic_json_write_does_not_leave_temp_file(tmp_path: Path) -> None: + store = SkillSpecStore(tmp_path / "workspace") + version = SkillVersion( + skill_name="debug", + version="v0001", + content_hash="hash", + summary_hash="summary", + created_at="now", + created_by="tester", + change_reason="test", + ) + + store.write_skill_version(version, "# Debug\n") + + assert not list((store.root / "debug" / "versions" / "v0001").glob("*.tmp"))