feat(skills): store immutable plugin upstream snapshots

This commit is contained in:
2026-06-16 11:42:46 +08:00
parent 41b45e0423
commit 094dde0b81
5 changed files with 386 additions and 4 deletions

View File

@ -0,0 +1,48 @@
"""Same-filesystem staging for plugin skill writes."""
from __future__ import annotations
import filecmp
import os
from pathlib import Path
import shutil
from uuid import uuid4
class PluginSkillTransaction:
def __init__(self, workspace: str | Path) -> None:
self.workspace = Path(workspace)
self.transaction_id = uuid4().hex
self.root = self.workspace / ".beaver" / "staging" / "plugin-skills" / self.transaction_id
self.root.mkdir(parents=True, exist_ok=True)
def stage_upstream_snapshot(self, skill_name: str, source_id: str, tree_hash: str) -> Path:
path = self.root / "upstreams" / skill_name / source_id / tree_hash
path.mkdir(parents=True, exist_ok=True)
return path
def stage_skill_version(self, skill_name: str, version: str) -> Path:
path = self.root / "versions" / skill_name / version
path.mkdir(parents=True, exist_ok=True)
return path
def promote_directory(self, staged: Path, final: Path) -> None:
if final.exists():
if _directories_identical(staged, final):
return
raise ValueError(f"Immutable directory already exists with different content: {final}")
final.parent.mkdir(parents=True, exist_ok=True)
os.replace(staged, final)
def cleanup(self) -> None:
shutil.rmtree(self.root, ignore_errors=True)
def _directories_identical(left: Path, right: Path) -> bool:
comparison = filecmp.dircmp(left, right)
if comparison.left_only or comparison.right_only or comparison.funny_files:
return False
for filename in comparison.common_files:
if not filecmp.cmp(left / filename, right / filename, shallow=False):
return False
return all(_directories_identical(left / name, right / name) for name in comparison.common_dirs)

View File

@ -7,9 +7,10 @@ from .models import (
SkillReviewState, SkillReviewState,
SkillSpec, SkillSpec,
SkillStatus, SkillStatus,
SkillUpstreamSnapshot,
SkillVersion, SkillVersion,
) )
from .storage import SkillSpecStore from .storage import LoadedSkillUpstreamSnapshot, SkillSpecStore
__all__ = [ __all__ = [
"SkillActivationReceipt", "SkillActivationReceipt",
@ -19,5 +20,7 @@ __all__ = [
"SkillSpec", "SkillSpec",
"SkillSpecStore", "SkillSpecStore",
"SkillStatus", "SkillStatus",
"SkillUpstreamSnapshot",
"SkillVersion", "SkillVersion",
"LoadedSkillUpstreamSnapshot",
] ]

View File

@ -84,6 +84,7 @@ class SkillVersion:
summary: str = "" summary: str = ""
tool_hints: list[str] = field(default_factory=list) tool_hints: list[str] = field(default_factory=list)
provenance: dict[str, Any] = field(default_factory=dict) provenance: dict[str, Any] = field(default_factory=dict)
tree_hash: str = ""
def to_dict(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]:
return { return {
@ -100,6 +101,7 @@ class SkillVersion:
"summary": self.summary, "summary": self.summary,
"tool_hints": list(self.tool_hints), "tool_hints": list(self.tool_hints),
"provenance": dict(self.provenance), "provenance": dict(self.provenance),
"tree_hash": self.tree_hash,
} }
@classmethod @classmethod
@ -118,6 +120,48 @@ class SkillVersion:
summary=str(payload.get("summary") or ""), summary=str(payload.get("summary") or ""),
tool_hints=_coerce_string_list(payload.get("tool_hints")), tool_hints=_coerce_string_list(payload.get("tool_hints")),
provenance=dict(payload.get("provenance") or {}), provenance=dict(payload.get("provenance") or {}),
tree_hash=str(payload.get("tree_hash") or ""),
)
@dataclass(slots=True)
class SkillUpstreamSnapshot:
skill_name: str
source_kind: str
source_id: str
source_version: str
source_path: str
skill_content_hash: str
skill_tree_hash: str
created_at: str
frontmatter: dict[str, Any] = field(default_factory=dict)
staged_root: Any | None = field(default=None, repr=False, compare=False)
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"source_kind": self.source_kind,
"source_id": self.source_id,
"source_version": self.source_version,
"source_path": self.source_path,
"skill_content_hash": self.skill_content_hash,
"skill_tree_hash": self.skill_tree_hash,
"created_at": self.created_at,
"frontmatter": dict(self.frontmatter),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillUpstreamSnapshot":
return cls(
skill_name=str(payload["skill_name"]),
source_kind=str(payload.get("source_kind") or ""),
source_id=str(payload.get("source_id") or ""),
source_version=str(payload.get("source_version") or ""),
source_path=str(payload.get("source_path") or ""),
skill_content_hash=str(payload.get("skill_content_hash") or ""),
skill_tree_hash=str(payload.get("skill_tree_hash") or ""),
created_at=str(payload.get("created_at") or ""),
frontmatter=dict(payload.get("frontmatter") or {}),
) )

View File

@ -4,12 +4,16 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import json import json
import os
from pathlib import Path from pathlib import Path
import shutil
from typing import Any from typing import Any
from beaver.plugins.hashing import hash_plugin_skill_tree
from beaver.plugins.transaction import PluginSkillTransaction
from beaver.skills.catalog.utils import parse_frontmatter from beaver.skills.catalog.utils import parse_frontmatter
from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillVersion from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillUpstreamSnapshot, SkillVersion
from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content
@ -19,6 +23,13 @@ class LoadedSkillVersion:
content: str content: str
@dataclass(slots=True)
class LoadedSkillUpstreamSnapshot:
snapshot: SkillUpstreamSnapshot
content: str
root: Path
class SkillSpecStore: class SkillSpecStore:
"""Manage structured skill lifecycle state inside the workspace.""" """Manage structured skill lifecycle state inside the workspace."""
@ -155,13 +166,80 @@ class SkillSpecStore:
payload = self._read_json(version_file) payload = self._read_json(version_file)
loaded = SkillVersion.from_dict(payload) loaded = SkillVersion.from_dict(payload)
content = skill_file.read_text(encoding="utf-8") content = skill_file.read_text(encoding="utf-8")
if not loaded.tree_hash:
loaded.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
return LoadedSkillVersion(version=loaded, content=content) return LoadedSkillVersion(version=loaded, content=content)
def write_skill_version(self, version: SkillVersion, content: str) -> None: def write_skill_version(self, version: SkillVersion, content: str) -> None:
version_dir = self._skill_dir(version.skill_name) / "versions" / version.version version_dir = self._skill_dir(version.skill_name) / "versions" / version.version
version_dir.mkdir(parents=True, exist_ok=True) version_dir.mkdir(parents=True, exist_ok=True)
self._write_json(version_dir / "version.json", version.to_dict())
self._write_text(version_dir / "SKILL.md", content) self._write_text(version_dir / "SKILL.md", content)
if not version.tree_hash:
version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
self._write_json(version_dir / "version.json", version.to_dict())
def stage_upstream_snapshot(
self,
transaction: PluginSkillTransaction,
*,
skill_name: str,
source_kind: str,
source_id: str,
source_version: str,
source_path: str,
source_root: str | Path,
) -> SkillUpstreamSnapshot:
source = Path(source_root)
digest = hash_plugin_skill_tree(source)
staged_root = transaction.stage_upstream_snapshot(skill_name, source_id, digest.skill_tree_hash)
self._copy_regular_tree(source, staged_root)
content = (staged_root / "SKILL.md").read_text(encoding="utf-8")
frontmatter, _body = parse_frontmatter(content)
snapshot = SkillUpstreamSnapshot(
skill_name=skill_name,
source_kind=source_kind,
source_id=source_id,
source_version=source_version,
source_path=source_path,
skill_content_hash=digest.skill_content_hash,
skill_tree_hash=digest.skill_tree_hash,
created_at=_utc_now(),
frontmatter=normalize_frontmatter(frontmatter),
staged_root=staged_root,
)
self._write_json(staged_root / "upstream.json", snapshot.to_dict())
return snapshot
def promote_upstream_snapshot(
self,
transaction: PluginSkillTransaction,
snapshot: SkillUpstreamSnapshot,
) -> None:
staged_root = Path(snapshot.staged_root) if snapshot.staged_root is not None else None
final_root = self._upstream_snapshot_dir(snapshot.skill_name, snapshot.source_id, snapshot.skill_tree_hash)
if final_root.exists():
return
if staged_root is None or not staged_root.exists():
raise ValueError("Staged upstream snapshot is missing")
transaction.promote_directory(staged_root, final_root)
def read_upstream_snapshot(
self,
skill_name: str,
source_id: str,
skill_tree_hash: str,
) -> LoadedSkillUpstreamSnapshot | None:
root = self._upstream_snapshot_dir(skill_name, source_id, skill_tree_hash)
metadata = root / "upstream.json"
skill_file = root / "SKILL.md"
if not metadata.exists() or not skill_file.exists():
return None
snapshot = SkillUpstreamSnapshot.from_dict(self._read_json(metadata))
return LoadedSkillUpstreamSnapshot(
snapshot=snapshot,
content=skill_file.read_text(encoding="utf-8"),
root=root,
)
def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]: def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
results: list[SkillDraft] = [] results: list[SkillDraft] = []
@ -259,6 +337,9 @@ class SkillSpecStore:
def _skill_dir(self, name: str) -> Path: def _skill_dir(self, name: str) -> Path:
return self.root / name return self.root / name
def _upstream_snapshot_dir(self, skill_name: str, source_id: str, skill_tree_hash: str) -> Path:
return self._skill_dir(skill_name) / "upstreams" / source_id / skill_tree_hash
def _iter_skill_dirs(self) -> list[Path]: def _iter_skill_dirs(self) -> list[Path]:
return [ return [
child child
@ -285,9 +366,41 @@ class SkillSpecStore:
@staticmethod @staticmethod
def _write_json(path: Path, payload: dict[str, Any]) -> None: def _write_json(path: Path, payload: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json_dumps(payload) + "\n", encoding="utf-8") tmp_path = path.with_name(f"{path.name}.tmp")
with tmp_path.open("w", encoding="utf-8") as handle:
handle.write(json_dumps(payload) + "\n")
handle.flush()
os.fsync(handle.fileno())
os.replace(tmp_path, path)
@staticmethod @staticmethod
def _write_text(path: Path, content: str) -> None: def _write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content, encoding="utf-8") path.write_text(content, encoding="utf-8")
@staticmethod
def _copy_regular_tree(source_root: Path, target_root: Path) -> None:
source_root = Path(source_root)
target_root = Path(target_root)
for source in sorted(source_root.rglob("*"), key=lambda item: item.relative_to(source_root).as_posix()):
relative = source.relative_to(source_root)
if any(part in {"", ".", ".."} for part in relative.parts):
raise ValueError(f"Invalid path in skill tree: {relative.as_posix()}")
if source.is_symlink():
raise ValueError(f"Skill tree contains a symlink: {relative.as_posix()}")
target = target_root / relative
if not target.resolve().is_relative_to(target_root.resolve()):
raise ValueError(f"Skill tree copy target escapes root: {relative.as_posix()}")
if source.is_dir():
target.mkdir(parents=True, exist_ok=True)
continue
if not source.is_file():
raise ValueError(f"Skill tree contains a non-regular file: {relative.as_posix()}")
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, target)
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,174 @@
from __future__ import annotations
import json
from pathlib import Path
import pytest
from beaver.plugins.transaction import PluginSkillTransaction
from beaver.skills.specs import SkillSpecStore, SkillVersion
def _create_source_skill(root: Path, *, template_text: str = "panel") -> Path:
source = root / "plugin" / "skills" / "comic"
source.mkdir(parents=True)
(source / "SKILL.md").write_text("# Comic\n\nOriginal.\n", encoding="utf-8")
(source / "templates").mkdir()
(source / "templates" / "panel.txt").write_text(template_text, encoding="utf-8")
return source
def test_write_upstream_snapshot_copies_skill_without_mutating_source(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(transaction, snapshot)
loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash)
assert loaded is not None
assert loaded.content == "# Comic\n\nOriginal.\n"
assert (loaded.root / "templates" / "panel.txt").read_text(encoding="utf-8") == "panel"
assert (source / "SKILL.md").read_text(encoding="utf-8") == "# Comic\n\nOriginal.\n"
def test_upstream_snapshot_tree_hash_tracks_supporting_files(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path, template_text="v1")
store = SkillSpecStore(tmp_path / "workspace")
first_tx = PluginSkillTransaction(tmp_path / "workspace")
first = store.stage_upstream_snapshot(
first_tx,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(first_tx, first)
(source / "templates" / "panel.txt").write_text("v2", encoding="utf-8")
second_tx = PluginSkillTransaction(tmp_path / "workspace")
second = store.stage_upstream_snapshot(
second_tx,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.1",
source_path="skills/comic",
source_root=source,
)
assert first.skill_content_hash == second.skill_content_hash
assert first.skill_tree_hash != second.skill_tree_hash
def test_staged_upstream_snapshot_is_not_visible_until_promoted(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
assert store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash) is None
def test_promote_upstream_snapshot_is_idempotent_for_identical_snapshot(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(transaction, snapshot)
store.promote_upstream_snapshot(transaction, snapshot)
loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash)
assert loaded is not None
assert loaded.snapshot.skill_tree_hash == snapshot.skill_tree_hash
def test_stage_upstream_snapshot_rejects_symlinks(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
(source / "linked").symlink_to(source / "SKILL.md")
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
with pytest.raises(ValueError, match="symlink"):
store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
def test_legacy_skill_version_without_tree_hash_derives_tree_hash_on_read(tmp_path: Path) -> None:
store = SkillSpecStore(tmp_path / "workspace")
version_dir = store.root / "debug" / "versions" / "v0001"
version_dir.mkdir(parents=True)
(version_dir / "SKILL.md").write_text("# Debug\n", encoding="utf-8")
(version_dir / "version.json").write_text(
json.dumps(
{
"skill_name": "debug",
"version": "v0001",
"content_hash": "old",
"summary_hash": "old-summary",
"created_at": "now",
"created_by": "tester",
"change_reason": "legacy",
}
),
encoding="utf-8",
)
store.set_current_version("debug", "v0001")
loaded = store.read_published_skill("debug")
assert loaded is not None
assert loaded.version.tree_hash.startswith("sha256:")
def test_atomic_json_write_does_not_leave_temp_file(tmp_path: Path) -> None:
store = SkillSpecStore(tmp_path / "workspace")
version = SkillVersion(
skill_name="debug",
version="v0001",
content_hash="hash",
summary_hash="summary",
created_at="now",
created_by="tester",
change_reason="test",
)
store.write_skill_version(version, "# Debug\n")
assert not list((store.root / "debug" / "versions" / "v0001").glob("*.tmp"))