feat(skills): store immutable plugin upstream snapshots

This commit is contained in:
2026-06-16 11:42:46 +08:00
parent 41b45e0423
commit 094dde0b81
5 changed files with 386 additions and 4 deletions

View File

@ -0,0 +1,48 @@
"""Same-filesystem staging for plugin skill writes."""
from __future__ import annotations
import filecmp
import os
from pathlib import Path
import shutil
from uuid import uuid4
class PluginSkillTransaction:
def __init__(self, workspace: str | Path) -> None:
self.workspace = Path(workspace)
self.transaction_id = uuid4().hex
self.root = self.workspace / ".beaver" / "staging" / "plugin-skills" / self.transaction_id
self.root.mkdir(parents=True, exist_ok=True)
def stage_upstream_snapshot(self, skill_name: str, source_id: str, tree_hash: str) -> Path:
path = self.root / "upstreams" / skill_name / source_id / tree_hash
path.mkdir(parents=True, exist_ok=True)
return path
def stage_skill_version(self, skill_name: str, version: str) -> Path:
path = self.root / "versions" / skill_name / version
path.mkdir(parents=True, exist_ok=True)
return path
def promote_directory(self, staged: Path, final: Path) -> None:
if final.exists():
if _directories_identical(staged, final):
return
raise ValueError(f"Immutable directory already exists with different content: {final}")
final.parent.mkdir(parents=True, exist_ok=True)
os.replace(staged, final)
def cleanup(self) -> None:
shutil.rmtree(self.root, ignore_errors=True)
def _directories_identical(left: Path, right: Path) -> bool:
comparison = filecmp.dircmp(left, right)
if comparison.left_only or comparison.right_only or comparison.funny_files:
return False
for filename in comparison.common_files:
if not filecmp.cmp(left / filename, right / filename, shallow=False):
return False
return all(_directories_identical(left / name, right / name) for name in comparison.common_dirs)

View File

@ -7,9 +7,10 @@ from .models import (
SkillReviewState,
SkillSpec,
SkillStatus,
SkillUpstreamSnapshot,
SkillVersion,
)
from .storage import SkillSpecStore
from .storage import LoadedSkillUpstreamSnapshot, SkillSpecStore
__all__ = [
"SkillActivationReceipt",
@ -19,5 +20,7 @@ __all__ = [
"SkillSpec",
"SkillSpecStore",
"SkillStatus",
"SkillUpstreamSnapshot",
"SkillVersion",
"LoadedSkillUpstreamSnapshot",
]

View File

@ -84,6 +84,7 @@ class SkillVersion:
summary: str = ""
tool_hints: list[str] = field(default_factory=list)
provenance: dict[str, Any] = field(default_factory=dict)
tree_hash: str = ""
def to_dict(self) -> dict[str, Any]:
return {
@ -100,6 +101,7 @@ class SkillVersion:
"summary": self.summary,
"tool_hints": list(self.tool_hints),
"provenance": dict(self.provenance),
"tree_hash": self.tree_hash,
}
@classmethod
@ -118,6 +120,48 @@ class SkillVersion:
summary=str(payload.get("summary") or ""),
tool_hints=_coerce_string_list(payload.get("tool_hints")),
provenance=dict(payload.get("provenance") or {}),
tree_hash=str(payload.get("tree_hash") or ""),
)
@dataclass(slots=True)
class SkillUpstreamSnapshot:
skill_name: str
source_kind: str
source_id: str
source_version: str
source_path: str
skill_content_hash: str
skill_tree_hash: str
created_at: str
frontmatter: dict[str, Any] = field(default_factory=dict)
staged_root: Any | None = field(default=None, repr=False, compare=False)
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"source_kind": self.source_kind,
"source_id": self.source_id,
"source_version": self.source_version,
"source_path": self.source_path,
"skill_content_hash": self.skill_content_hash,
"skill_tree_hash": self.skill_tree_hash,
"created_at": self.created_at,
"frontmatter": dict(self.frontmatter),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillUpstreamSnapshot":
return cls(
skill_name=str(payload["skill_name"]),
source_kind=str(payload.get("source_kind") or ""),
source_id=str(payload.get("source_id") or ""),
source_version=str(payload.get("source_version") or ""),
source_path=str(payload.get("source_path") or ""),
skill_content_hash=str(payload.get("skill_content_hash") or ""),
skill_tree_hash=str(payload.get("skill_tree_hash") or ""),
created_at=str(payload.get("created_at") or ""),
frontmatter=dict(payload.get("frontmatter") or {}),
)

View File

@ -4,12 +4,16 @@ from __future__ import annotations
from dataclasses import dataclass
import json
import os
from pathlib import Path
import shutil
from typing import Any
from beaver.plugins.hashing import hash_plugin_skill_tree
from beaver.plugins.transaction import PluginSkillTransaction
from beaver.skills.catalog.utils import parse_frontmatter
from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillVersion
from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillUpstreamSnapshot, SkillVersion
from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content
@ -19,6 +23,13 @@ class LoadedSkillVersion:
content: str
@dataclass(slots=True)
class LoadedSkillUpstreamSnapshot:
snapshot: SkillUpstreamSnapshot
content: str
root: Path
class SkillSpecStore:
"""Manage structured skill lifecycle state inside the workspace."""
@ -155,13 +166,80 @@ class SkillSpecStore:
payload = self._read_json(version_file)
loaded = SkillVersion.from_dict(payload)
content = skill_file.read_text(encoding="utf-8")
if not loaded.tree_hash:
loaded.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
return LoadedSkillVersion(version=loaded, content=content)
def write_skill_version(self, version: SkillVersion, content: str) -> None:
version_dir = self._skill_dir(version.skill_name) / "versions" / version.version
version_dir.mkdir(parents=True, exist_ok=True)
self._write_json(version_dir / "version.json", version.to_dict())
self._write_text(version_dir / "SKILL.md", content)
if not version.tree_hash:
version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
self._write_json(version_dir / "version.json", version.to_dict())
def stage_upstream_snapshot(
self,
transaction: PluginSkillTransaction,
*,
skill_name: str,
source_kind: str,
source_id: str,
source_version: str,
source_path: str,
source_root: str | Path,
) -> SkillUpstreamSnapshot:
source = Path(source_root)
digest = hash_plugin_skill_tree(source)
staged_root = transaction.stage_upstream_snapshot(skill_name, source_id, digest.skill_tree_hash)
self._copy_regular_tree(source, staged_root)
content = (staged_root / "SKILL.md").read_text(encoding="utf-8")
frontmatter, _body = parse_frontmatter(content)
snapshot = SkillUpstreamSnapshot(
skill_name=skill_name,
source_kind=source_kind,
source_id=source_id,
source_version=source_version,
source_path=source_path,
skill_content_hash=digest.skill_content_hash,
skill_tree_hash=digest.skill_tree_hash,
created_at=_utc_now(),
frontmatter=normalize_frontmatter(frontmatter),
staged_root=staged_root,
)
self._write_json(staged_root / "upstream.json", snapshot.to_dict())
return snapshot
def promote_upstream_snapshot(
self,
transaction: PluginSkillTransaction,
snapshot: SkillUpstreamSnapshot,
) -> None:
staged_root = Path(snapshot.staged_root) if snapshot.staged_root is not None else None
final_root = self._upstream_snapshot_dir(snapshot.skill_name, snapshot.source_id, snapshot.skill_tree_hash)
if final_root.exists():
return
if staged_root is None or not staged_root.exists():
raise ValueError("Staged upstream snapshot is missing")
transaction.promote_directory(staged_root, final_root)
def read_upstream_snapshot(
self,
skill_name: str,
source_id: str,
skill_tree_hash: str,
) -> LoadedSkillUpstreamSnapshot | None:
root = self._upstream_snapshot_dir(skill_name, source_id, skill_tree_hash)
metadata = root / "upstream.json"
skill_file = root / "SKILL.md"
if not metadata.exists() or not skill_file.exists():
return None
snapshot = SkillUpstreamSnapshot.from_dict(self._read_json(metadata))
return LoadedSkillUpstreamSnapshot(
snapshot=snapshot,
content=skill_file.read_text(encoding="utf-8"),
root=root,
)
def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
results: list[SkillDraft] = []
@ -259,6 +337,9 @@ class SkillSpecStore:
def _skill_dir(self, name: str) -> Path:
return self.root / name
def _upstream_snapshot_dir(self, skill_name: str, source_id: str, skill_tree_hash: str) -> Path:
return self._skill_dir(skill_name) / "upstreams" / source_id / skill_tree_hash
def _iter_skill_dirs(self) -> list[Path]:
return [
child
@ -285,9 +366,41 @@ class SkillSpecStore:
@staticmethod
def _write_json(path: Path, payload: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json_dumps(payload) + "\n", encoding="utf-8")
tmp_path = path.with_name(f"{path.name}.tmp")
with tmp_path.open("w", encoding="utf-8") as handle:
handle.write(json_dumps(payload) + "\n")
handle.flush()
os.fsync(handle.fileno())
os.replace(tmp_path, path)
@staticmethod
def _write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content, encoding="utf-8")
@staticmethod
def _copy_regular_tree(source_root: Path, target_root: Path) -> None:
source_root = Path(source_root)
target_root = Path(target_root)
for source in sorted(source_root.rglob("*"), key=lambda item: item.relative_to(source_root).as_posix()):
relative = source.relative_to(source_root)
if any(part in {"", ".", ".."} for part in relative.parts):
raise ValueError(f"Invalid path in skill tree: {relative.as_posix()}")
if source.is_symlink():
raise ValueError(f"Skill tree contains a symlink: {relative.as_posix()}")
target = target_root / relative
if not target.resolve().is_relative_to(target_root.resolve()):
raise ValueError(f"Skill tree copy target escapes root: {relative.as_posix()}")
if source.is_dir():
target.mkdir(parents=True, exist_ok=True)
continue
if not source.is_file():
raise ValueError(f"Skill tree contains a non-regular file: {relative.as_posix()}")
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, target)
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,174 @@
from __future__ import annotations
import json
from pathlib import Path
import pytest
from beaver.plugins.transaction import PluginSkillTransaction
from beaver.skills.specs import SkillSpecStore, SkillVersion
def _create_source_skill(root: Path, *, template_text: str = "panel") -> Path:
source = root / "plugin" / "skills" / "comic"
source.mkdir(parents=True)
(source / "SKILL.md").write_text("# Comic\n\nOriginal.\n", encoding="utf-8")
(source / "templates").mkdir()
(source / "templates" / "panel.txt").write_text(template_text, encoding="utf-8")
return source
def test_write_upstream_snapshot_copies_skill_without_mutating_source(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(transaction, snapshot)
loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash)
assert loaded is not None
assert loaded.content == "# Comic\n\nOriginal.\n"
assert (loaded.root / "templates" / "panel.txt").read_text(encoding="utf-8") == "panel"
assert (source / "SKILL.md").read_text(encoding="utf-8") == "# Comic\n\nOriginal.\n"
def test_upstream_snapshot_tree_hash_tracks_supporting_files(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path, template_text="v1")
store = SkillSpecStore(tmp_path / "workspace")
first_tx = PluginSkillTransaction(tmp_path / "workspace")
first = store.stage_upstream_snapshot(
first_tx,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(first_tx, first)
(source / "templates" / "panel.txt").write_text("v2", encoding="utf-8")
second_tx = PluginSkillTransaction(tmp_path / "workspace")
second = store.stage_upstream_snapshot(
second_tx,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.1",
source_path="skills/comic",
source_root=source,
)
assert first.skill_content_hash == second.skill_content_hash
assert first.skill_tree_hash != second.skill_tree_hash
def test_staged_upstream_snapshot_is_not_visible_until_promoted(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
assert store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash) is None
def test_promote_upstream_snapshot_is_idempotent_for_identical_snapshot(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
snapshot = store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
store.promote_upstream_snapshot(transaction, snapshot)
store.promote_upstream_snapshot(transaction, snapshot)
loaded = store.read_upstream_snapshot("baoyu-comic", "baoyu-comic", snapshot.skill_tree_hash)
assert loaded is not None
assert loaded.snapshot.skill_tree_hash == snapshot.skill_tree_hash
def test_stage_upstream_snapshot_rejects_symlinks(tmp_path: Path) -> None:
source = _create_source_skill(tmp_path)
(source / "linked").symlink_to(source / "SKILL.md")
store = SkillSpecStore(tmp_path / "workspace")
transaction = PluginSkillTransaction(tmp_path / "workspace")
with pytest.raises(ValueError, match="symlink"):
store.stage_upstream_snapshot(
transaction,
skill_name="baoyu-comic",
source_kind="plugin",
source_id="baoyu-comic",
source_version="1.0.0",
source_path="skills/comic",
source_root=source,
)
def test_legacy_skill_version_without_tree_hash_derives_tree_hash_on_read(tmp_path: Path) -> None:
store = SkillSpecStore(tmp_path / "workspace")
version_dir = store.root / "debug" / "versions" / "v0001"
version_dir.mkdir(parents=True)
(version_dir / "SKILL.md").write_text("# Debug\n", encoding="utf-8")
(version_dir / "version.json").write_text(
json.dumps(
{
"skill_name": "debug",
"version": "v0001",
"content_hash": "old",
"summary_hash": "old-summary",
"created_at": "now",
"created_by": "tester",
"change_reason": "legacy",
}
),
encoding="utf-8",
)
store.set_current_version("debug", "v0001")
loaded = store.read_published_skill("debug")
assert loaded is not None
assert loaded.version.tree_hash.startswith("sha256:")
def test_atomic_json_write_does_not_leave_temp_file(tmp_path: Path) -> None:
store = SkillSpecStore(tmp_path / "workspace")
version = SkillVersion(
skill_name="debug",
version="v0001",
content_hash="hash",
summary_hash="summary",
created_at="now",
created_by="tester",
change_reason="test",
)
store.write_skill_version(version, "# Debug\n")
assert not list((store.root / "debug" / "versions" / "v0001").glob("*.tmp"))