test(plugins): cover skill mirror lifecycle

This commit is contained in:
2026-06-16 12:24:19 +08:00
parent a9b830d11e
commit a65e59fcb6
7 changed files with 516 additions and 5 deletions

View File

@ -271,6 +271,8 @@ class PluginManager:
current = self.skill_store.read_published_skill(declaration.name)
if current is None:
continue
if self._reconcile_published_update(binding, current.version, snapshot.skill_tree_hash):
continue
classification = classify_plugin_skill_update(
binding.accepted_upstream_tree_hash,
current.version.tree_hash,
@ -317,6 +319,33 @@ class PluginManager:
finally:
transaction.cleanup()
def _reconcile_published_update(
self,
binding: PluginSkillBinding,
current_version: SkillVersion,
observed_upstream_tree_hash: str,
) -> bool:
if not binding.pending_candidate_id:
return False
candidates = self.learning_store.list_learning_candidates()
candidate = next(
(item for item in candidates if item.candidate_id == binding.pending_candidate_id),
None,
)
if candidate is None or candidate.status != "published":
return False
candidate_hash = str(candidate.evidence.get("new_upstream_tree_hash") or "")
version_hash = str(current_version.provenance.get("new_upstream_tree_hash") or "")
if not candidate_hash or candidate_hash != observed_upstream_tree_hash or version_hash != candidate_hash:
return False
binding.accepted_upstream_tree_hash = candidate_hash
binding.observed_upstream_tree_hash = candidate_hash
binding.accepted_beaver_version = current_version.version
binding.current_beaver_version = current_version.version
binding.pending_candidate_id = None
binding.status = "synced"
return True
@staticmethod
def _create_update_candidate(
*,

View File

@ -12,11 +12,13 @@ from beaver.engine.context import SkillContext
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.learning.case_selection import select_replay_cases
from beaver.skills.learning.preservation import check_preservation
from beaver.skills.learning.preservation import check_plugin_merge_preservation, check_preservation
from beaver.skills.learning.replay import ReplayArmRequest, ReplayRunner
from beaver.skills.learning.surrogate import SurrogateToolEvaluator
from beaver.skills.specs import SkillDraft
from beaver.skills.specs.storage import SkillSpecStore
class SkillDraftEvaluator:
@ -28,9 +30,11 @@ class SkillDraftEvaluator:
*,
surrogate_evaluator: SurrogateToolEvaluator | None = None,
max_parallel_cases: int | None = None,
skill_store: SkillSpecStore | None = None,
) -> None:
self.run_store = run_store
self.surrogate_evaluator = surrogate_evaluator or SurrogateToolEvaluator()
self.skill_store = skill_store
configured_parallelism = max_parallel_cases
if configured_parallelism is None:
try:
@ -207,7 +211,7 @@ class SkillDraftEvaluator:
results = await asyncio.gather(*(evaluate_case(case) for case in replay_cases))
case_reports = [case_report for case_report, _ in results]
legacy_cases = [legacy_case for _, legacy_case in results]
preservation_report = _preservation_report(candidate, draft)
preservation_report = _preservation_report(candidate, draft, skill_store=self.skill_store)
return _report_from_case_reports(
candidate,
draft,
@ -343,9 +347,35 @@ def _draft_skill_context(draft: SkillDraft) -> SkillContext:
)
def _preservation_report(candidate: SkillLearningCandidate, draft: SkillDraft) -> dict | None:
def _preservation_report(
candidate: SkillLearningCandidate,
draft: SkillDraft,
*,
skill_store: SkillSpecStore | None = None,
) -> dict | None:
if candidate.kind not in {"revise_skill", "merge_skills"}:
return None
if candidate.kind != "plugin_skill_update" or skill_store is None:
return None
plugin_id = str(draft.provenance.get("plugin_id") or candidate.evidence.get("plugin_id") or "")
skill_name = str(draft.provenance.get("skill_name") or candidate.evidence.get("skill_name") or draft.skill_name)
local_version = str(draft.base_version or draft.provenance.get("local_version") or candidate.evidence.get("local_version") or "")
upstream_hash = str(
draft.provenance.get("new_upstream_tree_hash")
or candidate.evidence.get("new_upstream_tree_hash")
or ""
)
if not plugin_id or not skill_name or not local_version or not upstream_hash:
return None
local = skill_store.read_published_skill(skill_name, local_version)
upstream = skill_store.read_upstream_snapshot(skill_name, plugin_id, upstream_hash)
if local is None or upstream is None:
return None
return check_plugin_merge_preservation(
local_content=strip_frontmatter(local.content),
upstream_content=strip_frontmatter(upstream.content),
draft_content=draft.proposed_content,
merge_decisions=draft.provenance,
)
base_content = str(candidate.evidence.get("base_content") or "") if isinstance(candidate.evidence, dict) else ""
if not base_content.strip():
return None

View File

@ -315,7 +315,10 @@ class SkillLearningPipelineService:
) -> SkillDraftEvalReport:
draft = self.get_draft(skill_name, draft_id)
candidate = self.get_candidate(candidate_id)
evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
evaluator = self.evaluator or SkillDraftEvaluator(
self.learning_service.run_store,
skill_store=self.draft_service.store,
)
report = await evaluator.evaluate(
candidate=candidate,
draft=draft,

View File

@ -55,7 +55,11 @@ class SkillPublisher:
version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
self.store._write_json(version_dir / "version.json", version.to_dict())
else:
self._copy_base_supporting_files(draft, next_version)
self._copy_uploaded_supporting_files(draft, next_version)
version_dir = self.store.root / draft.skill_name / "versions" / next_version
version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash
self.store._write_json(version_dir / "version.json", version.to_dict())
self.store.set_current_version(skill_name, next_version)
spec = self.store.get_skill_spec(skill_name)
@ -202,6 +206,23 @@ class SkillPublisher:
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copyfile(source, target)
def _copy_base_supporting_files(self, draft: SkillDraft, version: str) -> None:
if not draft.base_version:
return
source_root = self.store.root / draft.skill_name / "versions" / draft.base_version
if not source_root.exists() or not source_root.is_dir():
return
target_root = self.store.root / draft.skill_name / "versions" / version
for source in sorted(source_root.rglob("*"), key=lambda item: item.relative_to(source_root).as_posix()):
if not source.is_file() or source.is_symlink():
continue
relative = source.relative_to(source_root)
if relative.as_posix() in {"SKILL.md", "version.json", "upstream.json"}:
continue
target = target_root / relative
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copyfile(source, target)
def _copy_plugin_update_supporting_files(self, draft: SkillDraft, version: str) -> None:
plugin_id = str(draft.provenance.get("plugin_id") or "")
tree_hash = str(draft.provenance.get("new_upstream_tree_hash") or "")

View File

@ -0,0 +1,326 @@
from __future__ import annotations
import asyncio
import json
import shutil
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from types import SimpleNamespace
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillLearningStore
from beaver.plugins.discovery import discover_plugins
from beaver.plugins.skills import PluginManager
from beaver.plugins.state import PluginStateStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
class StubProvider(LLMProvider):
    """Provider double that answers every chat with one fixed content string.

    Each call to :meth:`chat` is recorded in ``calls`` as a dict holding the
    ``messages`` and ``model`` it was invoked with.
    """

    def __init__(self, content: str) -> None:
        super().__init__()
        self.content = content
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call and return the canned content."""
        call_record = {"messages": messages, "model": model}
        self.calls.append(call_record)
        # Fall back to the stub model name when the caller did not pick one.
        effective_model = model if model else "stub"
        return LLMResponse(
            content=self.content,
            provider_name="stub",
            model=effective_model,
        )

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub"
class StubReplayRunner:
    """Replay runner double that records requests and returns a fixed arm result."""

    def __init__(self) -> None:
        self.requests: list[object] = []

    async def run_arm(self, request):
        """Store *request* and return a canned result derived from its fields."""
        self.requests.append(request)

        outcome = {
            "case_id": request.case_id,
            "arm": request.arm,
            "session_id": "session-replay",
            "run_id": f"{request.arm}-run",
            "task_text": request.task_text,
            "finish_reason": "stop",
            "final_answer": "panel safety review complete",
        }
        # A single successful write_file call is reported for every arm.
        outcome["tool_calls"] = [
            {
                "tool_name": "write_file",
                "mode": "executed",
                "arguments": {"path": "storyboard.md"},
                "result": {"success": True},
            }
        ]
        outcome["artifacts"] = []
        outcome["side_effects"] = []
        return outcome
def test_plugin_skill_mirror_upgrade_and_recovery_lifecycle(tmp_path: Path) -> None:
# End-to-end scenario: enable a plugin skill, publish a local revision, merge
# an upstream plugin update, recover from a failing publish observer, roll
# back, then exercise plugin removal, restoration and concurrent syncs.
# The steps share on-disk state under tmp_path, so their order matters.
workspace = tmp_path / "workspace"
# Phase 1: write plugin 1.0.0 (one skill plus a template file) and enable it.
plugin_root = _write_plugin(
workspace / "plugins",
version="1.0.0",
body="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n",
template="panel-v1",
)
manager, store, learning_store, pipeline = _services(workspace)
manager.enable("baoyu-comic")
# Enabling is expected to publish the skill as v0001.
initial = store.read_published_skill("baoyu-comic")
assert initial is not None
assert initial.version.version == "v0001"
# Phase 2: publish a local revision that adds a "Local Review" section,
# going through safety check, review submission and approval; expected v0002.
local = pipeline.draft_service.create_revision_draft(
skill_name="baoyu-comic",
base_version="v0001",
proposed_content="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n\n## Local Review\n\nKeep user edits.\n",
proposed_frontmatter={"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
created_by="tester",
reason="learned local revision",
)
pipeline.check_safety(local.skill_name, local.draft_id)
pipeline.submit_review(local.skill_name, local.draft_id, requested_by="tester")
pipeline.approve(local.skill_name, local.draft_id, reviewer="tester")
local_version = pipeline.publish(local.skill_name, local.draft_id, publisher="tester")
assert local_version.version == "v0002"
# Phase 3: change the plugin on disk to 1.1.0 and snapshot its bytes so later
# steps can prove the plugin source was never modified.
_rewrite_plugin(
plugin_root,
version="1.1.0",
body="# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n",
template="panel-v2",
)
plugin_files_after_update = _plugin_file_bytes(plugin_root)
# A freshly built manager syncs; exactly one open candidate must exist and it
# must be flagged as a three-way merge.
_services(workspace)[0].sync_enabled()
first_candidate = _only_open_candidate(learning_store)
assert first_candidate.evidence["merge_mode"] == "three_way"
# Phase 4: synthesize the merged draft; the stub provider returns this
# payload as JSON.
merged_payload = {
"frontmatter": {"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
"content": (
"# Baoyu Comic\n\n"
"## Workflow\n\nDraw better panels.\n\n"
"## Local Review\n\nKeep user edits.\n\n"
"## Safety\n\nDo not leak secrets.\n"
),
"change_reason": "Merge upstream safety guidance and preserve local review.",
"preserved_local_sections": ["Local Review"],
"adopted_upstream_sections": ["Workflow", "Safety"],
"resolved_conflicts": [],
"dropped_sections": [],
}
draft = asyncio.run(
pipeline.synthesize_draft(
first_candidate.candidate_id,
provider_bundle=_bundle(StubProvider(json.dumps(merged_payload))),
)
)
# Phase 5: attach explicit eval cases, then run safety and evaluation with
# the stub replay runner.
_add_eval_cases(learning_store, first_candidate.candidate_id)
pipeline.check_safety(draft.skill_name, draft.draft_id)
replay_runner = StubReplayRunner()
report = asyncio.run(
pipeline.evaluate_draft(
first_candidate.candidate_id,
draft.skill_name,
draft.draft_id,
provider_bundle=_bundle(StubProvider('{"cases": []}')),
replay_runner=replay_runner,
)
)
# The replay runner must have been used, and the report must carry a passing
# preservation report in "plugin_three_way" mode.
assert replay_runner.requests
assert report.mode == "replay"
assert report.preservation_report is not None
assert report.preservation_report["mode"] == "plugin_three_way"
assert report.preservation_report["passed"] is True
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
# Phase 6: publish through a pipeline whose publish observer raises (the
# generator .throw() trick raises from inside a lambda). Publishing must
# still return v0003, and the stored binding must still reference the
# candidate as pending.
_, _, _, failing_ack_pipeline = _services(
workspace,
publish_observer=lambda draft, result: (_ for _ in ()).throw(RuntimeError("observer failed")),
)
published = failing_ack_pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
assert published.version == "v0003"
pending_after_failed_observer = PluginStateStore(workspace).get_plugin("baoyu-comic")
assert pending_after_failed_observer is not None
assert pending_after_failed_observer.skills["baoyu-comic"].pending_candidate_id == first_candidate.candidate_id
# Phase 7: the next sync must reconcile the binding: its accepted upstream
# hash matches the draft's new_upstream_tree_hash, and so does the published
# version's provenance.
_services(workspace)[0].sync_enabled()
state = PluginStateStore(workspace).get_plugin("baoyu-comic")
assert state is not None
binding = state.skills["baoyu-comic"]
assert binding.accepted_upstream_tree_hash == draft.provenance["new_upstream_tree_hash"]
published_loaded = store.read_published_skill("baoyu-comic")
assert published_loaded is not None
assert published_loaded.version.provenance["new_upstream_tree_hash"] == draft.provenance["new_upstream_tree_hash"]
# Phase 8: roll back to v0002; the plugin's own files must be byte-for-byte
# identical to the snapshot taken after the 1.1.0 rewrite.
pipeline.rollback("baoyu-comic", "v0002", actor="tester", reason="verify rollback")
assert store.read_published_skill("baoyu-comic").version.version == "v0002" # type: ignore[union-attr]
assert _plugin_file_bytes(plugin_root) == plugin_files_after_update
# Phase 9: a template-only plugin change (1.2.0) must yield a new open
# candidate distinct from the first one.
_rewrite_plugin(plugin_root, version="1.2.0", template="panel-v3")
_services(workspace)[0].sync_enabled()
second_candidate = _only_open_candidate(learning_store)
assert second_candidate.candidate_id != first_candidate.candidate_id
# Phase 10: delete the plugin directory; the plugin state becomes "missing"
# while the skill spec stays "active".
shutil.rmtree(plugin_root)
_services(workspace)[0].sync_enabled()
missing = PluginStateStore(workspace).get_plugin("baoyu-comic")
assert missing is not None and missing.status == "missing"
assert store.get_skill_spec("baoyu-comic").status == "active" # type: ignore[union-attr]
# Phase 11: restore the plugin as 1.3.0 and run two syncs concurrently, each
# with its own freshly built manager.
plugin_root = _write_plugin(
workspace / "plugins",
version="1.3.0",
body="# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n",
template="panel-v4",
)
with ThreadPoolExecutor(max_workers=2) as executor:
list(executor.map(lambda _: _services(workspace)[0].sync_enabled(), range(2)))
# Ignoring the first candidate, exactly one candidate may be open; v0003 must
# be listed once; and the plugin's template file must still hold "panel-v4".
candidates = [
item
for item in learning_store.list_learning_candidates()
if item.candidate_id != first_candidate.candidate_id
]
assert len([item for item in candidates if item.status == "open"]) == 1
versions = store.list_versions("baoyu-comic")
assert versions.count("v0003") == 1
assert (plugin_root / "skills" / "baoyu-comic" / "templates" / "panel.txt").read_text(encoding="utf-8") == "panel-v4"
def _services(
    workspace: Path,
    *,
    publish_observer=None,
) -> tuple[PluginManager, SkillSpecStore, SkillLearningStore, SkillLearningPipelineService]:
    """Build a fresh manager, stores and pipeline rooted at *workspace*.

    When *publish_observer* is ``None`` the pipeline is wired to the new
    manager's ``on_skill_published``; otherwise the given callable is used.

    Returns:
        ``(manager, skill store, learning store, pipeline)``.
    """
    found = discover_plugins(workspace, search_paths=[])
    skill_store = SkillSpecStore(workspace)
    memory_root = workspace / "memory"
    candidate_store = SkillLearningStore(memory_root / "skills")
    runs = RunMemoryStore(memory_root / "runs")
    skill_publisher = SkillPublisher(skill_store)

    plugin_manager = PluginManager(
        workspace=workspace,
        manifests=found.manifests,
        discovery_errors=found.errors,
        state_store=PluginStateStore(workspace),
        skill_store=skill_store,
        learning_store=candidate_store,
        publisher=skill_publisher,
        safety_checker=SkillDraftSafetyChecker(),
        write_lock=WorkspaceWriteLock(workspace),
    )

    # The learning service and the pipeline each get their own DraftService.
    learning = SkillLearningService(
        run_store=runs,
        learning_store=candidate_store,
        draft_service=DraftService(skill_store),
        evidence_selector=EvidenceSelector(runs),
        synthesizer=SkillDraftSynthesizer(),
    )
    learning_pipeline = SkillLearningPipelineService(
        learning_store=candidate_store,
        learning_service=learning,
        draft_service=DraftService(skill_store),
        review_service=ReviewService(skill_store),
        publisher=skill_publisher,
        publish_observer=(
            plugin_manager.on_skill_published if publish_observer is None else publish_observer
        ),
    )
    return plugin_manager, skill_store, candidate_store, learning_pipeline
def _write_plugin(root: Path, *, version: str, body: str, template: str) -> Path:
    """Create the ``baoyu-comic`` plugin under *root* and return its directory.

    Writes the skill file via ``_write_skill``, a ``templates/panel.txt`` file
    holding *template*, and a ``beaver.plugin.json`` manifest with *version*.
    """
    plugin_root = root / "baoyu-comic"
    skill_root = plugin_root / "skills" / "baoyu-comic"
    skill_root.mkdir(parents=True, exist_ok=True)
    _write_skill(skill_root, body)

    templates_dir = skill_root / "templates"
    templates_dir.mkdir(exist_ok=True)
    (templates_dir / "panel.txt").write_text(template, encoding="utf-8")

    manifest = {
        "schema_version": 1,
        "id": "baoyu-comic",
        "name": "Baoyu Comic",
        "version": version,
        "skills": [{"name": "baoyu-comic", "path": "skills/baoyu-comic"}],
    }
    manifest_path = plugin_root / "beaver.plugin.json"
    manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
    return plugin_root
def _rewrite_plugin(plugin_root: Path, *, version: str, body: str | None = None, template: str | None = None) -> None:
manifest_path = plugin_root / "beaver.plugin.json"
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
manifest["version"] = version
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
skill_root = plugin_root / "skills" / "baoyu-comic"
if body is not None:
_write_skill(skill_root, body)
if template is not None:
(skill_root / "templates" / "panel.txt").write_text(template, encoding="utf-8")
def _write_skill(skill_root: Path, body: str) -> None:
(skill_root / "SKILL.md").write_text(
"---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n" + body,
encoding="utf-8",
)
def _bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap *provider* in a ProviderBundle with a minimal stand-in runtime."""
    stub_runtime = SimpleNamespace(model="stub", provider_name="stub")
    bundle = ProviderBundle(main_runtime=stub_runtime, main_provider=provider)  # type: ignore[arg-type]
    return bundle
def _only_open_candidate(learning_store: SkillLearningStore):
open_candidates = learning_store.list_learning_candidates(status="open")
assert len(open_candidates) == 1
return open_candidates[0]
def _add_eval_cases(learning_store: SkillLearningStore, candidate_id: str) -> None:
candidate = next(item for item in learning_store.list_learning_candidates() if item.candidate_id == candidate_id)
evidence = dict(candidate.evidence)
evidence["eval_cases"] = [
{
"run_id": f"explicit:{index}",
"task_text": f"Review comic panel safety case {index}",
"baseline_skill_names": ["baoyu-comic"],
"candidate_skill_name": "baoyu-comic",
"accepted_score": 0.8,
"validator": {
"type": "final_answer_contains",
"required_terms": ["panel", "safety"],
"forbidden_terms": ["secret"],
},
}
for index in range(10)
]
learning_store.update_learning_candidate(candidate_id, evidence=evidence)
def _plugin_file_bytes(plugin_root: Path) -> dict[str, bytes]:
return {
path.relative_to(plugin_root).as_posix(): path.read_bytes()
for path in sorted(plugin_root.rglob("*"))
if path.is_file()
}