from __future__ import annotations

import asyncio
import json
import shutil
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from types import SimpleNamespace

from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillLearningStore
from beaver.plugins.discovery import discover_plugins
from beaver.plugins.skills import PluginManager
from beaver.plugins.state import PluginStateStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore


class StubProvider(LLMProvider):
    """Provider double that answers every ``chat`` call with fixed content.

    Each call is recorded in ``self.calls`` as a dict holding the messages
    and the requested model.
    """

    def __init__(self, content: str) -> None:
        super().__init__()
        self.content = content
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call and return the canned content as an ``LLMResponse``."""
        call_record = {"messages": messages, "model": model}
        self.calls.append(call_record)
        # A falsy model (None or empty string) is reported as "stub".
        reported_model = model or "stub"
        return LLMResponse(content=self.content, provider_name="stub", model=reported_model)

    def get_default_model(self) -> str:
        """Return the fixed model name ``"stub"``."""
        return "stub"


class StubReplayRunner:
    """Replay runner double: stores each request and returns a canned arm result."""

    def __init__(self) -> None:
        self.requests: list[object] = []

    async def run_arm(self, request):
        """Record ``request`` and return a fixed result dict echoing its ids."""
        self.requests.append(request)
        executed_write = {
            "tool_name": "write_file",
            "mode": "executed",
            "arguments": {"path": "storyboard.md"},
            "result": {"success": True},
        }
        arm_result = {
            "case_id": request.case_id,
            "arm": request.arm,
            "session_id": "session-replay",
            "run_id": f"{request.arm}-run",
            "task_text": request.task_text,
            "finish_reason": "stop",
            "final_answer": "panel safety review complete",
            "tool_calls": [executed_write],
            "artifacts": [],
            "side_effects": [],
        }
        return arm_result


def test_plugin_skill_mirror_upgrade_and_recovery_lifecycle(tmp_path: Path) -> None:
    """Walk one plugin skill through mirror, upgrade, rollback and recovery.

    The phases run strictly in order against a single workspace; each phase
    is labelled by a comment below.
    """
    skill_id = "baoyu-comic"
    plugins_dir = tmp_path / "workspace" / "plugins"
    workspace = tmp_path / "workspace"
    # The same upstream body is shipped by plugin versions 1.1.0 and 1.3.0.
    upgraded_body = "# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n"

    def resync(_unused: object = None) -> None:
        # Every sync goes through a freshly built PluginManager, not `manager`.
        fresh_manager = _services(workspace)[0]
        fresh_manager.sync_enabled()

    def exploding_observer(draft, result):
        # Publish observer that always fails.
        raise RuntimeError("observer failed")

    # --- Phase 1: enabling the plugin publishes the skill as v0001. ---
    plugin_root = _write_plugin(
        plugins_dir,
        version="1.0.0",
        body="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n",
        template="panel-v1",
    )
    manager, store, learning_store, pipeline = _services(workspace)
    manager.enable(skill_id)
    mirrored = store.read_published_skill(skill_id)
    assert mirrored is not None
    assert mirrored.version.version == "v0001"

    # --- Phase 2: a local revision goes through the pipeline and becomes v0002. ---
    local_draft = pipeline.draft_service.create_revision_draft(
        skill_name=skill_id,
        base_version="v0001",
        proposed_content="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n\n## Local Review\n\nKeep user edits.\n",
        proposed_frontmatter={"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        created_by="tester",
        reason="learned local revision",
    )
    local_ref = (local_draft.skill_name, local_draft.draft_id)
    pipeline.check_safety(*local_ref)
    pipeline.submit_review(*local_ref, requested_by="tester")
    pipeline.approve(*local_ref, reviewer="tester")
    local_release = pipeline.publish(*local_ref, publisher="tester")
    assert local_release.version == "v0002"

    # --- Phase 3: upstream ships 1.1.0; sync opens a three-way merge candidate. ---
    _rewrite_plugin(plugin_root, version="1.1.0", body=upgraded_body, template="panel-v2")
    plugin_files_after_update = _plugin_file_bytes(plugin_root)
    resync()
    first_candidate = _only_open_candidate(learning_store)
    assert first_candidate.evidence["merge_mode"] == "three_way"

    merged_content = (
        "# Baoyu Comic\n\n"
        "## Workflow\n\nDraw better panels.\n\n"
        "## Local Review\n\nKeep user edits.\n\n"
        "## Safety\n\nDo not leak secrets.\n"
    )
    merged_payload = {
        "frontmatter": {"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        "content": merged_content,
        "change_reason": "Merge upstream safety guidance and preserve local review.",
        "preserved_local_sections": ["Local Review"],
        "adopted_upstream_sections": ["Workflow", "Safety"],
        "resolved_conflicts": [],
        "dropped_sections": [],
    }
    synthesis_bundle = _bundle(StubProvider(json.dumps(merged_payload)))
    merge_draft = asyncio.run(
        pipeline.synthesize_draft(first_candidate.candidate_id, provider_bundle=synthesis_bundle)
    )
    _add_eval_cases(learning_store, first_candidate.candidate_id)
    pipeline.check_safety(merge_draft.skill_name, merge_draft.draft_id)

    # --- Phase 4: evaluation runs in replay mode and reports preservation. ---
    replay_runner = StubReplayRunner()
    evaluation_bundle = _bundle(StubProvider('{"cases": []}'))
    report = asyncio.run(
        pipeline.evaluate_draft(
            first_candidate.candidate_id,
            merge_draft.skill_name,
            merge_draft.draft_id,
            provider_bundle=evaluation_bundle,
            replay_runner=replay_runner,
        )
    )
    assert replay_runner.requests
    assert report.mode == "replay"
    preservation = report.preservation_report
    assert preservation is not None
    assert preservation["mode"] == "plugin_three_way"
    assert preservation["passed"] is True

    # --- Phase 5: publish still yields v0003 when the observer raises, and the
    # plugin binding keeps the candidate recorded as pending. ---
    pipeline.submit_review(merge_draft.skill_name, merge_draft.draft_id, requested_by="tester")
    pipeline.approve(merge_draft.skill_name, merge_draft.draft_id, reviewer="tester")
    failing_ack_pipeline = _services(workspace, publish_observer=exploding_observer)[3]
    published = failing_ack_pipeline.publish(merge_draft.skill_name, merge_draft.draft_id, publisher="tester")
    assert published.version == "v0003"
    pending_after_failed_observer = PluginStateStore(workspace).get_plugin(skill_id)
    assert pending_after_failed_observer is not None
    pending_binding = pending_after_failed_observer.skills[skill_id]
    assert pending_binding.pending_candidate_id == first_candidate.candidate_id

    # --- Phase 6: the next sync records the accepted upstream tree hash. ---
    resync()
    state = PluginStateStore(workspace).get_plugin(skill_id)
    assert state is not None
    expected_tree_hash = merge_draft.provenance["new_upstream_tree_hash"]
    binding = state.skills[skill_id]
    assert binding.accepted_upstream_tree_hash == expected_tree_hash
    published_loaded = store.read_published_skill(skill_id)
    assert published_loaded is not None
    assert published_loaded.version.provenance["new_upstream_tree_hash"] == merge_draft.provenance["new_upstream_tree_hash"]

    # --- Phase 7: rollback to v0002 leaves the plugin's own files untouched. ---
    pipeline.rollback(skill_id, "v0002", actor="tester", reason="verify rollback")
    after_rollback = store.read_published_skill(skill_id)
    assert after_rollback.version.version == "v0002"  # type: ignore[union-attr]
    assert _plugin_file_bytes(plugin_root) == plugin_files_after_update

    # --- Phase 8: another upstream change (1.2.0) opens a different candidate. ---
    _rewrite_plugin(plugin_root, version="1.2.0", template="panel-v3")
    resync()
    second_candidate = _only_open_candidate(learning_store)
    assert second_candidate.candidate_id != first_candidate.candidate_id

    # --- Phase 9: deleting the plugin marks it "missing"; the spec stays "active". ---
    shutil.rmtree(plugin_root)
    resync()
    missing = PluginStateStore(workspace).get_plugin(skill_id)
    assert missing is not None
    assert missing.status == "missing"
    spec_after_removal = store.get_skill_spec(skill_id)
    assert spec_after_removal.status == "active"  # type: ignore[union-attr]

    # --- Phase 10: the plugin returns as 1.3.0 and two syncs run concurrently. ---
    plugin_root = _write_plugin(plugins_dir, version="1.3.0", body=upgraded_body, template="panel-v4")
    with ThreadPoolExecutor(max_workers=2) as executor:
        # Draining the iterator re-raises any exception from a worker.
        list(executor.map(resync, range(2)))

    open_later_candidates = []
    for item in learning_store.list_learning_candidates():
        if item.candidate_id == first_candidate.candidate_id:
            continue
        if item.status == "open":
            open_later_candidates.append(item)
    assert len(open_later_candidates) == 1

    versions = store.list_versions(skill_id)
    assert versions.count("v0003") == 1
    panel_path = plugin_root / "skills" / "baoyu-comic" / "templates" / "panel.txt"
    assert panel_path.read_text(encoding="utf-8") == "panel-v4"


def _services(
    workspace: Path,
    *,
    publish_observer=None,
) -> tuple[PluginManager, SkillSpecStore, SkillLearningStore, SkillLearningPipelineService]:
    """Build a fresh manager, spec store, learning store and pipeline for ``workspace``.

    When ``publish_observer`` is ``None`` the pipeline is wired to the new
    manager's ``on_skill_published``; otherwise the given callable is used.
    """
    discovery = discover_plugins(workspace, search_paths=[])
    store = SkillSpecStore(workspace)
    memory_root = workspace / "memory"
    learning_store = SkillLearningStore(memory_root / "skills")
    run_store = RunMemoryStore(memory_root / "runs")
    publisher = SkillPublisher(store)

    # Collaborators are constructed in the same order as the keyword arguments.
    state_store = PluginStateStore(workspace)
    safety_checker = SkillDraftSafetyChecker()
    write_lock = WorkspaceWriteLock(workspace)
    manager = PluginManager(
        workspace=workspace,
        manifests=discovery.manifests,
        discovery_errors=discovery.errors,
        state_store=state_store,
        skill_store=store,
        learning_store=learning_store,
        publisher=publisher,
        safety_checker=safety_checker,
        write_lock=write_lock,
    )

    # The learning service and the pipeline each get their own DraftService.
    learning_service = SkillLearningService(
        run_store=run_store,
        learning_store=learning_store,
        draft_service=DraftService(store),
        evidence_selector=EvidenceSelector(run_store),
        synthesizer=SkillDraftSynthesizer(),
    )
    pipeline_drafts = DraftService(store)
    reviews = ReviewService(store)

    observer = publish_observer
    if observer is None:
        observer = manager.on_skill_published

    pipeline = SkillLearningPipelineService(
        learning_store=learning_store,
        learning_service=learning_service,
        draft_service=pipeline_drafts,
        review_service=reviews,
        publisher=publisher,
        publish_observer=observer,
    )
    return manager, store, learning_store, pipeline


def _write_plugin(root: Path, *, version: str, body: str, template: str) -> Path:
    """Create the ``baoyu-comic`` plugin tree under ``root`` and return its directory.

    Writes the skill file, ``templates/panel.txt`` and ``beaver.plugin.json``.
    """
    plugin_root = root / "baoyu-comic"
    skill_root = plugin_root / "skills" / "baoyu-comic"
    skill_root.mkdir(parents=True, exist_ok=True)
    _write_skill(skill_root, body)

    templates_dir = skill_root / "templates"
    templates_dir.mkdir(exist_ok=True)
    (templates_dir / "panel.txt").write_text(template, encoding="utf-8")

    manifest = {
        "schema_version": 1,
        "id": "baoyu-comic",
        "name": "Baoyu Comic",
        "version": version,
        "skills": [{"name": "baoyu-comic", "path": "skills/baoyu-comic"}],
    }
    manifest_path = plugin_root / "beaver.plugin.json"
    manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
    return plugin_root


def _rewrite_plugin(plugin_root: Path, *, version: str, body: str | None = None, template: str | None = None) -> None:
    """Set the manifest version and optionally replace the skill body and template.

    ``body`` and ``template`` are left untouched on disk when ``None``.
    """
    manifest_path = plugin_root / "beaver.plugin.json"
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    manifest["version"] = version
    manifest_path.write_text(json.dumps(manifest), encoding="utf-8")

    skill_root = plugin_root / "skills" / "baoyu-comic"
    if body is not None:
        _write_skill(skill_root, body)
    if template is None:
        return
    panel_path = skill_root / "templates" / "panel.txt"
    panel_path.write_text(template, encoding="utf-8")


def _write_skill(skill_root: Path, body: str) -> None:
    """Write ``SKILL.md`` in ``skill_root``: fixed frontmatter followed by ``body``."""
    frontmatter = "---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n"
    skill_file = skill_root / "SKILL.md"
    skill_file.write_text(frontmatter + body, encoding="utf-8")


def _bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ``ProviderBundle`` with a minimal stand-in runtime."""
    stub_runtime = SimpleNamespace(model="stub", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)  # type: ignore[arg-type]


def _only_open_candidate(learning_store: SkillLearningStore):
    """Return the single candidate with status ``"open"``; assert there is exactly one."""
    open_candidates = learning_store.list_learning_candidates(status="open")
    assert len(open_candidates) == 1
    (candidate,) = open_candidates
    return candidate


def _add_eval_cases(learning_store: SkillLearningStore, candidate_id: str) -> None:
    """Store ten explicit eval cases in the evidence of candidate ``candidate_id``.

    The existing evidence is copied and its ``"eval_cases"`` entry replaced.
    """
    matching = (
        item
        for item in learning_store.list_learning_candidates()
        if item.candidate_id == candidate_id
    )
    candidate = next(matching)

    eval_cases = []
    for index in range(10):
        # Each case gets its own validator dict and term lists.
        validator = {
            "type": "final_answer_contains",
            "required_terms": ["panel", "safety"],
            "forbidden_terms": ["secret"],
        }
        eval_cases.append(
            {
                "run_id": f"explicit:{index}",
                "task_text": f"Review comic panel safety case {index}",
                "baseline_skill_names": ["baoyu-comic"],
                "candidate_skill_name": "baoyu-comic",
                "accepted_score": 0.8,
                "validator": validator,
            }
        )

    evidence = dict(candidate.evidence)
    evidence["eval_cases"] = eval_cases
    learning_store.update_learning_candidate(candidate_id, evidence=evidence)


def _plugin_file_bytes(plugin_root: Path) -> dict[str, bytes]:
    """Map each file under ``plugin_root`` (POSIX-style relative path) to its bytes."""
    snapshot: dict[str, bytes] = {}
    for path in sorted(plugin_root.rglob("*")):
        if not path.is_file():
            continue
        relative_name = path.relative_to(plugin_root).as_posix()
        snapshot[relative_name] = path.read_bytes()
    return snapshot