test(plugins): cover skill mirror lifecycle
This commit is contained in:
@@ -0,0 +1,326 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
|
||||
from beaver.memory.runs import RunMemoryStore
|
||||
from beaver.memory.skills import SkillLearningStore
|
||||
from beaver.plugins.discovery import discover_plugins
|
||||
from beaver.plugins.skills import PluginManager
|
||||
from beaver.plugins.state import PluginStateStore
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
|
||||
from beaver.skills.learning.safety import SkillDraftSafetyChecker
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
|
||||
|
||||
class StubProvider(LLMProvider):
    """LLM provider double that answers every chat with one canned string and logs each call."""

    def __init__(self, content: str) -> None:
        super().__init__()
        # Text returned verbatim as the content of every chat response.
        self.content = content
        # One entry per chat() invocation, holding the messages and model that were passed in.
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call and reply with the canned content.

        ``tools``, ``max_tokens``, ``temperature`` and ``thinking_enabled`` are accepted
        but not used by this stub.
        """
        call_record = {"messages": messages, "model": model}
        self.calls.append(call_record)
        # Fall back to the stub model name when the caller did not pick a model.
        reported_model = model or "stub"
        return LLMResponse(content=self.content, provider_name="stub", model=reported_model)

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub"
|
||||
|
||||
|
||||
class StubReplayRunner:
    """Replay runner double that logs each request and returns a fixed successful arm result."""

    def __init__(self) -> None:
        # Every request handed to run_arm(), in call order.
        self.requests: list[object] = []

    async def run_arm(self, request):
        """Remember *request* and return a canned result echoing its case id, arm and task text."""
        self.requests.append(request)
        # The single tool call reported for every arm.
        executed_write = {
            "tool_name": "write_file",
            "mode": "executed",
            "arguments": {"path": "storyboard.md"},
            "result": {"success": True},
        }
        arm_result = {
            "case_id": request.case_id,
            "arm": request.arm,
            "session_id": "session-replay",
            "run_id": f"{request.arm}-run",
            "task_text": request.task_text,
            "finish_reason": "stop",
            "final_answer": "panel safety review complete",
            "tool_calls": [executed_write],
            "artifacts": [],
            "side_effects": [],
        }
        return arm_result
|
||||
|
||||
|
||||
def test_plugin_skill_mirror_upgrade_and_recovery_lifecycle(tmp_path: Path) -> None:
    """Drive one plugin skill mirror through enable, local edit, upstream upgrade,
    failed publish acknowledgement, rollback, plugin removal and concurrent restore."""
    workspace = tmp_path / "workspace"
    plugin_root = _write_plugin(
        workspace / "plugins",
        version="1.0.0",
        body="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n",
        template="panel-v1",
    )

    # Enabling the plugin must leave a published skill at v0001.
    manager, store, learning_store, pipeline = _services(workspace)
    manager.enable("baoyu-comic")
    initial_skill = store.read_published_skill("baoyu-comic")
    assert initial_skill is not None
    assert initial_skill.version.version == "v0001"

    # Publish a local revision on top of v0001; it must become v0002.
    local_draft = pipeline.draft_service.create_revision_draft(
        skill_name="baoyu-comic",
        base_version="v0001",
        proposed_content="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n\n## Local Review\n\nKeep user edits.\n",
        proposed_frontmatter={"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        created_by="tester",
        reason="learned local revision",
    )
    pipeline.check_safety(local_draft.skill_name, local_draft.draft_id)
    pipeline.submit_review(local_draft.skill_name, local_draft.draft_id, requested_by="tester")
    pipeline.approve(local_draft.skill_name, local_draft.draft_id, reviewer="tester")
    local_version = pipeline.publish(local_draft.skill_name, local_draft.draft_id, publisher="tester")
    assert local_version.version == "v0002"

    # Change the plugin on disk, then snapshot its files to compare against after the rollback.
    _rewrite_plugin(
        plugin_root,
        version="1.1.0",
        body="# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n",
        template="panel-v2",
    )
    plugin_files_after_update = _plugin_file_bytes(plugin_root)

    # A sync from freshly built services must open exactly one three-way merge candidate.
    _services(workspace)[0].sync_enabled()
    first_candidate = _only_open_candidate(learning_store)
    assert first_candidate.evidence["merge_mode"] == "three_way"

    # Canned synthesizer output: upstream changes combined with the local section.
    merged_payload = {
        "frontmatter": {"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        "content": (
            "# Baoyu Comic\n\n"
            "## Workflow\n\nDraw better panels.\n\n"
            "## Local Review\n\nKeep user edits.\n\n"
            "## Safety\n\nDo not leak secrets.\n"
        ),
        "change_reason": "Merge upstream safety guidance and preserve local review.",
        "preserved_local_sections": ["Local Review"],
        "adopted_upstream_sections": ["Workflow", "Safety"],
        "resolved_conflicts": [],
        "dropped_sections": [],
    }
    merge_provider = StubProvider(json.dumps(merged_payload))
    merge_draft = asyncio.run(
        pipeline.synthesize_draft(
            first_candidate.candidate_id,
            provider_bundle=_bundle(merge_provider),
        )
    )
    _add_eval_cases(learning_store, first_candidate.candidate_id)
    pipeline.check_safety(merge_draft.skill_name, merge_draft.draft_id)

    # Evaluate the merged draft through the stub replay runner.
    replay_runner = StubReplayRunner()
    eval_provider = StubProvider('{"cases": []}')
    eval_report = asyncio.run(
        pipeline.evaluate_draft(
            first_candidate.candidate_id,
            merge_draft.skill_name,
            merge_draft.draft_id,
            provider_bundle=_bundle(eval_provider),
            replay_runner=replay_runner,
        )
    )
    assert replay_runner.requests
    assert eval_report.mode == "replay"
    assert eval_report.preservation_report is not None
    assert eval_report.preservation_report["mode"] == "plugin_three_way"
    assert eval_report.preservation_report["passed"] is True

    pipeline.submit_review(merge_draft.skill_name, merge_draft.draft_id, requested_by="tester")
    pipeline.approve(merge_draft.skill_name, merge_draft.draft_id, reviewer="tester")

    def _failing_observer(draft, result):
        # Stands in for a publish acknowledgement that blows up after the publish itself.
        raise RuntimeError("observer failed")

    _, _, _, failing_ack_pipeline = _services(
        workspace,
        publish_observer=_failing_observer,
    )
    merged_version = failing_ack_pipeline.publish(merge_draft.skill_name, merge_draft.draft_id, publisher="tester")
    assert merged_version.version == "v0003"

    # With the observer failing, the plugin state must still point at the pending candidate.
    pending_after_failed_observer = PluginStateStore(workspace).get_plugin("baoyu-comic")
    assert pending_after_failed_observer is not None
    pending_binding = pending_after_failed_observer.skills["baoyu-comic"]
    assert pending_binding.pending_candidate_id == first_candidate.candidate_id
    _services(workspace)[0].sync_enabled()

    # After the next sync the accepted upstream hash must match the published draft's provenance.
    synced_state = PluginStateStore(workspace).get_plugin("baoyu-comic")
    assert synced_state is not None
    binding = synced_state.skills["baoyu-comic"]
    expected_tree_hash = merge_draft.provenance["new_upstream_tree_hash"]
    assert binding.accepted_upstream_tree_hash == expected_tree_hash
    published_loaded = store.read_published_skill("baoyu-comic")
    assert published_loaded is not None
    assert published_loaded.version.provenance["new_upstream_tree_hash"] == merge_draft.provenance["new_upstream_tree_hash"]

    # Rolling back the published skill must not touch the plugin's own files.
    pipeline.rollback("baoyu-comic", "v0002", actor="tester", reason="verify rollback")
    assert store.read_published_skill("baoyu-comic").version.version == "v0002"  # type: ignore[union-attr]
    assert _plugin_file_bytes(plugin_root) == plugin_files_after_update

    # A further plugin change must open a new candidate, distinct from the first.
    _rewrite_plugin(plugin_root, version="1.2.0", template="panel-v3")
    _services(workspace)[0].sync_enabled()
    second_candidate = _only_open_candidate(learning_store)
    assert second_candidate.candidate_id != first_candidate.candidate_id

    # Deleting the plugin directory marks the plugin missing while the skill spec stays active.
    shutil.rmtree(plugin_root)
    _services(workspace)[0].sync_enabled()
    missing_state = PluginStateStore(workspace).get_plugin("baoyu-comic")
    assert missing_state is not None
    assert missing_state.status == "missing"
    assert store.get_skill_spec("baoyu-comic").status == "active"  # type: ignore[union-attr]

    # Restore the plugin and run two syncs concurrently, each with its own service graph.
    plugin_root = _write_plugin(
        workspace / "plugins",
        version="1.3.0",
        body="# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n",
        template="panel-v4",
    )
    with ThreadPoolExecutor(max_workers=2) as executor:
        # list() drains the map so any exception raised in a worker surfaces here.
        list(executor.map(lambda _: _services(workspace)[0].sync_enabled(), range(2)))

    candidates = [
        item
        for item in learning_store.list_learning_candidates()
        if item.candidate_id != first_candidate.candidate_id
    ]
    open_count = sum(1 for item in candidates if item.status == "open")
    assert open_count == 1
    versions = store.list_versions("baoyu-comic")
    assert versions.count("v0003") == 1
    panel_path = plugin_root / "skills" / "baoyu-comic" / "templates" / "panel.txt"
    assert panel_path.read_text(encoding="utf-8") == "panel-v4"
|
||||
|
||||
|
||||
def _services(
    workspace: Path,
    *,
    publish_observer=None,
) -> tuple[PluginManager, SkillSpecStore, SkillLearningStore, SkillLearningPipelineService]:
    """Build a fresh plugin manager and skill-learning pipeline over *workspace*.

    Args:
        workspace: Root directory handed to every store and to plugin discovery.
        publish_observer: Callback given to the pipeline as its publish observer;
            when ``None`` the new manager's ``on_skill_published`` is used.

    Returns:
        ``(manager, skill_store, learning_store, pipeline)``.
    """
    discovered = discover_plugins(workspace, search_paths=[])
    store = SkillSpecStore(workspace)
    learning_store = SkillLearningStore(workspace / "memory" / "skills")
    run_store = RunMemoryStore(workspace / "memory" / "runs")
    publisher = SkillPublisher(store)

    manager = PluginManager(
        workspace=workspace,
        manifests=discovered.manifests,
        discovery_errors=discovered.errors,
        state_store=PluginStateStore(workspace),
        skill_store=store,
        learning_store=learning_store,
        publisher=publisher,
        safety_checker=SkillDraftSafetyChecker(),
        write_lock=WorkspaceWriteLock(workspace),
    )

    # The learning service and the pipeline each receive their own DraftService instance.
    learning_service = SkillLearningService(
        run_store=run_store,
        learning_store=learning_store,
        draft_service=DraftService(store),
        evidence_selector=EvidenceSelector(run_store),
        synthesizer=SkillDraftSynthesizer(),
    )

    if publish_observer is None:
        observer = manager.on_skill_published
    else:
        observer = publish_observer

    pipeline = SkillLearningPipelineService(
        learning_store=learning_store,
        learning_service=learning_service,
        draft_service=DraftService(store),
        review_service=ReviewService(store),
        publisher=publisher,
        publish_observer=observer,
    )
    return manager, store, learning_store, pipeline
|
||||
|
||||
|
||||
def _write_plugin(root: Path, *, version: str, body: str, template: str) -> Path:
    """Create (or overwrite) the ``baoyu-comic`` plugin under *root* and return its directory.

    Writes the skill's ``SKILL.md`` from *body*, ``templates/panel.txt`` from *template*,
    and a ``beaver.plugin.json`` manifest carrying *version*.
    """
    plugin_dir = root / "baoyu-comic"
    skill_dir = plugin_dir / "skills" / "baoyu-comic"
    skill_dir.mkdir(parents=True, exist_ok=True)
    _write_skill(skill_dir, body)

    templates_dir = skill_dir / "templates"
    templates_dir.mkdir(exist_ok=True)
    (templates_dir / "panel.txt").write_text(template, encoding="utf-8")

    manifest = {
        "schema_version": 1,
        "id": "baoyu-comic",
        "name": "Baoyu Comic",
        "version": version,
        "skills": [{"name": "baoyu-comic", "path": "skills/baoyu-comic"}],
    }
    (plugin_dir / "beaver.plugin.json").write_text(json.dumps(manifest), encoding="utf-8")
    return plugin_dir
|
||||
|
||||
|
||||
def _rewrite_plugin(plugin_root: Path, *, version: str, body: str | None = None, template: str | None = None) -> None:
    """Update an existing plugin on disk in place.

    Always rewrites the manifest with the new *version*. ``SKILL.md`` is rewritten only
    when *body* is given, and ``templates/panel.txt`` only when *template* is given.
    """
    manifest_file = plugin_root / "beaver.plugin.json"
    manifest_data = json.loads(manifest_file.read_text(encoding="utf-8"))
    manifest_data["version"] = version
    manifest_file.write_text(json.dumps(manifest_data), encoding="utf-8")

    skill_dir = plugin_root / "skills" / "baoyu-comic"
    if body is not None:
        _write_skill(skill_dir, body)
    if template is not None:
        panel_file = skill_dir / "templates" / "panel.txt"
        panel_file.write_text(template, encoding="utf-8")
|
||||
|
||||
|
||||
def _write_skill(skill_root: Path, body: str) -> None:
    """Write ``SKILL.md`` into *skill_root*: a fixed front-matter header followed by *body*."""
    front_matter = "---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n"
    skill_file = skill_root / "SKILL.md"
    skill_file.write_text(front_matter + body, encoding="utf-8")
|
||||
|
||||
|
||||
def _bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap *provider* in a ``ProviderBundle`` whose main runtime is a minimal stand-in."""
    # Only ``model`` and ``provider_name`` are supplied on the fake runtime object.
    stub_runtime = SimpleNamespace(model="stub", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def _only_open_candidate(learning_store: SkillLearningStore):
    """Return the single learning candidate with status ``"open"``; assert there is exactly one."""
    still_open = learning_store.list_learning_candidates(status="open")
    assert len(still_open) == 1
    sole_candidate = still_open[0]
    return sole_candidate
|
||||
|
||||
|
||||
def _add_eval_cases(learning_store: SkillLearningStore, candidate_id: str) -> None:
    """Attach ten explicit eval cases to the evidence of the candidate with *candidate_id*.

    The candidate's existing evidence is copied, its ``"eval_cases"`` key is set to the
    generated cases, and the copy is written back through
    ``learning_store.update_learning_candidate``.

    Raises:
        AssertionError: If no stored candidate has *candidate_id*.
    """
    candidate = next(
        (item for item in learning_store.list_learning_candidates() if item.candidate_id == candidate_id),
        None,
    )
    # A bare next() would surface an opaque StopIteration here; fail with a readable message instead.
    assert candidate is not None, f"learning candidate {candidate_id!r} not found"

    # Shallow copy so the candidate object returned by the store is not mutated in place.
    evidence = dict(candidate.evidence)
    evidence["eval_cases"] = [
        {
            "run_id": f"explicit:{index}",
            "task_text": f"Review comic panel safety case {index}",
            "baseline_skill_names": ["baoyu-comic"],
            "candidate_skill_name": "baoyu-comic",
            "accepted_score": 0.8,
            "validator": {
                "type": "final_answer_contains",
                "required_terms": ["panel", "safety"],
                "forbidden_terms": ["secret"],
            },
        }
        for index in range(10)
    ]
    learning_store.update_learning_candidate(candidate_id, evidence=evidence)
|
||||
|
||||
|
||||
def _plugin_file_bytes(plugin_root: Path) -> dict[str, bytes]:
    """Snapshot every file under *plugin_root* as ``{posix-style relative path: raw bytes}``.

    Directories are skipped; entries are inserted in sorted path order.
    """
    snapshot: dict[str, bytes] = {}
    for entry in sorted(plugin_root.rglob("*")):
        if not entry.is_file():
            continue
        relative_key = entry.relative_to(plugin_root).as_posix()
        snapshot[relative_key] = entry.read_bytes()
    return snapshot
|
||||
Reference in New Issue
Block a user