Files
beaver_project/app-instance/backend/tests/integration/test_plugin_skill_lifecycle.py

327 lines
13 KiB
Python

from __future__ import annotations
import asyncio
import json
import shutil
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from types import SimpleNamespace
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillLearningStore
from beaver.plugins.discovery import discover_plugins
from beaver.plugins.skills import PluginManager
from beaver.plugins.state import PluginStateStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
class StubProvider(LLMProvider):
    """LLM provider double that answers every chat with one canned string.

    Each ``chat`` invocation is logged in ``calls`` so a test can inspect
    which messages and model were passed in.
    """

    def __init__(self, content: str) -> None:
        super().__init__()
        # Text returned verbatim as the content of every response.
        self.content = content
        # One ``{"messages": ..., "model": ...}`` entry per ``chat`` call.
        self.calls: list[dict] = []

    def get_default_model(self) -> str:
        """Return the fixed model name reported by this stub."""
        return "stub"

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Log the call and reply with the canned content.

        Only ``messages`` and ``model`` are used; ``tools``, ``max_tokens``,
        ``temperature`` and ``thinking_enabled`` are accepted and ignored.
        The response names ``model`` when one is given and ``"stub"``
        otherwise.
        """
        call_record = {"messages": messages, "model": model}
        self.calls.append(call_record)
        reported_model = model or "stub"
        return LLMResponse(content=self.content, provider_name="stub", model=reported_model)
class StubReplayRunner:
    """Replay-runner double that logs requests and returns a fixed arm result."""

    def __init__(self) -> None:
        # Every request handed to ``run_arm``, in call order.
        self.requests: list[object] = []

    async def run_arm(self, request):
        """Record ``request`` and return a canned, successful arm result.

        The result echoes ``case_id``, ``arm`` and ``task_text`` from the
        request, derives ``run_id`` from the arm name, and always reports a
        single executed ``write_file`` tool call with no artifacts or side
        effects.
        """
        self.requests.append(request)
        # The one tool call every replayed arm claims to have executed.
        executed_write = {
            "tool_name": "write_file",
            "mode": "executed",
            "arguments": {"path": "storyboard.md"},
            "result": {"success": True},
        }
        arm_result = {
            "case_id": request.case_id,
            "arm": request.arm,
            "session_id": "session-replay",
            "run_id": f"{request.arm}-run",
            "task_text": request.task_text,
            "finish_reason": "stop",
            "final_answer": "panel safety review complete",
            "tool_calls": [executed_write],
            "artifacts": [],
            "side_effects": [],
        }
        return arm_result
def test_plugin_skill_mirror_upgrade_and_recovery_lifecycle(tmp_path: Path) -> None:
    """Walk one plugin-backed skill through mirror, upgrade, rollback, and recovery.

    The steps run strictly in order against a single workspace under
    ``tmp_path`` because each one builds on the on-disk state left by the
    previous one:

    1. Enabling the plugin publishes its skill as ``v0001``.
    2. A locally authored revision is reviewed and published as ``v0002``.
    3. An upstream plugin update yields one open ``three_way`` candidate whose
       synthesized draft is evaluated in replay mode and published as
       ``v0003`` through a pipeline whose publish observer raises.
    4. The failed observer leaves the candidate pending; the next sync records
       the draft's upstream tree hash as accepted.
    5. Rolling back to ``v0002`` does not touch the plugin's own files.
    6. A further upstream change opens a different candidate.
    7. Deleting the plugin marks it ``missing`` while the skill spec stays
       ``active``.
    8. After reinstalling, two concurrent syncs leave exactly one open
       candidate (other than the first) and a single ``v0003`` entry.
    """
    skill_id = "baoyu-comic"
    actor = "tester"
    workspace = tmp_path / "workspace"
    plugins_dir = workspace / "plugins"
    # Upstream skill body shipped by plugin versions 1.1.0 and 1.3.0.
    upstream_body = "# Baoyu Comic\n\n## Workflow\n\nDraw better panels.\n\n## Safety\n\nDo not leak secrets.\n"

    def resync_with_fresh_services():
        """Build a brand-new service graph over the workspace and sync enabled plugins."""
        return _services(workspace)[0].sync_enabled()

    def raise_observer_failure(draft, result):
        """Publish observer that always fails."""
        raise RuntimeError("observer failed")

    # --- 1. Install the plugin and mirror its skill ---------------------
    plugin_root = _write_plugin(
        plugins_dir,
        version="1.0.0",
        body="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n",
        template="panel-v1",
    )
    manager, store, learning_store, pipeline = _services(workspace)
    manager.enable(skill_id)
    mirrored = store.read_published_skill(skill_id)
    assert mirrored is not None
    assert mirrored.version.version == "v0001"

    # --- 2. Publish a local revision on top of the mirrored version -----
    local_draft = pipeline.draft_service.create_revision_draft(
        skill_name=skill_id,
        base_version="v0001",
        proposed_content="# Baoyu Comic\n\n## Workflow\n\nDraw panels.\n\n## Local Review\n\nKeep user edits.\n",
        proposed_frontmatter={"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        created_by=actor,
        reason="learned local revision",
    )
    pipeline.check_safety(local_draft.skill_name, local_draft.draft_id)
    pipeline.submit_review(local_draft.skill_name, local_draft.draft_id, requested_by=actor)
    pipeline.approve(local_draft.skill_name, local_draft.draft_id, reviewer=actor)
    local_release = pipeline.publish(local_draft.skill_name, local_draft.draft_id, publisher=actor)
    assert local_release.version == "v0002"

    # --- 3. Upstream update -> merge candidate -> draft -> evaluation ---
    _rewrite_plugin(plugin_root, version="1.1.0", body=upstream_body, template="panel-v2")
    # Snapshot taken now so the rollback step can prove plugin files were left alone.
    upstream_snapshot = _plugin_file_bytes(plugin_root)
    resync_with_fresh_services()
    upgrade_candidate = _only_open_candidate(learning_store)
    assert upgrade_candidate.evidence["merge_mode"] == "three_way"

    merged_content = (
        "# Baoyu Comic\n\n"
        "## Workflow\n\nDraw better panels.\n\n"
        "## Local Review\n\nKeep user edits.\n\n"
        "## Safety\n\nDo not leak secrets.\n"
    )
    merged_payload = {
        "frontmatter": {"name": "baoyu-comic", "description": "Comic workflow", "tools": []},
        "content": merged_content,
        "change_reason": "Merge upstream safety guidance and preserve local review.",
        "preserved_local_sections": ["Local Review"],
        "adopted_upstream_sections": ["Workflow", "Safety"],
        "resolved_conflicts": [],
        "dropped_sections": [],
    }
    synthesis_bundle = _bundle(StubProvider(json.dumps(merged_payload)))
    draft = asyncio.run(
        pipeline.synthesize_draft(upgrade_candidate.candidate_id, provider_bundle=synthesis_bundle)
    )
    _add_eval_cases(learning_store, upgrade_candidate.candidate_id)
    pipeline.check_safety(draft.skill_name, draft.draft_id)

    replay_runner = StubReplayRunner()
    evaluation_bundle = _bundle(StubProvider('{"cases": []}'))
    report = asyncio.run(
        pipeline.evaluate_draft(
            upgrade_candidate.candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=evaluation_bundle,
            replay_runner=replay_runner,
        )
    )
    assert replay_runner.requests
    assert report.mode == "replay"
    preservation = report.preservation_report
    assert preservation is not None
    assert preservation["mode"] == "plugin_three_way"
    assert preservation["passed"] is True

    pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by=actor)
    pipeline.approve(draft.skill_name, draft.draft_id, reviewer=actor)

    # --- 4. Publish while the observer fails, then recover on sync ------
    failing_ack_pipeline = _services(workspace, publish_observer=raise_observer_failure)[3]
    published = failing_ack_pipeline.publish(draft.skill_name, draft.draft_id, publisher=actor)
    assert published.version == "v0003"
    unacknowledged = PluginStateStore(workspace).get_plugin(skill_id)
    assert unacknowledged is not None
    assert unacknowledged.skills[skill_id].pending_candidate_id == upgrade_candidate.candidate_id

    resync_with_fresh_services()
    recovered = PluginStateStore(workspace).get_plugin(skill_id)
    assert recovered is not None
    binding = recovered.skills[skill_id]
    assert binding.accepted_upstream_tree_hash == draft.provenance["new_upstream_tree_hash"]
    republished = store.read_published_skill(skill_id)
    assert republished is not None
    assert republished.version.provenance["new_upstream_tree_hash"] == draft.provenance["new_upstream_tree_hash"]

    # --- 5. Roll back; the plugin directory must stay byte-identical ----
    pipeline.rollback(skill_id, "v0002", actor=actor, reason="verify rollback")
    after_rollback = store.read_published_skill(skill_id)
    assert after_rollback.version.version == "v0002"  # type: ignore[union-attr]
    assert _plugin_file_bytes(plugin_root) == upstream_snapshot

    # --- 6. Another upstream change opens a fresh candidate -------------
    _rewrite_plugin(plugin_root, version="1.2.0", template="panel-v3")
    resync_with_fresh_services()
    followup_candidate = _only_open_candidate(learning_store)
    assert followup_candidate.candidate_id != upgrade_candidate.candidate_id

    # --- 7. Remove the plugin from disk ---------------------------------
    shutil.rmtree(plugin_root)
    resync_with_fresh_services()
    removed = PluginStateStore(workspace).get_plugin(skill_id)
    assert removed is not None
    assert removed.status == "missing"
    assert store.get_skill_spec(skill_id).status == "active"  # type: ignore[union-attr]

    # --- 8. Reinstall and sync from two threads at once -----------------
    plugin_root = _write_plugin(plugins_dir, version="1.3.0", body=upstream_body, template="panel-v4")
    with ThreadPoolExecutor(max_workers=2) as executor:
        list(executor.map(lambda _: resync_with_fresh_services(), range(2)))

    open_after_reinstall = [
        entry
        for entry in learning_store.list_learning_candidates()
        if entry.candidate_id != upgrade_candidate.candidate_id and entry.status == "open"
    ]
    assert len(open_after_reinstall) == 1
    versions = store.list_versions(skill_id)
    assert versions.count("v0003") == 1
    panel_file = plugin_root / "skills" / skill_id / "templates" / "panel.txt"
    assert panel_file.read_text(encoding="utf-8") == "panel-v4"
def _services(
    workspace: Path,
    *,
    publish_observer=None,
) -> tuple[PluginManager, SkillSpecStore, SkillLearningStore, SkillLearningPipelineService]:
    """Assemble a fresh plugin manager and learning pipeline over ``workspace``.

    Every call runs plugin discovery again and constructs new store and
    service objects, so callers can use it to get a service graph that was
    built from the current on-disk state.

    Args:
        workspace: Root directory shared by all stores.
        publish_observer: Callback handed to the pipeline as its publish
            observer. When ``None``, the new manager's
            ``on_skill_published`` is used instead.

    Returns:
        ``(manager, store, learning_store, pipeline)``.
    """
    memory_root = workspace / "memory"

    discovery = discover_plugins(workspace, search_paths=[])
    store = SkillSpecStore(workspace)
    learning_store = SkillLearningStore(memory_root / "skills")
    run_store = RunMemoryStore(memory_root / "runs")
    publisher = SkillPublisher(store)

    # Manager-only collaborators, created in the order the manager receives them.
    state_store = PluginStateStore(workspace)
    safety_checker = SkillDraftSafetyChecker()
    write_lock = WorkspaceWriteLock(workspace)
    manager = PluginManager(
        workspace=workspace,
        manifests=discovery.manifests,
        discovery_errors=discovery.errors,
        state_store=state_store,
        skill_store=store,
        learning_store=learning_store,
        publisher=publisher,
        safety_checker=safety_checker,
        write_lock=write_lock,
    )

    # The learning service and the pipeline each get their own DraftService.
    learning_service = SkillLearningService(
        run_store=run_store,
        learning_store=learning_store,
        draft_service=DraftService(store),
        evidence_selector=EvidenceSelector(run_store),
        synthesizer=SkillDraftSynthesizer(),
    )

    if publish_observer is None:
        observer = manager.on_skill_published
    else:
        observer = publish_observer

    pipeline = SkillLearningPipelineService(
        learning_store=learning_store,
        learning_service=learning_service,
        draft_service=DraftService(store),
        review_service=ReviewService(store),
        publisher=publisher,
        publish_observer=observer,
    )
    return manager, store, learning_store, pipeline
def _write_plugin(root: Path, *, version: str, body: str, template: str) -> Path:
    """Create (or overwrite) the ``baoyu-comic`` plugin tree under ``root``.

    Writes the skill's ``SKILL.md`` via ``_write_skill``, the
    ``templates/panel.txt`` file, and the ``beaver.plugin.json`` manifest.

    Args:
        root: Directory that will contain the plugin folder.
        version: Value stored in the manifest's ``version`` field.
        body: Markdown body passed to ``_write_skill``.
        template: Text written to ``templates/panel.txt``.

    Returns:
        Path of the plugin directory (``root / "baoyu-comic"``).
    """
    plugin_dir = root / "baoyu-comic"
    skill_dir = plugin_dir / "skills" / "baoyu-comic"
    skill_dir.mkdir(parents=True, exist_ok=True)
    _write_skill(skill_dir, body)

    templates_dir = skill_dir / "templates"
    templates_dir.mkdir(exist_ok=True)
    (templates_dir / "panel.txt").write_text(template, encoding="utf-8")

    manifest = {
        "schema_version": 1,
        "id": "baoyu-comic",
        "name": "Baoyu Comic",
        "version": version,
        "skills": [{"name": "baoyu-comic", "path": "skills/baoyu-comic"}],
    }
    manifest_file = plugin_dir / "beaver.plugin.json"
    manifest_file.write_text(json.dumps(manifest), encoding="utf-8")
    return plugin_dir
def _rewrite_plugin(plugin_root: Path, *, version: str, body: str | None = None, template: str | None = None) -> None:
    """Update an existing plugin tree in place.

    The manifest's ``version`` is always replaced. ``SKILL.md`` is rewritten
    only when ``body`` is given, and ``templates/panel.txt`` only when
    ``template`` is given.

    Args:
        plugin_root: Plugin directory containing ``beaver.plugin.json``.
        version: New value for the manifest's ``version`` field.
        body: New skill body, or ``None`` to leave ``SKILL.md`` untouched.
        template: New panel template text, or ``None`` to leave it untouched.
    """
    manifest_file = plugin_root / "beaver.plugin.json"
    manifest_data = json.loads(manifest_file.read_text(encoding="utf-8"))
    manifest_data["version"] = version
    manifest_file.write_text(json.dumps(manifest_data), encoding="utf-8")

    skill_dir = plugin_root / "skills" / "baoyu-comic"
    if body is not None:
        _write_skill(skill_dir, body)
    if template is None:
        return
    panel_file = skill_dir / "templates" / "panel.txt"
    panel_file.write_text(template, encoding="utf-8")
def _write_skill(skill_root: Path, body: str) -> None:
    """Write ``SKILL.md`` in ``skill_root``: fixed front matter followed by ``body``.

    ``skill_root`` must already exist; the file is overwritten if present.
    """
    front_matter = "---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n"
    skill_file = skill_root / "SKILL.md"
    skill_file.write_text(front_matter + body, encoding="utf-8")
def _bundle(provider: StubProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ``ProviderBundle`` as its main provider.

    The bundle's main runtime is a bare namespace carrying only ``model`` and
    ``provider_name`` (both ``"stub"``), hence the type-ignore.
    """
    return ProviderBundle(  # type: ignore[arg-type]
        main_runtime=SimpleNamespace(model="stub", provider_name="stub"),
        main_provider=provider,
    )
def _only_open_candidate(learning_store: SkillLearningStore):
    """Return the single learning candidate whose status is ``"open"``.

    Fails with ``AssertionError`` unless the store reports exactly one.
    """
    pending = learning_store.list_learning_candidates(status="open")
    assert len(pending) == 1
    return pending[0]
def _add_eval_cases(learning_store: SkillLearningStore, candidate_id: str) -> None:
    """Attach ten explicit evaluation cases to a candidate's evidence.

    Looks up the candidate with ``candidate_id``, copies its evidence, sets
    the copy's ``eval_cases`` entry, and stores the copy back through
    ``update_learning_candidate``. Raises ``StopIteration`` if no candidate
    has that id.
    """

    def build_case(case_number: int) -> dict:
        """Return one evaluation case, with a fresh validator dict each time."""
        return {
            "run_id": f"explicit:{case_number}",
            "task_text": f"Review comic panel safety case {case_number}",
            "baseline_skill_names": ["baoyu-comic"],
            "candidate_skill_name": "baoyu-comic",
            "accepted_score": 0.8,
            "validator": {
                "type": "final_answer_contains",
                "required_terms": ["panel", "safety"],
                "forbidden_terms": ["secret"],
            },
        }

    matching = (
        entry
        for entry in learning_store.list_learning_candidates()
        if entry.candidate_id == candidate_id
    )
    target = next(matching)
    # Work on a copy so the candidate object's own evidence is not mutated.
    updated_evidence = dict(target.evidence)
    updated_evidence["eval_cases"] = [build_case(number) for number in range(10)]
    learning_store.update_learning_candidate(candidate_id, evidence=updated_evidence)
def _plugin_file_bytes(plugin_root: Path) -> dict[str, bytes]:
    """Snapshot every file under ``plugin_root`` as ``{posix relative path: bytes}``.

    Directories are skipped. Entries are inserted in sorted path order, so
    two snapshots of identical trees compare equal.
    """
    snapshot: dict[str, bytes] = {}
    for entry in sorted(plugin_root.rglob("*")):
        if not entry.is_file():
            continue
        relative_name = entry.relative_to(plugin_root).as_posix()
        snapshot[relative_name] = entry.read_bytes()
    return snapshot