test(plugins): cover skill mirror lifecycle

This commit is contained in:
2026-06-16 12:24:19 +08:00
parent a9b830d11e
commit a65e59fcb6
7 changed files with 516 additions and 5 deletions

View File

@ -12,11 +12,13 @@ from beaver.engine.context import SkillContext
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.learning.case_selection import select_replay_cases
from beaver.skills.learning.preservation import check_preservation
from beaver.skills.learning.preservation import check_plugin_merge_preservation, check_preservation
from beaver.skills.learning.replay import ReplayArmRequest, ReplayRunner
from beaver.skills.learning.surrogate import SurrogateToolEvaluator
from beaver.skills.specs import SkillDraft
from beaver.skills.specs.storage import SkillSpecStore
class SkillDraftEvaluator:
@ -28,9 +30,11 @@ class SkillDraftEvaluator:
*,
surrogate_evaluator: SurrogateToolEvaluator | None = None,
max_parallel_cases: int | None = None,
skill_store: SkillSpecStore | None = None,
) -> None:
self.run_store = run_store
self.surrogate_evaluator = surrogate_evaluator or SurrogateToolEvaluator()
self.skill_store = skill_store
configured_parallelism = max_parallel_cases
if configured_parallelism is None:
try:
@ -207,7 +211,7 @@ class SkillDraftEvaluator:
results = await asyncio.gather(*(evaluate_case(case) for case in replay_cases))
case_reports = [case_report for case_report, _ in results]
legacy_cases = [legacy_case for _, legacy_case in results]
preservation_report = _preservation_report(candidate, draft)
preservation_report = _preservation_report(candidate, draft, skill_store=self.skill_store)
return _report_from_case_reports(
candidate,
draft,
@ -343,9 +347,35 @@ def _draft_skill_context(draft: SkillDraft) -> SkillContext:
)
def _preservation_report(candidate: SkillLearningCandidate, draft: SkillDraft) -> dict | None:
def _preservation_report(
candidate: SkillLearningCandidate,
draft: SkillDraft,
*,
skill_store: SkillSpecStore | None = None,
) -> dict | None:
if candidate.kind not in {"revise_skill", "merge_skills"}:
return None
if candidate.kind != "plugin_skill_update" or skill_store is None:
return None
plugin_id = str(draft.provenance.get("plugin_id") or candidate.evidence.get("plugin_id") or "")
skill_name = str(draft.provenance.get("skill_name") or candidate.evidence.get("skill_name") or draft.skill_name)
local_version = str(draft.base_version or draft.provenance.get("local_version") or candidate.evidence.get("local_version") or "")
upstream_hash = str(
draft.provenance.get("new_upstream_tree_hash")
or candidate.evidence.get("new_upstream_tree_hash")
or ""
)
if not plugin_id or not skill_name or not local_version or not upstream_hash:
return None
local = skill_store.read_published_skill(skill_name, local_version)
upstream = skill_store.read_upstream_snapshot(skill_name, plugin_id, upstream_hash)
if local is None or upstream is None:
return None
return check_plugin_merge_preservation(
local_content=strip_frontmatter(local.content),
upstream_content=strip_frontmatter(upstream.content),
draft_content=draft.proposed_content,
merge_decisions=draft.provenance,
)
base_content = str(candidate.evidence.get("base_content") or "") if isinstance(candidate.evidence, dict) else ""
if not base_content.strip():
return None

View File

@ -315,7 +315,10 @@ class SkillLearningPipelineService:
) -> SkillDraftEvalReport:
draft = self.get_draft(skill_name, draft_id)
candidate = self.get_candidate(candidate_id)
evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
evaluator = self.evaluator or SkillDraftEvaluator(
self.learning_service.run_store,
skill_store=self.draft_service.store,
)
report = await evaluator.evaluate(
candidate=candidate,
draft=draft,