diff --git a/app-instance/backend/beaver/memory/skills/store.py b/app-instance/backend/beaver/memory/skills/store.py index 98424b5..0b4a384 100644 --- a/app-instance/backend/beaver/memory/skills/store.py +++ b/app-instance/backend/beaver/memory/skills/store.py @@ -33,7 +33,17 @@ class SkillLearningStore: self.eval_reports_dir = self.root / "eval-reports" def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None: - self.record_learning_candidate_if_absent(candidate) + normalized = SkillLearningCandidate.from_dict(candidate.to_dict()) + self._append_jsonl(self.candidates_path, normalized.to_dict()) + self.append_audit_event( + normalized.candidate_id, + "candidate_created", + { + "kind": normalized.kind, + "status": normalized.status, + "reason": normalized.reason, + }, + ) def record_learning_candidate_if_absent( self, diff --git a/app-instance/backend/beaver/plugins/__init__.py b/app-instance/backend/beaver/plugins/__init__.py index da463f6..5dfe36a 100644 --- a/app-instance/backend/beaver/plugins/__init__.py +++ b/app-instance/backend/beaver/plugins/__init__.py @@ -13,7 +13,6 @@ from .models import ( PluginState, ) from .state import PluginStateStore -from .skills import PluginManager __all__ = [ "PluginDiscoveryError", @@ -25,7 +24,6 @@ __all__ = [ "PluginSkillTreeDigest", "PluginState", "PluginStateStore", - "PluginManager", "hash_plugin_skill_tree", "load_plugin_manifest", ] diff --git a/app-instance/backend/beaver/plugins/skills.py b/app-instance/backend/beaver/plugins/skills.py index 20de778..f02bf09 100644 --- a/app-instance/backend/beaver/plugins/skills.py +++ b/app-instance/backend/beaver/plugins/skills.py @@ -109,6 +109,77 @@ class PluginManager: results[state.plugin_id] = self._sync_plugin(state, manifest) return results + def pause(self, plugin_id: str) -> PluginState: + with self.write_lock.acquire(timeout_seconds=10): + state = self._require_state(plugin_id) + state.updates_paused = True + self.state_store.upsert_plugin(state) + return state + + def resume(self, plugin_id: str) -> PluginState: + with self.write_lock.acquire(timeout_seconds=10): + state = self._require_state(plugin_id) + state.updates_paused = False + self.state_store.upsert_plugin(state) + return self.sync_enabled().get(plugin_id) or self._require_state(plugin_id) + + def disable(self, plugin_id: str, *, disable_linked_skills: bool) -> PluginState: + if not disable_linked_skills: + raise ValueError("disable_linked_skills confirmation is required") + with self.write_lock.acquire(timeout_seconds=10): + state = self._require_state(plugin_id) + for skill_name in list(state.skills): + self.publisher.disable(skill_name, actor="plugin-manager", reason=f"plugin_disabled:{plugin_id}") + state.skills[skill_name].status = "disabled" + state.enabled = False + state.updates_paused = True + state.status = "disabled" + self.state_store.upsert_plugin(state) + return state + + def adopt(self, plugin_id: str, skill_name: str) -> SkillSpec: + with self.write_lock.acquire(timeout_seconds=10): + state = self._require_state(plugin_id) + if skill_name not in state.skills: + raise ValueError(f"Plugin skill binding not found: {plugin_id}/{skill_name}") + spec = self.skill_store.get_skill_spec(skill_name) + if spec is None: + raise ValueError(f"Skill spec not found: {skill_name}") + spec.source_kind = "managed" + spec.status = SkillStatus.ACTIVE.value + spec.updated_at = _utc_now() + marker = f"adopted_from_plugin:{plugin_id}" + if marker not in spec.lineage: + spec.lineage.append(marker) + self.skill_store.write_skill_spec(spec) + del state.skills[skill_name] + if not state.skills: + state.status = "adopted" + state.enabled = False + self.state_store.upsert_plugin(state) + self.publisher._refresh_indexes(skill_name, spec.status) + return spec + + def on_skill_published(self, draft: SkillDraft, published: SkillVersion | SkillSpec) -> None: + if draft.proposal_kind != "plugin_skill_update" or not isinstance(published, SkillVersion): + return + plugin_id = str(draft.provenance.get("plugin_id") or "") + skill_name = str(draft.provenance.get("skill_name") or draft.skill_name) + tree_hash = str(draft.provenance.get("new_upstream_tree_hash") or "") + if not plugin_id or not skill_name or not tree_hash: + raise ValueError("Plugin publish acknowledgement is missing provenance") + state = self._require_state(plugin_id) + binding = state.skills.get(skill_name) or PluginSkillBinding() + binding.accepted_upstream_tree_hash = tree_hash + binding.observed_upstream_tree_hash = tree_hash + binding.accepted_beaver_version = published.version + binding.current_beaver_version = published.version + binding.pending_candidate_id = None + binding.status = "synced" + state.skills[skill_name] = binding + state.status = "synced" + self.state_store.upsert_plugin(state) + def _prepare_initial_mirror( self, manifest: PluginManifest, @@ -174,6 +245,12 @@ class PluginManager: ) return prepared + def _require_state(self, plugin_id: str) -> PluginState: + state = self.state_store.get_plugin(plugin_id) + if state is None: + raise ValueError(f"Unknown plugin state: {plugin_id}") + return state + def _sync_plugin(self, state: PluginState, manifest: PluginManifest) -> PluginState: transaction = PluginSkillTransaction(self.workspace) try: diff --git a/app-instance/backend/beaver/plugins/tree_merge.py b/app-instance/backend/beaver/plugins/tree_merge.py new file mode 100644 index 0000000..34e3d9a --- /dev/null +++ b/app-instance/backend/beaver/plugins/tree_merge.py @@ -0,0 +1,65 @@ +"""Deterministic path-level three-way merge for plugin supporting files.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass(frozen=True, slots=True) +class SupportingFileDecision: + path: str + source: str + + def to_dict(self) -> dict[str, Any]: + return {"path": self.path, "source": self.source} + + +@dataclass(frozen=True, slots=True) +class SupportingFileConflict: + path: str + reason: str + + def to_dict(self) -> dict[str, Any]: + return {"path": self.path, "reason": self.reason} + + +@dataclass(frozen=True, slots=True) +class SupportingFileMergePlan: + files: dict[str, SupportingFileDecision] = field(default_factory=dict) + conflicts: list[SupportingFileConflict] = field(default_factory=list) + + def to_dict(self) -> dict[str, Any]: + return { + "files": {path: decision.to_dict() for path, decision in sorted(self.files.items())}, + "conflicts": [conflict.to_dict() for conflict in self.conflicts], + } + + +def merge_supporting_file_trees( + *, + base: dict[str, Any], + local: dict[str, Any], + upstream: dict[str, Any], +) -> SupportingFileMergePlan: + decisions: dict[str, SupportingFileDecision] = {} + conflicts: list[SupportingFileConflict] = [] + for path in sorted({*base.keys(), *local.keys(), *upstream.keys()} - {"SKILL.md"}): + b = base.get(path) + l = local.get(path) + u = upstream.get(path) + if l == u and l is not None: + decisions[path] = SupportingFileDecision(path=path, source="local") + elif l == b and u is not None: + decisions[path] = SupportingFileDecision(path=path, source="upstream") + elif u == b and l is not None: + decisions[path] = SupportingFileDecision(path=path, source="local") + elif b is None and l is None and u is not None: + decisions[path] = SupportingFileDecision(path=path, source="upstream") + elif b is None and u is None and l is not None: + decisions[path] = SupportingFileDecision(path=path, source="local") + elif b is not None and l is None and u is None: + continue + else: + conflicts.append(SupportingFileConflict(path=path, reason="divergent supporting-file change")) + return SupportingFileMergePlan(files=decisions, conflicts=conflicts) diff --git a/app-instance/backend/beaver/skills/drafts/service.py b/app-instance/backend/beaver/skills/drafts/service.py index 546f939..5089190 100644 --- a/app-instance/backend/beaver/skills/drafts/service.py +++ b/app-instance/backend/beaver/skills/drafts/service.py @@ -94,6 +94,34 @@ class DraftService: self.store.write_draft(draft) return draft + def create_plugin_update_draft( + self, + *, + skill_name: str, + base_version: str, + proposed_content: str, + proposed_frontmatter: dict, + created_by: str, + reason: str, + provenance: dict, + evidence_refs: list[dict] | None = None, + ) -> SkillDraft: + draft = SkillDraft( + draft_id=uuid4().hex, + skill_name=skill_name, + base_version=base_version, + proposed_content=proposed_content, + proposed_frontmatter=dict(proposed_frontmatter), + created_at=_utc_now(), + created_by=created_by, + reason=reason, + evidence_refs=list(evidence_refs or []), + proposal_kind="plugin_skill_update", + provenance=dict(provenance), + ) + self.store.write_draft(draft) + return draft + def create_retire_proposal( self, *, diff --git a/app-instance/backend/beaver/skills/learning/pipeline.py b/app-instance/backend/beaver/skills/learning/pipeline.py index b7e38ce..aef1dfa 100644 --- a/app-instance/backend/beaver/skills/learning/pipeline.py +++ b/app-instance/backend/beaver/skills/learning/pipeline.py @@ -35,6 +35,7 @@ class SkillLearningPipelineService: publisher: SkillPublisher, safety_checker: SkillDraftSafetyChecker | None = None, evaluator: SkillDraftEvaluator | None = None, + publish_observer: Callable[[SkillDraft, SkillVersion | SkillSpec], None] | None = None, ) -> None: self.learning_store = learning_store self.learning_service = learning_service @@ -43,6 +44,7 @@ class SkillLearningPipelineService: self.publisher = publisher self.safety_checker = safety_checker or SkillDraftSafetyChecker() self.evaluator = evaluator + self.publish_observer = publish_observer def list_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]: return self.learning_store.list_learning_candidates(status=status) @@ -238,6 +240,16 @@ class SkillLearningPipelineService: else: result = self.publisher.publish(skill_name, draft_id, publisher=publisher, notes=notes) self._mark_candidate_by_draft(skill_name, draft_id, "published", "published") + if self.publish_observer is not None: + try: + self.publish_observer(draft, result) + except Exception as exc: # noqa: BLE001 - observer is best effort after successful publish. + candidate = self._candidate_by_draft(skill_name, draft_id) + self.learning_store.append_audit_event( + candidate.candidate_id if candidate is not None else f"draft:{draft_id}", + "plugin_publish_ack_failed", + {"error": str(exc), "skill_name": skill_name, "draft_id": draft_id}, + ) return result def rollback( @@ -391,6 +403,14 @@ class SkillLearningPipelineService: preservation = eval_report.preservation_report or {} if preservation.get("passed") is False: raise ValueError("Draft preservation check did not pass") + if draft.proposal_kind == "plugin_skill_update": + if draft.provenance.get("merge_mode") == "three_way" and preservation.get("mode") != "plugin_three_way": + raise ValueError("Plugin update requires a three-way preservation report") + if preservation.get("unresolved_conflicts"): + raise ValueError("Plugin update has unresolved merge conflicts") + supporting_plan = draft.provenance.get("supporting_file_plan") + if isinstance(supporting_plan, dict) and supporting_plan.get("conflicts"): + raise ValueError("Plugin update has unresolved supporting-file conflicts") def _mark_candidate_by_draft( self, diff --git a/app-instance/backend/beaver/skills/learning/preservation.py b/app-instance/backend/beaver/skills/learning/preservation.py index f1c1e75..c282125 100644 --- a/app-instance/backend/beaver/skills/learning/preservation.py +++ b/app-instance/backend/beaver/skills/learning/preservation.py @@ -32,6 +32,30 @@ def check_preservation(*, base_content: str, draft_content: str) -> dict[str, An } +def check_plugin_merge_preservation( + *, + local_content: str, + upstream_content: str, + draft_content: str, + merge_decisions: dict[str, Any], +) -> dict[str, Any]: + local = check_preservation(base_content=local_content, draft_content=draft_content) + upstream = check_preservation(base_content=upstream_content, draft_content=draft_content) + unresolved = [str(item) for item in merge_decisions.get("unresolved_conflicts") or []] + safety_sections_missing = _important_sections_missing(upstream, local) + passed = bool(local.get("passed")) and bool(upstream.get("passed")) and not unresolved and not safety_sections_missing + return { + "mode": "plugin_three_way", + "passed": passed, + "risk_level": "high" if not passed else "low", + "local": local, + "upstream": upstream, + "unresolved_conflicts": unresolved, + "safety_sections_missing": safety_sections_missing, + "resolved_conflicts": [str(item) for item in merge_decisions.get("resolved_conflicts") or []], + } + + def _sections(content: str) -> dict[str, str]: current = "body" sections: dict[str, list[str]] = {current: []} @@ -51,3 +75,13 @@ def _sections(content: str) -> dict[str, str]: def _normalize(value: str) -> str: return re.sub(r"\s+", " ", value or "").strip().lower() + + +def _important_sections_missing(*reports: dict[str, Any]) -> list[str]: + important = {"safety", "required tools", "required tool", "tools"} + missing: list[str] = [] + for report in reports: + for section in report.get("dropped_sections") or []: + if str(section).strip().lower() in important and str(section) not in missing: + missing.append(str(section)) + return missing diff --git a/app-instance/backend/beaver/skills/learning/service.py b/app-instance/backend/beaver/skills/learning/service.py index 83d80c6..ef12a67 100644 --- a/app-instance/backend/beaver/skills/learning/service.py +++ b/app-instance/backend/beaver/skills/learning/service.py @@ -5,6 +5,7 @@ from __future__ import annotations from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone from itertools import combinations +from pathlib import Path import re from typing import Any from uuid import uuid4 @@ -14,9 +15,12 @@ from beaver.memory.runs.models import RunRecord, SkillEffectRecord from beaver.memory.runs.store import RunMemoryStore from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot from beaver.memory.skills.store import SkillLearningStore +from beaver.plugins.hashing import hash_plugin_skill_tree +from beaver.plugins.tree_merge import merge_supporting_file_trees from beaver.skills.drafts.service import DraftService from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector from beaver.skills.learning.synthesizer import SkillDraftSynthesizer +from beaver.skills.catalog.utils import parse_frontmatter from beaver.skills.specs import SkillActivationReceipt @@ -179,6 +183,8 @@ class SkillLearningService: candidate = candidates.get(candidate_id) if candidate is None: raise ValueError(f"Unknown learning candidate: {candidate_id}") + if candidate.kind == "plugin_skill_update": + return await self._synthesize_plugin_update(candidate, provider_bundle) if candidate.kind == "retire_skill": target_skill = candidate.related_skill_names[0] return self.draft_service.create_retire_proposal( @@ -242,6 +248,85 @@ class SkillLearningService: evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) + async def _synthesize_plugin_update(self, candidate: SkillLearningCandidate, provider_bundle: ProviderBundle) -> Any: + evidence = dict(candidate.evidence) + skill_name = str(evidence.get("skill_name") or (candidate.related_skill_names[0] if candidate.related_skill_names else "")) + plugin_id = str(evidence.get("plugin_id") or "") + new_upstream_tree_hash = str(evidence.get("new_upstream_tree_hash") or "") + local_version = str(evidence.get("local_version") or "") + merge_mode = str(evidence.get("merge_mode") or "") + if not skill_name or not plugin_id or not new_upstream_tree_hash or not local_version: + raise ValueError("Plugin update candidate is missing required evidence references") + new_upstream = self.draft_service.store.read_upstream_snapshot( + skill_name, + plugin_id, + new_upstream_tree_hash, + ) + if new_upstream is None: + raise ValueError("Plugin update references a missing upstream snapshot") + frontmatter, body = parse_frontmatter(new_upstream.content) + if merge_mode == "fast_forward": + return self.draft_service.create_plugin_update_draft( + skill_name=skill_name, + base_version=local_version, + proposed_content=body.strip(), + proposed_frontmatter=frontmatter, + created_by="learning-loop", + reason=candidate.reason, + provenance={ + **evidence, + "proposal_kind": "plugin_skill_update", + }, + evidence_refs=[], + ) + base_upstream_tree_hash = str(evidence.get("base_upstream_tree_hash") or "") + old_upstream = self.draft_service.store.read_upstream_snapshot(skill_name, plugin_id, base_upstream_tree_hash) + current_local = self.draft_service.store.read_published_skill(skill_name, local_version) + if old_upstream is None: + raise ValueError("Plugin update references a missing base upstream snapshot") + if current_local is None: + raise ValueError("Plugin update references a missing local skill version") + packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids) + provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider + model = ( + provider_bundle.auxiliary_runtime.model + if provider_bundle.auxiliary_runtime is not None + else provider_bundle.main_runtime.model + ) + local_root = self.draft_service.store.root / skill_name / "versions" / local_version + file_plan = merge_supporting_file_trees( + base=_digest_map(old_upstream.root), + local=_digest_map(local_root), + upstream=_digest_map(new_upstream.root), + ) + payload = await self.synthesizer.synthesize_plugin_update( + candidate, + packet, + provider, + model, + old_upstream={"content": old_upstream.content, "frontmatter": old_upstream.snapshot.frontmatter}, + current_local={"content": current_local.content, "frontmatter": current_local.version.frontmatter}, + new_upstream={"content": new_upstream.content, "frontmatter": frontmatter}, + ) + return self.draft_service.create_plugin_update_draft( + skill_name=skill_name, + base_version=local_version, + proposed_content=payload["content"], + proposed_frontmatter=payload["frontmatter"], + created_by="learning-loop", + reason=payload["change_reason"] or candidate.reason, + provenance={ + **evidence, + "proposal_kind": "plugin_skill_update", + "preserved_local_sections": payload.get("preserved_local_sections", []), + "adopted_upstream_sections": payload.get("adopted_upstream_sections", []), + "resolved_conflicts": payload.get("resolved_conflicts", []), + "dropped_sections": payload.get("dropped_sections", []), + "supporting_file_plan": file_plan.to_dict(), + }, + evidence_refs=[], + ) + def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None: loaded = self.draft_service.store.read_published_skill(skill_name, version) if loaded is None: @@ -515,3 +600,16 @@ class SkillLearningService: if parsed.tzinfo is None: return parsed.replace(tzinfo=timezone.utc) return parsed.astimezone(timezone.utc) + + +def _digest_map(root: Path) -> dict[str, dict[str, Any]]: + digest = hash_plugin_skill_tree(root) + return { + item.path: { + "content_hash": item.content_hash, + "executable": item.executable, + "size": item.size, + } + for item in digest.files + if item.path not in {"SKILL.md", "version.json", "upstream.json"} + } diff --git a/app-instance/backend/beaver/skills/learning/synthesizer.py b/app-instance/backend/beaver/skills/learning/synthesizer.py index b11c539..c13754c 100644 --- a/app-instance/backend/beaver/skills/learning/synthesizer.py +++ b/app-instance/backend/beaver/skills/learning/synthesizer.py @@ -41,6 +41,55 @@ class SkillDraftSynthesizer: ) -> dict[str, Any]: return await self._synthesize(candidate, evidence_packet, provider, model, "merge", base_skill=base_skill) + async def synthesize_plugin_update( + self, + candidate: SkillLearningCandidate, + evidence_packet: EvidencePacket, + provider: LLMProvider, + model: str, + *, + old_upstream: dict[str, Any], + current_local: dict[str, Any], + new_upstream: dict[str, Any], + ) -> dict[str, Any]: + prompt = self._build_plugin_update_prompt( + candidate, + evidence_packet, + old_upstream=old_upstream, + current_local=current_local, + new_upstream=new_upstream, + ) + response = await provider.chat( + messages=[ + { + "role": "system", + "content": ( + "You merge Beaver plugin skill updates. Return JSON only with keys: " + "frontmatter, content, change_reason, preserved_local_sections, " + "adopted_upstream_sections, resolved_conflicts, dropped_sections. " + "Preserve valid local learning, adopt upstream fixes and safety changes, " + "do not concatenate duplicate sections, and list every intentional drop." + ), + }, + {"role": "user", "content": prompt}, + ], + tools=None, + model=model, + max_tokens=4096, + temperature=0, + ) + payload = self._parse_plugin_update_payload(response.content or "") + if payload: + return payload + fallback = self._fallback_payload(candidate, evidence_packet, "plugin_update") + return { + **fallback, + "preserved_local_sections": [], + "adopted_upstream_sections": [], + "resolved_conflicts": [], + "dropped_sections": [], + } + async def _synthesize( self, candidate: SkillLearningCandidate, @@ -119,6 +168,28 @@ class SkillDraftSynthesizer: + "\nThe JSON may include preserved_sections, changed_sections, and dropped_sections arrays." ) + @staticmethod + def _build_plugin_update_prompt( + candidate: SkillLearningCandidate, + evidence_packet: EvidencePacket, + *, + old_upstream: dict[str, Any], + current_local: dict[str, Any], + new_upstream: dict[str, Any], + ) -> str: + return ( + f"Candidate kind: {candidate.kind}\n" + f"Reason: {candidate.reason}\n" + f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries or ["No historical run evidence."]) + + "\n\nOLD UPSTREAM (merge base B):\n" + + str(old_upstream.get("content") or "") + + "\n\nCURRENT LOCAL (Beaver learned version L):\n" + + str(current_local.get("content") or "") + + "\n\nNEW UPSTREAM (plugin update U):\n" + + str(new_upstream.get("content") or "") + + "\n\nReturn JSON only. Preserve useful CURRENT LOCAL learning and adopt important NEW UPSTREAM changes." + ) + @staticmethod def _parse_payload(content: str) -> dict[str, Any]: cleaned = content.strip() @@ -145,6 +216,33 @@ class SkillDraftSynthesizer: "dropped_sections": _coerce_string_list(payload.get("dropped_sections")), } + @staticmethod + def _parse_plugin_update_payload(content: str) -> dict[str, Any]: + cleaned = content.strip() + if cleaned.startswith("```"): + lines = cleaned.splitlines() + if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"): + cleaned = "\n".join(lines[1:-1]).strip() + try: + payload = json.loads(cleaned) + except json.JSONDecodeError: + return {} + if not isinstance(payload, dict): + return {} + frontmatter = payload.get("frontmatter") + content_value = payload.get("content") + if not isinstance(frontmatter, dict) or not isinstance(content_value, str): + return {} + return { + "frontmatter": frontmatter, + "content": content_value.strip(), + "change_reason": str(payload.get("change_reason") or ""), + "preserved_local_sections": _coerce_string_list(payload.get("preserved_local_sections")), + "adopted_upstream_sections": _coerce_string_list(payload.get("adopted_upstream_sections")), + "resolved_conflicts": _coerce_string_list(payload.get("resolved_conflicts")), + "dropped_sections": _coerce_string_list(payload.get("dropped_sections")), + } + @staticmethod def _normalize_payload(payload: dict[str, Any], evidence_packet: EvidencePacket) -> dict[str, Any]: frontmatter = normalize_skill_frontmatter( diff --git a/app-instance/backend/beaver/skills/publisher/service.py b/app-instance/backend/beaver/skills/publisher/service.py index e77d077..1654ecc 100644 --- a/app-instance/backend/beaver/skills/publisher/service.py +++ b/app-instance/backend/beaver/skills/publisher/service.py @@ -8,6 +8,7 @@ from pathlib import Path from beaver.skills.catalog.utils import strip_frontmatter from beaver.skills.specs import SkillDraft, SkillReviewState, SkillSpec, SkillSpecStore, SkillStatus, SkillVersion from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content +from beaver.plugins.hashing import hash_plugin_skill_tree class SkillPublisher: @@ -40,6 +41,7 @@ class SkillPublisher: summary=summarize_skill_content(body), tool_hints=self.store._extract_tool_hints(normalize_frontmatter(draft.proposed_frontmatter)), provenance={ + **dict(draft.provenance), "draft_id": draft_id, "proposal_kind": draft.proposal_kind, "trigger_run_id": draft.trigger_run_id, @@ -47,7 +49,13 @@ class SkillPublisher: }, ) self.store.write_skill_version(version, content) - self._copy_uploaded_supporting_files(draft, next_version) + if draft.proposal_kind == "plugin_skill_update": + self._copy_plugin_update_supporting_files(draft, next_version) + version_dir = self.store.root / draft.skill_name / "versions" / next_version + version.tree_hash = hash_plugin_skill_tree(version_dir).skill_tree_hash + self.store._write_json(version_dir / "version.json", version.to_dict()) + else: + self._copy_uploaded_supporting_files(draft, next_version) self.store.set_current_version(skill_name, next_version) spec = self.store.get_skill_spec(skill_name) @@ -194,6 +202,25 @@ class SkillPublisher: target.parent.mkdir(parents=True, exist_ok=True) shutil.copyfile(source, target) + def _copy_plugin_update_supporting_files(self, draft: SkillDraft, version: str) -> None: + plugin_id = str(draft.provenance.get("plugin_id") or "") + tree_hash = str(draft.provenance.get("new_upstream_tree_hash") or "") + if not plugin_id or not tree_hash: + raise ValueError("Plugin update draft is missing upstream provenance") + upstream = self.store.read_upstream_snapshot(draft.skill_name, plugin_id, tree_hash) + if upstream is None: + raise ValueError("Plugin update upstream snapshot is missing") + target_root = self.store.root / draft.skill_name / "versions" / version + for source in sorted(upstream.root.rglob("*"), key=lambda item: item.relative_to(upstream.root).as_posix()): + if not source.is_file() or source.is_symlink(): + continue + relative = source.relative_to(upstream.root) + if relative.as_posix() in {"SKILL.md", "upstream.json", "version.json"}: + continue + target = target_root / relative + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(source, target) + def _require_draft(self, skill_name: str, draft_id: str) -> SkillDraft: draft = self.store.read_draft(skill_name, draft_id) if draft is None: diff --git a/app-instance/backend/beaver/skills/specs/models.py b/app-instance/backend/beaver/skills/specs/models.py index 1172b55..fa1af5a 100644 --- a/app-instance/backend/beaver/skills/specs/models.py +++ b/app-instance/backend/beaver/skills/specs/models.py @@ -180,6 +180,7 @@ class SkillDraft: status: str = SkillReviewState.DRAFT.value evidence_refs: list[dict[str, Any]] = field(default_factory=list) proposal_kind: str = "revise_skill" + provenance: dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: return { @@ -196,6 +197,7 @@ class SkillDraft: "status": self.status, "evidence_refs": list(self.evidence_refs), "proposal_kind": self.proposal_kind, + "provenance": dict(self.provenance), } @classmethod @@ -214,6 +216,7 @@ class SkillDraft: status=str(payload.get("status") or SkillReviewState.DRAFT.value), evidence_refs=list(payload.get("evidence_refs") or []), proposal_kind=str(payload.get("proposal_kind") or "revise_skill"), + provenance=dict(payload.get("provenance") or {}), ) diff --git a/app-instance/backend/tests/unit/test_plugin_skill_learning.py b/app-instance/backend/tests/unit/test_plugin_skill_learning.py new file mode 100644 index 0000000..aa5606f --- /dev/null +++ b/app-instance/backend/tests/unit/test_plugin_skill_learning.py @@ -0,0 +1,239 @@ +from __future__ import annotations + +import asyncio +import json +from pathlib import Path +from types import SimpleNamespace + +from beaver.engine.providers.base import LLMProvider, LLMResponse +from beaver.engine.providers.factory import ProviderBundle +from beaver.foundation.utils.file_lock import WorkspaceWriteLock +from beaver.memory.runs import RunMemoryStore +from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore +from beaver.plugins.discovery import discover_plugins +from beaver.plugins.skills import PluginManager +from beaver.plugins.state import PluginStateStore +from beaver.plugins.tree_merge import merge_supporting_file_trees +from beaver.skills.drafts import DraftService +from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningService +from beaver.skills.learning.safety import SkillDraftSafetyChecker +from beaver.skills.publisher import SkillPublisher +from beaver.skills.specs import SkillDraft, SkillReviewState, SkillSpecStore + + +class CountingProvider(LLMProvider): + def __init__(self, content: str = "{}") -> None: + super().__init__() + self.content = content + self.calls: list[dict] = [] + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + model: str | None = None, + max_tokens: int = 4096, + temperature: float = 0.7, + thinking_enabled: bool | None = None, + ) -> LLMResponse: + self.calls.append({"messages": messages, "model": model}) + return LLMResponse(content=self.content) + + def get_default_model(self) -> str: + return "stub" + + +def _bundle(provider: CountingProvider) -> ProviderBundle: + runtime = SimpleNamespace(model="stub", provider_name="stub") + return ProviderBundle(main_runtime=runtime, main_provider=provider) # type: ignore[arg-type] + + +def _write_plugin(root: Path, *, version: str = "1.0.0", body: str = "# Comic\n\nV1.\n", template: str = "v1") -> Path: + plugin_root = root / "baoyu-comic" + skill_root = plugin_root / "skills" / "baoyu-comic" + skill_root.mkdir(parents=True, exist_ok=True) + (skill_root / "SKILL.md").write_text( + "---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n" + body, + encoding="utf-8", + ) + (skill_root / "templates").mkdir(exist_ok=True) + (skill_root / "templates" / "panel.txt").write_text(template, encoding="utf-8") + (plugin_root / "beaver.plugin.json").write_text( + json.dumps( + { + "schema_version": 1, + "id": "baoyu-comic", + "name": "Baoyu Comic", + "version": version, + "skills": [{"name": "baoyu-comic", "path": "skills/baoyu-comic"}], + } + ), + encoding="utf-8", + ) + return plugin_root + + +def _rewrite_plugin(plugin_root: Path, *, version: str, body: str, template: str) -> None: + manifest_path = plugin_root / "beaver.plugin.json" + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + manifest["version"] = version + manifest_path.write_text(json.dumps(manifest), encoding="utf-8") + skill_root = plugin_root / "skills" / "baoyu-comic" + (skill_root / "SKILL.md").write_text( + "---\nname: baoyu-comic\ndescription: Comic workflow\ntools: []\n---\n\n" + body, + encoding="utf-8", + ) + (skill_root / "templates" / "panel.txt").write_text(template, encoding="utf-8") + + +def _manager(workspace: Path) -> tuple[PluginManager, SkillSpecStore, SkillLearningStore]: + discovery = discover_plugins(workspace, search_paths=[]) + skill_store = SkillSpecStore(workspace) + learning_store = SkillLearningStore(workspace / "memory" / "skills") + manager = PluginManager( + workspace=workspace, + manifests=discovery.manifests, + discovery_errors=discovery.errors, + state_store=PluginStateStore(workspace), + skill_store=skill_store, + learning_store=learning_store, + publisher=SkillPublisher(skill_store), + safety_checker=SkillDraftSafetyChecker(), + write_lock=WorkspaceWriteLock(workspace), + ) + return manager, skill_store, learning_store + + +def test_skill_draft_from_legacy_payload_has_empty_provenance() -> None: + draft = SkillDraft.from_dict( + { + "draft_id": "draft-1", + "skill_name": "debug", + "proposed_content": "# Debug\n", + "created_at": "now", + "created_by": "tester", + } + ) + + assert draft.provenance == {} + + +def test_fast_forward_plugin_update_synthesis_uses_exact_upstream_without_llm(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + plugin_root = _write_plugin(workspace / "plugins") + manager, skill_store, learning_store = _manager(workspace) + manager.enable("baoyu-comic") + _rewrite_plugin(plugin_root, version="1.1.0", body="# Comic\n\nV2.\n", template="v2") + _manager(workspace)[0].sync_enabled() + candidate = learning_store.list_learning_candidates()[0] + provider = CountingProvider() + service = SkillLearningService( + run_store=RunMemoryStore(workspace / "memory" / "runs"), + learning_store=learning_store, + draft_service=DraftService(skill_store), + evidence_selector=EvidenceSelector(RunMemoryStore(workspace / "memory" / "runs")), + ) + + draft = asyncio.run(service.synthesize_draft(candidate.candidate_id, _bundle(provider))) + upstream = skill_store.read_upstream_snapshot( + "baoyu-comic", + "baoyu-comic", + candidate.evidence["new_upstream_tree_hash"], + ) + + assert upstream is not None + assert draft.proposal_kind == "plugin_skill_update" + assert draft.proposed_content == "# Comic\n\nV2." + assert draft.base_version == "v0001" + assert draft.provenance["merge_mode"] == "fast_forward" + assert draft.provenance["new_upstream_tree_hash"] == upstream.snapshot.skill_tree_hash + assert provider.calls == [] + + +def test_publish_plugin_update_materializes_referenced_supporting_files(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + plugin_root = _write_plugin(workspace / "plugins", template="v1") + manager, skill_store, learning_store = _manager(workspace) + manager.enable("baoyu-comic") + _rewrite_plugin(plugin_root, version="1.1.0", body="# Comic\n\nV2.\n", template="v2") + _manager(workspace)[0].sync_enabled() + candidate = learning_store.list_learning_candidates()[0] + service = SkillLearningService( + run_store=RunMemoryStore(workspace / "memory" / "runs"), + learning_store=learning_store, + draft_service=DraftService(skill_store), + evidence_selector=EvidenceSelector(RunMemoryStore(workspace / "memory" / "runs")), + ) + draft = asyncio.run(service.synthesize_draft(candidate.candidate_id, _bundle(CountingProvider()))) + draft.status = SkillReviewState.APPROVED.value + skill_store.write_draft(draft) + + version = SkillPublisher(skill_store).publish("baoyu-comic", draft.draft_id, publisher="tester") + + assert version.version == "v0002" + assert (workspace / "skills" / "baoyu-comic" / "versions" / "v0002" / "templates" / "panel.txt").read_text( + encoding="utf-8" + ) == "v2" + + +def test_supporting_file_merge_adopts_upstream_when_local_is_unchanged() -> None: + plan = merge_supporting_file_trees( + base={"a.txt": {"content_hash": "A", "executable": False}}, + local={"a.txt": {"content_hash": "A", "executable": False}}, + upstream={"a.txt": {"content_hash": "U", "executable": False}}, + ) + + assert plan.files["a.txt"].source == "upstream" + assert plan.conflicts == [] + + +def test_supporting_file_merge_blocks_divergent_edits() -> None: + plan = merge_supporting_file_trees( + base={"a.txt": {"content_hash": "A", "executable": False}}, + local={"a.txt": {"content_hash": "L", "executable": False}}, + upstream={"a.txt": {"content_hash": "U", "executable": False}}, + ) + + assert plan.conflicts[0].path == "a.txt" + + +def test_three_way_synthesizer_prompt_labels_all_inputs() -> None: + provider = CountingProvider( + json.dumps( + { + "frontmatter": {"name": "baoyu-comic", "description": "Comic workflow", "tools": []}, + "content": "# Baoyu Comic\n\nMerged.", + "change_reason": "Adopt upstream while preserving local review.", + "preserved_local_sections": ["Review"], + "adopted_upstream_sections": ["Panel Layout"], + "resolved_conflicts": ["Output ordering"], + "dropped_sections": [], + } + ) + ) + async def run_case() -> dict: + return await SkillDraftSynthesizer().synthesize_plugin_update( + SkillLearningCandidate( + candidate_id="candidate", + kind="plugin_skill_update", + source_run_ids=[], + source_session_ids=[], + related_skill_names=["baoyu-comic"], + reason="merge", + ), + EvidenceSelector(RunMemoryStore(Path("/tmp/unused-runs"))).build_evidence_packet([], []), + provider, + "stub", + old_upstream={"content": "# Old\n"}, + current_local={"content": "# Local\n"}, + new_upstream={"content": "# New\n"}, + ) + + payload = asyncio.run(run_case()) + prompt = provider.calls[0]["messages"][1]["content"] + + assert "OLD UPSTREAM" in prompt + assert "CURRENT LOCAL" in prompt + assert "NEW UPSTREAM" in prompt + assert payload["preserved_local_sections"] == ["Review"] + assert payload["adopted_upstream_sections"] == ["Panel Layout"] diff --git a/app-instance/backend/tests/unit/test_plugin_skill_sync.py b/app-instance/backend/tests/unit/test_plugin_skill_sync.py index 5af6349..2b66d09 100644 --- a/app-instance/backend/tests/unit/test_plugin_skill_sync.py +++ b/app-instance/backend/tests/unit/test_plugin_skill_sync.py @@ -234,3 +234,58 @@ def test_sync_enabled_supersedes_stale_pending_update(tmp_path: Path) -> None: assert len(candidates) == 2 assert {candidate.status for candidate in candidates} == {"open", "superseded"} assert any(candidate.candidate_id != first_candidate.candidate_id for candidate in candidates) + + +def test_pause_leaves_skill_active_and_suppresses_update_candidates(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + plugin_root = _write_skill_plugin(workspace / "plugins") + _manager(workspace).enable("baoyu-comic") + _manager(workspace).pause("baoyu-comic") + _rewrite_plugin_version(plugin_root, version="1.1.0", skill_text="# Baoyu Comic\n\nPaused update.\n") + + _manager(workspace).sync_enabled() + + assert SkillSpecStore(workspace).get_skill_spec("baoyu-comic").status == "active" # type: ignore[union-attr] + assert SkillLearningStore(workspace / "memory" / "skills").list_learning_candidates() == [] + + +def test_resume_reconciles_and_syncs_updates(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + plugin_root = _write_skill_plugin(workspace / "plugins") + _manager(workspace).enable("baoyu-comic") + _manager(workspace).pause("baoyu-comic") + _rewrite_plugin_version(plugin_root, version="1.1.0", skill_text="# Baoyu Comic\n\nResume update.\n") + + state = _manager(workspace).resume("baoyu-comic") + + assert state.status == "update_pending" + assert SkillLearningStore(workspace / "memory" / "skills").list_learning_candidates() + + +def test_disable_plugin_disables_linked_skills_without_deleting_versions(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + _write_skill_plugin(workspace / "plugins") + _manager(workspace).enable("baoyu-comic") + + with pytest.raises(ValueError, match="disable_linked_skills"): + _manager(workspace).disable("baoyu-comic", disable_linked_skills=False) + state = _manager(workspace).disable("baoyu-comic", disable_linked_skills=True) + + spec = SkillSpecStore(workspace).get_skill_spec("baoyu-comic") + assert state.enabled is False + assert spec is not None and spec.status == "disabled" + assert SkillSpecStore(workspace).read_published_skill("baoyu-comic", "v0001") is not None + + +def test_adopt_detaches_plugin_binding_and_keeps_skill_active(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + _write_skill_plugin(workspace / "plugins") + _manager(workspace).enable("baoyu-comic") + + spec = _manager(workspace).adopt("baoyu-comic", "baoyu-comic") + state = PluginStateStore(workspace).get_plugin("baoyu-comic") + + assert spec.source_kind == "managed" + assert spec.status == "active" + assert "adopted_from_plugin:baoyu-comic" in spec.lineage + assert state is not None and "baoyu-comic" not in state.skills diff --git a/app-instance/backend/tests/unit/test_skill_learning_pipeline.py b/app-instance/backend/tests/unit/test_skill_learning_pipeline.py index 5b82dd9..5ebb16f 100644 --- a/app-instance/backend/tests/unit/test_skill_learning_pipeline.py +++ b/app-instance/backend/tests/unit/test_skill_learning_pipeline.py @@ -222,3 +222,80 @@ def test_publish_blocks_failed_preservation_report(tmp_path: Path) -> None: with pytest.raises(ValueError, match="preservation"): pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester") + + +def test_publish_blocks_plugin_three_way_without_plugin_preservation_report(tmp_path: Path) -> None: + pipeline = _pipeline(tmp_path) + draft = pipeline.draft_service.create_plugin_update_draft( + skill_name="plugin-skill", + base_version="v0001", + proposed_content="# Plugin\n\nDo it.", + proposed_frontmatter={"description": "plugin", "tools": []}, + created_by="test", + reason="plugin update", + provenance={"merge_mode": "three_way"}, + ) + pipeline.learning_store.write_eval_report( + SkillDraftEvalReport( + report_id="eval-plugin", + skill_name=draft.skill_name, + draft_id=draft.draft_id, + candidate_id="candidate-1", + passed=True, + baseline_score_avg=0.8, + candidate_score_avg=0.9, + score_delta=0.1, + regression_count=0, + improved_count=1, + unchanged_count=0, + confidence="medium", + mode="replay", + eval_version="replay-v1", + preservation_report={"passed": True, "mode": "ordinary"}, + ) + ) + pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester") + pipeline.check_safety(draft.skill_name, draft.draft_id) + + with pytest.raises(ValueError, match="three-way preservation"): + pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester") + + +def test_publish_blocks_plugin_update_with_unresolved_supporting_file_conflicts(tmp_path: Path) -> None: + pipeline = _pipeline(tmp_path) + draft = pipeline.draft_service.create_plugin_update_draft( + skill_name="plugin-skill", + base_version="v0001", + proposed_content="# Plugin\n\nDo it.", + proposed_frontmatter={"description": "plugin", "tools": []}, + created_by="test", + reason="plugin update", + provenance={ + "merge_mode": "three_way", + "supporting_file_plan": {"conflicts": [{"path": "a.txt", "reason": "diverged"}]}, + }, + ) + pipeline.learning_store.write_eval_report( + SkillDraftEvalReport( + report_id="eval-plugin-conflict", + skill_name=draft.skill_name, + draft_id=draft.draft_id, + candidate_id="candidate-1", + passed=True, + baseline_score_avg=0.8, + candidate_score_avg=0.9, + score_delta=0.1, + regression_count=0, + improved_count=1, + unchanged_count=0, + confidence="medium", + mode="replay", + eval_version="replay-v1", + preservation_report={"passed": True, "mode": "plugin_three_way", "unresolved_conflicts": []}, + ) + ) + pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester") + pipeline.check_safety(draft.skill_name, draft.draft_id) + + with pytest.raises(ValueError, match="supporting-file conflicts"): + pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester") diff --git a/app-instance/backend/tests/unit/test_skill_learning_preservation.py b/app-instance/backend/tests/unit/test_skill_learning_preservation.py index f82e330..f2c5e22 100644 --- a/app-instance/backend/tests/unit/test_skill_learning_preservation.py +++ b/app-instance/backend/tests/unit/test_skill_learning_preservation.py @@ -1,6 +1,6 @@ from __future__ import annotations -from beaver.skills.learning.preservation import check_preservation +from beaver.skills.learning.preservation import check_plugin_merge_preservation, check_preservation def test_preservation_passes_when_base_sections_remain() -> None: @@ -25,3 +25,29 @@ def test_preservation_flags_dropped_section() -> None: assert report["passed"] is False assert report["risk_level"] == "high" assert "Safety" in report["dropped_sections"] + + +def test_plugin_merge_preservation_checks_local_and_upstream_and_conflicts() -> None: + report = check_plugin_merge_preservation( + local_content="# Local\n\n## Review\n\nKeep review.\n", + upstream_content="# Upstream\n\n## Safety\n\nDo not leak secrets.\n", + draft_content="# Draft\n\n## Review\n\nKeep review.\n\n## Safety\n\nDo not leak secrets.\n", + merge_decisions={"resolved_conflicts": ["ordering"], "unresolved_conflicts": []}, + ) + + assert report["mode"] == "plugin_three_way" + assert report["passed"] is True + assert report["local"]["passed"] is True + assert report["upstream"]["passed"] is True + + +def test_plugin_merge_preservation_fails_unresolved_conflicts() -> None: + report = check_plugin_merge_preservation( + local_content="# Local\n\n## Review\n\nKeep review.\n", + upstream_content="# Upstream\n\n## Safety\n\nDo not leak secrets.\n", + draft_content="# Draft\n\n## Review\n\nKeep review.\n", + merge_decisions={"unresolved_conflicts": ["Safety conflict"]}, + ) + + assert report["passed"] is False + assert report["unresolved_conflicts"] == ["Safety conflict"]