diff --git a/app-instance/backend/beaver/skills/learning/service.py b/app-instance/backend/beaver/skills/learning/service.py index 3262280..50c9634 100644 --- a/app-instance/backend/beaver/skills/learning/service.py +++ b/app-instance/backend/beaver/skills/learning/service.py @@ -205,7 +205,13 @@ class SkillLearningService: ) if candidate.kind == "merge_skills": target_name = self._suggest_skill_name(candidate, packet) - payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model) + payload = await self.synthesizer.synthesize_merge( + candidate, + packet, + provider, + model, + base_skill=self._merged_base_skill_snapshot(candidate.related_skill_names), + ) return self.draft_service.create_merge_draft( skill_name=target_name, base_version=None, @@ -217,7 +223,13 @@ class SkillLearningService: ) target_skill = candidate.related_skill_names[0] base_version = candidate.evidence.get("skill_version") - payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model) + payload = await self.synthesizer.synthesize_revision( + candidate, + packet, + provider, + model, + base_skill=self._base_skill_snapshot(target_skill, base_version), + ) return self.draft_service.create_revision_draft( skill_name=target_skill, base_version=base_version, @@ -228,6 +240,46 @@ class SkillLearningService: evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) + def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None: + loaded = self.draft_service.store.read_published_skill(skill_name, version) + if loaded is None: + return None + return { + "skill_name": loaded.version.skill_name, + "version": loaded.version.version, + "frontmatter": dict(loaded.version.frontmatter), + "content": loaded.content, + "summary": loaded.version.summary, + "tool_hints": list(loaded.version.tool_hints), + } + + def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None: + snapshots = [ + snapshot + for name in skill_names + if (snapshot := self._base_skill_snapshot(name, None)) is not None + ] + if not snapshots: + return None + return { + "skill_name": "merge:" + ",".join(str(item["skill_name"]) for item in snapshots), + "version": "mixed", + "frontmatter": {"merged_skills": [item["frontmatter"] for item in snapshots]}, + "content": "\n\n".join( + f"\n{item['content']}" + for item in snapshots + ), + "summary": "\n".join(str(item["summary"]) for item in snapshots if item.get("summary")), + "tool_hints": list( + dict.fromkeys( + tool + for item in snapshots + for tool in item.get("tool_hints", []) + if str(tool).strip() + ) + ), + } + def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]: snapshots: list[SkillPerformanceSnapshot] = [] grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {} diff --git a/app-instance/backend/beaver/skills/learning/synthesizer.py b/app-instance/backend/beaver/skills/learning/synthesizer.py index 353fc0b..ca95b88 100644 --- a/app-instance/backend/beaver/skills/learning/synthesizer.py +++ b/app-instance/backend/beaver/skills/learning/synthesizer.py @@ -17,8 +17,9 @@ class SkillDraftSynthesizer: evidence_packet: EvidencePacket, provider: LLMProvider, model: str, + base_skill: dict[str, Any] | None = None, ) -> dict[str, Any]: - return await self._synthesize(candidate, evidence_packet, provider, model, "revise") + return await self._synthesize(candidate, evidence_packet, provider, model, "revise", base_skill=base_skill) async def synthesize_new_skill( self, @@ -27,7 +28,7 @@ class SkillDraftSynthesizer: provider: LLMProvider, model: str, ) -> dict[str, Any]: - return await self._synthesize(candidate, evidence_packet, provider, model, "new") + return await self._synthesize(candidate, evidence_packet, provider, model, "new", base_skill=None) async def synthesize_merge( self, @@ -35,8 +36,9 @@ class SkillDraftSynthesizer: evidence_packet: EvidencePacket, provider: LLMProvider, model: str, + base_skill: dict[str, Any] | None = None, ) -> dict[str, Any]: - return await self._synthesize(candidate, evidence_packet, provider, model, "merge") + return await self._synthesize(candidate, evidence_packet, provider, model, "merge", base_skill=base_skill) async def _synthesize( self, @@ -45,15 +47,18 @@ class SkillDraftSynthesizer: provider: LLMProvider, model: str, action: str, + *, + base_skill: dict[str, Any] | None, ) -> dict[str, Any]: - prompt = self._build_prompt(candidate, evidence_packet, action) + prompt = self._build_prompt(candidate, evidence_packet, action, base_skill=base_skill) response = await provider.chat( messages=[ { "role": "system", "content": ( "You synthesize Beaver skill drafts from execution evidence. " - "Return only JSON with keys: frontmatter, content, change_reason." + "Return only JSON with keys: frontmatter, content, change_reason, " + "preserved_sections, changed_sections, dropped_sections." ), }, {"role": "user", "content": prompt}, @@ -69,11 +74,30 @@ class SkillDraftSynthesizer: return self._fallback_payload(candidate, evidence_packet, action) @staticmethod - def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str: + def _build_prompt( + candidate: SkillLearningCandidate, + evidence_packet: EvidencePacket, + action: str, + base_skill: dict[str, Any] | None = None, + ) -> str: tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names")) tool_section = ", ".join(tool_names) if tool_names else "none observed" selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names")) selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded" + base_section = "" + if base_skill: + base_section = ( + "\n\nBase skill snapshot:\n" + f"- skill_name: {base_skill.get('skill_name')}\n" + f"- version: {base_skill.get('version')}\n" + f"- frontmatter: {json.dumps(base_skill.get('frontmatter') or {}, ensure_ascii=False, sort_keys=True)}\n" + f"- tool_hints: {base_skill.get('tool_hints') or []}\n" + f"- summary: {base_skill.get('summary') or ''}\n" + "Base skill content:\n" + f"{base_skill.get('content') or ''}\n" + "Preserve existing instructions unless the evidence requires a change. " + "If any section is changed or dropped, explain it in changed_sections or dropped_sections." + ) return ( f"Action: {action}\n" f"Candidate kind: {candidate.kind}\n" @@ -83,11 +107,13 @@ class SkillDraftSynthesizer: f"Run-selected tool names: {selected_tool_section}\n" f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries) + "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts) + + base_section + "\n\nReturn JSON only. The frontmatter object must include:" + "\n- description: a concise skill description" + "\n- tools: an explicit JSON array of exact tool names this skill needs. " + "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. " + "Use [] only when no tool is required." + + "\nThe JSON may include preserved_sections, changed_sections, and dropped_sections arrays." ) @staticmethod @@ -111,6 +137,9 @@ class SkillDraftSynthesizer: "frontmatter": frontmatter, "content": content_value.strip(), "change_reason": str(payload.get("change_reason") or ""), + "preserved_sections": _coerce_string_list(payload.get("preserved_sections")), + "changed_sections": _coerce_string_list(payload.get("changed_sections")), + "dropped_sections": _coerce_string_list(payload.get("dropped_sections")), } @staticmethod @@ -124,6 +153,9 @@ class SkillDraftSynthesizer: "frontmatter": frontmatter, "content": str(payload.get("content") or "").strip(), "change_reason": str(payload.get("change_reason") or ""), + "preserved_sections": _coerce_string_list(payload.get("preserved_sections")), + "changed_sections": _coerce_string_list(payload.get("changed_sections")), + "dropped_sections": _coerce_string_list(payload.get("dropped_sections")), } @staticmethod @@ -138,6 +170,9 @@ class SkillDraftSynthesizer: }, "content": f"# {title}\n\n## Evidence\n\n{content}\n", "change_reason": candidate.reason or f"Fallback {action} synthesis.", + "preserved_sections": [], + "changed_sections": [], + "dropped_sections": [], } diff --git a/app-instance/backend/tests/unit/test_skill_learning_synthesizer_preservation.py b/app-instance/backend/tests/unit/test_skill_learning_synthesizer_preservation.py new file mode 100644 index 0000000..41a9f75 --- /dev/null +++ b/app-instance/backend/tests/unit/test_skill_learning_synthesizer_preservation.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from beaver.memory.skills import SkillLearningCandidate +from beaver.skills.learning.evidence import EvidencePacket +from beaver.skills.learning.synthesizer import SkillDraftSynthesizer + + +def test_revision_prompt_includes_base_skill_snapshot() -> None: + candidate = SkillLearningCandidate( + candidate_id="candidate-1", + kind="revise_skill", + source_run_ids=["run-1"], + source_session_ids=["session-1"], + related_skill_names=["debug-skill"], + reason="Improve debugging flow.", + ) + packet = EvidencePacket( + run_ids=["run-1"], + session_ids=["session-1"], + task_summaries=["debug a failing test"], + session_excerpts=["assistant: fixed it"], + ) + prompt = SkillDraftSynthesizer._build_prompt( + candidate, + packet, + "revise", + base_skill={ + "skill_name": "debug-skill", + "version": "v0001", + "frontmatter": {"description": "Debug tests", "tools": ["read_file"]}, + "content": "# Debug Skill\n\n## Safety\n\nDo not delete files.", + "summary": "Debug tests safely.", + "tool_hints": ["read_file"], + }, + ) + + assert "Base skill snapshot" in prompt + assert "# Debug Skill" in prompt + assert "Do not delete files." in prompt + assert "preserved_sections" in prompt + assert "dropped_sections" in prompt