feat(skill-learning): preserve base skill during synthesis

2026-06-08 13:28:41 +08:00
parent 6dc580ab26
commit a925f0e77f
3 changed files with 136 additions and 8 deletions
--- a/app-instance/backend/beaver/skills/learning/service.py
+++ b/app-instance/backend/beaver/skills/learning/service.py
@ -205,7 +205,13 @@ class SkillLearningService:
            )
        if candidate.kind == "merge_skills":
            target_name = self._suggest_skill_name(candidate, packet)
-            payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
+            payload = await self.synthesizer.synthesize_merge(
                candidate,
                packet,
                provider,
                model,
                base_skill=self._merged_base_skill_snapshot(candidate.related_skill_names),
            )
            return self.draft_service.create_merge_draft(
                skill_name=target_name,
                base_version=None,
@ -217,7 +223,13 @@ class SkillLearningService:
            )
        target_skill = candidate.related_skill_names[0]
        base_version = candidate.evidence.get("skill_version")
-        payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
+        payload = await self.synthesizer.synthesize_revision(
            candidate,
            packet,
            provider,
            model,
            base_skill=self._base_skill_snapshot(target_skill, base_version),
        )
        return self.draft_service.create_revision_draft(
            skill_name=target_skill,
            base_version=base_version,
@ -228,6 +240,46 @@ class SkillLearningService:
            evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
        )
    def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None:
        loaded = self.draft_service.store.read_published_skill(skill_name, version)
        if loaded is None:
            return None
        return {
            "skill_name": loaded.version.skill_name,
            "version": loaded.version.version,
            "frontmatter": dict(loaded.version.frontmatter),
            "content": loaded.content,
            "summary": loaded.version.summary,
            "tool_hints": list(loaded.version.tool_hints),
        }
    def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None:
        snapshots = [
            snapshot
            for name in skill_names
            if (snapshot := self._base_skill_snapshot(name, None)) is not None
        ]
        if not snapshots:
            return None
        return {
            "skill_name": "merge:" + ",".join(str(item["skill_name"]) for item in snapshots),
            "version": "mixed",
            "frontmatter": {"merged_skills": [item["frontmatter"] for item in snapshots]},
            "content": "\n\n".join(
                f"<!-- base skill: {item['skill_name']} {item['version']} -->\n{item['content']}"
                for item in snapshots
            ),
            "summary": "\n".join(str(item["summary"]) for item in snapshots if item.get("summary")),
            "tool_hints": list(
                dict.fromkeys(
                    tool
                    for item in snapshots
                    for tool in item.get("tool_hints", [])
                    if str(tool).strip()
                )
            ),
        }
    def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
        snapshots: list[SkillPerformanceSnapshot] = []
        grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
--- a/app-instance/backend/beaver/skills/learning/synthesizer.py
+++ b/app-instance/backend/beaver/skills/learning/synthesizer.py
@ -17,8 +17,9 @@ class SkillDraftSynthesizer:
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
-        return await self._synthesize(candidate, evidence_packet, provider, model, "revise")
+        return await self._synthesize(candidate, evidence_packet, provider, model, "revise", base_skill=base_skill)
    async def synthesize_new_skill(
        self,
@ -27,7 +28,7 @@ class SkillDraftSynthesizer:
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
-        return await self._synthesize(candidate, evidence_packet, provider, model, "new")
+        return await self._synthesize(candidate, evidence_packet, provider, model, "new", base_skill=None)
    async def synthesize_merge(
        self,
@ -35,8 +36,9 @@ class SkillDraftSynthesizer:
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
-        return await self._synthesize(candidate, evidence_packet, provider, model, "merge")
+        return await self._synthesize(candidate, evidence_packet, provider, model, "merge", base_skill=base_skill)
    async def _synthesize(
        self,
@ -45,15 +47,18 @@ class SkillDraftSynthesizer:
        provider: LLMProvider,
        model: str,
        action: str,
        *,
        base_skill: dict[str, Any] | None,
    ) -> dict[str, Any]:
-        prompt = self._build_prompt(candidate, evidence_packet, action)
+        prompt = self._build_prompt(candidate, evidence_packet, action, base_skill=base_skill)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You synthesize Beaver skill drafts from execution evidence. "
-                        "Return only JSON with keys: frontmatter, content, change_reason."
+                        "Return only JSON with keys: frontmatter, content, change_reason, "
                        "preserved_sections, changed_sections, dropped_sections."
                    ),
                },
                {"role": "user", "content": prompt},
@ -69,11 +74,30 @@ class SkillDraftSynthesizer:
        return self._fallback_payload(candidate, evidence_packet, action)
    @staticmethod
-    def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
+    def _build_prompt(
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        action: str,
        base_skill: dict[str, Any] | None = None,
    ) -> str:
        tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        tool_section = ", ".join(tool_names) if tool_names else "none observed"
        selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names"))
        selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
        base_section = ""
        if base_skill:
            base_section = (
                "\n\nBase skill snapshot:\n"
                f"- skill_name: {base_skill.get('skill_name')}\n"
                f"- version: {base_skill.get('version')}\n"
                f"- frontmatter: {json.dumps(base_skill.get('frontmatter') or {}, ensure_ascii=False, sort_keys=True)}\n"
                f"- tool_hints: {base_skill.get('tool_hints') or []}\n"
                f"- summary: {base_skill.get('summary') or ''}\n"
                "Base skill content:\n"
                f"{base_skill.get('content') or ''}\n"
                "Preserve existing instructions unless the evidence requires a change. "
                "If any section is changed or dropped, explain it in changed_sections or dropped_sections."
            )
        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
@ -83,11 +107,13 @@ class SkillDraftSynthesizer:
            f"Run-selected tool names: {selected_tool_section}\n"
            f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
            + "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
            + base_section
            + "\n\nReturn JSON only. The frontmatter object must include:"
            + "\n- description: a concise skill description"
            + "\n- tools: an explicit JSON array of exact tool names this skill needs. "
            + "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
            + "Use [] only when no tool is required."
            + "\nThe JSON may include preserved_sections, changed_sections, and dropped_sections arrays."
        )
    @staticmethod
@ -111,6 +137,9 @@ class SkillDraftSynthesizer:
            "frontmatter": frontmatter,
            "content": content_value.strip(),
            "change_reason": str(payload.get("change_reason") or ""),
            "preserved_sections": _coerce_string_list(payload.get("preserved_sections")),
            "changed_sections": _coerce_string_list(payload.get("changed_sections")),
            "dropped_sections": _coerce_string_list(payload.get("dropped_sections")),
        }
    @staticmethod
@ -124,6 +153,9 @@ class SkillDraftSynthesizer:
            "frontmatter": frontmatter,
            "content": str(payload.get("content") or "").strip(),
            "change_reason": str(payload.get("change_reason") or ""),
            "preserved_sections": _coerce_string_list(payload.get("preserved_sections")),
            "changed_sections": _coerce_string_list(payload.get("changed_sections")),
            "dropped_sections": _coerce_string_list(payload.get("dropped_sections")),
        }
    @staticmethod
@ -138,6 +170,9 @@ class SkillDraftSynthesizer:
            },
            "content": f"# {title}\n\n## Evidence\n\n{content}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
            "preserved_sections": [],
            "changed_sections": [],
            "dropped_sections": [],
        }
--- a/app-instance/backend/tests/unit/test_skill_learning_synthesizer_preservation.py
+++ b/app-instance/backend/tests/unit/test_skill_learning_synthesizer_preservation.py
@ -0,0 +1,41 @@
 from __future__ import annotations
 from beaver.memory.skills import SkillLearningCandidate
 from beaver.skills.learning.evidence import EvidencePacket
 from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
 def test_revision_prompt_includes_base_skill_snapshot() -> None:
    """A "revise" prompt built with a base skill embeds that skill's snapshot.

    Checks that the rendered prompt contains the snapshot header, the base
    skill's content, and the section-tracking key names.
    """
    base_skill = {
        "skill_name": "debug-skill",
        "version": "v0001",
        "frontmatter": {"description": "Debug tests", "tools": ["read_file"]},
        "content": "# Debug Skill\n\n## Safety\n\nDo not delete files.",
        "summary": "Debug tests safely.",
        "tool_hints": ["read_file"],
    }
    learning_candidate = SkillLearningCandidate(
        candidate_id="candidate-1",
        kind="revise_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=["debug-skill"],
        reason="Improve debugging flow.",
    )
    evidence = EvidencePacket(
        run_ids=["run-1"],
        session_ids=["session-1"],
        task_summaries=["debug a failing test"],
        session_excerpts=["assistant: fixed it"],
    )

    rendered = SkillDraftSynthesizer._build_prompt(
        learning_candidate,
        evidence,
        "revise",
        base_skill=base_skill,
    )

    # Same five substring checks as before, in the same order.
    expected_fragments = (
        "Base skill snapshot",
        "# Debug Skill",
        "Do not delete files.",
        "preserved_sections",
        "dropped_sections",
    )
    for fragment in expected_fragments:
        assert fragment in rendered