"""LLM-backed draft synthesis for skill learning."""

from __future__ import annotations

import json
from typing import Any

from beaver.engine.providers.base import LLMProvider
from beaver.skills.learning.evidence import EvidencePacket
from beaver.memory.skills.models import SkillLearningCandidate

# Payload keys that always hold a de-duplicated list of section names.
_SECTION_KEYS = ("preserved_sections", "changed_sections", "dropped_sections")


class SkillDraftSynthesizer:
    """Build skill draft payloads from a learning candidate and its evidence.

    Every public method returns a dict with the keys ``frontmatter``,
    ``content``, ``change_reason``, ``preserved_sections``,
    ``changed_sections`` and ``dropped_sections``. When the provider reply
    cannot be parsed into that shape, a draft assembled directly from the
    evidence packet is returned instead.
    """

    _SYSTEM_PROMPT = (
        "You synthesize Beaver skill drafts from execution evidence. "
        "Return only JSON with keys: frontmatter, content, change_reason, "
        "preserved_sections, changed_sections, dropped_sections."
    )

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"revise"``, optionally from *base_skill*."""
        return await self._synthesize(
            candidate, evidence_packet, provider, model, "revise", base_skill=base_skill
        )

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"new"`` and no base skill."""
        return await self._synthesize(
            candidate, evidence_packet, provider, model, "new", base_skill=None
        )

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        base_skill: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Synthesize a draft with action ``"merge"``, optionally from *base_skill*."""
        return await self._synthesize(
            candidate, evidence_packet, provider, model, "merge", base_skill=base_skill
        )

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
        *,
        base_skill: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Ask *provider* for a draft and return a normalized payload.

        The reply text is parsed as JSON. A reply that yields a usable
        payload is normalized; any other reply is replaced by the
        evidence-only fallback draft.
        """
        prompt = self._build_prompt(candidate, evidence_packet, action, base_skill=base_skill)
        response = await provider.chat(
            messages=[
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0,
        )
        payload = self._parse_payload(response.content or "")
        if payload:
            return self._normalize_payload(payload, evidence_packet)
        return self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        action: str,
        base_skill: dict[str, Any] | None = None,
    ) -> str:
        """Render the user prompt describing the candidate, evidence and base skill.

        Empty tool lists, task summaries and session excerpts are rendered
        as explicit placeholders rather than blank sections.
        """
        tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        tool_section = ", ".join(tool_names) if tool_names else "none observed"
        selected_tool_names = _coerce_string_list(
            evidence_packet.metadata.get("selected_tool_names")
        )
        selected_tool_section = (
            ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
        )

        # ``or`` keeps the prompt well-formed when there is no evidence text:
        # without it the summaries section would end in a dangling "- " bullet.
        summary_section = "\n- ".join(evidence_packet.task_summaries) or "none recorded"
        excerpt_section = "\n\n".join(evidence_packet.session_excerpts) or "none recorded"

        base_section = ""
        if base_skill:
            base_section = (
                "\n\nBase skill snapshot:\n"
                f"- skill_name: {base_skill.get('skill_name')}\n"
                f"- version: {base_skill.get('version')}\n"
                f"- frontmatter: {json.dumps(base_skill.get('frontmatter') or {}, ensure_ascii=False, sort_keys=True)}\n"
                f"- tool_hints: {base_skill.get('tool_hints') or []}\n"
                f"- summary: {base_skill.get('summary') or ''}\n"
                "Base skill content:\n"
                f"{base_skill.get('content') or ''}\n"
                "Preserve existing instructions unless the evidence requires a change. "
                "If any section is changed or dropped, explain it in changed_sections or dropped_sections."
            )

        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
            f"Reason: {candidate.reason}\n"
            f"Related skills: {candidate.related_skill_names}\n"
            f"Called tool names: {tool_section}\n"
            f"Run-selected tool names: {selected_tool_section}\n"
            f"Task summaries:\n- "
            + summary_section
            + "\n\nSession excerpts:\n"
            + excerpt_section
            + base_section
            + "\n\nReturn JSON only. The frontmatter object must include:"
            + "\n- description: a concise skill description"
            + "\n- tools: an explicit JSON array of exact tool names this skill needs. "
            + "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
            + "Use [] only when no tool is required."
            + "\nThe JSON may include preserved_sections, changed_sections, and dropped_sections arrays."
        )

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Extract a draft payload from the raw reply text.

        Returns an empty dict when no JSON object can be decoded, or when
        ``frontmatter`` is not a dict or ``content`` is not a string.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            # Drop the opening fence (with any language tag) and the closing fence.
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()

        payload = _load_json_value(cleaned)
        if not isinstance(payload, dict):
            return {}

        frontmatter = payload.get("frontmatter")
        content_value = payload.get("content")
        if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
            return {}

        return {
            "frontmatter": frontmatter,
            "content": content_value.strip(),
            "change_reason": str(payload.get("change_reason") or ""),
            **{key: _coerce_string_list(payload.get(key)) for key in _SECTION_KEYS},
        }

    @staticmethod
    def _normalize_payload(
        payload: dict[str, Any], evidence_packet: EvidencePacket
    ) -> dict[str, Any]:
        """Return a copy of *payload* whose ``frontmatter["tools"]`` is a clean list.

        When the draft names no tools, the ``tool_names`` recorded in the
        evidence packet metadata are used instead.
        """
        frontmatter = dict(payload.get("frontmatter") or {})
        tool_hints = _coerce_string_list(frontmatter.get("tools"))
        # NOTE(review): an explicit empty ``tools`` list from the model is also
        # replaced here, although the prompt allows ``[]`` for "no tool
        # required" - confirm this override is intended.
        if not tool_hints:
            tool_hints = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
        frontmatter["tools"] = tool_hints

        return {
            "frontmatter": frontmatter,
            "content": str(payload.get("content") or "").strip(),
            "change_reason": str(payload.get("change_reason") or ""),
            **{key: _coerce_string_list(payload.get(key)) for key in _SECTION_KEYS},
        }

    @staticmethod
    def _fallback_payload(
        candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str
    ) -> dict[str, Any]:
        """Build a minimal draft from the evidence alone, without the model.

        The title comes from the first related skill name (underscores
        replaced by hyphens) and the body lists at most five task summaries.
        """
        related = (
            candidate.related_skill_names[0]
            if candidate.related_skill_names
            else "generated-skill"
        )
        title = related.replace("_", "-")
        content = (
            "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5])
            or "- No evidence captured."
        )
        return {
            "frontmatter": {
                "description": candidate.reason or f"Auto-generated {action} draft for {title}.",
                "tools": _coerce_string_list(evidence_packet.metadata.get("tool_names")),
            },
            "content": f"# {title}\n\n## Evidence\n\n{content}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
            "preserved_sections": [],
            "changed_sections": [],
            "dropped_sections": [],
        }


def _load_json_value(text: str) -> Any:
    """Decode *text* as JSON, tolerating prose around a single JSON object.

    The whole text is tried first. If that fails, the span from the first
    ``{`` to the last ``}`` is tried, which recovers replies that wrap the
    object in commentary or a single-line code fence. Returns ``None`` when
    nothing can be decoded.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        return json.loads(text[start : end + 1])
    except json.JSONDecodeError:
        return None


def _coerce_string_list(value: Any) -> list[str]:
    """Coerce *value* into a list of unique, non-empty, stripped strings.

    Lists and tuples are taken item by item, a string is split on commas,
    and any other value yields an empty list. ``None`` items are skipped so
    a JSON ``null`` never turns into the literal string ``"None"``.
    First-seen order is preserved.
    """
    if isinstance(value, str):
        raw_items: Any = value.split(",")
    elif isinstance(value, (list, tuple)):
        raw_items = value
    else:
        return []

    stripped = (str(item).strip() for item in raw_items if item is not None)
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(text for text in stripped if text))