"""Skill learning loop services.""" from __future__ import annotations from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone from itertools import combinations import re from typing import Any from uuid import uuid4 from beaver.engine.providers import ProviderBundle from beaver.memory.runs.models import RunRecord, SkillEffectRecord from beaver.memory.runs.store import RunMemoryStore from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot from beaver.memory.skills.store import SkillLearningStore from beaver.skills.drafts.service import DraftService from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector from beaver.skills.learning.synthesizer import SkillDraftSynthesizer from beaver.skills.specs import SkillActivationReceipt @dataclass(slots=True) class RunReceiptContext: run_record: RunRecord effect_records: list[SkillEffectRecord] = field(default_factory=list) class SkillLearningService: def __init__( self, *, run_store: RunMemoryStore, learning_store: SkillLearningStore, draft_service: DraftService, evidence_selector: EvidenceSelector, synthesizer: SkillDraftSynthesizer | None = None, ) -> None: self.run_store = run_store self.learning_store = learning_store self.draft_service = draft_service self.evidence_selector = evidence_selector self.synthesizer = synthesizer or SkillDraftSynthesizer() def collect_run_receipts( self, run_result_context: RunReceiptContext, *, generate_candidates: bool = True, ) -> list[SkillLearningCandidate]: self.run_store.append_run_record(run_result_context.run_record) for effect in run_result_context.effect_records: self.run_store.append_skill_effect(effect) self.rescore_skill_versions() if not generate_candidates: return [] return self.build_learning_candidates() def build_learning_candidates(self) -> list[SkillLearningCandidate]: candidates: list[SkillLearningCandidate] = [] candidates.extend(self._build_revision_candidates()) candidates.extend(self._build_new_skill_candidates()) candidates.extend(self._build_merge_candidates()) candidates.extend(self._build_retire_candidates()) existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()} for candidate in candidates: if candidate.candidate_id not in existing_ids: self.learning_store.record_learning_candidate(candidate) existing_ids.add(candidate.candidate_id) return candidates def build_learning_candidates_for_task( self, task_id: str, *, final_accepted_run_id: str | None = None, trigger_run_id: str | None = None, ) -> list[SkillLearningCandidate]: """Build candidates from a user-accepted Task and all of its runs.""" final_accepted_run_id = final_accepted_run_id or trigger_run_id if not final_accepted_run_id: return [] runs = [record for record in self.run_store.list_runs() if record.task_id == task_id] final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None) if final_run is None or not self._is_task_accepted_run(final_run): return [] source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id)) if not source_runs: return [] candidates: list[SkillLearningCandidate] = [] published_receipts = [ receipt for record in source_runs for receipt in record.activated_skills if self._is_published_skill_receipt(receipt) ] source_run_ids = [record.run_id for record in source_runs] source_session_ids = list(dict.fromkeys(record.session_id for record in source_runs)) representative_task_text = self._representative_task_text(source_runs, fallback=final_run.task_text) if not published_receipts: candidates.append( SkillLearningCandidate( candidate_id=f"new:task:{task_id}", kind="new_skill", source_run_ids=source_run_ids, source_session_ids=source_session_ids, related_skill_names=[], reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.", evidence={ "task_id": task_id, "final_accepted_run_id": final_accepted_run_id, "source_run_ids": source_run_ids, "task_text": representative_task_text, "theme": self._task_theme(representative_task_text), }, status="open", priority=1, confidence=0.8, trigger_reason="task_accepted", ) ) else: seen: set[tuple[str, str]] = set() for receipt in published_receipts: key = (receipt.skill_name, receipt.skill_version) if key in seen: continue seen.add(key) skill_runs = [ record for record in source_runs if any( item.skill_name == receipt.skill_name and item.skill_version == receipt.skill_version and self._is_published_skill_receipt(item) for item in record.activated_skills ) ] candidates.append( SkillLearningCandidate( candidate_id=f"revise:{receipt.skill_name}:{receipt.skill_version}:task:{task_id}", kind="revise_skill", source_run_ids=[record.run_id for record in skill_runs], source_session_ids=list(dict.fromkeys(record.session_id for record in skill_runs)), related_skill_names=[receipt.skill_name], reason=( f"Task {task_id} succeeded with published skill " f"{receipt.skill_name}/{receipt.skill_version}; consider whether the skill should capture this evidence." ), evidence={ "task_id": task_id, "final_accepted_run_id": final_accepted_run_id, "source_run_ids": source_run_ids, "skill_version": receipt.skill_version, }, status="open", priority=1, confidence=0.7, trigger_reason="task_accepted", ) ) existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()} created: list[SkillLearningCandidate] = [] for candidate in candidates: if candidate.candidate_id in existing_ids: continue self.learning_store.record_learning_candidate(candidate) existing_ids.add(candidate.candidate_id) created.append(candidate) return created async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any: candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()} candidate = candidates.get(candidate_id) if candidate is None: raise ValueError(f"Unknown learning candidate: {candidate_id}") if candidate.kind == "retire_skill": target_skill = candidate.related_skill_names[0] return self.draft_service.create_retire_proposal( skill_name=target_skill, base_version=candidate.evidence.get("skill_version"), created_by="learning-loop", reason=candidate.reason, evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids) provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider model = ( provider_bundle.auxiliary_runtime.model if provider_bundle.auxiliary_runtime is not None else provider_bundle.main_runtime.model ) if candidate.kind == "new_skill": payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model) return self.draft_service.create_new_skill_draft( skill_name=self._suggest_skill_name(candidate, packet, payload.get("frontmatter")), proposed_content=payload["content"], proposed_frontmatter=payload["frontmatter"], created_by="learning-loop", reason=payload["change_reason"] or candidate.reason, evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) if candidate.kind == "merge_skills": target_name = self._suggest_skill_name(candidate, packet) payload = await self.synthesizer.synthesize_merge( candidate, packet, provider, model, base_skill=self._merged_base_skill_snapshot(candidate.related_skill_names), ) return self.draft_service.create_merge_draft( skill_name=target_name, base_version=None, proposed_content=payload["content"], proposed_frontmatter=payload["frontmatter"], created_by="learning-loop", reason=payload["change_reason"] or candidate.reason, evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) target_skill = candidate.related_skill_names[0] base_version = candidate.evidence.get("skill_version") payload = await self.synthesizer.synthesize_revision( candidate, packet, provider, model, base_skill=self._base_skill_snapshot(target_skill, base_version), ) return self.draft_service.create_revision_draft( skill_name=target_skill, base_version=base_version, proposed_content=payload["content"], proposed_frontmatter=payload["frontmatter"], created_by="learning-loop", reason=payload["change_reason"] or candidate.reason, evidence_refs=[{"run_id": item} for item in candidate.source_run_ids], ) def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None: loaded = self.draft_service.store.read_published_skill(skill_name, version) if loaded is None: return None return { "skill_name": loaded.version.skill_name, "version": loaded.version.version, "frontmatter": dict(loaded.version.frontmatter), "content": loaded.content, "summary": loaded.version.summary, "tool_hints": list(loaded.version.tool_hints), } def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None: snapshots = [ snapshot for name in skill_names if (snapshot := self._base_skill_snapshot(name, None)) is not None ] if not snapshots: return None return { "skill_name": "merge:" + ",".join(str(item["skill_name"]) for item in snapshots), "version": "mixed", "frontmatter": {"merged_skills": [item["frontmatter"] for item in snapshots]}, "content": "\n\n".join( f"\n{item['content']}" for item in snapshots ), "summary": "\n".join(str(item["summary"]) for item in snapshots if item.get("summary")), "tool_hints": list( dict.fromkeys( tool for item in snapshots for tool in item.get("tool_hints", []) if str(tool).strip() ) ), } def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]: snapshots: list[SkillPerformanceSnapshot] = [] grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {} for record in self.run_store.list_runs(): for receipt in record.activated_skills: key = (receipt.skill_name, receipt.skill_version) grouped.setdefault(key, []) for effect in self._all_effects(): grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect) for (skill_name, skill_version), effects in grouped.items(): activation_count = len(effects) success_count = sum(1 for item in effects if item.success) failure_count = activation_count - success_count last_feedback = next((item.feedback_score for item in reversed(effects) if item.feedback_score is not None), None) latest_used = effects[-1].created_at if effects else "" snapshot = SkillPerformanceSnapshot( skill_name=skill_name, skill_version=skill_version, activation_count=activation_count, success_count=success_count, failure_count=failure_count, latest_used_at=latest_used, last_feedback_score=last_feedback, ) self.learning_store.update_performance_snapshot(snapshot) snapshots.append(snapshot) return snapshots def _build_revision_candidates(self) -> list[SkillLearningCandidate]: candidates: list[SkillLearningCandidate] = [] for snapshot in self.learning_store.list_low_performing_versions(): runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5) if len(runs) < 2: continue candidate = SkillLearningCandidate( candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version), kind="revise_skill", source_run_ids=[record.run_id for record in runs], source_session_ids=list(dict.fromkeys(record.session_id for record in runs)), related_skill_names=[snapshot.skill_name], reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.", evidence={"skill_version": snapshot.skill_version}, status="open", ) candidates.append(candidate) return candidates def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]: groups: dict[str, list[RunRecord]] = {} all_runs = self.run_store.list_runs() runs_by_task: dict[str, list[RunRecord]] = {} for record in all_runs: if record.task_id: runs_by_task.setdefault(record.task_id, []).append(record) for record in all_runs: task_runs = runs_by_task.get(record.task_id, [record]) key = self._task_theme(self._representative_task_text(task_runs, fallback=record.task_text)) if not key: continue groups.setdefault(key, []).append(record) candidates: list[SkillLearningCandidate] = [] for theme, runs in groups.items(): successful = [record for record in runs if self._is_task_accepted_run(record)] if len(successful) < 2: continue if any(record.activated_skills for record in successful): continue candidate = SkillLearningCandidate( candidate_id=self._candidate_id("new", theme, str(len(successful))), kind="new_skill", source_run_ids=[record.run_id for record in successful[-5:]], source_session_ids=list(dict.fromkeys(record.session_id for record in successful[-5:])), related_skill_names=[], reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.", evidence={"theme": theme}, status="open", ) candidates.append(candidate) return candidates def _build_merge_candidates(self) -> list[SkillLearningCandidate]: pair_counts: dict[tuple[str, str], list[RunRecord]] = {} for record in self.run_store.list_runs(): if not self._is_task_accepted_run(record): continue unique = sorted({receipt.skill_name for receipt in record.activated_skills}) for pair in combinations(unique, 2): pair_counts.setdefault(pair, []).append(record) candidates: list[SkillLearningCandidate] = [] for pair, runs in pair_counts.items(): if len(runs) < 2: continue candidate = SkillLearningCandidate( candidate_id=self._candidate_id("merge", *pair), kind="merge_skills", source_run_ids=[record.run_id for record in runs[-5:]], source_session_ids=list(dict.fromkeys(record.session_id for record in runs[-5:])), related_skill_names=list(pair), reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.", evidence={"pair": list(pair)}, status="open", ) candidates.append(candidate) return candidates def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]: candidates: list[SkillLearningCandidate] = [] cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days) for snapshot in self.learning_store.list_performance_snapshots(): if snapshot.activation_count == 0 or not snapshot.latest_used_at: continue latest_used = self._parse_timestamp(snapshot.latest_used_at) if latest_used is None or latest_used > cutoff: continue runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3) candidate = SkillLearningCandidate( candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version), kind="retire_skill", source_run_ids=[record.run_id for record in runs], source_session_ids=list(dict.fromkeys(record.session_id for record in runs)), related_skill_names=[snapshot.skill_name], reason=( f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive " f"since {snapshot.latest_used_at} and may be ready for retirement." ), evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at}, status="open", ) candidates.append(candidate) return candidates def _all_effects(self) -> list[SkillEffectRecord]: effects: list[SkillEffectRecord] = [] for candidate in self.learning_store.list_performance_snapshots(): effects.extend(self.run_store.list_skill_effects(candidate.skill_name, version=candidate.skill_version)) if effects: return effects # Bootstrap from runs when there are no prior snapshots. for record in self.run_store.list_runs(): for receipt in record.activated_skills: effects.extend(self.run_store.list_skill_effects(receipt.skill_name, version=receipt.skill_version)) return effects @staticmethod def _is_task_accepted_run(record: RunRecord) -> bool: feedback = record.feedback or {} acceptance_type = feedback.get("acceptance_type") if acceptance_type is None and feedback.get("feedback_type") == "satisfied": acceptance_type = "accept" return ( bool(record.success) and bool(record.task_id) and acceptance_type == "accept" ) @staticmethod def _is_published_skill_receipt(receipt: SkillActivationReceipt) -> bool: return ( not receipt.skill_name.startswith(("draft:", "ephemeral:")) and not receipt.skill_version.startswith(("draft:", "ephemeral:")) and receipt.activation_reason not in {"generated_missing_skill", "ephemeral_guidance"} ) @staticmethod def _candidate_id(kind: str, *parts: str) -> str: return f"{kind}:{'|'.join(parts)}" @staticmethod def _task_theme(task_text: str) -> str: cleaned = re.sub(r"\s+", " ", task_text.strip()) if not cleaned: return "" first_sentence = re.split(r"[。!?.!?]", cleaned, maxsplit=1)[0].strip() if not first_sentence: first_sentence = cleaned words = first_sentence.split(" ") return " ".join(words[:8]).strip() @staticmethod def _representative_task_text(runs: list[RunRecord], *, fallback: str = "") -> str: ordered = sorted( runs, key=lambda item: ( item.attempt_index is None, item.attempt_index if item.attempt_index is not None else 0, item.started_at, item.run_id, ), ) for record in ordered: text = record.task_text.strip() if text: return text return fallback.strip() @staticmethod def _suggest_skill_name( candidate: SkillLearningCandidate, packet: EvidencePacket, frontmatter: dict[str, Any] | None = None, ) -> str: if candidate.related_skill_names: return candidate.related_skill_names[0] if isinstance(frontmatter, dict): description = str(frontmatter.get("description") or "") seed = SkillLearningService._slugify_skill_name(description) if seed: return seed if packet.task_summaries: seed = SkillLearningService._slugify_skill_name(packet.task_summaries[0]) if seed: return seed return f"generated-skill-{uuid4().hex[:8]}" @staticmethod def _slugify_skill_name(value: str) -> str: seed = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") seed = re.sub(r"-+", "-", seed) if not seed or seed.isdigit() or len(seed) < 3: return "" words = [part for part in seed.split("-") if part and not part.isdigit()] seed = "-".join(words) or seed return seed[:48].strip("-") @staticmethod def _parse_timestamp(value: str) -> datetime | None: try: parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) except ValueError: return None if parsed.tzinfo is None: return parsed.replace(tzinfo=timezone.utc) return parsed.astimezone(timezone.utc)