"""File-backed skill learning store.""" from __future__ import annotations import json from pathlib import Path import threading from uuid import uuid4 from contextlib import contextmanager from typing import Iterator from beaver.foundation.utils.file_lock import WorkspaceWriteLock from .models import ( SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningAuditEvent, SkillLearningCandidate, SkillPerformanceSnapshot, ) class SkillLearningStore: def __init__(self, root: str | Path, *, write_lock: WorkspaceWriteLock | None = None) -> None: self.root = Path(root) self.root.mkdir(parents=True, exist_ok=True) self.write_lock = write_lock self._local_lock = threading.RLock() self.performance_path = self.root / "performance.jsonl" self.candidates_path = self.root / "learning-candidates.jsonl" self.audit_path = self.root / "learning-audit.jsonl" self.safety_reports_dir = self.root / "safety-reports" self.eval_reports_dir = self.root / "eval-reports" def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None: normalized = SkillLearningCandidate.from_dict(candidate.to_dict()) self._append_jsonl(self.candidates_path, normalized.to_dict()) self.append_audit_event( normalized.candidate_id, "candidate_created", { "kind": normalized.kind, "status": normalized.status, "reason": normalized.reason, }, ) def record_learning_candidate_if_absent( self, candidate: SkillLearningCandidate, ) -> tuple[SkillLearningCandidate, bool]: normalized = SkillLearningCandidate.from_dict(candidate.to_dict()) with self._locked(): existing = { item.candidate_id: item for item in self.list_learning_candidates() } found = existing.get(normalized.candidate_id) if found is not None: return found, False self._append_jsonl(self.candidates_path, normalized.to_dict()) self.append_audit_event( normalized.candidate_id, "candidate_created", { "kind": normalized.kind, "status": normalized.status, "reason": normalized.reason, }, ) return normalized, True def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None: with self._locked(): candidates = self.list_learning_candidates() updated: SkillLearningCandidate | None = None for index, candidate in enumerate(candidates): if candidate.candidate_id != candidate_id: continue payload = candidate.to_dict() payload.update(updates) if "updated_at" not in updates: payload["updated_at"] = _utc_now() updated = SkillLearningCandidate.from_dict(payload) candidates[index] = updated break if updated is None: return None self.candidates_path.parent.mkdir(parents=True, exist_ok=True) self.candidates_path.write_text( "".join( json.dumps(candidate.to_dict(), ensure_ascii=False, sort_keys=True) + "\n" for candidate in candidates ), encoding="utf-8", ) return updated def transition_learning_candidate( self, candidate_id: str, status: str, *, event_type: str | None = None, payload: dict | None = None, **updates: object, ) -> SkillLearningCandidate | None: updated = self.update_learning_candidate(candidate_id, status=status, **updates) if updated is not None: self.append_audit_event( candidate_id, event_type or f"candidate_{status}", {"status": status, **dict(payload or {})}, ) return updated def claim_learning_candidate_for_synthesis( self, candidate_id: str, *, force: bool = False, ) -> SkillLearningCandidate | None: """Atomically claim a candidate before the expensive draft synthesis step.""" with self._locked(): candidates = self.list_learning_candidates() claimed: SkillLearningCandidate | None = None for index, candidate in enumerate(candidates): if candidate.candidate_id != candidate_id: continue if candidate.status in {"queued", "synthesizing"}: return None if not force and candidate.draft_skill_name and candidate.draft_id: return None payload = candidate.to_dict() payload.update( { "status": "synthesizing", "last_error": None, "updated_at": _utc_now(), } ) claimed = SkillLearningCandidate.from_dict(payload) candidates[index] = claimed break if claimed is None: return None self.candidates_path.parent.mkdir(parents=True, exist_ok=True) self.candidates_path.write_text( "".join( json.dumps(candidate.to_dict(), ensure_ascii=False, sort_keys=True) + "\n" for candidate in candidates ), encoding="utf-8", ) self.append_audit_event( candidate_id, "draft_synthesis_started", {"status": "synthesizing", "force": force}, ) return claimed def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]: results: list[SkillLearningCandidate] = [] for payload in self._read_jsonl(self.candidates_path): candidate = SkillLearningCandidate.from_dict(payload) if status is not None and candidate.status != status: continue results.append(candidate) return results def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None: snapshots = self.list_performance_snapshots() filtered = [ item for item in snapshots if not (item.skill_name == snapshot.skill_name and item.skill_version == snapshot.skill_version) ] filtered.append(snapshot) self.performance_path.write_text( "".join(json.dumps(item.to_dict(), ensure_ascii=False, sort_keys=True) + "\n" for item in filtered), encoding="utf-8", ) def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]: return [SkillPerformanceSnapshot.from_dict(item) for item in self._read_jsonl(self.performance_path)] def list_low_performing_versions(self, *, minimum_activations: int = 2, success_ratio_threshold: float = 0.5) -> list[SkillPerformanceSnapshot]: results: list[SkillPerformanceSnapshot] = [] for snapshot in self.list_performance_snapshots(): if snapshot.activation_count < minimum_activations: continue if snapshot.activation_count == 0: continue ratio = snapshot.success_count / snapshot.activation_count if ratio <= success_ratio_threshold: results.append(snapshot) return results def list_merge_candidates(self) -> list[SkillLearningCandidate]: return [item for item in self.list_learning_candidates(status="open") if item.kind == "merge_skills"] def append_audit_event(self, candidate_id: str, event_type: str, payload: dict | None = None) -> SkillLearningAuditEvent: event = SkillLearningAuditEvent( event_id=uuid4().hex, candidate_id=candidate_id, event_type=event_type, created_at=_utc_now(), payload=dict(payload or {}), ) self._append_jsonl(self.audit_path, event.to_dict()) return event def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]: events = [SkillLearningAuditEvent.from_dict(item) for item in self._read_jsonl(self.audit_path)] if candidate_id is None: return events return [event for event in events if event.candidate_id == candidate_id] def write_safety_report(self, report: SkillDraftSafetyReport) -> None: path = self._report_path(self.safety_reports_dir, report.skill_name, report.draft_id, report.report_id) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n", encoding="utf-8") def get_safety_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftSafetyReport | None: reports = self.list_safety_reports(skill_name, draft_id) if report_id is not None: return next((item for item in reports if item.report_id == report_id), None) return reports[-1] if reports else None def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]: root = self.safety_reports_dir / skill_name / draft_id if not root.exists(): return [] return [ SkillDraftSafetyReport.from_dict(self._read_json(path)) for path in sorted(root.glob("report-*.json")) ] def write_eval_report(self, report: SkillDraftEvalReport) -> None: path = self._report_path(self.eval_reports_dir, report.skill_name, report.draft_id, report.report_id) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n", encoding="utf-8") def get_eval_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftEvalReport | None: reports = self.list_eval_reports(skill_name, draft_id) if report_id is not None: return next((item for item in reports if item.report_id == report_id), None) return reports[-1] if reports else None def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]: root = self.eval_reports_dir / skill_name / draft_id if not root.exists(): return [] return [ SkillDraftEvalReport.from_dict(self._read_json(path)) for path in sorted(root.glob("report-*.json")) ] @staticmethod def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path: return root / skill_name / draft_id / f"report-{report_id}.json" @staticmethod def _append_jsonl(path: Path, payload: dict) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("a", encoding="utf-8") as handle: handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n") @staticmethod def _read_jsonl(path: Path) -> list[dict]: if not path.exists(): return [] results: list[dict] = [] for line in path.read_text(encoding="utf-8").splitlines(): cleaned = line.strip() if not cleaned: continue payload = json.loads(cleaned) if isinstance(payload, dict): results.append(payload) return results @staticmethod def _read_json(path: Path) -> dict: payload = json.loads(path.read_text(encoding="utf-8")) if not isinstance(payload, dict): raise ValueError(f"Expected JSON object in {path}") return payload @contextmanager def _locked(self) -> Iterator[None]: if self.write_lock is not None: with self.write_lock.acquire(timeout_seconds=10): yield return with self._local_lock: yield def _utc_now() -> str: from datetime import datetime, timezone return datetime.now(timezone.utc).isoformat()