259 lines
10 KiB
Python
259 lines
10 KiB
Python
"""File-backed skill learning store."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
import threading
|
|
from uuid import uuid4
|
|
from contextlib import contextmanager
|
|
from typing import Iterator
|
|
|
|
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
|
|
|
|
from .models import (
|
|
SkillDraftEvalReport,
|
|
SkillDraftSafetyReport,
|
|
SkillLearningAuditEvent,
|
|
SkillLearningCandidate,
|
|
SkillPerformanceSnapshot,
|
|
)
|
|
|
|
|
|
class SkillLearningStore:
    """Persist skill-learning state as JSON / JSONL files under one root directory.

    Files managed (all relative to ``root``):

    * ``performance.jsonl`` -- one performance snapshot per line.
    * ``learning-candidates.jsonl`` -- one learning candidate per line.
    * ``learning-audit.jsonl`` -- append-only audit events.
    * ``safety-reports/<skill_name>/<draft_id>/report-<report_id>.json``
    * ``eval-reports/<skill_name>/<draft_id>/report-<report_id>.json``

    Read-modify-write operations run under ``write_lock`` when one was
    supplied, otherwise under a per-instance ``threading.RLock``.  Whole-file
    writes go through a temporary sibling file that is renamed over the
    target, so readers (which take no lock) never see a truncated file.
    """

    def __init__(self, root: str | Path, *, write_lock: WorkspaceWriteLock | None = None) -> None:
        """Create the root directory if needed and compute the managed paths.

        Args:
            root: Directory that holds every file managed by this store.
            write_lock: Optional lock acquired around read-modify-write
                operations instead of the in-process lock.
        """
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.write_lock = write_lock
        self._local_lock = threading.RLock()
        self.performance_path = self.root / "performance.jsonl"
        self.candidates_path = self.root / "learning-candidates.jsonl"
        self.audit_path = self.root / "learning-audit.jsonl"
        self.safety_reports_dir = self.root / "safety-reports"
        self.eval_reports_dir = self.root / "eval-reports"

    # ------------------------------------------------------------------
    # Learning candidates
    # ------------------------------------------------------------------

    def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None:
        """Append ``candidate`` to the candidates file and audit its creation.

        The candidate is round-tripped through ``to_dict`` / ``from_dict``
        before being written.  No duplicate check is performed; use
        ``record_learning_candidate_if_absent`` for that.
        """
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        self._store_new_candidate(normalized)

    def record_learning_candidate_if_absent(
        self,
        candidate: SkillLearningCandidate,
    ) -> tuple[SkillLearningCandidate, bool]:
        """Record ``candidate`` unless one with the same ``candidate_id`` exists.

        Returns:
            ``(stored_candidate, created)``.  When a candidate with the same
            id is already on disk it is returned with ``created=False``;
            otherwise the normalized new candidate is returned with
            ``created=True``.
        """
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        with self._locked():
            found: SkillLearningCandidate | None = None
            for item in self.list_learning_candidates():
                # Keep scanning: if the file holds several rows with this id,
                # the last one wins.
                if item.candidate_id == normalized.candidate_id:
                    found = item
            if found is not None:
                return found, False
            self._store_new_candidate(normalized)
            return normalized, True

    def _store_new_candidate(self, normalized: SkillLearningCandidate) -> None:
        """Append an already-normalized candidate and its ``candidate_created`` audit event."""
        self._append_jsonl(self.candidates_path, normalized.to_dict())
        self.append_audit_event(
            normalized.candidate_id,
            "candidate_created",
            {
                "kind": normalized.kind,
                "status": normalized.status,
                "reason": normalized.reason,
            },
        )

    def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None:
        """Apply ``updates`` to the first candidate whose id is ``candidate_id``.

        ``updated_at`` is set to the current UTC time unless the caller
        supplies it in ``updates``.  The whole candidates file is rewritten.

        Returns:
            The updated candidate, or ``None`` when no candidate matches (in
            which case nothing is written).
        """
        with self._locked():
            candidates = self.list_learning_candidates()
            updated: SkillLearningCandidate | None = None
            for index, candidate in enumerate(candidates):
                if candidate.candidate_id != candidate_id:
                    continue
                payload = candidate.to_dict()
                payload.update(updates)
                if "updated_at" not in updates:
                    payload["updated_at"] = _utc_now()
                updated = SkillLearningCandidate.from_dict(payload)
                candidates[index] = updated
                break
            if updated is None:
                return None
            self._write_text_atomic(
                self.candidates_path,
                "".join(self._dump_line(item.to_dict()) for item in candidates),
            )
            return updated

    def transition_learning_candidate(
        self,
        candidate_id: str,
        status: str,
        *,
        event_type: str | None = None,
        payload: dict | None = None,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Set a candidate's ``status`` and append a matching audit event.

        Args:
            candidate_id: Id of the candidate to update.
            status: New status value.
            event_type: Audit event type; defaults to ``f"candidate_{status}"``.
            payload: Extra audit payload.  It is merged over
                ``{"status": status}``, so a ``"status"`` key in ``payload``
                replaces the recorded status.
            **updates: Additional candidate fields to update.

        Returns:
            The updated candidate, or ``None`` (and no audit event) when the
            candidate does not exist.
        """
        updated = self.update_learning_candidate(candidate_id, status=status, **updates)
        if updated is not None:
            self.append_audit_event(
                candidate_id,
                event_type or f"candidate_{status}",
                {"status": status, **dict(payload or {})},
            )
        return updated

    def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return stored candidates in file order, optionally filtered by ``status``."""
        candidates = (
            SkillLearningCandidate.from_dict(payload)
            for payload in self._read_jsonl(self.candidates_path)
        )
        return [item for item in candidates if status is None or item.status == status]

    def list_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Return candidates with status ``"open"`` and kind ``"merge_skills"``."""
        return [item for item in self.list_learning_candidates(status="open") if item.kind == "merge_skills"]

    # ------------------------------------------------------------------
    # Performance snapshots
    # ------------------------------------------------------------------

    def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None:
        """Replace the stored snapshot for ``snapshot``'s skill name and version.

        Any existing rows with the same ``(skill_name, skill_version)`` are
        dropped and ``snapshot`` is appended at the end.  The read-modify-write
        runs under the store lock so concurrent updates are not lost.
        """
        with self._locked():
            kept = [
                item
                for item in self.list_performance_snapshots()
                if not (item.skill_name == snapshot.skill_name and item.skill_version == snapshot.skill_version)
            ]
            kept.append(snapshot)
            self._write_text_atomic(
                self.performance_path,
                "".join(self._dump_line(item.to_dict()) for item in kept),
            )

    def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]:
        """Return every stored performance snapshot in file order."""
        return [SkillPerformanceSnapshot.from_dict(item) for item in self._read_jsonl(self.performance_path)]

    def list_low_performing_versions(self, *, minimum_activations: int = 2, success_ratio_threshold: float = 0.5) -> list[SkillPerformanceSnapshot]:
        """Return snapshots whose success ratio is at or below the threshold.

        Args:
            minimum_activations: Snapshots with fewer activations are ignored.
            success_ratio_threshold: Inclusive upper bound on
                ``success_count / activation_count``.
        """
        results: list[SkillPerformanceSnapshot] = []
        for snapshot in self.list_performance_snapshots():
            if snapshot.activation_count < minimum_activations:
                continue
            # Guards the division below when minimum_activations <= 0.
            if snapshot.activation_count == 0:
                continue
            ratio = snapshot.success_count / snapshot.activation_count
            if ratio <= success_ratio_threshold:
                results.append(snapshot)
        return results

    # ------------------------------------------------------------------
    # Audit log
    # ------------------------------------------------------------------

    def append_audit_event(self, candidate_id: str, event_type: str, payload: dict | None = None) -> SkillLearningAuditEvent:
        """Append an audit event with a fresh id and UTC timestamp; return it.

        ``payload`` is shallow-copied; ``None`` is stored as an empty dict.
        """
        event = SkillLearningAuditEvent(
            event_id=uuid4().hex,
            candidate_id=candidate_id,
            event_type=event_type,
            created_at=_utc_now(),
            payload=dict(payload or {}),
        )
        self._append_jsonl(self.audit_path, event.to_dict())
        return event

    def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]:
        """Return audit events in file order, optionally only those for ``candidate_id``."""
        events = [SkillLearningAuditEvent.from_dict(item) for item in self._read_jsonl(self.audit_path)]
        if candidate_id is None:
            return events
        return [event for event in events if event.candidate_id == candidate_id]

    # ------------------------------------------------------------------
    # Safety / eval reports
    # ------------------------------------------------------------------

    def write_safety_report(self, report: SkillDraftSafetyReport) -> None:
        """Write ``report`` to its file under ``safety-reports``, replacing any existing one."""
        path = self._report_path(self.safety_reports_dir, report.skill_name, report.draft_id, report.report_id)
        self._write_text_atomic(path, self._dump_line(report.to_dict()))

    def get_safety_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftSafetyReport | None:
        """Return one safety report for a draft, or ``None`` if there is none.

        With ``report_id`` the matching report is returned.  Without it the
        last report in filename order is returned.
        NOTE(review): filename order is lexical by ``report_id``; that is only
        "most recent" if report ids sort chronologically -- confirm.
        """
        reports = self.list_safety_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]:
        """Return all safety reports for a draft, sorted by file path."""
        directory = self.safety_reports_dir / skill_name / draft_id
        return [SkillDraftSafetyReport.from_dict(item) for item in self._read_report_payloads(directory)]

    def write_eval_report(self, report: SkillDraftEvalReport) -> None:
        """Write ``report`` to its file under ``eval-reports``, replacing any existing one."""
        path = self._report_path(self.eval_reports_dir, report.skill_name, report.draft_id, report.report_id)
        self._write_text_atomic(path, self._dump_line(report.to_dict()))

    def get_eval_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftEvalReport | None:
        """Return one eval report for a draft, or ``None`` if there is none.

        With ``report_id`` the matching report is returned.  Without it the
        last report in filename order is returned.
        NOTE(review): filename order is lexical by ``report_id``; that is only
        "most recent" if report ids sort chronologically -- confirm.
        """
        reports = self.list_eval_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]:
        """Return all eval reports for a draft, sorted by file path."""
        directory = self.eval_reports_dir / skill_name / draft_id
        return [SkillDraftEvalReport.from_dict(item) for item in self._read_report_payloads(directory)]

    def _read_report_payloads(self, directory: Path) -> list[dict]:
        """Load every ``report-*.json`` object in ``directory``, sorted by path."""
        if not directory.exists():
            return []
        return [self._read_json(path) for path in sorted(directory.glob("report-*.json"))]

    # ------------------------------------------------------------------
    # File helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path:
        """Return ``root/<skill_name>/<draft_id>/report-<report_id>.json``."""
        return root / skill_name / draft_id / f"report-{report_id}.json"

    @staticmethod
    def _dump_line(payload: dict) -> str:
        """Serialize ``payload`` as one newline-terminated JSON line with sorted keys."""
        return json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n"

    @staticmethod
    def _write_text_atomic(path: Path, text: str) -> None:
        """Replace the contents of ``path`` with ``text`` without truncating it in place.

        The text is written to a uniquely named sibling file which is then
        renamed over ``path``.  The scratch file is removed if anything fails.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # Leading dot and ".tmp" suffix keep the scratch file out of the
        # "report-*.json" globs used when listing reports.
        scratch = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
        try:
            scratch.write_text(text, encoding="utf-8")
            scratch.replace(path)
        except BaseException:
            try:
                scratch.unlink()
            except OSError:
                # Best-effort cleanup; the original error is what matters.
                pass
            raise

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append ``payload`` as one JSON line to ``path``, creating parents as needed."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Return the JSON objects stored one per line in ``path``.

        A missing file yields an empty list.  Blank lines and lines whose JSON
        value is not an object are skipped.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        try:
            text = path.read_text(encoding="utf-8")
        except FileNotFoundError:
            # The file may never have been created, or may vanish between
            # calls; either way there is nothing to read.
            return []
        results: list[dict] = []
        for line in text.splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if isinstance(payload, dict):
                results.append(payload)
        return results

    @staticmethod
    def _read_json(path: Path) -> dict:
        """Return the JSON object stored in ``path``.

        Raises:
            ValueError: If the file's top-level JSON value is not an object.
        """
        payload = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise ValueError(f"Expected JSON object in {path}")
        return payload

    @contextmanager
    def _locked(self) -> Iterator[None]:
        """Hold the store lock for the duration of the ``with`` body.

        Uses ``write_lock.acquire(timeout_seconds=10)`` when a write lock was
        supplied, otherwise the per-instance re-entrant lock.
        """
        if self.write_lock is not None:
            with self.write_lock.acquire(timeout_seconds=10):
                yield
            return
        with self._local_lock:
            yield
|
|
|
|
|
|
def _utc_now() -> str:
|
|
from datetime import datetime, timezone
|
|
|
|
return datetime.now(timezone.utc).isoformat()
|