Files
beaver_project/app-instance/backend/beaver/memory/skills/models.py

336 lines
13 KiB
Python

"""Aggregated skill learning models."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
# Status values that SkillLearningCandidate.from_dict keeps as-is; any other
# status found in a payload is replaced with "open".
LEARNING_CANDIDATE_STATUSES = {
    "open",
    "queued",
    "synthesizing",
    "draft_ready",
    "safety_failed",
    "eval_failed",
    "review_pending",
    "approved",
    "rejected",
    "published",
    "failed",
    "superseded",
}
# Risk levels kept as-is by the from_dict loaders of SkillLearningCandidate and
# SkillDraftSafetyReport; any other level is replaced with "medium".
RISK_LEVELS = {"low", "medium", "high", "critical"}
@dataclass(slots=True)
class SkillPerformanceSnapshot:
skill_name: str
skill_version: str
activation_count: int
success_count: int
failure_count: int
latest_used_at: str
last_feedback_score: float | None = None
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"activation_count": self.activation_count,
"success_count": self.success_count,
"failure_count": self.failure_count,
"latest_used_at": self.latest_used_at,
"last_feedback_score": self.last_feedback_score,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
value = payload.get("last_feedback_score")
return cls(
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
activation_count=int(payload.get("activation_count", 0) or 0),
success_count=int(payload.get("success_count", 0) or 0),
failure_count=int(payload.get("failure_count", 0) or 0),
latest_used_at=str(payload.get("latest_used_at") or ""),
last_feedback_score=None if value in (None, "") else float(value),
)
@dataclass(slots=True)
class SkillLearningCandidate:
candidate_id: str
kind: str
source_run_ids: list[str]
source_session_ids: list[str]
related_skill_names: list[str]
reason: str
evidence: dict[str, Any] = field(default_factory=dict)
status: str = "open"
priority: int = 0
confidence: float = 0.0
risk_level: str = "medium"
owner: str | None = None
retry_count: int = 0
last_error: str | None = None
trigger_reason: str = ""
evidence_summary: str = ""
draft_skill_name: str | None = None
draft_id: str | None = None
safety_report_id: str | None = None
eval_report_id: str | None = None
created_at: str = ""
updated_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"candidate_id": self.candidate_id,
"kind": self.kind,
"source_run_ids": list(self.source_run_ids),
"source_session_ids": list(self.source_session_ids),
"related_skill_names": list(self.related_skill_names),
"reason": self.reason,
"evidence": dict(self.evidence),
"status": self.status,
"priority": self.priority,
"confidence": self.confidence,
"risk_level": self.risk_level,
"owner": self.owner,
"retry_count": self.retry_count,
"last_error": self.last_error,
"trigger_reason": self.trigger_reason,
"evidence_summary": self.evidence_summary,
"draft_skill_name": self.draft_skill_name,
"draft_id": self.draft_id,
"safety_report_id": self.safety_report_id,
"eval_report_id": self.eval_report_id,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
now = _utc_now()
status = str(payload.get("status") or "open")
risk_level = str(payload.get("risk_level") or "medium")
return cls(
candidate_id=str(payload["candidate_id"]),
kind=str(payload.get("kind") or "revise_skill"),
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
reason=str(payload.get("reason") or ""),
evidence=dict(payload.get("evidence") or {}),
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
priority=int(payload.get("priority", 0) or 0),
confidence=float(payload.get("confidence", 0.0) or 0.0),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
owner=_optional_str(payload.get("owner")),
retry_count=int(payload.get("retry_count", 0) or 0),
last_error=_optional_str(payload.get("last_error")),
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
draft_id=_optional_str(payload.get("draft_id")),
safety_report_id=_optional_str(payload.get("safety_report_id")),
eval_report_id=_optional_str(payload.get("eval_report_id")),
created_at=str(payload.get("created_at") or now),
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
)
@dataclass(slots=True)
class SkillLearningAuditEvent:
event_id: str
candidate_id: str
event_type: str
created_at: str
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"candidate_id": self.candidate_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
return cls(
event_id=str(payload["event_id"]),
candidate_id=str(payload["candidate_id"]),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class SkillDraftSafetyReport:
report_id: str
skill_name: str
draft_id: str
passed: bool
risk_level: str
issues: list[str] = field(default_factory=list)
blocked_reasons: list[str] = field(default_factory=list)
suggested_fix: str = ""
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"passed": self.passed,
"risk_level": self.risk_level,
"issues": list(self.issues),
"blocked_reasons": list(self.blocked_reasons),
"suggested_fix": self.suggested_fix,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
risk_level = str(payload.get("risk_level") or "medium")
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
passed=bool(payload.get("passed")),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
issues=[str(item) for item in payload.get("issues") or []],
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
suggested_fix=str(payload.get("suggested_fix") or ""),
created_at=str(payload.get("created_at") or ""),
)
@dataclass(slots=True)
class SkillDraftEvalReport:
report_id: str
skill_name: str
draft_id: str
candidate_id: str
passed: bool
baseline_score_avg: float
candidate_score_avg: float
score_delta: float
regression_count: int
improved_count: int
unchanged_count: int
cases: list[dict[str, Any]] = field(default_factory=list)
status: str = "completed"
created_at: str = ""
eval_version: str = "heuristic-v1"
mode: str = "heuristic"
execution_coverage: float = 0.0
surrogate_coverage: float = 0.0
blocked_coverage: float = 0.0
confidence: str = "low"
case_reports: list[dict[str, Any]] = field(default_factory=list)
tool_mode_summary: dict[str, Any] = field(default_factory=dict)
preservation_report: dict[str, Any] | None = None
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"candidate_id": self.candidate_id,
"passed": self.passed,
"baseline_score_avg": self.baseline_score_avg,
"candidate_score_avg": self.candidate_score_avg,
"score_delta": self.score_delta,
"regression_count": self.regression_count,
"improved_count": self.improved_count,
"unchanged_count": self.unchanged_count,
"cases": [dict(item) for item in self.cases],
"status": self.status,
"created_at": self.created_at,
"eval_version": self.eval_version,
"mode": self.mode,
"execution_coverage": self.execution_coverage,
"surrogate_coverage": self.surrogate_coverage,
"blocked_coverage": self.blocked_coverage,
"confidence": self.confidence,
"case_reports": [dict(item) for item in self.case_reports],
"tool_mode_summary": dict(self.tool_mode_summary),
"preservation_report": (
dict(self.preservation_report) if self.preservation_report is not None else None
),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
candidate_id=str(payload.get("candidate_id") or ""),
passed=bool(payload.get("passed")),
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
regression_count=int(payload.get("regression_count", 0) or 0),
improved_count=int(payload.get("improved_count", 0) or 0),
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
status=str(payload.get("status") or "completed"),
created_at=str(payload.get("created_at") or ""),
eval_version=str(payload.get("eval_version") or "heuristic-v1"),
mode=str(payload.get("mode") or "heuristic"),
execution_coverage=_bounded_float(payload.get("execution_coverage"), default=0.0),
surrogate_coverage=_bounded_float(payload.get("surrogate_coverage"), default=0.0),
blocked_coverage=_bounded_float(payload.get("blocked_coverage"), default=0.0),
confidence=str(payload.get("confidence") or "low"),
case_reports=[
dict(item)
for item in payload.get("case_reports") or []
if isinstance(item, dict)
],
tool_mode_summary=dict(payload.get("tool_mode_summary") or {}),
preservation_report=(
dict(payload["preservation_report"])
if isinstance(payload.get("preservation_report"), dict)
else None
),
)
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _bounded_float(value: Any, *, default: float = 0.0) -> float:
    """Convert *value* to a float clamped to the closed range [0.0, 1.0].

    Args:
        value: Raw value to convert.
        default: Result used when *value* is ``None``, ``""``, NaN, or cannot
            be converted to a float.

    Returns:
        The clamped float, or *default* for unusable input. Positive and
        negative infinity clamp to ``1.0`` and ``0.0`` respectively.
    """
    if value in (None, ""):
        return default
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large for a float (e.g. 10**400).
        return default
    if number != number:
        # NaN is the only float unequal to itself. Clamping it with
        # min()/max() would return 1.0, so treat it as unusable input instead.
        return default
    return max(0.0, min(1.0, number))
def _summarize_evidence(payload: dict[str, Any]) -> str:
    """Return a one-line summary of a candidate payload's evidence.

    When ``evidence`` is a dict, a truthy ``theme`` wins over a truthy
    ``skill_version``. Otherwise the summary is the number of entries in
    ``source_run_ids``.
    """
    details = payload.get("evidence")
    if isinstance(details, dict):
        if theme := details.get("theme"):
            return f"Theme: {theme}"
        if version := details.get("skill_version"):
            return f"Skill version: {version}"
    run_count = len(payload.get("source_run_ids") or [])
    return f"{run_count} source run(s)"
def _utc_now() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()