feat(skill-learning): extend eval report payload
This commit is contained in:
@ -227,6 +227,15 @@ class SkillDraftEvalReport:
|
||||
cases: list[dict[str, Any]] = field(default_factory=list)
|
||||
status: str = "completed"
|
||||
created_at: str = ""
|
||||
eval_version: str = "heuristic-v1"
|
||||
mode: str = "heuristic"
|
||||
execution_coverage: float = 0.0
|
||||
surrogate_coverage: float = 0.0
|
||||
blocked_coverage: float = 0.0
|
||||
confidence: str = "low"
|
||||
case_reports: list[dict[str, Any]] = field(default_factory=list)
|
||||
tool_mode_summary: dict[str, Any] = field(default_factory=dict)
|
||||
preservation_report: dict[str, Any] | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -244,6 +253,17 @@ class SkillDraftEvalReport:
|
||||
"cases": [dict(item) for item in self.cases],
|
||||
"status": self.status,
|
||||
"created_at": self.created_at,
|
||||
"eval_version": self.eval_version,
|
||||
"mode": self.mode,
|
||||
"execution_coverage": self.execution_coverage,
|
||||
"surrogate_coverage": self.surrogate_coverage,
|
||||
"blocked_coverage": self.blocked_coverage,
|
||||
"confidence": self.confidence,
|
||||
"case_reports": [dict(item) for item in self.case_reports],
|
||||
"tool_mode_summary": dict(self.tool_mode_summary),
|
||||
"preservation_report": (
|
||||
dict(self.preservation_report) if self.preservation_report is not None else None
|
||||
),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@ -263,6 +283,23 @@ class SkillDraftEvalReport:
|
||||
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
|
||||
status=str(payload.get("status") or "completed"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
eval_version=str(payload.get("eval_version") or "heuristic-v1"),
|
||||
mode=str(payload.get("mode") or "heuristic"),
|
||||
execution_coverage=_bounded_float(payload.get("execution_coverage"), default=0.0),
|
||||
surrogate_coverage=_bounded_float(payload.get("surrogate_coverage"), default=0.0),
|
||||
blocked_coverage=_bounded_float(payload.get("blocked_coverage"), default=0.0),
|
||||
confidence=str(payload.get("confidence") or "low"),
|
||||
case_reports=[
|
||||
dict(item)
|
||||
for item in payload.get("case_reports") or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
tool_mode_summary=dict(payload.get("tool_mode_summary") or {}),
|
||||
preservation_report=(
|
||||
dict(payload["preservation_report"])
|
||||
if isinstance(payload.get("preservation_report"), dict)
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -272,6 +309,15 @@ def _optional_str(value: Any) -> str | None:
|
||||
return str(value)
|
||||
|
||||
|
||||
def _bounded_float(value: Any, *, default: float = 0.0) -> float:
|
||||
if value in (None, ""):
|
||||
return default
|
||||
try:
|
||||
return max(0.0, min(1.0, float(value)))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _summarize_evidence(payload: dict[str, Any]) -> str:
|
||||
evidence = payload.get("evidence")
|
||||
if isinstance(evidence, dict):
|
||||
|
||||
@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from beaver.memory.skills import SkillDraftEvalReport
|
||||
|
||||
|
||||
def test_eval_report_defaults_preserve_legacy_payload_shape() -> None:
    """A report built without the extended fields serializes their defaults."""
    constructor_kwargs = {
        "report_id": "eval-1",
        "skill_name": "debug",
        "draft_id": "draft-1",
        "candidate_id": "candidate-1",
        "passed": True,
        "baseline_score_avg": 0.5,
        "candidate_score_avg": 0.8,
        "score_delta": 0.3,
        "regression_count": 0,
        "improved_count": 2,
        "unchanged_count": 0,
        "cases": [{"run_id": "run-1"}],
        "status": "completed",
        "created_at": "now",
    }

    payload = SkillDraftEvalReport(**constructor_kwargs).to_dict()

    # Fields that were not passed to the constructor must come out of
    # to_dict() with these values.
    expected_defaults = {
        "eval_version": "heuristic-v1",
        "mode": "heuristic",
        "execution_coverage": 0.0,
        "surrogate_coverage": 0.0,
        "blocked_coverage": 0.0,
        "confidence": "low",
        "case_reports": [],
        "tool_mode_summary": {},
    }
    for key, expected in expected_defaults.items():
        assert payload[key] == expected
    assert payload["preservation_report"] is None

    # The explicitly supplied cases are still serialized unchanged.
    assert payload["cases"] == [{"run_id": "run-1"}]
|
||||
|
||||
|
||||
def test_eval_report_reads_legacy_payload_without_replay_fields() -> None:
    """from_dict() fills in defaults when the extended keys are absent."""
    legacy_payload = {
        "report_id": "eval-legacy",
        "skill_name": "debug",
        "draft_id": "draft-1",
        "candidate_id": "candidate-1",
        "passed": True,
        "baseline_score_avg": 0.4,
        "candidate_score_avg": 0.8,
        "score_delta": 0.4,
        "regression_count": 0,
        "improved_count": 1,
        "unchanged_count": 0,
        "cases": [{"run_id": "run-1"}],
        "status": "completed",
        "created_at": "now",
    }

    report = SkillDraftEvalReport.from_dict(legacy_payload)

    # None of these keys exist in the payload above, so each value must be
    # the fallback chosen by from_dict().
    observed = (report.eval_version, report.mode, report.confidence)
    assert observed == ("heuristic-v1", "heuristic", "low")
    assert report.case_reports == []
|
||||
Reference in New Issue
Block a user