feat(tasks): add skill-templated task graph execution
This commit is contained in:
@ -284,6 +284,9 @@ def _build_replay_case_reports(
|
||||
"side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
|
||||
"validator_notes": list(surrogate.get("notes") or []),
|
||||
}
|
||||
historical_accepted_score = _historical_accepted_score(case)
|
||||
if historical_accepted_score is not None:
|
||||
case_report["historical_accepted_score"] = historical_accepted_score
|
||||
return case_report, {
|
||||
"run_id": case["run_id"],
|
||||
"session_id": case.get("session_id") or "",
|
||||
@ -293,6 +296,7 @@ def _build_replay_case_reports(
|
||||
"baseline_score": baseline_score,
|
||||
"candidate_score": candidate_score,
|
||||
"delta": round(candidate_score - baseline_score, 4),
|
||||
**({"historical_accepted_score": historical_accepted_score} if historical_accepted_score is not None else {}),
|
||||
}
|
||||
|
||||
|
||||
@ -658,8 +662,11 @@ def _ability_score(*, case: dict[str, Any], arm: dict[str, Any], arm_name: str)
|
||||
if validator is not None:
|
||||
return _ability_from_validator(validator, arm)
|
||||
if not case.get("synthetic"):
|
||||
score = _bounded_score(case.get("accepted_score"), default=0.75) if arm_name == "baseline" else _ability_from_output(arm)["final_score"]
|
||||
return _ability_breakdown(score=score, source="user_feedback" if arm_name == "baseline" else "llm_judge")
|
||||
result = _ability_from_output(arm, source="output_heuristic")
|
||||
historical_accepted_score = _historical_accepted_score(case)
|
||||
if historical_accepted_score is not None:
|
||||
result["historical_accepted_score"] = historical_accepted_score
|
||||
return result
|
||||
return _ability_breakdown(score=0.0, source="unscored", notes=["Synthetic cases require a validator."])
|
||||
|
||||
|
||||
@ -697,6 +704,12 @@ def _ability_from_output(arm: dict[str, Any], *, source: str = "llm_judge", note
|
||||
return _ability_breakdown(score=score, source=source, notes=notes)
|
||||
|
||||
|
||||
def _historical_accepted_score(case: dict[str, Any]) -> float | None:
    """Return the accepted score recorded on a replay case, if it has one.

    ``None`` is returned when any of the following holds, checked in order:

    * the case has a truthy ``"synthetic"`` entry,
    * the case's ``"validator"`` entry is a ``dict``,
    * the case has no ``"accepted_score"`` key at all.

    Otherwise the stored ``"accepted_score"`` value is passed through
    ``_bounded_score`` with a default of ``0.75`` and that result is returned.
    """
    # Guard clauses mirror a single short-circuiting ``or`` chain: each check
    # is evaluated only when every earlier one was falsy.
    if case.get("synthetic"):
        return None
    if isinstance(case.get("validator"), dict):
        return None
    if "accepted_score" not in case:
        return None

    recorded = case.get("accepted_score")
    return _bounded_score(recorded, default=0.75)
|
||||
|
||||
|
||||
def _ability_breakdown(*, score: float, source: str, notes: list[str] | None = None) -> dict[str, Any]:
|
||||
bounded = _bounded_score(score, default=0.0)
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user