feat(skill-learning): select replay eval cases

This commit is contained in:
2026-06-08 13:30:00 +08:00
parent a925f0e77f
commit 7287e93f87
3 changed files with 193 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
from __future__ import annotations
from beaver.memory.runs import RunRecord
from beaver.memory.skills import SkillLearningCandidate
from beaver.skills.learning.case_selection import select_replay_cases
from beaver.skills.specs import SkillActivationReceipt
def _run(
    run_id: str,
    *,
    task_id: str = "task",
    session_id: str = "session",
    task_text: str = "debug task",
    skill_name: str | None = None,
    skill_version: str = "v0001",
) -> RunRecord:
    """Build a successful, accepted ``RunRecord`` fixture for selection tests.

    Args:
        run_id: Identifier of the run. Its last two characters are spliced
            into the seconds field of ``started_at``, so ids such as
            ``"run-07"`` yield distinct timestamps within the same minute.
        task_id: Task identifier stored on the record.
        session_id: Session identifier stored on the record and on any
            activation receipt.
        task_text: Task description stored on the record.
        skill_name: When truthy, the record carries exactly one activation
            receipt for this skill; otherwise it carries none.
        skill_version: Version recorded on the activation receipt.

    Returns:
        A ``RunRecord`` with ``success=True``, ``finish_reason="stop"`` and
        an ``"accept"`` feedback entry.
    """
    # A single receipt is enough to mark the run as having used the skill.
    activated = (
        [
            SkillActivationReceipt(
                run_id=run_id,
                session_id=session_id,
                skill_name=skill_name,
                skill_version=skill_version,
                content_hash="hash",
                activated_at="now",
                activation_reason="selected",
            )
        ]
        if skill_name
        else []
    )

    # The id's trailing two characters double as the seconds of the timestamp.
    seconds = run_id[-2:]

    return RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_id=task_id,
        attempt_index=1,
        task_text=task_text,
        started_at=f"2026-06-08T00:00:{seconds}+00:00",
        ended_at="end",
        success=True,
        finish_reason="stop",
        feedback={"acceptance_type": "accept"},
        activated_skills=activated,
    )


def test_select_revise_cases_caps_at_ten_and_prefers_related_skill() -> None:
    """A revise candidate replays ten of twelve matching runs, ``run-11`` first.

    Twelve runs that all activated ``debug`` at ``v0001`` are offered to the
    selector together with a ``revise_skill`` candidate that names no source
    runs. The test expects ten cases back, each listing ``debug`` as its only
    baseline skill, with ``run-11`` (the fixture with the latest
    ``started_at``) in first position.
    """
    # NOTE(review): every fixture run activates "debug", so no unrelated run
    # competes for a slot; the "prefers related skill" part of the name is
    # only checked through baseline_skill_names. Presumably the ordering is
    # newest-first; confirm against select_replay_cases.
    history = [
        _run(
            f"run-{n:02d}",
            task_id=f"task-{n}",
            skill_name="debug",
            skill_version="v0001",
        )
        for n in range(12)
    ]
    revise_candidate = SkillLearningCandidate(
        candidate_id="candidate-1",
        kind="revise_skill",
        source_run_ids=[],
        source_session_ids=[],
        related_skill_names=["debug"],
        reason="revise",
        evidence={"skill_version": "v0001"},
    )

    selected = select_replay_cases(revise_candidate, history)

    assert len(selected) == 10
    baselines = [case["baseline_skill_names"] for case in selected]
    assert baselines == [["debug"]] * len(selected)
    assert selected[0]["run_id"] == "run-11"


def test_select_new_skill_uses_all_available_source_runs_under_ten() -> None:
    """A new-skill candidate with three source runs yields all three cases.

    Three runs without any activated skill are offered, and the candidate
    lists all three as its source runs. The test expects every run back in
    the order ``run-02``, ``run-01``, ``run-00``, each with an empty
    baseline skill list.
    """
    history = [_run(f"run-{n:02d}", task_id=f"task-{n}") for n in range(3)]
    new_skill_candidate = SkillLearningCandidate(
        candidate_id="candidate-1",
        kind="new_skill",
        source_run_ids=["run-00", "run-01", "run-02"],
        source_session_ids=["session"],
        related_skill_names=[],
        reason="new",
    )

    selected = select_replay_cases(new_skill_candidate, history)

    selected_ids = [case["run_id"] for case in selected]
    assert selected_ids == ["run-02", "run-01", "run-00"]
    # No skill was active in the source runs, so no baseline is expected.
    baselines = [case["baseline_skill_names"] for case in selected]
    assert baselines == [[]] * len(selected)