feat(skill-learning): gate publish on replay confidence

This commit is contained in:
2026-06-08 13:36:55 +08:00
parent 64d789a3d0
commit b9171998b9
2 changed files with 83 additions and 1 deletions

View File

@ -5,7 +5,7 @@ from pathlib import Path
import pytest
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate, SkillLearningStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
from beaver.skills.publisher import SkillPublisher
@ -132,3 +132,77 @@ def test_pipeline_reject_removes_draft_from_review_list(tmp_path: Path) -> None:
assert review.status == SkillReviewState.REJECTED.value
assert pipeline.list_drafts() == []
def test_publish_blocks_low_confidence_replay_report(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
draft = pipeline.draft_service.create_new_skill_draft(
skill_name="low-confidence",
proposed_content="# Low\n\nDo it.",
proposed_frontmatter={"description": "low", "tools": []},
created_by="test",
reason="test",
)
pipeline.learning_store.write_eval_report(
SkillDraftEvalReport(
report_id="eval-low",
skill_name=draft.skill_name,
draft_id=draft.draft_id,
candidate_id="candidate-1",
passed=True,
baseline_score_avg=0.7,
candidate_score_avg=0.9,
score_delta=0.2,
regression_count=0,
improved_count=1,
unchanged_count=0,
confidence="low",
mode="replay",
eval_version="replay-v1",
execution_coverage=0.0,
surrogate_coverage=1.0,
blocked_coverage=0.0,
)
)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
pipeline.check_safety(draft.skill_name, draft.draft_id)
with pytest.raises(ValueError, match="low confidence"):
pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
def test_publish_blocks_failed_preservation_report(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
draft = pipeline.draft_service.create_new_skill_draft(
skill_name="dropped-section",
proposed_content="# Skill\n\n## Workflow\n\nDo it.",
proposed_frontmatter={"description": "dropped", "tools": []},
created_by="test",
reason="test",
)
pipeline.learning_store.write_eval_report(
SkillDraftEvalReport(
report_id="eval-preservation",
skill_name=draft.skill_name,
draft_id=draft.draft_id,
candidate_id="candidate-1",
passed=True,
baseline_score_avg=0.7,
candidate_score_avg=0.9,
score_delta=0.2,
regression_count=0,
improved_count=1,
unchanged_count=0,
confidence="medium",
mode="replay",
eval_version="replay-v1",
preservation_report={"passed": False, "risk_level": "high", "dropped_sections": ["Safety"]},
)
)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
pipeline.check_safety(draft.skill_name, draft.draft_id)
with pytest.raises(ValueError, match="preservation"):
pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")