移除了agents/registry.json中的所有内置agents配置,将agents数组清空。 为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。 重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。 新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。 更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。 修改了意图路由技能的说明,改进任务状态管理逻辑。
204 lines
7.8 KiB
Python
204 lines
7.8 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from beaver.memory.runs import RunMemoryStore
|
|
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate, SkillLearningStore
|
|
from beaver.skills.drafts import DraftService
|
|
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
|
|
from beaver.skills.publisher import SkillPublisher
|
|
from beaver.skills.reviews import ReviewService
|
|
from beaver.skills.specs import SkillReviewState, SkillSpecStore
|
|
|
|
|
|
def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
    """Assemble a pipeline service backed by stores under ``tmp_path``.

    The spec store is rooted at ``tmp_path`` itself, while the run and
    skill-learning stores live under ``tmp_path / "memory"``. One learning
    candidate (``candidate-1``, kind ``retire_skill``) is recorded in the
    learning store before the pipeline is returned.
    """
    memory_root = tmp_path / "memory"

    # Stores and services are created in a fixed order; the draft, review and
    # publisher services all share the same spec store.
    specs = SkillSpecStore(tmp_path)
    runs = RunMemoryStore(memory_root / "runs")
    skills_memory = SkillLearningStore(memory_root / "skills")
    drafts = DraftService(specs)

    selector = EvidenceSelector(runs)
    synthesizer = SkillDraftSynthesizer()
    learner = SkillLearningService(
        run_store=runs,
        learning_store=skills_memory,
        draft_service=drafts,
        evidence_selector=selector,
        synthesizer=synthesizer,
    )

    seed_candidate = SkillLearningCandidate(
        candidate_id="candidate-1",
        kind="retire_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=["old-skill"],
        reason="not useful",
        evidence={"skill_version": "v0001"},
    )
    skills_memory.record_learning_candidate(seed_candidate)

    reviews = ReviewService(specs)
    publisher = SkillPublisher(specs)
    return SkillLearningPipelineService(
        learning_store=skills_memory,
        learning_service=learner,
        draft_service=drafts,
        review_service=reviews,
        publisher=publisher,
    )
|
|
|
|
|
|
def test_pipeline_lists_candidates_and_moves_draft_through_review(tmp_path: Path) -> None:
    """A new draft can be safety-checked, submitted for review and published.

    Also checks that the candidate recorded by ``_pipeline`` is the first
    entry returned by ``list_candidates``.
    """
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="new-skill",
        proposed_content="# New Skill\n\nDo the thing.",
        proposed_frontmatter={"description": "test skill"},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    safety = service.check_safety(name, draft_id)
    review = service.submit_review(name, draft_id, requested_by="tester")
    version = service.publish(name, draft_id, publisher="tester")

    first_candidate = service.list_candidates()[0]
    assert first_candidate.candidate_id == "candidate-1"
    # ``review`` was captured at submission time, before publishing.
    assert review.status == SkillReviewState.IN_REVIEW.value
    assert safety.passed is True
    assert version.skill_name == "new-skill"
    final_draft = service.get_draft(name, draft_id)
    assert final_draft.status == SkillReviewState.PUBLISHED.value
|
|
|
|
|
|
def test_pipeline_approve_requires_submitted_review(tmp_path: Path) -> None:
    """Approving a draft that was never submitted for review raises ``ValueError``."""
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="needs-review",
        proposed_content="# Needs Review\n\nDo the thing.",
        proposed_frontmatter={"description": "needs review"},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    # No submit_review call happens first, so approval is expected to fail.
    with pytest.raises(ValueError, match="in review before approval"):
        service.approve(name, draft_id, reviewer="tester")
|
|
|
|
|
|
def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
    """Submitting an already published draft for review again raises ``ValueError``."""
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="already-published",
        proposed_content="# Already Published\n\nDo the thing.",
        proposed_frontmatter={"description": "already published"},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    # Take the draft all the way through to publication first.
    service.submit_review(name, draft_id, requested_by="tester")
    service.check_safety(name, draft_id)
    service.publish(name, draft_id, publisher="tester")

    with pytest.raises(ValueError, match="draft status before review submission"):
        service.submit_review(name, draft_id, requested_by="tester")
|
|
|
|
|
|
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
    """Publishing a rejected draft raises ``ValueError`` and the draft is gone."""
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="blocked-skill",
        proposed_content="# Blocked\n\nNo publish.",
        proposed_frontmatter={"description": "blocked"},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    service.reject(name, draft_id, reviewer="tester")

    with pytest.raises(ValueError, match="Draft not found"):
        service.publish(name, draft_id, publisher="tester")

    # Kept outside the ``raises`` block so the lookup actually executes.
    remaining = service.draft_service.get_draft(name, draft_id)
    assert remaining is None
|
|
|
|
|
|
def test_pipeline_reject_removes_draft_from_review_list(tmp_path: Path) -> None:
    """Rejecting the only draft yields a REJECTED review and an empty draft list."""
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="remove-skill",
        proposed_content="# Remove\n\nNo longer needed.",
        proposed_frontmatter={"description": "remove"},
        created_by="test",
        reason="test",
    )

    outcome = service.reject(created.skill_name, created.draft_id, reviewer="tester")

    assert outcome.status == SkillReviewState.REJECTED.value
    listed = service.list_drafts()
    assert listed == []
|
|
|
|
|
|
def test_publish_blocks_low_confidence_replay_report(tmp_path: Path) -> None:
    """Publishing fails when the stored eval report has ``confidence="low"``.

    The report is otherwise favourable (``passed=True``, positive score
    delta, no regressions), so the expected ``ValueError`` is matched on
    "low confidence".
    """
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="low-confidence",
        proposed_content="# Low\n\nDo it.",
        proposed_frontmatter={"description": "low", "tools": []},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    low_confidence_report = SkillDraftEvalReport(
        report_id="eval-low",
        skill_name=name,
        draft_id=draft_id,
        candidate_id="candidate-1",
        passed=True,
        baseline_score_avg=0.7,
        candidate_score_avg=0.9,
        score_delta=0.2,
        regression_count=0,
        improved_count=1,
        unchanged_count=0,
        confidence="low",
        mode="replay",
        eval_version="replay-v1",
        execution_coverage=0.0,
        surrogate_coverage=1.0,
        blocked_coverage=0.0,
    )
    service.learning_store.write_eval_report(low_confidence_report)

    service.submit_review(name, draft_id, requested_by="tester")
    service.check_safety(name, draft_id)

    with pytest.raises(ValueError, match="low confidence"):
        service.publish(name, draft_id, publisher="tester")
|
|
|
|
|
|
def test_publish_blocks_failed_preservation_report(tmp_path: Path) -> None:
    """Publishing fails when the eval report's preservation report did not pass.

    The eval report itself has ``passed=True`` and medium confidence; only
    the embedded ``preservation_report`` is marked as failed, so the expected
    ``ValueError`` is matched on "preservation".
    """
    service = _pipeline(tmp_path)
    created = service.draft_service.create_new_skill_draft(
        skill_name="dropped-section",
        proposed_content="# Skill\n\n## Workflow\n\nDo it.",
        proposed_frontmatter={"description": "dropped", "tools": []},
        created_by="test",
        reason="test",
    )
    name, draft_id = created.skill_name, created.draft_id

    failed_preservation = {
        "passed": False,
        "risk_level": "high",
        "dropped_sections": ["Safety"],
    }
    report = SkillDraftEvalReport(
        report_id="eval-preservation",
        skill_name=name,
        draft_id=draft_id,
        candidate_id="candidate-1",
        passed=True,
        baseline_score_avg=0.7,
        candidate_score_avg=0.9,
        score_delta=0.2,
        regression_count=0,
        improved_count=1,
        unchanged_count=0,
        confidence="medium",
        mode="replay",
        eval_version="replay-v1",
        preservation_report=failed_preservation,
    )
    service.learning_store.write_eval_report(report)

    service.submit_review(name, draft_id, requested_by="tester")
    service.check_safety(name, draft_id)

    with pytest.raises(ValueError, match="preservation"):
        service.publish(name, draft_id, publisher="tester")
|