feat(engine): 优化智能体循环中的助手消息处理逻辑 - 在没有工具调用时才添加助手消息到上下文 - 确保工具调用响应正确添加到消息上下文中 - 修复了消息构建的条件逻辑 fix(cron): 改进定时任务调度的时间解析功能 - 添加正则表达式导入用于时间显示解析 - 实现从显示文本中提取毫秒间隔的功能 - 增强整数转换的安全性,避免类型错误 - 优化定时任务配置的解析逻辑 feat(outlook): 增强Outlook集成的功能和稳定性 - 将默认超时时间从10秒增加到180秒 - 为状态检查函数添加可选的验证参数 - 串行执行邮件概览获取操作而非并行 - 改进连接状态验证逻辑 feat(channel): 添加设备名称作为会话标识的选项 - 为终端WebSocket适配器添加新的配置选项 - 实现基于设备名称生成会话对等ID的功能 - 记录原始对等ID和设备名称的元数据 - 支持从设备名称创建会话对等ID feat(skills): 完善技能学习评估系统和进度跟踪 - 在应用启动时自动调度待评估的技能草稿 - 为技能评估工作创建独立的循环工厂 - 实现异步技能评估任务的取消和清理机制 - 添加技能评估进度报告和状态跟踪功能 - 扩展会话列表API以包含更多详细信息 - 防止对不存在的会话进行操作 - 优化技能草稿提交和评估的业务逻辑 perf(skills): 提升技能评估的并发性能 - 实现并行技能案例评估以提高效率 - 添加最大并行案例数的环境变量控制 - 实现实时评估进度更新和回调机制 - 优化评估过程中的资源管理和同步 refactor(services): 创建隔离的智能体循环实例 - 添加创建独立智能体循环的工厂方法 - 确保新循环继承运行时服务配置 - 支持技能评估等需要隔离环境的场景 ```
225 lines
8.6 KiB
Python
225 lines
8.6 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from beaver.memory.runs import RunMemoryStore
|
|
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate, SkillLearningStore
|
|
from beaver.skills.drafts import DraftService
|
|
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
|
|
from beaver.skills.publisher import SkillPublisher
|
|
from beaver.skills.reviews import ReviewService
|
|
from beaver.skills.specs import SkillReviewState, SkillSpecStore
|
|
|
|
|
|
def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
    """Assemble a pipeline service backed by stores rooted under ``tmp_path``.

    The spec store is created on ``tmp_path`` itself; run memory and
    skill-learning data are created under ``tmp_path / "memory"``. Before the
    pipeline is returned, one ``retire_skill`` learning candidate with the id
    ``candidate-1`` is recorded in the learning store.
    """
    memory_root = tmp_path / "memory"

    specs = SkillSpecStore(tmp_path)
    runs = RunMemoryStore(memory_root / "runs")
    learning = SkillLearningStore(memory_root / "skills")
    drafts = DraftService(specs)
    learner = SkillLearningService(
        run_store=runs,
        learning_store=learning,
        draft_service=drafts,
        evidence_selector=EvidenceSelector(runs),
        synthesizer=SkillDraftSynthesizer(),
    )

    # Seed a single candidate so tests can look it up by id.
    seeded_candidate = SkillLearningCandidate(
        candidate_id="candidate-1",
        kind="retire_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=["old-skill"],
        reason="not useful",
        evidence={"skill_version": "v0001"},
    )
    learning.record_learning_candidate(seeded_candidate)

    return SkillLearningPipelineService(
        learning_store=learning,
        learning_service=learner,
        draft_service=drafts,
        review_service=ReviewService(specs),
        publisher=SkillPublisher(specs),
    )
|
|
|
|
|
|
def test_pipeline_lists_candidates_and_moves_draft_through_review(tmp_path: Path) -> None:
    """Safety-check, submit and publish a new draft, then verify each step's result."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="new-skill",
        proposed_content="# New Skill\n\nDo the thing.",
        proposed_frontmatter={"description": "test skill"},
        created_by="test",
        reason="test",
    )
    skill_name = draft.skill_name
    draft_id = draft.draft_id

    safety = pipeline.check_safety(skill_name, draft_id)
    review = pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    version = pipeline.publish(skill_name, draft_id, publisher="tester")

    # Results of the three pipeline steps, in the order they ran.
    assert safety.passed is True
    assert review.status == SkillReviewState.IN_REVIEW.value
    assert version.skill_name == "new-skill"

    # State observable through the pipeline afterwards.
    published_draft = pipeline.get_draft(skill_name, draft_id)
    assert published_draft.status == SkillReviewState.PUBLISHED.value
    first_candidate = pipeline.list_candidates()[0]
    assert first_candidate.candidate_id == "candidate-1"
|
|
|
|
|
|
def test_pipeline_approve_requires_submitted_review(tmp_path: Path) -> None:
    """Approving a draft that was never submitted is expected to raise ``ValueError``."""
    pipeline = _pipeline(tmp_path)
    unsubmitted = pipeline.draft_service.create_new_skill_draft(
        skill_name="needs-review",
        proposed_content="# Needs Review\n\nDo the thing.",
        proposed_frontmatter={"description": "needs review"},
        created_by="test",
        reason="test",
    )

    expected_error = pytest.raises(ValueError, match="in review before approval")
    with expected_error:
        pipeline.approve(unsubmitted.skill_name, unsubmitted.draft_id, reviewer="tester")
|
|
|
|
|
|
def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
    """A second review submission after publishing is expected to raise ``ValueError``."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="already-published",
        proposed_content="# Already Published\n\nDo the thing.",
        proposed_frontmatter={"description": "already published"},
        created_by="test",
        reason="test",
    )
    skill_name = draft.skill_name
    draft_id = draft.draft_id

    # Take the draft through submit -> safety check -> publish first.
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.check_safety(skill_name, draft_id)
    pipeline.publish(skill_name, draft_id, publisher="tester")

    expected_error = pytest.raises(ValueError, match="draft status before review submission")
    with expected_error:
        pipeline.submit_review(skill_name, draft_id, requested_by="tester")
|
|
|
|
|
|
def test_safety_recheck_keeps_submitted_candidate_in_review(tmp_path: Path) -> None:
    """Re-running the safety check after submission leaves the candidate ``review_pending``."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="reviewed-skill",
        proposed_content="# Reviewed Skill\n\nDo the thing.",
        proposed_frontmatter={"description": "reviewed"},
        created_by="test",
        reason="test",
    )

    # Attach the draft to the seeded candidate and store the updated record.
    linked = pipeline.get_candidate("candidate-1")
    linked.draft_skill_name = draft.skill_name
    linked.draft_id = draft.draft_id
    pipeline.learning_store.record_learning_candidate(linked)

    skill_name = draft.skill_name
    draft_id = draft.draft_id
    pipeline.check_safety(skill_name, draft_id)
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.check_safety(skill_name, draft_id)

    reloaded = pipeline.get_candidate("candidate-1")
    assert reloaded.status == "review_pending"
|
|
|
|
|
|
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
    """Publishing a rejected draft is expected to raise ``ValueError`` ("Draft not found")."""
    pipeline = _pipeline(tmp_path)
    rejected = pipeline.draft_service.create_new_skill_draft(
        skill_name="blocked-skill",
        proposed_content="# Blocked\n\nNo publish.",
        proposed_frontmatter={"description": "blocked"},
        created_by="test",
        reason="test",
    )
    skill_name = rejected.skill_name
    draft_id = rejected.draft_id

    pipeline.reject(skill_name, draft_id, reviewer="tester")

    expected_error = pytest.raises(ValueError, match="Draft not found")
    with expected_error:
        pipeline.publish(skill_name, draft_id, publisher="tester")

    # Checked after the ``with`` block so the lookup actually runs.
    remaining = pipeline.draft_service.get_draft(skill_name, draft_id)
    assert remaining is None
|
|
|
|
|
|
def test_pipeline_reject_removes_draft_from_review_list(tmp_path: Path) -> None:
    """Rejecting a draft returns a rejected review and leaves ``list_drafts()`` empty."""
    pipeline = _pipeline(tmp_path)
    doomed = pipeline.draft_service.create_new_skill_draft(
        skill_name="remove-skill",
        proposed_content="# Remove\n\nNo longer needed.",
        proposed_frontmatter={"description": "remove"},
        created_by="test",
        reason="test",
    )

    outcome = pipeline.reject(doomed.skill_name, doomed.draft_id, reviewer="tester")

    assert outcome.status == SkillReviewState.REJECTED.value
    remaining_drafts = pipeline.list_drafts()
    assert remaining_drafts == []
|
|
|
|
|
|
def test_publish_blocks_low_confidence_replay_report(tmp_path: Path) -> None:
    """Publishing with a stored ``confidence="low"`` replay report is expected to raise."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="low-confidence",
        proposed_content="# Low\n\nDo it.",
        proposed_frontmatter={"description": "low", "tools": []},
        created_by="test",
        reason="test",
    )

    # The report passes and shows improvement, but its confidence is "low".
    low_confidence_report = SkillDraftEvalReport(
        report_id="eval-low",
        skill_name=draft.skill_name,
        draft_id=draft.draft_id,
        candidate_id="candidate-1",
        passed=True,
        baseline_score_avg=0.7,
        candidate_score_avg=0.9,
        score_delta=0.2,
        regression_count=0,
        improved_count=1,
        unchanged_count=0,
        confidence="low",
        mode="replay",
        eval_version="replay-v1",
        execution_coverage=0.0,
        surrogate_coverage=1.0,
        blocked_coverage=0.0,
    )
    pipeline.learning_store.write_eval_report(low_confidence_report)

    skill_name = draft.skill_name
    draft_id = draft.draft_id
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.check_safety(skill_name, draft_id)

    expected_error = pytest.raises(ValueError, match="low confidence")
    with expected_error:
        pipeline.publish(skill_name, draft_id, publisher="tester")
|
|
|
|
|
|
def test_publish_blocks_failed_preservation_report(tmp_path: Path) -> None:
    """Publishing with a report whose preservation check failed is expected to raise."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="dropped-section",
        proposed_content="# Skill\n\n## Workflow\n\nDo it.",
        proposed_frontmatter={"description": "dropped", "tools": []},
        created_by="test",
        reason="test",
    )

    # The report itself passes; only the nested preservation report is failed.
    failed_preservation_report = SkillDraftEvalReport(
        report_id="eval-preservation",
        skill_name=draft.skill_name,
        draft_id=draft.draft_id,
        candidate_id="candidate-1",
        passed=True,
        baseline_score_avg=0.7,
        candidate_score_avg=0.9,
        score_delta=0.2,
        regression_count=0,
        improved_count=1,
        unchanged_count=0,
        confidence="medium",
        mode="replay",
        eval_version="replay-v1",
        preservation_report={"passed": False, "risk_level": "high", "dropped_sections": ["Safety"]},
    )
    pipeline.learning_store.write_eval_report(failed_preservation_report)

    skill_name = draft.skill_name
    draft_id = draft.draft_id
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.check_safety(skill_name, draft_id)

    expected_error = pytest.raises(ValueError, match="preservation")
    with expected_error:
        pipeline.publish(skill_name, draft_id, publisher="tester")
|