```
feat(engine): 优化智能体循环中的助手消息处理逻辑
- 在没有工具调用时才添加助手消息到上下文
- 确保工具调用响应正确添加到消息上下文中
- 修复了消息构建的条件逻辑

fix(cron): 改进定时任务调度的时间解析功能
- 添加正则表达式导入用于时间显示解析
- 实现从显示文本中提取毫秒间隔的功能
- 增强整数转换的安全性,避免类型错误
- 优化定时任务配置的解析逻辑

feat(outlook): 增强Outlook集成的功能和稳定性
- 将默认超时时间从10秒增加到180秒
- 为状态检查函数添加可选的验证参数
- 串行执行邮件概览获取操作而非并行
- 改进连接状态验证逻辑

feat(channel): 添加设备名称作为会话标识的选项
- 为终端WebSocket适配器添加新的配置选项
- 实现基于设备名称生成会话对等ID的功能
- 记录原始对等ID和设备名称的元数据
- 支持从设备名称创建会话对等ID

feat(skills): 完善技能学习评估系统和进度跟踪
- 在应用启动时自动调度待评估的技能草稿
- 为技能评估工作创建独立的循环工厂
- 实现异步技能评估任务的取消和清理机制
- 添加技能评估进度报告和状态跟踪功能
- 扩展会话列表API以包含更多详细信息
- 防止对不存在的会话进行操作
- 优化技能草稿提交和评估的业务逻辑

perf(skills): 提升技能评估的并发性能
- 实现并行技能案例评估以提高效率
- 添加最大并行案例数的环境变量控制
- 实现实时评估进度更新和回调机制
- 优化评估过程中的资源管理和同步

refactor(services): 创建隔离的智能体循环实例
- 添加创建独立智能体循环的工厂方法
- 确保新循环继承运行时服务配置
- 支持技能评估等需要隔离环境的场景
```
This commit is contained in:
@ -201,6 +201,22 @@ class FakeReplayRunner:
|
||||
}
|
||||
|
||||
|
||||
class ConcurrentReplayRunner(FakeReplayRunner):
    """Replay runner test double that records how many arms run at once.

    ``active`` counts the ``run_arm`` calls currently in flight and
    ``max_active`` keeps the highest value ``active`` has reached, so a test
    can assert on the peak concurrency after an evaluation finishes.
    """

    def __init__(self) -> None:
        super().__init__()
        # Number of run_arm calls currently in flight.
        self.active = 0
        # High-water mark of ``active`` over the runner's lifetime.
        self.max_active = 0

    async def run_arm(self, request):
        """Run one arm through the parent runner while tracking concurrency.

        Args:
            request: Passed through unchanged to ``FakeReplayRunner.run_arm``.

        Returns:
            Whatever the parent ``run_arm`` returns for ``request``.
        """
        self.active += 1
        # Everything after the increment lives inside the try block so the
        # counter is always restored, including when the task is cancelled
        # or fails while it is still sleeping.
        try:
            self.max_active = max(self.max_active, self.active)
            # Suspend briefly so that calls scheduled concurrently overlap
            # and the peak can be observed.
            await asyncio.sleep(0.02)
            return await super().run_arm(request)
        finally:
            self.active -= 1
|
||||
|
||||
|
||||
def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
draft = pipeline.draft_service.create_new_skill_draft(
|
||||
@ -238,6 +254,94 @@ def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
|
||||
assert report.tool_execution_summary["score_role"] == "diagnostic_only"
|
||||
|
||||
|
||||
def test_replay_eval_reports_arm_progress(tmp_path: Path) -> None:
    """Check the first and last progress events emitted by ``evaluate_draft``.

    The first event must report nothing completed and the last event must
    report every arm and case completed, both in the ``replaying`` phase
    with totals of 20 arms and 10 cases.
    """
    candidate_id = "candidate-1"
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        candidate_id,
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )

    # Every progress payload handed to the callback is collected here.
    progress: list[dict] = []
    evaluation = pipeline.evaluate_draft(
        candidate_id,
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=FakeReplayRunner(),
        progress_callback=progress.append,
    )
    asyncio.run(evaluation)

    # Fields shared by the first and the last event.
    totals = {"phase": "replaying", "total_arms": 20, "total_cases": 10}
    expected_first = {**totals, "completed_arms": 0, "completed_cases": 0}
    expected_last = {**totals, "completed_arms": 20, "completed_cases": 10}

    assert progress[0] == expected_first
    assert progress[-1] == expected_last
|
||||
|
||||
|
||||
def test_replay_eval_runs_cases_with_bounded_parallelism(tmp_path: Path) -> None:
    """Check peak concurrency and case ordering with ``max_parallel_cases=2``.

    The evaluator is replaced with one built with ``max_parallel_cases=2``;
    the runner must then observe a peak of exactly two in-flight arms, and
    the report must list its cases in the expected run-id order.
    """
    candidate_id = "candidate-1"
    pipeline = _pipeline(tmp_path)
    run_store = pipeline.learning_service.run_store
    pipeline.evaluator = SkillDraftEvaluator(run_store, max_parallel_cases=2)

    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        candidate_id,
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )

    # The runner records how many run_arm calls overlapped.
    replay_runner = ConcurrentReplayRunner()
    evaluation = pipeline.evaluate_draft(
        candidate_id,
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=replay_runner,
    )
    report = asyncio.run(evaluation)

    assert replay_runner.max_active == 2

    observed_run_ids = [case["run_id"] for case in report.cases]
    expected_run_ids = [
        "run-1",
        "synthetic:candidate-1:01",
        "synthetic:candidate-1:02",
        "synthetic:candidate-1:03",
        "synthetic:candidate-1:04",
        "synthetic:candidate-1:05",
        "synthetic:candidate-1:06",
        "synthetic:candidate-1:07",
        "synthetic:candidate-1:08",
        "synthetic:candidate-1:09",
    ]
    assert observed_run_ids == expected_run_ids
|
||||
|
||||
|
||||
def test_replay_main_score_uses_validator_not_tool_success(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
pipeline.learning_store.update_learning_candidate(
|
||||
|
||||
Reference in New Issue
Block a user