添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,并修改 LocalAgentRunner 中的逻辑,在 envelope 中未指定时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑
更新运行成功判断条件,将 finish_reason 为 "max_tool_iterations_finalized" 的情况视为运行失败,并添加对原始工具调用输出的检测,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持
增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置
将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接
添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能
移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能
添加技能激活快照事件处理,改进团队运行完成消息显示,并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑
移除团队执行相关代码,将所有任务统一按单步执行处理,简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑
更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
583 lines
21 KiB
Python
583 lines
21 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from beaver.engine.session import SessionManager
|
|
from beaver.memory.runs import RunMemoryStore, RunRecord
|
|
from beaver.services.process_service import SessionProcessProjector
|
|
from beaver.skills.specs import SkillActivationReceipt
|
|
|
|
|
|
def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
    """Project a successful team-mode attempt and check its runs and events.

    A sub run and a main run are stored, then the ``web:test`` session receives
    planned, team-run-completed, synthesis-completed, evidence and acceptance
    events for ``task-1`` attempt 1. The projection must list the root, sub and
    main runs and tag each projected event with the expected ``timeline_type``.
    """
    session_id = "web:test"
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    sub_record = RunRecord(
        run_id="sub-run",
        session_id="sub-session",
        task_id="task-1",
        attempt_index=1,
        task_text="sub task",
        started_at="2026-01-01T00:00:01+00:00",
        ended_at="2026-01-01T00:00:02+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(sub_record)
    main_record = RunRecord(
        run_id="main-run",
        session_id=session_id,
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(main_record)

    # Plan event: team mode with a single "research" node resolved to a published skill.
    resolution_entry = {
        "node_id": "research",
        "skill_query": "research workflow",
        "selected_skill_names": ["research-workflow"],
        "ephemeral_guidance_id": None,
        "ephemeral_guidance_name": None,
        "ephemeral_used": False,
        "reason": "matched published skill",
    }
    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "team",
        "strategy": "sequence",
        "node_ids": ["research"],
        "skill_queries": ["research workflow"],
        "selected_skill_names": ["research-workflow"],
        "skill_resolution_report": [resolution_entry],
        "reason": "needs research",
    }
    session.append_message(
        session_id,
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    # Team run completion referencing the stored sub run.
    research_result = {
        "node_id": "research",
        "success": True,
        "output_text": "evidence",
        "run_id": "sub-run",
        "skill_query": "research workflow",
        "selected_skill_names": ["research-workflow"],
        "ephemeral_skill_names": [],
        "ephemeral_guidance_id": None,
        "ephemeral_guidance_name": None,
        "ephemeral_used": False,
        "finish_reason": "stop",
    }
    team_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "team_success": True,
        "team_run_ids": ["sub-run"],
        "node_results": [research_result],
    }
    session.append_message(
        session_id,
        role="system",
        event_type="task_team_run_completed",
        event_payload=team_payload,
        context_visible=False,
    )

    session.append_message(
        session_id,
        role="system",
        event_type="task_synthesis_completed",
        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
        context_visible=False,
    )

    evidence_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "evidence_status": "recorded",
    }
    session.append_message(
        session_id,
        run_id="main-run",
        role="system",
        event_type="task_evidence_recorded",
        event_payload=evidence_payload,
        context_visible=False,
    )

    acceptance_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "acceptance_type": "accept",
    }
    session.append_message(
        session_id,
        run_id="main-run",
        role="system",
        event_type="task_acceptance_recorded",
        event_payload=acceptance_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project(session_id)

    def first_event(kind):
        # First projected event of the given kind; raises StopIteration when absent.
        return next(item for item in projection["events"] if item["kind"] == kind)

    projected_run_ids = {entry["run_id"] for entry in projection["runs"]}
    assert "task:task-1:attempt:1" in projected_run_ids
    assert "sub-run" in projected_run_ids
    assert "main-run" in projected_run_ids

    sub_run = next(entry for entry in projection["runs"] if entry["run_id"] == "sub-run")
    sub_meta = sub_run["metadata"]
    assert sub_meta["selected_skill_names"] == ["research-workflow"]
    assert sub_meta["skill_query"] == "research workflow"
    assert sub_meta["ephemeral_guidance_id"] is None
    assert any(item["actor_name"] == "Evidence" for item in projection["events"])
    assert any(entry["session_id"] == "web:test" for entry in projection["runs"])

    planned_meta = first_event("task_planned")["metadata"]
    assert planned_meta["timeline_type"] == "plan"
    assert planned_meta["plan_mode"] == "team"
    assert planned_meta["strategy"] == "sequence"
    assert planned_meta["selected_skill_names"] == ["research-workflow"]

    skill_meta = first_event("skill_selected")["metadata"]
    assert skill_meta["timeline_type"] == "skill"
    assert skill_meta["skill_names"] == ["research-workflow"]

    team_meta = first_event("agent_team_created")["metadata"]
    assert team_meta["timeline_type"] == "agent_team"
    assert team_meta["team_run_ids"] == ["sub-run"]

    node_meta = first_event("agent_finished")["metadata"]
    assert node_meta["timeline_type"] == "agent_progress"
    assert "node_result" not in node_meta

    result_event = first_event("task_result_ready")
    assert result_event["metadata"]["timeline_type"] == "result"
    assert result_event["status"] == "done"

    acceptance_event = first_event("task_acceptance_recorded")
    assert acceptance_event["metadata"]["timeline_type"] == "acceptance"
|
|
|
|
|
|
def test_process_projection_maps_failed_task_team_events(tmp_path: Path) -> None:
    """Project a failed team run and check the error status on team and node events.

    One failed sub run is stored and a ``task_team_run_failed`` event is
    appended to ``web:test``. The projected ``agent_team_created`` and
    ``agent_finished`` events must both report ``status == "error"``.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    failed_record = RunRecord(
        run_id="failed-sub-run",
        session_id="failed-sub-session",
        task_id="task-1",
        attempt_index=1,
        task_text="failed sub task",
        started_at="2026-01-01T00:00:01+00:00",
        ended_at="2026-01-01T00:00:02+00:00",
        success=False,
        finish_reason="error",
    )
    run_store.append_run_record(failed_record)

    failed_node = {
        "node_id": "research",
        "success": False,
        "error": "source unavailable",
        "run_id": "failed-sub-run",
        "finish_reason": "error",
    }
    failure_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "team_success": False,
        "team_run_ids": ["failed-sub-run"],
        "error": "research node failed",
        "node_results": [failed_node],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_team_run_failed",
        event_payload=failure_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    def first_event(kind):
        # First projected event of the given kind; raises StopIteration when absent.
        return next(item for item in projection["events"] if item["kind"] == kind)

    team_event = first_event("agent_team_created")
    assert team_event["status"] == "error"
    assert team_event["text"] == "Team 执行未完成 / 子节点失败"
    assert team_event["metadata"]["timeline_type"] == "agent_team"
    assert team_event["metadata"]["team_run_ids"] == ["failed-sub-run"]

    node_event = first_event("agent_finished")
    assert node_event["status"] == "error"
    assert node_event["metadata"]["timeline_type"] == "agent_progress"
|
|
|
|
|
|
def test_process_projection_uses_normalized_plan_metadata_defaults(tmp_path: Path) -> None:
    """Check that ``None`` plan fields are projected as ``"single"``.

    The planned event is written with ``plan_mode`` and ``strategy`` set to
    ``None``; both the root run metadata and the ``task_planned`` event
    metadata are expected to report ``"single"`` for each field.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    unset_plan_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": None,
        "strategy": None,
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=unset_plan_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    root_run = next(
        entry for entry in projection["runs"] if entry["run_id"] == "task:task-1:attempt:1"
    )
    root_meta = root_run["metadata"]
    assert root_meta["plan_mode"] == "single"
    assert root_meta["strategy"] == "single"

    planned_event = next(
        item for item in projection["events"] if item["kind"] == "task_planned"
    )
    planned_meta = planned_event["metadata"]
    assert planned_meta["plan_mode"] == "single"
    assert planned_meta["strategy"] == "single"
|
|
|
|
|
|
def test_process_projection_emits_skill_card_from_main_run_receipts(tmp_path: Path) -> None:
    """Check that a main-run activation receipt yields a ``skill_selected`` event.

    The plan selects no skills, but the stored main run carries one
    ``SkillActivationReceipt`` for ``web-operation``. After a synthesis event
    points at that run, the projection must contain a ``skill_selected`` event
    for ``main-run`` stamped with the receipt's ``activated_at`` time.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    receipt = SkillActivationReceipt(
        run_id="main-run",
        session_id="web:test",
        skill_name="web-operation",
        skill_version="1",
        content_hash="hash",
        activated_at="2026-01-01T00:00:03+00:00",
        activation_reason="Needs live web lookup.",
    )
    main_record = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
        activated_skills=[receipt],
    )
    run_store.append_run_record(main_record)

    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "single",
        "strategy": "single",
        "selected_skill_names": [],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )
    session.append_message(
        "web:test",
        role="system",
        event_type="task_synthesis_completed",
        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    skill_events = []
    for item in projection["events"]:
        if item["kind"] == "skill_selected" and item["run_id"] == "main-run":
            skill_events.append(item)

    assert skill_events
    first_skill = skill_events[0]
    assert first_skill["metadata"]["timeline_type"] == "skill"
    assert first_skill["metadata"]["skill_names"] == ["web-operation"]
    assert first_skill["created_at"] == "2026-01-01T00:00:03+00:00"
|
|
|
|
|
|
def test_process_projection_prefers_skill_activation_snapshot_over_synthesis_fallback(tmp_path: Path) -> None:
    """Check that a snapshot event gives exactly one skill card, placed before the tool call.

    The main run carries an activation receipt, and the session also records a
    ``skill_activation_snapshotted`` event followed by an assistant message with
    a tool call and a synthesis event. Exactly one ``skill_selected`` event for
    ``main-run`` is expected; its id must end with ``:skill-activation`` and it
    must precede the ``tool_call_started`` event in the projected list.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    receipt = SkillActivationReceipt(
        run_id="main-run",
        session_id="web:test",
        skill_name="web-operation",
        skill_version="1",
        content_hash="hash",
        activated_at="2026-01-01T00:00:03+00:00",
        activation_reason="Needs live web lookup.",
    )
    main_record = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
        activated_skills=[receipt],
    )
    run_store.append_run_record(main_record)

    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "single",
        "strategy": "single",
        "selected_skill_names": [],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    # Snapshot event repeating the receipt fields as a plain dict.
    snapshot_receipt = {
        "run_id": "main-run",
        "session_id": "web:test",
        "skill_name": "web-operation",
        "skill_version": "1",
        "content_hash": "hash",
        "activated_at": "2026-01-01T00:00:03+00:00",
        "activation_reason": "Needs live web lookup.",
    }
    snapshot_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "receipts": [snapshot_receipt],
    }
    session.append_message(
        "web:test",
        run_id="main-run",
        role="system",
        event_type="skill_activation_snapshotted",
        event_payload=snapshot_payload,
        context_visible=False,
    )

    fetch_call = {"id": "call-1", "name": "web_fetch", "arguments": {"url": "https://example.com"}}
    session.append_message(
        "web:test",
        run_id="main-run",
        role="assistant",
        event_type="assistant_message_added",
        event_payload={"task_id": "task-1", "attempt_index": 1},
        content="Searching",
        tool_calls=[fetch_call],
        context_visible=False,
    )
    session.append_message(
        "web:test",
        role="system",
        event_type="task_synthesis_completed",
        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    skill_events = []
    for item in projection["events"]:
        if item["kind"] == "skill_selected" and item["run_id"] == "main-run":
            skill_events.append(item)

    assert len(skill_events) == 1
    only_skill = skill_events[0]
    assert only_skill["event_id"].endswith(":skill-activation")
    assert only_skill["created_at"] == "2026-01-01T00:00:03+00:00"

    tool_event = next(
        item for item in projection["events"] if item["kind"] == "tool_call_started"
    )
    skill_position = projection["events"].index(skill_events[0])
    tool_position = projection["events"].index(tool_event)
    assert skill_position < tool_position
|
|
|
|
|
|
def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -> None:
    """Check that a tool call and its result are projected as tool events.

    An assistant message with a ``multi_search`` tool call and the matching
    tool-result message are appended under ``main-run``. The projection must
    expose a ``tool_call_started`` and a ``tool_call_finished`` event carrying
    the tool name, and the result must be marked successful.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    main_record = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(main_record)

    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload={"task_id": "task-1", "attempt_index": 1},
        context_visible=False,
    )

    search_call = {
        "id": "call-1",
        "name": "multi_search",
        "arguments": {"query": "Macau cafe near Bóvia"},
    }
    session.append_message(
        "web:test",
        run_id="main-run",
        role="assistant",
        event_type="assistant_message_added",
        event_payload={"task_id": "task-1"},
        content="Searching",
        tool_calls=[search_call],
        context_visible=False,
    )
    session.append_message(
        "web:test",
        run_id="main-run",
        role="tool",
        event_type="tool_result_recorded",
        event_payload={"success": True, "error": None},
        content="Found 3 restaurants",
        tool_name="multi_search",
        tool_call_id="call-1",
        context_visible=True,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    def first_event(kind):
        # First projected event of the given kind; raises StopIteration when absent.
        return next(item for item in projection["events"] if item["kind"] == kind)

    started = first_event("tool_call_started")
    assert started["metadata"]["timeline_type"] == "tool_call"
    assert started["metadata"]["tool_name"] == "multi_search"
    assert started["run_id"] == "main-run"

    finished_meta = first_event("tool_call_finished")["metadata"]
    assert finished_meta["timeline_type"] == "tool_result"
    assert finished_meta["tool_name"] == "multi_search"
    assert finished_meta["success"] is True
|
|
|
|
|
|
def test_process_projection_marks_root_done_when_result_is_ready(tmp_path: Path) -> None:
    """Check that the root attempt run is ``done`` once evidence is recorded.

    After planned, synthesis-completed and evidence-recorded events for
    ``task-1`` attempt 1, the root run ``task:task-1:attempt:1`` must report
    ``status == "done"`` and a non-``None`` ``finished_at``.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    main_record = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="send email",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(main_record)

    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "single",
        "strategy": "single",
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    synthesis_payload = {"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"}
    session.append_message(
        "web:test",
        role="system",
        event_type="task_synthesis_completed",
        event_payload=synthesis_payload,
        context_visible=False,
    )

    evidence_payload = {"task_id": "task-1", "attempt_index": 1, "evidence_status": "recorded"}
    session.append_message(
        "web:test",
        run_id="main-run",
        role="system",
        event_type="task_evidence_recorded",
        event_payload=evidence_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    root_run = next(
        entry for entry in projection["runs"] if entry["run_id"] == "task:task-1:attempt:1"
    )
    assert root_run["status"] == "done"
    assert root_run["finished_at"] is not None
|
|
|
|
|
|
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
    """Check that ephemeral guidance used by a team node appears as an artifact.

    The plan and the team-run result both reference ephemeral guidance
    ``eg_123`` for the ``research`` node. The projected sub run must carry that
    id in its metadata, and the first projected artifact must be
    ``sub-run:ephemeral-guidance:eg_123`` with the guidance name attached.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    sub_record = RunRecord(
        run_id="sub-run",
        session_id="sub-session",
        task_id="task-1",
        attempt_index=1,
        task_text="sub task",
        started_at="2026-01-01T00:00:01+00:00",
        ended_at="2026-01-01T00:00:02+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(sub_record)

    # Plan event: no published skill selected, ephemeral guidance generated instead.
    resolution_entry = {
        "node_id": "research",
        "skill_query": "research workflow",
        "selected_skill_names": [],
        "ephemeral_guidance_id": "eg_123",
        "ephemeral_guidance_name": "research-workflow",
        "ephemeral_used": True,
        "reason": "generated ephemeral guidance",
    }
    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "team",
        "strategy": "sequence",
        "node_ids": ["research"],
        "ephemeral_guidance_ids": ["eg_123"],
        "skill_resolution_report": [resolution_entry],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    research_result = {
        "node_id": "research",
        "success": True,
        "output_text": "evidence",
        "run_id": "sub-run",
        "skill_query": "research workflow",
        "selected_skill_names": [],
        "ephemeral_skill_names": ["ephemeral:research-workflow"],
        "ephemeral_guidance_id": "eg_123",
        "ephemeral_guidance_name": "research-workflow",
        "ephemeral_used": True,
        "finish_reason": "stop",
    }
    team_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "team_success": True,
        "team_run_ids": ["sub-run"],
        "node_results": [research_result],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_team_run_completed",
        event_payload=team_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    sub_run = next(entry for entry in projection["runs"] if entry["run_id"] == "sub-run")
    assert sub_run["metadata"]["ephemeral_guidance_id"] == "eg_123"
    assert projection["artifacts"][0]["artifact_id"] == "sub-run:ephemeral-guidance:eg_123"
    assert projection["artifacts"][0]["metadata"]["ephemeral_guidance_name"] == "research-workflow"
|