feat(coordinator): 添加团队节点默认最大工具迭代次数配置

添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,
并修改 LocalAgentRunner 中的逻辑,在 envelope 未指定该值时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件:finish_reason 为 "max_tool_iterations_finalized" 且输出可用时视为运行成功;
同时添加对原始工具调用输出的检测,若最终输出只是原始工具调用则判定为失败,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,
为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,
同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,
提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,
简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理,改进团队运行完成消息显示,
并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码,将所有任务统一按单步执行处理,
简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,
只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
This commit is contained in:
2026-06-26 16:36:29 +08:00
parent 53b13e8eac
commit 520a21a027
360 changed files with 13271 additions and 1848 deletions

View File

@ -1,10 +1,13 @@
import asyncio
import json
from contextlib import suppress
from types import SimpleNamespace
from typing import Any
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
from beaver.engine import loop as loop_module
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
@ -49,7 +52,7 @@ def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
asyncio.run(run_case())
def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> None:
def test_web_search_loop_guard_keeps_successful_low_quality_results_available() -> None:
guard = loop_module._WebSearchLoopGuard()
low_quality = json.dumps(
{
@ -63,21 +66,106 @@ def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> Non
assert guard.observe_result("web_search", low_quality) is None
assert guard.observe_result("web_search", low_quality) is None
guidance = guard.observe_result("web_search", low_quality)
assert guidance is not None
assert guidance["finish_reason"] == "web_search_low_quality_budget"
assert "weather beijing" in guidance["message"]
assert guard.observe_result("web_search", low_quality) is None
def test_web_search_loop_guard_resets_after_useful_result() -> None:
guard = loop_module._WebSearchLoopGuard()
low_quality = json.dumps({"success": True, "query": "weather", "quality": "low", "results": []})
failed_search = json.dumps({"success": False, "query": "weather", "error": "timeout"})
useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []})
assert guard.observe_result("web_search", low_quality) is None
assert guard.observe_result("web_search", failed_search) is None
assert guard.observe_result("web_search", useful) is None
assert guard.observe_result("web_search", low_quality) is None
assert guard.observe_result("web_search", low_quality) is None
assert guard.observe_result("web_search", failed_search) is None
assert guard.observe_result("web_search", failed_search) is None
assert guard.observe_result("web_search", low_quality) is not None
assert guard.observe_result("web_search", failed_search) is not None
class RecordingProvider(LLMProvider):
    """Stub provider that records which tool names are offered on each chat call."""

    def __init__(self) -> None:
        super().__init__()
        # One entry per chat() call; each entry lists the tool names passed in.
        self.tool_names_by_call: list[list[str]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int | None = None,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the names found in ``tools`` and reply with a fixed "done" response."""
        offered: list[str] = []
        for spec in tools or []:
            # Prefer the nested function-style name; fall back to a top-level "name".
            nested_name = spec.get("function", {}).get("name")
            offered.append(str(nested_name or spec.get("name")))
        self.tool_names_by_call.append(offered)
        return LLMResponse("done", provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        """Return the stub model identifier."""
        return "stub-model"
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ProviderBundle whose main runtime is a stub namespace."""
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
def test_task_mode_root_does_not_expose_agent_team_tool(tmp_path) -> None:
    """A task-mode root run must not be offered the ``run_agent_team`` tool."""
    recorder = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    pending_run = agent_loop.process_direct(
        "compare financial reports",
        session_id="session",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(recorder),
    )
    asyncio.run(pending_run)

    # Only the tools offered on the first provider call are inspected.
    first_call_tools = recorder.tool_names_by_call[0]
    assert "run_agent_team" not in first_call_tools
def test_default_engine_registry_does_not_register_agent_team_tool(tmp_path) -> None:
    """The booted default engine has no ``run_agent_team`` entry in its registry or tools."""
    engine_loader = EngineLoader(workspace=tmp_path)
    loaded = AgentLoop(loader=engine_loader).boot()

    registry = loaded.tool_registry
    assert registry is not None
    assert registry.get("run_agent_team") is None
    assert "run_agent_team" not in loaded.tools
def test_non_task_and_team_node_do_not_expose_agent_team_tool(tmp_path) -> None:
    """Neither an ordinary chat run nor a team child run is offered ``run_agent_team``."""
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    # Case 1: plain chat without task mode.
    chat_recorder = RecordingProvider()
    chat_run = agent_loop.process_direct(
        "ordinary chat",
        session_id="session",
        include_skill_assembly=False,
        provider_bundle=_bundle(chat_recorder),
    )
    asyncio.run(chat_run)

    # Case 2: a team child session running in task mode on the same loop.
    child_recorder = RecordingProvider()
    child_run = agent_loop.process_direct(
        "team child",
        session_id="session:team:child",
        parent_session_id="session",
        source="team:child",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(child_recorder),
    )
    asyncio.run(child_run)

    assert "run_agent_team" not in chat_recorder.tool_names_by_call[0]
    assert "run_agent_team" not in child_recorder.tool_names_by_call[0]

View File

@ -15,6 +15,7 @@ from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session.manager import SessionManager
from beaver.services.team_service import TeamService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.skills.drafts import DraftService
@ -232,9 +233,9 @@ def test_unknown_evidence_requirement_makes_node_partial(tmp_path: Path) -> None
result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider)))
assert result.success is False
assert result.completion_status == "partial"
assert result.evidence_gaps == ["unsupported evidence requirement: unknown_type"]
assert result.success is True
assert result.completion_status == "succeeded"
assert result.evidence_gaps == []
def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None:
@ -257,6 +258,90 @@ def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: P
assert result.evidence.finish_reason == "max_tool_iterations"
def test_team_node_accepts_finalized_tool_budget_output(tmp_path: Path) -> None:
    """A finalized tool-budget response with plain text output counts as success."""
    finalized = _response(
        "usable finalized output",
        finish_reason="max_tool_iterations_finalized",
    )
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([finalized])
    request = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="researcher", role="research"),
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(request, provider_bundle=_bundle(stub_provider)))

    assert outcome.success is True
    assert outcome.completion_status == "succeeded"
    assert outcome.finish_reason == "max_tool_iterations_finalized"
def test_team_node_rejects_finalized_raw_tool_call_output(tmp_path: Path) -> None:
    """A finalized response whose text is a raw tool-call markup is reported as failed."""
    raw_tool_call = '<DSMLtool_calls><DSMLinvoke name="web_fetch"></DSMLinvoke></DSMLtool_calls>'
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider(
        [_response(raw_tool_call, finish_reason="max_tool_iterations_finalized")]
    )
    request = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="researcher", role="research"),
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(request, provider_bundle=_bundle(stub_provider)))

    assert outcome.success is False
    assert outcome.completion_status == "failed"
    assert outcome.error == "finalized output is a raw tool call"
def test_team_node_defaults_to_larger_tool_iteration_budget(tmp_path: Path) -> None:
    """An envelope that sets no budget makes the runner pass max_tool_iterations=100."""
    sessions = SessionManager(tmp_path)
    seen_kwargs: dict[str, object] = {}

    class CapturingLoop:
        """Loop stand-in that records the keyword arguments given to process_direct."""

        profile = SimpleNamespace()
        loader = None
        is_running = False

        async def process_direct(self, task: str, **kwargs: object) -> SimpleNamespace:
            seen_kwargs.update(kwargs)
            child_session_id = str(kwargs["session_id"])
            # Create the session before returning the canned result.
            sessions.ensure_session(child_session_id, source="test")
            return SimpleNamespace(
                session_id=child_session_id,
                run_id="run-captured",
                output_text="done",
                finish_reason="stop",
            )

        def boot(self) -> SimpleNamespace:
            return SimpleNamespace(session_manager=sessions)

    request = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="researcher", role="research"),
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(CapturingLoop())
    outcome = asyncio.run(runner.run(request))

    assert outcome.success is True
    assert seen_kwargs["max_tool_iterations"] == 100
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
_publish_skill(
tmp_path,

View File

@ -323,6 +323,14 @@ def test_agent_defaults_include_runtime_controls(tmp_path) -> None:
service.close()
def test_agent_default_tool_iteration_budget_is_100(tmp_path) -> None:
    """An AgentService built from a fresh config path reports a tool budget of 100."""
    service = AgentService(config_path=tmp_path / "config.json")
    try:
        assert service.profile.max_tool_iterations == 100
    finally:
        # Close even when the assertion fails so the service is not leaked.
        service.close()
def test_agent_config_api_persists_and_reloads_defaults(tmp_path) -> None:
config_path = tmp_path / "config.json"
config_path.write_text(json.dumps({"agents": {"defaults": {}}}), encoding="utf-8")
@ -514,3 +522,16 @@ def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
assert local.managed is True
assert local.display_name == "个人智能体文件系统工具"
assert "beaver.interfaces.mcp.tools_server" in local.args
team_workflow = config.tools.mcp_servers["local_team_workflow_mcp"]
assert team_workflow.transport == "stdio"
assert team_workflow.kind == "local"
assert team_workflow.category == "team_workflow"
assert team_workflow.managed is True
assert team_workflow.display_name == "本地 Agent Team Workflow 工具"
assert team_workflow.args == [
"-m",
"beaver.interfaces.mcp.tools_server",
"--category",
"team_workflow",
]

View File

@ -0,0 +1,120 @@
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from typing import Any
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine import AgentRunResult
from beaver.tasks import TaskExecutionPlan, TaskRecord
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
class FakeTaskService:
    """Task-service stand-in for orchestrator tests.

    ``task`` is the record handed back by :meth:`append_run`. It may be passed
    to the constructor or assigned afterwards; it defaults to ``None`` so the
    attribute always exists.
    """

    def __init__(self, task: TaskRecord | None = None) -> None:
        # Initialise explicitly so append_run never hits a missing attribute.
        self.task = task

    def start_run(self, task_id: str, **_: Any) -> None:
        """Accept a run-start notification and ignore it."""
        return None

    def append_run(self, task_id: str, run_id: str, **_: Any) -> TaskRecord:
        """Return the configured task record, ignoring all arguments."""
        return self.task
class FakeSessionManager:
    """Session-manager stand-in that keeps appended messages in memory."""

    def __init__(self) -> None:
        # Each appended message is stored as one dict, in call order.
        self.events: list[dict[str, Any]] = []

    def append_message(self, session_id: str, **kwargs: Any) -> None:
        """Store the message keyword arguments together with its session id."""
        record: dict[str, Any] = {"session_id": session_id}
        record.update(kwargs)
        self.events.append(record)

    def update_latest_assistant_event_payload(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and do nothing."""
        return None

    def get_run_event_records(self, session_id: str, run_id: str) -> list[Any]:
        """Always report that no run event records exist."""
        return []
class LegacyTeamPlanner:
    """Planner stand-in that always returns a team-mode plan with one node."""

    async def plan(self, **_: Any) -> TaskExecutionPlan:
        """Return a fixed sequence-strategy team plan, ignoring all arguments."""
        collect_node = ExecutionNode("collect", "Collect", AgentDescriptor(name="collect"))
        legacy_graph = ExecutionGraph(strategy="sequence", nodes=[collect_node])
        return TaskExecutionPlan(
            mode="team",
            reason="legacy plan should be ignored by orchestrator",
            graph=legacy_graph,
        )
def _task() -> TaskRecord:
    """Build the fixed open task record shared by the tests in this module."""
    fields = {
        "task_id": "task-1",
        "session_id": "session-1",
        "description": "finance comparison",
        "goal": "finance comparison",
        "constraints": [],
        "priority": 0,
        "status": "open",
        "creator": "test",
        "created_at": "now",
        "updated_at": "now",
    }
    return TaskRecord(**fields)
def test_builtin_tools_do_not_export_legacy_agent_team_tool() -> None:
    """``AgentTeamTool`` is neither listed in ``__all__`` nor present as an attribute."""
    import beaver.tools.builtins as builtin_tools

    exported_names = builtin_tools.__all__
    assert "AgentTeamTool" not in exported_names
    assert not hasattr(builtin_tools, "AgentTeamTool")
def test_task_orchestrator_does_not_execute_legacy_planner_team_graph() -> None:
    """A planner that returns a team plan must still lead to the single-run path."""
    record = _task()
    fake_tasks = FakeTaskService()
    fake_tasks.task = record
    fake_sessions = FakeSessionManager()
    loaded = SimpleNamespace(
        task_service=fake_tasks,
        task_execution_planner=LegacyTeamPlanner(),
        session_manager=fake_sessions,
        run_memory_store=None,
    )
    orchestrator = TaskAttemptOrchestrator(
        loaded=loaded,
        create_loop=lambda: None,
        make_provider_bundle_for_task=lambda *_: None,
    )

    async def fail_if_called(*args: Any, **kwargs: Any) -> Any:
        raise AssertionError("legacy planner team graph must not start TeamService")

    async def runner(message: str, **kwargs: Any) -> AgentRunResult:
        return AgentRunResult(
            session_id="session-1",
            run_id="main-run",
            output_text="single path",
            finish_reason="stop",
            tool_iterations=0,
        )

    # Any attempt to take the team path now raises immediately.
    orchestrator._run_team_for_task = fail_if_called  # type: ignore[method-assign]

    run_kwargs = {
        "session_id": "session-1",
        "provider_bundle": SimpleNamespace(),
        "include_skill_assembly": False,
    }
    result = asyncio.run(
        orchestrator.run(
            message="compare finance",
            runner=runner,
            kwargs=run_kwargs,
            task=record,
        )
    )

    assert result.output_text == "single path"
    synthesis_events = []
    for event in fake_sessions.events:
        if event.get("event_type") == "task_synthesis_completed":
            synthesis_events.append(event)
    assert synthesis_events[0]["event_payload"]["task_outcome"] == "single"

View File

@ -20,3 +20,30 @@ def test_local_filesystem_mcp_exposes_personal_user_file_tools_only(tmp_path) ->
assert "search_files" not in names
assert "list_directory" not in names
assert all("personal agent file system" in tool.spec.description for tool in tools)
def test_team_workflow_mcp_exposes_workflow_tool_schemas(tmp_path) -> None:
    """The team_workflow category exposes five workflow tools with the expected schemas."""
    tools, _context = _category_tools("team_workflow", tmp_path)
    specs = {}
    for tool in tools:
        specs[tool.spec.name] = tool.spec

    # Dict order mirrors the order in which the tools were returned.
    expected_names = [
        "SequentialWorkflow",
        "ConcurrentWorkflow",
        "MixtureOfAgents",
        "AgentRearrange",
        "GraphWorkflow",
    ]
    assert list(specs) == expected_names

    sequential_schema = specs["SequentialWorkflow"].input_schema
    assert sequential_schema["required"] == ["task", "agents"]
    assert sequential_schema["properties"]["agents"]["items"]["required"] == [
        "name",
        "instruction",
    ]

    graph_schema = specs["GraphWorkflow"].input_schema
    assert graph_schema["required"] == [
        "task",
        "agents",
        "edges",
        "output_agent",
    ]
    assert graph_schema["properties"]["edges"]["items"]["minItems"] == 2

    rearrange_schema = specs["AgentRearrange"].input_schema
    assert rearrange_schema["required"] == ["task", "agents", "flow"]

View File

@ -205,6 +205,7 @@ def test_process_projection_maps_failed_task_team_events(tmp_path: Path) -> None
team_event = next(event for event in projection["events"] if event["kind"] == "agent_team_created")
assert team_event["status"] == "error"
assert team_event["text"] == "Team 执行未完成 / 子节点失败"
assert team_event["metadata"]["timeline_type"] == "agent_team"
assert team_event["metadata"]["team_run_ids"] == ["failed-sub-run"]
@ -297,6 +298,101 @@ def test_process_projection_emits_skill_card_from_main_run_receipts(tmp_path: Pa
assert skill_events
assert skill_events[0]["metadata"]["timeline_type"] == "skill"
assert skill_events[0]["metadata"]["skill_names"] == ["web-operation"]
assert skill_events[0]["created_at"] == "2026-01-01T00:00:03+00:00"
def test_process_projection_prefers_skill_activation_snapshot_over_synthesis_fallback(tmp_path: Path) -> None:
    """A snapshot event yields exactly one skill card, placed before the tool call card."""
    session_id = "web:test"
    run_id = "main-run"
    activated_at = "2026-01-01T00:00:03+00:00"
    # The same receipt fields feed both the run record and the snapshot event.
    receipt_fields = {
        "run_id": run_id,
        "session_id": session_id,
        "skill_name": "web-operation",
        "skill_version": "1",
        "content_hash": "hash",
        "activated_at": activated_at,
        "activation_reason": "Needs live web lookup.",
    }

    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    run_record = RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at=activated_at,
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
        activated_skills=[SkillActivationReceipt(**receipt_fields)],
    )
    run_store.append_run_record(run_record)

    # 1) The plan event carries no selected skills.
    session.append_message(
        session_id,
        role="system",
        event_type="task_execution_planned",
        event_payload={
            "task_id": "task-1",
            "attempt_index": 1,
            "plan_mode": "single",
            "strategy": "single",
            "selected_skill_names": [],
        },
        context_visible=False,
    )
    # 2) The activation snapshot is recorded for the main run.
    session.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="skill_activation_snapshotted",
        event_payload={
            "task_id": "task-1",
            "attempt_index": 1,
            "receipts": [dict(receipt_fields)],
        },
        context_visible=False,
    )
    # 3) An assistant message issues a tool call in the same run.
    session.append_message(
        session_id,
        run_id=run_id,
        role="assistant",
        event_type="assistant_message_added",
        event_payload={"task_id": "task-1", "attempt_index": 1},
        content="Searching",
        tool_calls=[{"id": "call-1", "name": "web_fetch", "arguments": {"url": "https://example.com"}}],
        context_visible=False,
    )
    # 4) The synthesis event references the main run.
    session.append_message(
        session_id,
        role="system",
        event_type="task_synthesis_completed",
        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": run_id},
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project(session_id)
    projected = projection["events"]

    skill_events = []
    for event in projected:
        if event["kind"] == "skill_selected" and event["run_id"] == run_id:
            skill_events.append(event)

    assert len(skill_events) == 1
    skill_card = skill_events[0]
    assert skill_card["event_id"].endswith(":skill-activation")
    assert skill_card["created_at"] == activated_at

    tool_event = next(event for event in projected if event["kind"] == "tool_call_started")
    assert projected.index(skill_card) < projected.index(tool_event)
def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -> None:

View File

@ -2,38 +2,9 @@ from __future__ import annotations
from beaver.skills.assembler.task_assembler import SkillAssembler
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.catalog.utils import extract_skill_team_template
def test_extract_team_template_returns_none_when_block_is_absent() -> None:
result = extract_skill_team_template("# Ordinary Skill")
assert result.template is None
assert result.warnings == []
def test_extract_team_template_parses_valid_json_block() -> None:
result = extract_skill_team_template(
"```beaver-team-template\n"
'{"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]}\n'
"```"
)
assert result.template == {
"version": 1,
"nodes": [{"node_id": "collect", "task": "Collect"}],
}
assert result.warnings == []
def test_invalid_template_is_warning_not_skill_load_failure() -> None:
result = extract_skill_team_template("```beaver-team-template\nnot-json\n```")
assert result.template is None
assert result.warnings == ["team template JSON is invalid"]
def test_loader_and_assembler_propagate_team_template_to_skill_context(tmp_path) -> None:
def test_beaver_team_template_block_is_not_runtime_metadata(tmp_path) -> None:
skill_dir = tmp_path / "plugin-skills" / "financial-comparison"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
@ -56,10 +27,7 @@ def test_loader_and_assembler_propagate_team_template_to_skill_context(tmp_path)
context = SkillAssembler(loader)._activate_skill_contexts(["financial-comparison"])[0]
assert record is not None
assert record.team_template == {
"version": 1,
"nodes": [{"node_id": "collect", "task": "Collect official sources"}],
}
assert record.team_template_warnings == []
assert context.team_template == record.team_template
assert context.team_template_warnings == []
assert not hasattr(record, "team_template")
assert not hasattr(record, "team_template_warnings")
assert not hasattr(context, "team_template")
assert not hasattr(context, "team_template_warnings")

View File

@ -55,12 +55,11 @@ def test_evaluate_node_evidence_accepts_url_in_successful_tool_content() -> None
assert evaluate_node_evidence(evidence, ["tool_result", "url"], "done") == []
def test_evaluate_node_evidence_checks_output_and_unknown_requirements() -> None:
def test_evaluate_node_evidence_checks_output_and_ignores_natural_language_requirements() -> None:
evidence = _run_evidence()
assert evaluate_node_evidence(evidence, ["output", "unknown_type"], " ") == [
assert evaluate_node_evidence(evidence, ["output", "至少3个价格信息来源"], " ") == [
"missing required evidence: output",
"unsupported evidence requirement: unknown_type",
]

View File

@ -3,19 +3,15 @@ from __future__ import annotations
import asyncio
from types import SimpleNamespace
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.tasks import SkillResolutionReport, TaskExecutionPlanner, TaskRecord
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
from beaver.tools.registry import ToolRegistry
from beaver.tasks import TaskExecutionPlanner, TaskRecord
class PlannerProvider(LLMProvider):
def __init__(self, response: str) -> None:
def __init__(self) -> None:
super().__init__()
self.response = response
self.calls: list[dict] = []
self.calls = 0
async def chat(
self,
@ -25,59 +21,18 @@ class PlannerProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
) -> LLMResponse:
self.calls.append(
{
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature,
"model": model,
"tools": tools,
}
self.calls += 1
return LLMResponse(
content='{"mode":"team"}',
finish_reason="stop",
provider_name="stub",
model="stub-model",
)
return LLMResponse(content=self.response, finish_reason="stop", provider_name="stub", model="stub-model")
def get_default_model(self) -> str:
return "stub-model"
class HangingPlannerProvider(LLMProvider):
async def chat(
self,
messages: list[dict],
tools: list[dict] | None = None,
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
) -> LLMResponse:
await asyncio.sleep(10)
return LLMResponse(content='{"mode":"team"}', finish_reason="stop", provider_name="stub", model="stub-model")
def get_default_model(self) -> str:
return "stub-model"
class SequencedPlannerProvider(PlannerProvider):
def __init__(self, responses: list[str]) -> None:
super().__init__(responses[0])
self.responses = list(responses)
async def chat(self, *args, **kwargs) -> LLMResponse:
self.response = self.responses.pop(0)
return await super().chat(*args, **kwargs)
class StubTool(BaseTool):
def __init__(self, name: str) -> None:
self._spec = ToolSpec(name=name, description=name, input_schema={"type": "object"})
@property
def spec(self) -> ToolSpec:
return self._spec
async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
raise AssertionError("Planner tests do not execute tools")
def _task() -> TaskRecord:
return TaskRecord(
task_id="task-1",
@ -93,55 +48,15 @@ def _task() -> TaskRecord:
)
def _bundle(response: str) -> ProviderBundle:
provider = PlannerProvider(response)
def _bundle(provider: PlannerProvider) -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=provider,
)
def _bundle_with_provider(provider: LLMProvider) -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=provider,
)
def _registry() -> ToolRegistry:
registry = ToolRegistry()
registry.register_many([StubTool("web_search"), StubTool("web_fetch"), StubTool("terminal")])
return registry
def _hanging_bundle() -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=HangingPlannerProvider(),
)
def test_planner_selects_single_mode() -> None:
plan = asyncio.run(
TaskExecutionPlanner().plan(
task=_task(),
user_message="implement workflow",
attempt_index=1,
provider_bundle=_bundle('{"mode":"single","reason":"main agent is enough"}'),
)
)
assert plan.mode == "single"
assert plan.graph is None
assert plan.reason == "main agent is enough"
def test_planner_skips_llm_for_simple_task() -> None:
provider = PlannerProvider('{"mode":"team","reason":"should not be used"}')
bundle = ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=provider,
)
def test_planner_skips_provider_for_simple_task() -> None:
provider = PlannerProvider()
task = _task()
task.description = "查询深圳天气"
task.goal = "查询深圳天气"
@ -151,409 +66,55 @@ def test_planner_skips_llm_for_simple_task() -> None:
task=task,
user_message="帮我查一下今天深圳天气",
attempt_index=1,
provider_bundle=bundle,
provider_bundle=_bundle(provider),
)
)
assert plan.mode == "single"
assert plan.graph is None
assert plan.reason == "planner_skipped_simple_task"
assert provider.calls == []
assert provider.calls == 0
def test_planner_builds_team_graph() -> None:
bundle = _bundle(
"""
{
"mode": "team",
"reason": "needs parallel review",
"strategy": "dag",
"nodes": [
{"node_id": "research", "task": "research options"},
{"node_id": "review", "task": "review result", "depends_on": ["research"]}
],
"final_synthesis_instruction": "merge the findings"
}
"""
)
provider = bundle.main_provider
def test_planner_replaces_team_planning_with_workflow_tools_without_provider_call() -> None:
provider = PlannerProvider()
plan = asyncio.run(
TaskExecutionPlanner().plan(
task=_task(),
user_message="implement workflow",
user_message="research and compare workflow options",
attempt_index=1,
provider_bundle=bundle,
provider_bundle=_bundle(provider),
skill_summaries=["docker-debug: Use docker logs before editing config."],
tool_hints=["terminal", "search_files"],
)
)
assert plan.is_team
assert plan.graph is not None
assert plan.graph.strategy == "dag"
assert [node.node_id for node in plan.graph.nodes] == ["research", "review"]
assert plan.graph.nodes[1].depends_on == ["research"]
assert plan.final_synthesis_instruction == "merge the findings"
assert isinstance(provider, PlannerProvider)
prompt = provider.calls[0]["messages"][1]["content"]
assert "Activated skill summaries" in prompt
assert "docker-debug: Use docker logs before editing config." in prompt
assert "terminal" in prompt
assert "search_files" in prompt
assert not plan.is_team
assert plan.mode == "single"
assert plan.graph is None
assert plan.reason == "planner_team_replaced_by_workflow_tools"
assert plan.final_synthesis_instruction == ""
assert provider.calls == 0
def test_planner_timeout_falls_back_to_single() -> None:
def test_planner_can_be_disabled_by_environment(monkeypatch) -> None:
monkeypatch.setenv("BEAVER_AGENT_TEAM_ENABLED", "0")
provider = PlannerProvider()
plan = asyncio.run(
TaskExecutionPlanner().plan(
task=_task(),
user_message="implement workflow",
user_message="research and compare workflow options",
attempt_index=1,
provider_bundle=_hanging_bundle(),
timeout_seconds=0.01,
provider_bundle=_bundle(provider),
)
)
assert plan.mode == "single"
assert plan.reason == "planner_failed"
assert "TimeoutError" in (plan.fallback_error or "")
assert plan.reason == "planner_disabled_by_environment"
assert provider.calls == 0
def test_planner_team_nodes_use_task_as_internal_skill_query() -> None:
plan = TaskExecutionPlanner().from_json(
"""
{
"mode": "team",
"reason": "needs skill-guided review",
"strategy": "sequence",
"nodes": [
{
"node_id": "api_review",
"task": "review API compatibility"
}
]
}
"""
)
assert plan.is_team
assert plan.graph is not None
node = plan.graph.nodes[0]
assert node.agent.name == "api_review"
assert node.agent.role == ""
assert node.agent.metadata["skill_query"] == "review API compatibility"
assert node.agent.metadata["required_capabilities"] == []
def test_planner_accepts_use_skill_and_skill_query() -> None:
plan = TaskExecutionPlanner().from_json(
"""
{
"mode": "team",
"strategy": "sequence",
"nodes": [
{
"node_id": "collect",
"task": "Collect official sources",
"use_skill": "official-source-research",
"skill_query": "official source verification"
}
]
}
"""
)
assert plan.is_team
assert plan.graph is not None
node = plan.graph.nodes[0]
assert node.agent.metadata["use_skill"] == "official-source-research"
assert node.agent.metadata["skill_query"] == "official source verification"
assert node.inherited_pinned_skills == []
assert node.allowed_tool_names is None
assert plan.planner_adaptation["node_skill_bindings"] == [
{
"node_id": "collect",
"use_skill": "official-source-research",
"skill_query": "official source verification",
}
]
def test_planner_defaults_skill_query_to_node_task_when_absent() -> None:
plan = TaskExecutionPlanner().from_json(
'{"mode":"team","strategy":"sequence","nodes":['
'{"node_id":"extract","task":"Extract financial metrics","use_skill":"financial-extraction"}]}'
)
assert plan.is_team
assert plan.graph is not None
assert plan.graph.nodes[0].agent.metadata["skill_query"] == "Extract financial metrics"
def test_planner_adaptation_records_unresolved_use_skill_fallback() -> None:
planner = TaskExecutionPlanner()
plan = planner.from_json(
'{"mode":"team","strategy":"sequence","nodes":['
'{"node_id":"extract","task":"Extract metrics","use_skill":"missing-skill",'
'"skill_query":"financial extraction"}]}'
)
report = SkillResolutionReport(
node_id="extract",
skill_query="financial extraction",
requested_skill_name="missing-skill",
exact_binding_used=False,
warnings=["use_skill unresolved: missing-skill"],
reason="matched published skill",
)
planner._merge_skill_resolution_adaptation(plan, [report])
assert plan.planner_adaptation["warnings"] == ["use_skill unresolved: missing-skill"]
assert plan.planner_adaptation["node_skill_bindings"][0]["fallback_reason"] == (
"use_skill unresolved; matched published skill"
)
def test_planner_invalid_outputs_fallback_to_single() -> None:
planner = TaskExecutionPlanner()
invalid_json = planner.from_json("not json")
unknown_strategy = planner.from_json(
'{"mode":"team","strategy":"moa","nodes":[{"node_id":"a","task":"a","agent":{"name":"a"}}]}'
)
too_many_nodes = planner.from_json(
'{"mode":"team","strategy":"parallel","nodes":['
+ ",".join(
'{"node_id":"n%s","task":"work","agent":{"name":"n%s"}}' % (index, index)
for index in range(7)
)
+ "]}"
)
cyclic = planner.from_json(
"""
{
"mode": "team",
"strategy": "dag",
"nodes": [
{"node_id": "a", "task": "a", "agent": {"name": "a"}, "depends_on": ["b"]},
{"node_id": "b", "task": "b", "agent": {"name": "b"}, "depends_on": ["a"]}
]
}
"""
)
assert invalid_json.mode == "single"
assert unknown_strategy.mode == "single"
assert too_many_nodes.mode == "single"
assert cyclic.mode == "single"
def test_template_plan_builds_generic_worker_and_preserves_v1_contract_fields() -> None:
plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
"""
{
"mode": "team",
"strategy": "dag",
"nodes": [
{
"node_id": "collect",
"task": "Collect official sources",
"requested_tools": ["web_search"],
"evidence_contract": {"entities": ["MGM", "Galaxy"]},
"block_downstream_on_partial": true
}
],
"adaptation": {"template_used": true}
}
"""
)
assert plan.is_team
assert plan.graph is not None
node = plan.graph.nodes[0]
assert node.agent.name == "collect"
assert node.agent.role == ""
assert node.agent.metadata["sub_agent_kind"] == "generic_skill_worker"
assert node.allowed_tool_names == ["web_search"]
assert node.evidence_contract == {"entities": ["MGM", "Galaxy"]}
assert node.block_downstream_on_partial is True
assert plan.planner_adaptation["template_used"] is True
def test_unknown_tool_is_removed_and_warned() -> None:
plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
'{"mode":"team","strategy":"sequence","nodes":['
'{"node_id":"collect","task":"Collect","requested_tools":["web_search","not_real"]}]}'
)
assert plan.is_team
assert plan.graph is not None
assert plan.graph.nodes[0].allowed_tool_names == ["web_search"]
assert "unknown tool removed: not_real" in plan.planner_adaptation["warnings"]
def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None:
    """Requesting ``terminal`` keeps the plan in team mode but strips the tool with a warning."""
    payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect","requested_tools":["web_search","terminal"]}]}'
    )
    planner = TaskExecutionPlanner(tool_registry=_registry())
    plan = planner.from_json(payload)

    assert plan.is_team
    assert plan.graph is not None

    first_node = plan.graph.nodes[0]
    assert first_node.allowed_tool_names == ["web_search"]

    warnings = plan.planner_adaptation["warnings"]
    assert "requires_high_risk_review: terminal" in warnings
def test_planner_rejects_agent_and_role_node_fields() -> None:
    """Nodes carrying an ``agent`` or ``role`` field fall back to single mode.

    In both cases the offending field name must appear in ``fallback_error``.
    """
    planner = TaskExecutionPlanner(tool_registry=_registry())

    # Both plans are parsed before any assertion runs, keyed by the rejected field.
    plans_by_field = {
        "agent": planner.from_json(
            '{"mode":"team","strategy":"sequence","nodes":['
            '{"node_id":"collect","task":"Collect","agent":{"name":"researcher"}}]}'
        ),
        "role": planner.from_json(
            '{"mode":"team","strategy":"sequence","nodes":['
            '{"node_id":"collect","task":"Collect","role":"researcher"}]}'
        ),
    }

    for field_name, rejected_plan in plans_by_field.items():
        assert rejected_plan.mode == "single"
        assert field_name in (rejected_plan.fallback_error or "")
def test_planner_records_primary_template_selection_and_ignored_templates() -> None:
    """Plan with two activated skills that each carry a team template.

    The adaptation record must name the first skill as the selected template
    and the second as ignored, while the planner prompt still contains the
    content of both skills.
    """
    finance_skill = SkillContext(
        name="financial-comparison",
        version="v1",
        content="Compare official financial disclosures.",
        team_template={"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]},
    )
    chart_skill = SkillContext(
        name="chart-reporting",
        version="v2",
        content="Render chart-ready Markdown.",
        team_template={"version": 1, "nodes": [{"node_id": "report", "task": "Report"}]},
    )
    provider = PlannerProvider(
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect official sources"}],'
        '"adaptation":{"template_used":true}}'
    )

    planner = TaskExecutionPlanner(tool_registry=_registry())
    pending_plan = planner.plan(
        task=_task(),
        user_message="compare financial workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
        activated_skills=[finance_skill, chart_skill],
    )
    plan = asyncio.run(pending_plan)

    expected_adaptation = {
        "template_used": True,
        "selected_template": "financial-comparison",
        "selection_reason": "first activated skill with a valid team template",
        "ignored_templates": ["chart-reporting"],
        "warnings": [],
    }
    assert plan.planner_adaptation == expected_adaptation

    # Second message of the first provider call is the prompt under inspection.
    prompt = provider.calls[0]["messages"][1]["content"]
    expected_fragments = (
        '"skill_name": "financial-comparison"',
        "Compare official financial disclosures.",
        "Render chart-ready Markdown.",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
def test_malformed_planner_output_repairs_once_without_tools() -> None:
    """An unparseable first reply triggers one repair call that is made without tools."""
    scripted_replies = [
        "not json",
        '{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect"}]}',
    ]
    provider = SequencedPlannerProvider(scripted_replies)

    planner = TaskExecutionPlanner(tool_registry=_registry())
    pending_plan = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
    )
    plan = asyncio.run(pending_plan)

    assert plan.is_team
    assert len(provider.calls) == 2

    repair_call = provider.calls[1]
    assert repair_call["tools"] is None
    repair_prompt = repair_call["messages"][1]["content"]
    assert "Repair the invalid planner JSON" in repair_prompt
def test_failed_planner_repair_falls_back_to_single() -> None:
    """Two unparseable replies in a row end in a single-mode fallback plan after two calls."""
    scripted_replies = ["not json", "still not json"]
    provider = SequencedPlannerProvider(scripted_replies)

    planner = TaskExecutionPlanner(tool_registry=_registry())
    pending_plan = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
    )
    plan = asyncio.run(pending_plan)

    assert plan.mode == "single"
    assert plan.reason == "planner_fallback_single"
    assert len(provider.calls) == 2
def test_finance_template_adapts_to_task_oriented_read_only_graph() -> None:
    """Parse a four-node finance DAG and check ids, roles, tools and the report task text.

    Node ids must be task-oriented (none of the listed role names), every
    agent role must be empty, and the final node must request no tools.
    """
    planner = TaskExecutionPlanner(tool_registry=_registry())
    # The JSON literal is kept flush-left so its bytes stay exactly as written.
    raw_plan = """
{
"mode": "team",
"strategy": "dag",
"nodes": [
{
"node_id": "collect_official_sources",
"task": "Collect MGM and Galaxy official financial disclosures",
"requested_tools": ["web_search", "web_fetch"],
"required_evidence": ["tool_result", "url"]
},
{
"node_id": "extract_financial_metrics",
"task": "Extract comparable financial metrics from collected sources",
"depends_on": ["collect_official_sources"],
"requested_tools": ["web_fetch"],
"required_evidence": ["output"]
},
{
"node_id": "validate_metrics",
"task": "Validate metric units, periods, and source consistency",
"depends_on": ["extract_financial_metrics"],
"required_evidence": ["output"]
},
{
"node_id": "generate_chart_report",
"task": "Generate a Markdown comparison table and chart-ready data without claiming an image or file artifact",
"depends_on": ["validate_metrics"],
"requested_tools": [],
"required_evidence": ["output"]
}
]
}
"""
    plan = planner.from_json(raw_plan)

    assert plan.is_team
    assert plan.graph is not None

    expected_order = [
        "collect_official_sources",
        "extract_financial_metrics",
        "validate_metrics",
        "generate_chart_report",
    ]
    assert [node.node_id for node in plan.graph.nodes] == expected_order
    assert all(node.agent.role == "" for node in plan.graph.nodes)

    role_like_ids = {"researcher", "writer", "reviewer", "analyst"}
    assert not role_like_ids.intersection(node.node_id for node in plan.graph.nodes)

    first_node = plan.graph.nodes[0]
    last_node = plan.graph.nodes[-1]
    assert first_node.allowed_tool_names == ["web_search", "web_fetch"]
    assert last_node.allowed_tool_names == []

    report_task = last_node.task.lower()
    assert "markdown" in report_task
    assert "without claiming an image or file artifact" in report_task
def test_planner_no_longer_exposes_json_to_team_graph_parser() -> None:
    """A default-constructed planner must not have a ``from_json`` attribute."""
    planner = TaskExecutionPlanner()
    assert not hasattr(planner, "from_json")

View File

@ -1,233 +0,0 @@
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from typing import Any
import pytest
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode, NodeRunResult, TeamRunResult
from beaver.engine import AgentRunResult
from beaver.tasks import TaskExecutionPlan, TaskRecord
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
def _plan(*, optional_second: bool = False) -> TaskExecutionPlan:
    """Build a team plan with a ``sequence`` graph of two nodes: "collect", then "report".

    Args:
        optional_second: When true, the "report" node is created with
            ``required_for_completion=False``; otherwise it is required.
    """
    collect_node = ExecutionNode("collect", "Collect", AgentDescriptor(name="collect"))
    report_node = ExecutionNode(
        "report",
        "Report",
        AgentDescriptor(name="report"),
        required_for_completion=not optional_second,
    )
    graph = ExecutionGraph(strategy="sequence", nodes=[collect_node, report_node])
    return TaskExecutionPlan(mode="team", reason="test team", graph=graph)
def _team_result(*results: NodeRunResult) -> TeamRunResult:
    """Wrap node results in a ``TeamRunResult`` that succeeds only if every node succeeded."""
    every_node_succeeded = all(item.success for item in results)
    return TeamRunResult(
        success=every_node_succeeded,
        summary="team summary",
        node_results=list(results),
    )
def _result(node_id: str, status: str, *, gaps: list[str] | None = None) -> NodeRunResult:
    """Build a ``NodeRunResult`` whose fields are derived from ``status``.

    Args:
        node_id: Identifier of the node; also used to form the output text.
        status: Completion status. Only "succeeded" counts as success, and
            only "blocked" yields a "blocked" finish reason.
        gaps: Optional evidence gaps, copied into a new list.
    """
    succeeded = status == "succeeded"

    if status == "blocked":
        finish_reason = "blocked"
    else:
        finish_reason = "stop"

    if succeeded:
        error = None
    else:
        error = f"{status} node"

    return NodeRunResult(
        node_id=node_id,
        success=succeeded,
        output_text=f"{node_id} output",
        finish_reason=finish_reason,
        error=error,
        completion_status=status,
        evidence_gaps=list(gaps or []),
    )
def test_required_partial_node_marks_synthesis_incomplete() -> None:
    """A required node finishing ``partial`` yields an incomplete outcome with its gaps reported."""
    url_gap = "missing required evidence: url"
    plan = _plan()
    team_result = _team_result(
        _result("collect", "partial", gaps=[url_gap]),
        _result("report", "succeeded"),
    )

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "incomplete"
    assert metadata["incomplete_node_ids"] == ["collect"]
    assert metadata["evidence_gaps"] == {"collect": [url_gap]}
    assert "Task outcome: incomplete" in context
    assert url_gap in context
    assert prefix.startswith("任务未完成:")
@pytest.mark.parametrize("status", ["failed", "blocked"])
def test_required_failed_or_blocked_node_marks_synthesis_incomplete(status: str) -> None:
    """A required node ending ``failed`` or ``blocked`` marks the outcome incomplete."""
    plan = _plan()
    team_result = _team_result(
        _result("collect", status),
        _result("report", "succeeded"),
    )

    _context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "incomplete"
    assert metadata["incomplete_node_ids"] == ["collect"]
    assert metadata["node_statuses"]["collect"] == status
    # Any non-empty prefix is acceptable here.
    assert prefix
def test_optional_failed_node_does_not_force_incomplete() -> None:
    """A failed node that is not required for completion leaves the outcome complete."""
    plan = _plan(optional_second=True)
    team_result = _team_result(
        _result("collect", "succeeded"),
        _result("report", "failed"),
    )

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "complete"
    assert metadata["incomplete_node_ids"] == []
    assert "Task outcome: complete" in context
    assert prefix == ""
def test_all_required_nodes_succeeded_is_complete() -> None:
    """When both required nodes succeed the outcome is complete and no prefix is produced."""
    plan = _plan()
    team_result = _team_result(
        _result("collect", "succeeded"),
        _result("report", "succeeded"),
    )

    _context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "complete"
    assert prefix == ""
def test_single_plan_outcome_does_not_add_prefix() -> None:
    """A single-mode plan with no team result reports outcome ``single`` and an empty prefix."""
    single_plan = TaskExecutionPlan.single("single")

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(single_plan, None)

    assert metadata["task_outcome"] == "single"
    assert "Task outcome: single" in context
    assert prefix == ""
class FakeTaskService:
    """Minimal task-service double for the orchestrator tests.

    ``start_run`` is a no-op and ``append_run`` returns whatever is stored in
    ``task``. The record can be passed to the constructor or assigned to the
    ``task`` attribute afterwards.
    """

    def __init__(self, task: TaskRecord | None = None) -> None:
        # Define the attribute up front so append_run cannot raise
        # AttributeError when a test has not assigned ``task`` yet.
        self.task = task

    def start_run(self, task_id: str, **_: Any) -> None:
        """Accept the call and do nothing."""
        return None

    def append_run(self, task_id: str, run_id: str, **_: Any) -> TaskRecord | None:
        """Return the stored task record (``None`` if none was provided)."""
        return self.task
class FakeSessionManager:
    """In-memory session-manager double that records appended messages in ``events``."""

    def __init__(self) -> None:
        self.events: list[dict[str, Any]] = []

    def append_message(self, session_id: str, **kwargs: Any) -> None:
        """Record the call as one dict: ``session_id`` first, then the keyword arguments."""
        entry: dict[str, Any] = {"session_id": session_id}
        entry.update(kwargs)
        self.events.append(entry)

    def update_latest_assistant_event_payload(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and do nothing."""
        return None

    def get_run_event_records(self, session_id: str, run_id: str) -> list[Any]:
        """Always return an empty list of event records."""
        return []
class FixedPlanner:
    """Planner double whose ``plan`` coroutine always returns the plan given at construction."""

    def __init__(self, plan: TaskExecutionPlan) -> None:
        # Stored under a different name because ``plan`` is also the method name.
        self.fixed_plan = plan

    async def plan(self, **_: Any) -> TaskExecutionPlan:
        """Ignore all keyword arguments and return the stored plan."""
        stored_plan = self.fixed_plan
        return stored_plan
def _task() -> TaskRecord:
    """Return a fixed open ``TaskRecord`` ("task-1" in "session-1") for these tests."""
    summary = "finance comparison"
    timestamp = "now"
    fields: dict[str, Any] = {
        "task_id": "task-1",
        "session_id": "session-1",
        "description": summary,
        "goal": summary,
        "constraints": [],
        "priority": 0,
        "status": "open",
        "creator": "test",
        "created_at": timestamp,
        "updated_at": timestamp,
    }
    return TaskRecord(**fields)
def test_incomplete_team_still_runs_tool_free_synthesis_and_prefixes_output() -> None:
    """Run the orchestrator over a stubbed team result with a partial required node.

    The test checks that the runner is invoked exactly once with tools
    disabled, that the final output starts with the incomplete notice, and
    that the ``task_synthesis_completed`` event payload records the outcome.
    """
    url_gap = "missing required evidence: url"
    team_plan = _plan()
    stub_team_result = _team_result(
        _result("collect", "partial", gaps=[url_gap]),
        _result("report", "succeeded"),
    )
    task = _task()

    task_service = FakeTaskService()
    task_service.task = task
    session_manager = FakeSessionManager()
    loaded = SimpleNamespace(
        task_service=task_service,
        task_execution_planner=FixedPlanner(team_plan),
        session_manager=session_manager,
        run_memory_store=None,
    )
    orchestrator = TaskAttemptOrchestrator(
        loaded=loaded,
        create_loop=lambda: None,
        make_provider_bundle_for_task=lambda *_: None,
    )

    async def fake_run_team(*args: Any, **kwargs: Any) -> tuple[TeamRunResult, None]:
        # Replaces the real team execution with the prepared result.
        return stub_team_result, None

    runner_calls: list[dict[str, Any]] = []

    async def runner(message: str, **kwargs: Any) -> AgentRunResult:
        # Record the keyword arguments so the synthesis call can be inspected.
        runner_calls.append(kwargs)
        return AgentRunResult(
            session_id="session-1",
            run_id="main-run",
            output_text="Available financial comparison.",
            finish_reason="stop",
            tool_iterations=0,
        )

    orchestrator._run_team_for_task = fake_run_team  # type: ignore[method-assign]

    run_kwargs = {
        "session_id": "session-1",
        "provider_bundle": SimpleNamespace(),
        "include_skill_assembly": False,
    }
    result = asyncio.run(
        orchestrator.run(
            message="compare finance",
            runner=runner,
            kwargs=run_kwargs,
            task=task,
        )
    )

    assert len(runner_calls) == 1
    synthesis_call = runner_calls[0]
    assert synthesis_call["include_tools"] is False
    assert synthesis_call["max_tool_iterations"] == 0
    assert "Task outcome: incomplete" in synthesis_call["execution_context"]
    assert result.output_text.startswith("任务未完成:")

    synthesis_events = [
        event
        for event in session_manager.events
        if event.get("event_type") == "task_synthesis_completed"
    ]
    synthesis_event = synthesis_events[0]
    assert synthesis_event["event_payload"]["task_outcome"] == "incomplete"
    assert synthesis_event["event_payload"]["incomplete_node_ids"] == ["collect"]
    assert synthesis_event["event_payload"]["node_statuses"] == {
        "collect": "partial",
        "report": "succeeded",
    }
    assert synthesis_event["event_payload"]["evidence_gaps"] == {"collect": [url_gap]}
def test_incomplete_notice_is_not_prefixed_twice() -> None:
    """Text that already starts with the incomplete notice is returned unchanged."""
    already_prefixed = "任务未完成:缺少官方来源。"
    candidate_prefix = "任务未完成:部分步骤缺少证据。\n\n"

    outcome = TaskAttemptOrchestrator._apply_incomplete_prefix(already_prefixed, candidate_prefix)

    assert outcome == already_prefixed

View File

@ -0,0 +1,214 @@
from __future__ import annotations
import pytest
from beaver.team_workflows.agent_rearrange import build_graph as build_rearrange_graph
from beaver.team_workflows.concurrent import build_graph as build_concurrent_graph
from beaver.team_workflows.graph import build_graph as build_explicit_graph
from beaver.team_workflows.mixture_of_agents import build_graph as build_moa_graph
from beaver.team_workflows.sequential import build_graph as build_sequential_graph
def _deps(graph) -> dict[str, list[str]]:
return {node.node_id: list(node.depends_on) for node in graph.nodes}
def test_sequential_workflow_builds_chain_and_preserves_agent_fields() -> None:
    """Three agents become a three-node chain, and the first node keeps its agent fields."""
    collector_spec = {
        "name": "source_collector",
        "instruction": "Collect official sources",
        "skill_query": "official filings",
        "allowed_tool_names": ["web_search", "web_fetch"],
        "required_evidence": ["url"],
        "validation_rules": ["Prefer official sources."],
        "block_downstream_on_partial": True,
    }
    agent_specs = [
        collector_spec,
        {"name": "metric_extractor", "instruction": "Extract metrics"},
        {"name": "reporter", "instruction": "Write report"},
    ]

    graph = build_sequential_graph(task="finance report", agents=agent_specs)

    assert graph.strategy == "sequence"
    node_ids = [node.node_id for node in graph.nodes]
    assert node_ids == ["source_collector", "metric_extractor", "reporter"]
    assert _deps(graph) == {
        "source_collector": [],
        "metric_extractor": ["source_collector"],
        "reporter": ["metric_extractor"],
    }

    first = graph.nodes[0]
    assert first.task == "Collect official sources"
    assert first.agent.role == ""

    metadata = first.agent.metadata
    assert metadata["sub_agent_kind"] == "generic_skill_worker"
    assert metadata["workflow_tool"] == "SequentialWorkflow"
    assert metadata["workflow_agent_name"] == "source_collector"
    assert metadata["skill_query"] == "official filings"

    assert first.allowed_tool_names == ["web_search", "web_fetch"]
    assert first.required_evidence == ["url"]
    assert first.validation_rules == ["Prefer official sources."]
    assert first.block_downstream_on_partial is True
def test_concurrent_workflow_builds_independent_nodes() -> None:
    """The concurrent builder yields a ``parallel`` graph whose nodes have no dependencies."""
    agent_specs = [
        {"name": "official_sources", "instruction": "Check official sources"},
        {"name": "media_sources", "instruction": "Check media sources"},
        {"name": "data_sources", "instruction": "Check data sources"},
    ]

    graph = build_concurrent_graph(task="research topic", agents=agent_specs)

    assert graph.strategy == "parallel"
    expected_dependencies = {
        "official_sources": [],
        "media_sources": [],
        "data_sources": [],
    }
    assert _deps(graph) == expected_dependencies
def test_mixture_of_agents_builds_experts_to_aggregator() -> None:
    """Expert nodes have no dependencies and the aggregator depends on every expert."""
    expert_specs = [
        {"name": "tactics", "instruction": "Analyze tactics"},
        {"name": "players", "instruction": "Analyze players"},
        {"name": "media", "instruction": "Analyze media"},
    ]
    aggregator_spec = {"name": "synthesizer", "instruction": "Synthesize report"}

    graph = build_moa_graph(
        task="analyze match",
        agents=expert_specs,
        aggregator=aggregator_spec,
    )

    assert graph.strategy == "dag"
    expected_dependencies = {
        "tactics": [],
        "players": [],
        "media": [],
        "synthesizer": ["tactics", "players", "media"],
    }
    assert _deps(graph) == expected_dependencies

    aggregator_node = graph.nodes[-1]
    assert aggregator_node.agent.metadata["workflow_tool"] == "MixtureOfAgents"
def test_agent_rearrange_parses_flow_into_edges() -> None:
    """A ``a -> b, c, d -> e`` flow string becomes a fan-out / fan-in DAG."""
    agent_specs = [
        {"name": "collector", "instruction": "Collect facts"},
        {"name": "tactics", "instruction": "Analyze tactics"},
        {"name": "players", "instruction": "Analyze players"},
        {"name": "media", "instruction": "Analyze media"},
        {"name": "synthesizer", "instruction": "Synthesize report"},
    ]
    flow_spec = "collector -> tactics, players, media -> synthesizer"

    graph = build_rearrange_graph(
        task="collect then analyze then synthesize",
        agents=agent_specs,
        flow=flow_spec,
    )

    assert graph.strategy == "dag"
    expected_dependencies = {
        "collector": [],
        "tactics": ["collector"],
        "players": ["collector"],
        "media": ["collector"],
        "synthesizer": ["tactics", "players", "media"],
    }
    assert _deps(graph) == expected_dependencies
def test_agent_rearrange_rejects_unknown_agent_in_flow() -> None:
    """A flow naming an agent that was not declared raises ``ValueError`` ("unknown agent")."""
    declared_agents = [{"name": "collector", "instruction": "Collect"}]
    flow_spec = "collector -> missing"

    with pytest.raises(ValueError, match="unknown agent"):
        build_rearrange_graph(task="bad flow", agents=declared_agents, flow=flow_spec)
def test_graph_workflow_requires_edges_and_output_agent() -> None:
    """An empty edge list and an undeclared ``output_agent`` are each rejected with ``ValueError``."""
    # Case 1: no edges at all.
    single_agent = [{"name": "collector", "instruction": "Collect"}]
    with pytest.raises(ValueError, match="edges"):
        build_explicit_graph(
            task="bad graph",
            agents=single_agent,
            edges=[],
            output_agent="collector",
        )

    # Case 2: output_agent names an agent that was never declared.
    two_agents = [
        {"name": "collector", "instruction": "Collect"},
        {"name": "reporter", "instruction": "Report"},
    ]
    with pytest.raises(ValueError, match="output_agent"):
        build_explicit_graph(
            task="bad graph",
            agents=two_agents,
            edges=[["collector", "reporter"]],
            output_agent="missing",
        )
def test_graph_workflow_builds_explicit_dag() -> None:
    """Explicit edges produce a DAG whose per-node dependencies mirror the edge list."""
    agent_specs = [
        {"name": "collector", "instruction": "Collect facts"},
        {"name": "tactics", "instruction": "Analyze tactics"},
        {"name": "players", "instruction": "Analyze players"},
        {"name": "media", "instruction": "Analyze media"},
        {"name": "synthesizer", "instruction": "Synthesize report"},
    ]
    edge_pairs = [
        ["collector", "tactics"],
        ["collector", "players"],
        ["collector", "media"],
        ["tactics", "synthesizer"],
        ["players", "synthesizer"],
        ["media", "synthesizer"],
    ]

    graph = build_explicit_graph(
        task="match analysis",
        agents=agent_specs,
        edges=edge_pairs,
        output_agent="synthesizer",
    )

    assert graph.strategy == "dag"
    expected_dependencies = {
        "collector": [],
        "tactics": ["collector"],
        "players": ["collector"],
        "media": ["collector"],
        "synthesizer": ["tactics", "players", "media"],
    }
    assert _deps(graph) == expected_dependencies
def test_graph_workflow_rejects_unknown_cycle_and_disconnected_agents() -> None:
    """Unknown edge endpoints, cycles and agents not reached by any edge each raise ``ValueError``."""

    def spec(name: str, instruction: str) -> dict[str, str]:
        # Fresh dict per call, so no agent spec is shared between builder invocations.
        return {"name": name, "instruction": instruction}

    # An edge points at an agent that was not declared.
    with pytest.raises(ValueError, match="unknown agent"):
        build_explicit_graph(
            task="bad graph",
            agents=[spec("collector", "Collect"), spec("reporter", "Report")],
            edges=[["collector", "missing"]],
            output_agent="reporter",
        )

    # Two agents depend on each other.
    with pytest.raises(ValueError, match="cyclic"):
        build_explicit_graph(
            task="bad graph",
            agents=[spec("a", "A"), spec("b", "B")],
            edges=[["a", "b"], ["b", "a"]],
            output_agent="b",
        )

    # The "orphan" agent does not appear in any edge.
    with pytest.raises(ValueError, match="disconnected"):
        build_explicit_graph(
            task="bad graph",
            agents=[
                spec("collector", "Collect"),
                spec("reporter", "Report"),
                spec("orphan", "Unused"),
            ],
            edges=[["collector", "reporter"]],
            output_agent="reporter",
        )

View File

@ -0,0 +1,182 @@
from __future__ import annotations
import asyncio
import json
from types import SimpleNamespace
from typing import Any
from beaver.coordinator import NodeRunResult, TeamRunResult
from beaver.tools import ToolContext
from beaver.tools.mcp.wrapper import MCPToolWrapper
def _tool_def(name: str) -> SimpleNamespace:
return SimpleNamespace(
name=name,
description=name,
inputSchema={"type": "object", "properties": {}},
)
def test_team_workflow_mcp_wrapper_bridges_to_current_team_runtime() -> None:
    """Invoke a ``team_workflow`` wrapper with an injected ``agent_team_runner``.

    The remote ``call_tool`` must never be reached. Instead the runner receives
    the built graph plus the parent task/session/run ids, and its result is
    serialised into the tool's JSON content.
    """
    remote_calls: list[tuple[str, dict[str, Any]]] = []
    captured: dict[str, Any] = {}

    async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
        # Any call here is a failure: the bridge is expected to stay in-process.
        remote_calls.append((name, arguments))
        raise AssertionError("team workflow bridge must not call MCP subprocess")

    async def runner(graph, **kwargs: Any) -> TeamRunResult:
        captured.update(graph=graph, kwargs=kwargs)
        collect_result = NodeRunResult("collect", True, "collected", run_id="run-collect")
        report_result = NodeRunResult("report", True, "reported", run_id="run-report")
        return TeamRunResult(
            success=True,
            summary="team done",
            node_results=[collect_result, report_result],
            run_ids=["run-collect", "run-report"],
            session_ids=["session:collect", "session:report"],
            task_id=kwargs["parent_task_id"],
        )

    wrapper = MCPToolWrapper(
        "local_team_workflow_mcp",
        _tool_def("SequentialWorkflow"),
        call_tool,
        category="team_workflow",
        kind="local",
    )
    services = {
        "task_id": "task-1",
        "run_id": "run-root",
        "agent_team_runner": runner,
    }
    context = ToolContext(
        session_id="session-1",
        services=services,
        metadata={"source": "websocket"},
    )
    arguments = {
        "task": "finance report",
        "agents": [
            {"name": "collect", "instruction": "Collect official sources"},
            {"name": "report", "instruction": "Write report"},
        ],
    }

    result = asyncio.run(wrapper.invoke(arguments, context))
    payload = json.loads(result.content)
    graph = captured["graph"]

    assert remote_calls == []
    assert result.success is True
    assert result.tool_name == "mcp_local_team_workflow_mcp_SequentialWorkflow"

    assert payload["success"] is True
    assert payload["workflow"] == "SequentialWorkflow"
    assert payload["summary"] == "team done"
    assert payload["run_ids"] == ["run-collect", "run-report"]

    runner_kwargs = captured["kwargs"]
    assert runner_kwargs["parent_task_id"] == "task-1"
    assert runner_kwargs["parent_session_id"] == "session-1"
    assert runner_kwargs["parent_run_id"] == "run-root"

    assert graph.strategy == "sequence"
    dependencies = {node.node_id: list(node.depends_on) for node in graph.nodes}
    assert dependencies == {
        "collect": [],
        "report": ["collect"],
    }
def test_ordinary_mcp_wrapper_still_calls_remote_tool() -> None:
    """A wrapper in the ``web`` category forwards the call to ``call_tool`` unchanged."""
    remote_calls: list[tuple[str, dict[str, Any]]] = []

    async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
        remote_calls.append((name, arguments))
        return SimpleNamespace(content=[], structuredContent={"ok": True})

    wrapper = MCPToolWrapper(
        "local_web_mcp",
        _tool_def("web_search"),
        call_tool,
        category="web",
        kind="local",
    )

    pending_result = wrapper.invoke({"query": "beaver"}, ToolContext())
    result = asyncio.run(pending_result)

    assert result.success is True
    expected_calls = [("web_search", {"query": "beaver"})]
    assert remote_calls == expected_calls
def test_team_workflow_bridge_uses_team_service_without_injected_runner(monkeypatch) -> None:
    """Invoke a ``team_workflow`` wrapper when no ``agent_team_runner`` service is supplied.

    ``AgentLoop`` and ``TeamService`` are patched with fakes. The test checks
    that the team service is handed a loop carrying the parent loop's profile
    and loader plus the ``loaded`` service, and that the graph runs with the
    parent ids and ``allow_candidate_generation=False``.
    """
    captured: dict[str, Any] = {}

    class FakeTeamService:
        def __init__(self, loop: Any) -> None:
            captured["loop"] = loop

        async def run_team(self, graph, **kwargs: Any) -> TeamRunResult:
            captured.update(graph=graph, kwargs=kwargs)
            only_result = NodeRunResult("only", True, "ok", run_id="run-only")
            return TeamRunResult(
                success=True,
                summary="service team done",
                node_results=[only_result],
                run_ids=["run-only"],
                session_ids=["session:only"],
                task_id=kwargs["parent_task_id"],
            )

    class FakeAgentLoop:
        def __init__(self, *, profile: Any, loader: Any) -> None:
            self.profile = profile
            self.loader = loader
            self.loaded = None

    monkeypatch.setattr("beaver.engine.AgentLoop", FakeAgentLoop)
    monkeypatch.setattr("beaver.services.team_service.TeamService", FakeTeamService)

    wrapper = MCPToolWrapper(
        "local_team_workflow_mcp",
        _tool_def("ConcurrentWorkflow"),
        call_tool=lambda _name, _arguments: None,  # type: ignore[arg-type]
        category="team_workflow",
        kind="local",
    )
    parent_loop = SimpleNamespace(profile="profile", loader="loader")
    services = {
        "task_id": "task-1",
        "run_id": "run-root",
        "agent_loop": parent_loop,
        "loaded": SimpleNamespace(name="loaded"),
    }
    context = ToolContext(session_id="session-1", services=services)
    arguments = {
        "task": "parallel work",
        "agents": [{"name": "only", "instruction": "Do work"}],
    }

    result = asyncio.run(wrapper.invoke(arguments, context))
    payload = json.loads(result.content)

    assert result.success is True
    assert payload["summary"] == "service team done"

    built_loop = captured["loop"]
    assert built_loop.profile == "profile"
    assert built_loop.loader == "loader"
    assert built_loop.loaded.name == "loaded"

    service_kwargs = captured["kwargs"]
    assert service_kwargs["parent_task_id"] == "task-1"
    assert service_kwargs["parent_session_id"] == "session-1"
    assert service_kwargs["parent_run_id"] == "run-root"
    assert service_kwargs["allow_candidate_generation"] is False

    assert captured["graph"].strategy == "parallel"