Files
beaver_project/app-instance/backend/tests/unit/test_agent_team_v1.py
steven_li 520a21a027 feat(coordinator): 添加团队节点默认最大工具迭代次数配置
添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,
并修改 LocalAgentRunner 中的逻辑来使用此默认值当 envelope 中未指定时。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件,将 finish_reason 为 "max_tool_iterations_finalized" 的情况
视为运行失败,并添加对原始工具调用输出的检测,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,
为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,
同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,
提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,
简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理,改进团队运行完成消息显示,
并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码,将所有任务统一按单步执行处理,
简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,
只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
2026-06-26 16:36:29 +08:00

1037 lines
38 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import asyncio
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.memory.curated.snapshot import MemorySnapshot
from beaver.services.memory_service import MemoryService
from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode, NodeRunResult
from beaver.coordinator.execution.scheduler import TeamGraphScheduler
from beaver.coordinator.local import LocalAgentRunner
from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session.manager import SessionManager
from beaver.services.team_service import TeamService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.skills.drafts import DraftService
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
class RecordingProvider(LLMProvider):
    """Scripted LLM provider that replays canned responses and records prompts.

    Every ``chat`` call consumes the next stubbed response in FIFO order and
    appends the messages it received to ``calls`` so tests can inspect the
    prompt that was sent.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        """Store a private copy of ``responses`` and start with no recorded calls."""
        super().__init__()
        # Copy so that popping responses never mutates the caller's list.
        self.responses = list(responses)
        self.calls: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Record ``messages`` and return the next stubbed response.

        Raises:
            AssertionError: If every stubbed response has already been consumed.
        """
        # Snapshot the list instead of aliasing it: a caller that keeps
        # mutating its message list after this call would otherwise rewrite
        # what was recorded here. The copy is shallow; message dicts are shared.
        self.calls.append(list(messages))
        if not self.responses:
            raise AssertionError("No stubbed provider responses left")
        return self.responses.pop(0)

    def get_default_model(self) -> str:
        """Return the fixed model name reported by this stub."""
        return "stub-model"
class BlockingProvider(RecordingProvider):
    """Single-response provider whose ``chat`` parks until the test releases it.

    ``started`` is set as soon as ``chat`` is entered; the call then waits on
    ``release`` before delegating to the recording behaviour of the parent.
    """

    def __init__(self, content: str, started: asyncio.Event, release: asyncio.Event) -> None:
        """Queue one canned response built from ``content`` and keep both events."""
        canned = _response(content)
        super().__init__([canned])
        self.started = started
        self.release = release

    async def chat(self, *args, **kwargs) -> LLMResponse:
        """Signal arrival, wait for the release gate, then answer as the parent does."""
        self.started.set()
        await self.release.wait()
        reply = await super().chat(*args, **kwargs)
        return reply
class StubSkillAssembler:
    """Skill assembler double that always reports a fixed set of activated skills."""

    def __init__(self, activated_skills: list[SkillContext] | None = None) -> None:
        """Keep a private copy of ``activated_skills`` (empty when none are given)."""
        if activated_skills:
            self.activated_skills = list(activated_skills)
        else:
            self.activated_skills = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Ignore the request and return a result carrying a fresh copy of the skills."""
        skills_copy = [*self.activated_skills]
        return SkillAssemblyResult(activated_skills=skills_copy)
class BlockingSkillAssembler:
    """Skill assembler double that pauses assembly for the "task first" request.

    ``first_started`` is set when that request arrives and the call then waits
    on ``release_first``; every other request returns immediately.
    """

    def __init__(self) -> None:
        """Create the two events used to observe and release the first request."""
        self.first_started = asyncio.Event()
        self.release_first = asyncio.Event()

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Return an empty assembly result, blocking first if this is the "task first" node."""
        description = kwargs["task_description"]
        holds_gate = "task first" in description
        if holds_gate:
            self.first_started.set()
            await self.release_first.wait()
        return SkillAssemblyResult()
class PerRunSnapshotMemoryService(MemoryService):
    """Memory service double that hands out a distinct, numbered snapshot per run.

    ``get_snapshot`` returns a recognisable "shared-snapshot" block so tests can
    tell which of the two entry points supplied the memory text.
    """

    def __init__(self, root: Path) -> None:
        """Initialise the parent service at ``root`` and reset the snapshot counter."""
        super().__init__(root)
        self.count = 0

    def capture_snapshot_for_run(self) -> MemorySnapshot:
        """Increment the counter and return a snapshot labelled with the new value."""
        self.count = self.count + 1
        labelled_block = f"# Memory\n\nsnapshot-{self.count}"
        return MemorySnapshot(memory_block=labelled_block, user_block=None)

    def get_snapshot(self) -> MemorySnapshot:
        """Return the fixed "shared-snapshot" memory block."""
        shared_block = "# Memory\n\nshared-snapshot"
        return MemorySnapshot(memory_block=shared_block, user_block=None)
class CapturingRunner:
    """Runner double that stores every envelope it receives and always succeeds."""

    def __init__(self) -> None:
        """Start with an empty list of captured envelopes."""
        self.envelopes: list[DelegationEnvelope] = []

    async def run(self, envelope: DelegationEnvelope, **kwargs) -> NodeRunResult:
        """Capture ``envelope`` and return a successful result with output "done"."""
        self.envelopes.append(envelope)
        # Fall back to a placeholder id when the envelope carries none.
        reported_id = envelope.node_id or "node"
        return NodeRunResult(node_id=reported_id, success=True, output_text="done")
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ProviderBundle with a stub main runtime descriptor."""
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
def _loop(tmp_path: Path) -> AgentLoop:
    """Build an AgentLoop rooted at ``tmp_path`` with a no-op skill assembler."""
    engine_loader = EngineLoader(
        workspace=tmp_path,
        skill_assembler=StubSkillAssembler(),
    )
    return AgentLoop(loader=engine_loader)
def _loop_with_services(
    tmp_path: Path,
    *,
    skill_assembler,
    memory_service: MemoryService | None = None,
) -> AgentLoop:
    """Build an AgentLoop rooted at ``tmp_path`` using caller-supplied services.

    Args:
        tmp_path: Workspace directory handed to the engine loader.
        skill_assembler: Skill assembler instance to install on the loader.
        memory_service: Optional memory service passed through unchanged.
    """
    engine_loader = EngineLoader(
        workspace=tmp_path,
        skill_assembler=skill_assembler,
        memory_service=memory_service,
    )
    return AgentLoop(loader=engine_loader)
def _response(content: str, *, finish_reason: str = "stop") -> LLMResponse:
    """Create a stub LLMResponse carrying ``content`` and ``finish_reason``."""
    fields = {
        "content": content,
        "finish_reason": finish_reason,
        "provider_name": "stub",
        "model": "stub-model",
    }
    return LLMResponse(**fields)
def _publish_skill(workspace: Path, *, skill_name: str, body: str) -> None:
    """Draft, approve and publish a skill named ``skill_name`` in ``workspace``.

    The three steps all operate on one SkillSpecStore and reference the draft
    id returned by the draft step.
    """
    spec_store = SkillSpecStore(workspace)
    frontmatter = {"description": f"{skill_name} test skill", "tools": []}
    new_draft = DraftService(spec_store).create_new_skill_draft(
        skill_name=skill_name,
        proposed_content=body,
        proposed_frontmatter=frontmatter,
        created_by="tester",
        reason="test",
    )
    ReviewService(spec_store).approve(skill_name, new_draft.draft_id, reviewer="tester", notes="ok")
    SkillPublisher(spec_store).publish(skill_name, new_draft.draft_id, publisher="tester", notes="publish")
def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: Path) -> None:
    """A delegated run succeeds and is linked back to the parent task and session."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("sub-agent result")])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    booted = agent_loop.boot()
    latest_run = booted.run_memory_store.list_runs()[-1]  # type: ignore[union-attr]
    child = booted.session_manager.get_session(outcome.session_id)  # type: ignore[union-attr,arg-type]

    # The node itself completed cleanly with no evidence gaps.
    assert outcome.success is True
    assert outcome.completion_status == "succeeded"
    assert outcome.evidence_gaps == []
    # The run and the child session both point back at their parents.
    assert latest_run.task_id == "task-parent"
    assert child["parent_session_id"] == "session-root"
def test_node_without_required_tool_result_is_partial(tmp_path: Path) -> None:
    """A node requiring ``tool_result`` evidence that only returns text ends up partial."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("collected narrative")])
    collector = AgentDescriptor(name="collect")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=collector,
        task="collect",
        node_id="collect",
        required_evidence=["tool_result"],
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    expected_gaps = ["missing required evidence: tool_result"]
    assert outcome.success is False
    assert outcome.completion_status == "partial"
    assert outcome.evidence_gaps == expected_gaps
def test_node_with_required_nonempty_output_succeeds(tmp_path: Path) -> None:
    """A node requiring ``output`` evidence succeeds when the reply text is non-empty."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("verified output")])
    verifier = AgentDescriptor(name="verify")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=verifier,
        task="verify",
        node_id="verify",
        required_evidence=["output"],
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    assert outcome.success is True
    assert outcome.completion_status == "succeeded"
    assert outcome.evidence_gaps == []
def test_unknown_evidence_requirement_makes_node_partial(tmp_path: Path) -> None:
    """An unrecognised evidence requirement does not stop the node from succeeding.

    NOTE(review): the test name still says "partial", but the assertions below
    expect a successful node with no evidence gaps. The name looks stale
    relative to the asserted behaviour - consider renaming it.
    """
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("output")])
    verifier = AgentDescriptor(name="verify")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=verifier,
        task="verify",
        node_id="verify",
        required_evidence=["unknown_type"],
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    assert outcome.success is True
    assert outcome.completion_status == "succeeded"
    assert outcome.evidence_gaps == []
def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None:
    """A run ending with ``max_tool_iterations`` fails but still carries its evidence."""
    agent_loop = _loop(tmp_path)
    exhausted_reply = _response("partial evidence", finish_reason="max_tool_iterations")
    stub = RecordingProvider([exhausted_reply])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    assert outcome.success is False
    assert outcome.evidence is not None
    assert outcome.evidence.output_text == "partial evidence"
    assert outcome.evidence.finish_reason == "max_tool_iterations"
def test_team_node_accepts_finalized_tool_budget_output(tmp_path: Path) -> None:
    """Plain-text output with ``max_tool_iterations_finalized`` counts as a success."""
    agent_loop = _loop(tmp_path)
    finalized_reply = _response(
        "usable finalized output",
        finish_reason="max_tool_iterations_finalized",
    )
    stub = RecordingProvider([finalized_reply])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    assert outcome.success is True
    assert outcome.completion_status == "succeeded"
    assert outcome.finish_reason == "max_tool_iterations_finalized"
def test_team_node_rejects_finalized_raw_tool_call_output(tmp_path: Path) -> None:
    """Finalized output that is only raw tool-call markup is reported as a failure."""
    agent_loop = _loop(tmp_path)
    # NOTE(review): the hosting page flags ambiguous Unicode in this file;
    # confirm this literal matches the markup in the raw source - TODO verify.
    raw_tool_call = '<DSMLtool_calls><DSMLinvoke name="web_fetch"></DSMLinvoke></DSMLtool_calls>'
    finalized_reply = _response(
        raw_tool_call,
        finish_reason="max_tool_iterations_finalized",
    )
    stub = RecordingProvider([finalized_reply])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    assert outcome.success is False
    assert outcome.completion_status == "failed"
    assert outcome.error == "finalized output is a raw tool call"
def test_team_node_defaults_to_larger_tool_iteration_budget(tmp_path: Path) -> None:
    """With no budget on the envelope, the runner passes ``max_tool_iterations=100``."""
    sessions = SessionManager(tmp_path)
    seen_kwargs: dict[str, object] = {}

    class CapturingLoop:
        """Minimal loop double that records the kwargs given to ``process_direct``."""

        profile = SimpleNamespace()
        loader = None
        is_running = False

        async def process_direct(self, task: str, **kwargs: object) -> SimpleNamespace:
            """Record ``kwargs``, register the session and return a canned run."""
            seen_kwargs.update(kwargs)
            child_session_id = str(kwargs["session_id"])
            sessions.ensure_session(child_session_id, source="test")
            return SimpleNamespace(
                session_id=child_session_id,
                run_id="run-captured",
                output_text="done",
                finish_reason="stop",
            )

        def boot(self) -> SimpleNamespace:
            """Expose the shared session manager the way a loaded engine would."""
            return SimpleNamespace(session_manager=sessions)

    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(CapturingLoop())
    outcome = asyncio.run(runner.run(delegation))

    assert outcome.success is True
    assert seen_kwargs["max_tool_iterations"] == 100
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
    """A published skill pinned on the envelope reaches the prompt and the run events."""
    _publish_skill(
        tmp_path,
        skill_name="review-check",
        body="# Review Check\n\nAlways mention the pinned review checklist.\n",
    )
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("done")])
    reviewer = AgentDescriptor(name="reviewer")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=reviewer,
        task="review the work",
        inherited_pinned_skills=["review-check"],
        node_id="review",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub)))

    booted = agent_loop.boot()
    run_events = booted.session_manager.get_run_event_records(outcome.session_id, outcome.run_id)  # type: ignore[union-attr,arg-type]
    snapshot_events = [
        record for record in run_events if record.event_type == "skill_activation_snapshotted"
    ]

    # The skill body must show up in the second message of the first provider call.
    assert "Always mention the pinned review checklist" in stub.calls[0][1]["content"]
    assert snapshot_events
    first_receipt = snapshot_events[0].event_payload["receipts"][0]
    assert first_receipt["skill_name"] == "review-check"
    assert first_receipt["activation_reason"] == "pinned_delegation"
def test_ephemeral_pinned_skill_context_is_injected_into_delegated_run(tmp_path: Path) -> None:
    """An in-memory SkillContext pinned on the envelope reaches the prompt and run events.

    Unlike a published skill, the context here is supplied directly on the
    envelope; the receipt must echo its name, version and activation reason.
    """
    loop = _loop(tmp_path)
    provider = RecordingProvider([_response("done")])
    envelope = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="api_review"),
        task="review the API",
        inherited_pinned_skill_contexts=[
            SkillContext(
                name="draft:api-review",
                content="Always mention schema compatibility.",
                version="draft:draft-1",
                content_hash="hash",
                activation_reason="generated_missing_skill",
            )
        ],
        node_id="api_review",
    )
    result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider)))
    loaded = loop.boot()
    events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)  # type: ignore[union-attr,arg-type]
    skill_events = [event for event in events if event.event_type == "skill_activation_snapshotted"]
    assert "Always mention schema compatibility" in provider.calls[0][1]["content"]
    # Guard before indexing so a missing snapshot event fails as a readable
    # assertion rather than an IndexError on the next line.
    assert skill_events
    receipts = skill_events[0].event_payload["receipts"]
    assert receipts[0]["skill_name"] == "draft:api-review"
    assert receipts[0]["skill_version"] == "draft:draft-1"
    assert receipts[0]["activation_reason"] == "generated_missing_skill"
def test_team_sequence_passes_prior_outputs(tmp_path: Path) -> None:
    """In a sequence, the second node's prompt contains the first node's output."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "first": RecordingProvider([_response("first output")]),
        "second": RecordingProvider([_response("second output")]),
    }
    step_one = ExecutionNode("first", "step one", AgentDescriptor(name="a"))
    step_two = ExecutionNode("second", "step two", AgentDescriptor(name="b"))
    plan = ExecutionGraph(strategy="sequence", nodes=[step_one, step_two])

    def bundle_for(node):
        # Each node gets the stub registered under its own id.
        return _bundle(stubs[node.node_id])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=bundle_for,
        )
    )

    second_prompt = stubs["second"].calls[0][0]["content"]
    assert team_result.success is True
    assert team_result.summary == "first output\n\nsecond output"
    assert "Dependency first output:\nfirst output" in second_prompt
def test_partial_node_allows_downstream_by_default(tmp_path: Path) -> None:
    """A partial upstream node still lets the next node run and see its output."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "collect": RecordingProvider([_response("partial source notes")]),
        "extract": RecordingProvider([_response("extracted metrics")]),
    }
    collect_node = ExecutionNode(
        "collect",
        "collect",
        AgentDescriptor(name="collect"),
        required_evidence=["tool_result"],
    )
    extract_node = ExecutionNode("extract", "extract", AgentDescriptor(name="extract"))
    plan = ExecutionGraph(strategy="sequence", nodes=[collect_node, extract_node])

    def bundle_for(node):
        return _bundle(stubs[node.node_id])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle_factory=bundle_for,
        )
    )

    upstream, downstream = team_result.node_results[0], team_result.node_results[1]
    assert upstream.completion_status == "partial"
    assert downstream.completion_status == "succeeded"
    assert "Dependency collect output:\npartial source notes" in stubs["extract"].calls[0][0]["content"]
def test_partial_node_blocks_downstream_when_configured(tmp_path: Path) -> None:
    """With ``block_downstream_on_partial`` set, a partial node blocks the next node."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "collect": RecordingProvider([_response("partial source notes")]),
        "extract": RecordingProvider([_response("must not run")]),
    }
    collect_node = ExecutionNode(
        "collect",
        "collect",
        AgentDescriptor(name="collect"),
        required_evidence=["tool_result"],
        block_downstream_on_partial=True,
    )
    extract_node = ExecutionNode("extract", "extract", AgentDescriptor(name="extract"))
    plan = ExecutionGraph(strategy="sequence", nodes=[collect_node, extract_node])

    def bundle_for(node):
        return _bundle(stubs[node.node_id])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle_factory=bundle_for,
        )
    )

    upstream, downstream = team_result.node_results[0], team_result.node_results[1]
    assert upstream.completion_status == "partial"
    assert downstream.completion_status == "blocked"
    # The blocked node's provider must never have been called.
    assert stubs["extract"].calls == []
def test_scheduler_copies_task_two_contract_fields_to_envelope() -> None:
    """The scheduler forwards every contract field of a node onto its envelope."""
    capturing = CapturingRunner()
    contract_node = ExecutionNode(
        "collect",
        "collect",
        AgentDescriptor(name="collect"),
        input_contract={"query": "str"},
        output_contract={"sources": "list"},
        required_evidence=["tool_result"],
        evidence_contract={"entities": ["MGM"]},
        validation_rules=["official_sources_only"],
        required_for_completion=False,
        block_downstream_on_partial=True,
        max_tool_iterations=2,
    )
    plan = ExecutionGraph(strategy="sequence", nodes=[contract_node])

    scheduler = TeamGraphScheduler(capturing)  # type: ignore[arg-type]
    asyncio.run(
        scheduler.run(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
        )
    )

    sent = capturing.envelopes[0]
    # Contract payloads are compared by value.
    assert sent.input_contract == {"query": "str"}
    assert sent.output_contract == {"sources": "list"}
    assert sent.required_evidence == ["tool_result"]
    assert sent.evidence_contract == {"entities": ["MGM"]}
    assert sent.validation_rules == ["official_sources_only"]
    # Flags are checked by identity so truthy/falsy stand-ins do not pass.
    assert sent.required_for_completion is False
    assert sent.block_downstream_on_partial is True
    assert sent.max_tool_iterations == 2
def test_team_parallel_runs_all_nodes(tmp_path: Path) -> None:
    """A parallel graph runs every node once and reports results in node order."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "one": RecordingProvider([_response("one")]),
        "two": RecordingProvider([_response("two")]),
        "three": RecordingProvider([_response("three")]),
    }
    requested_bundles: list[str] = []
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("one", "task one", AgentDescriptor(name="one")),
            ExecutionNode("two", "task two", AgentDescriptor(name="two")),
            ExecutionNode("three", "task three", AgentDescriptor(name="three")),
        ],
    )

    def recording_factory(node):
        # Note which node asked for a bundle before handing it over.
        requested_bundles.append(node.node_id)
        return _bundle(stubs[node.node_id])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=recording_factory,
        )
    )

    outputs = [entry.output_text for entry in team_result.node_results]
    assert team_result.success is True
    # Factory call order is not fixed for parallel nodes, so compare sorted.
    assert sorted(requested_bundles) == ["one", "three", "two"]
    assert team_result.run_ids and len(team_result.run_ids) == 3
    assert outputs == ["one", "two", "three"]
def test_team_parallel_starts_nodes_concurrently_with_isolated_loops(tmp_path: Path) -> None:
    """Both parallel nodes reach their provider before either one is released.

    Each provider blocks inside ``chat`` until ``gate`` is set, so the test can
    only pass if the two nodes were started without waiting for each other.
    """
    agent_loop = _loop(tmp_path)
    one_entered = asyncio.Event()
    two_entered = asyncio.Event()
    gate = asyncio.Event()
    stubs = {
        "one": BlockingProvider("one", one_entered, gate),
        "two": BlockingProvider("two", two_entered, gate),
    }
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("one", "task one", AgentDescriptor(name="one")),
            ExecutionNode("two", "task two", AgentDescriptor(name="two")),
        ],
    )

    async def run_case():
        background_loop = asyncio.create_task(agent_loop.run())
        # Yield once so the background loop task gets a chance to start.
        await asyncio.sleep(0)
        team_task = asyncio.create_task(
            TeamService(agent_loop).run_team(
                plan,
                parent_task_id=None,
                parent_session_id="session-root",
                parent_run_id="run-root",
                provider_bundle_factory=lambda node: _bundle(stubs[node.node_id]),
            )
        )
        try:
            await asyncio.wait_for(one_entered.wait(), timeout=1)
            await asyncio.wait_for(two_entered.wait(), timeout=1)
            gate.set()
            return await team_task
        finally:
            # Cleanup runs on success and on timeout alike: open the gate,
            # reap the team task, then shut the background loop down.
            gate.set()
            if not team_task.done():
                team_task.cancel()
                try:
                    await team_task
                except asyncio.CancelledError:
                    pass
            await agent_loop.stop()
            await background_loop

    team_result = asyncio.run(run_case())

    finished_ids = [entry.node_id for entry in team_result.node_results]
    assert team_result.success is True
    assert finished_ids == ["one", "two"]
def test_parallel_node_factory_error_is_normalized_and_keeps_completed_runs(tmp_path: Path) -> None:
    """A bundle-factory failure becomes an error result while the healthy node's run is kept."""
    agent_loop = _loop(tmp_path)
    booted = agent_loop.boot()
    parent_task = booted.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stubs = {
        "ok": RecordingProvider([_response("ok output")]),
    }
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("ok", "task ok", AgentDescriptor(name="ok")),
            ExecutionNode("bad", "task bad", AgentDescriptor(name="bad")),
        ],
    )

    def factory(node: ExecutionNode) -> ProviderBundle:
        # Only the "bad" node fails; every other node gets its stub bundle.
        if node.node_id != "bad":
            return _bundle(stubs[node.node_id])
        raise RuntimeError("factory failed")

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=parent_task.task_id,
            parent_session_id=parent_task.session_id,
            parent_run_id="run-root",
            provider_bundle_factory=factory,
        )
    )

    bad_results = [entry for entry in team_result.node_results if entry.node_id == "bad"]
    bad = bad_results[0]
    stored_task = booted.task_service.get_task(parent_task.task_id)  # type: ignore[union-attr]

    assert team_result.success is False
    assert bad.finish_reason == "error"
    assert bad.error == "factory failed"
    # Only the healthy node produced a run, and the parent task records it.
    assert team_result.run_ids and len(team_result.run_ids) == 1
    assert stored_task is not None
    assert stored_task.run_ids == team_result.run_ids
    assert "ok output" in team_result.summary
    assert "Failed nodes:\n- bad: factory failed" in team_result.summary
def test_team_dag_blocks_dependents_after_failure(tmp_path: Path) -> None:
    """When a DAG node fails, its dependent is blocked and never gets a run."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "prepare": RecordingProvider([_response("ok")]),
        "validate": RecordingProvider([_response("failed", finish_reason="error")]),
    }
    prepare_node = ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep"))
    validate_node = ExecutionNode(
        "validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]
    )
    publish_node = ExecutionNode(
        "publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]
    )
    plan = ExecutionGraph(strategy="dag", nodes=[prepare_node, validate_node, publish_node])

    def bundle_for(node):
        return _bundle(stubs[node.node_id])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=bundle_for,
        )
    )

    publish_results = [entry for entry in team_result.node_results if entry.node_id == "publish"]
    validate_results = [entry for entry in team_result.node_results if entry.node_id == "validate"]
    publish = publish_results[0]
    validate = validate_results[0]

    assert team_result.success is False
    assert validate.completion_status == "failed"
    assert publish.finish_reason == "blocked"
    assert publish.completion_status == "blocked"
    assert publish.run_id is None
    assert publish.error == "Blocked by failed dependency: validate"
    # The text before the "Failed nodes:" section must not leak the failed output.
    assert "failed" not in team_result.summary.split("Failed nodes:")[0]
    assert "- validate: failed" in team_result.summary
    assert "- publish: Blocked by failed dependency: validate" in team_result.summary
def test_dag_node_factory_error_blocks_dependents(tmp_path: Path) -> None:
    """A bundle-factory error on a DAG node fails that node and blocks its dependent."""
    agent_loop = _loop(tmp_path)
    stubs = {
        "prepare": RecordingProvider([_response("prepared")]),
    }
    prepare_node = ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep"))
    validate_node = ExecutionNode(
        "validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]
    )
    publish_node = ExecutionNode(
        "publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]
    )
    plan = ExecutionGraph(strategy="dag", nodes=[prepare_node, validate_node, publish_node])

    def factory(node: ExecutionNode) -> ProviderBundle:
        # The validator cannot obtain a provider; other nodes use their stub.
        if node.node_id != "validate":
            return _bundle(stubs[node.node_id])
        raise RuntimeError("validator unavailable")

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=factory,
        )
    )

    validate_results = [entry for entry in team_result.node_results if entry.node_id == "validate"]
    publish_results = [entry for entry in team_result.node_results if entry.node_id == "publish"]
    validate = validate_results[0]
    publish = publish_results[0]

    assert team_result.success is False
    assert validate.finish_reason == "error"
    assert validate.completion_status == "failed"
    assert validate.error == "validator unavailable"
    assert publish.finish_reason == "blocked"
    assert publish.completion_status == "blocked"
    assert publish.error == "Blocked by failed dependency: validate"
def test_provider_bundle_with_node_model_override_is_normalized_by_team_service(tmp_path: Path) -> None:
    """TeamService reports a bundle/model-override conflict as an error result, not a raise."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("unused")])
    specialist = AgentDescriptor(name="specialist", model="special-model")
    plan = ExecutionGraph(
        strategy="sequence",
        nodes=[ExecutionNode("specialist", "work", specialist)],
    )

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle=_bundle(stub),
        )
    )

    only_node = team_result.node_results[0]
    assert team_result.success is False
    assert only_node.finish_reason == "error"
    # ``error`` may be None, so fall back to an empty string for the substring check.
    assert "provider_bundle cannot be combined" in (team_result.node_results[0].error or "")
def test_team_summary_lists_only_failed_nodes_when_all_nodes_fail(tmp_path: Path) -> None:
    """When every node fails, the summary is just the "Failed nodes:" listing."""
    agent_loop = _loop(tmp_path)
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("one", "task one", AgentDescriptor(name="one")),
            ExecutionNode("two", "task two", AgentDescriptor(name="two")),
        ],
    )

    def factory(node: ExecutionNode) -> ProviderBundle:
        # Every node fails with a message naming itself.
        failure_text = f"{node.node_id} down"
        raise RuntimeError(failure_text)

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle_factory=factory,
        )
    )

    expected_summary = "Failed nodes:\n- one: one down evidence=no\n- two: two down evidence=no"
    assert team_result.success is False
    assert team_result.summary == expected_summary
def test_graph_structure_errors_still_raise(tmp_path: Path) -> None:
    """Structurally invalid graphs raise instead of being turned into node results.

    Covers a reserved strategy, a dependency on an unknown node, and a cycle.
    """
    agent_loop = _loop(tmp_path)
    reserved = ExecutionGraph(
        strategy="moa",
        nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"))],
    )
    unknown_dependency = ExecutionGraph(
        strategy="dag",
        nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"), depends_on=["missing"])],
    )
    cyclic = ExecutionGraph(
        strategy="dag",
        nodes=[
            ExecutionNode("a", "task a", AgentDescriptor(name="a"), depends_on=["b"]),
            ExecutionNode("b", "task b", AgentDescriptor(name="b"), depends_on=["a"]),
        ],
    )

    # (graph, expected exception type, message pattern), checked in this order.
    failing_cases = [
        (reserved, NotImplementedError, "reserved"),
        (unknown_dependency, ValueError, "unknown node"),
        (cyclic, ValueError, "cyclic or unresolved dependencies"),
    ]
    for bad_graph, expected_error, pattern in failing_cases:
        with pytest.raises(expected_error, match=pattern):
            asyncio.run(
                TeamService(agent_loop).run_team(
                    bad_graph, parent_task_id=None, parent_session_id="session-root"
                )
            )
def test_execution_node_contract_defaults_preserve_legacy_scope_behavior() -> None:
    """A node built from only id, task and agent has empty contracts and default flags."""
    bare_node = ExecutionNode("collect", "Collect sources", AgentDescriptor(name="collect"))

    # Contract containers default to empty.
    assert bare_node.input_contract == {}
    assert bare_node.output_contract == {}
    assert bare_node.allowed_tool_names is None
    assert bare_node.required_evidence == []
    assert bare_node.evidence_contract == {}
    assert bare_node.validation_rules == []
    # Flags and budget keep their defaults.
    assert bare_node.required_for_completion is True
    assert bare_node.block_downstream_on_partial is False
    assert bare_node.max_tool_iterations is None
def test_execution_node_keeps_explicit_empty_tool_scope_distinct_from_unspecified_scope() -> None:
    """An explicit empty tool list stays ``[]`` while an omitted one stays ``None``."""
    no_scope_given = ExecutionNode("unrestricted", "Collect", AgentDescriptor(name="unrestricted"))
    empty_scope_given = ExecutionNode(
        "tool_free",
        "Synthesize",
        AgentDescriptor(name="tool_free"),
        allowed_tool_names=[],
    )

    assert no_scope_given.allowed_tool_names is None
    assert empty_scope_given.allowed_tool_names == []
def test_delegation_envelope_and_node_result_preserve_new_contract_metadata() -> None:
    """Envelope contract fields and result status metadata survive construction and ``to_dict``."""
    expected_gaps = ["missing required evidence: Galaxy official source"]
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="collect"),
        task="Collect sources",
        allowed_tool_names=["web_search"],
        required_evidence=["url"],
        evidence_contract={"entities": ["MGM", "Galaxy"]},
        validation_rules=["official_sources_only"],
        required_for_completion=True,
        block_downstream_on_partial=True,
        max_tool_iterations=2,
    )
    partial_result = NodeRunResult(
        node_id="collect",
        success=False,
        output_text="MGM source only",
        completion_status="partial",
        evidence_gaps=["missing required evidence: Galaxy official source"],
    )

    assert delegation.allowed_tool_names == ["web_search"]
    assert delegation.evidence_contract == {"entities": ["MGM", "Galaxy"]}
    # Serialise separately for each check, as the fields are read independently.
    serialised_status = partial_result.to_dict()["completion_status"]
    assert serialised_status == "partial"
    serialised_gaps = partial_result.to_dict()["evidence_gaps"]
    assert serialised_gaps == expected_gaps
def test_graph_rejects_depth_above_configured_limit() -> None:
    """Validating a three-level dependency chain with ``max_depth=2`` raises ValueError."""
    root = ExecutionNode("a", "A", AgentDescriptor(name="a"))
    middle = ExecutionNode("b", "B", AgentDescriptor(name="b"), depends_on=["a"])
    leaf = ExecutionNode("c", "C", AgentDescriptor(name="c"), depends_on=["b"])
    chain = ExecutionGraph(strategy="dag", nodes=[root, middle, leaf])

    with pytest.raises(ValueError, match="max depth"):
        chain.validate(max_depth=2)
def test_team_run_does_not_create_independent_team_task(tmp_path: Path) -> None:
    """A team run attaches its runs to the parent task instead of creating a new task."""
    agent_loop = _loop(tmp_path)
    booted = agent_loop.boot()
    parent_task = booted.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stub = RecordingProvider([_response("child output")])
    child_node = ExecutionNode("child", "child task", AgentDescriptor(name="child"))
    plan = ExecutionGraph(strategy="sequence", nodes=[child_node])

    team_result = asyncio.run(
        TeamService(agent_loop).run_team(
            plan,
            parent_task_id=parent_task.task_id,
            parent_session_id=parent_task.session_id,
            parent_run_id="run-root",
            provider_bundle=_bundle(stub),
        )
    )

    all_tasks = booted.task_service.store.list_tasks()  # type: ignore[union-attr]
    latest_run = booted.run_memory_store.list_runs()[-1]  # type: ignore[union-attr]
    known_task_ids = [entry.task_id for entry in all_tasks]

    assert team_result.task_id == parent_task.task_id
    # The parent must remain the only task in the store.
    assert known_task_ids == [parent_task.task_id]
    assert all_tasks[0].run_ids == team_result.run_ids
    assert latest_run.task_id == parent_task.task_id
def test_parallel_nodes_use_independent_memory_snapshots(tmp_path: Path) -> None:
    """Each parallel node gets its own per-run memory snapshot in its system prompt.

    The first node is held inside skill assembly until the test releases it.
    Both prompts must then carry distinct numbered snapshots and neither may
    contain the "shared-snapshot" block returned by ``get_snapshot``.
    """
    skill_assembler = BlockingSkillAssembler()
    memory_service = PerRunSnapshotMemoryService(tmp_path / "memory" / "curated")
    memory_service.initialize()
    loop = _loop_with_services(tmp_path, skill_assembler=skill_assembler, memory_service=memory_service)
    providers = {
        "first": RecordingProvider([_response("first")]),
        "second": RecordingProvider([_response("second")]),
    }
    graph = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("first", "task first", AgentDescriptor(name="first")),
            ExecutionNode("second", "task second", AgentDescriptor(name="second")),
        ],
    )

    async def run_team() -> None:
        task = asyncio.create_task(
            TeamService(loop).run_team(
                graph,
                parent_task_id=None,
                parent_session_id="session-root",
                provider_bundle_factory=lambda node: _bundle(providers[node.node_id]),
            )
        )
        try:
            # Bound the wait: if the first node never reaches skill assembly
            # the test should fail quickly rather than hang the whole suite.
            await asyncio.wait_for(skill_assembler.first_started.wait(), timeout=5)
            skill_assembler.release_first.set()
            await task
        finally:
            # Always open the gate and reap the task so a failure above does
            # not leave a pending task behind when the event loop closes.
            skill_assembler.release_first.set()
            if not task.done():
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass

    asyncio.run(run_team())
    first_system = providers["first"].calls[0][0]["content"]
    second_system = providers["second"].calls[0][0]["content"]
    assert "snapshot-1" in first_system
    assert "snapshot-2" in second_system
    assert "shared-snapshot" not in first_system
    assert "shared-snapshot" not in second_system
def test_provider_bundle_with_node_model_override_is_rejected(tmp_path: Path) -> None:
    """LocalAgentRunner raises ValueError when a bundle is combined with a node model override."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("unused")])
    specialist = AgentDescriptor(name="specialist", model="special-model")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=specialist,
        task="work",
        node_id="specialist",
    )

    with pytest.raises(ValueError, match="provider_bundle cannot be combined"):
        asyncio.run(LocalAgentRunner(agent_loop).run(delegation, provider_bundle=_bundle(stub)))
def test_node_level_model_without_bundle_reaches_provider_resolution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Without an explicit bundle, the node's model and provider name reach bundle creation."""
    seen: dict[str, str | None] = {}
    stub = RecordingProvider([_response("node model used")])

    def fake_make_provider_bundle(**kwargs):
        # Record what the loop asked for, then hand back the stub bundle.
        for key in ("model", "provider_name"):
            seen[key] = kwargs.get(key)
        return _bundle(stub)

    monkeypatch.setattr("beaver.engine.loop.make_provider_bundle", fake_make_provider_bundle)
    agent_loop = _loop(tmp_path)
    specialist = AgentDescriptor(name="specialist", model="special-model", provider_name="custom")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=specialist,
        task="work",
        node_id="specialist",
    )

    outcome = asyncio.run(LocalAgentRunner(agent_loop).run(delegation))

    assert outcome.success is True
    assert seen == {"model": "special-model", "provider_name": "custom"}
def test_unknown_parent_task_is_rejected_before_any_run(tmp_path: Path) -> None:
    """An unknown parent task id raises ValueError and leaves the run store empty."""
    agent_loop = _loop(tmp_path)
    stub = RecordingProvider([_response("unused")])
    child_node = ExecutionNode("child", "child task", AgentDescriptor(name="child"))
    plan = ExecutionGraph(strategy="sequence", nodes=[child_node])

    with pytest.raises(ValueError, match="Unknown parent_task_id"):
        asyncio.run(
            TeamService(agent_loop).run_team(
                plan,
                parent_task_id="missing-task",
                parent_session_id="session-root",
                provider_bundle=_bundle(stub),
            )
        )

    # Rejection must happen before any node is executed.
    booted = agent_loop.boot()
    recorded_runs = booted.run_memory_store.list_runs()  # type: ignore[union-attr]
    assert recorded_runs == []
def test_parent_task_session_mismatch_is_rejected(tmp_path: Path) -> None:
    """Running a team under a session other than the parent task's raises ValueError."""
    agent_loop = _loop(tmp_path)
    booted = agent_loop.boot()
    parent_task = booted.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stub = RecordingProvider([_response("unused")])
    child_node = ExecutionNode("child", "child task", AgentDescriptor(name="child"))
    plan = ExecutionGraph(strategy="sequence", nodes=[child_node])

    with pytest.raises(ValueError, match="belongs to session"):
        asyncio.run(
            TeamService(agent_loop).run_team(
                plan,
                parent_task_id=parent_task.task_id,
                # Deliberately not the session the parent task was created in.
                parent_session_id="other-session",
                provider_bundle=_bundle(stub),
            )
        )