Files
beaver_project/app-instance/backend/tests/unit/test_agent_team_v1.py
steven_li 30ab74ffb2 feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
2026-05-14 09:43:48 +08:00

620 lines
23 KiB
Python

from __future__ import annotations
import asyncio
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.memory.curated.snapshot import MemorySnapshot
from beaver.services.memory_service import MemoryService
from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode
from beaver.coordinator.local import LocalAgentRunner
from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.team_service import TeamService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.skills.drafts import DraftService
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
class RecordingProvider(LLMProvider):
    """Scripted ``LLMProvider`` test double.

    Serves the pre-built responses in the order they were supplied and keeps
    the ``messages`` argument of every ``chat`` call in ``calls`` so tests can
    inspect what was sent to the provider.
    """

    def __init__(self, responses: list[LLMResponse]) -> None:
        """Store a private copy of *responses* and start with no recorded calls."""
        super().__init__()
        # Copy so that consuming responses never mutates the caller's list.
        self.responses = list(responses)
        self.calls: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Record *messages* and return the next scripted response.

        Raises:
            AssertionError: if more calls are made than responses were stubbed.
        """
        # Record a snapshot of the list, not the caller's object: if the
        # caller keeps appending to (or trimming) the same list after this
        # call, the recorded request must still show what was sent here.
        self.calls.append(list(messages))
        if not self.responses:
            raise AssertionError("No stubbed provider responses left")
        return self.responses.pop(0)

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub-model"
class StubSkillAssembler:
    """Skill assembler stand-in that always reports a fixed set of skills."""

    def __init__(self, activated_skills: list[SkillContext] | None = None) -> None:
        """Keep a private copy of *activated_skills* (empty when omitted)."""
        if activated_skills:
            self.activated_skills = list(activated_skills)
        else:
            self.activated_skills = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Ignore all arguments and return the configured skills."""
        # Hand out a fresh list each time so callers cannot alter the stub.
        skills = list(self.activated_skills)
        return SkillAssemblyResult(activated_skills=skills)
class BlockingSkillAssembler:
    """Skill assembler that pauses the "task first" run until released.

    ``first_started`` is set once the gated run reaches ``assemble``;
    the run then waits on ``release_first`` before returning.
    """

    def __init__(self) -> None:
        """Create the two events used to gate and release the first run."""
        self.release_first = asyncio.Event()
        self.first_started = asyncio.Event()

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Return an empty assembly result, gating descriptions containing "task first"."""
        description = kwargs["task_description"]
        if "task first" not in description:
            # Every other run passes straight through.
            return SkillAssemblyResult()
        self.first_started.set()
        await self.release_first.wait()
        return SkillAssemblyResult()
class PerRunSnapshotMemoryService(MemoryService):
    """Memory service whose per-run snapshots are numbered by call order.

    ``capture_snapshot_for_run`` yields ``snapshot-1``, ``snapshot-2``, ...
    while ``get_snapshot`` always yields the fixed ``shared-snapshot`` block.
    """

    def __init__(self, root: Path) -> None:
        """Initialise the base service at *root* and reset the call counter."""
        super().__init__(root)
        self.count = 0

    def capture_snapshot_for_run(self) -> MemorySnapshot:
        """Return a snapshot labelled with the number of calls made so far."""
        self.count = self.count + 1
        numbered_block = f"# Memory\n\nsnapshot-{self.count}"
        return MemorySnapshot(memory_block=numbered_block, user_block=None)

    def get_snapshot(self) -> MemorySnapshot:
        """Return the fixed shared snapshot."""
        shared_block = "# Memory\n\nshared-snapshot"
        return MemorySnapshot(memory_block=shared_block, user_block=None)
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap *provider* in a ``ProviderBundle`` with a stub main runtime."""
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
def _loop(tmp_path: Path) -> AgentLoop:
    """Build an ``AgentLoop`` rooted at *tmp_path* with a stub skill assembler."""
    engine_loader = EngineLoader(
        workspace=tmp_path,
        skill_assembler=StubSkillAssembler(),
    )
    return AgentLoop(loader=engine_loader)
def _loop_with_services(
    tmp_path: Path,
    *,
    skill_assembler,
    memory_service: MemoryService | None = None,
) -> AgentLoop:
    """Build an ``AgentLoop`` rooted at *tmp_path* with caller-supplied services.

    Both *skill_assembler* and *memory_service* are forwarded unchanged to
    the ``EngineLoader``.
    """
    engine_loader = EngineLoader(
        workspace=tmp_path,
        skill_assembler=skill_assembler,
        memory_service=memory_service,
    )
    return AgentLoop(loader=engine_loader)
def _response(content: str, *, finish_reason: str = "stop") -> LLMResponse:
    """Create a stub ``LLMResponse`` carrying *content* and *finish_reason*."""
    fields = {
        "content": content,
        "finish_reason": finish_reason,
        "provider_name": "stub",
        "model": "stub-model",
    }
    return LLMResponse(**fields)
def _publish_skill(workspace: Path, *, skill_name: str, body: str) -> None:
    """Draft, approve and publish a skill named *skill_name* in *workspace*.

    All three steps operate on the same ``SkillSpecStore`` and use "tester"
    as the acting user.
    """
    spec_store = SkillSpecStore(workspace)

    # 1. Create the draft.
    frontmatter = {"description": f"{skill_name} test skill", "tools": []}
    new_draft = DraftService(spec_store).create_new_skill_draft(
        skill_name=skill_name,
        proposed_content=body,
        proposed_frontmatter=frontmatter,
        created_by="tester",
        reason="test",
    )
    draft_id = new_draft.draft_id

    # 2. Approve it.
    reviews = ReviewService(spec_store)
    reviews.approve(skill_name, draft_id, reviewer="tester", notes="ok")

    # 3. Publish it.
    publisher = SkillPublisher(spec_store)
    publisher.publish(skill_name, draft_id, publisher="tester", notes="publish")
def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: Path) -> None:
    """A delegated run succeeds and is linked to its parent task and session."""
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([_response("sub-agent result")])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub_provider)))

    # Inspect what the run persisted through the loop's booted services.
    runtime = agent_loop.boot()
    latest_run = runtime.run_memory_store.list_runs()[-1]  # type: ignore[union-attr]
    session_row = runtime.session_manager.get_session(outcome.session_id)  # type: ignore[union-attr,arg-type]

    assert outcome.success is True
    assert latest_run.task_id == "task-parent"
    assert session_row["parent_session_id"] == "session-root"
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
    """A published skill pinned on the envelope reaches the prompt and the receipts."""
    _publish_skill(
        tmp_path,
        skill_name="review-check",
        body="# Review Check\n\nAlways mention the pinned review checklist.\n",
    )
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([_response("done")])
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="reviewer"),
        task="review the work",
        inherited_pinned_skills=["review-check"],
        node_id="review",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub_provider)))

    runtime = agent_loop.boot()
    run_events = runtime.session_manager.get_run_event_records(outcome.session_id, outcome.run_id)  # type: ignore[union-attr,arg-type]
    snapshots = [
        record
        for record in run_events
        if record.event_type == "skill_activation_snapshotted"
    ]

    # The skill body must appear in the second message of the first provider call.
    second_message = stub_provider.calls[0][1]
    assert "Always mention the pinned review checklist" in second_message["content"]
    assert snapshots
    first_receipt = snapshots[0].event_payload["receipts"][0]
    assert first_receipt["skill_name"] == "review-check"
    assert first_receipt["activation_reason"] == "pinned_delegation"
def test_ephemeral_pinned_skill_context_is_injected_into_delegated_run(tmp_path: Path) -> None:
    """An inline (unpublished) skill context on the envelope reaches the run.

    The skill content must appear in the second message of the first provider
    call, and the skill-activation snapshot event must carry a receipt with
    the context's name, version and activation reason.
    """
    loop = _loop(tmp_path)
    provider = RecordingProvider([_response("done")])
    envelope = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="api_review"),
        task="review the API",
        inherited_pinned_skill_contexts=[
            SkillContext(
                name="draft:api-review",
                content="Always mention schema compatibility.",
                version="draft:draft-1",
                content_hash="hash",
                activation_reason="generated_missing_skill",
            )
        ],
        node_id="api_review",
    )
    result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider)))
    loaded = loop.boot()
    events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)  # type: ignore[union-attr,arg-type]
    skill_events = [event for event in events if event.event_type == "skill_activation_snapshotted"]
    assert "Always mention schema compatibility" in provider.calls[0][1]["content"]
    # Guard the index below so a missing snapshot event fails as a clear
    # assertion rather than an IndexError.
    assert skill_events
    receipts = skill_events[0].event_payload["receipts"]
    assert receipts[0]["skill_name"] == "draft:api-review"
    assert receipts[0]["skill_version"] == "draft:draft-1"
    assert receipts[0]["activation_reason"] == "generated_missing_skill"
def test_team_sequence_passes_prior_outputs(tmp_path: Path) -> None:
    """In a sequence graph the second node's prompt includes the first node's output."""
    agent_loop = _loop(tmp_path)
    stub_providers = {
        "first": RecordingProvider([_response("first output")]),
        "second": RecordingProvider([_response("second output")]),
    }
    steps = [
        ExecutionNode("first", "step one", AgentDescriptor(name="a")),
        ExecutionNode("second", "step two", AgentDescriptor(name="b")),
    ]
    plan = ExecutionGraph(strategy="sequence", nodes=steps)

    def bundle_for(node):
        # Each node talks to its own recording provider.
        return _bundle(stub_providers[node.node_id])

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=bundle_for,
        )
    )

    assert outcome.success is True
    assert outcome.summary == "first output\n\nsecond output"
    second_node_prompt = stub_providers["second"].calls[0][0]
    assert "Dependency first output:\nfirst output" in second_node_prompt["content"]
def test_team_parallel_runs_all_nodes(tmp_path: Path) -> None:
    """A parallel graph runs every node once and reports results in node order."""
    agent_loop = _loop(tmp_path)
    stub_providers = {
        "one": RecordingProvider([_response("one")]),
        "two": RecordingProvider([_response("two")]),
        "three": RecordingProvider([_response("three")]),
    }
    factory_calls: list[str] = []
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("one", "task one", AgentDescriptor(name="one")),
            ExecutionNode("two", "task two", AgentDescriptor(name="two")),
            ExecutionNode("three", "task three", AgentDescriptor(name="three")),
        ],
    )

    def recording_factory(node):
        # Note which node asked for a bundle before handing one out.
        factory_calls.append(node.node_id)
        return _bundle(stub_providers[node.node_id])

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=recording_factory,
        )
    )

    assert outcome.success is True
    # The factory may be invoked in any order, so compare sorted ids.
    assert sorted(factory_calls) == ["one", "three", "two"]
    assert outcome.run_ids and len(outcome.run_ids) == 3
    outputs = [node_result.output_text for node_result in outcome.node_results]
    assert outputs == ["one", "two", "three"]
def test_parallel_node_factory_error_is_normalized_and_keeps_completed_runs(tmp_path: Path) -> None:
    """A factory error on one parallel node becomes an error result; the other run is kept."""
    agent_loop = _loop(tmp_path)
    runtime = agent_loop.boot()
    parent_task = runtime.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stub_providers = {
        "ok": RecordingProvider([_response("ok output")]),
    }
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("ok", "task ok", AgentDescriptor(name="ok")),
            ExecutionNode("bad", "task bad", AgentDescriptor(name="bad")),
        ],
    )

    def failing_factory(node: ExecutionNode) -> ProviderBundle:
        # Only the "bad" node fails to obtain a bundle.
        if node.node_id != "bad":
            return _bundle(stub_providers[node.node_id])
        raise RuntimeError("factory failed")

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=parent_task.task_id,
            parent_session_id=parent_task.session_id,
            parent_run_id="run-root",
            provider_bundle_factory=failing_factory,
        )
    )

    bad_results = [entry for entry in outcome.node_results if entry.node_id == "bad"]
    bad_node = bad_results[0]
    stored_task = runtime.task_service.get_task(parent_task.task_id)  # type: ignore[union-attr]

    assert outcome.success is False
    assert bad_node.finish_reason == "error"
    assert bad_node.error == "factory failed"
    assert outcome.run_ids and len(outcome.run_ids) == 1
    assert stored_task is not None
    assert stored_task.run_ids == outcome.run_ids
    assert "ok output" in outcome.summary
    assert "Failed nodes:\n- bad: factory failed" in outcome.summary
def test_team_dag_blocks_dependents_after_failure(tmp_path: Path) -> None:
    """When "validate" ends with an error, its dependent "publish" is blocked without a run."""
    agent_loop = _loop(tmp_path)
    stub_providers = {
        "prepare": RecordingProvider([_response("ok")]),
        "validate": RecordingProvider([_response("failed", finish_reason="error")]),
    }
    chain = [
        ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep")),
        ExecutionNode("validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]),
        ExecutionNode("publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]),
    ]
    plan = ExecutionGraph(strategy="dag", nodes=chain)

    def bundle_for(node):
        return _bundle(stub_providers[node.node_id])

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=bundle_for,
        )
    )

    publish_results = [entry for entry in outcome.node_results if entry.node_id == "publish"]
    publish_node = publish_results[0]

    assert outcome.success is False
    assert publish_node.finish_reason == "blocked"
    assert publish_node.run_id is None
    assert publish_node.error == "Blocked by failed dependency: validate"
    # The failed node's text must not appear ahead of the failure listing.
    before_failure_list = outcome.summary.split("Failed nodes:")[0]
    assert "failed" not in before_failure_list
    assert "- validate: failed" in outcome.summary
    assert "- publish: Blocked by failed dependency: validate" in outcome.summary
def test_dag_node_factory_error_blocks_dependents(tmp_path: Path) -> None:
    """A factory error on "validate" is reported as an error and blocks "publish"."""
    agent_loop = _loop(tmp_path)
    stub_providers = {
        "prepare": RecordingProvider([_response("prepared")]),
    }
    chain = [
        ExecutionNode("prepare", "prepare", AgentDescriptor(name="prep")),
        ExecutionNode("validate", "validate", AgentDescriptor(name="validator"), depends_on=["prepare"]),
        ExecutionNode("publish", "publish", AgentDescriptor(name="publisher"), depends_on=["validate"]),
    ]
    plan = ExecutionGraph(strategy="dag", nodes=chain)

    def failing_factory(node: ExecutionNode) -> ProviderBundle:
        # Only the "validate" node fails to obtain a bundle.
        if node.node_id != "validate":
            return _bundle(stub_providers[node.node_id])
        raise RuntimeError("validator unavailable")

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            parent_run_id="run-root",
            provider_bundle_factory=failing_factory,
        )
    )

    validate_results = [entry for entry in outcome.node_results if entry.node_id == "validate"]
    publish_results = [entry for entry in outcome.node_results if entry.node_id == "publish"]
    validate_node = validate_results[0]
    publish_node = publish_results[0]

    assert outcome.success is False
    assert validate_node.finish_reason == "error"
    assert validate_node.error == "validator unavailable"
    assert publish_node.finish_reason == "blocked"
    assert publish_node.error == "Blocked by failed dependency: validate"
def test_provider_bundle_with_node_model_override_is_normalized_by_team_service(tmp_path: Path) -> None:
    """TeamService reports a shared bundle plus a node-level model as a node error, not a raise."""
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([_response("unused")])
    specialist = AgentDescriptor(name="specialist", model="special-model")
    plan = ExecutionGraph(
        strategy="sequence",
        nodes=[ExecutionNode("specialist", "work", specialist)],
    )

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle=_bundle(stub_provider),
        )
    )

    only_node = outcome.node_results[0]
    assert outcome.success is False
    assert only_node.finish_reason == "error"
    # `error` may be None, so fall back to an empty string for the substring check.
    assert "provider_bundle cannot be combined" in (only_node.error or "")
def test_team_summary_lists_only_failed_nodes_when_all_nodes_fail(tmp_path: Path) -> None:
    """When every node fails, the summary consists solely of the failed-node list."""
    agent_loop = _loop(tmp_path)
    plan = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("one", "task one", AgentDescriptor(name="one")),
            ExecutionNode("two", "task two", AgentDescriptor(name="two")),
        ],
    )

    def always_failing_factory(node: ExecutionNode) -> ProviderBundle:
        # Every node fails, with its own id in the message.
        raise RuntimeError(f"{node.node_id} down")

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=None,
            parent_session_id="session-root",
            provider_bundle_factory=always_failing_factory,
        )
    )

    assert outcome.success is False
    assert outcome.summary == "Failed nodes:\n- one: one down\n- two: two down"
def test_graph_structure_errors_still_raise(tmp_path: Path) -> None:
    """Structurally invalid graphs raise from ``run_team`` instead of yielding a result.

    Covers the "moa" strategy, a dependency on an unknown node, and a
    dependency cycle.
    """
    agent_loop = _loop(tmp_path)
    reserved = ExecutionGraph(
        strategy="moa",
        nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"))],
    )
    unknown_dependency = ExecutionGraph(
        strategy="dag",
        nodes=[ExecutionNode("node", "task", AgentDescriptor(name="node"), depends_on=["missing"])],
    )
    cyclic = ExecutionGraph(
        strategy="dag",
        nodes=[
            ExecutionNode("a", "task a", AgentDescriptor(name="a"), depends_on=["b"]),
            ExecutionNode("b", "task b", AgentDescriptor(name="b"), depends_on=["a"]),
        ],
    )

    # (graph, expected exception type, expected message pattern), checked in order.
    cases = (
        (reserved, NotImplementedError, "reserved"),
        (unknown_dependency, ValueError, "unknown node"),
        (cyclic, ValueError, "cyclic or unresolved dependencies"),
    )
    for bad_graph, expected_error, pattern in cases:
        with pytest.raises(expected_error, match=pattern):
            asyncio.run(
                TeamService(agent_loop).run_team(
                    bad_graph,
                    parent_task_id=None,
                    parent_session_id="session-root",
                )
            )
def test_team_run_does_not_create_independent_team_task(tmp_path: Path) -> None:
    """A team run attaches its runs to the parent task and creates no additional task."""
    agent_loop = _loop(tmp_path)
    runtime = agent_loop.boot()
    parent_task = runtime.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stub_provider = RecordingProvider([_response("child output")])
    plan = ExecutionGraph(
        strategy="sequence",
        nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))],
    )

    team = TeamService(agent_loop)
    outcome = asyncio.run(
        team.run_team(
            plan,
            parent_task_id=parent_task.task_id,
            parent_session_id=parent_task.session_id,
            parent_run_id="run-root",
            provider_bundle=_bundle(stub_provider),
        )
    )

    all_tasks = runtime.task_service.store.list_tasks()  # type: ignore[union-attr]
    latest_run = runtime.run_memory_store.list_runs()[-1]  # type: ignore[union-attr]

    assert outcome.task_id == parent_task.task_id
    # The parent must remain the only task in the store.
    stored_ids = [stored.task_id for stored in all_tasks]
    assert stored_ids == [parent_task.task_id]
    assert all_tasks[0].run_ids == outcome.run_ids
    assert latest_run.task_id == parent_task.task_id
def test_parallel_nodes_use_independent_memory_snapshots(tmp_path: Path) -> None:
    """Each parallel node's system prompt carries its own per-run memory snapshot.

    The "first" node is held inside the blocking skill assembler until the
    test releases it. Afterwards the first node's prompt must contain
    ``snapshot-1``, the second node's ``snapshot-2``, and neither may contain
    the ``shared-snapshot`` block returned by ``get_snapshot``.
    """
    skill_assembler = BlockingSkillAssembler()
    memory_service = PerRunSnapshotMemoryService(tmp_path / "memory" / "curated")
    memory_service.initialize()
    loop = _loop_with_services(tmp_path, skill_assembler=skill_assembler, memory_service=memory_service)
    providers = {
        "first": RecordingProvider([_response("first")]),
        "second": RecordingProvider([_response("second")]),
    }
    graph = ExecutionGraph(
        strategy="parallel",
        nodes=[
            ExecutionNode("first", "task first", AgentDescriptor(name="first")),
            ExecutionNode("second", "task second", AgentDescriptor(name="second")),
        ],
    )

    async def run_team() -> None:
        task = asyncio.create_task(
            TeamService(loop).run_team(
                graph,
                parent_task_id=None,
                parent_session_id="session-root",
                provider_bundle_factory=lambda node: _bundle(providers[node.node_id]),
            )
        )
        # Bound the wait: if the first node never reaches the assembler
        # (e.g. the team run failed early), fail with a timeout instead of
        # hanging the whole test session.
        await asyncio.wait_for(skill_assembler.first_started.wait(), timeout=10)
        skill_assembler.release_first.set()
        await task

    asyncio.run(run_team())
    first_system = providers["first"].calls[0][0]["content"]
    second_system = providers["second"].calls[0][0]["content"]
    assert "snapshot-1" in first_system
    assert "snapshot-2" in second_system
    assert "shared-snapshot" not in first_system
    assert "shared-snapshot" not in second_system
def test_provider_bundle_with_node_model_override_is_rejected(tmp_path: Path) -> None:
    """LocalAgentRunner raises when an explicit bundle meets an agent-level model."""
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([_response("unused")])
    specialist = AgentDescriptor(name="specialist", model="special-model")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=specialist,
        task="work",
        node_id="specialist",
    )

    with pytest.raises(ValueError, match="provider_bundle cannot be combined"):
        runner = LocalAgentRunner(agent_loop)
        asyncio.run(runner.run(delegation, provider_bundle=_bundle(stub_provider)))
def test_node_level_model_without_bundle_reaches_provider_resolution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Without an explicit bundle, the agent's model and provider name reach ``make_provider_bundle``."""
    captured: dict[str, str | None] = {}
    stub_provider = RecordingProvider([_response("node model used")])

    def fake_make_provider_bundle(**kwargs):
        # Remember the two resolution arguments, then serve the stub bundle.
        captured.update(
            model=kwargs.get("model"),
            provider_name=kwargs.get("provider_name"),
        )
        return _bundle(stub_provider)

    monkeypatch.setattr("beaver.engine.loop.make_provider_bundle", fake_make_provider_bundle)
    agent_loop = _loop(tmp_path)
    specialist = AgentDescriptor(name="specialist", model="special-model", provider_name="custom")
    delegation = DelegationEnvelope(
        parent_task_id=None,
        parent_session_id="session-root",
        parent_run_id=None,
        agent=specialist,
        task="work",
        node_id="specialist",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(runner.run(delegation))

    assert outcome.success is True
    assert captured == {"model": "special-model", "provider_name": "custom"}
def test_unknown_parent_task_is_rejected_before_any_run(tmp_path: Path) -> None:
    """An unknown ``parent_task_id`` raises and leaves the run store empty."""
    agent_loop = _loop(tmp_path)
    stub_provider = RecordingProvider([_response("unused")])
    plan = ExecutionGraph(
        strategy="sequence",
        nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))],
    )

    with pytest.raises(ValueError, match="Unknown parent_task_id"):
        team = TeamService(agent_loop)
        asyncio.run(
            team.run_team(
                plan,
                parent_task_id="missing-task",
                parent_session_id="session-root",
                provider_bundle=_bundle(stub_provider),
            )
        )

    # No run may have been recorded for the rejected request.
    runtime = agent_loop.boot()
    recorded_runs = runtime.run_memory_store.list_runs()  # type: ignore[union-attr]
    assert recorded_runs == []
def test_parent_task_session_mismatch_is_rejected(tmp_path: Path) -> None:
    """Passing a parent task together with a different session id raises ``ValueError``."""
    agent_loop = _loop(tmp_path)
    runtime = agent_loop.boot()
    parent_task = runtime.task_service.create_task(session_id="session-root", description="parent task")  # type: ignore[union-attr]
    stub_provider = RecordingProvider([_response("unused")])
    plan = ExecutionGraph(
        strategy="sequence",
        nodes=[ExecutionNode("child", "child task", AgentDescriptor(name="child"))],
    )

    with pytest.raises(ValueError, match="belongs to session"):
        team = TeamService(agent_loop)
        asyncio.run(
            team.run_team(
                plan,
                parent_task_id=parent_task.task_id,
                # Deliberately not the session the parent task was created in.
                parent_session_id="other-session",
                provider_bundle=_bundle(stub_provider),
            )
        )