Files
beaver_project/app-instance/backend/tests/unit/test_agent_loop.py
steven_li 520a21a027 feat(coordinator): 添加团队节点默认最大工具迭代次数配置
添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,
并修改 LocalAgentRunner 中的逻辑来使用此默认值当 envelope 中未指定时。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件,将 finish_reason 为 "max_tool_iterations_finalized" 的情况
视为运行失败,并添加对原始工具调用输出的检测,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,
为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,
同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,
提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,
简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理,改进团队运行完成消息显示,
并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码,将所有任务统一按单步执行处理,
简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,
只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
2026-06-26 16:36:29 +08:00

172 lines
5.7 KiB
Python

import asyncio
import json
from contextlib import suppress
from types import SimpleNamespace
from typing import Any
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
from beaver.engine import loop as loop_module
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
    """Build a stub ``AgentRunResult`` for the ``web:test`` session.

    Only ``run_id`` and ``output_text`` vary between calls; the finish
    reason is always ``"stop"`` and the tool iteration count is zero.
    """
    fields: dict[str, Any] = {
        "session_id": "web:test",
        "run_id": run_id,
        "output_text": output_text,
        "finish_reason": "stop",
        "tool_iterations": 0,
    }
    return AgentRunResult(**fields)
def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
    """Check ``submit_direct`` issued from inside a running direct task.

    ``_process_direct_impl`` is replaced by a fake that answers the
    ``"outer"`` task by submitting an ``"inner"`` task to the same loop.
    The outer submission is bounded by a one-second timeout; it must return
    the inner result, and both tasks must be recorded in order.
    """

    async def run_case() -> None:
        agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
        seen_tasks: list[str] = []

        async def fake_process_direct(task: str, **kwargs: Any) -> AgentRunResult:
            seen_tasks.append(task)
            if task != "outer":
                return _run_result(task, "inner completed")
            # Re-enter the loop while the outer task is still being processed.
            return await agent_loop.submit_direct("inner", session_id="web:test")

        agent_loop._process_direct_impl = fake_process_direct  # type: ignore[method-assign]

        runner = asyncio.create_task(agent_loop.run())
        # Yield once so the freshly created runner task can start.
        await asyncio.sleep(0)

        try:
            outer_call = agent_loop.submit_direct("outer", session_id="web:test")
            result = await asyncio.wait_for(outer_call, timeout=1)
        finally:
            # Always shut the loop down, even when the submission failed.
            await agent_loop.stop()
            with suppress(asyncio.TimeoutError):
                await asyncio.wait_for(runner, timeout=1)
            if not runner.done():
                runner.cancel()
                with suppress(asyncio.CancelledError):
                    await runner

        assert result.output_text == "inner completed"
        assert seen_tasks == ["outer", "inner"]

    asyncio.run(run_case())
def test_web_search_loop_guard_keeps_successful_low_quality_results_available() -> None:
    """Check the guard returns ``None`` for repeated low-quality successes.

    The same successful ``web_search`` payload, marked ``"quality": "low"``,
    is observed three times in a row and every observation must yield
    ``None``.
    """
    guard = loop_module._WebSearchLoopGuard()
    payload: dict[str, Any] = {
        "success": True,
        "query": "weather beijing",
        "quality": "low",
        "results": [{"title": "Example", "url": "https://example.com", "snippet": ""}],
    }
    serialized = json.dumps(payload)

    for _ in range(3):
        assert guard.observe_result("web_search", serialized) is None
def test_web_search_loop_guard_resets_after_useful_result() -> None:
    """Check the observation sequence around a useful ``web_search`` result.

    Expected sequence: failed, useful, failed, failed all yield ``None``;
    the next failed search, the third in a row after the useful one, must
    yield a non-``None`` value.
    """
    guard = loop_module._WebSearchLoopGuard()
    failed = json.dumps({"success": False, "query": "weather", "error": "timeout"})
    useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []})

    # The failure seen before the useful result must not count afterwards.
    for observed in (failed, useful, failed, failed):
        assert guard.observe_result("web_search", observed) is None

    assert guard.observe_result("web_search", failed) is not None
class RecordingProvider(LLMProvider):
    """Stub provider that records which tool names each ``chat`` call received."""

    def __init__(self) -> None:
        super().__init__()
        # One inner list per ``chat`` call, in call order.
        self.tool_names_by_call: list[list[str]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int | None = None,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the offered tool names and reply with a fixed ``"done"``.

        A tool's name is read from its nested ``"function"`` mapping first,
        falling back to a top-level ``"name"`` key, and is stored as ``str``.
        """
        offered: list[str] = []
        for tool in tools or []:
            name = tool.get("function", {}).get("name") or tool.get("name")
            offered.append(str(name))
        # Append only after every name was collected, so a failing lookup
        # leaves no partial entry behind.
        self.tool_names_by_call.append(offered)
        return LLMResponse("done", provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ``ProviderBundle`` with a stub main runtime.

    The runtime is a ``SimpleNamespace`` exposing only ``model`` and
    ``provider_name``.
    """
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
def test_task_mode_root_does_not_expose_agent_team_tool(tmp_path) -> None:
    """Check a task-mode root run is not offered ``run_agent_team``.

    The run uses ``task_mode=True`` with a task id and skill assembly
    disabled; the tool names passed to the first provider call are checked.
    """
    recorder = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    run = agent_loop.process_direct(
        "compare financial reports",
        session_id="session",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(recorder),
    )
    asyncio.run(run)

    first_call_tools = recorder.tool_names_by_call[0]
    assert "run_agent_team" not in first_call_tools
def test_default_engine_registry_does_not_register_agent_team_tool(tmp_path) -> None:
    """Check a freshly booted engine does not know ``run_agent_team``.

    The booted engine must have a tool registry, the registry lookup must
    return ``None``, and the name must be absent from ``loaded.tools``.
    """
    engine = AgentLoop(loader=EngineLoader(workspace=tmp_path))
    loaded = engine.boot()

    registry = loaded.tool_registry
    assert registry is not None
    assert registry.get("run_agent_team") is None
    assert "run_agent_team" not in loaded.tools
def test_non_task_and_team_node_do_not_expose_agent_team_tool(tmp_path) -> None:
    """Check two further run kinds are not offered ``run_agent_team``.

    The same ``AgentLoop`` handles an ordinary chat (no task arguments) and
    then a task-mode run carrying a parent session id and the
    ``"team:child"`` source. Each run has its own recording provider, and
    the first provider call of each is checked.
    """
    chat_recorder = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    chat_run = agent_loop.process_direct(
        "ordinary chat",
        session_id="session",
        include_skill_assembly=False,
        provider_bundle=_bundle(chat_recorder),
    )
    asyncio.run(chat_run)

    child_recorder = RecordingProvider()
    child_run = agent_loop.process_direct(
        "team child",
        session_id="session:team:child",
        parent_session_id="session",
        source="team:child",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(child_recorder),
    )
    asyncio.run(child_run)

    # Both checks run only after both runs have completed.
    assert "run_agent_team" not in chat_recorder.tool_names_by_call[0]
    assert "run_agent_team" not in child_recorder.tool_names_by_call[0]