添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数，并修改 LocalAgentRunner 中的逻辑，在 envelope 中未指定时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑
更新运行成功判断条件，将 finish_reason 为 "max_tool_iterations_finalized" 的情况视为运行失败，并添加对原始工具调用输出的检测，避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持
增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能，为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置
将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100，同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接
添加 mcp_connected 标志来跟踪MCP连接状态，确保 connect_all 只执行一次，提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能
移除与技能团队模板相关的代码，包括解析、存储和处理逻辑，简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能
添加技能激活快照事件处理，改进团队运行完成消息显示，并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑
移除团队执行相关代码，将所有任务统一按单步执行处理，简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑
更新节点证据评估逻辑，跳过自然语言证据需求的确定性验证，只执行机器可读的需求验证，避免因自然语言需求导致的节点失败。
172 lines
5.7 KiB
Python
172 lines
5.7 KiB
Python
import asyncio
|
|
import json
|
|
from contextlib import suppress
|
|
from types import SimpleNamespace
|
|
from typing import Any
|
|
|
|
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
|
|
from beaver.engine import loop as loop_module
|
|
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
|
from beaver.engine.providers.factory import ProviderBundle
|
|
|
|
|
|
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
    """Build a finished ``AgentRunResult`` for the ``web:test`` session.

    Args:
        run_id: Value stored as the result's ``run_id``.
        output_text: Value stored as the result's ``output_text``.

    Returns:
        A result with ``finish_reason="stop"`` and zero tool iterations.
    """
    fields = dict(
        session_id="web:test",
        run_id=run_id,
        output_text=output_text,
        finish_reason="stop",
        tool_iterations=0,
    )
    return AgentRunResult(**fields)
|
|
|
|
|
|
def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
    """Check that a submission made from inside a running submission completes.

    The loop's ``_process_direct_impl`` is swapped for a fake whose "outer"
    task submits an "inner" task through the same loop. The outer submission
    must resolve to the inner result within one second, and both tasks must
    have been processed in order.
    """

    async def run_case() -> None:
        agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
        seen_tasks: list[str] = []

        async def fake_process_direct(task: str, **kwargs: Any) -> AgentRunResult:
            seen_tasks.append(task)
            if task != "outer":
                return _run_result(task, "inner completed")
            # Re-enter the loop while the outer submission is still in flight.
            return await agent_loop.submit_direct("inner", session_id="web:test")

        agent_loop._process_direct_impl = fake_process_direct  # type: ignore[method-assign]

        runner = asyncio.create_task(agent_loop.run())
        # Yield to the event loop once before submitting.
        await asyncio.sleep(0)
        try:
            outer_submission = agent_loop.submit_direct("outer", session_id="web:test")
            result = await asyncio.wait_for(outer_submission, timeout=1)
        finally:
            # Always shut the loop down, even if the submission timed out.
            await agent_loop.stop()
            with suppress(asyncio.TimeoutError):
                await asyncio.wait_for(runner, timeout=1)
            if not runner.done():
                runner.cancel()
                with suppress(asyncio.CancelledError):
                    await runner

        assert result.output_text == "inner completed"
        assert seen_tasks == ["outer", "inner"]

    asyncio.run(run_case())
|
|
|
|
|
|
def test_web_search_loop_guard_keeps_successful_low_quality_results_available() -> None:
    """Expect the guard to return None for repeated successful low-quality results.

    The same successful ``web_search`` payload marked ``quality: low`` is
    observed three times in a row; every observation must return ``None``.
    """
    guard = loop_module._WebSearchLoopGuard()
    payload = {
        "success": True,
        "query": "weather beijing",
        "quality": "low",
        "results": [{"title": "Example", "url": "https://example.com", "snippet": ""}],
    }
    low_quality = json.dumps(payload)

    for _ in range(3):
        assert guard.observe_result("web_search", low_quality) is None
|
|
|
|
|
|
def test_web_search_loop_guard_resets_after_useful_result() -> None:
    """Expect a useful result to restart the guard's failure counting.

    Sequence observed: one failure, one useful result, then three failures.
    Only the last of those three failures is expected to return a non-None
    value; every earlier observation must return ``None``.
    """
    guard = loop_module._WebSearchLoopGuard()
    failed_search = json.dumps({"success": False, "query": "weather", "error": "timeout"})
    useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []})

    quiet_sequence = (failed_search, useful, failed_search, failed_search)
    for observed in quiet_sequence:
        assert guard.observe_result("web_search", observed) is None

    assert guard.observe_result("web_search", failed_search) is not None
|
|
|
|
|
|
class RecordingProvider(LLMProvider):
    """Stub provider that records which tool names each ``chat`` call received."""

    def __init__(self) -> None:
        super().__init__()
        # One entry per chat() call, holding the tool names offered in that call.
        self.tool_names_by_call: list[list[str]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int | None = None,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the offered tool names and reply with a fixed "done" response.

        For each tool dict, the name is read from ``tool["function"]["name"]``,
        falling back to ``tool["name"]``, and stored as a string.
        """
        offered: list[str] = []
        for spec in tools or []:
            tool_name = spec.get("function", {}).get("name") or spec.get("name")
            offered.append(str(tool_name))
        self.tool_names_by_call.append(offered)
        return LLMResponse("done", provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
|
|
|
|
|
|
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap *provider* in a ``ProviderBundle`` with a stub main runtime.

    The runtime is a plain namespace exposing only ``model`` and
    ``provider_name``.
    """
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
|
|
|
|
|
|
def test_task_mode_root_does_not_expose_agent_team_tool(tmp_path) -> None:
    """Expect a task-mode run not to offer ``run_agent_team`` to the provider.

    Runs one task-mode ``process_direct`` call against a recording provider
    and inspects the tool names passed to the provider's first chat call.
    """
    provider = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    pending_run = agent_loop.process_direct(
        "compare financial reports",
        session_id="session",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(provider),
    )
    asyncio.run(pending_run)

    first_call_tools = provider.tool_names_by_call[0]
    assert "run_agent_team" not in first_call_tools
|
|
|
|
|
|
def test_default_engine_registry_does_not_register_agent_team_tool(tmp_path) -> None:
    """Expect a booted default engine to have no ``run_agent_team`` tool.

    Checks both the tool registry lookup and the loaded ``tools`` collection.
    """
    engine_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
    loaded = engine_loop.boot()

    registry = loaded.tool_registry
    assert registry is not None
    assert registry.get("run_agent_team") is None
    assert "run_agent_team" not in loaded.tools
|
|
|
|
|
|
def test_non_task_and_team_node_do_not_expose_agent_team_tool(tmp_path) -> None:
    """Expect neither an ordinary chat nor a team child run to see ``run_agent_team``.

    Two runs share one loop: a plain (non task-mode) chat, then a task-mode
    run with source ``team:child`` and a parent session. Each uses its own
    recording provider, and the first chat call of each is inspected.
    """
    non_task_provider = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
    ordinary_run = agent_loop.process_direct(
        "ordinary chat",
        session_id="session",
        include_skill_assembly=False,
        provider_bundle=_bundle(non_task_provider),
    )
    asyncio.run(ordinary_run)

    team_node_provider = RecordingProvider()
    team_child_run = agent_loop.process_direct(
        "team child",
        session_id="session:team:child",
        parent_session_id="session",
        source="team:child",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(team_node_provider),
    )
    asyncio.run(team_child_run)

    ordinary_tools = non_task_provider.tool_names_by_call[0]
    assert "run_agent_team" not in ordinary_tools
    team_child_tools = team_node_provider.tool_names_by_call[0]
    assert "run_agent_team" not in team_child_tools
|