Files
beaver_project/app-instance/backend/tests/unit/test_main_agent_router.py
steven_li 6e9e74d1ee feat(engine): 添加运行时上下文支持并重构工具迭代限制
添加 RuntimeContext 类用于捕获模型运行时的日期时间信息,
包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。

同时增加最大上下文消息数和工具迭代次数的配置选项,
将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
2026-05-26 11:18:35 +08:00

218 lines
6.5 KiB
Python

from __future__ import annotations
import asyncio
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.tasks import MainAgentRouter, TaskRecord
class RouterProvider(LLMProvider):
    """Stub provider that answers every ``chat`` call with one canned response.

    Each call is logged in ``calls`` so tests can inspect what the router
    sent. If the canned response is an exception instance, ``chat`` raises
    it instead of returning a reply.
    """

    def __init__(self, response: str | Exception) -> None:
        """Store the canned response and start with an empty call log."""
        super().__init__()
        self.response = response
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Log the call arguments, then return or raise the canned response.

        ``tools`` is accepted for signature compatibility but is not logged.
        """
        call_record = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "model": model,
            "thinking_enabled": thinking_enabled,
        }
        self.calls.append(call_record)

        canned = self.response
        if isinstance(canned, Exception):
            # Simulate a provider failure after the call has been logged.
            raise canned
        return LLMResponse(
            content=canned,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
class SequenceRouterProvider(LLMProvider):
    """Stub provider that plays back a scripted sequence of responses.

    Each ``chat`` call consumes the next entry of ``responses`` in order.
    String entries are returned as the reply content; exception entries are
    raised. Every call is logged in ``calls``.
    """

    def __init__(self, responses: list[str | Exception]) -> None:
        """Copy the script so the caller's list is never mutated."""
        super().__init__()
        self.responses = list(responses)
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Log the call, then return or raise the next scripted response.

        ``tools`` is accepted for signature compatibility but is not logged.
        """
        call_record = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "model": model,
            "thinking_enabled": thinking_enabled,
        }
        # The call is logged before the script is consumed, so a failing
        # entry still counts towards ``len(calls)``.
        self.calls.append(call_record)

        scripted = self.responses.pop(0)
        if isinstance(scripted, Exception):
            raise scripted
        return LLMResponse(
            content=scripted,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
def _task() -> TaskRecord:
    """Build the fixed ``TaskRecord`` used as the active task in these tests."""
    # Description and goal intentionally carry the same text.
    summary = "实现任务连续性"
    timestamp = "now"
    return TaskRecord(
        task_id="task-1",
        session_id="web:task",
        description=summary,
        goal=summary,
        constraints=[],
        priority=0,
        status="awaiting_acceptance",
        creator="test",
        created_at=timestamp,
        updated_at=timestamp,
        metadata={"short_title": "任务连续性"},
    )
def test_router_continues_active_task_from_llm_decision() -> None:
    """A ``continue_task`` reply keeps the message on the active task.

    Also checks that the router asked the provider for 256 max tokens.
    """
    provider = RouterProvider('{"action":"continue_task","reason":"related","short_title":"任务连续性"}')
    router = MainAgentRouter()
    pending = router.classify(
        "再把输入框标识也补上",
        active_task=_task(),
        provider=provider,
    )
    decision = asyncio.run(pending)

    assert decision.is_task
    assert decision.starts_new_task is False
    assert decision.short_title == "任务连续性"

    first_call = provider.calls[0]
    assert first_call["max_tokens"] == 256
def test_router_marks_revision_from_llm_decision() -> None:
    """A ``revise_task`` reply is surfaced as a task decision with that action."""
    router = MainAgentRouter()
    pending = router.classify(
        "再详细一点,并加上表格",
        active_task=_task(),
        provider=RouterProvider('{"action":"revise_task","reason":"user requested changes","short_title":"任务连续性"}'),
    )
    decision = asyncio.run(pending)

    assert decision.is_task
    assert decision.starts_new_task is False
    assert decision.action == "revise_task"
def test_router_receives_thinking_mode() -> None:
    """``thinking_enabled=False`` is forwarded to the provider's ``chat`` call."""
    provider = RouterProvider('{"action":"simple_chat","reason":"simple"}')
    router = MainAgentRouter()
    pending = router.classify(
        "你好",
        provider=provider,
        thinking_enabled=False,
    )
    decision = asyncio.run(pending)

    assert not decision.is_task

    first_call = provider.calls[0]
    assert first_call["thinking_enabled"] is False
def test_router_injects_intent_skill_guidance() -> None:
    """The ``intent_skill`` text appears in the prompt sent to the provider.

    The provider's ``new_task`` reply is expected to surface as the
    ``create_task`` action on a decision that starts a new task.
    """
    provider = RouterProvider('{"action":"new_task","reason":"needs weather tool","short_title":"珠海天气"}')
    router = MainAgentRouter()
    pending = router.classify(
        "帮我查一下今天珠海天气",
        provider=provider,
        intent_skill="Weather and current external data must be routed to new_task.",
    )
    decision = asyncio.run(pending)

    assert decision.is_task
    assert decision.starts_new_task is True
    assert decision.action == "create_task"

    # The guidance is checked in the content of the second message.
    sent_messages = provider.calls[0]["messages"]
    prompt = sent_messages[1]["content"]
    assert "Intent Agent skill guidance" in prompt
    assert "Weather and current external data" in prompt
def test_router_closes_active_task_from_llm_decision() -> None:
    """A ``close_task`` reply yields a non-task decision flagged as closing."""
    router = MainAgentRouter()
    pending = router.classify(
        "这个任务结束了",
        active_task=_task(),
        provider=RouterProvider('{"action":"close_task","reason":"user said done"}'),
    )
    decision = asyncio.run(pending)

    assert not decision.is_task
    assert decision.closes_task is True
def test_router_fallback_keeps_active_task_but_not_new_task() -> None:
    """When the provider fails, the fallback depends on whether a task is active.

    With an active task the decision is still a task; without one it is not.
    """
    # Scenario 1: provider fails while a task is active.
    with_task_call = MainAgentRouter().classify(
        "继续",
        active_task=_task(),
        provider=RouterProvider(RuntimeError("provider down")),
    )
    active = asyncio.run(with_task_call)

    # Scenario 2: provider fails and there is no active task.
    without_task_call = MainAgentRouter().classify(
        "implement something",
        active_task=None,
        provider=RouterProvider(RuntimeError("provider down")),
    )
    inactive = asyncio.run(without_task_call)

    assert active.is_task
    assert not inactive.is_task
def test_router_retries_once_after_provider_failure() -> None:
    """After one provider failure the second reply is used.

    The provider must have been called exactly twice.
    """
    script = [
        TimeoutError(),
        '{"action":"new_task","reason":"needs search","short_title":"中美会面"}',
    ]
    provider = SequenceRouterProvider(script)
    router = MainAgentRouter()
    pending = router.classify(
        "帮我看看昨天的中美会面都谈了什么?",
        provider=provider,
    )
    decision = asyncio.run(pending)

    assert decision.is_task
    assert decision.action == "create_task"
    assert len(provider.calls) == 2
def test_router_fallback_after_two_provider_failures() -> None:
    """Two consecutive provider failures produce the fallback decision.

    The decision is not a task, its reason carries the second error's
    message, and the provider was called exactly twice.
    """
    failures = [TimeoutError(), RuntimeError("provider down")]
    provider = SequenceRouterProvider(failures)
    router = MainAgentRouter()
    pending = router.classify(
        "帮我看看昨天的中美会面都谈了什么?",
        provider=provider,
    )
    decision = asyncio.run(pending)

    assert not decision.is_task
    assert decision.reason == "router_failed: provider down"
    assert len(provider.calls) == 2