84 lines
3.0 KiB
Python
84 lines
3.0 KiB
Python
import asyncio
|
|
import json
|
|
from contextlib import suppress
|
|
from typing import Any
|
|
|
|
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
|
|
from beaver.engine import loop as loop_module
|
|
|
|
|
|
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
    """Build an ``AgentRunResult`` for the fixed test session ``"web:test"``.

    Only ``run_id`` and ``output_text`` vary; the finish reason is always
    ``"stop"`` and the tool-iteration count is always ``0``.
    """
    fields: dict[str, Any] = {
        "session_id": "web:test",
        "run_id": run_id,
        "output_text": output_text,
        "finish_reason": "stop",
        "tool_iterations": 0,
    }
    return AgentRunResult(**fields)
|
|
|
|
|
|
def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
    """Check that ``submit_direct`` can be re-entered while the loop runs.

    The stubbed ``_process_direct_impl`` handles the ``"outer"`` task by
    submitting an ``"inner"`` task to the same loop. The test expects the
    outer call to resolve, within one second, to the inner task's result.
    """

    async def run_case() -> None:
        agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
        seen_tasks: list[str] = []

        async def fake_process_direct(task: str, **kwargs: Any) -> AgentRunResult:
            # Record every task so the call order can be asserted afterwards.
            seen_tasks.append(task)
            if task != "outer":
                return _run_result(task, "inner completed")
            # Re-enter the loop from inside the handler of the outer task.
            return await agent_loop.submit_direct("inner", session_id="web:test")

        agent_loop._process_direct_impl = fake_process_direct  # type: ignore[method-assign]

        runner = asyncio.create_task(agent_loop.run())
        # Yield once so the freshly created runner task gets a chance to start.
        await asyncio.sleep(0)
        try:
            outer_call = agent_loop.submit_direct("outer", session_id="web:test")
            result = await asyncio.wait_for(outer_call, timeout=1)
        finally:
            # Always shut the loop down, even if the submit above failed or
            # timed out, so the runner task does not outlive the test.
            await agent_loop.stop()
            with suppress(asyncio.TimeoutError):
                await asyncio.wait_for(runner, timeout=1)
            if not runner.done():
                runner.cancel()
                with suppress(asyncio.CancelledError):
                    await runner

        assert result.output_text == "inner completed"
        assert seen_tasks == ["outer", "inner"]

    asyncio.run(run_case())
|
|
|
|
|
|
def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> None:
    """Expect guidance on the third consecutive low-quality ``web_search`` result.

    The first two observations must return ``None``. The third must return a
    mapping whose ``finish_reason`` is ``"web_search_low_quality_budget"`` and
    whose ``message`` mentions the offending query.
    """
    payload = {
        "success": True,
        "query": "weather beijing",
        "quality": "low",
        "results": [{"title": "Example", "url": "https://example.com", "snippet": ""}],
    }
    low_quality = json.dumps(payload)
    guard = loop_module._WebSearchLoopGuard()

    # Two low-quality results in a row are still tolerated.
    for _ in range(2):
        assert guard.observe_result("web_search", low_quality) is None

    guidance = guard.observe_result("web_search", low_quality)

    assert guidance is not None
    assert guidance["finish_reason"] == "web_search_low_quality_budget"
    assert "weather beijing" in guidance["message"]
|
|
|
|
|
|
def test_web_search_loop_guard_resets_after_useful_result() -> None:
    """Expect a high-quality result to restart the low-quality count.

    One low-quality result, then a useful one, then two more low-quality
    results must all return ``None``. Only the following low-quality result
    is expected to produce guidance.
    """
    low_quality = json.dumps({"success": True, "query": "weather", "quality": "low", "results": []})
    useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []})
    guard = loop_module._WebSearchLoopGuard()

    # The useful result sits between low-quality ones, so none of these
    # observations should trip the guard.
    tolerated = (low_quality, useful, low_quality, low_quality)
    for observed in tolerated:
        assert guard.observe_result("web_search", observed) is None

    assert guard.observe_result("web_search", low_quality) is not None
|