from __future__ import annotations
import asyncio
import json
import sys
import types
from beaver.tools.builtins import web
def _disable_ddgs(monkeypatch) -> None:
    """Make the ``ddgs`` search provider look unavailable for one test.

    Swaps ``web._search_ddgs`` for a stub that always raises
    ``ModuleNotFoundError``. The patch is installed through ``monkeypatch``,
    so pytest removes it again when the calling test finishes.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture of the calling test.
    """

    def _ddgs_missing(query: str, limit: int) -> list[dict[str, str]]:
        # Same signature as the patched helper; the arguments are ignored.
        raise ModuleNotFoundError("ddgs disabled for fallback test")

    monkeypatch.setattr(web, "_search_ddgs", _ddgs_missing)
class _FakeResponse:
    # Canned HTTP response used by the tests in this file in place of a real
    # network reply. It exposes ``headers``, ``status_code``, ``url``, ``text``
    # and ``raise_for_status()``.
    headers = {"content-type": "text/html"}
    status_code = 200
    # Body served for every URL that is not a search-engine URL.
    # NOTE(review): the tests in this file expect a page title and two links
    # (one of them a PDF) to be extracted from this body, but the literal
    # below contains no markup. It looks like the HTML tags were lost at some
    # point -- confirm against the canonical copy of this file.
    fetch_html = """
Investor Reports
2025 Annual Report
Investor Centre
"""

    def __init__(self, url: str = "https://example.com") -> None:
        self.url = url
        # Choose the canned body by looking for the search-engine host name
        # anywhere in the requested URL.
        if "duckduckgo.com" in url:
            # NOTE(review): presumably a DuckDuckGo result snippet whose markup
            # is missing -- TODO confirm.
            self.text = 'Duck Example'
        elif "bing.com" in url:
            # NOTE(review): as shown, this literal spans a raw line break inside
            # a double-quoted string and is not valid Python; the Bing result
            # markup appears to have been stripped. Restore it from the
            # original fixture rather than guessing.
            self.text = (
                ''
                "Example result
"
            )
        else:
            self.text = self.fetch_html

    def raise_for_status(self) -> None:
        # The fake never represents an HTTP error, so this is a no-op.
        return None
class _FakeAsyncClient:
calls: list[dict[str, object]] = []
urls: list[str] = []
fail_bing = False
def __init__(self, **kwargs: object) -> None:
self.calls.append(kwargs)
async def __aenter__(self) -> "_FakeAsyncClient":
return self
async def __aexit__(self, *args: object) -> None:
return None
async def get(self, *args: object, **kwargs: object) -> _FakeResponse:
url = str(args[0])
self.urls.append(url)
if self.fail_bing and "bing.com" in url:
raise web.httpx.ConnectTimeout("bing unavailable")
return _FakeResponse(url)
def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
    """Both web tools must create their HTTP client with ``trust_env=True``.

    One fetch and one search are executed against the fake client, and the
    recorded constructor kwargs are checked: exactly two clients, each built
    with ``trust_env=True``.
    """
    # Reset all shared fake-client state, not only ``calls``. The search
    # below goes through ``_FakeAsyncClient.get``, which reads ``fail_bing``
    # and appends to ``urls``, so stale values from another test would
    # otherwise make this test depend on execution order.
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    _FakeAsyncClient.fail_bing = False
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    async def _run() -> None:
        # Run both tools inside one event loop so their client constructions
        # land in ``calls`` in a known order.
        await web.WebFetchTool().execute(url="https://example.com")
        await web.WebSearchTool().execute(query="example")

    asyncio.run(_run())

    assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True]
def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None:
    """``WebFetchTool`` must configure ``httpx.Timeout(connect=5, read=12)``.

    The timeout is read back from the kwargs of the first client the tool
    constructs.
    """
    # Start from pristine shared state on the fake client.
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    asyncio.run(web.WebFetchTool().execute(url="https://example.com"))

    first_client_kwargs = _FakeAsyncClient.calls[0]
    timeout = first_client_kwargs["timeout"]
    assert isinstance(timeout, web.httpx.Timeout)
    assert timeout.connect == 5
    assert timeout.read == 12
def test_web_fetch_returns_page_title_and_links(monkeypatch) -> None:
    """Fetching a page yields its title, its links and the PDF-only subset.

    The JSON payload returned by ``WebFetchTool`` is expected to carry
    ``success``, ``title``, ``links`` (absolute URLs with link text) and
    ``pdf_links`` (only the link pointing at a ``.pdf``).
    """
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    fetch_tool = web.WebFetchTool()
    raw = asyncio.run(fetch_tool.execute(url="https://example.com/investor"))
    payload = json.loads(raw)

    # The annual report shows up in both lists; the investor page only in
    # the general link list.
    annual_report = {
        "text": "2025 Annual Report",
        "url": "https://example.com/reports/2025-annual.pdf",
    }
    investor_centre = {
        "text": "Investor Centre",
        "url": "https://example.com/investor",
    }

    assert payload["success"] is True
    assert payload["title"] == "Investor Reports"
    assert payload["links"] == [annual_report, investor_centre]
    assert payload["pdf_links"] == [annual_report]
def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None:
    """With ``ddgs`` disabled, search goes to the Bing and DuckDuckGo pages.

    Checks that the search succeeds, that the reported engine is one of the
    two HTTP engines, that exactly the two expected URLs were requested, and
    that the client was built with ``httpx.Timeout(connect=5, read=8)``.
    """
    # NOTE(review): despite the test name, the assertions accept either
    # engine and compare the requested URLs as a set, so request order is
    # not pinned here.
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    payload = json.loads(asyncio.run(search_tool.execute(query="weather beijing")))

    expected_urls = {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
    assert payload["success"] is True
    assert payload["engine"] in {"bing", "duckduckgo"}
    assert set(_FakeAsyncClient.urls) == expected_urls

    first_client_kwargs = _FakeAsyncClient.calls[0]
    timeout = first_client_kwargs["timeout"]
    assert isinstance(timeout, web.httpx.Timeout)
    assert timeout.connect == 5
    assert timeout.read == 8
def test_web_search_prefers_ddgs_provider_when_available(monkeypatch) -> None:
    """An importable ``ddgs`` module is used, and no HTTP client is created.

    A stub ``ddgs`` module is placed in ``sys.modules``. The search result
    must then report engine ``"ddgs"`` with ``"high"`` quality, map the
    provider's ``title``/``href``/``body`` fields to
    ``title``/``url``/``snippet``, and leave the fake HTTP client unused.
    """

    class _FakeDDGS:
        def text(self, query: str, max_results: int) -> list[dict[str, str]]:
            # Verify the arguments the tool forwards to the provider.
            assert query == "weather beijing"
            assert max_results == 5
            hit = {
                "title": "Beijing Weather",
                "href": "https://weather.example.com/beijing",
                "body": "Current Beijing weather forecast",
            }
            return [hit]

    _FakeAsyncClient.calls = []
    ddgs_stub = types.SimpleNamespace(DDGS=_FakeDDGS)
    monkeypatch.setitem(sys.modules, "ddgs", ddgs_stub)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    payload = json.loads(asyncio.run(search_tool.execute(query="weather beijing")))

    expected_result = {
        "title": "Beijing Weather",
        "url": "https://weather.example.com/beijing",
        "snippet": "Current Beijing weather forecast",
    }
    assert payload["success"] is True
    assert payload["engine"] == "ddgs"
    assert payload["quality"] == "high"
    assert payload["results"] == [expected_result]
    # No client construction means no HTTP fallback was attempted.
    assert _FakeAsyncClient.calls == []
def test_web_search_reports_low_quality_for_irrelevant_results(monkeypatch) -> None:
    """Results unrelated to the query are flagged as low quality.

    The canned search pages served by the fake client do not mention the
    query terms, so the payload must report ``quality == "low"`` together
    with the matching ``low_relevance_reason``.
    """
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    payload = json.loads(asyncio.run(search_tool.execute(query="weather beijing")))

    assert payload["success"] is True
    assert payload["quality"] == "low"
    assert payload["low_relevance_reason"] == "results do not overlap enough with query terms"
def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None:
    """A Bing connect timeout must not fail the search.

    With ``ddgs`` disabled and the fake client raising
    ``httpx.ConnectTimeout`` for Bing URLs, the search is expected to succeed
    via DuckDuckGo, having still attempted both endpoints.
    """
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    # Flip the failure switch through monkeypatch so it is restored at
    # teardown. A bare class-attribute assignment would leave
    # ``fail_bing = True`` behind for whichever test runs next.
    monkeypatch.setattr(_FakeAsyncClient, "fail_bing", True)
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing"))
    payload = json.loads(raw)

    assert payload["success"] is True
    assert payload["engine"] == "duckduckgo"
    # Bing was still requested (and failed); DuckDuckGo supplied the results.
    assert set(_FakeAsyncClient.urls) == {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }