from __future__ import annotations import asyncio import json import sys import types from beaver.tools.builtins import web def _disable_ddgs(monkeypatch) -> None: def _raise_unavailable(query: str, limit: int) -> list[dict[str, str]]: raise ModuleNotFoundError("ddgs disabled for fallback test") monkeypatch.setattr(web, "_search_ddgs", _raise_unavailable) class _FakeResponse: headers = {"content-type": "text/html"} status_code = 200 fetch_html = """ Investor Reports 2025 Annual Report Investor Centre """ def __init__(self, url: str = "https://example.com") -> None: self.url = url if "duckduckgo.com" in url: self.text = 'Duck Example' elif "bing.com" in url: self.text = ( '
  • Example

    ' "

    Example result

  • " ) else: self.text = self.fetch_html def raise_for_status(self) -> None: return None class _FakeAsyncClient: calls: list[dict[str, object]] = [] urls: list[str] = [] fail_bing = False def __init__(self, **kwargs: object) -> None: self.calls.append(kwargs) async def __aenter__(self) -> "_FakeAsyncClient": return self async def __aexit__(self, *args: object) -> None: return None async def get(self, *args: object, **kwargs: object) -> _FakeResponse: url = str(args[0]) self.urls.append(url) if self.fail_bing and "bing.com" in url: raise web.httpx.ConnectTimeout("bing unavailable") return _FakeResponse(url) def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None: _FakeAsyncClient.calls = [] _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) async def _run() -> None: await web.WebFetchTool().execute(url="https://example.com") await web.WebSearchTool().execute(query="example") asyncio.run(_run()) assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True] def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = False monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) asyncio.run(web.WebFetchTool().execute(url="https://example.com")) timeout = _FakeAsyncClient.calls[0]["timeout"] assert isinstance(timeout, web.httpx.Timeout) assert timeout.connect == 5 assert timeout.read == 12 def test_web_fetch_returns_page_title_and_links(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebFetchTool().execute(url="https://example.com/investor")) payload = json.loads(raw) assert payload["success"] is True assert payload["title"] == "Investor Reports" assert payload["links"] == [ { "text": "2025 Annual Report", "url": "https://example.com/reports/2025-annual.pdf", }, { "text": "Investor Centre", "url": "https://example.com/investor", }, ] assert payload["pdf_links"] == [ { "text": "2025 Annual Report", "url": "https://example.com/reports/2025-annual.pdf", } ] def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = False _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) payload = json.loads(raw) assert payload["success"] is True assert payload["engine"] in {"bing", "duckduckgo"} assert set(_FakeAsyncClient.urls) == { "https://www.bing.com/search?q=weather+beijing", "https://duckduckgo.com/html/?q=weather+beijing", } timeout = _FakeAsyncClient.calls[0]["timeout"] assert isinstance(timeout, web.httpx.Timeout) assert timeout.connect == 5 assert timeout.read == 8 def test_web_search_prefers_ddgs_provider_when_available(monkeypatch) -> None: class _FakeDDGS: def text(self, query: str, max_results: int) -> list[dict[str, str]]: assert query == "weather beijing" assert max_results == 5 return [ { "title": "Beijing Weather", "href": "https://weather.example.com/beijing", "body": "Current Beijing weather forecast", } ] fake_module = types.SimpleNamespace(DDGS=_FakeDDGS) monkeypatch.setitem(sys.modules, "ddgs", fake_module) _FakeAsyncClient.calls = [] monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) payload = json.loads(raw) assert payload["success"] is True assert payload["engine"] == "ddgs" assert payload["quality"] == "high" assert payload["results"] == [ { "title": "Beijing Weather", "url": "https://weather.example.com/beijing", "snippet": "Current Beijing weather forecast", } ] assert _FakeAsyncClient.calls == [] def test_web_search_reports_low_quality_for_irrelevant_results(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = False _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) payload = json.loads(raw) assert payload["success"] is True assert payload["quality"] == "low" assert payload["low_relevance_reason"] == "results do not overlap enough with query terms" def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = True _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) payload = json.loads(raw) assert payload["success"] is True assert payload["engine"] == "duckduckgo" assert set(_FakeAsyncClient.urls) == { "https://www.bing.com/search?q=weather+beijing", "https://duckduckgo.com/html/?q=weather+beijing", }