"""Tests for the built-in web fetch and web search tools (``beaver.tools.builtins.web``)."""
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
import types
|
|
|
|
from beaver.tools.builtins import web
|
|
|
|
|
|
def _disable_ddgs(monkeypatch) -> None:
    """Swap ``web._search_ddgs`` for a stub that always raises.

    The stub raises ``ModuleNotFoundError`` on every call, so tests can run
    the search tool as if the ddgs provider were not usable.
    """

    def _ddgs_missing(query: str, limit: int) -> list[dict[str, str]]:
        # Parameter names mirror the replaced function in case it is called
        # with keyword arguments.
        raise ModuleNotFoundError("ddgs disabled for fallback test")

    monkeypatch.setattr(web, "_search_ddgs", _ddgs_missing)
|
|
|
|
|
|
class _FakeResponse:
|
|
headers = {"content-type": "text/html"}
|
|
status_code = 200
|
|
fetch_html = """
|
|
<html>
|
|
<head><title>Investor Reports</title></head>
|
|
<body>
|
|
<a href="/reports/2025-annual.pdf">2025 Annual Report</a>
|
|
<a href="https://example.com/investor">Investor Centre</a>
|
|
</body>
|
|
</html>
|
|
"""
|
|
|
|
def __init__(self, url: str = "https://example.com") -> None:
|
|
self.url = url
|
|
if "duckduckgo.com" in url:
|
|
self.text = '<a class="result__a" href="https://duck.example.com">Duck Example</a>'
|
|
elif "bing.com" in url:
|
|
self.text = (
|
|
'<li class="b_algo"><h2><a href="https://example.com">Example</a></h2>'
|
|
"<p>Example result</p></li>"
|
|
)
|
|
else:
|
|
self.text = self.fetch_html
|
|
|
|
def raise_for_status(self) -> None:
|
|
return None
|
|
|
|
|
|
class _FakeAsyncClient:
    """Recording replacement for ``httpx.AsyncClient`` used by these tests.

    The recorders live on the class, so tests inspect and reset them through
    ``_FakeAsyncClient`` itself rather than through an instance.
    """

    # Constructor keyword arguments, one dict per client created.
    calls: list[dict[str, object]] = []
    # Every URL passed to ``get``, in request order.
    urls: list[str] = []
    # When true, requests whose URL contains "bing.com" raise a connect timeout.
    fail_bing = False

    def __init__(self, **options: object) -> None:
        """Record the keyword arguments the client was constructed with."""
        self.calls.append(options)

    async def __aenter__(self) -> "_FakeAsyncClient":
        """Enter the ``async with`` block, yielding this client."""
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Leave the ``async with`` block without suppressing exceptions."""
        return None

    async def get(self, *args: object, **kwargs: object) -> _FakeResponse:
        """Record the requested URL and return the canned response for it.

        The URL is taken from the first positional argument. Raises
        ``web.httpx.ConnectTimeout`` for Bing URLs while ``fail_bing`` is set.
        """
        requested = str(args[0])
        self.urls.append(requested)
        # The URL is recorded before the simulated failure, so failed Bing
        # attempts still appear in ``urls``.
        if "bing.com" in requested and self.fail_bing:
            raise web.httpx.ConnectTimeout("bing unavailable")
        return _FakeResponse(requested)
|
|
|
|
|
|
def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
    """Both web tools must construct their HTTP client with ``trust_env=True``.

    One fetch and one search are executed; each is expected to create exactly
    one client, and each client must have been given ``trust_env=True``.
    """
    # Set all shared fake state through ``monkeypatch``: the outcome then does
    # not depend on what an earlier test left in ``urls`` or ``fail_bing``,
    # and the previous values are restored when this test ends.
    monkeypatch.setattr(_FakeAsyncClient, "calls", [])
    monkeypatch.setattr(_FakeAsyncClient, "urls", [])
    monkeypatch.setattr(_FakeAsyncClient, "fail_bing", False)
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    async def _run() -> None:
        # Run both tools inside one event loop, fetch first and search second.
        await web.WebFetchTool().execute(url="https://example.com")
        await web.WebSearchTool().execute(query="example")

    asyncio.run(_run())

    assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True]
|
|
|
|
|
|
def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None:
    """WebFetchTool must build its client with connect=5 and read=12 timeouts."""
    # Start from a clean fake: nothing recorded, Bing requests succeed.
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    fetch_tool = web.WebFetchTool()
    asyncio.run(fetch_tool.execute(url="https://example.com"))

    # Inspect the keyword arguments of the first client that was constructed.
    first_client_kwargs = _FakeAsyncClient.calls[0]
    timeout = first_client_kwargs["timeout"]
    assert isinstance(timeout, web.httpx.Timeout)
    assert (timeout.connect, timeout.read) == (5, 12)
|
|
|
|
|
|
def test_web_fetch_returns_page_title_and_links(monkeypatch) -> None:
    """WebFetchTool output must expose the page title, all links and PDF links.

    The fake page holds a relative PDF href and an absolute href; the expected
    payload lists both as absolute URLs and repeats the PDF under ``pdf_links``.
    """
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    # Expected link records, in the order the anchors appear in the fake page.
    annual_report = {
        "text": "2025 Annual Report",
        "url": "https://example.com/reports/2025-annual.pdf",
    }
    investor_centre = {
        "text": "Investor Centre",
        "url": "https://example.com/investor",
    }

    fetch_tool = web.WebFetchTool()
    response_text = asyncio.run(fetch_tool.execute(url="https://example.com/investor"))
    result = json.loads(response_text)

    assert result["success"] is True
    assert result["title"] == "Investor Reports"
    assert result["links"] == [annual_report, investor_centre]
    assert result["pdf_links"] == [annual_report]
|
|
|
|
|
|
def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None:
    """With ddgs disabled and Bing healthy, search queries the HTML endpoints.

    Checks that the Bing and DuckDuckGo search URLs were both requested, that
    the reported engine is one of the two, and that the first client was
    created with connect=5 and read=8 timeouts.
    """
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    response_text = asyncio.run(search_tool.execute(query="weather beijing"))
    result = json.loads(response_text)

    assert result["success"] is True
    assert result["engine"] in {"bing", "duckduckgo"}

    # Compared as a set: request order is deliberately not asserted here.
    expected_urls = {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
    assert set(_FakeAsyncClient.urls) == expected_urls

    first_client_kwargs = _FakeAsyncClient.calls[0]
    timeout = first_client_kwargs["timeout"]
    assert isinstance(timeout, web.httpx.Timeout)
    assert (timeout.connect, timeout.read) == (5, 8)
|
|
|
|
|
|
def test_web_search_prefers_ddgs_provider_when_available(monkeypatch) -> None:
    """When a ``ddgs`` module is importable, search uses it and skips HTTP.

    A fake ``ddgs`` module is placed in ``sys.modules``; the test then checks
    the reported engine, quality and normalised results, and that no HTTP
    client was ever constructed.
    """

    class _StubDDGS:
        def text(self, query: str, max_results: int) -> list[dict[str, str]]:
            # Verify the arguments the search tool forwards to the provider.
            assert query == "weather beijing"
            assert max_results == 5
            hit = {
                "title": "Beijing Weather",
                "href": "https://weather.example.com/beijing",
                "body": "Current Beijing weather forecast",
            }
            return [hit]

    _FakeAsyncClient.calls = []
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)
    monkeypatch.setitem(sys.modules, "ddgs", types.SimpleNamespace(DDGS=_StubDDGS))

    search_tool = web.WebSearchTool()
    response_text = asyncio.run(search_tool.execute(query="weather beijing"))
    result = json.loads(response_text)

    # The provider's ``href``/``body`` keys are expected back as ``url``/``snippet``.
    expected_results = [
        {
            "title": "Beijing Weather",
            "url": "https://weather.example.com/beijing",
            "snippet": "Current Beijing weather forecast",
        }
    ]
    assert result["success"] is True
    assert result["engine"] == "ddgs"
    assert result["quality"] == "high"
    assert result["results"] == expected_results
    assert _FakeAsyncClient.calls == []
|
|
|
|
|
|
def test_web_search_reports_low_quality_for_irrelevant_results(monkeypatch) -> None:
    """Search results unrelated to the query must be flagged as low quality.

    The fake engines only return "Example"/"Duck Example" hits, which share no
    terms with the query "weather beijing".
    """
    _FakeAsyncClient.calls, _FakeAsyncClient.urls = [], []
    _FakeAsyncClient.fail_bing = False
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    response_text = asyncio.run(search_tool.execute(query="weather beijing"))
    result = json.loads(response_text)

    assert result["success"] is True
    assert result["quality"] == "low"
    expected_reason = "results do not overlap enough with query terms"
    assert result["low_relevance_reason"] == expected_reason
|
|
|
|
|
|
def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None:
    """Search must still succeed through DuckDuckGo when Bing times out.

    The fake client raises ``ConnectTimeout`` for Bing URLs. The test checks
    that the reported engine is DuckDuckGo and that both endpoints were
    attempted.
    """
    # Set the fake's shared class-level state through ``monkeypatch`` so it is
    # restored when the test ends. A plain ``fail_bing = True`` assignment
    # would persist and break Bing for any later test that does not reset it.
    monkeypatch.setattr(_FakeAsyncClient, "calls", [])
    monkeypatch.setattr(_FakeAsyncClient, "urls", [])
    monkeypatch.setattr(_FakeAsyncClient, "fail_bing", True)
    _disable_ddgs(monkeypatch)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing"))

    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["engine"] == "duckduckgo"
    # The failed Bing attempt is still recorded because the fake logs the URL
    # before raising.
    assert set(_FakeAsyncClient.urls) == {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
|