- 集成MCP连接管理器,支持MCP服务器连接 - 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、 PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、 TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等 - 实现工具注册和装配功能 - 添加技能选择上下文参数 - 支持思考模式控制参数thinking_enabled feat(coordinator): 重构任务执行计划器参数命名 - 将learning_candidate_enabled重命名为allow_candidate_generation - 更新TeamGraphScheduler中的参数传递 - 修改LocalAgentRunner中的相关参数处理 - 更新README文档中的相应描述 refactor(context): 标准化工具调用参数格式 - 添加_json导入用于参数序列化 - 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷 - 修复工具调用中参数非字符串类型的序列化问题 refactor(session): 优化消息历史记录过滤逻辑 - 修改get_messages_as_conversation为基于运行状态过滤消息 - 排除未完成、失败或错误结束的运行记录 - 改进对话历史的可见性控制机制 fix(store): 修复FTS索引重建逻辑 - 添加异常处理防止FTS索引创建失败 - 实现_rebuild_fts_index方法重新构建全文搜索索引 - 优化索引触发器和表的维护流程
118 lines
4.4 KiB
Python
118 lines
4.4 KiB
Python
"""No-key web search and fetch tools."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from html import unescape
|
|
import json
|
|
import re
|
|
from typing import Any
|
|
from urllib.parse import quote_plus, urlparse
|
|
|
|
import httpx
|
|
|
|
|
|
def _json_result(success: bool, **payload: Any) -> str:
|
|
return json.dumps({"success": success, **payload}, ensure_ascii=False, indent=2)
|
|
|
|
|
|
def _strip_html(value: str) -> str:
|
|
text = re.sub(r"(?is)<(script|style).*?>.*?</\1>", " ", value)
|
|
text = re.sub(r"(?s)<[^>]+>", " ", text)
|
|
text = unescape(text)
|
|
return re.sub(r"\s+", " ", text).strip()
|
|
|
|
|
|
def _safe_url(url: str) -> str:
|
|
parsed = urlparse(url)
|
|
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
|
|
raise ValueError("url must be an http(s) URL")
|
|
return url
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class WebFetchTool:
|
|
name: str = "web_fetch"
|
|
description: str = "Fetch a public HTTP(S) page and return readable text. No API key required."
|
|
toolset: str = "web"
|
|
always_available: bool = False
|
|
parameters: dict[str, Any] = field(
|
|
default_factory=lambda: {
|
|
"type": "object",
|
|
"properties": {
|
|
"url": {"type": "string", "description": "HTTP(S) URL to fetch."},
|
|
"max_chars": {"type": "integer", "default": 12000, "minimum": 1000, "maximum": 50000},
|
|
},
|
|
"required": ["url"],
|
|
}
|
|
)
|
|
|
|
async def execute(self, *, url: str, max_chars: int = 12000, **_: Any) -> str:
|
|
try:
|
|
safe_url = _safe_url(url)
|
|
limit = max(1000, min(int(max_chars or 12000), 50000))
|
|
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
|
|
response = await client.get(
|
|
safe_url,
|
|
headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"},
|
|
)
|
|
response.raise_for_status()
|
|
content_type = response.headers.get("content-type", "")
|
|
raw = response.text
|
|
text = _strip_html(raw) if "html" in content_type.lower() else raw
|
|
truncated = len(text) > limit
|
|
return _json_result(
|
|
True,
|
|
url=str(response.url),
|
|
status_code=response.status_code,
|
|
content_type=content_type,
|
|
content=text[:limit],
|
|
truncated=truncated,
|
|
)
|
|
except Exception as exc:
|
|
return _json_result(False, url=url, error=str(exc))
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class WebSearchTool:
|
|
name: str = "web_search"
|
|
description: str = "Search the web using DuckDuckGo HTML results. No API key required."
|
|
toolset: str = "web"
|
|
always_available: bool = False
|
|
parameters: dict[str, Any] = field(
|
|
default_factory=lambda: {
|
|
"type": "object",
|
|
"properties": {
|
|
"query": {"type": "string", "description": "Search query."},
|
|
"limit": {"type": "integer", "default": 5, "minimum": 1, "maximum": 10},
|
|
},
|
|
"required": ["query"],
|
|
}
|
|
)
|
|
|
|
async def execute(self, *, query: str, limit: int = 5, **_: Any) -> str:
|
|
try:
|
|
if not str(query).strip():
|
|
raise ValueError("query is required")
|
|
bounded = max(1, min(int(limit or 5), 10))
|
|
url = f"https://duckduckgo.com/html/?q={quote_plus(query)}"
|
|
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
|
|
response = await client.get(url, headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"})
|
|
response.raise_for_status()
|
|
html = response.text
|
|
results: list[dict[str, str]] = []
|
|
pattern = re.compile(
|
|
r'<a[^>]+class="result__a"[^>]+href="(?P<url>[^"]+)"[^>]*>(?P<title>.*?)</a>',
|
|
re.I | re.S,
|
|
)
|
|
for match in pattern.finditer(html):
|
|
title = _strip_html(match.group("title"))
|
|
result_url = unescape(match.group("url"))
|
|
if title and result_url:
|
|
results.append({"title": title, "url": result_url, "snippet": ""})
|
|
if len(results) >= bounded:
|
|
break
|
|
return _json_result(True, query=query, results=results)
|
|
except Exception as exc:
|
|
return _json_result(False, query=query, error=str(exc))
|