feat(engine): 添加MCP连接管理和工具集成功能

- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
This commit is contained in:
2026-05-14 09:43:48 +08:00
parent 8a12c30141
commit 30ab74ffb2
149 changed files with 12293 additions and 2812 deletions

View File

@ -1,19 +1,39 @@
"""Built-in Beaver tools."""
from .cron import CronTool
from .echo import EchoTool, echo_tool
from .filesystem import ListDirectoryTool, ReadFileTool, SearchFilesTool
from .filesystem import ListDirectoryTool, PatchFileTool, ReadFileTool, SearchFilesTool, WriteFileTool
from .memory import MemoryTool, memory_tool
from .skills_admin import SkillManageTool, SkillsListTool
from .skill_view import SkillViewTool, skill_view
from .session_search import SessionSearchTool, session_search
from .terminal import ExecuteCodeTool, ProcessTool, TerminalTool
from .utility import ClarifyTool, DelegateTool, SendMessageTool, SpawnTool, TodoTool
from .web import WebFetchTool, WebSearchTool
__all__ = [
"EchoTool",
"ExecuteCodeTool",
"CronTool",
"DelegateTool",
"ListDirectoryTool",
"MemoryTool",
"PatchFileTool",
"ProcessTool",
"ReadFileTool",
"SearchFilesTool",
"SendMessageTool",
"SpawnTool",
"SkillManageTool",
"SkillsListTool",
"SkillViewTool",
"SessionSearchTool",
"TerminalTool",
"TodoTool",
"ClarifyTool",
"WebFetchTool",
"WebSearchTool",
"WriteFileTool",
"echo_tool",
"memory_tool",
"skill_view",

View File

@ -0,0 +1,163 @@
"""Built-in cron tool for managing scheduled Beaver Tasks."""
from __future__ import annotations
import json
from typing import Any
from beaver.services.cron_service import CronService, schedule_from_api
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
# Human-readable summary of the cron tool; CronTool.spec passes it as the
# ToolSpec description.
CRON_TOOL_DESCRIPTION = (
    "Create and manage scheduled Beaver notifications or Tasks. Notification mode "
    "sends scheduled results to the fixed notification session; task mode creates "
    "a Task run. Actions: add, list, remove, toggle, run."
)
# JSON-Schema style description of the arguments accepted by the cron tool;
# CronTool.spec passes it as the ToolSpec input_schema. Only "action" is
# required at the schema level; the per-action requirements are stated in the
# individual property descriptions.
# NOTE(review): CronTool._invoke also reads "session_key" and
# "include_disabled" from the arguments, which are not declared here.
CRON_TOOL_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "action": {
            "type": "string",
            "enum": ["add", "list", "remove", "toggle", "run"],
            "description": "The scheduled-task operation to perform.",
        },
        "name": {
            "type": "string",
            "description": "Short scheduled-task name. Optional for add.",
        },
        "message": {
            "type": "string",
            "description": "The task instruction to run when the schedule triggers. Required for add.",
        },
        # The schedule can be given in several alternative forms below; how
        # they are combined is decided by schedule_from_api (not shown here).
        "schedule": {
            "type": "string",
            "description": "Hermes-style schedule, for example 'every 15m', '0 9 * * *', or an ISO datetime.",
        },
        "every_seconds": {
            "type": "integer",
            "minimum": 1,
            "description": "Fixed interval in seconds for recurring scheduled tasks.",
        },
        "cron_expr": {
            "type": "string",
            "description": "Cron expression such as '0 9 * * *'.",
        },
        "tz": {
            "type": "string",
            "description": "IANA timezone for cron_expr, for example 'Asia/Shanghai'.",
        },
        "at_iso": {
            "type": "string",
            "description": "ISO datetime for one-time scheduled tasks.",
        },
        "job_id": {
            "type": "string",
            "description": "Scheduled-task ID for remove, toggle, or run.",
        },
        "enabled": {
            "type": "boolean",
            "description": "Whether the scheduled task should be enabled when action is toggle.",
        },
        "mode": {
            "type": "string",
            "enum": ["notification", "task"],
            "description": "Use notification for reminders/reports; use task only when the scheduled work requires Task tracking.",
        },
        "requires_followup": {
            "type": "boolean",
            "description": "Whether a task-mode scheduled run should appear as an active task awaiting user follow-up.",
        },
    },
    "required": ["action"],
}
class CronTool(BaseTool):
    """Agent-facing ``cron`` tool that forwards each action to a CronService."""

    @property
    def spec(self) -> ToolSpec:
        """Return the static tool specification (name, schema and toolset)."""
        return ToolSpec(
            name="cron",
            description=CRON_TOOL_DESCRIPTION,
            input_schema=CRON_TOOL_PARAMETERS,
            toolset="cron",
            always_available=False,
        )

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Run one cron action and wrap its outcome in a ToolResult.

        Any exception raised while executing the action or serialising its
        outcome is converted into a failed ToolResult instead of propagating.
        """
        try:
            outcome = await self._invoke(arguments, context)
            failure = outcome.get("error")
            return ToolResult(
                success=bool(outcome.get("success", True)),
                content=json.dumps(outcome, ensure_ascii=False),
                tool_name=self.spec.name,
                error=str(failure) if failure else None,
                raw_output=outcome,
            )
        except Exception as exc:
            reason = str(exc)
            return ToolResult(
                success=False,
                content=json.dumps({"success": False, "error": reason}, ensure_ascii=False),
                tool_name=self.spec.name,
                error=reason,
            )

    async def _invoke(self, arguments: dict[str, Any], context: ToolContext) -> dict[str, Any]:
        """Dispatch on the normalised ``action`` argument and return a plain dict."""
        service = self._resolve_cron_service(context)
        action = str(arguments.get("action") or "").strip().lower()

        if action == "add":
            return self._add_job(service, arguments, context)
        if action == "list":
            return self._list_jobs(service, arguments)
        if action == "remove":
            target = _required_job_id(arguments)
            return {"success": service.remove_job(target), "job_id": target}
        if action == "toggle":
            return self._toggle_job(service, arguments)
        if action == "run":
            target = _required_job_id(arguments)
            # force=True is passed through to the service as in every run request.
            succeeded = await service.run_job(target, force=True)
            current = service.get_job(target)
            return {
                "success": succeeded,
                "job_id": target,
                "job": None if current is None else current.to_api_dict(),
            }
        return {"success": False, "error": "action must be one of: add, list, remove, toggle, run"}

    @staticmethod
    def _add_job(service: CronService, arguments: dict[str, Any], context: ToolContext) -> dict[str, Any]:
        """Create a scheduled job from the tool arguments."""
        schedule = schedule_from_api(arguments)
        # An explicit session_key argument wins over the calling session id;
        # a blank result is passed to the service as None.
        session_key = str(arguments.get("session_key") or context.session_id or "").strip() or None
        created = service.add_job(
            name=str(arguments.get("name") or "").strip(),
            message=str(arguments.get("message") or "").strip(),
            schedule=schedule,
            session_key=session_key,
            payload_kind="agent_turn",
            mode=str(arguments.get("mode") or "notification").strip().lower(),
            requires_followup=bool(arguments.get("requires_followup", False)),
        )
        return {"success": True, "job": created.to_api_dict()}

    @staticmethod
    def _list_jobs(service: CronService, arguments: dict[str, Any]) -> dict[str, Any]:
        """List jobs; disabled ones are included unless ``include_disabled`` is falsy."""
        show_disabled = bool(arguments.get("include_disabled", True))
        listing = []
        for entry in service.list_jobs(include_disabled=show_disabled):
            listing.append(entry.to_api_dict())
        return {"success": True, "jobs": listing}

    @staticmethod
    def _toggle_job(service: CronService, arguments: dict[str, Any]) -> dict[str, Any]:
        """Enable or disable a job; ``enabled`` defaults to True when omitted."""
        target = _required_job_id(arguments)
        updated = service.update_enabled(target, bool(arguments.get("enabled", True)))
        if updated is not None:
            return {"success": True, "job": updated.to_api_dict()}
        return {"success": False, "error": f"Scheduled task {target!r} was not found."}

    @staticmethod
    def _resolve_cron_service(context: ToolContext) -> CronService:
        """Return the context's CronService, or build one under the workspace.

        Raises:
            RuntimeError: if the context carries neither a CronService nor a
                workspace to store jobs in.
        """
        candidate = context.get("cron_service")
        if isinstance(candidate, CronService):
            return candidate
        if context.workspace:
            return CronService(f"{context.workspace}/cron/jobs.json")
        raise RuntimeError("Cron service is unavailable for this runtime.")
def _required_job_id(arguments: dict[str, Any]) -> str:
job_id = str(arguments.get("job_id") or "").strip()
if not job_id:
raise ValueError("job_id is required")
return job_id

View File

@ -116,6 +116,25 @@ SEARCH_FILES_PARAMETERS: dict[str, Any] = {
"required": ["query"],
}
# Argument schema for the write_file tool: both the target path and the
# complete new file content are mandatory.
WRITE_FILE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "File path relative to the current workspace."},
        "content": {"type": "string", "description": "Full file content to write."},
    },
    "required": ["path", "content"],
}
# Argument schema for the patch_file tool: the file to edit, the exact
# fragment to look for, and the text that replaces it. All three are mandatory.
PATCH_FILE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "File path relative to the current workspace."},
        "old_text": {"type": "string", "description": "Exact text to replace."},
        "new_text": {"type": "string", "description": "Replacement text."},
    },
    "required": ["path", "old_text", "new_text"],
}
class WorkspacePathError(ValueError):
"""Raised when a requested path escapes the configured workspace."""
@ -158,6 +177,20 @@ def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tupl
return root, resolved
def _resolve_writable_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
    """Resolve *user_path* to a write target that lies inside the workspace.

    The complete path (not just its parent directory) is resolved before the
    containment check, so a final path component that is a symlink or ``..``
    cannot point the write outside the workspace. Resolution is non-strict:
    directories that do not exist yet are allowed, which lets callers create
    them with ``mkdir(parents=True)`` before writing.

    Args:
        workspace: Workspace directory handed to ``_workspace_root``.
        user_path: Path supplied by the caller; relative paths are taken
            relative to the workspace root.

    Returns:
        ``(root, target)`` where ``target`` is the fully resolved write path.

    Raises:
        WorkspacePathError: if the path is empty, names the workspace root
            itself, or resolves to a location outside the workspace.
    """
    # Assumes _workspace_root returns an absolute, already-resolved directory;
    # the relative_to check below depends on that -- TODO confirm.
    root = _workspace_root(workspace)
    if not user_path or not str(user_path).strip():
        raise WorkspacePathError("path is required")
    raw_path = Path(str(user_path)).expanduser()
    candidate = raw_path if raw_path.is_absolute() else root / raw_path
    # Resolving the whole candidate follows symlinks in every component,
    # including the last one, and collapses any ".." segments.
    resolved = candidate.resolve(strict=False)
    try:
        relative = resolved.relative_to(root)
    except ValueError as exc:
        raise WorkspacePathError(f"path escapes workspace: {user_path}") from exc
    if not relative.parts:
        # The workspace root is a directory and can never be a file target.
        raise WorkspacePathError(f"path escapes workspace: {user_path}")
    return root, resolved
def _relative_path(root: Path, path: Path) -> str:
try:
return str(path.relative_to(root)) or "."
@ -440,3 +473,73 @@ class SearchFilesTool:
)
except (OSError, WorkspacePathError, ValueError) as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class WriteFileTool:
    """Tool that creates or fully overwrites a UTF-8 text file in the workspace."""

    name: str = "write_file"
    description: str = (
        "Write a UTF-8 text file inside the current workspace, replacing the full file. "
        "Use patch_file for targeted edits. Paths outside the workspace are rejected."
    )
    toolset: str = "filesystem"
    always_available: bool = False
    workspace: str | None = None
    # Each instance gets its own shallow copy of the shared schema.
    parameters: dict[str, Any] = field(default_factory=lambda: {**WRITE_FILE_PARAMETERS})

    async def execute(self, *, path: str, content: str, workspace: str | None = None) -> str:
        """Write *content* to *path* and return a JSON result string.

        On success the result carries the workspace-relative path and the
        number of UTF-8 bytes written. Path and OS errors are reported as a
        failed JSON result rather than raised.
        """
        try:
            root, target = _resolve_writable_path(workspace, path)
            target.parent.mkdir(parents=True, exist_ok=True)
            text = str(content)
            target.write_text(text, encoding="utf-8")
            written = len(text.encode("utf-8"))
            return _json_result(True, path=_relative_path(root, target), bytes=written)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class PatchFileTool:
    """Replace an exact text fragment inside a workspace file."""

    name: str = "patch_file"
    description: str = (
        "Replace an exact text fragment inside a UTF-8 workspace file. "
        "Fails if old_text is missing or ambiguous."
    )
    toolset: str = "filesystem"
    always_available: bool = False
    workspace: str | None = None
    # Each instance gets its own shallow copy of the shared schema.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(PATCH_FILE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        old_text: str,
        new_text: str,
        workspace: str | None = None,
    ) -> str:
        """Replace the single occurrence of *old_text* in *path* with *new_text*.

        Returns a JSON result string. The edit is refused, with an ``error``
        code, when *old_text* is empty, when the path is not a file, or when
        *old_text* occurs zero times or more than once. Path, decoding and OS
        errors are reported as failed results rather than raised.
        """
        try:
            # An empty fragment "matches" at every position: str.count("")
            # returns len(content) + 1, so it would be reported as ambiguous
            # or, for an empty file, silently turn the patch into an insert.
            if not old_text:
                return _json_result(False, error="old_text_empty", path=path)
            root, resolved = _resolve_existing_path(workspace, path)
            if not resolved.is_file():
                return _json_result(False, error="not_a_file", path=path)
            content = _read_text_file(resolved)
            occurrences = content.count(old_text)
            if occurrences == 0:
                return _json_result(False, error="old_text_not_found", path=path)
            if occurrences > 1:
                return _json_result(False, error="old_text_ambiguous", occurrences=occurrences, path=path)
            updated = content.replace(old_text, new_text, 1)
            resolved.write_text(updated, encoding="utf-8")
            return _json_result(
                True,
                path=_relative_path(root, resolved),
                old_bytes=len(old_text.encode("utf-8")),
                new_bytes=len(new_text.encode("utf-8")),
            )
        except UnicodeDecodeError:
            # Must precede the ValueError handler: UnicodeDecodeError is a
            # ValueError subclass and deserves the more specific message.
            return _json_result(False, error="file is not valid UTF-8 text", path=path)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)

View File

@ -0,0 +1,87 @@
"""Runtime tools for listing and managing skills."""
from __future__ import annotations
from dataclasses import dataclass
import json
from typing import Any
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
def _result(tool_name: str, success: bool, **payload: Any) -> ToolResult:
    """Build a ToolResult whose content is the payload serialised as JSON.

    For failures the ``error`` field is the payload's ``error`` entry, or
    ``"failed"`` when that entry is absent or falsy; for successes it is None.
    """
    body = {"success": success}
    body.update(payload)
    if success:
        failure = None
    else:
        failure = str(payload.get("error") or "failed")
    return ToolResult(
        success=success,
        tool_name=tool_name,
        content=json.dumps(body, ensure_ascii=False, indent=2),
        error=failure,
    )
@dataclass(slots=True)
class SkillsListTool(BaseTool):
    """Tool that reports the skills known to the context's skills loader."""

    @property
    def spec(self) -> ToolSpec:
        """Return the static specification; the tool takes no arguments."""
        return ToolSpec(
            name="skills_list",
            description="List available skills with descriptions.",
            input_schema={"type": "object", "properties": {}},
            toolset="skills",
        )

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """List every skill from the loader, without filtering unavailable ones."""
        loader = context.get("skills_loader")
        if loader is None:
            return _result(self.spec.name, False, error="skills_loader is unavailable")

        catalogue = []
        for record in loader.list_skills(filter_unavailable=False):
            catalogue.append(
                {
                    "name": record.name,
                    "description": record.description,
                    "source": record.source,
                    "version": record.version,
                    "tool_hints": list(record.tool_hints),
                }
            )
        return _result(self.spec.name, True, skills=catalogue)
@dataclass(slots=True)
class SkillManageTool(BaseTool):
    """Tool that lets an agent file a new skill draft via the draft service."""

    @property
    def spec(self) -> ToolSpec:
        """Return the static specification; ``create_draft`` is the only action."""
        schema = {
            "type": "object",
            "properties": {
                "action": {"type": "string", "enum": ["create_draft"]},
                "name": {"type": "string"},
                "description": {"type": "string"},
                "content": {"type": "string"},
            },
            "required": ["action", "name", "content"],
        }
        return ToolSpec(
            name="skill_manage",
            description="Create a new skill draft. Publishing still goes through the normal review/publish APIs.",
            input_schema=schema,
            toolset="skills",
        )

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Validate the request and create a skill draft.

        Checks run in a fixed order: the action, then the availability of the
        draft service, then the trimmed name and content.
        """
        tool_name = self.spec.name
        if arguments.get("action") != "create_draft":
            return _result(tool_name, False, error="only create_draft is supported")

        draft_service = context.get("draft_service")
        if draft_service is None:
            return _result(tool_name, False, error="draft_service is unavailable")

        skill_name = str(arguments.get("name") or "").strip()
        body = str(arguments.get("content") or "").strip()
        if not (skill_name and body):
            return _result(tool_name, False, error="name and content are required")

        # The description falls back to the skill name when none is given.
        frontmatter = {"description": str(arguments.get("description") or skill_name)}
        draft = draft_service.create_new_skill_draft(
            skill_name=skill_name,
            proposed_content=body,
            proposed_frontmatter=frontmatter,
            created_by=context.user_id or "agent",
            reason="created by skill_manage tool",
            trigger_session_id=context.session_id,
        )
        return _result(tool_name, True, draft=draft.to_dict())

View File

@ -0,0 +1,213 @@
"""Local terminal and background process tools."""
from __future__ import annotations
import asyncio
from dataclasses import dataclass, field
import json
from pathlib import Path
import sys
from typing import Any
from uuid import uuid4
def _json_result(success: bool, **payload: Any) -> str:
return json.dumps({"success": success, **payload}, ensure_ascii=False, indent=2)
class BackgroundProcessStore:
    """In-memory registry of shell commands started in the background.

    Each process is keyed by a short random id. Its combined stdout/stderr is
    drained continuously into a per-process byte buffer that keeps only the
    most recent 200,000 bytes.
    """

    def __init__(self) -> None:
        self._processes: dict[str, asyncio.subprocess.Process] = {}
        self._logs: dict[str, bytes] = {}
        # Strong references to the running drain tasks. The event loop only
        # keeps weak references to tasks, so a task nobody references may be
        # garbage-collected before it finishes and output would stop flowing.
        self._drain_tasks: set[asyncio.Task[None]] = set()

    async def start(self, command: str, cwd: str | None = None) -> str:
        """Spawn *command* through the shell and return its new process id.

        stderr is merged into stdout, and a background task starts collecting
        the output immediately.
        """
        process_id = uuid4().hex[:12]
        proc = await asyncio.create_subprocess_shell(
            command,
            cwd=cwd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        self._processes[process_id] = proc
        self._logs[process_id] = b""
        task = asyncio.create_task(self._drain(process_id, proc))
        self._drain_tasks.add(task)
        # Drop the reference once draining has finished.
        task.add_done_callback(self._drain_tasks.discard)
        return process_id

    async def _drain(self, process_id: str, proc: asyncio.subprocess.Process) -> None:
        """Copy the process output into the log buffer until end of stream."""
        if proc.stdout is None:
            return
        while True:
            chunk = await proc.stdout.read(4096)
            if not chunk:
                break
            # Keep only the tail so a chatty process cannot grow memory unbounded.
            self._logs[process_id] = (self._logs.get(process_id, b"") + chunk)[-200_000:]

    def list(self) -> list[dict[str, Any]]:
        """Return one row per known process with its id, return code and running flag."""
        return [
            {"process_id": process_id, "returncode": proc.returncode, "running": proc.returncode is None}
            for process_id, proc in self._processes.items()
        ]

    def log(self, process_id: str, limit: int = 12000) -> str:
        """Return up to the last *limit* bytes of captured output, decoded as UTF-8.

        Unknown ids yield an empty string. Undecodable bytes are replaced.
        """
        if limit <= 0:
            # bytes[-0:] would return the entire buffer instead of nothing.
            return ""
        return self._logs.get(process_id, b"")[-limit:].decode("utf-8", errors="replace")

    async def kill(self, process_id: str) -> bool:
        """Stop a process; return False only when the id is unknown.

        A running process is asked to terminate first and is killed if it has
        not exited within five seconds.
        """
        proc = self._processes.get(process_id)
        if proc is None:
            return False
        if proc.returncode is None:
            proc.terminate()
            try:
                await asyncio.wait_for(proc.wait(), timeout=5)
            except asyncio.TimeoutError:
                proc.kill()
                await proc.wait()
        return True
# Module-level store shared by TerminalTool (which starts background
# processes in it) and ProcessTool (which lists, reads and kills them).
GLOBAL_PROCESS_STORE = BackgroundProcessStore()
def _workspace_cwd(workspace: str | None, working_dir: str | None) -> str | None:
if not workspace:
return None
root = Path(workspace).expanduser().resolve()
raw = Path(working_dir or ".").expanduser()
candidate = raw if raw.is_absolute() else root / raw
resolved = candidate.resolve()
resolved.relative_to(root)
return str(resolved)
@dataclass(slots=True)
class TerminalTool:
    """Run a shell command, either to completion or as a background process."""

    name: str = "terminal"
    description: str = "Execute a shell command. Set background=true for long-running commands."
    toolset: str = "terminal"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "command": {"type": "string"},
                "working_dir": {"type": "string", "default": "."},
                "timeout": {"type": "integer", "default": 60, "minimum": 1, "maximum": 600},
                "background": {"type": "boolean", "default": False},
            },
            "required": ["command"],
        }
    )

    async def execute(
        self,
        *,
        command: str,
        working_dir: str | None = None,
        timeout: int = 60,
        background: bool = False,
        workspace: str | None = None,
    ) -> str:
        """Execute *command* and return a JSON result string.

        With ``background=True`` the command is handed to the global process
        store and only its process id is returned. Otherwise the command runs
        with stderr merged into stdout, for at most *timeout* seconds (clamped
        to 1..600), and the last 50,000 characters of output are returned.
        A command that exceeds the timeout is killed and reported as a
        failure. All errors are reported in the JSON result, never raised.
        """
        try:
            if not command.strip():
                raise ValueError("command is required")
            cwd = _workspace_cwd(workspace, working_dir)
            if background:
                process_id = await GLOBAL_PROCESS_STORE.start(command, cwd=cwd)
                return _json_result(True, process_id=process_id, background=True)
            # Validate the timeout before spawning so that a bad value cannot
            # leave a freshly started process running unattended.
            limit = max(1, min(int(timeout or 60), 600))
            proc = await asyncio.create_subprocess_shell(
                command,
                cwd=cwd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,
            )
            try:
                output, _ = await asyncio.wait_for(proc.communicate(), timeout=limit)
            except asyncio.TimeoutError:
                # wait_for only cancels communicate(); the child itself keeps
                # running unless it is killed and reaped explicitly.
                if proc.returncode is None:
                    try:
                        proc.kill()
                    except ProcessLookupError:
                        pass  # the process exited between the check and the kill
                await proc.wait()
                return _json_result(False, error=f"command timed out after {limit} seconds")
            text = output.decode("utf-8", errors="replace")
            return _json_result(True, returncode=proc.returncode, output=text[-50000:])
        except Exception as exc:
            return _json_result(False, error=str(exc))
@dataclass(slots=True)
class ProcessTool:
    """Inspect and stop background processes held in the global process store."""

    name: str = "process"
    description: str = "Manage background processes started with terminal(background=true)."
    toolset: str = "terminal"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "action": {"type": "string", "enum": ["list", "log", "kill"]},
                "process_id": {"type": "string"},
            },
            "required": ["action"],
        }
    )

    async def execute(self, *, action: str, process_id: str | None = None, **_: Any) -> str:
        """Perform ``list``, ``log`` or ``kill`` and return a JSON result string.

        ``log`` and ``kill`` require *process_id*. Both report failure for an
        id the store does not know. Extra keyword arguments are ignored.
        """
        if action == "list":
            return _json_result(True, processes=GLOBAL_PROCESS_STORE.list())
        if action == "log":
            if not process_id:
                return _json_result(False, error="process_id is required")
            # The store returns an empty log for unknown ids, which would be
            # indistinguishable from a silent process; check existence first
            # so a mistyped id is reported as a failure, as it is for kill.
            known_ids = {row["process_id"] for row in GLOBAL_PROCESS_STORE.list()}
            if process_id not in known_ids:
                return _json_result(False, process_id=process_id, error="unknown process_id")
            return _json_result(True, process_id=process_id, output=GLOBAL_PROCESS_STORE.log(process_id))
        if action == "kill":
            if not process_id:
                return _json_result(False, error="process_id is required")
            return _json_result(await GLOBAL_PROCESS_STORE.kill(process_id), process_id=process_id)
        return _json_result(False, error=f"unknown action: {action}")
@dataclass(slots=True)
class ExecuteCodeTool:
    """Run a Python snippet in a separate interpreter process."""

    name: str = "execute_code"
    description: str = "Execute small Python snippets locally without external APIs."
    toolset: str = "terminal"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "language": {"type": "string", "enum": ["python"], "default": "python"},
                "code": {"type": "string"},
                "timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 120},
                "working_dir": {"type": "string", "default": "."},
            },
            "required": ["code"],
        }
    )

    async def execute(
        self,
        *,
        code: str,
        language: str = "python",
        timeout: int = 30,
        working_dir: str | None = None,
        workspace: str | None = None,
    ) -> str:
        """Execute *code* and return a JSON result string.

        The snippet is fed on stdin to the current interpreter started with
        ``-I`` (isolated mode), with stderr merged into stdout. It may run for
        at most *timeout* seconds (clamped to 1..120); a snippet that exceeds
        the timeout is killed and reported as a failure. The last 50,000
        characters of output are returned. All errors are reported in the
        JSON result, never raised.
        """
        try:
            if language != "python":
                raise ValueError("Only python is supported")
            cwd = _workspace_cwd(workspace, working_dir)
            # Validate the timeout before spawning so that a bad value cannot
            # leave a freshly started interpreter running unattended.
            limit = max(1, min(int(timeout or 30), 120))
            proc = await asyncio.create_subprocess_exec(
                sys.executable,
                "-I",
                "-",
                cwd=cwd,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,
            )
            try:
                output, _ = await asyncio.wait_for(
                    proc.communicate(code.encode("utf-8")),
                    timeout=limit,
                )
            except asyncio.TimeoutError:
                # wait_for only cancels communicate(); the interpreter keeps
                # running unless it is killed and reaped explicitly.
                if proc.returncode is None:
                    try:
                        proc.kill()
                    except ProcessLookupError:
                        pass  # the process exited between the check and the kill
                await proc.wait()
                return _json_result(False, error=f"code execution timed out after {limit} seconds")
            return _json_result(
                True,
                language="python",
                returncode=proc.returncode,
                output=output.decode("utf-8", errors="replace")[-50000:],
            )
        except Exception as exc:
            return _json_result(False, error=str(exc))

View File

@ -0,0 +1,137 @@
"""Small local utility tools."""
from __future__ import annotations
from dataclasses import dataclass, field
import json
from typing import Any
def _json_result(success: bool, **payload: Any) -> str:
return json.dumps({"success": success, **payload}, ensure_ascii=False, indent=2)
@dataclass(slots=True)
class TodoTool:
    """Read or update the todo list kept in the call's ``metadata`` dict."""

    name: str = "todo"
    description: str = "Manage a lightweight task list for the current session."
    toolset: str = "planning"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "todos": {"type": "array", "items": {"type": "object"}},
                "merge": {"type": "boolean", "default": False},
            },
        }
    )

    async def execute(self, *, todos: list[dict[str, Any]] | None = None, merge: bool = False, **kwargs: Any) -> str:
        """Return the current todos, or store new ones and return the result.

        The list lives under ``metadata["todos"]``, where ``metadata`` is the
        dict passed as a keyword argument; when no dict is passed a throwaway
        one is used. With ``todos=None`` the list is only read. Otherwise the
        dict entries of *todos* replace the list, or are appended to it when
        *merge* is true. Non-dict entries are dropped.
        """
        supplied = kwargs.get("metadata")
        metadata = supplied if isinstance(supplied, dict) else {}
        existing = list(metadata.get("todos") or [])
        if todos is None:
            return _json_result(True, todos=existing)

        incoming = [dict(entry) for entry in todos if isinstance(entry, dict)]
        if merge:
            metadata["todos"] = existing + incoming
        else:
            metadata["todos"] = incoming
        return _json_result(True, todos=metadata["todos"])
@dataclass(slots=True)
class ClarifyTool:
    """Package a clarification question, with optional choices, as JSON."""

    name: str = "clarify"
    description: str = "Ask the user for clarification by returning a structured question."
    toolset: str = "planning"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "question": {"type": "string"},
                "choices": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["question"],
        }
    )

    async def execute(self, *, question: str, choices: list[str] | None = None, **_: Any) -> str:
        """Echo the question and the stringified choices in a JSON result.

        Missing choices become an empty list; extra keyword arguments are ignored.
        """
        options = list(map(str, choices or []))
        return _json_result(True, question=question, choices=options)
@dataclass(slots=True)
class SendMessageTool:
    """Build a message payload for an external channel without sending it."""

    name: str = "send_message"
    description: str = "Return a message payload for an external channel. Actual delivery is handled by configured services."
    toolset: str = "messaging"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "message": {"type": "string"},
            },
            "required": ["target", "message"],
        }
    )

    async def execute(self, *, target: str, message: str, **_: Any) -> str:
        """Return the target and message as JSON, always with ``delivered=False``.

        Extra keyword arguments are ignored.
        """
        envelope = {"target": target, "message": message, "delivered": False}
        return _json_result(True, **envelope)
@dataclass(slots=True)
class DelegateTool:
    """Describe a delegation request as JSON; nothing is executed here."""

    name: str = "delegate"
    description: str = "Create a structured delegation request for a sub-agent or teammate."
    toolset: str = "coordination"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "task": {"type": "string"},
                "agent": {"type": "string"},
                "context": {"type": "object"},
            },
            "required": ["task"],
        }
    )

    async def execute(self, *, task: str, agent: str | None = None, context: dict[str, Any] | None = None, **_: Any) -> str:
        """Return the delegation request as JSON, always with ``queued=False``.

        A missing agent is reported as ``"default"`` and a missing context as
        an empty object. Extra keyword arguments are ignored.
        """
        assignee = agent if agent else "default"
        shared_context = dict(context or {})
        return _json_result(
            True,
            task=task,
            agent=assignee,
            context=shared_context,
            queued=False,
            note="Delegation request recorded; runtime execution is handled by configured agent services.",
        )
@dataclass(slots=True)
class SpawnTool:
    """Describe a subtask spawn request as JSON; nothing is executed here."""

    name: str = "spawn"
    description: str = "Create a structured request to spawn a bounded subtask."
    toolset: str = "coordination"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "task": {"type": "string"},
                "role": {"type": "string", "default": "worker"},
                "write_scope": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["task"],
        }
    )

    async def execute(self, *, task: str, role: str = "worker", write_scope: list[str] | None = None, **_: Any) -> str:
        """Return the spawn request as JSON, always with ``queued=False``.

        ``write_scope`` entries are stringified; a missing scope becomes an
        empty list. Extra keyword arguments are ignored.
        """
        scope = list(map(str, write_scope or []))
        return _json_result(
            True,
            task=task,
            role=role,
            write_scope=scope,
            queued=False,
            note="Spawn request recorded; runtime execution is handled by configured agent services.",
        )

View File

@ -0,0 +1,117 @@
"""No-key web search and fetch tools."""
from __future__ import annotations
from dataclasses import dataclass, field
from html import unescape
import json
import re
from typing import Any
from urllib.parse import quote_plus, urlparse
import httpx
def _json_result(success: bool, **payload: Any) -> str:
return json.dumps({"success": success, **payload}, ensure_ascii=False, indent=2)
def _strip_html(value: str) -> str:
text = re.sub(r"(?is)<(script|style).*?>.*?</\1>", " ", value)
text = re.sub(r"(?s)<[^>]+>", " ", text)
text = unescape(text)
return re.sub(r"\s+", " ", text).strip()
def _safe_url(url: str) -> str:
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
raise ValueError("url must be an http(s) URL")
return url
@dataclass(slots=True)
class WebFetchTool:
    """Download an HTTP(S) resource and return its text, truncated to a limit."""

    name: str = "web_fetch"
    description: str = "Fetch a public HTTP(S) page and return readable text. No API key required."
    toolset: str = "web"
    always_available: bool = False
    parameters: dict[str, Any] = field(
        default_factory=lambda: {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "HTTP(S) URL to fetch."},
                "max_chars": {"type": "integer", "default": 12000, "minimum": 1000, "maximum": 50000},
            },
            "required": ["url"],
        }
    )

    async def execute(self, *, url: str, max_chars: int = 12000, **_: Any) -> str:
        """Fetch *url* and return a JSON result string.

        The body is reduced to plain text when the content type mentions
        HTML and returned as-is otherwise. At most *max_chars* characters
        (clamped to 1000..50000) are returned and ``truncated`` tells whether
        anything was cut. Redirects are followed. Any error, including
        non-success HTTP statuses, is reported as a failed JSON result.
        """
        # NOTE(review): _safe_url only validates scheme and host presence; it
        # does not prevent requests to private or loopback addresses.
        try:
            target = _safe_url(url)
            limit = max(1000, min(int(max_chars or 12000), 50000))
            request_headers = {"User-Agent": "Mozilla/5.0 Beaver/1.0"}
            async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
                response = await client.get(target, headers=request_headers)
            response.raise_for_status()
            content_type = response.headers.get("content-type", "")
            body = response.text
            if "html" in content_type.lower():
                text = _strip_html(body)
            else:
                text = body
            return _json_result(
                True,
                url=str(response.url),
                status_code=response.status_code,
                content_type=content_type,
                content=text[:limit],
                truncated=len(text) > limit,
            )
        except Exception as exc:
            return _json_result(False, url=url, error=str(exc))
@dataclass(slots=True)
class WebSearchTool:
name: str = "web_search"
description: str = "Search the web using DuckDuckGo HTML results. No API key required."
toolset: str = "web"
always_available: bool = False
parameters: dict[str, Any] = field(
default_factory=lambda: {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query."},
"limit": {"type": "integer", "default": 5, "minimum": 1, "maximum": 10},
},
"required": ["query"],
}
)
async def execute(self, *, query: str, limit: int = 5, **_: Any) -> str:
try:
if not str(query).strip():
raise ValueError("query is required")
bounded = max(1, min(int(limit or 5), 10))
url = f"https://duckduckgo.com/html/?q={quote_plus(query)}"
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
response = await client.get(url, headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"})
response.raise_for_status()
html = response.text
results: list[dict[str, str]] = []
pattern = re.compile(
r'<a[^>]+class="result__a"[^>]+href="(?P<url>[^"]+)"[^>]*>(?P<title>.*?)</a>',
re.I | re.S,
)
for match in pattern.finditer(html):
title = _strip_html(match.group("title"))
result_url = unescape(match.group("url"))
if title and result_url:
results.append({"title": title, "url": result_url, "snippet": ""})
if len(results) >= bounded:
break
return _json_result(True, query=query, results=results)
except Exception as exc:
return _json_result(False, query=query, error=str(exc))