Files
beaver_project/app-instance/backend/beaver/tools/builtins/filesystem.py
steven_li 30ab74ffb2 feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
2026-05-14 09:43:48 +08:00

546 lines
19 KiB
Python

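"""Workspace-scoped filesystem tools.

These tools are Beaver's first batch of real local tools. The read-only
capabilities are:
- list_directory
- read_file
- search_files
The module also defines write_file and patch_file, which modify files and are
not marked as always available.

The security boundary is kept deliberately explicit: every user-supplied path
must resolve to a location inside the current `ToolContext.workspace`. Even if
the workspace contains a symlink that points outside of it, reading through
that link is rejected because the real path falls outside the workspace.
"""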
from __future__ import annotations
from dataclasses import dataclass, field
import json
from pathlib import Path
from typing import Any, Iterable
# Hard upper bounds shared by the argument schemas and the tool implementations.
MAX_LIST_ENTRIES = 1_000  # largest max_entries accepted by list_directory
MAX_READ_LINES = 1_000  # largest max_lines accepted by read_file
MAX_READ_CHARS = 120_000  # cap on the characters returned by one read_file call
MAX_SEARCH_RESULTS = 200  # largest max_results accepted by search_files
MAX_SEARCH_FILE_BYTES = 2_000_000  # search_files skips the content of larger files
MAX_SEARCH_FILES = 5_000  # search_files stops walking after this many files
# Directory names that the search walk never descends into (VCS metadata,
# virtual environments, tool caches, dependency and build output folders).
SKIP_DIR_NAMES = {
    ".git",
    ".hg",
    ".svn",
    ".venv",
    "venv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    "node_modules",
    "dist",
    "build",
}
# JSON-Schema-style description of the arguments accepted by list_directory.
# No argument is required; the defaults here match ListDirectoryTool.execute.
LIST_DIRECTORY_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "default": ".",
            "description": "Directory path relative to the current workspace. Absolute paths are allowed only if they stay inside the workspace.",
        },
        "recursive": {
            "type": "boolean",
            "default": False,
            "description": "Whether to recursively list child entries. Symlink directories are not followed.",
        },
        "max_entries": {
            "type": "integer",
            "default": 200,
            "minimum": 1,
            "maximum": MAX_LIST_ENTRIES,
            "description": "Maximum number of entries to return.",
        },
    },
    "required": [],
}
# JSON-Schema-style description of the arguments accepted by read_file.
# Only "path" is required; the defaults here match ReadFileTool.execute.
READ_FILE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "description": "File path relative to the current workspace. Absolute paths are allowed only if they stay inside the workspace.",
        },
        "start_line": {
            "type": "integer",
            "default": 1,
            "minimum": 1,
            "description": "1-based line number to start reading from.",
        },
        "max_lines": {
            "type": "integer",
            "default": 200,
            "minimum": 1,
            "maximum": MAX_READ_LINES,
            "description": "Maximum number of lines to read.",
        },
    },
    "required": ["path"],
}
# JSON-Schema-style description of the arguments accepted by search_files.
# Only "query" is required; the defaults here match SearchFilesTool.execute.
SEARCH_FILES_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "Plain text query to search in file paths and UTF-8 text files.",
        },
        "path": {
            "type": "string",
            "default": ".",
            "description": "Directory or file path relative to the current workspace.",
        },
        "max_results": {
            "type": "integer",
            "default": 50,
            "minimum": 1,
            "maximum": MAX_SEARCH_RESULTS,
            "description": "Maximum number of matches to return.",
        },
        "case_sensitive": {
            "type": "boolean",
            "default": False,
            "description": "Whether search should be case-sensitive.",
        },
    },
    "required": ["query"],
}
# JSON-Schema-style description of the arguments accepted by write_file.
# Both the target path and the complete new content are required.
WRITE_FILE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "File path relative to the current workspace."},
        "content": {"type": "string", "description": "Full file content to write."},
    },
    "required": ["path", "content"],
}
# JSON-Schema-style description of the arguments accepted by patch_file.
# All three arguments are required.
PATCH_FILE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "File path relative to the current workspace."},
        "old_text": {"type": "string", "description": "Exact text to replace."},
        "new_text": {"type": "string", "description": "Replacement text."},
    },
    "required": ["path", "old_text", "new_text"],
}
class WorkspacePathError(ValueError):
    """Report a workspace or path that the filesystem tools refuse to use.

    The helpers in this module raise it when the workspace is missing or is
    not a directory, when a required path is empty, and when a requested path
    resolves to a location outside the workspace. Because it derives from
    ``ValueError``, handlers that catch ``ValueError`` also catch it.
    """
def _json_result(success: bool, **payload: Any) -> str:
    """Serialize a tool outcome as a pretty-printed JSON object.

    Args:
        success: Outcome flag; always emitted as the first key, ``"success"``.
        **payload: Extra fields appended after ``"success"`` in call order.

    Returns:
        A JSON string indented by two spaces, with non-ASCII characters
        written verbatim instead of being escaped.
    """
    document: dict[str, Any] = {"success": success}
    document.update(payload)
    return json.dumps(document, ensure_ascii=False, indent=2)
def _clamp_int(value: Any, *, default: int, minimum: int, maximum: int) -> int:
    """Coerce ``value`` to an int and clamp it into ``[minimum, maximum]``.

    Args:
        value: Caller-supplied value of any type.
        default: Used in place of ``value`` when it cannot be converted.
        minimum: Lower bound of the returned value.
        maximum: Upper bound of the returned value.

    Returns:
        ``int(value)`` (or ``default`` when conversion fails), clamped to the
        given bounds. The fallback ``default`` is clamped as well.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); without it an infinite
        # number in the tool arguments would escape as an unhandled error.
        parsed = default
    return max(minimum, min(parsed, maximum))
def _workspace_root(workspace: str | None) -> Path:
if not workspace:
raise WorkspacePathError("workspace is not configured for filesystem tools")
root = Path(workspace).expanduser().resolve(strict=True)
if not root.is_dir():
raise WorkspacePathError(f"workspace is not a directory: {root}")
return root
def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
    """Resolve a user path and ensure the real target stays inside workspace.

    Args:
        workspace: Configured workspace directory.
        user_path: Path supplied by the caller, relative to the workspace or
            absolute. ``None`` or an empty string means the workspace itself.

    Returns:
        ``(root, resolved)``: the resolved workspace root and the fully
        resolved (symlink-free) target path.

    Raises:
        WorkspacePathError: If the workspace is invalid, the path cannot be
            resolved at all, or the real target lies outside the workspace.
        OSError: If the target does not exist (strict resolution).
    """
    root = _workspace_root(workspace)
    shown = user_path or "."
    try:
        raw_path = Path(shown).expanduser()
        candidate = raw_path if raw_path.is_absolute() else root / raw_path
        resolved = candidate.resolve(strict=True)
    except (RuntimeError, TypeError) as exc:
        # RuntimeError: "~unknownuser" cannot be expanded, or (before Python
        # 3.13) a symlink loop was hit. TypeError: a non-string path argument.
        # Callers only handle OSError/ValueError, so convert both.
        raise WorkspacePathError(f"path cannot be resolved: {shown}") from exc
    try:
        # The comparison is done on the resolved path, so a symlink pointing
        # outside the workspace is rejected here.
        resolved.relative_to(root)
    except ValueError as exc:
        raise WorkspacePathError(
            f"path escapes workspace: {user_path or '.'}"
        ) from exc
    return root, resolved
def _resolve_writable_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
    """Resolve a path that is about to be written and keep it inside workspace.

    Unlike the read-side resolver, the target does not have to exist yet.
    Missing parent directories are allowed so that the caller can create them;
    the part of the path that does exist is still resolved through symlinks.

    Args:
        workspace: Configured workspace directory.
        user_path: Path supplied by the caller, relative to the workspace or
            absolute.

    Returns:
        ``(root, target)``: the resolved workspace root and the path to write,
        expressed as the resolved parent directory joined with the final
        path component.

    Raises:
        WorkspacePathError: If the workspace is invalid, the path is empty or
            cannot be resolved, or the parent directory or the real write
            target lies outside the workspace.
    """
    root = _workspace_root(workspace)
    if not user_path or not str(user_path).strip():
        raise WorkspacePathError("path is required")
    try:
        raw_path = Path(str(user_path)).expanduser()
        candidate = raw_path if raw_path.is_absolute() else root / raw_path
        # Non-strict: parents that do not exist yet are fine; symlinks in the
        # existing portion are still followed before the containment check.
        parent = candidate.parent.resolve(strict=False)
        target = parent / candidate.name
        # Writing follows a symlink in the final component, so the real
        # destination has to be checked too, not just the parent directory.
        destination = target.resolve(strict=False)
    except RuntimeError as exc:
        # "~unknownuser" cannot be expanded, or (before Python 3.13) a symlink
        # loop was hit; callers only handle OSError/ValueError.
        raise WorkspacePathError(f"path cannot be resolved: {user_path}") from exc
    for checked in (parent, destination):
        try:
            checked.relative_to(root)
        except ValueError as exc:
            raise WorkspacePathError(f"path escapes workspace: {user_path}") from exc
    return root, target
def _relative_path(root: Path, path: Path) -> str:
    """Render ``path`` relative to ``root`` for display in tool results.

    Returns:
        The relative form of ``path`` (``"."`` when it is the root itself), or
        the unchanged string form of ``path`` when it is not located under
        ``root``.
    """
    try:
        inside = path.relative_to(root)
    except ValueError:
        return str(path)
    text = str(inside)
    return text if text else "."
def _entry_type(path: Path) -> str:
    """Classify a directory entry as symlink, directory, file or other.

    The symlink probe runs first, so a link is always reported as
    ``"symlink"`` regardless of what it points to.
    """
    probes = (
        ("symlink", path.is_symlink),
        ("directory", path.is_dir),
        ("file", path.is_file),
    )
    for label, probe in probes:
        if probe():
            return label
    return "other"
def _entry_payload(root: Path, path: Path) -> dict[str, Any]:
    """Build the JSON-ready description of one directory entry.

    Returns:
        A dict with the entry's ``name``, its ``path`` relative to ``root``,
        its ``type`` and its ``size`` in bytes. For a symlink the size is that
        of the link itself; ``size`` is ``None`` when the entry cannot be
        stat'ed.
    """
    try:
        if path.is_symlink():
            info = path.lstat()
        else:
            info = path.stat()
    except OSError:
        size = None
    else:
        size = info.st_size
    payload: dict[str, Any] = {}
    payload["name"] = path.name
    payload["path"] = _relative_path(root, path)
    payload["type"] = _entry_type(path)
    payload["size"] = size
    return payload
def _iter_directory(root: Path, directory: Path, *, recursive: bool) -> Iterable[Path]:
    """Yield the entries of ``directory``, optionally descending into children.

    Real directories are listed before everything else; within each group the
    entries are ordered by lower-cased name. With ``recursive`` set, the
    contents of a real directory follow immediately after the directory
    itself. Symlinked directories are yielded but never entered.

    Args:
        root: Workspace root. Not used by the traversal itself; kept so the
            signature stays unchanged for callers.
        directory: Directory whose entries are listed.
        recursive: Whether to descend into real sub-directories.

    Raises:
        OSError: If ``directory`` itself cannot be listed. Sub-directories
            that cannot be listed are still yielded, but their contents are
            skipped instead of aborting the whole listing.
    """

    def sort_key(item: Path) -> tuple[bool, str]:
        is_real_directory = not item.is_symlink() and item.is_dir()
        return (not is_real_directory, item.name.lower())

    entries = sorted(directory.iterdir(), key=sort_key)
    for entry in entries:
        yield entry
        if not recursive or entry.is_symlink() or not entry.is_dir():
            continue
        try:
            yield from _iter_directory(root, entry, recursive=True)
        except OSError:
            # An unreadable sub-directory should not discard the entries that
            # were already collected; the search walk skips such directories
            # in the same way.
            continue
def _looks_binary(path: Path) -> bool:
    """Guess whether ``path`` holds binary data.

    Only the first 4096 bytes are inspected; a NUL byte in that sample marks
    the file as binary. A file that cannot be opened or read is reported as
    binary as well.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(4096)
    except OSError:
        return True
    return head.find(b"\0") != -1
def _read_text_file(path: Path) -> str:
    """Return the whole content of ``path`` decoded as UTF-8.

    Raises:
        ValueError: If the file looks binary (including the case where it
            cannot be opened for the binary check).
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    if not _looks_binary(path):
        return path.read_text(encoding="utf-8")
    raise ValueError("binary files cannot be read by read_file/search_files")
def _iter_search_files(root: Path, start: Path) -> Iterable[Path]:
    """Yield the regular files that a search starting at ``start`` inspects.

    If ``start`` is a file, it is the only item yielded. Otherwise the tree
    below ``start`` is walked with an explicit stack: the children of each
    directory are visited in lower-cased name order, files are yielded as
    they are met, and sub-directories are queued and later taken from the
    end of the stack. Symlinks and directories named in ``SKIP_DIR_NAMES``
    are ignored, directories that cannot be listed are skipped, and the walk
    ends once ``MAX_SEARCH_FILES`` files have been yielded.

    Args:
        root: Workspace root. Not used by the walk itself.
        start: Resolved file or directory to search.
    """
    if start.is_file():
        yield start
        return

    pending = [start]
    yielded = 0
    while pending and yielded < MAX_SEARCH_FILES:
        directory = pending.pop()
        try:
            listing = list(directory.iterdir())
            listing.sort(key=lambda item: item.name.lower())
        except OSError:
            continue
        for candidate in listing:
            if candidate.is_symlink():
                continue
            if candidate.is_dir():
                if candidate.name not in SKIP_DIR_NAMES:
                    pending.append(candidate)
            elif candidate.is_file():
                yielded += 1
                yield candidate
                if yielded >= MAX_SEARCH_FILES:
                    break
@dataclass(slots=True)
class ListDirectoryTool:
    """List files and directories inside the current workspace."""

    # Tool metadata. How these fields are consumed is not visible in this
    # module; presumably a tool registry reads them (confirm against caller).
    name: str = "list_directory"
    description: str = (
        "List files and directories inside the current workspace. "
        "Use this before reading files when you need to inspect project structure. "
        "This tool never follows paths outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # NOTE(review): execute() only uses its own ``workspace`` argument and
    # never reads this field.
    workspace: str | None = None
    # Shallow copy of the module-level schema; nested dicts are shared.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(LIST_DIRECTORY_PARAMETERS))

    async def execute(
        self,
        *,
        path: str = ".",
        recursive: bool = False,
        max_entries: int = 200,
        workspace: str | None = None,
    ) -> str:
        """List a workspace directory and return the result as JSON text.

        Args:
            path: Directory to list, relative to the workspace or absolute.
            recursive: Whether to include the contents of sub-directories.
            max_entries: Maximum number of entries, clamped to
                ``1..MAX_LIST_ENTRIES``.
            workspace: Workspace directory that ``path`` must stay inside.

        Returns:
            A JSON string. On success it carries ``path``, ``recursive``,
            ``entries`` and ``truncated``; ``truncated`` is true only when at
            least one further entry existed beyond the limit. On failure it
            carries ``error`` and the requested ``path``.
        """
        try:
            root, resolved = _resolve_existing_path(workspace, path)
            if not resolved.is_dir():
                return _json_result(False, error="not_a_directory", path=path)
            limit = _clamp_int(max_entries, default=200, minimum=1, maximum=MAX_LIST_ENTRIES)
            entries: list[dict[str, Any]] = []
            truncated = False
            for entry in _iter_directory(root, resolved, recursive=bool(recursive)):
                # Test the limit before appending: reaching this point with a
                # full list proves that another entry exists, so a directory
                # holding exactly ``limit`` entries is not flagged as truncated.
                if len(entries) >= limit:
                    truncated = True
                    break
                entries.append(_entry_payload(root, entry))
            return _json_result(
                True,
                path=_relative_path(root, resolved),
                recursive=bool(recursive),
                entries=entries,
                truncated=truncated,
            )
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class ReadFileTool:
    """Read a UTF-8 text file inside the current workspace."""

    # Tool metadata. How these fields are consumed is not visible in this
    # module; presumably a tool registry reads them (confirm against caller).
    name: str = "read_file"
    description: str = (
        "Read a UTF-8 text file inside the current workspace with line limits. "
        "Use this to inspect source code, docs, config, or logs. "
        "This tool rejects binary files and paths outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # NOTE(review): execute() only uses its own ``workspace`` argument and
    # never reads this field.
    workspace: str | None = None
    # Shallow copy of the module-level schema; nested dicts are shared.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(READ_FILE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        start_line: int = 1,
        max_lines: int = 200,
        workspace: str | None = None,
    ) -> str:
        """Read a window of lines from a workspace file and return JSON text.

        Args:
            path: File to read, relative to the workspace or absolute.
            start_line: 1-based first line, clamped to ``1..10_000_000``.
            max_lines: Maximum number of lines, clamped to
                ``1..MAX_READ_LINES``.
            workspace: Workspace directory that ``path`` must stay inside.

        Returns:
            A JSON string. On success it carries ``path``, ``start_line``,
            ``end_line``, ``total_lines``, ``truncated`` and ``content``.
            ``content`` holds at most ``MAX_READ_CHARS`` characters and is cut
            at a line boundary, so ``end_line`` is always the last line that
            is actually present in ``content``. The one exception is a single
            line longer than the cap, which is returned as a prefix. On
            failure it carries ``error`` and the requested ``path``.
        """
        try:
            root, resolved = _resolve_existing_path(workspace, path)
            if not resolved.is_file():
                return _json_result(False, error="not_a_file", path=path)
            start = _clamp_int(start_line, default=1, minimum=1, maximum=10_000_000)
            limit = _clamp_int(max_lines, default=200, minimum=1, maximum=MAX_READ_LINES)
            content = _read_text_file(resolved)
            lines = content.splitlines()
            selected = lines[start - 1 : start - 1 + limit]

            # Keep whole lines while they fit into the character budget, so
            # that the reported line range matches the returned text.
            kept: list[str] = []
            remaining = MAX_READ_CHARS
            char_truncated = False
            for line in selected:
                cost = len(line) + (1 if kept else 0)  # +1 for the joining "\n"
                if cost > remaining:
                    char_truncated = True
                    break
                kept.append(line)
                remaining -= cost
            if char_truncated and not kept:
                # Even the first line is longer than the cap: return its
                # prefix rather than nothing at all.
                kept = [selected[0][:MAX_READ_CHARS]]

            selected_text = "\n".join(kept)
            end_line = start + len(kept) - 1 if kept else start - 1
            return _json_result(
                True,
                path=_relative_path(root, resolved),
                start_line=start,
                end_line=end_line,
                total_lines=len(lines),
                truncated=end_line < len(lines) or char_truncated,
                content=selected_text,
            )
        except UnicodeDecodeError:
            # Must precede the ValueError handler: UnicodeDecodeError is a
            # ValueError subclass.
            return _json_result(False, error="file is not valid UTF-8 text", path=path)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class SearchFilesTool:
    """Search filenames and UTF-8 text file contents inside the workspace."""

    # Tool metadata. How these fields are consumed is not visible in this
    # module; presumably a tool registry reads them (confirm against caller).
    name: str = "search_files"
    description: str = (
        "Search file paths and UTF-8 text file contents inside the current workspace. "
        "Use this to find relevant source files, docs, config keys, or log lines. "
        "This tool skips large/binary files and never searches outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # NOTE(review): execute() only uses its own ``workspace`` argument and
    # never reads this field.
    workspace: str | None = None
    # Shallow copy of the module-level schema; nested dicts are shared.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(SEARCH_FILES_PARAMETERS))

    async def execute(
        self,
        *,
        query: str,
        path: str = ".",
        max_results: int = 50,
        case_sensitive: bool = False,
        workspace: str | None = None,
    ) -> str:
        """Search paths and file contents for ``query`` and return JSON text.

        Each visited file can contribute one ``"path"`` match (the query
        occurs in its workspace-relative path) and one ``"content"`` match per
        matching line. Files above ``MAX_SEARCH_FILE_BYTES``, binary-looking
        files and files that are unreadable or not valid UTF-8 are counted as
        skipped, and their content is not searched.

        Args:
            query: Non-empty plain-text needle.
            path: File or directory to search, relative to the workspace or
                absolute.
            max_results: Maximum number of matches, clamped to
                ``1..MAX_SEARCH_RESULTS``.
            case_sensitive: When false, both sides are lower-cased first.
            workspace: Workspace directory that ``path`` must stay inside.

        Returns:
            A JSON string. On success it carries ``query``, ``path``,
            ``results``, ``truncated`` (true whenever the number of results
            reached the limit), ``searched_files`` and ``skipped_files``. On
            failure it carries ``error``.
        """
        try:
            if not isinstance(query, str) or not query.strip():
                return _json_result(False, error="query must be a non-empty string")
            root, resolved = _resolve_existing_path(workspace, path)
            if not (resolved.is_dir() or resolved.is_file()):
                return _json_result(False, error="path must be a file or directory", path=path)
            limit = _clamp_int(max_results, default=50, minimum=1, maximum=MAX_SEARCH_RESULTS)

            def fold(text: str) -> str:
                # Identity for case-sensitive searches, lower-casing otherwise.
                return text if case_sensitive else text.lower()

            needle = fold(query)
            matches: list[dict[str, Any]] = []
            searched_count = 0
            skipped_count = 0

            for candidate in _iter_search_files(root, resolved):
                shown_path = _relative_path(root, candidate)

                # 1) Match against the workspace-relative path.
                if needle in fold(shown_path):
                    matches.append(
                        {
                            "path": shown_path,
                            "line": None,
                            "match_type": "path",
                            "preview": shown_path,
                        }
                    )
                    if len(matches) >= limit:
                        break

                # 2) Load the content, skipping anything unsuitable.
                try:
                    too_large = candidate.stat().st_size > MAX_SEARCH_FILE_BYTES
                    if too_large or _looks_binary(candidate):
                        skipped_count += 1
                        continue
                    body = candidate.read_text(encoding="utf-8")
                except (OSError, UnicodeDecodeError):
                    skipped_count += 1
                    continue
                searched_count += 1

                # 3) Match line by line; previews are capped at 500 characters.
                for line_number, line_text in enumerate(body.splitlines(), start=1):
                    if needle in fold(line_text):
                        matches.append(
                            {
                                "path": shown_path,
                                "line": line_number,
                                "match_type": "content",
                                "preview": line_text[:500],
                            }
                        )
                        if len(matches) >= limit:
                            break
                if len(matches) >= limit:
                    break

            return _json_result(
                True,
                query=query,
                path=_relative_path(root, resolved),
                results=matches,
                truncated=len(matches) >= limit,
                searched_files=searched_count,
                skipped_files=skipped_count,
            )
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class WriteFileTool:
    """Write a UTF-8 text file inside the current workspace."""

    # Tool metadata. How these fields are consumed is not visible in this
    # module; presumably a tool registry reads them (confirm against caller).
    name: str = "write_file"
    description: str = (
        "Write a UTF-8 text file inside the current workspace, replacing the full file. "
        "Use patch_file for targeted edits. Paths outside the workspace are rejected."
    )
    toolset: str = "filesystem"
    always_available: bool = False
    # NOTE(review): execute() only uses its own ``workspace`` argument and
    # never reads this field.
    workspace: str | None = None
    # Shallow copy of the module-level schema; nested dicts are shared.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(WRITE_FILE_PARAMETERS))

    async def execute(self, *, path: str, content: str, workspace: str | None = None) -> str:
        """Replace the whole content of a workspace file and return JSON text.

        Args:
            path: File to write, relative to the workspace or absolute.
            content: New file content; converted with ``str()`` before use.
            workspace: Workspace directory that ``path`` must stay inside.

        Returns:
            A JSON string. On success it carries the workspace-relative
            ``path`` and ``bytes``, the UTF-8 encoded size of the content. On
            failure it carries ``error`` and the requested ``path``.
        """
        try:
            root, target = _resolve_writable_path(workspace, path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(str(content), encoding="utf-8")
            encoded_size = len(str(content).encode("utf-8"))
            shown_path = _relative_path(root, target)
            return _json_result(True, path=shown_path, bytes=encoded_size)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class PatchFileTool:
    """Replace an exact text fragment inside a workspace file."""

    # Tool metadata. How these fields are consumed is not visible in this
    # module; presumably a tool registry reads them (confirm against caller).
    name: str = "patch_file"
    description: str = (
        "Replace an exact text fragment inside a UTF-8 workspace file. "
        "Fails if old_text is missing or ambiguous."
    )
    toolset: str = "filesystem"
    always_available: bool = False
    # NOTE(review): execute() only uses its own ``workspace`` argument and
    # never reads this field.
    workspace: str | None = None
    # Shallow copy of the module-level schema; nested dicts are shared.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(PATCH_FILE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        old_text: str,
        new_text: str,
        workspace: str | None = None,
    ) -> str:
        """Replace the single occurrence of ``old_text`` and return JSON text.

        The file is matched with its line endings normalized to ``"\\n"``. If
        the file uses one line-ending style consistently, that style is
        written back, so a patch no longer rewrites every line ending of a
        CRLF file. Files with mixed or no line endings are written with the
        platform default, as before.

        Args:
            path: Existing file to patch, relative to the workspace or
                absolute.
            old_text: Exact, non-empty text that must occur exactly once.
            new_text: Replacement text.
            workspace: Workspace directory that ``path`` must stay inside.

        Returns:
            A JSON string. On success it carries the workspace-relative
            ``path`` plus ``old_bytes`` and ``new_bytes`` (UTF-8 sizes of the
            two arguments). On failure it carries ``error`` and the requested
            ``path``; an ambiguous match also reports ``occurrences``.
        """
        try:
            # An empty needle "matches" between every character, and a
            # non-string needle would raise an unhandled TypeError below.
            if not isinstance(old_text, str) or not old_text:
                return _json_result(False, error="old_text must be a non-empty string", path=path)
            if not isinstance(new_text, str):
                return _json_result(False, error="new_text must be a string", path=path)
            root, resolved = _resolve_existing_path(workspace, path)
            if not resolved.is_file():
                return _json_result(False, error="not_a_file", path=path)
            if _looks_binary(resolved):
                raise ValueError("binary files cannot be read by read_file/search_files")

            # Universal-newline read: content uses "\n" only, while
            # ``newlines`` records which line endings the file really contains.
            with resolved.open("r", encoding="utf-8") as handle:
                content = handle.read()
                seen_newlines = handle.newlines
            # A single style is preserved on write; None (no newline seen) or
            # a tuple (mixed styles) falls back to the platform default.
            newline = seen_newlines if isinstance(seen_newlines, str) else None

            needle, replacement = old_text, new_text
            if newline == "\r\n":
                # Content was normalized to "\n" and is translated back to
                # CRLF on write, so CRLF in the arguments must be normalized
                # too. Otherwise it would never match, or be doubled on write.
                needle = old_text.replace("\r\n", "\n")
                replacement = new_text.replace("\r\n", "\n")

            occurrences = content.count(needle)
            if occurrences == 0:
                return _json_result(False, error="old_text_not_found", path=path)
            if occurrences > 1:
                return _json_result(False, error="old_text_ambiguous", occurrences=occurrences, path=path)
            updated = content.replace(needle, replacement, 1)
            with resolved.open("w", encoding="utf-8", newline=newline) as handle:
                handle.write(updated)
            return _json_result(
                True,
                path=_relative_path(root, resolved),
                old_bytes=len(old_text.encode("utf-8")),
                new_bytes=len(new_text.encode("utf-8")),
            )
        except UnicodeDecodeError:
            # Must precede the ValueError handler: UnicodeDecodeError is a
            # ValueError subclass.
            return _json_result(False, error="file is not valid UTF-8 text", path=path)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)