feat(app-instance): 集成Beaver后端并更新配置管理
集成新的Beaver后端服务到应用实例中,替换原有的nanobot实现。

主要变更包括:
- 在Dockerfile和环境配置中添加Beaver相关路径和配置变量
- 更新工作目录结构从.nanobot到.beaver
- 实现Beaver引擎加载器,支持配置文件加载和工具组装
- 添加内置工具如ListDirectoryTool、ReadFileTool、SearchFilesTool
- 更新消息处理流程,支持通道适配器和网关模式
- 重构技能系统,支持显式工具提示和嵌入式检索
- 改进错误处理和生命周期管理

此变更使应用实例能够使用统一的Beaver后端进行AI代理运行时管理。
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
"""Tool system for Beaver."""
|
||||
|
||||
from .base import BaseTool, ObjectBackedTool, ToolContext, ToolResult, ToolSpec
|
||||
from .assembler import ToolAssembler
|
||||
from .registry import ToolRegistry
|
||||
from .runtime import ToolExecutor
|
||||
|
||||
@ -8,6 +9,7 @@ __all__ = [
|
||||
"BaseTool",
|
||||
"ObjectBackedTool",
|
||||
"ToolContext",
|
||||
"ToolAssembler",
|
||||
"ToolExecutor",
|
||||
"ToolRegistry",
|
||||
"ToolResult",
|
||||
|
||||
5
app-instance/backend/beaver/tools/assembler/__init__.py
Normal file
5
app-instance/backend/beaver/tools/assembler/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Tool selection for a single Beaver run."""
|
||||
|
||||
from .task_assembler import ToolAssembler
|
||||
|
||||
__all__ = ["ToolAssembler"]
|
||||
106
app-instance/backend/beaver/tools/assembler/task_assembler.py
Normal file
106
app-instance/backend/beaver/tools/assembler/task_assembler.py
Normal file
@ -0,0 +1,106 @@
|
||||
"""Task-driven tool assembler.
|
||||
|
||||
这层和 SkillAssembler 的位置类似:它不执行工具,只决定本轮 run 应该把哪些
|
||||
tool schema 暴露给模型。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.foundation.embedding import EmbeddingRetriever
|
||||
from beaver.tools.base import ToolSpec
|
||||
from beaver.tools.registry import ToolRegistry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.engine.providers.runtime import ProviderRuntime
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
|
||||
|
||||
class ToolAssembler:
    """Choose which tool specs a single run exposes to the model.

    The assembler never executes a tool. It merges three sources, in a fixed
    order, into one list without duplicate names:

    1. always-on tools (flagged on the spec, or named in ``always_tool_names``)
    2. tools hinted by the activated skills
    3. tools returned by the embedding retriever for the task description
    """

    # Used only when the caller does not pass ``always_tool_names`` at all.
    _DEFAULT_ALWAYS_TOOL_NAMES = ("memory", "session_search", "skill_view")

    def __init__(
        self,
        *,
        retriever: EmbeddingRetriever | None = None,
        always_tool_names: Sequence[str] | None = None,
    ) -> None:
        """Store the retriever and the names of tools that are always selected.

        Args:
            retriever: Object whose async ``retrieve`` ranks candidates; a
                fresh ``EmbeddingRetriever`` is created when omitted.
            always_tool_names: Tool names selected on every run. ``None``
                means "use the built-in defaults"; an empty sequence means
                "no extra always-on names".
        """
        # Compare against None rather than truthiness: an explicit empty
        # sequence must not be silently replaced by the defaults.
        self.retriever = EmbeddingRetriever() if retriever is None else retriever
        self.always_tool_names = (
            self._DEFAULT_ALWAYS_TOOL_NAMES
            if always_tool_names is None
            else tuple(always_tool_names)
        )

    async def assemble(
        self,
        *,
        task_description: str,
        registry: ToolRegistry,
        skills_loader: SkillsLoader | None = None,
        activated_skills: Sequence[SkillContext] | None = None,
        embedding_runtime: ProviderRuntime | None = None,
        top_k: int = 10,
    ) -> list[ToolSpec]:
        """Return the tool specs selected for the current run.

        Selection order is intentionally deterministic:
        1. always tools from config/spec
        2. tools explicitly declared by activated skills
        3. embedding top-k tools for the task

        Args:
            task_description: Text used as the retrieval query.
            registry: Source of all tool specs.
            skills_loader: Provides per-skill tool hints; without it step 2
                contributes nothing.
            activated_skills: Skills whose tool hints should be honoured.
            embedding_runtime: Optional connection settings forwarded to the
                retriever; every forwarded value is ``None`` when omitted.
            top_k: Passed to the retriever as both ``top_k`` and
                ``fallback_top_k``.

        Returns:
            Specs in selection order, each tool name appearing once.
        """

        selected: list[ToolSpec] = []
        selected_names: set[str] = set()

        def add_specs(specs: Sequence[ToolSpec]) -> None:
            # First occurrence wins, so earlier sources keep their position.
            for spec in specs:
                if spec.name in selected_names:
                    continue
                selected.append(spec)
                selected_names.add(spec.name)

        add_specs(registry.list_always_specs())
        add_specs(registry.get_specs(self.always_tool_names))

        skill_tool_names = self._collect_skill_tool_names(
            skills_loader=skills_loader,
            activated_skills=activated_skills or (),
        )
        add_specs(registry.get_specs(skill_tool_names))

        candidates = [
            spec.to_embedding_candidate()
            for spec in registry.list_specs()
            if spec.name not in selected_names
        ]
        if not candidates:
            # Every spec the registry lists is already selected, so whatever
            # the retriever returned could not add anything; skip the call.
            return selected

        runtime = embedding_runtime
        retrieved = await self.retriever.retrieve(
            query=task_description,
            candidates=candidates,
            top_k=top_k,
            api_key=runtime.api_key if runtime is not None else None,
            api_base=runtime.api_base if runtime is not None else None,
            model=runtime.model if runtime is not None else None,
            extra_headers=runtime.extra_headers if runtime is not None else None,
            timeout_seconds=(
                runtime.request_timeout_seconds if runtime is not None else None
            ),
            fallback_top_k=top_k,
        )
        add_specs(registry.get_specs([item["name"] for item in retrieved]))
        return selected

    @staticmethod
    def _collect_skill_tool_names(
        *,
        skills_loader: SkillsLoader | None,
        activated_skills: Sequence[SkillContext],
    ) -> list[str]:
        """Collect the tool names hinted by the activated skills.

        Names keep the order in which they are first seen and are not
        repeated. Returns an empty list when there is no loader or no
        activated skill.
        """
        if skills_loader is None or not activated_skills:
            return []

        # A dict keeps insertion order and gives constant-time membership,
        # replacing the former linear scan of a list.
        ordered: dict[str, None] = {}
        for skill in activated_skills:
            for name in skills_loader.get_skill_tool_hints(skill.name):
                ordered.setdefault(name, None)
        return list(ordered)
|
||||
@ -29,13 +29,30 @@ class ToolSpec:
|
||||
"""单个工具对外暴露的描述信息。
|
||||
|
||||
这份信息主要服务两个场景:
|
||||
1. 导出给 provider 的 function schema
|
||||
2. 在 registry 中做列出、查找、调试
|
||||
1. 以 MCP-style descriptor 作为统一事实来源
|
||||
2. 导出给 provider 的 function schema
|
||||
3. 在 registry 中做列出、查找、调试与 embedding 召回
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str
|
||||
input_schema: dict[str, Any]
|
||||
toolset: str = "core"
|
||||
always_available: bool = False
|
||||
|
||||
def to_mcp_descriptor(self) -> dict[str, Any]:
    """Export the tool description in MCP ListTools style.

    Only the basic MCP fields ``name``, ``description`` and ``inputSchema``
    are emitted. Beaver's own ``toolset`` / ``always_available`` are left out
    on purpose, to avoid format drift when a real MCP server is connected
    later.
    """
    descriptor: dict[str, Any] = dict(
        name=self.name,
        description=self.description,
        inputSchema=self.input_schema,
    )
    return descriptor
|
||||
|
||||
def to_provider_schema(self) -> dict[str, Any]:
|
||||
"""导出为 OpenAI-compatible function tool schema。"""
|
||||
@ -49,6 +66,15 @@ class ToolSpec:
|
||||
},
|
||||
}
|
||||
|
||||
def to_embedding_candidate(self) -> dict[str, str]:
    """Export a lightweight text candidate for semantic retrieval.

    The input schema is serialised to JSON with sorted keys and without
    ASCII escaping, so the same schema always yields the same text.
    """
    schema_text = json.dumps(self.input_schema, sort_keys=True, ensure_ascii=False)
    candidate = {"name": self.name, "description": self.description}
    candidate["input_schema"] = schema_text
    return candidate
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ToolContext:
|
||||
@ -113,6 +139,8 @@ class ObjectBackedTool(BaseTool):
|
||||
name=str(getattr(backend, "name")),
|
||||
description=str(getattr(backend, "description", "")),
|
||||
input_schema=dict(getattr(backend, "parameters", {"type": "object", "properties": {}})),
|
||||
toolset=str(getattr(backend, "toolset", "core")),
|
||||
always_available=bool(getattr(backend, "always_available", False)),
|
||||
)
|
||||
|
||||
@property
|
||||
@ -150,6 +178,8 @@ class ObjectBackedTool(BaseTool):
|
||||
|
||||
if "current_session_id" not in arguments and hasattr(self.backend, "current_session_id"):
|
||||
arguments["current_session_id"] = context.session_id
|
||||
if "workspace" not in arguments and hasattr(self.backend, "workspace"):
|
||||
arguments["workspace"] = context.workspace
|
||||
|
||||
@staticmethod
|
||||
def _normalize_output(content: Any) -> dict[str, Any]:
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
"""Built-in Beaver tools."""
|
||||
|
||||
from .echo import EchoTool, echo_tool
|
||||
from .filesystem import ListDirectoryTool, ReadFileTool, SearchFilesTool
|
||||
from .memory import MemoryTool, memory_tool
|
||||
from .skill_view import SkillViewTool, skill_view
|
||||
from .session_search import SessionSearchTool, session_search
|
||||
|
||||
__all__ = [
|
||||
"EchoTool",
|
||||
"ListDirectoryTool",
|
||||
"MemoryTool",
|
||||
"ReadFileTool",
|
||||
"SearchFilesTool",
|
||||
"SkillViewTool",
|
||||
"SessionSearchTool",
|
||||
"echo_tool",
|
||||
|
||||
@ -34,6 +34,8 @@ class EchoTool:
|
||||
|
||||
name: str = "echo"
|
||||
description: str = ECHO_TOOL_DESCRIPTION
|
||||
toolset: str = "debug"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(ECHO_TOOL_PARAMETERS))
|
||||
|
||||
async def execute(self, **kwargs: Any) -> str:
|
||||
|
||||
442
app-instance/backend/beaver/tools/builtins/filesystem.py
Normal file
442
app-instance/backend/beaver/tools/builtins/filesystem.py
Normal file
@ -0,0 +1,442 @@
|
||||
"""Workspace-scoped read-only filesystem tools.
|
||||
|
||||
这些工具是 Beaver 第一批真实本地工具,只做只读能力:
|
||||
- list_directory
|
||||
- read_file
|
||||
- search_files
|
||||
|
||||
安全边界先保持非常明确:所有用户传入路径都必须解析到当前
|
||||
`ToolContext.workspace` 内部。即使 workspace 里有指向外部的符号链接,
|
||||
读取时也会因为真实路径越界而被拒绝。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
MAX_LIST_ENTRIES = 1_000
|
||||
MAX_READ_LINES = 1_000
|
||||
MAX_READ_CHARS = 120_000
|
||||
MAX_SEARCH_RESULTS = 200
|
||||
MAX_SEARCH_FILE_BYTES = 2_000_000
|
||||
MAX_SEARCH_FILES = 5_000
|
||||
SKIP_DIR_NAMES = {
|
||||
".git",
|
||||
".hg",
|
||||
".svn",
|
||||
".venv",
|
||||
"venv",
|
||||
"__pycache__",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
"node_modules",
|
||||
"dist",
|
||||
"build",
|
||||
}
|
||||
|
||||
|
||||
LIST_DIRECTORY_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"default": ".",
|
||||
"description": "Directory path relative to the current workspace. Absolute paths are allowed only if they stay inside the workspace.",
|
||||
},
|
||||
"recursive": {
|
||||
"type": "boolean",
|
||||
"default": False,
|
||||
"description": "Whether to recursively list child entries. Symlink directories are not followed.",
|
||||
},
|
||||
"max_entries": {
|
||||
"type": "integer",
|
||||
"default": 200,
|
||||
"minimum": 1,
|
||||
"maximum": MAX_LIST_ENTRIES,
|
||||
"description": "Maximum number of entries to return.",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
|
||||
READ_FILE_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "File path relative to the current workspace. Absolute paths are allowed only if they stay inside the workspace.",
|
||||
},
|
||||
"start_line": {
|
||||
"type": "integer",
|
||||
"default": 1,
|
||||
"minimum": 1,
|
||||
"description": "1-based line number to start reading from.",
|
||||
},
|
||||
"max_lines": {
|
||||
"type": "integer",
|
||||
"default": 200,
|
||||
"minimum": 1,
|
||||
"maximum": MAX_READ_LINES,
|
||||
"description": "Maximum number of lines to read.",
|
||||
},
|
||||
},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
SEARCH_FILES_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Plain text query to search in file paths and UTF-8 text files.",
|
||||
},
|
||||
"path": {
|
||||
"type": "string",
|
||||
"default": ".",
|
||||
"description": "Directory or file path relative to the current workspace.",
|
||||
},
|
||||
"max_results": {
|
||||
"type": "integer",
|
||||
"default": 50,
|
||||
"minimum": 1,
|
||||
"maximum": MAX_SEARCH_RESULTS,
|
||||
"description": "Maximum number of matches to return.",
|
||||
},
|
||||
"case_sensitive": {
|
||||
"type": "boolean",
|
||||
"default": False,
|
||||
"description": "Whether search should be case-sensitive.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
|
||||
class WorkspacePathError(ValueError):
    """Signal that a path cannot be used by the workspace-scoped tools.

    Raised in this module when no workspace is configured, when the workspace
    is not a directory, or when a requested path resolves outside the
    workspace root. Because it derives from ``ValueError``, handlers written
    for ``ValueError`` also receive it.
    """
|
||||
|
||||
|
||||
def _json_result(success: bool, **payload: Any) -> str:
    """Serialise a tool outcome as an indented JSON object.

    ``success`` is always the first key; the keyword payload follows in the
    order it was passed. Non-ASCII text is written as-is rather than escaped.
    """
    document: dict[str, Any] = {"success": success}
    document.update(payload)
    return json.dumps(document, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def _clamp_int(value: Any, *, default: int, minimum: int, maximum: int) -> int:
    """Coerce ``value`` to an int and clamp it into ``[minimum, maximum]``.

    Args:
        value: Anything ``int()`` might accept (ints, numeric strings, floats).
        default: Used when ``value`` cannot be converted at all.
        minimum: Lower bound of the result.
        maximum: Upper bound of the result.

    Returns:
        The converted (or default) value, limited to the given bounds.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects.
        # ValueError: non-numeric text and float NaN.
        # OverflowError: float infinity, which int() refuses to convert.
        parsed = default
    return max(minimum, min(parsed, maximum))
|
||||
|
||||
|
||||
def _workspace_root(workspace: str | None) -> Path:
    """Return the fully resolved workspace directory.

    The workspace string has ``~`` expanded and is resolved strictly, so a
    missing directory surfaces as the ``OSError`` raised by ``Path.resolve``.

    Raises:
        WorkspacePathError: If ``workspace`` is empty/``None`` or resolves to
            something that is not a directory.
    """
    if workspace:
        root = Path(workspace).expanduser().resolve(strict=True)
        if root.is_dir():
            return root
        raise WorkspacePathError(f"workspace is not a directory: {root}")
    raise WorkspacePathError("workspace is not configured for filesystem tools")
|
||||
|
||||
|
||||
def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
    """Resolve a user path and ensure the real target stays inside workspace.

    Relative paths are taken relative to the workspace root; absolute paths
    are accepted as long as their resolved target lies under that root.
    Resolution is strict, so symlinks are followed and the target must exist.

    Args:
        workspace: Workspace directory; validated by ``_workspace_root``.
        user_path: Path supplied by the caller; empty/``None`` means ``"."``.

    Returns:
        ``(root, resolved)``: the resolved workspace root and resolved target.

    Raises:
        WorkspacePathError: If the workspace is unusable, the path value
            cannot be interpreted as a path, or the target lies outside the
            workspace.
        OSError: If the target does not exist or cannot be accessed.
        ValueError: Propagated from ``pathlib`` for malformed path strings.
    """

    root = _workspace_root(workspace)
    shown = user_path or "."
    try:
        raw_path = Path(shown).expanduser()
        candidate = raw_path if raw_path.is_absolute() else root / raw_path
        resolved = candidate.resolve(strict=True)
    except (TypeError, RuntimeError) as exc:
        # TypeError: a non-string, non-path value was supplied.
        # RuntimeError: expanduser() could not determine a home directory,
        # or (before Python 3.13) resolve() hit a symlink loop.
        # The tool entry points do not catch either type, so convert them to
        # the error type they already handle.
        raise WorkspacePathError(f"invalid path: {shown!r}") from exc

    try:
        resolved.relative_to(root)
    except ValueError as exc:
        raise WorkspacePathError(
            f"path escapes workspace: {user_path or '.'}"
        ) from exc
    return root, resolved
|
||||
|
||||
|
||||
def _relative_path(root: Path, path: Path) -> str:
    """Render ``path`` relative to ``root`` for display.

    Falls back to the full path text when ``path`` does not lie under
    ``root``; an empty relative text is rendered as ``"."``.
    """
    try:
        inside = path.relative_to(root)
    except ValueError:
        return str(path)
    return str(inside) or "."
|
||||
|
||||
|
||||
def _entry_type(path: Path) -> str:
    """Classify ``path`` as ``symlink``, ``directory``, ``file`` or ``other``.

    The symlink check comes first, so a link is reported as a link no matter
    what it points at.
    """
    probes = (
        ("symlink", path.is_symlink),
        ("directory", path.is_dir),
        ("file", path.is_file),
    )
    # Probes run lazily and in order; the first one that matches wins.
    for label, probe in probes:
        if probe():
            return label
    return "other"
|
||||
|
||||
|
||||
def _entry_payload(root: Path, path: Path) -> dict[str, Any]:
    """Build the JSON-ready description of one directory entry.

    The size of a symlink is that of the link itself (``lstat``); for
    anything else it comes from ``stat``. ``size`` is ``None`` when the entry
    cannot be stat-ed.
    """
    size: int | None
    try:
        info = path.lstat() if path.is_symlink() else path.stat()
    except OSError:
        size = None
    else:
        size = info.st_size
    return dict(
        name=path.name,
        path=_relative_path(root, path),
        type=_entry_type(path),
        size=size,
    )
|
||||
|
||||
|
||||
def _iter_directory(root: Path, directory: Path, *, recursive: bool) -> Iterable[Path]:
    """Yield the entries of ``directory``, optionally descending into children.

    Within each directory, real directories come first and everything else
    follows, each group ordered by lower-cased name. With ``recursive`` set,
    the contents of a real subdirectory are yielded right after the
    subdirectory itself. Symlinked directories are listed but never entered.

    Args:
        root: Not used by the traversal; passed through to recursive calls.
        directory: Directory whose entries are listed.
        recursive: Whether to descend into real subdirectories.

    Raises:
        OSError: If ``directory`` itself cannot be listed. Subdirectories
            that cannot be listed are still yielded, but their contents are
            skipped instead of aborting the whole walk.
    """

    def sort_key(item: Path) -> tuple[bool, str]:
        is_real_directory = not item.is_symlink() and item.is_dir()
        # False sorts before True, so real directories are placed first.
        return (not is_real_directory, item.name.lower())

    for entry in sorted(directory.iterdir(), key=sort_key):
        yield entry
        if not recursive or entry.is_symlink() or not entry.is_dir():
            continue
        try:
            yield from _iter_directory(root, entry, recursive=True)
        except OSError:
            # The child generator lists its directory before its first yield,
            # so an OSError here means `entry` itself is unreadable; move on
            # to the next sibling.
            continue
|
||||
|
||||
|
||||
def _looks_binary(path: Path) -> bool:
    """Guess whether ``path`` is binary from its first 4096 bytes.

    A file counts as binary when that sample contains a NUL byte. A file that
    cannot be opened or read is also reported as binary.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(4096)
    except OSError:
        return True
    return head.find(b"\0") != -1
|
||||
|
||||
|
||||
def _read_text_file(path: Path) -> str:
    """Return the whole content of ``path`` decoded as UTF-8.

    Raises:
        ValueError: If ``_looks_binary`` classifies the file as binary.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    if not _looks_binary(path):
        return path.read_text(encoding="utf-8")
    raise ValueError("binary files cannot be read by read_file/search_files")
|
||||
|
||||
|
||||
def _iter_search_files(root: Path, start: Path) -> Iterable[Path]:
    """Yield the regular files to search under ``start``.

    A file ``start`` is yielded as the only item. For a directory, the tree
    is walked with an explicit stack: symlinks are ignored, directories named
    in ``SKIP_DIR_NAMES`` are not entered, unreadable directories are
    skipped, and the walk stops once ``MAX_SEARCH_FILES`` files have been
    yielded. ``root`` is not used by the traversal.
    """
    if start.is_file():
        yield start
        return

    pending = [start]
    yielded = 0
    while pending and yielded < MAX_SEARCH_FILES:
        directory = pending.pop()
        try:
            children = list(directory.iterdir())
            children.sort(key=lambda item: item.name.lower())
        except OSError:
            continue

        for child in children:
            if child.is_symlink():
                continue
            if child.is_dir():
                # Queued for a later pass unless the name is on the skip list.
                if child.name not in SKIP_DIR_NAMES:
                    pending.append(child)
            elif child.is_file():
                yielded += 1
                yield child
                if yielded >= MAX_SEARCH_FILES:
                    break
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ListDirectoryTool:
    """List files and directories inside the current workspace."""

    # Tool name and description.
    name: str = "list_directory"
    description: str = (
        "List files and directories inside the current workspace. "
        "Use this before reading files when you need to inspect project structure. "
        "This tool never follows paths outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # Not read by execute(), which takes the workspace as an argument.
    # NOTE(review): presumably the attribute's presence lets the tool wrapper
    # inject the run's workspace — confirm against ObjectBackedTool.
    workspace: str | None = None
    # Per-instance shallow copy of the argument schema.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(LIST_DIRECTORY_PARAMETERS))

    async def execute(
        self,
        *,
        path: str = ".",
        recursive: bool = False,
        max_entries: int = 200,
        workspace: str | None = None,
    ) -> str:
        """List the entries of a directory inside the workspace.

        Args:
            path: Directory to list, relative to the workspace (or absolute
                but inside it).
            recursive: Whether to descend into real subdirectories.
            max_entries: Maximum number of entries to return; clamped to
                ``1..MAX_LIST_ENTRIES``.
            workspace: Workspace root that ``path`` is resolved against.

        Returns:
            A JSON string. On success it holds ``path``, ``recursive``,
            ``entries`` and ``truncated``; on failure ``success`` is false
            and ``error`` describes the problem.
        """
        try:
            root, resolved = _resolve_existing_path(workspace, path)
            if not resolved.is_dir():
                return _json_result(False, error="not_a_directory", path=path)

            limit = _clamp_int(max_entries, default=200, minimum=1, maximum=MAX_LIST_ENTRIES)
            entries: list[dict[str, Any]] = []
            truncated = False
            for entry in _iter_directory(root, resolved, recursive=bool(recursive)):
                # Check before appending: `truncated` must only be set when an
                # entry beyond the limit really exists, not merely because the
                # listing happens to contain exactly `limit` entries.
                if len(entries) >= limit:
                    truncated = True
                    break
                entries.append(_entry_payload(root, entry))

            return _json_result(
                True,
                path=_relative_path(root, resolved),
                recursive=bool(recursive),
                entries=entries,
                truncated=truncated,
            )
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ReadFileTool:
    """Read a UTF-8 text file inside the current workspace."""

    # Tool name and description.
    name: str = "read_file"
    description: str = (
        "Read a UTF-8 text file inside the current workspace with line limits. "
        "Use this to inspect source code, docs, config, or logs. "
        "This tool rejects binary files and paths outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # Not read by execute(), which takes the workspace as an argument.
    workspace: str | None = None
    # Per-instance shallow copy of the argument schema.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(READ_FILE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        start_line: int = 1,
        max_lines: int = 200,
        workspace: str | None = None,
    ) -> str:
        """Return a window of lines from a text file as a JSON string.

        Args:
            path: File to read, relative to the workspace (or absolute but
                inside it).
            start_line: 1-based first line; clamped to ``1..10_000_000``.
            max_lines: Number of lines wanted; clamped to ``1..MAX_READ_LINES``.
            workspace: Workspace root that ``path`` is resolved against.

        Returns:
            A JSON string. On success it holds ``path``, ``start_line``,
            ``end_line``, ``total_lines``, ``truncated`` and ``content``
            (cut to ``MAX_READ_CHARS`` characters); on failure ``success`` is
            false and ``error`` describes the problem.
        """
        try:
            root, target = _resolve_existing_path(workspace, path)
            if not target.is_file():
                return _json_result(False, error="not_a_file", path=path)

            first_line = _clamp_int(start_line, default=1, minimum=1, maximum=10_000_000)
            line_budget = _clamp_int(max_lines, default=200, minimum=1, maximum=MAX_READ_LINES)
            all_lines = _read_text_file(target).splitlines()

            offset = first_line - 1
            window = all_lines[offset : offset + line_budget]
            text = "\n".join(window)
            clipped = len(text) > MAX_READ_CHARS
            if clipped:
                text = text[:MAX_READ_CHARS]

            # With an empty window this equals first_line - 1, i.e. "nothing read".
            last_line = offset + len(window)
            return _json_result(
                True,
                path=_relative_path(root, target),
                start_line=first_line,
                end_line=last_line,
                total_lines=len(all_lines),
                truncated=last_line < len(all_lines) or clipped,
                content=text,
            )
        except UnicodeDecodeError:
            # Must come before the ValueError handler below:
            # UnicodeDecodeError is a ValueError subclass.
            return _json_result(False, error="file is not valid UTF-8 text", path=path)
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SearchFilesTool:
    """Search filenames and UTF-8 text file contents inside the workspace."""

    # Tool name and description.
    name: str = "search_files"
    description: str = (
        "Search file paths and UTF-8 text file contents inside the current workspace. "
        "Use this to find relevant source files, docs, config keys, or log lines. "
        "This tool skips large/binary files and never searches outside the workspace."
    )
    toolset: str = "filesystem"
    always_available: bool = True
    # Not read by execute(), which takes the workspace as an argument.
    workspace: str | None = None
    # Per-instance shallow copy of the argument schema.
    parameters: dict[str, Any] = field(default_factory=lambda: dict(SEARCH_FILES_PARAMETERS))

    async def execute(
        self,
        *,
        query: str,
        path: str = ".",
        max_results: int = 50,
        case_sensitive: bool = False,
        workspace: str | None = None,
    ) -> str:
        """Search relative file paths and file lines for a plain-text query.

        Each file can contribute one ``path`` match plus one ``content``
        match per matching line. Files larger than ``MAX_SEARCH_FILE_BYTES``,
        binary files and files that cannot be read or decoded as UTF-8 are
        counted as skipped.

        Args:
            query: Non-empty text to look for (substring match).
            path: File or directory to search, relative to the workspace.
            max_results: Maximum number of matches; clamped to
                ``1..MAX_SEARCH_RESULTS``.
            case_sensitive: When false, both sides are lower-cased first.
            workspace: Workspace root that ``path`` is resolved against.

        Returns:
            A JSON string. On success it holds ``query``, ``path``,
            ``results``, ``truncated``, ``searched_files`` and
            ``skipped_files``; on failure ``success`` is false and ``error``
            describes the problem.
        """
        try:
            if not isinstance(query, str) or not query.strip():
                return _json_result(False, error="query must be a non-empty string")
            root, target = _resolve_existing_path(workspace, path)
            if not (target.is_dir() or target.is_file()):
                return _json_result(False, error="path must be a file or directory", path=path)

            limit = _clamp_int(max_results, default=50, minimum=1, maximum=MAX_SEARCH_RESULTS)

            def fold(text: str) -> str:
                # Normalise text for comparison according to case sensitivity.
                return text if case_sensitive else text.lower()

            needle = fold(query)
            matches: list[dict[str, Any]] = []
            searched = 0
            skipped = 0

            for candidate in _iter_search_files(root, target):
                shown = _relative_path(root, candidate)
                if needle in fold(shown):
                    matches.append(
                        {
                            "path": shown,
                            "line": None,
                            "match_type": "path",
                            "preview": shown,
                        }
                    )
                    if len(matches) >= limit:
                        break

                # `text` stays None for every file whose content is not searched.
                text: str | None
                try:
                    too_large = candidate.stat().st_size > MAX_SEARCH_FILE_BYTES
                    if too_large or _looks_binary(candidate):
                        text = None
                    else:
                        text = candidate.read_text(encoding="utf-8")
                except (OSError, UnicodeDecodeError):
                    text = None
                if text is None:
                    skipped += 1
                    continue

                searched += 1
                for number, line in enumerate(text.splitlines(), start=1):
                    if needle in fold(line):
                        matches.append(
                            {
                                "path": shown,
                                "line": number,
                                "match_type": "content",
                                "preview": line[:500],
                            }
                        )
                        if len(matches) >= limit:
                            break
                if len(matches) >= limit:
                    break

            return _json_result(
                True,
                query=query,
                path=_relative_path(root, target),
                results=matches,
                truncated=len(matches) >= limit,
                searched_files=searched,
                skipped_files=skipped,
            )
        except (OSError, WorkspacePathError, ValueError) as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
@ -123,6 +123,8 @@ class MemoryTool:
|
||||
store: MemoryStore
|
||||
name: str = "memory"
|
||||
description: str = MEMORY_TOOL_DESCRIPTION
|
||||
toolset: str = "memory"
|
||||
always_available: bool = True
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(MEMORY_TOOL_PARAMETERS))
|
||||
|
||||
async def execute(self, **kwargs: Any) -> str:
|
||||
|
||||
@ -406,6 +406,8 @@ class SessionSearchTool:
|
||||
summarizer: SessionSummarizer | None = None
|
||||
name: str = "session_search"
|
||||
description: str = SESSION_SEARCH_TOOL_DESCRIPTION
|
||||
toolset: str = "session"
|
||||
always_available: bool = True
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(SESSION_SEARCH_TOOL_PARAMETERS))
|
||||
|
||||
async def execute(self, **kwargs: Any) -> str:
|
||||
|
||||
@ -76,6 +76,8 @@ class SkillViewTool:
|
||||
loader: SkillsLoader
|
||||
name: str = "skill_view"
|
||||
description: str = SKILL_VIEW_TOOL_DESCRIPTION
|
||||
toolset: str = "skills"
|
||||
always_available: bool = True
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(SKILL_VIEW_TOOL_PARAMETERS))
|
||||
|
||||
async def execute(self, **kwargs: Any) -> str:
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Iterable
|
||||
|
||||
from beaver.tools.base import BaseTool, ToolSpec
|
||||
@ -49,7 +50,30 @@ class ToolRegistry:
|
||||
def list_specs(self) -> list[ToolSpec]:
    """Return the spec of every tool held in ``self._tools``, in iteration order."""
    specs: list[ToolSpec] = []
    for tool in self._tools.values():
        specs.append(tool.spec)
    return specs
|
||||
|
||||
def list_always_specs(self) -> list[ToolSpec]:
    """List the base tools that should be exposed to the model on every run.

    These are the specs from ``list_specs`` whose ``always_available`` flag
    is truthy, in the same order.
    """

    always_on: list[ToolSpec] = []
    for spec in self.list_specs():
        if spec.always_available:
            always_on.append(spec)
    return always_on
|
||||
|
||||
def get_specs(self, names: Sequence[str]) -> list[ToolSpec]:
    """Return the specs of registered tools in the order of ``names``.

    Names for which ``self.get`` returns ``None`` are ignored, and a name
    that occurs more than once contributes only its first occurrence.
    """

    picked: dict[str, ToolSpec] = {}
    for tool_name in names:
        tool = self.get(tool_name)
        if tool is not None and tool_name not in picked:
            picked[tool_name] = tool.spec
    return list(picked.values())
|
||||
|
||||
def export_provider_schemas(self) -> list[dict]:
    """Export the provider function-tool schema of every spec in ``list_specs``."""

    schemas: list[dict] = []
    for spec in self.list_specs():
        schemas.append(spec.to_provider_schema())
    return schemas
|
||||
|
||||
def export_selected_provider_schemas(self, specs: Sequence[ToolSpec]) -> list[dict]:
    """Export the provider schema of each given spec, preserving their order."""

    schemas: list[dict] = []
    for spec in specs:
        schemas.append(spec.to_provider_schema())
    return schemas
|
||||
|
||||
@ -12,12 +12,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from beaver.engine.providers.base import ToolCallRequest
|
||||
from beaver.tools.base import ToolContext, ToolResult
|
||||
from beaver.tools.registry.tool_registry import ToolRegistry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.engine.providers.base import ToolCallRequest
|
||||
|
||||
|
||||
class ToolExecutor:
|
||||
"""统一执行单个 tool call。"""
|
||||
@ -80,16 +82,17 @@ class ToolExecutor:
|
||||
|
||||
@staticmethod
|
||||
def _normalize_tool_call(tool_call: ToolCallRequest | dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||
if isinstance(tool_call, ToolCallRequest):
|
||||
return tool_call.name, dict(tool_call.arguments)
|
||||
|
||||
function = tool_call.get("function")
|
||||
if isinstance(function, dict):
|
||||
name = function.get("name")
|
||||
arguments = function.get("arguments", {})
|
||||
if not isinstance(tool_call, dict):
|
||||
name = getattr(tool_call, "name", None)
|
||||
arguments = getattr(tool_call, "arguments", {})
|
||||
else:
|
||||
name = tool_call.get("name")
|
||||
arguments = tool_call.get("arguments", {})
|
||||
function = tool_call.get("function")
|
||||
if isinstance(function, dict):
|
||||
name = function.get("name")
|
||||
arguments = function.get("arguments", {})
|
||||
else:
|
||||
name = tool_call.get("name")
|
||||
arguments = tool_call.get("arguments", {})
|
||||
|
||||
if not name:
|
||||
raise ValueError("Tool call is missing a tool name")
|
||||
@ -104,8 +107,8 @@ class ToolExecutor:
|
||||
|
||||
@staticmethod
|
||||
def _extract_tool_name(tool_call: ToolCallRequest | dict[str, Any]) -> str:
|
||||
if isinstance(tool_call, ToolCallRequest):
|
||||
return str(tool_call.name or "unknown")
|
||||
if not isinstance(tool_call, dict):
|
||||
return str(getattr(tool_call, "name", None) or "unknown")
|
||||
function = tool_call.get("function")
|
||||
if isinstance(function, dict) and function.get("name"):
|
||||
return str(function["name"])
|
||||
|
||||
Reference in New Issue
Block a user