修改了nanobot,往Hermes agent的风格走,进度1/3
This commit is contained in:
15
app-instance/backend/beaver/tools/__init__.py
Normal file
15
app-instance/backend/beaver/tools/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
"""Tool system for Beaver."""
|
||||
|
||||
from .base import BaseTool, ObjectBackedTool, ToolContext, ToolResult, ToolSpec
|
||||
from .registry import ToolRegistry
|
||||
from .runtime import ToolExecutor
|
||||
|
||||
__all__ = [
|
||||
"BaseTool",
|
||||
"ObjectBackedTool",
|
||||
"ToolContext",
|
||||
"ToolExecutor",
|
||||
"ToolRegistry",
|
||||
"ToolResult",
|
||||
"ToolSpec",
|
||||
]
|
||||
175
app-instance/backend/beaver/tools/base.py
Normal file
175
app-instance/backend/beaver/tools/base.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""Beaver 工具系统的统一契约。
|
||||
|
||||
这一层的目标不是实现具体工具,而是把 runtime 真正依赖的最小接口定死。
|
||||
|
||||
我们需要统一回答 4 个问题:
|
||||
1. 一个工具长什么样
|
||||
2. tool schema 怎么导出给 provider
|
||||
3. 工具执行结果长什么样
|
||||
4. tool loop 执行时,可以把哪些运行时依赖传给工具
|
||||
|
||||
这层故意保持很薄:
|
||||
- 不绑定 MCP
|
||||
- 不绑定 memory/session
|
||||
- 不绑定具体 provider
|
||||
|
||||
这样内建工具、MCP 工具、未来插件工具都可以收敛到同一套契约上。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ToolSpec:
|
||||
"""单个工具对外暴露的描述信息。
|
||||
|
||||
这份信息主要服务两个场景:
|
||||
1. 导出给 provider 的 function schema
|
||||
2. 在 registry 中做列出、查找、调试
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str
|
||||
input_schema: dict[str, Any]
|
||||
|
||||
def to_provider_schema(self) -> dict[str, Any]:
|
||||
"""导出为 OpenAI-compatible function tool schema。"""
|
||||
|
||||
return {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"parameters": self.input_schema,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ToolContext:
|
||||
"""一次工具执行时可用的运行时上下文。
|
||||
|
||||
这不是“所有系统对象的大杂烩”,而是当前工具执行阶段最常用的公共入口。
|
||||
后面主链接进来时,可以把 session manager / memory store / workspace 等从这里传入。
|
||||
"""
|
||||
|
||||
workspace: str | None = None
|
||||
session_id: str | None = None
|
||||
user_id: str | None = None
|
||||
services: dict[str, Any] = field(default_factory=dict)
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def get(self, key: str, default: Any = None) -> Any:
|
||||
"""优先从 services 中取依赖,方便工具侧少写样板代码。"""
|
||||
|
||||
return self.services.get(key, default)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ToolResult:
|
||||
"""标准化工具执行结果。
|
||||
|
||||
统一返回结构的意义是:
|
||||
1. tool loop 更容易记录日志和失败信息
|
||||
2. provider 回灌时可以稳定地拿到字符串内容
|
||||
3. 后面要做工具审计时,数据结构已经固定
|
||||
"""
|
||||
|
||||
success: bool
|
||||
content: str
|
||||
tool_name: str
|
||||
error: str | None = None
|
||||
raw_output: Any | None = None
|
||||
|
||||
|
||||
class BaseTool(ABC):
    """Abstract base class that every tool implementation must follow.

    Subclasses provide two things: a ``spec`` property describing the tool,
    and an async ``invoke`` coroutine that runs it.
    """

    @property
    @abstractmethod
    def spec(self) -> ToolSpec:
        """Return the tool's metadata."""

    @abstractmethod
    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Execute the tool with ``arguments`` under the given runtime ``context``."""
|
||||
|
||||
|
||||
class ObjectBackedTool(BaseTool):
    """Adapter that exposes an existing lightweight tool object as a ``BaseTool``.

    ``MemoryTool`` and ``SessionSearchTool`` already exist but do not implement
    the unified ``BaseTool`` contract. This adapter wraps such an object so its
    business logic does not have to be rewritten; only the interface is
    normalized.

    The backend must expose ``name`` and an async ``execute(**kwargs)``;
    ``description`` and ``parameters`` are optional.
    """

    def __init__(self, backend: Any) -> None:
        """Wrap ``backend`` and snapshot its metadata into a ``ToolSpec``."""
        default_schema = {"type": "object", "properties": {}}
        self.backend = backend
        self._spec = ToolSpec(
            name=str(backend.name),
            description=str(getattr(backend, "description", "")),
            # Shallow copy: nested schema objects are still shared with the backend.
            input_schema=dict(getattr(backend, "parameters", default_schema)),
        )

    @property
    def spec(self) -> ToolSpec:
        """Return the spec captured at construction time."""
        return self._spec

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Run the backend and convert its output, or any exception, to a ``ToolResult``.

        Any ``Exception`` raised while preparing arguments, executing the
        backend or normalizing its output is reported as a failed result rather
        than propagated.
        """
        try:
            # Copy first so runtime injection never mutates the caller's dict.
            call_arguments = dict(arguments)
            self._inject_runtime_context(call_arguments, context)
            backend_output = await self.backend.execute(**call_arguments)
            normalized = self._normalize_output(backend_output)
        except Exception as exc:
            return ToolResult(
                success=False,
                content=f"Tool {self.spec.name} failed: {exc}",
                tool_name=self.spec.name,
                error=str(exc),
            )
        return ToolResult(
            success=normalized["success"],
            content=normalized["content"],
            tool_name=self.spec.name,
            error=normalized.get("error"),
            raw_output=backend_output,
        )

    def _inject_runtime_context(self, arguments: dict[str, Any], context: ToolContext) -> None:
        """Inject a minimal amount of runtime context into the backend arguments.

        Only ``current_session_id`` is injected, and only when the backend
        exposes an attribute of that name and the caller did not already supply
        it. The ``ToolContext`` object itself is never handed to the backend.
        """
        already_supplied = "current_session_id" in arguments
        if not already_supplied and hasattr(self.backend, "current_session_id"):
            arguments["current_session_id"] = context.session_id

    @staticmethod
    def _normalize_output(content: Any) -> dict[str, Any]:
        """Map a backend return value onto ``success`` / ``content`` / ``error``.

        A string that parses as a JSON object containing a ``success`` key is
        trusted: its ``success`` and ``error`` values are used and the original
        string is kept as the content. Every other value is treated as plain
        successful text (non-strings are converted with ``str``).
        """
        if not isinstance(content, str):
            return {"success": True, "content": str(content)}

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            parsed = None

        if isinstance(parsed, dict) and "success" in parsed:
            return {
                "success": bool(parsed.get("success")),
                "content": content,
                "error": parsed.get("error"),
            }
        return {"success": True, "content": content}
|
||||
17
app-instance/backend/beaver/tools/builtins/__init__.py
Normal file
17
app-instance/backend/beaver/tools/builtins/__init__.py
Normal file
@ -0,0 +1,17 @@
|
||||
"""Built-in Beaver tools."""
|
||||
|
||||
from .echo import EchoTool, echo_tool
|
||||
from .memory import MemoryTool, memory_tool
|
||||
from .skill_view import SkillViewTool, skill_view
|
||||
from .session_search import SessionSearchTool, session_search
|
||||
|
||||
__all__ = [
|
||||
"EchoTool",
|
||||
"MemoryTool",
|
||||
"SkillViewTool",
|
||||
"SessionSearchTool",
|
||||
"echo_tool",
|
||||
"memory_tool",
|
||||
"skill_view",
|
||||
"session_search",
|
||||
]
|
||||
43
app-instance/backend/beaver/tools/builtins/echo.py
Normal file
43
app-instance/backend/beaver/tools/builtins/echo.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""最小调试工具:把输入原样回显。
|
||||
|
||||
它的价值不是业务能力,而是运行时验证:
|
||||
当你只想确认 tool loop 是否能走通时,`echo` 是最便宜、最确定的测试工具。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
ECHO_TOOL_DESCRIPTION = "Echo the provided text back to the agent. Useful for verifying tool calling."
|
||||
|
||||
ECHO_TOOL_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "The text to echo back.",
|
||||
}
|
||||
},
|
||||
"required": ["text"],
|
||||
}
|
||||
|
||||
|
||||
def echo_tool(*, text: str) -> str:
    """Return ``text`` unchanged."""
    echoed = text
    return echoed
|
||||
|
||||
|
||||
@dataclass(slots=True)
class EchoTool:
    """Minimal built-in tool for the runtime: echoes its ``text`` argument."""

    name: str = "echo"
    description: str = ECHO_TOOL_DESCRIPTION
    # Each instance gets its own top-level copy of the shared schema dict.
    parameters: dict[str, Any] = field(default_factory=lambda: {**ECHO_TOOL_PARAMETERS})

    async def execute(self, **kwargs: Any) -> str:
        """Echo ``kwargs["text"]`` back.

        Raises:
            ValueError: if ``text`` is missing or is not a string.
        """
        candidate = kwargs.get("text")
        if isinstance(candidate, str):
            return echo_tool(text=candidate)
        raise ValueError("echo tool requires a string field 'text'")
|
||||
129
app-instance/backend/beaver/tools/builtins/memory.py
Normal file
129
app-instance/backend/beaver/tools/builtins/memory.py
Normal file
@ -0,0 +1,129 @@
|
||||
"""Beaver 内置 memory tool。
|
||||
|
||||
这个文件的职责很单纯:把 `MemoryStore` 暴露成一个 agent runtime 可以调用的统一工具。
|
||||
|
||||
设计边界:
|
||||
1. `store.py` 负责底层数据与并发安全
|
||||
2. 本文件负责工具接口、参数校验分发、JSON 响应
|
||||
3. 更高层的 engine / loader 之后再决定如何把这个工具注册进 runtime
|
||||
|
||||
换句话说,本文件是“memory 能力的工具化外壳”,不是记忆实现本身。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from beaver.memory.curated.store import MemoryStore
|
||||
|
||||
MEMORY_TOOL_DESCRIPTION = (
|
||||
"Save durable information to persistent memory that survives across sessions. "
|
||||
"Use this proactively for user corrections, preferences, environment facts, "
|
||||
"project conventions, and stable tool quirks. Do not store temporary task "
|
||||
"progress or raw session logs here; use session search for historical detail."
|
||||
)
|
||||
|
||||
MEMORY_TOOL_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"enum": ["add", "replace", "remove"],
|
||||
"description": "The memory operation to perform.",
|
||||
},
|
||||
"target": {
|
||||
"type": "string",
|
||||
"enum": ["memory", "user"],
|
||||
"description": "Which curated store to update.",
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The new entry content. Required for add and replace.",
|
||||
},
|
||||
"old_text": {
|
||||
"type": "string",
|
||||
"description": "A short unique substring identifying the entry to replace or remove.",
|
||||
},
|
||||
},
|
||||
"required": ["action", "target"],
|
||||
}
|
||||
|
||||
|
||||
def memory_tool(
    *,
    action: str,
    target: str = "memory",
    content: str | None = None,
    old_text: str | None = None,
    store: MemoryStore | None = None,
) -> str:
    """Dispatch a Hermes-style CRUD memory request and return a JSON string.

    The result is always JSON so that tool-calling consumers (the LLM, web/API
    layers, logs) receive a structured payload.

    Args:
        action: ``"add"``, ``"replace"`` or ``"remove"``.
        target: which curated store to update, ``"memory"`` or ``"user"``.
        content: new entry text; required for ``add`` and ``replace``.
        old_text: substring identifying the entry; required for ``replace``
            and ``remove``.
        store: backing store; when ``None`` a failure payload is returned.

    Returns:
        The JSON-encoded result. Validation failures are reported as
        ``{"success": false, "error": ...}``; otherwise whatever the store
        method returned is encoded as-is.
    """

    def encode(payload: Any) -> str:
        return json.dumps(payload, ensure_ascii=False)

    def failure(message: str) -> str:
        return encode({"success": False, "error": message})

    if store is None:
        return failure("Memory store is not available for this runtime.")

    if target not in {"memory", "user"}:
        return failure(f"Invalid target '{target}'. Use 'memory' or 'user'.")

    if action == "add":
        if not content:
            return failure("content is required for add.")
        return encode(store.add(target, content))

    if action == "replace":
        # old_text is validated before content, so its error wins when both are missing.
        if not old_text:
            return failure("old_text is required for replace.")
        if not content:
            return failure("content is required for replace.")
        return encode(store.replace(target, old_text, content))

    if action == "remove":
        if not old_text:
            return failure("old_text is required for remove.")
        return encode(store.remove(target, old_text))

    return failure(f"Unknown action '{action}'. Use add, replace, or remove.")
|
||||
|
||||
|
||||
@dataclass(slots=True)
class MemoryTool:
    """Thin runtime-facing wrapper around ``memory_tool``.

    It is kept thin on purpose: no business logic and no separate schema are
    maintained here; ``execute()`` only forwards to ``memory_tool()`` with the
    configured store.
    """

    store: MemoryStore
    name: str = "memory"
    description: str = MEMORY_TOOL_DESCRIPTION
    # Each instance gets its own top-level copy of the shared schema dict.
    parameters: dict[str, Any] = field(default_factory=lambda: {**MEMORY_TOOL_PARAMETERS})

    async def execute(self, **kwargs: Any) -> str:
        """Forward the call arguments to ``memory_tool`` using this tool's store."""
        return memory_tool(**kwargs, store=self.store)
|
||||
418
app-instance/backend/beaver/tools/builtins/session_search.py
Normal file
418
app-instance/backend/beaver/tools/builtins/session_search.py
Normal file
@ -0,0 +1,418 @@
|
||||
"""Beaver 内置 session_search tool。
|
||||
|
||||
这个工具对应 Hermes-agent 的跨会话检索能力,目标不是把所有历史内容塞回主上下文,
|
||||
而是按需从过去的 session 中找回“之前发生过什么”。
|
||||
|
||||
当前实现保留了几个关键行为:
|
||||
1. query 为空时进入 recent/browse 模式,只列最近会话,不走 LLM,总成本很低
|
||||
2. query 不为空时走 transcript DB 的搜索接口,预期底层是 FTS 风格检索
|
||||
3. 自动排除当前 session lineage,避免把当前上下文又搜出来一遍
|
||||
4. 对长会话做 match-centered truncation,而不是无脑截前 N 字符
|
||||
5. summarizer 是可选依赖;没有时降级返回 raw preview,而不是整条工具失败
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Awaitable, Callable, Protocol
|
||||
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
|
||||
|
||||
class SessionSearchDB(Protocol):
    """Minimal database contract that ``session_search`` depends on.

    No concrete SQLite implementation is bound here; only the behavior is
    described. Any store that provides these methods can be used, whether it
    is a Hermes-style state DB or Beaver's own transcript store.
    """

    def list_sessions_rich(
        self,
        *,
        limit: int,
        exclude_sources: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """List session records for recent mode.

        NOTE(review): ordering is not visible from this file; presumably
        newest first. Confirm against the implementation.
        """

    def get_session(self, session_id: str) -> dict[str, Any] | None:
        """Return the record for ``session_id``, or ``None`` when unknown."""

    def get_messages_as_conversation(self, session_id: str) -> list[dict[str, Any]]:
        """Return the messages of ``session_id`` as a list of message dicts."""

    def search_messages(
        self,
        *,
        query: str,
        role_filter: list[str] | None = None,
        exclude_sources: list[str] | None = None,
        limit: int,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Search messages matching ``query`` and return one dict per hit.

        Callers in this module read ``session_id`` from every hit.
        """
|
||||
|
||||
|
||||
SessionSummarizer = Callable[[str, str, dict[str, Any]], Awaitable[str | None]]
|
||||
|
||||
_HIDDEN_SESSION_SOURCES = ("tool",)
|
||||
|
||||
SESSION_SEARCH_TOOL_DESCRIPTION = (
|
||||
"Search prior sessions for historical context, or browse recent sessions when "
|
||||
"query is omitted. Use this when the user references past work, prior fixes, "
|
||||
"or earlier decisions instead of asking them to repeat themselves."
|
||||
)
|
||||
|
||||
SESSION_SEARCH_TOOL_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Keyword, phrase, or boolean FTS query. Omit to browse recent sessions.",
|
||||
},
|
||||
"role_filter": {
|
||||
"type": "string",
|
||||
"description": "Optional comma-separated roles to search, for example 'user,assistant'.",
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"default": 3,
|
||||
"minimum": 1,
|
||||
"maximum": 5,
|
||||
"description": "Maximum number of sessions to return.",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
|
||||
|
||||
def _format_timestamp(value: int | float | str | None) -> str:
|
||||
"""把时间戳或字符串格式化成更可读的展示文本。"""
|
||||
if value is None:
|
||||
return "unknown"
|
||||
try:
|
||||
if isinstance(value, (int, float)):
|
||||
return datetime.fromtimestamp(value).strftime("%B %d, %Y at %I:%M %p")
|
||||
if isinstance(value, str):
|
||||
if value.replace(".", "").replace("-", "").isdigit():
|
||||
return datetime.fromtimestamp(float(value)).strftime("%B %d, %Y at %I:%M %p")
|
||||
return value
|
||||
except (OSError, OverflowError, ValueError):
|
||||
pass
|
||||
return str(value)
|
||||
|
||||
|
||||
def _format_conversation(messages: list[dict[str, Any]]) -> str:
|
||||
"""把消息列表整理成适合摘要模型消费的 transcript 文本。
|
||||
|
||||
这里会保留:
|
||||
- role
|
||||
- assistant 的 tool calls 名称
|
||||
- tool 输出的简短内容
|
||||
|
||||
但不会原样塞入超长工具输出,否则摘要成本会被单个工具结果拉爆。
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for message in messages:
|
||||
role = str(message.get("role", "unknown")).upper()
|
||||
content = message.get("content") or ""
|
||||
tool_name = message.get("tool_name")
|
||||
|
||||
if role == "TOOL" and tool_name:
|
||||
if len(content) > 500:
|
||||
content = content[:250] + "\n...[truncated]...\n" + content[-250:]
|
||||
parts.append(f"[TOOL:{tool_name}]: {content}")
|
||||
continue
|
||||
|
||||
if role == "ASSISTANT":
|
||||
tool_calls = message.get("tool_calls")
|
||||
if isinstance(tool_calls, list) and tool_calls:
|
||||
names: list[str] = []
|
||||
for tool_call in tool_calls:
|
||||
if isinstance(tool_call, dict):
|
||||
names.append(
|
||||
tool_call.get("name")
|
||||
or tool_call.get("function", {}).get("name", "?")
|
||||
)
|
||||
if names:
|
||||
parts.append(f"[ASSISTANT]: [Called: {', '.join(names)}]")
|
||||
parts.append(f"[ASSISTANT]: {content}")
|
||||
continue
|
||||
|
||||
parts.append(f"[{role}]: {content}")
|
||||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
def _truncate_around_matches(full_text: str, query: str, *, max_chars: int = MAX_SESSION_CHARS) -> str:
    """Cut ``full_text`` down to ``max_chars`` around the densest query matches.

    Text that already fits is returned unchanged. Otherwise match positions
    are collected case-insensitively, in this priority order:

    1. occurrences of the whole query;
    2. for multi-word queries, occurrences of the rarest word that have every
       other word within 200 characters;
    3. occurrences of any individual word.

    The window of ``max_chars`` characters containing the most match positions
    is kept (the earliest such window on ties), starting a quarter window
    before the match so some leading context is kept. With no match at all,
    or an empty query, the head of the text is kept. Truncation markers are
    added on whichever sides were cut, so the result can be slightly longer
    than ``max_chars``.

    Args:
        full_text: the transcript to shorten.
        query: the user's search query.
        max_chars: size of the retained window, excluding markers.

    Returns:
        The retained slice, with markers where content was dropped.
    """
    from bisect import bisect_left

    if len(full_text) <= max_chars:
        return full_text

    text_lower = full_text.lower()
    query_lower = query.lower().strip()
    terms = query_lower.split()

    def find_all(needle: str) -> list[int]:
        # finditer scans left to right, so the offsets come back sorted.
        return [match.start() for match in re.finditer(re.escape(needle), text_lower)]

    def has_neighbor(sorted_positions: list[int], anchor: int) -> bool:
        # True when some position p satisfies abs(p - anchor) < 200, found by
        # binary search instead of a linear scan.
        index = bisect_left(sorted_positions, anchor - 199)
        return index < len(sorted_positions) and sorted_positions[index] <= anchor + 199

    # An empty pattern would match at every offset; skipping it avoids a
    # pathological amount of work and still ends in head truncation, which is
    # what the densest-window search would pick anyway.
    match_positions: list[int] = find_all(query_lower) if query_lower else []

    if not match_positions and len(terms) > 1:
        positions = {term: find_all(term) for term in terms}
        rarest = min(terms, key=lambda term: len(positions[term]))
        others = [term for term in terms if term != rarest]
        match_positions = [
            anchor
            for anchor in positions[rarest]
            if all(has_neighbor(positions[term], anchor) for term in others)
        ]

    if not match_positions:
        for term in terms:
            match_positions.extend(find_all(term))

    if not match_positions:
        head = full_text[:max_chars]
        suffix = "\n\n...[later conversation truncated]..." if max_chars < len(full_text) else ""
        return head + suffix

    ordered = sorted(match_positions)
    text_length = len(full_text)
    best_start = 0
    best_count = 0
    for candidate in ordered:
        window_start = max(0, candidate - max_chars // 4)
        window_end = window_start + max_chars
        if window_end > text_length:
            # Slide the window back so it ends exactly at the end of the text.
            window_start = max(0, text_length - max_chars)
            window_end = text_length
        # Number of matches with window_start <= position < window_end.
        count = bisect_left(ordered, window_end) - bisect_left(ordered, window_start)
        if count > best_count:
            best_count = count
            best_start = window_start

    start = best_start
    end = min(text_length, start + max_chars)
    prefix = "...[earlier conversation truncated]...\n\n" if start > 0 else ""
    suffix = "\n\n...[later conversation truncated]..." if end < text_length else ""
    return prefix + full_text[start:end] + suffix
|
||||
|
||||
|
||||
def _resolve_to_parent(db: SessionSearchDB, session_id: str | None) -> str | None:
|
||||
"""沿 parent_session_id 向上追溯到 lineage root。
|
||||
|
||||
这样可以把 delegation/compression 形成的子 session 归并回同一条主会话链,
|
||||
避免检索结果里出现多个其实属于同一轮上下文的碎片 session。
|
||||
"""
|
||||
visited: set[str] = set()
|
||||
current = session_id
|
||||
while current and current not in visited:
|
||||
visited.add(current)
|
||||
session = db.get_session(current)
|
||||
if not session:
|
||||
break
|
||||
parent = session.get("parent_session_id")
|
||||
if not parent:
|
||||
break
|
||||
current = parent
|
||||
return current
|
||||
|
||||
|
||||
def _list_recent_sessions(
    db: SessionSearchDB,
    *,
    limit: int,
    current_session_id: str | None = None,
) -> str:
    """Recent mode: list metadata of the latest sessions without summarizing.

    Five more sessions than ``limit`` are requested because some are filtered
    out afterwards: the current session, the root of its lineage, and any
    session that has a parent.

    Returns:
        A JSON string with ``success``, ``mode`` ("recent"), ``results``,
        ``count`` and ``message``.
    """
    fetched = db.list_sessions_rich(
        limit=limit + 5,
        exclude_sources=list(_HIDDEN_SESSION_SOURCES),
    )
    current_root = _resolve_to_parent(db, current_session_id) if current_session_id else None

    results: list[dict[str, Any]] = []
    for session in fetched:
        session_id = session.get("id", "")
        is_current_root = bool(current_root) and session_id == current_root
        is_current_session = bool(current_session_id) and session_id == current_session_id
        if is_current_root or is_current_session or session.get("parent_session_id"):
            continue

        results.append(
            {
                "session_id": session_id,
                "title": session.get("title") or None,
                "source": session.get("source", ""),
                "started_at": session.get("started_at", ""),
                "last_active": session.get("last_active", ""),
                "message_count": session.get("message_count", 0),
                "preview": session.get("preview", ""),
            }
        )
        if len(results) >= limit:
            break

    payload = {
        "success": True,
        "mode": "recent",
        "results": results,
        "count": len(results),
        "message": f"Showing {len(results)} most recent sessions.",
    }
    return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
async def session_search(
    *,
    query: str = "",
    role_filter: str | None = None,
    limit: int = 3,
    db: SessionSearchDB | None = None,
    current_session_id: str | None = None,
    summarizer: SessionSummarizer | None = None,
) -> str:
    """Search past sessions and return a structured JSON result.

    Flow:

    1. an empty query switches to recent mode (metadata only);
    2. otherwise the transcript DB is searched;
    3. hits belonging to the current session lineage are dropped and the rest
       are grouped by lineage root;
    4. the transcript of each remaining session is loaded and truncated
       around the matches;
    5. if a summarizer is given the transcripts are summarized concurrently;
       sessions without a usable summary fall back to a raw preview.

    Args:
        query: keyword or phrase; empty or blank selects recent mode.
        role_filter: optional comma-separated roles, e.g. ``"user,assistant"``.
        limit: maximum number of sessions to return, clamped to 1..5. Values
            that cannot be converted to an integer fall back to 3.
        db: session database; when ``None`` a failure payload is returned.
        current_session_id: session whose lineage is excluded from results.
        summarizer: optional async callable ``(transcript, query, session_meta)``.

    Returns:
        A JSON string. Search failures are reported as
        ``{"success": false, "error": ...}`` instead of raising.
    """
    if db is None:
        return json.dumps({"success": False, "error": "Session database is not available."}, ensure_ascii=False)

    # Arguments come from a model, so "3" or 3.0 may arrive instead of an int.
    try:
        limit = int(limit)
    except (TypeError, ValueError, OverflowError):
        limit = 3
    limit = max(1, min(limit, 5))

    if not query or not query.strip():
        return _list_recent_sessions(db, limit=limit, current_session_id=current_session_id)

    search_query = query.strip()
    role_list = [item.strip() for item in (role_filter or "").split(",") if item.strip()] or None
    try:
        raw_results = db.search_messages(
            query=search_query,
            role_filter=role_list,
            exclude_sources=list(_HIDDEN_SESSION_SOURCES),
            limit=50,
            offset=0,
        )
    except Exception as exc:
        logging.error("Session search failed during FTS lookup: %s", exc, exc_info=True)
        return json.dumps({"success": False, "error": f"Search failed: {exc}"}, ensure_ascii=False)

    if not raw_results:
        return json.dumps(
            {
                "success": True,
                "query": search_query,
                "results": [],
                "count": 0,
                "message": "No matching sessions found.",
            },
            ensure_ascii=False,
        )

    current_root = _resolve_to_parent(db, current_session_id) if current_session_id else None

    # Many hits usually come from the same session; resolve each lineage once
    # rather than walking the parent chain again for every hit.
    lineage_roots: dict[str, str] = {}
    seen_sessions: dict[str, dict[str, Any]] = {}
    for result in raw_results:
        raw_session_id = result["session_id"]
        if raw_session_id not in lineage_roots:
            lineage_roots[raw_session_id] = _resolve_to_parent(db, raw_session_id) or raw_session_id
        resolved_session_id = lineage_roots[raw_session_id]

        if current_root and resolved_session_id == current_root:
            continue
        if current_session_id and raw_session_id == current_session_id:
            continue
        if resolved_session_id not in seen_sessions:
            entry = dict(result)
            entry["session_id"] = resolved_session_id
            seen_sessions[resolved_session_id] = entry
        if len(seen_sessions) >= limit:
            break

    prepared: list[tuple[str, dict[str, Any], str, dict[str, Any]]] = []
    for session_id, match_info in seen_sessions.items():
        try:
            messages = db.get_messages_as_conversation(session_id)
            if not messages:
                continue
            session_meta = db.get_session(session_id) or {}
            transcript = _truncate_around_matches(_format_conversation(messages), search_query)
            prepared.append((session_id, match_info, transcript, session_meta))
        except Exception as exc:
            # Best effort: one unreadable session must not fail the whole search.
            logging.warning("Failed to prepare session %s: %s", session_id, exc, exc_info=True)

    summaries: list[Any]
    if summarizer is not None:
        summaries = await asyncio.gather(
            *(summarizer(transcript, search_query, session_meta) for _, _, transcript, session_meta in prepared),
            return_exceptions=True,
        )
    else:
        summaries = [None] * len(prepared)

    results: list[dict[str, Any]] = []
    for (session_id, match_info, transcript, _), summary in zip(prepared, summaries):
        resolved_summary: str | None
        # gather() can also hand back CancelledError, which derives from
        # BaseException; it must not leak into the JSON payload.
        if isinstance(summary, BaseException):
            # No exception is being handled here, so the instance is passed
            # explicitly; exc_info=True would log an empty traceback.
            logging.warning("Failed to summarize session %s: %s", session_id, summary, exc_info=summary)
            resolved_summary = None
        else:
            resolved_summary = summary

        if not resolved_summary:
            preview = transcript[:500] + ("\n…[truncated]" if len(transcript) > 500 else "")
            resolved_summary = f"[Raw preview — summarization unavailable]\n{preview}"

        results.append(
            {
                "session_id": session_id,
                "when": _format_timestamp(match_info.get("session_started")),
                "source": match_info.get("source", "unknown"),
                "model": match_info.get("model"),
                "summary": resolved_summary,
            }
        )

    return json.dumps(
        {
            "success": True,
            "query": search_query,
            "results": results,
            "count": len(results),
            "sessions_searched": len(seen_sessions),
        },
        ensure_ascii=False,
    )
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SessionSearchTool:
    """Thin runtime-facing wrapper around ``session_search``."""

    db: SessionSearchDB
    current_session_id: str | None = None
    summarizer: SessionSummarizer | None = None
    name: str = "session_search"
    description: str = SESSION_SEARCH_TOOL_DESCRIPTION
    # Each instance gets its own top-level copy of the shared schema dict.
    parameters: dict[str, Any] = field(default_factory=lambda: {**SESSION_SEARCH_TOOL_PARAMETERS})

    async def execute(self, **kwargs: Any) -> str:
        """Run ``session_search`` with this tool's database and summarizer.

        A ``current_session_id`` passed in the call arguments takes precedence
        over the one configured on the tool, unless it is ``None``.
        """
        override = kwargs.pop("current_session_id", None)
        effective_session_id = self.current_session_id if override is None else override
        return await session_search(
            db=self.db,
            current_session_id=effective_session_id,
            summarizer=self.summarizer,
            **kwargs,
        )
|
||||
82
app-instance/backend/beaver/tools/builtins/skill_view.py
Normal file
82
app-instance/backend/beaver/tools/builtins/skill_view.py
Normal file
@ -0,0 +1,82 @@
|
||||
"""Beaver 内置 skill_view tool。
|
||||
|
||||
这个工具对应 Hermes 风格的显式 skill loading path:
|
||||
1. skill 正文默认不会长期塞进 system prompt
|
||||
2. 模型若想查看某个 skill 的完整正文或支持文件,必须显式调用 `skill_view`
|
||||
|
||||
这样 skill 的按需展开路径会保持显式,而不是依赖 prompt 里长期堆目录信息。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
|
||||
SKILL_VIEW_TOOL_DESCRIPTION = (
|
||||
"Load the full content of a skill or one of its supporting files. "
|
||||
"Use this when you want to inspect a skill in detail."
|
||||
)
|
||||
|
||||
SKILL_VIEW_TOOL_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The skill name to inspect.",
|
||||
},
|
||||
"file_path": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional relative path to a supporting file inside the skill directory, "
|
||||
"for example 'references/usage.md'. Omit to load SKILL.md itself."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["name"],
|
||||
}
|
||||
|
||||
|
||||
def skill_view(*, name: str, file_path: str | None = None, loader: SkillsLoader | None = None) -> str:
    """Read a skill body or one of its supporting files and return JSON.

    Args:
        name: skill to inspect.
        file_path: optional path of a supporting file, passed through to the
            loader; ``None`` is passed when omitted.
        loader: skills loader; when ``None`` a failure payload is returned.

    Returns:
        A JSON string. On success it carries ``name``, ``file``, ``content``
        and ``supporting_files``. A ``FileNotFoundError`` or ``ValueError``
        from the loader, or an unknown skill, yields
        ``{"success": false, "error": ...}``.
    """

    def encode(payload: dict[str, Any]) -> str:
        return json.dumps(payload, ensure_ascii=False)

    if loader is None:
        return encode({"success": False, "error": "Skills loader is not available."})

    try:
        viewed = loader.view_skill(name, file_path=file_path)
    except (FileNotFoundError, ValueError) as exc:
        return encode({"success": False, "error": str(exc)})

    if viewed is None:
        return encode({"success": False, "error": f"Unknown skill '{name}'."})

    display_name, content = viewed
    return encode(
        {
            "success": True,
            "name": name,
            "file": display_name,
            "content": content,
            "supporting_files": loader.list_skill_supporting_files(name),
        }
    )
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SkillViewTool:
    """Thin runtime-facing wrapper around ``skill_view``."""

    loader: SkillsLoader
    name: str = "skill_view"
    description: str = SKILL_VIEW_TOOL_DESCRIPTION
    # Each instance gets its own top-level copy of the shared schema dict.
    parameters: dict[str, Any] = field(default_factory=lambda: {**SKILL_VIEW_TOOL_PARAMETERS})

    async def execute(self, **kwargs: Any) -> str:
        """Forward the call arguments to ``skill_view`` using this tool's loader."""
        return skill_view(**kwargs, loader=self.loader)
|
||||
2
app-instance/backend/beaver/tools/mcp/__init__.py
Normal file
2
app-instance/backend/beaver/tools/mcp/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""MCP-backed tool integrations."""
|
||||
|
||||
2
app-instance/backend/beaver/tools/policies/__init__.py
Normal file
2
app-instance/backend/beaver/tools/policies/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Tool policy guards."""
|
||||
|
||||
5
app-instance/backend/beaver/tools/registry/__init__.py
Normal file
5
app-instance/backend/beaver/tools/registry/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Tool registration and discovery."""
|
||||
|
||||
from .tool_registry import ToolRegistry
|
||||
|
||||
__all__ = ["ToolRegistry"]
|
||||
55
app-instance/backend/beaver/tools/registry/tool_registry.py
Normal file
55
app-instance/backend/beaver/tools/registry/tool_registry.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""Beaver 工具注册表。
|
||||
|
||||
这层只做三件事:
|
||||
1. 注册工具
|
||||
2. 按名称查找工具
|
||||
3. 导出 provider 可消费的 tool schemas
|
||||
|
||||
不要把执行逻辑塞进这里。
|
||||
执行属于 runtime/executor,那样边界更清晰。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from beaver.tools.base import BaseTool, ToolSpec
|
||||
|
||||
|
||||
class ToolRegistry:
    """Holds the set of tools available to the current runtime.

    Tools are keyed by ``tool.spec.name``. The registry registers, looks up
    and exports schemas; it does not execute anything.
    """

    def __init__(self) -> None:
        """Create an empty registry."""
        self._tools: dict[str, BaseTool] = {}

    def register(self, tool: BaseTool, *, replace: bool = False) -> None:
        """Register ``tool`` under its spec name.

        Overwriting an existing name is refused by default, so a loader or
        runtime cannot silently clobber a tool with the same name.

        Raises:
            ValueError: if the name is taken and ``replace`` is false.
        """
        name = tool.spec.name
        if name in self._tools and not replace:
            raise ValueError(f"Tool '{name}' is already registered")
        self._tools[name] = tool

    def register_many(self, tools: Iterable[BaseTool], *, replace: bool = False) -> None:
        """Register every tool in ``tools``, in order, with the same ``replace`` flag."""
        for entry in tools:
            self.register(entry, replace=replace)

    def get(self, name: str) -> BaseTool | None:
        """Return the tool registered as ``name``, or ``None``."""
        return self._tools.get(name)

    def require(self, name: str) -> BaseTool:
        """Return the tool registered as ``name``.

        Raises:
            KeyError: if no such tool is registered.
        """
        found = self.get(name)
        if found is not None:
            return found
        raise KeyError(f"Unknown tool '{name}'")

    def list_specs(self) -> list[ToolSpec]:
        """Return the specs of all registered tools in registration order."""
        specs: list[ToolSpec] = []
        for registered in self._tools.values():
            specs.append(registered.spec)
        return specs

    def export_provider_schemas(self) -> list[dict]:
        """Return the function-tool schema of every registered tool for the provider."""
        schemas: list[dict] = []
        for spec in self.list_specs():
            schemas.append(spec.to_provider_schema())
        return schemas
|
||||
5
app-instance/backend/beaver/tools/runtime/__init__.py
Normal file
5
app-instance/backend/beaver/tools/runtime/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Tool execution runtime helpers."""
|
||||
|
||||
from .executor import ToolExecutor
|
||||
|
||||
__all__ = ["ToolExecutor"]
|
||||
114
app-instance/backend/beaver/tools/runtime/executor.py
Normal file
114
app-instance/backend/beaver/tools/runtime/executor.py
Normal file
@ -0,0 +1,114 @@
|
||||
"""Beaver 工具执行器。
|
||||
|
||||
这层专门负责把 provider 返回的 tool call 转成真正的工具执行。
|
||||
它不关心 provider 是 OpenAI、Anthropic 还是 Codex,只关心:
|
||||
|
||||
1. 工具叫什么
|
||||
2. 参数是什么
|
||||
3. registry 能不能找到它
|
||||
4. 执行结果怎么标准化
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers.base import ToolCallRequest
|
||||
from beaver.tools.base import ToolContext, ToolResult
|
||||
from beaver.tools.registry.tool_registry import ToolRegistry
|
||||
|
||||
|
||||
class ToolExecutor:
    """Executes single tool calls against a ``ToolRegistry``.

    Failures to find a tool or to parse a call are reported as failed
    ``ToolResult`` objects, not raised, so the tool loop always gets a result
    back.
    """

    def __init__(self, registry: ToolRegistry) -> None:
        """Bind the executor to the registry used for tool lookup."""
        self.registry = registry

    async def execute(
        self,
        tool_name: str,
        arguments: dict[str, Any] | None,
        *,
        context: ToolContext | None = None,
    ) -> ToolResult:
        """Invoke the tool registered as ``tool_name``.

        Args:
            tool_name: name to look up in the registry.
            arguments: call arguments; ``None`` or empty means no arguments.
            context: runtime context; a default ``ToolContext`` is used when
                omitted.

        Returns:
            The tool's result, or a failed result with error
            ``"tool_not_found"`` when the name is not registered.
        """
        tool = self.registry.get(tool_name)
        if tool is None:
            return ToolResult(
                success=False,
                content=f"Tool {tool_name} is not registered.",
                tool_name=tool_name,
                error="tool_not_found",
            )
        return await tool.invoke(arguments or {}, context or ToolContext())

    async def execute_tool_call(
        self,
        tool_call: ToolCallRequest | dict[str, Any],
        *,
        context: ToolContext | None = None,
    ) -> ToolResult:
        """Execute one structured tool call returned by a provider.

        Two input shapes are accepted: a ``ToolCallRequest`` and an
        OpenAI-style dict (``{"function": {"name", "arguments"}}`` or a flat
        ``{"name", "arguments"}``).

        Returns:
            The tool's result. A malformed call yields a failed result with
            error ``"tool_call_parse_error"``. Arguments carrying the
            ``__beaver_tool_argument_parse_error__`` marker yield
            ``"tool_call_argument_parse_error"``.
        """
        try:
            tool_name, arguments = self._normalize_tool_call(tool_call)
        except Exception as exc:
            return ToolResult(
                success=False,
                content=f"Tool call could not be parsed: {exc}",
                tool_name=self._extract_tool_name(tool_call),
                error="tool_call_parse_error",
            )

        # NOTE(review): presumably this marker is set upstream by a provider
        # adapter that failed to decode the arguments; confirm where it is
        # produced.
        parse_error = arguments.pop("__beaver_tool_argument_parse_error__", None)
        if parse_error is not None:
            return ToolResult(
                success=False,
                content=f"Tool call arguments for {tool_name} could not be parsed: {parse_error}",
                tool_name=tool_name,
                error="tool_call_argument_parse_error",
                raw_output=arguments.get("__raw_arguments__"),
            )
        return await self.execute(tool_name, arguments, context=context)

    @staticmethod
    def _normalize_tool_call(tool_call: ToolCallRequest | dict[str, Any]) -> tuple[str, dict[str, Any]]:
        """Extract ``(tool_name, arguments)`` from either supported call shape.

        The returned arguments dict is always a fresh copy, so later mutation
        never touches the caller's payload. Missing, ``None`` or blank-string
        arguments are treated as "no arguments".

        Raises:
            ValueError: if the payload type is unsupported, the name is
                missing, the arguments string is not valid JSON, or the
                arguments are not a JSON object.
        """
        if isinstance(tool_call, ToolCallRequest):
            return tool_call.name, dict(tool_call.arguments)

        if not isinstance(tool_call, dict):
            raise ValueError(f"Unsupported tool call payload of type {type(tool_call).__name__}")

        function = tool_call.get("function")
        source = function if isinstance(function, dict) else tool_call
        name = source.get("name")
        arguments = source.get("arguments", {})

        if not name:
            raise ValueError("Tool call is missing a tool name")

        # Zero-argument calls may arrive as None or "" instead of "{}".
        if arguments is None or (isinstance(arguments, str) and not arguments.strip()):
            arguments = {}
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except json.JSONDecodeError as exc:
                raise ValueError(f"Tool call arguments for {name!r} are not valid JSON") from exc
        if not isinstance(arguments, dict):
            raise ValueError(f"Tool call arguments for {name!r} must be a dict")
        return str(name), dict(arguments)

    @staticmethod
    def _extract_tool_name(tool_call: ToolCallRequest | dict[str, Any]) -> str:
        """Best-effort tool name for error reporting; never raises.

        Returns ``"unknown"`` when no name can be found, including when the
        payload is neither a ``ToolCallRequest`` nor a dict.
        """
        if isinstance(tool_call, ToolCallRequest):
            return str(tool_call.name or "unknown")
        if not isinstance(tool_call, dict):
            # This runs inside the parse-error handler, so it must tolerate
            # exactly the malformed payloads that caused the failure.
            return "unknown"
        function = tool_call.get("function")
        if isinstance(function, dict) and function.get("name"):
            return str(function["name"])
        if tool_call.get("name"):
            return str(tool_call["name"])
        return "unknown"
|
||||
Reference in New Issue
Block a user