Files
beaver_project/app-instance/backend/beaver/engine/session/search.py
steven_li 30ab74ffb2 feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
2026-05-14 09:43:48 +08:00

157 lines
5.0 KiB
Python

"""Beaver session 子系统的检索能力。"""
from __future__ import annotations
import re
import sqlite3
from typing import Any
from .store import SessionStore
class SessionSearchService:
    """Browsing / FTS / lineage helpers built around a ``SessionStore``."""

    def __init__(self, store: SessionStore) -> None:
        """Keep a reference to the store through which every query is issued."""
        self.store = store

    @staticmethod
    def _sanitize_fts5_query(query: str) -> str:
        """Rewrite free-form user input so it can be passed to FTS5 ``MATCH``.

        Balanced double-quoted phrases are kept verbatim. Everything outside
        them has FTS5 special characters replaced by spaces, stray ``*``
        removed, dangling boolean operators trimmed, and dotted / hyphenated
        terms wrapped in quotes.

        Args:
            query: Raw search text.

        Returns:
            The sanitized query, stripped of surrounding whitespace. May be
            an empty string when nothing searchable remains.
        """
        quoted_parts: list[str] = []

        def preserve(match: re.Match[str]) -> str:
            # Swap each balanced "..." phrase for a NUL-delimited placeholder
            # so the substitutions below cannot alter its contents.
            quoted_parts.append(match.group(0))
            return f"\x00Q{len(quoted_parts) - 1}\x00"

        sanitized = re.sub(r'"[^"]*"', preserve, query)

        # Any quote still present here is unbalanced; it is blanked together
        # with the other special characters.
        sanitized = re.sub(r'[+{}()\"^]', " ", sanitized)

        # Collapse runs of "*" and drop a "*" that has no term in front of it.
        sanitized = re.sub(r"\*+", "*", sanitized)
        sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized)

        # Trim boolean operators left dangling at either end. The whole run is
        # removed in one pass: stripping a single operator would turn
        # "foo AND OR" into "foo AND", which is still a dangling operator.
        sanitized = re.sub(
            r"(?i)^(?:(?:AND|OR|NOT)\b\s*)+", "", sanitized.strip()
        )
        sanitized = re.sub(
            r"(?i)(?:\s+(?:AND|OR|NOT))+\s*$", "", sanitized.strip()
        )

        # Quote terms such as "chat-send" or "v1.2" so that "-" and "." are
        # not left as bare characters in the query.
        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)

        # Put the protected phrases back in place of their placeholders.
        for index, quoted in enumerate(quoted_parts):
            sanitized = sanitized.replace(f"\x00Q{index}\x00", quoted)
        return sanitized.strip()

    def resolve_session_id(self, session_id_or_prefix: str) -> str | None:
        """Resolve a full session ID or a unique ID prefix to a session ID.

        Args:
            session_id_or_prefix: A complete session ID, or its leading part.

        Returns:
            The matching session ID, or ``None`` when the input is empty,
            matches nothing, or matches more than one session.
        """
        # An empty prefix would become the pattern "%", which matches every
        # row; with a single stored session that row would be returned as if
        # it were a unique prefix match.
        if not session_id_or_prefix:
            return None

        exact = self.store.get_session_record(session_id_or_prefix)
        if exact is not None:
            return exact.session_id

        # Escape the LIKE wildcards (and the escape character itself) so the
        # input is matched literally.
        escaped = (
            session_id_or_prefix
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        # Two rows are enough to tell "unique" apart from "ambiguous".
        rows = self.store._fetchall(
            """
            SELECT id
            FROM sessions
            WHERE id LIKE ? ESCAPE '\\'
            ORDER BY started_at DESC
            LIMIT 2
            """,
            (f"{escaped}%",),
        )
        if len(rows) == 1:
            return rows[0]["id"]
        return None

    def list_sessions_rich(
        self,
        *,
        limit: int = 20,
        offset: int = 0,
        include_children: bool = False,
        source: str | None = None,
        exclude_sources: list[str] | None = None,
        exclude_end_reasons: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """List sessions ordered by ``last_active``, newest first.

        Args:
            limit: Maximum number of rows to return.
            offset: Number of rows to skip.
            include_children: When false, only rows whose
                ``parent_session_id`` is NULL are returned.
            source: If given, keep only sessions with this ``source``.
            exclude_sources: ``source`` values to leave out.
            exclude_end_reasons: ``end_reason`` values to leave out; rows
                with a NULL ``end_reason`` are always kept.

        Returns:
            Whatever the store's ``_fetchall`` yields for the query.
        """
        clauses: list[str] = []
        params: list[Any] = []

        if not include_children:
            clauses.append("parent_session_id IS NULL")
        if source:
            clauses.append("source = ?")
            params.append(source)
        if exclude_sources:
            placeholders = ",".join("?" for _ in exclude_sources)
            clauses.append(f"source NOT IN ({placeholders})")
            params.extend(exclude_sources)
        if exclude_end_reasons:
            placeholders = ",".join("?" for _ in exclude_end_reasons)
            # NOT IN alone evaluates to NULL for a NULL end_reason, which
            # would drop those rows; keep them explicitly.
            clauses.append(
                f"(end_reason IS NULL OR end_reason NOT IN ({placeholders}))"
            )
            params.extend(exclude_end_reasons)

        where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
        params.extend([limit, offset])
        rows = self.store._fetchall(
            f"""
            SELECT *
            FROM sessions
            {where}
            ORDER BY last_active DESC
            LIMIT ? OFFSET ?
            """,
            tuple(params),
        )
        return rows

    def search_messages(
        self,
        *,
        query: str,
        role_filter: list[str] | None = None,
        exclude_sources: list[str] | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Search session transcripts through the ``messages_fts`` FTS5 table.

        Only messages with ``context_visible = 1`` are considered.

        Args:
            query: Raw search text; sanitized before being sent to FTS5.
            role_filter: If given, keep only messages with one of these roles.
            exclude_sources: Session ``source`` values to leave out.
            limit: Maximum number of hits to return.
            offset: Number of hits to skip.

        Returns:
            Matching rows ordered by FTS5 ``rank``, each with a highlighted
            ``snippet``; an empty list when the sanitized query is empty.

        Raises:
            RuntimeError: If the store raises ``sqlite3.Error`` while running
                the query; the original error is attached as the cause.
        """
        query = self._sanitize_fts5_query(query)
        if not query:
            return []

        clauses = ["messages_fts MATCH ?", "m.context_visible = 1"]
        params: list[Any] = [query]
        if exclude_sources:
            placeholders = ",".join("?" for _ in exclude_sources)
            clauses.append(f"s.source NOT IN ({placeholders})")
            params.extend(exclude_sources)
        if role_filter:
            placeholders = ",".join("?" for _ in role_filter)
            clauses.append(f"m.role IN ({placeholders})")
            params.extend(role_filter)
        params.extend([limit, offset])

        sql = f"""
            SELECT
                m.id,
                m.session_id,
                m.role,
                s.source,
                s.model,
                s.started_at AS session_started,
                snippet(messages_fts, 0, '>>>', '<<<', '...', 40) AS snippet
            FROM messages_fts
            JOIN messages m ON m.id = messages_fts.rowid
            JOIN sessions s ON s.id = m.session_id
            WHERE {' AND '.join(clauses)}
            ORDER BY rank
            LIMIT ? OFFSET ?
        """
        try:
            return self.store._fetchall(sql, tuple(params))
        except sqlite3.Error as exc:
            raise RuntimeError(f"Session transcript search failed for query={query!r}") from exc