- 集成MCP连接管理器,支持MCP服务器连接 - 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、 PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、 TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等 - 实现工具注册和装配功能 - 添加技能选择上下文参数 - 支持思考模式控制参数thinking_enabled feat(coordinator): 重构任务执行计划器参数命名 - 将learning_candidate_enabled重命名为allow_candidate_generation - 更新TeamGraphScheduler中的参数传递 - 修改LocalAgentRunner中的相关参数处理 - 更新README文档中的相应描述 refactor(context): 标准化工具调用参数格式 - 添加_json导入用于参数序列化 - 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷 - 修复工具调用中参数非字符串类型的序列化问题 refactor(session): 优化消息历史记录过滤逻辑 - 修改get_messages_as_conversation为基于运行状态过滤消息 - 排除未完成、失败或错误结束的运行记录 - 改进对话历史的可见性控制机制 fix(store): 修复FTS索引重建逻辑 - 添加异常处理防止FTS索引创建失败 - 实现_rebuild_fts_index方法重新构建全文搜索索引 - 优化索引触发器和表的维护流程
157 lines
5.0 KiB
Python
157 lines
5.0 KiB
Python
"""Beaver session 子系统的检索能力。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sqlite3
|
|
from typing import Any
|
|
|
|
from .store import SessionStore
|
|
|
|
|
|
class SessionSearchService:
    """Browsing, full-text search and ID-resolution helpers on top of a ``SessionStore``."""

    def __init__(self, store: SessionStore) -> None:
        """Keep a reference to the store that every query is run against."""
        self.store = store

    @staticmethod
    def _sanitize_fts5_query(query: str) -> str:
        """Rewrite free-form user input into a string that is safer to pass to FTS5 MATCH.

        The steps, in order:

        1. Balanced ``"..."`` phrases are swapped for placeholders so later
           steps leave them untouched.
        2. Remaining FTS5 syntax characters (``+ { } ( ) " ^``) become spaces.
        3. Runs of ``*`` collapse to one, and a ``*`` that does not follow a
           term is dropped.
        4. Leading and trailing runs of ``AND`` / ``OR`` / ``NOT`` are removed.
           Matching is case-insensitive, so lowercase forms are dropped too.
        5. Unquoted terms joined by ``.`` or ``-`` (``foo-bar``, ``a.b.c``) are
           wrapped in double quotes so they are matched as a phrase.
        6. The protected phrases are put back.

        Returns the sanitized query; it is empty when nothing searchable is left.
        """
        quoted_parts: list[str] = []

        def preserve(match: re.Match[str]) -> str:
            # NUL-delimited placeholder: contains none of the characters the
            # later substitutions rewrite.
            quoted_parts.append(match.group(0))
            return f"\x00Q{len(quoted_parts) - 1}\x00"

        sanitized = re.sub(r'"[^"]*"', preserve, query)
        sanitized = re.sub(r'[+{}()\"^]', " ", sanitized)
        sanitized = re.sub(r"\*+", "*", sanitized)
        sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized)

        # Strip *runs* of dangling operators, not just the first one:
        # "AND OR foo" or "foo AND OR" would otherwise still begin/end with an
        # operator and be rejected by FTS5 as a syntax error.
        sanitized = re.sub(
            r"(?i)^(?:(?:AND|OR|NOT)\b\s*)+", "", sanitized.strip()
        )
        sanitized = re.sub(
            r"(?i)(?:\s+(?:AND|OR|NOT))+\s*$", "", sanitized.strip()
        )

        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)

        for index, quoted in enumerate(quoted_parts):
            sanitized = sanitized.replace(f"\x00Q{index}\x00", quoted)
        return sanitized.strip()

    def resolve_session_id(self, session_id_or_prefix: str) -> str | None:
        """Resolve a full session ID or a unique ID prefix to a session ID.

        An exact match reported by the store wins. Otherwise the value is
        treated as a prefix and is accepted only when exactly one session ID
        starts with it.

        Returns the resolved ID, or ``None`` when the input is empty, unknown,
        or an ambiguous prefix.
        """
        # An empty prefix would become the pattern "%" and match every row, so
        # it would "resolve" to the only session whenever exactly one exists.
        if not session_id_or_prefix:
            return None

        exact = self.store.get_session_record(session_id_or_prefix)
        if exact is not None:
            return exact.session_id

        # Escape LIKE wildcards so the user's text is matched literally.
        escaped = (
            session_id_or_prefix
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        # LIMIT 2 is enough to tell "unique" apart from "ambiguous".
        rows = self.store._fetchall(
            """
            SELECT id
            FROM sessions
            WHERE id LIKE ? ESCAPE '\\'
            ORDER BY started_at DESC
            LIMIT 2
            """,
            (f"{escaped}%",),
        )
        if len(rows) == 1:
            return rows[0]["id"]
        return None

    def list_sessions_rich(
        self,
        *,
        limit: int = 20,
        offset: int = 0,
        include_children: bool = False,
        source: str | None = None,
        exclude_sources: list[str] | None = None,
        exclude_end_reasons: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """List session rows ordered by ``last_active``, newest first.

        Args:
            limit: Maximum number of rows to return.
            offset: Number of rows to skip before the first returned row.
            include_children: When false, only rows whose
                ``parent_session_id`` is NULL are returned.
            source: If given, keep only sessions with this ``source``.
            exclude_sources: ``source`` values to leave out.
            exclude_end_reasons: ``end_reason`` values to leave out; sessions
                with no ``end_reason`` are always kept.

        Returns:
            Whatever the store's ``_fetchall`` yields for the ``sessions`` rows.
        """
        clauses: list[str] = []
        params: list[Any] = []

        if not include_children:
            clauses.append("parent_session_id IS NULL")
        if source:
            clauses.append("source = ?")
            params.append(source)
        if exclude_sources:
            placeholders = ",".join("?" for _ in exclude_sources)
            clauses.append(f"source NOT IN ({placeholders})")
            params.extend(exclude_sources)
        if exclude_end_reasons:
            placeholders = ",".join("?" for _ in exclude_end_reasons)
            # NOT IN alone would also drop rows whose end_reason is NULL.
            clauses.append(
                f"(end_reason IS NULL OR end_reason NOT IN ({placeholders}))"
            )
            params.extend(exclude_end_reasons)

        where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
        params.extend([limit, offset])
        # Only placeholder lists and fixed clause text are interpolated; every
        # caller-supplied value is passed as a bound parameter.
        return self.store._fetchall(
            f"""
            SELECT *
            FROM sessions
            {where}
            ORDER BY last_active DESC
            LIMIT ? OFFSET ?
            """,
            tuple(params),
        )

    def search_messages(
        self,
        *,
        query: str,
        role_filter: list[str] | None = None,
        exclude_sources: list[str] | None = None,
        limit: int = 20,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Search session transcripts through the ``messages_fts`` FTS5 index.

        Args:
            query: Free-form search text; it is sanitized before use.
            role_filter: If given, keep only messages with one of these roles.
            exclude_sources: Session ``source`` values to leave out.
            limit: Maximum number of hits to return.
            offset: Number of hits to skip before the first returned hit.

        Returns:
            Hits ordered by FTS rank, each carrying the message id, session id,
            role, session source/model/start time and a highlighted snippet.
            An empty list is returned when the sanitized query is empty.

        Raises:
            RuntimeError: If the underlying SQLite query fails.
        """
        query = self._sanitize_fts5_query(query)
        if not query:
            return []

        # Only messages flagged context_visible are searchable.
        clauses = ["messages_fts MATCH ?", "m.context_visible = 1"]
        params: list[Any] = [query]

        if exclude_sources:
            placeholders = ",".join("?" for _ in exclude_sources)
            clauses.append(f"s.source NOT IN ({placeholders})")
            params.extend(exclude_sources)
        if role_filter:
            placeholders = ",".join("?" for _ in role_filter)
            clauses.append(f"m.role IN ({placeholders})")
            params.extend(role_filter)

        params.extend([limit, offset])
        sql = f"""
            SELECT
                m.id,
                m.session_id,
                m.role,
                s.source,
                s.model,
                s.started_at AS session_started,
                snippet(messages_fts, 0, '>>>', '<<<', '...', 40) AS snippet
            FROM messages_fts
            JOIN messages m ON m.id = messages_fts.rowid
            JOIN sessions s ON s.id = m.session_id
            WHERE {' AND '.join(clauses)}
            ORDER BY rank
            LIMIT ? OFFSET ?
        """

        try:
            return self.store._fetchall(sql, tuple(params))
        except sqlite3.Error as exc:
            raise RuntimeError(f"Session transcript search failed for query={query!r}") from exc
|