- 将所有环境变量前缀从NANO_改为BEAVER_ - 更新README.md文档内容,包括项目介绍、组件说明和快速开始指南 - 修改.gitignore文件,添加auth-portal运行时路径排除规则 - 更新app-instance镜像标签从nano/app-instance改为beaver/app-instance - 增强技能安全检查器,支持工具前缀白名单功能 - 添加技能草稿重新检查安全性API端点 - 扩展证据选择器,收集工具调用名称用于技能学习 - 改进技能合成器,基于实际调用的工具生成工具提示 - 优化路由超时处理机制,增加重试逻辑 - 更新后端架构文档,添加可视化入口和基础概念说明 - 实现在WebSocket消息中传递工具迭代次数信息
120 lines
4.9 KiB
Python
120 lines
4.9 KiB
Python
"""Evidence selection for skill learning."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from beaver.engine.session.manager import SessionManager
|
|
from beaver.memory.runs.store import RunMemoryStore
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class EvidencePacket:
|
|
run_ids: list[str]
|
|
session_ids: list[str]
|
|
task_summaries: list[str]
|
|
session_excerpts: list[str]
|
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
class EvidenceSelector:
    """Select runs and assemble evidence packets for skill learning.

    Runs are read from ``run_store``. When a ``session_manager`` is supplied,
    the per-run event records are additionally used to build transcript
    excerpts and to collect tool names; without it those parts stay empty.
    """

    def __init__(self, run_store: RunMemoryStore, session_manager: SessionManager | None = None) -> None:
        self.run_store = run_store
        self.session_manager = session_manager

    def select_runs_for_revision(self, skill_name: str, version: str, limit: int = 5) -> list[str]:
        """Return the ids of the runs the store lists for ``skill_name`` at ``version``.

        ``limit`` is forwarded to the store unchanged.
        """
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return ids of runs whose task text contains ``theme`` (case-insensitive).

        A blank ``theme`` matches every run. At most ``limit`` ids are returned,
        taken from the end of the store's listing order. A ``limit`` of zero or
        less yields an empty list.
        """
        # Guard explicitly: ``matches[-0:]`` would be the whole list, and a
        # negative limit would produce an arbitrary tail slice.
        if limit <= 0:
            return []
        lowered = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not lowered or lowered in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(self, run_ids: list[str], session_ids: list[str] | None = None) -> EvidencePacket:
        """Assemble an ``EvidencePacket`` for the given runs.

        Run ids unknown to the store are skipped. ``session_ids`` (de-duplicated,
        order preserved) seed the packet's session list, and the session of each
        resolved run is appended if not already present. Task summaries are cut
        to 400 characters each and capped at 8 entries; session excerpts are
        capped at 6. Tool names seen across all resolved runs are stored,
        de-duplicated, under ``metadata``.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        task_summaries: list[str] = []
        session_excerpts: list[str] = []
        tool_names: list[str] = []
        selected_tool_names: list[str] = []

        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)
            if record.session_id not in resolved_session_ids:
                resolved_session_ids.append(record.session_id)
            summary = record.task_text.strip()
            if summary:
                task_summaries.append(summary[:400])
            if self.session_manager is not None:
                excerpt = self._session_excerpt(record.session_id, run_id)
                if excerpt:
                    session_excerpts.append(excerpt)
                run_tool_names, run_selected_tool_names = self._run_tool_names(record.session_id, run_id)
                tool_names.extend(run_tool_names)
                selected_tool_names.extend(run_selected_tool_names)

        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries[:8],
            session_excerpts=session_excerpts[:6],
            metadata={
                "bounded": True,
                "tool_names": _unique_strings(tool_names),
                "selected_tool_names": _unique_strings(selected_tool_names),
            },
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Return a ``role: content`` transcript excerpt for one run.

        Only events flagged ``context_visible`` with non-empty content are
        included. The excerpt uses the first 12 such events and is cut to
        2000 characters. Returns ``""`` when no session manager is configured.
        """
        if self.session_manager is None:
            return ""
        events = self.session_manager.get_run_event_records(session_id, run_id)
        visible: list[str] = []
        for event in events:
            if not event.context_visible or not event.content:
                continue
            visible.append(f"{event.role}: {event.content.strip()}")
        return "\n".join(visible[:12])[:2000]

    def _run_tool_names(self, session_id: str, run_id: str) -> tuple[list[str], list[str]]:
        """Collect tool names from one run's event records.

        Returns ``(names, selected_names)``, both de-duplicated in first-seen
        order. ``names`` comes from each event's ``tool_name`` and from the
        entries of its ``tool_calls``; ``selected_names`` comes from the
        ``tool_names`` payload of ``tool_selection_snapshotted`` events.
        Returns two empty lists when no session manager is configured.
        """
        if self.session_manager is None:
            return [], []

        names: list[str] = []
        selected_names: list[str] = []
        for event in self.session_manager.get_run_event_records(session_id, run_id):
            if event.tool_name:
                names.append(event.tool_name)
            if event.tool_calls:
                for call in event.tool_calls:
                    if not isinstance(call, dict):
                        continue
                    # Accept both a flat {"name": ...} entry and a nested
                    # {"function": {"name": ...}} entry.
                    name = call.get("name")
                    function = call.get("function")
                    if not name and isinstance(function, dict):
                        name = function.get("name")
                    if name:
                        names.append(str(name))
            if event.event_type == "tool_selection_snapshotted" and isinstance(event.event_payload, dict):
                selected = event.event_payload.get("tool_names")
                if isinstance(selected, list):
                    selected_names.extend(str(item) for item in selected if str(item).strip())
        return _unique_strings(names), _unique_strings(selected_names)
|
|
|
|
|
|
def _unique_strings(values: list[str]) -> list[str]:
|
|
result: list[str] = []
|
|
for value in values:
|
|
cleaned = str(value).strip()
|
|
if cleaned and cleaned not in result:
|
|
result.append(cleaned)
|
|
return result
|