"""Evidence selection for skill learning."""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

from beaver.engine.session.manager import SessionManager
from beaver.memory.runs.store import RunMemoryStore

# Bounds applied while assembling an evidence packet.
_MAX_SUMMARY_CHARS = 400  # characters kept from each run's task text
_MAX_TASK_SUMMARIES = 8  # task summaries kept per packet
_MAX_SESSION_EXCERPTS = 6  # session excerpts kept per packet
_MAX_EXCERPT_EVENTS = 12  # visible events rendered into one excerpt
_MAX_EXCERPT_CHARS = 2000  # characters kept from one rendered excerpt


@dataclass(slots=True)
class EvidencePacket:
    """Bundle of run/session evidence produced by ``EvidenceSelector``.

    Attributes:
        run_ids: Ids of the requested runs that were found in the run store.
        session_ids: Caller-supplied session ids (de-duplicated, order kept)
            followed by the session ids of the resolved runs.
        task_summaries: Stripped, truncated task texts of the resolved runs.
        session_excerpts: Rendered ``"role: content"`` excerpts, one per run
            that had visible events.
        metadata: Extra information about the packet; ``build_evidence_packet``
            stores ``bounded``, ``tool_names`` and ``selected_tool_names`` here.
    """

    run_ids: list[str]
    session_ids: list[str]
    task_summaries: list[str]
    session_excerpts: list[str]
    metadata: dict[str, Any] = field(default_factory=dict)


class EvidenceSelector:
    """Pick runs from a run store and assemble evidence packets from them."""

    def __init__(
        self,
        run_store: RunMemoryStore,
        session_manager: SessionManager | None = None,
    ) -> None:
        """Store the collaborators.

        Args:
            run_store: Source of run records.
            session_manager: Optional source of per-run event records. When it
                is ``None`` packets carry no session excerpts or tool names.
        """
        self.run_store = run_store
        self.session_manager = session_manager

    def select_runs_for_revision(
        self, skill_name: str, version: str, limit: int = 5
    ) -> list[str]:
        """Return the ids of runs the store lists for a skill version.

        Args:
            skill_name: Skill name passed to ``list_runs_by_skill``.
            version: Skill version passed to ``list_runs_by_skill``.
            limit: Limit passed through to the store.
        """
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return ids of runs whose task text mentions ``theme``.

        Matching is a case-insensitive substring test against the stripped
        theme; an empty theme matches every run.

        Args:
            theme: Text to look for in each run's task text.
            limit: Maximum number of ids to return. The ids are taken from the
                end of the store's listing order. A non-positive limit yields
                an empty list.
        """
        # ``matches[-0:]`` is the whole list, so a zero limit has to be handled
        # before slicing; negative limits would likewise select an odd tail.
        if limit <= 0:
            return []
        needle = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not needle or needle in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(
        self, run_ids: list[str], session_ids: list[str] | None = None
    ) -> EvidencePacket:
        """Assemble an ``EvidencePacket`` for the given runs.

        Run ids that are not present in the run store are skipped silently.

        Args:
            run_ids: Runs to collect evidence from, in the desired order.
            session_ids: Optional session ids to seed the packet with;
                duplicates are dropped while preserving order.

        Returns:
            A packet with at most ``_MAX_TASK_SUMMARIES`` summaries and
            ``_MAX_SESSION_EXCERPTS`` excerpts, plus de-duplicated tool names
            in ``metadata``.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        # Companion set so the per-run membership test does not rescan the list.
        seen_session_ids = set(resolved_session_ids)
        task_summaries: list[str] = []
        session_excerpts: list[str] = []
        tool_names: list[str] = []
        selected_tool_names: list[str] = []

        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)

            if record.session_id not in seen_session_ids:
                seen_session_ids.add(record.session_id)
                resolved_session_ids.append(record.session_id)

            summary = record.task_text.strip()
            if summary:
                task_summaries.append(summary[:_MAX_SUMMARY_CHARS])

            if self.session_manager is None:
                continue

            # Read the run's events once and feed both extractors from the
            # same snapshot instead of querying the session manager twice.
            events = list(
                self.session_manager.get_run_event_records(record.session_id, run_id)
            )
            excerpt = self._excerpt_from_events(events)
            if excerpt:
                session_excerpts.append(excerpt)
            run_tool_names, run_selected_tool_names = self._tool_names_from_events(events)
            tool_names.extend(run_tool_names)
            selected_tool_names.extend(run_selected_tool_names)

        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries[:_MAX_TASK_SUMMARIES],
            session_excerpts=session_excerpts[:_MAX_SESSION_EXCERPTS],
            metadata={
                "bounded": True,
                "tool_names": _unique_strings(tool_names),
                "selected_tool_names": _unique_strings(selected_tool_names),
            },
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Return a bounded text excerpt of a run's visible events.

        Returns an empty string when no session manager is configured.
        """
        if self.session_manager is None:
            return ""
        events = self.session_manager.get_run_event_records(session_id, run_id)
        return self._excerpt_from_events(events)

    def _run_tool_names(self, session_id: str, run_id: str) -> tuple[list[str], list[str]]:
        """Return ``(used tool names, selected tool names)`` for a run.

        Both lists are empty when no session manager is configured.
        """
        if self.session_manager is None:
            return [], []
        events = self.session_manager.get_run_event_records(session_id, run_id)
        return self._tool_names_from_events(events)

    @staticmethod
    def _excerpt_from_events(events: Any) -> str:
        """Render the first visible events as ``"role: content"`` lines.

        Events that are not ``context_visible`` or have falsy ``content`` are
        ignored. At most ``_MAX_EXCERPT_EVENTS`` lines are joined with
        newlines and the result is cut to ``_MAX_EXCERPT_CHARS`` characters.
        """
        visible: list[str] = []
        for event in events:
            if not event.context_visible or not event.content:
                continue
            visible.append(f"{event.role}: {event.content.strip()}")
            if len(visible) == _MAX_EXCERPT_EVENTS:
                # Later events could never make it into the excerpt.
                break
        return "\n".join(visible)[:_MAX_EXCERPT_CHARS]

    @staticmethod
    def _tool_names_from_events(events: Any) -> tuple[list[str], list[str]]:
        """Collect tool names referenced by a sequence of event records.

        Returns:
            A pair of de-duplicated lists: names taken from ``tool_name`` and
            from ``tool_calls`` entries, and names listed under ``tool_names``
            in the payload of ``tool_selection_snapshotted`` events.
        """
        names: list[str] = []
        selected_names: list[str] = []
        for event in events:
            if event.tool_name:
                names.append(event.tool_name)

            if event.tool_calls:
                for call in event.tool_calls:
                    if not isinstance(call, dict):
                        continue
                    # A call may carry its name at the top level or nested
                    # under a "function" mapping; the top-level name wins.
                    name = call.get("name")
                    function = call.get("function")
                    if not name and isinstance(function, dict):
                        name = function.get("name")
                    if name:
                        names.append(str(name))

            if event.event_type == "tool_selection_snapshotted" and isinstance(
                event.event_payload, dict
            ):
                selected = event.event_payload.get("tool_names")
                if isinstance(selected, list):
                    selected_names.extend(
                        str(item) for item in selected if str(item).strip()
                    )
        return _unique_strings(names), _unique_strings(selected_names)


def _unique_strings(values: list[str]) -> list[str]:
    """Return the stripped, non-empty values with duplicates removed.

    The first occurrence of each value keeps its position.
    """
    seen: set[str] = set()
    result: list[str] = []
    for value in values:
        cleaned = str(value).strip()
        if cleaned and cleaned not in seen:
            seen.add(cleaned)
            result.append(cleaned)
    return result