feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,sub-agent复用主AgentLoop,每个run使用独立memory snapshot。建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,仅当Task验证通过且用户满意时才生成学习候选。重构目录结构,移除third_party依赖,建立统一engine内核,所有agent共享运行时基础组件。更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
@ -1,2 +1,34 @@
|
||||
"""Multi-agent coordination layer."""
|
||||
|
||||
from .models import (
|
||||
AgentDescriptor,
|
||||
DelegationEnvelope,
|
||||
ExecutionGraph,
|
||||
ExecutionNode,
|
||||
NodeRunResult,
|
||||
TeamRunResult,
|
||||
)
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Resolve deferred package exports on first attribute access (PEP 562).

    ``LocalAgentRunner`` and ``TeamGraphScheduler`` are imported from their
    submodules only when they are actually requested.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The requested class.

    Raises:
        AttributeError: If *name* is not one of the deferred exports.
    """
    if name == "LocalAgentRunner":
        from .local import LocalAgentRunner

        return LocalAgentRunner
    if name == "TeamGraphScheduler":
        from .execution import TeamGraphScheduler

        return TeamGraphScheduler
    # Use the interpreter's conventional wording so tracebacks and
    # hasattr()/getattr() diagnostics name the module, not just the attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AgentDescriptor",
|
||||
"DelegationEnvelope",
|
||||
"ExecutionGraph",
|
||||
"ExecutionNode",
|
||||
"LocalAgentRunner",
|
||||
"NodeRunResult",
|
||||
"TeamGraphScheduler",
|
||||
"TeamRunResult",
|
||||
]
|
||||
|
||||
@ -1,2 +1,5 @@
|
||||
"""Execution control, retry, and aggregation."""
|
||||
|
||||
from .scheduler import TeamGraphScheduler
|
||||
|
||||
__all__ = ["TeamGraphScheduler"]
|
||||
|
||||
256
app-instance/backend/beaver/coordinator/execution/scheduler.py
Normal file
256
app-instance/backend/beaver/coordinator/execution/scheduler.py
Normal file
@ -0,0 +1,256 @@
|
||||
"""Minimal scheduler for Beaver-native team execution graphs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from ..local import LocalAgentRunner
|
||||
from ..models import DelegationEnvelope, ExecutionGraph, ExecutionNode, NodeRunResult, TeamRunResult
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.engine.context import SkillContext
|
||||
|
||||
|
||||
class TeamGraphScheduler:
    """Execute sequence, parallel, and DAG team graphs.

    Each node is handed to the injected runner. A node that raises is recorded
    as an unsuccessful ``NodeRunResult`` instead of aborting the team run, and
    nodes downstream of a failure are reported as ``blocked`` without being
    executed.
    """

    def __init__(self, runner: LocalAgentRunner) -> None:
        self.runner = runner

    async def run(
        self,
        graph: ExecutionGraph,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None = None,
        provider_bundle: ProviderBundle | None = None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None = None,
        inherited_pinned_skills: list[str] | None = None,
        inherited_pinned_skill_contexts: list["SkillContext"] | None = None,
        learning_candidate_enabled: bool = False,
    ) -> TeamRunResult:
        """Validate *graph*, execute it with its strategy, and summarize the outcome.

        Args:
            graph: Graph to execute; ``graph.validate()`` is called first.
            parent_task_id: Task id copied into every delegation envelope and
                into the returned result.
            parent_session_id: Session id of the delegating run.
            parent_run_id: Run id of the delegating run, if any.
            provider_bundle: Bundle passed to the runner; only allowed for
                single-node graphs.
            provider_bundle_factory: Per-node bundle factory; when given it
                takes precedence over ``provider_bundle``.
            inherited_pinned_skills: Skill names merged in front of each
                node's own pinned skills.
            inherited_pinned_skill_contexts: Skill contexts merged in front of
                each node's own pinned skill contexts.
            learning_candidate_enabled: Forwarded unchanged to the runner.

        Returns:
            The aggregated ``TeamRunResult``.

        Raises:
            ValueError: If ``provider_bundle`` is combined with a multi-node
                graph. Errors raised by ``graph.validate()`` also propagate.
        """
        graph.validate()
        if provider_bundle is not None and len(graph.nodes) > 1:
            raise ValueError("provider_bundle can only be used for single-node team graphs; use provider_bundle_factory")

        # Anything that is neither "sequence" nor "parallel" runs as a DAG,
        # which matches the strategies accepted by graph.validate().
        strategies = {
            "sequence": self._run_sequence,
            "parallel": self._run_parallel,
        }
        execute = strategies.get(graph.strategy, self._run_dag)
        results = await execute(
            graph.nodes,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle=provider_bundle,
            provider_bundle_factory=provider_bundle_factory,
            # Copy so the caller's lists are never shared with node envelopes.
            inherited_pinned_skills=list(inherited_pinned_skills or []),
            inherited_pinned_skill_contexts=list(inherited_pinned_skill_contexts or []),
            learning_candidate_enabled=learning_candidate_enabled,
        )
        return self._summarize(results, task_id=parent_task_id)

    async def _run_sequence(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run nodes one after another; after the first failure the rest are blocked.

        Each node receives the outputs of all previously successful nodes as
        its dependency outputs.
        """
        results: list[NodeRunResult] = []
        for node in nodes:
            if any(not item.success for item in results):
                results.append(self._blocked(node, results))
                continue
            dependency_outputs = {item.node_id: item.output_text for item in results if item.success}
            results.append(await self._run_node(node, dependency_outputs=dependency_outputs, **kwargs))
        return results

    async def _run_parallel(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run every node concurrently with no dependency outputs."""
        return list(await asyncio.gather(*(self._run_node(node, dependency_outputs={}, **kwargs) for node in nodes)))

    async def _run_dag(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run nodes in dependency order, batching nodes whose dependencies all succeeded.

        Nodes with a failed (or blocked) dependency are recorded as blocked.
        Blocking is propagated transitively, so a node several hops below a
        failure is also reported as blocked.

        Raises:
            ValueError: If nodes remain that can neither run nor be blocked,
                i.e. the graph has cyclic or unresolved dependencies.
        """
        pending = {node.node_id: node for node in nodes}
        completed: dict[str, NodeRunResult] = {}
        ordered: list[NodeRunResult] = []

        while pending:
            blocked_ids = sorted(
                node_id
                for node_id, node in pending.items()
                if any(dep in completed and not completed[dep].success for dep in node.depends_on)
            )
            for node_id in blocked_ids:
                node = pending.pop(node_id)
                # Only this node's own dependencies are relevant to its
                # blocked message; unrelated failures elsewhere are not.
                dependency_results = [completed[dep] for dep in node.depends_on if dep in completed]
                result = self._blocked(node, dependency_results)
                completed[node_id] = result
                ordered.append(result)

            ready = [
                node
                for node in pending.values()
                if all(dep in completed and completed[dep].success for dep in node.depends_on)
            ]
            if not ready:
                if blocked_ids:
                    # Nodes blocked in this pass may block their own
                    # dependents; rescan instead of reporting a false cycle.
                    continue
                if pending:
                    unresolved = ", ".join(sorted(pending))
                    raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies: {unresolved}")
                break

            batch = await asyncio.gather(
                *(
                    self._run_node(
                        node,
                        dependency_outputs={
                            dep: completed[dep].output_text
                            for dep in node.depends_on
                            if dep in completed
                        },
                        **kwargs,
                    )
                    for node in ready
                )
            )
            for result in batch:
                pending.pop(result.node_id, None)
                completed[result.node_id] = result
                ordered.append(result)

        return ordered

    async def _run_node(
        self,
        node: ExecutionNode,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None,
        provider_bundle: ProviderBundle | None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None,
        inherited_pinned_skills: list[str],
        inherited_pinned_skill_contexts: list["SkillContext"],
        learning_candidate_enabled: bool,
        dependency_outputs: dict[str, str],
    ) -> NodeRunResult:
        """Build the delegation envelope for *node* and run it through the runner.

        Any ``Exception`` is converted into an unsuccessful result with
        ``finish_reason="error"``; cancellation is always re-raised.
        """
        try:
            pinned = self._merge_pinned(inherited_pinned_skills, node.inherited_pinned_skills)
            pinned_contexts = self._merge_skill_contexts(
                inherited_pinned_skill_contexts,
                node.inherited_pinned_skill_contexts,
            )
            envelope = DelegationEnvelope(
                parent_task_id=parent_task_id,
                parent_session_id=parent_session_id,
                parent_run_id=parent_run_id,
                agent=node.agent,
                task=node.task,
                inherited_pinned_skills=pinned,
                inherited_pinned_skill_contexts=pinned_contexts,
                constraints=list(node.constraints),
                expected_output=node.expected_output,
                node_id=node.node_id,
                dependency_outputs=dict(dependency_outputs),
            )
            node_provider_bundle = provider_bundle_factory(node) if provider_bundle_factory is not None else provider_bundle
            return await self.runner.run(
                envelope,
                provider_bundle=node_provider_bundle,
                learning_candidate_enabled=learning_candidate_enabled,
            )
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            return NodeRunResult(
                node_id=node.node_id,
                success=False,
                output_text="",
                finish_reason="error",
                # Some exceptions stringify to "" (e.g. TimeoutError()); fall
                # back to the class name so the failure is still identifiable.
                error=str(exc) or type(exc).__name__,
            )

    @staticmethod
    def _merge_pinned(parent: list[str], local: list[str]) -> list[str]:
        """Return parent then local skill names, de-duplicated, empty names dropped."""
        return list(dict.fromkeys(name for name in [*parent, *local] if name))

    @staticmethod
    def _merge_skill_contexts(parent: list["SkillContext"], local: list["SkillContext"]) -> list["SkillContext"]:
        """Return parent then local skill contexts, keeping the first context per name.

        Contexts without a ``name`` attribute (or with an empty one) are skipped.
        """
        result: list["SkillContext"] = []
        seen: set[str] = set()
        for skill in [*parent, *local]:
            name = getattr(skill, "name", "")
            if not name or name in seen:
                continue
            seen.add(name)
            result.append(skill)
        return result

    @staticmethod
    def _blocked(node: ExecutionNode, prior_results: list[NodeRunResult]) -> NodeRunResult:
        """Build the ``blocked`` result for *node*, naming the failed entries of *prior_results*."""
        failed = [item.node_id for item in prior_results if not item.success]
        detail = ", ".join(failed) or "unknown dependency"
        return NodeRunResult(
            node_id=node.node_id,
            success=False,
            output_text="",
            finish_reason="blocked",
            error=f"Blocked by failed dependency: {detail}",
        )

    @staticmethod
    def _summarize(results: list[NodeRunResult], *, task_id: str | None) -> TeamRunResult:
        """Aggregate node results into a ``TeamRunResult``.

        The run succeeds only if every node succeeded. The summary joins the
        stripped, non-empty outputs of successful nodes and, when any node
        failed, appends a "Failed nodes:" section.
        """
        success = all(item.success for item in results)
        summary_parts = [item.output_text.strip() for item in results if item.success and item.output_text.strip()]
        failed = [item for item in results if not item.success]
        if failed:
            failure_lines = [
                f"- {item.node_id}: {item.error or item.finish_reason}"
                for item in failed
            ]
            summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines))
        summary = "\n\n".join(summary_parts)
        return TeamRunResult(
            success=success,
            summary=summary,
            node_results=results,
            run_ids=[item.run_id for item in results if item.run_id],
            session_ids=[item.session_id for item in results if item.session_id],
            task_id=task_id,
        )
|
||||
92
app-instance/backend/beaver/coordinator/local.py
Normal file
92
app-instance/backend/beaver/coordinator/local.py
Normal file
@ -0,0 +1,92 @@
|
||||
"""Local delegated-agent runner built on the shared AgentLoop."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine import AgentLoop
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import DelegationEnvelope, NodeRunResult
|
||||
|
||||
|
||||
class LocalAgentRunner:
    """Execute one delegated agent run on top of the shared AgentLoop."""

    def __init__(self, loop: AgentLoop) -> None:
        self.loop = loop

    async def run(
        self,
        envelope: DelegationEnvelope,
        *,
        provider_bundle: ProviderBundle | None = None,
        learning_candidate_enabled: bool = False,
    ) -> NodeRunResult:
        """Run the envelope's task in a fresh child session and normalize the result.

        The loop's ``submit_direct`` is used while the loop reports
        ``is_running``; otherwise ``process_direct`` is used. A run counts as
        successful only when its finish reason is ``"stop"``.

        Raises:
            ValueError: If ``provider_bundle`` is given while the agent
                descriptor also names a model or provider.
        """
        agent = envelope.agent
        if provider_bundle is not None and (agent.model or agent.provider_name):
            raise ValueError(
                "provider_bundle cannot be combined with AgentDescriptor.model/provider_name; "
                "build a node-specific provider bundle instead."
            )

        session_id = self._child_session_id(envelope)
        if self.loop.is_running:
            dispatch = self.loop.submit_direct
        else:
            dispatch = self.loop.process_direct

        outcome = await dispatch(
            envelope.task,
            session_id=session_id,
            parent_session_id=envelope.parent_session_id,
            source=f"team:{agent.name}",
            title=agent.role or agent.name,
            execution_context=self._execution_context(envelope),
            model=agent.model,
            provider_name=agent.provider_name,
            provider_bundle=provider_bundle,
            task_id=envelope.parent_task_id,
            task_mode=bool(envelope.parent_task_id),
            pinned_skill_names=envelope.inherited_pinned_skills,
            pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
            learning_candidate_enabled=learning_candidate_enabled,
        )

        succeeded = outcome.finish_reason == "stop"
        if succeeded:
            failure = None
        else:
            # Prefer the run's own text as the error; fall back to the reason.
            failure = outcome.output_text or outcome.finish_reason
        return NodeRunResult(
            node_id=envelope.node_id or agent.name,
            success=succeeded,
            output_text=outcome.output_text,
            run_id=outcome.run_id,
            session_id=outcome.session_id,
            finish_reason=outcome.finish_reason,
            error=failure,
        )

    @staticmethod
    def _child_session_id(envelope: DelegationEnvelope) -> str:
        """Derive a child session id from the parent session, node label, and a random suffix."""
        label = envelope.node_id or envelope.agent.name or "node"
        suffix = uuid4().hex[:8]
        return f"{envelope.parent_session_id}:team:{label}:{suffix}"

    @staticmethod
    def _execution_context(envelope: DelegationEnvelope) -> str:
        """Render the envelope as blank-line separated text sections.

        Sections for empty or missing envelope fields are omitted; the
        "Delegated worker" line is always present.
        """

        def bullets(items) -> str:
            return "\n".join(f"- {item}" for item in items)

        parts: list[str] = []
        if envelope.parent_task_id:
            parts.append(f"Parent task ID: {envelope.parent_task_id}")
        if envelope.parent_run_id:
            parts.append(f"Parent run ID: {envelope.parent_run_id}")
        parts.append("Delegated worker: generic task sub-agent. Follow active pinned skills as the primary guidance.")

        prompt = envelope.agent.system_prompt
        if prompt:
            parts.append(f"Additional delegated instructions:\n{prompt}")
        if envelope.constraints:
            parts.append("Constraints:\n" + bullets(envelope.constraints))
        if envelope.expected_output:
            parts.append(f"Expected output:\n{envelope.expected_output}")
        if envelope.dependency_outputs:
            blocks = [
                f"Dependency {node_id} output:\n{output}"
                for node_id, output in envelope.dependency_outputs.items()
            ]
            parts.append("Dependency outputs:\n" + "\n\n".join(blocks))
        if envelope.inherited_pinned_skills:
            parts.append("Pinned inherited skills:\n" + bullets(envelope.inherited_pinned_skills))
        if envelope.inherited_pinned_skill_contexts:
            drafts = [f"- {item.name} ({item.version})" for item in envelope.inherited_pinned_skill_contexts]
            parts.append("Ephemeral pinned skill drafts:\n" + "\n".join(drafts))
        return "\n\n".join(parts)
|
||||
151
app-instance/backend/beaver/coordinator/models.py
Normal file
151
app-instance/backend/beaver/coordinator/models.py
Normal file
@ -0,0 +1,151 @@
|
||||
"""Core models for Beaver team coordination."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.engine.context import SkillContext
|
||||
|
||||
|
||||
# Every strategy name the coordinator reserves. ExecutionGraph.validate()
# currently accepts only "sequence", "parallel", and "dag"; the remaining
# names raise NotImplementedError there.
TeamStrategy = Literal[
    "sequence",
    "parallel",
    "dag",
    "moa",
    "hierarchy",
    "heavy",
    "group_chat",
    "forest",
    "maker",
    "router",
]
|
||||
|
||||
|
||||
@dataclass(slots=True)
class AgentDescriptor:
    """Runtime identity for a delegated local agent."""

    # Agent name; the only required field.
    name: str
    # Free-form role label; empty when unset.
    role: str = ""
    # Extra instructions for the delegated agent; empty when unset.
    system_prompt: str = ""
    # Optional model and provider names; None when unset.
    model: str | None = None
    provider_name: str | None = None
    # Arbitrary extra data; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class DelegationEnvelope:
    """All context passed from a parent agent run to one delegated run."""

    # Identifiers of the delegating task, session, and run.
    parent_task_id: str | None
    parent_session_id: str
    parent_run_id: str | None
    # Agent that should execute the delegated work.
    agent: AgentDescriptor
    # Task text handed to the delegated agent.
    task: str
    # Pinned skill names and skill contexts passed down to the delegated run.
    inherited_pinned_skills: list[str] = field(default_factory=list)
    inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
    # Free-form constraint strings and an optional expected-output description.
    constraints: list[str] = field(default_factory=list)
    expected_output: str | None = None
    # Graph node this envelope was built for, if any.
    node_id: str | None = None
    # Output text of upstream nodes, keyed by node id.
    dependency_outputs: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ExecutionNode:
    """One node in a team execution graph."""

    # Identifier of this node within its graph.
    node_id: str
    # Task text for the node's agent.
    task: str
    # Agent that should run the task.
    agent: AgentDescriptor
    # node_id values of the nodes this node depends on.
    depends_on: list[str] = field(default_factory=list)
    # Node-level pinned skill names and skill contexts.
    inherited_pinned_skills: list[str] = field(default_factory=list)
    inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
    # Free-form constraint strings and an optional expected-output description.
    constraints: list[str] = field(default_factory=list)
    expected_output: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ExecutionGraph:
|
||||
"""A lightweight team graph built from Beaver-native execution nodes."""
|
||||
|
||||
strategy: TeamStrategy
|
||||
nodes: list[ExecutionNode]
|
||||
|
||||
def validate(self) -> None:
|
||||
if self.strategy not in {"sequence", "parallel", "dag"}:
|
||||
raise NotImplementedError(f"Team strategy {self.strategy!r} is reserved but not implemented in v1")
|
||||
if not self.nodes:
|
||||
raise ValueError("ExecutionGraph requires at least one node")
|
||||
node_ids = [node.node_id for node in self.nodes]
|
||||
if len(node_ids) != len(set(node_ids)):
|
||||
raise ValueError("ExecutionGraph node_id values must be unique")
|
||||
known = set(node_ids)
|
||||
for node in self.nodes:
|
||||
missing = [item for item in node.depends_on if item not in known]
|
||||
if missing:
|
||||
raise ValueError(f"ExecutionNode {node.node_id!r} depends on unknown node(s): {missing}")
|
||||
visiting: set[str] = set()
|
||||
visited: set[str] = set()
|
||||
deps = {node.node_id: list(node.depends_on) for node in self.nodes}
|
||||
|
||||
def visit(node_id: str) -> None:
|
||||
if node_id in visited:
|
||||
return
|
||||
if node_id in visiting:
|
||||
raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies involving {node_id!r}")
|
||||
visiting.add(node_id)
|
||||
for dep in deps[node_id]:
|
||||
visit(dep)
|
||||
visiting.remove(node_id)
|
||||
visited.add(node_id)
|
||||
|
||||
for node_id in node_ids:
|
||||
visit(node_id)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class NodeRunResult:
|
||||
"""Normalized result for one team node."""
|
||||
|
||||
node_id: str
|
||||
success: bool
|
||||
output_text: str
|
||||
run_id: str | None = None
|
||||
session_id: str | None = None
|
||||
finish_reason: str = "stop"
|
||||
error: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"node_id": self.node_id,
|
||||
"success": self.success,
|
||||
"output_text": self.output_text,
|
||||
"run_id": self.run_id,
|
||||
"session_id": self.session_id,
|
||||
"finish_reason": self.finish_reason,
|
||||
"error": self.error,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TeamRunResult:
|
||||
"""Normalized result returned by a Beaver team run."""
|
||||
|
||||
success: bool
|
||||
summary: str
|
||||
node_results: list[NodeRunResult] = field(default_factory=list)
|
||||
run_ids: list[str] = field(default_factory=list)
|
||||
session_ids: list[str] = field(default_factory=list)
|
||||
task_id: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"success": self.success,
|
||||
"summary": self.summary,
|
||||
"node_results": [item.to_dict() for item in self.node_results],
|
||||
"run_ids": list(self.run_ids),
|
||||
"session_ids": list(self.session_ids),
|
||||
"task_id": self.task_id,
|
||||
}
|
||||
@ -1,2 +1,14 @@
|
||||
"""Agent registry and descriptors."""
|
||||
"""Workspace specialist agent registry."""
|
||||
|
||||
from .models import AgentMatch, RegisteredAgent, TargetResolutionReport
|
||||
from .resolver import TargetResolver
|
||||
from .store import AgentRegistry
|
||||
|
||||
__all__ = [
|
||||
"AgentMatch",
|
||||
"AgentRegistry",
|
||||
"RegisteredAgent",
|
||||
"TargetResolutionReport",
|
||||
"TargetResolver",
|
||||
]
|
||||
|
||||
184
app-instance/backend/beaver/coordinator/registry/models.py
Normal file
184
app-instance/backend/beaver/coordinator/registry/models.py
Normal file
@ -0,0 +1,184 @@
|
||||
"""Workspace agent registry models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor
|
||||
|
||||
|
||||
# Lifecycle state of a registry entry.
AgentRegistryStatus = Literal["active", "disabled"]
# Origin of a registry entry.
AgentRegistrySource = Literal["builtin", "workspace", "learned"]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RegisteredAgent:
|
||||
agent_id: str
|
||||
name: str
|
||||
display_name: str
|
||||
role: str
|
||||
description: str
|
||||
system_prompt: str
|
||||
capabilities: list[str] = field(default_factory=list)
|
||||
skill_names: list[str] = field(default_factory=list)
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
model: str | None = None
|
||||
provider_name: str | None = None
|
||||
tags: list[str] = field(default_factory=list)
|
||||
priority: int = 0
|
||||
status: AgentRegistryStatus = "active"
|
||||
source: AgentRegistrySource = "workspace"
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
created_at: str = field(default_factory=lambda: _utc_now())
|
||||
updated_at: str = field(default_factory=lambda: _utc_now())
|
||||
|
||||
def to_descriptor(self) -> AgentDescriptor:
|
||||
return AgentDescriptor(
|
||||
name=self.name,
|
||||
role=self.role,
|
||||
system_prompt=self.system_prompt,
|
||||
model=self.model,
|
||||
provider_name=self.provider_name,
|
||||
metadata={
|
||||
**self.metadata,
|
||||
"agent_id": self.agent_id,
|
||||
"display_name": self.display_name,
|
||||
"description": self.description,
|
||||
"capabilities": list(self.capabilities),
|
||||
"skill_names": list(self.skill_names),
|
||||
"tool_hints": list(self.tool_hints),
|
||||
"tags": list(self.tags),
|
||||
"source": self.source,
|
||||
"resolution": "registered",
|
||||
},
|
||||
)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"agent_id": self.agent_id,
|
||||
"name": self.name,
|
||||
"display_name": self.display_name,
|
||||
"role": self.role,
|
||||
"description": self.description,
|
||||
"system_prompt": self.system_prompt,
|
||||
"capabilities": list(self.capabilities),
|
||||
"skill_names": list(self.skill_names),
|
||||
"tool_hints": list(self.tool_hints),
|
||||
"model": self.model,
|
||||
"provider_name": self.provider_name,
|
||||
"tags": list(self.tags),
|
||||
"priority": self.priority,
|
||||
"status": self.status,
|
||||
"source": self.source,
|
||||
"metadata": dict(self.metadata),
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "RegisteredAgent":
|
||||
now = _utc_now()
|
||||
agent_id = str(payload.get("agent_id") or payload.get("id") or payload.get("name") or "").strip()
|
||||
if not agent_id:
|
||||
raise ValueError("RegisteredAgent requires agent_id")
|
||||
name = str(payload.get("name") or agent_id).strip()
|
||||
return cls(
|
||||
agent_id=agent_id,
|
||||
name=name,
|
||||
display_name=str(payload.get("display_name") or payload.get("displayName") or name).strip(),
|
||||
role=str(payload.get("role") or "").strip(),
|
||||
description=str(payload.get("description") or "").strip(),
|
||||
system_prompt=str(payload.get("system_prompt") or payload.get("systemPrompt") or "").strip(),
|
||||
capabilities=_string_list(payload.get("capabilities")),
|
||||
skill_names=_string_list(payload.get("skill_names") or payload.get("skillNames")),
|
||||
tool_hints=_string_list(payload.get("tool_hints") or payload.get("toolHints")),
|
||||
model=_optional_str(payload.get("model")),
|
||||
provider_name=_optional_str(payload.get("provider_name") or payload.get("providerName")),
|
||||
tags=_string_list(payload.get("tags")),
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status="disabled" if str(payload.get("status") or "active") == "disabled" else "active",
|
||||
source=_source(payload.get("source")),
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
created_at=str(payload.get("created_at") or payload.get("createdAt") or now),
|
||||
updated_at=str(payload.get("updated_at") or payload.get("updatedAt") or now),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AgentMatch:
|
||||
agent_id: str
|
||||
score: float
|
||||
reasons: list[str]
|
||||
matched_capabilities: list[str]
|
||||
resolved_descriptor: AgentDescriptor
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"agent_id": self.agent_id,
|
||||
"score": self.score,
|
||||
"reasons": list(self.reasons),
|
||||
"matched_capabilities": list(self.matched_capabilities),
|
||||
"resolved_descriptor": {
|
||||
"name": self.resolved_descriptor.name,
|
||||
"role": self.resolved_descriptor.role,
|
||||
"model": self.resolved_descriptor.model,
|
||||
"provider_name": self.resolved_descriptor.provider_name,
|
||||
"metadata": dict(self.resolved_descriptor.metadata),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TargetResolutionReport:
|
||||
node_id: str
|
||||
requested_role: str
|
||||
requested_capabilities: list[str]
|
||||
selected_agent_id: str | None
|
||||
fallback_used: bool
|
||||
score: float
|
||||
reason: str
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"node_id": self.node_id,
|
||||
"requested_role": self.requested_role,
|
||||
"requested_capabilities": list(self.requested_capabilities),
|
||||
"selected_agent_id": self.selected_agent_id,
|
||||
"fallback_used": self.fallback_used,
|
||||
"score": self.score,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
    """Return *value* as stripped text, or None when it is None or blank."""
    if value in (None, ""):
        return None
    return str(value).strip() or None
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
    """Normalize *value* into a list of unique, stripped, non-empty strings.

    A string is split on commas and a list is used as given; any other type
    yields an empty list. The first occurrence of each entry keeps its place.
    """
    if isinstance(value, str):
        candidates = value.split(",")
    elif isinstance(value, list):
        candidates = value
    else:
        return []
    cleaned = (str(entry).strip() for entry in candidates)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(text for text in cleaned if text))
|
||||
|
||||
|
||||
def _source(value: Any) -> AgentRegistrySource:
    """Coerce *value* to a known registry source, defaulting to ``"workspace"``."""
    candidate = str(value or "workspace").strip()
    recognized = ("builtin", "workspace", "learned")
    return candidate if candidate in recognized else "workspace"  # type: ignore[return-value]
|
||||
208
app-instance/backend/beaver/coordinator/registry/resolver.py
Normal file
208
app-instance/backend/beaver/coordinator/registry/resolver.py
Normal file
@ -0,0 +1,208 @@
|
||||
"""Resolve planner node requirements to registered specialist agents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import replace
|
||||
from typing import Any, TYPE_CHECKING
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
|
||||
from .models import AgentMatch, RegisteredAgent, TargetResolutionReport
|
||||
from .store import AgentRegistry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.tasks.models import TaskRecord
|
||||
|
||||
|
||||
class TargetResolver:
|
||||
def __init__(self, registry: AgentRegistry) -> None:
    """Store the registry whose active agents are considered when matching."""
    self.registry = registry
|
||||
|
||||
def resolve_graph(
    self,
    graph: ExecutionGraph,
    *,
    task: "TaskRecord",
    user_message: str,
    attempt_index: int,
) -> tuple[ExecutionGraph, list[TargetResolutionReport]]:
    """Resolve every node's agent and return a new graph plus one report per node.

    The input graph and its nodes are left untouched; each node is copied
    with its resolved descriptor, and the strategy is carried over.
    """
    outcomes = [
        self.resolve_node(
            node,
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
        )
        for node in graph.nodes
    ]
    rebuilt_nodes: list[ExecutionNode] = [
        replace(node, agent=descriptor)
        for node, (descriptor, _report) in zip(graph.nodes, outcomes)
    ]
    reports: list[TargetResolutionReport] = [report for _descriptor, report in outcomes]
    return ExecutionGraph(strategy=graph.strategy, nodes=rebuilt_nodes), reports
|
||||
|
||||
def resolve_node(
    self,
    node: ExecutionNode,
    *,
    task: "TaskRecord",
    user_message: str,
    attempt_index: int,
) -> tuple[AgentDescriptor, TargetResolutionReport]:
    """Pick a registered agent for *node*, or fall back to the node's own agent.

    The requested role is the node agent's role, else its name, else the
    node id. Requested capabilities and tags are read from the node agent's
    metadata. When ``best_match`` returns a positive-score match, its
    descriptor is annotated with the request details and returned; otherwise
    an ephemeral descriptor is built from the node's own agent.

    Returns:
        The descriptor to run the node with and a report describing the
        resolution.
    """

    def requested(key: str) -> list[str]:
        # Metadata is free-form: tolerate a missing/None value, and treat a
        # bare string as a comma-separated list rather than iterating chars.
        raw = node.agent.metadata.get(key) or []
        if isinstance(raw, str):
            raw = raw.split(",")
        return [text for text in (str(item).strip() for item in raw) if text]

    requested_role = (node.agent.role or node.agent.name or node.node_id).strip()
    requested_capabilities = requested("requested_capabilities")
    requested_tags = requested("requested_tags")
    pinned_skills = list(node.inherited_pinned_skills)
    # NOTE(review): assumes task.goal / task.description are text or None;
    # None is rendered as an empty part so the join cannot fail.
    text_parts = [task.goal, task.description, user_message, node.task]
    match = self.best_match(
        requested_role=requested_role,
        requested_capabilities=requested_capabilities,
        requested_tags=requested_tags,
        pinned_skills=pinned_skills,
        task_text=" ".join("" if part is None else part for part in text_parts),
    )
    if match is not None and match.score > 0:
        descriptor = match.resolved_descriptor
        descriptor.metadata.update(
            {
                "node_id": node.node_id,
                "attempt_index": attempt_index,
                "requested_role": requested_role,
                # Copy so metadata and the report do not share one list.
                "requested_capabilities": list(requested_capabilities),
            }
        )
        return descriptor, TargetResolutionReport(
            node_id=node.node_id,
            requested_role=requested_role,
            requested_capabilities=requested_capabilities,
            selected_agent_id=match.agent_id,
            fallback_used=False,
            score=match.score,
            reason="; ".join(match.reasons),
        )

    fallback = AgentDescriptor(
        name=node.agent.name or node.node_id,
        role=node.agent.role,
        system_prompt=node.agent.system_prompt,
        model=node.agent.model,
        provider_name=node.agent.provider_name,
        metadata={
            **node.agent.metadata,
            "node_id": node.node_id,
            "attempt_index": attempt_index,
            "requested_role": requested_role,
            "requested_capabilities": list(requested_capabilities),
            "resolution": "fallback_ephemeral",
        },
    )
    return fallback, TargetResolutionReport(
        node_id=node.node_id,
        requested_role=requested_role,
        requested_capabilities=requested_capabilities,
        selected_agent_id=None,
        fallback_used=True,
        score=0.0,
        reason="no active registered specialist matched planner requirements",
    )
|
||||
|
||||
def best_match(
    self,
    *,
    requested_role: str,
    requested_capabilities: list[str],
    requested_tags: list[str],
    pinned_skills: list[str],
    task_text: str,
) -> AgentMatch | None:
    """Score every active registry agent and return the strongest candidate.

    Candidates with a non-positive score are discarded. The winner has the
    highest score; ties are broken by the ``priority`` entry of the
    candidate's descriptor metadata (0 when absent), then by registry order.

    Returns:
        The best ``AgentMatch``, or None when no agent scores above zero.
    """
    criteria = dict(
        requested_role=requested_role,
        requested_capabilities=requested_capabilities,
        requested_tags=requested_tags,
        pinned_skills=pinned_skills,
        task_text=task_text,
    )
    scored = [self._score_agent(agent, **criteria) for agent in self.registry.list_active_agents()]
    candidates = [candidate for candidate in scored if candidate.score > 0]
    if not candidates:
        return None

    def rank(candidate):
        return candidate.score, candidate.resolved_descriptor.metadata.get("priority", 0)

    # max() keeps the first of equally ranked candidates, which is the same
    # element a stable descending sort would place first.
    return max(candidates, key=rank)
|
||||
|
||||
def _score_agent(
    self,
    agent: RegisteredAgent,
    *,
    requested_role: str,
    requested_capabilities: list[str],
    requested_tags: list[str],
    pinned_skills: list[str],
    task_text: str,
) -> AgentMatch:
    """Score how well *agent* fits the requested role, capabilities, tags and skills.

    Both sides are tokenised with ``_terms`` and compared by set overlap.
    Each category that overlaps adds a base bonus plus a per-term bonus:

    * role (agent role, name and display name): 60 + 5 per term
    * capabilities: 30 + 5 per term
    * tags: 10 + 3 per term
    * skills: 25 + 5 per term
    * task text against the agent's whole profile: 2 per term, capped at 20

    ``agent.priority / 100`` is then added and the total is rounded to three
    decimals. The returned match carries the agent's descriptor with
    ``metadata["priority"]`` set to the agent priority.
    """
    wanted_role = _terms(requested_role)
    wanted_capabilities = _terms(" ".join(requested_capabilities))
    wanted_tags = _terms(" ".join(requested_tags))
    wanted_skills = _terms(" ".join(pinned_skills))
    wanted_task = _terms(task_text)

    role_pool = _terms(agent.role + " " + agent.name + " " + agent.display_name)
    capability_pool = _terms(" ".join(agent.capabilities))
    tag_pool = _terms(" ".join(agent.tags))
    skill_pool = _terms(" ".join(agent.skill_names))
    profile_pool = role_pool | capability_pool | tag_pool | skill_pool | _terms(agent.description)

    capability_hits = wanted_capabilities & capability_pool
    # (reason label, overlapping terms, base bonus, bonus per term)
    weighted_categories = (
        ("role matched", wanted_role & role_pool, 60, 5),
        ("capabilities matched", capability_hits, 30, 5),
        ("tags matched", wanted_tags & tag_pool, 10, 3),
        ("skills matched", wanted_skills & skill_pool, 25, 5),
    )

    total = 0.0
    explanations: list[str] = []
    for label, hits, base_bonus, per_term in weighted_categories:
        if not hits:
            continue
        total += base_bonus + per_term * len(hits)
        explanations.append(f"{label}: {', '.join(sorted(hits))}")

    task_hits = wanted_task & profile_pool
    if task_hits:
        total += min(20, len(task_hits) * 2)
        explanations.append("task text matched registry profile")

    # Priority contributes a small fraction, so it mostly separates agents
    # whose overlap bonuses are equal.
    total += agent.priority / 100.0

    resolved = agent.to_descriptor()
    resolved.metadata["priority"] = agent.priority
    return AgentMatch(
        agent_id=agent.agent_id,
        score=round(total, 3),
        reasons=explanations or ["priority fallback"],
        matched_capabilities=sorted(capability_hits),
        resolved_descriptor=resolved,
    )
|
||||
|
||||
|
||||
def _terms(value: Any) -> set[str]:
|
||||
text = str(value or "")
|
||||
normalized = "".join(ch.lower() if ch.isalnum() else " " for ch in text)
|
||||
return {part for part in normalized.split() if part}
|
||||
185
app-instance/backend/beaver/coordinator/registry/store.py
Normal file
185
app-instance/backend/beaver/coordinator/registry/store.py
Normal file
@ -0,0 +1,185 @@
|
||||
"""File-backed workspace agent registry."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .models import RegisteredAgent
|
||||
|
||||
|
||||
class AgentRegistry:
    """File-backed registry of the agents available to a workspace.

    Agents are stored as JSON in ``<workspace>/agents/registry.json``. The
    file is seeded with ``_builtin_agents()`` when it does not exist yet, and
    every query re-reads it from disk.
    """

    def __init__(self, workspace: str | Path) -> None:
        """Bind the registry to *workspace*, creating and seeding the file if missing."""
        self.workspace = Path(workspace)
        self.path = self.workspace / "agents" / "registry.json"
        self.path.parent.mkdir(parents=True, exist_ok=True)
        if not self.path.exists():
            self._write_agents(_builtin_agents())

    def list_agents(self, *, include_disabled: bool = True) -> list[RegisteredAgent]:
        """Return all stored agents, or only those with status ``"active"``."""
        agents = self._read_agents()
        if include_disabled:
            return agents
        return [agent for agent in agents if agent.status == "active"]

    def list_active_agents(self) -> list[RegisteredAgent]:
        """Return the agents whose status is ``"active"``."""
        return self.list_agents(include_disabled=False)

    def get_agent(self, agent_id: str) -> RegisteredAgent | None:
        """Return the agent with *agent_id* (whitespace-stripped), or ``None``.

        Disabled agents are returned as well.
        """
        needle = agent_id.strip()
        for agent in self.list_agents():
            if agent.agent_id == needle:
                return agent
        return None

    def upsert_agent(self, payload: dict[str, Any] | RegisteredAgent) -> RegisteredAgent:
        """Insert a new agent or replace the stored one with the same id.

        A dict payload is converted with ``RegisteredAgent.from_dict``. When
        an entry with the same ``agent_id`` exists, its ``created_at`` is
        carried over. A builtin entry overwritten by a ``"workspace"``
        payload keeps ``source == "builtin"``.

        Returns:
            The agent as persisted.
        """
        agent = payload if isinstance(payload, RegisteredAgent) else RegisteredAgent.from_dict(payload)
        agents = self.list_agents()
        for index, existing in enumerate(agents):
            if existing.agent_id == agent.agent_id:
                if existing.source == "builtin" and agent.source == "workspace":
                    agent.source = "builtin"
                agent.created_at = existing.created_at
                agents[index] = agent
                self._write_agents(agents)
                return agent
        agents.append(agent)
        self._write_agents(agents)
        return agent

    def disable_agent(self, agent_id: str) -> RegisteredAgent:
        """Set the status of *agent_id* to ``"disabled"`` and persist the change.

        The id is whitespace-stripped before matching, the same way
        ``get_agent`` does, so both lookups accept the same input.

        Raises:
            ValueError: if no stored agent has that id.
        """
        needle = agent_id.strip()
        agents = self.list_agents()
        for agent in agents:
            if agent.agent_id != needle:
                continue
            agent.status = "disabled"
            self._write_agents(agents)
            return agent
        raise ValueError(f"Unknown agent_id: {agent_id}")

    def search(
        self,
        *,
        role: str = "",
        capabilities: list[str] | None = None,
        tags: list[str] | None = None,
        skills: list[str] | None = None,
    ) -> list[RegisteredAgent]:
        """Return the active agents that match every non-empty criterion.

        Each criterion is tokenised with ``_terms``. An agent satisfies a
        criterion when at least one of its terms appears anywhere in the
        agent's profile (id, names, role, description, capabilities, tags
        and skill names). Empty criteria are ignored.
        """
        required_groups = [
            group
            for group in (
                _terms(role),
                _terms(" ".join(capabilities or [])),
                _terms(" ".join(tags or [])),
                _terms(" ".join(skills or [])),
            )
            if group
        ]
        matches: list[RegisteredAgent] = []
        for agent in self.list_active_agents():
            haystack = _terms(
                " ".join(
                    [
                        agent.agent_id,
                        agent.name,
                        agent.display_name,
                        agent.role,
                        agent.description,
                        " ".join(agent.capabilities),
                        " ".join(agent.tags),
                        " ".join(agent.skill_names),
                    ]
                )
            )
            if all(not group.isdisjoint(haystack) for group in required_groups):
                matches.append(agent)
        return matches

    def _read_agents(self) -> list[RegisteredAgent]:
        """Load the agents from disk.

        Accepts either ``{"agents": [...]}`` or a bare list. A missing file
        or an unexpected top-level shape yields an empty list, and non-dict
        entries are skipped.

        Raises:
            json.JSONDecodeError: if the file is not valid JSON.
        """
        if not self.path.exists():
            return []
        payload = json.loads(self.path.read_text(encoding="utf-8"))
        raw_agents = payload.get("agents") if isinstance(payload, dict) else payload
        if not isinstance(raw_agents, list):
            return []
        return [RegisteredAgent.from_dict(item) for item in raw_agents if isinstance(item, dict)]

    def _write_agents(self, agents: list[RegisteredAgent]) -> None:
        """Persist *agents* as the complete registry content.

        The JSON is written to a sibling temporary file which then replaces
        ``registry.json``. An interrupted write therefore cannot leave a
        truncated registry that later reads would fail to parse.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = {"version": 1, "agents": [agent.to_dict() for agent in agents]}
        text = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
        staging = self.path.with_name(self.path.name + ".tmp")
        try:
            staging.write_text(text, encoding="utf-8")
            staging.replace(self.path)
        finally:
            # No-op after a successful replace; removes the partial temp
            # file when writing or renaming failed.
            staging.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def _terms(text: str) -> set[str]:
|
||||
normalized = "".join(ch.lower() if ch.isalnum() else " " for ch in text)
|
||||
return {part for part in normalized.split() if part}
|
||||
|
||||
|
||||
def _builtin_agents() -> list[RegisteredAgent]:
    """Build the default agents used to seed a new workspace registry.

    Returns five fresh ``RegisteredAgent`` objects (researcher, implementer,
    reviewer, tester, documenter), each with ``source="builtin"``.
    """
    profiles = (
        {
            "agent_id": "researcher",
            "name": "researcher",
            "display_name": "Researcher",
            "role": "research",
            "description": "Finds facts, references, constraints, and implementation options.",
            "system_prompt": "You are a research specialist. Gather concise evidence and tradeoffs for the parent task.",
            "capabilities": ["research", "analysis", "source review", "requirements"],
            "tags": ["planning", "research"],
            "priority": 50,
        },
        {
            "agent_id": "implementer",
            "name": "implementer",
            "display_name": "Implementer",
            "role": "implementation",
            "description": "Builds scoped implementation slices and proposes concrete changes.",
            "system_prompt": "You are an implementation specialist. Produce practical, scoped implementation output.",
            "capabilities": ["implementation", "coding", "refactor", "integration"],
            "tags": ["coding", "build"],
            "priority": 45,
        },
        {
            "agent_id": "reviewer",
            "name": "reviewer",
            "display_name": "Reviewer",
            "role": "review",
            "description": "Reviews plans, code, outputs, and risks before final synthesis.",
            "system_prompt": "You are a review specialist. Focus on defects, missing requirements, and risks.",
            "capabilities": ["review", "quality", "risk", "verification"],
            "tags": ["review", "quality"],
            "priority": 45,
        },
        {
            "agent_id": "tester",
            "name": "tester",
            "display_name": "Tester",
            "role": "testing",
            "description": "Designs and executes verification checks for task outputs.",
            "system_prompt": "You are a testing specialist. Identify focused checks and report pass/fail evidence.",
            "capabilities": ["testing", "verification", "regression", "qa"],
            "tags": ["test", "quality"],
            "priority": 40,
        },
        {
            "agent_id": "documenter",
            "name": "documenter",
            "display_name": "Documenter",
            "role": "documentation",
            "description": "Writes and reconciles user-facing and internal documentation updates.",
            "system_prompt": "You are a documentation specialist. Produce concise docs aligned with the implementation.",
            "capabilities": ["documentation", "explanation", "migration notes", "release notes"],
            "tags": ["docs", "communication"],
            "priority": 35,
        },
    )
    return [RegisteredAgent(source="builtin", **profile) for profile in profiles]
|
||||
@ -1,2 +1,19 @@
|
||||
"""Team models and orchestration objects."""
|
||||
|
||||
from ..models import (
|
||||
AgentDescriptor,
|
||||
DelegationEnvelope,
|
||||
ExecutionGraph,
|
||||
ExecutionNode,
|
||||
NodeRunResult,
|
||||
TeamRunResult,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AgentDescriptor",
|
||||
"DelegationEnvelope",
|
||||
"ExecutionGraph",
|
||||
"ExecutionNode",
|
||||
"NodeRunResult",
|
||||
"TeamRunResult",
|
||||
]
|
||||
|
||||
@ -42,6 +42,10 @@ class SkillContext:
|
||||
|
||||
name: str
|
||||
content: str
|
||||
version: str = "legacy"
|
||||
content_hash: str = ""
|
||||
activation_reason: str = "selected"
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -197,7 +201,7 @@ class ContextBuilder:
|
||||
# 如果上游 history 已经混入 system 消息,这里要主动跳过,避免双 system。
|
||||
if message.get("role") == "system":
|
||||
continue
|
||||
messages.append(dict(message))
|
||||
messages.append(self._provider_history_message(message))
|
||||
|
||||
if build_input.current_user_input is not None:
|
||||
messages.append(
|
||||
@ -212,6 +216,16 @@ class ContextBuilder:
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _provider_history_message(message: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Keep persisted UI/audit fields out of provider message payloads."""
|
||||
|
||||
allowed = {"role", "content", "tool_calls", "tool_call_id", "name"}
|
||||
clean = {key: value for key, value in message.items() if key in allowed}
|
||||
if "name" not in clean and message.get("tool_name"):
|
||||
clean["name"] = message.get("tool_name")
|
||||
return clean
|
||||
|
||||
def add_tool_result(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
@ -322,7 +336,7 @@ class ContextBuilder:
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'[SYSTEM: The "{skill.name}" skill is active for this run. '
|
||||
f'[SYSTEM: The "{skill.name}" skill (version {skill.version}) is active for this run. '
|
||||
"Follow its instructions as active guidance unless the user overrides them.]\n\n"
|
||||
f"{content}"
|
||||
),
|
||||
|
||||
@ -7,11 +7,23 @@ from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from beaver.coordinator.registry import AgentRegistry
|
||||
from beaver.engine.context import ContextBuilder
|
||||
from beaver.engine.session import SessionManager
|
||||
from beaver.foundation.config import BeaverConfig, load_config
|
||||
from beaver.memory.curated.store import MemoryStore
|
||||
from beaver.memory.runs import RunMemoryStore
|
||||
from beaver.memory.skills import SkillLearningStore
|
||||
from beaver.services.memory_service import MemoryService
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
|
||||
from beaver.skills.learning.safety import SkillDraftSafetyChecker
|
||||
from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
|
||||
from beaver.tasks.skill_resolver import TaskSkillResolver
|
||||
from beaver.skills import SkillAssembler, SkillsLoader
|
||||
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
|
||||
from beaver.tools.builtins import (
|
||||
@ -45,12 +57,25 @@ class EngineLoadResult:
|
||||
session_manager: SessionManager | None = None
|
||||
curated_memory_store: MemoryStore | None = None
|
||||
memory_service: MemoryService | None = None
|
||||
run_memory_store: RunMemoryStore | None = None
|
||||
skill_learning_store: SkillLearningStore | None = None
|
||||
tool_registry: ToolRegistry | None = None
|
||||
tool_assembler: ToolAssembler | None = None
|
||||
tool_executor: ToolExecutor | None = None
|
||||
context_builder: ContextBuilder | None = None
|
||||
skills_loader: SkillsLoader | None = None
|
||||
skill_assembler: SkillAssembler | None = None
|
||||
skill_spec_store: SkillSpecStore | None = None
|
||||
draft_service: DraftService | None = None
|
||||
review_service: ReviewService | None = None
|
||||
skill_publisher: SkillPublisher | None = None
|
||||
skill_learning_service: SkillLearningService | None = None
|
||||
skill_learning_pipeline: SkillLearningPipelineService | None = None
|
||||
agent_registry: AgentRegistry | None = None
|
||||
task_skill_resolver: TaskSkillResolver | None = None
|
||||
task_service: TaskService | None = None
|
||||
task_execution_planner: TaskExecutionPlanner | None = None
|
||||
validation_service: ValidationService | None = None
|
||||
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
|
||||
closed: bool = False
|
||||
|
||||
@ -106,11 +131,24 @@ class EngineLoader:
|
||||
session_manager: SessionManager | None = None,
|
||||
curated_memory_store: MemoryStore | None = None,
|
||||
memory_service: MemoryService | None = None,
|
||||
run_memory_store: RunMemoryStore | None = None,
|
||||
skill_learning_store: SkillLearningStore | None = None,
|
||||
tool_registry: ToolRegistry | None = None,
|
||||
tool_assembler: ToolAssembler | None = None,
|
||||
context_builder: ContextBuilder | None = None,
|
||||
skills_loader: SkillsLoader | None = None,
|
||||
skill_assembler: SkillAssembler | None = None,
|
||||
skill_spec_store: SkillSpecStore | None = None,
|
||||
draft_service: DraftService | None = None,
|
||||
review_service: ReviewService | None = None,
|
||||
skill_publisher: SkillPublisher | None = None,
|
||||
skill_learning_service: SkillLearningService | None = None,
|
||||
skill_learning_pipeline: SkillLearningPipelineService | None = None,
|
||||
agent_registry: AgentRegistry | None = None,
|
||||
task_skill_resolver: TaskSkillResolver | None = None,
|
||||
task_service: TaskService | None = None,
|
||||
task_execution_planner: TaskExecutionPlanner | None = None,
|
||||
validation_service: ValidationService | None = None,
|
||||
) -> None:
|
||||
self.config = config or load_config(workspace=workspace, config_path=config_path)
|
||||
configured_workspace = self.config.agents_defaults.workspace
|
||||
@ -119,11 +157,24 @@ class EngineLoader:
|
||||
self._session_manager = session_manager
|
||||
self._curated_memory_store = curated_memory_store
|
||||
self._memory_service = memory_service
|
||||
self._run_memory_store = run_memory_store
|
||||
self._skill_learning_store = skill_learning_store
|
||||
self._tool_registry = tool_registry
|
||||
self._tool_assembler = tool_assembler
|
||||
self._context_builder = context_builder
|
||||
self._skills_loader = skills_loader
|
||||
self._skill_assembler = skill_assembler
|
||||
self._skill_spec_store = skill_spec_store
|
||||
self._draft_service = draft_service
|
||||
self._review_service = review_service
|
||||
self._skill_publisher = skill_publisher
|
||||
self._skill_learning_service = skill_learning_service
|
||||
self._skill_learning_pipeline = skill_learning_pipeline
|
||||
self._agent_registry = agent_registry
|
||||
self._task_skill_resolver = task_skill_resolver
|
||||
self._task_service = task_service
|
||||
self._task_execution_planner = task_execution_planner
|
||||
self._validation_service = validation_service
|
||||
|
||||
def load(self) -> EngineLoadResult:
|
||||
"""装配当前主链需要的最小 runtime 对象。"""
|
||||
@ -135,9 +186,12 @@ class EngineLoader:
|
||||
curated_memory_store = self._curated_memory_store or MemoryStore(curated_root)
|
||||
memory_service = self._memory_service or MemoryService(curated_root, store=curated_memory_store)
|
||||
memory_service.initialize()
|
||||
run_memory_store = self._run_memory_store or RunMemoryStore(workspace / "memory" / "runs")
|
||||
skill_learning_store = self._skill_learning_store or SkillLearningStore(workspace / "memory" / "skills")
|
||||
|
||||
tool_registry = self._tool_registry or ToolRegistry()
|
||||
skills_loader = self._skills_loader or SkillsLoader(workspace)
|
||||
skill_spec_store = self._skill_spec_store or SkillSpecStore(workspace)
|
||||
skills_loader = self._skills_loader or SkillsLoader(workspace, skill_store=skill_spec_store)
|
||||
if self._tool_registry is None:
|
||||
# 这里先注册最小工具集,满足主链的 tool loop。
|
||||
tool_registry.register_many(
|
||||
@ -156,6 +210,36 @@ class EngineLoader:
|
||||
tool_assembler = self._tool_assembler or ToolAssembler()
|
||||
tool_executor = ToolExecutor(tool_registry)
|
||||
skill_assembler = self._skill_assembler or SkillAssembler(skills_loader)
|
||||
draft_service = self._draft_service or DraftService(skill_spec_store)
|
||||
review_service = self._review_service or ReviewService(skill_spec_store)
|
||||
skill_publisher = self._skill_publisher or SkillPublisher(skill_spec_store)
|
||||
evidence_selector = EvidenceSelector(run_memory_store, session_manager=session_manager)
|
||||
skill_learning_service = self._skill_learning_service or SkillLearningService(
|
||||
run_store=run_memory_store,
|
||||
learning_store=skill_learning_store,
|
||||
draft_service=draft_service,
|
||||
evidence_selector=evidence_selector,
|
||||
synthesizer=SkillDraftSynthesizer(),
|
||||
)
|
||||
skill_learning_pipeline = self._skill_learning_pipeline or SkillLearningPipelineService(
|
||||
learning_store=skill_learning_store,
|
||||
learning_service=skill_learning_service,
|
||||
draft_service=draft_service,
|
||||
review_service=review_service,
|
||||
publisher=skill_publisher,
|
||||
safety_checker=SkillDraftSafetyChecker(
|
||||
allowed_tool_names={spec.name for spec in tool_registry.list_specs()}
|
||||
),
|
||||
evaluator=SkillDraftEvaluator(run_memory_store),
|
||||
)
|
||||
agent_registry = self._agent_registry or AgentRegistry(workspace)
|
||||
task_skill_resolver = self._task_skill_resolver or TaskSkillResolver(
|
||||
skills_loader=skills_loader,
|
||||
draft_service=draft_service,
|
||||
)
|
||||
task_service = self._task_service or TaskService(workspace / "tasks")
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
|
||||
validation_service = self._validation_service or ValidationService()
|
||||
|
||||
result = EngineLoadResult(
|
||||
workspace=workspace,
|
||||
@ -167,12 +251,25 @@ class EngineLoader:
|
||||
session_manager=session_manager,
|
||||
curated_memory_store=memory_service.get_store(),
|
||||
memory_service=memory_service,
|
||||
run_memory_store=run_memory_store,
|
||||
skill_learning_store=skill_learning_store,
|
||||
tool_registry=tool_registry,
|
||||
tool_assembler=tool_assembler,
|
||||
tool_executor=tool_executor,
|
||||
context_builder=context_builder,
|
||||
skills_loader=skills_loader,
|
||||
skill_assembler=skill_assembler,
|
||||
skill_spec_store=skill_spec_store,
|
||||
draft_service=draft_service,
|
||||
review_service=review_service,
|
||||
skill_publisher=skill_publisher,
|
||||
skill_learning_service=skill_learning_service,
|
||||
skill_learning_pipeline=skill_learning_pipeline,
|
||||
agent_registry=agent_registry,
|
||||
task_skill_resolver=task_skill_resolver,
|
||||
task_service=task_service,
|
||||
task_execution_planner=task_execution_planner,
|
||||
validation_service=validation_service,
|
||||
)
|
||||
if self._session_manager is None:
|
||||
result.register_closeable("session_manager", session_manager.close)
|
||||
|
||||
@ -4,10 +4,15 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.context import ContextBuildInput, SessionContext
|
||||
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
|
||||
from beaver.memory.runs import RunRecord, SkillEffectRecord
|
||||
from beaver.skills.learning import RunReceiptContext
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
from beaver.engine.providers import ProviderBundle, make_provider_bundle
|
||||
from beaver.tools import ToolContext
|
||||
|
||||
@ -38,6 +43,9 @@ class AgentRunResult:
|
||||
provider_name: str | None = None
|
||||
model: str | None = None
|
||||
usage: dict[str, Any] = field(default_factory=dict)
|
||||
task_id: str | None = None
|
||||
task_status: str | None = None
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -196,6 +204,13 @@ class AgentLoop:
|
||||
temperature: float | None = None,
|
||||
max_tool_iterations: int | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
parent_session_id: str | None = None,
|
||||
task_id: str | None = None,
|
||||
task_mode: bool = False,
|
||||
attempt_index: int | None = None,
|
||||
pinned_skill_names: list[str] | None = None,
|
||||
pinned_skill_contexts: list[SkillContext] | None = None,
|
||||
learning_candidate_enabled: bool = False,
|
||||
) -> AgentRunResult:
|
||||
"""跑通最小 direct run 主链。
|
||||
|
||||
@ -233,6 +248,13 @@ class AgentLoop:
|
||||
temperature=temperature,
|
||||
max_tool_iterations=max_tool_iterations,
|
||||
provider_bundle=provider_bundle,
|
||||
parent_session_id=parent_session_id,
|
||||
task_id=task_id,
|
||||
task_mode=task_mode,
|
||||
attempt_index=attempt_index,
|
||||
pinned_skill_names=pinned_skill_names,
|
||||
pinned_skill_contexts=pinned_skill_contexts,
|
||||
learning_candidate_enabled=learning_candidate_enabled,
|
||||
)
|
||||
|
||||
async def _process_direct_impl(
|
||||
@ -258,6 +280,13 @@ class AgentLoop:
|
||||
temperature: float | None = None,
|
||||
max_tool_iterations: int | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
parent_session_id: str | None = None,
|
||||
task_id: str | None = None,
|
||||
task_mode: bool = False,
|
||||
attempt_index: int | None = None,
|
||||
pinned_skill_names: list[str] | None = None,
|
||||
pinned_skill_contexts: list[SkillContext] | None = None,
|
||||
learning_candidate_enabled: bool = False,
|
||||
) -> AgentRunResult:
|
||||
"""真正执行一轮 direct run 的内部实现。
|
||||
|
||||
@ -276,6 +305,7 @@ class AgentLoop:
|
||||
tool_executor = self._require_loaded("tool_executor")
|
||||
skills_loader = self._require_loaded("skills_loader")
|
||||
skill_assembler = self._require_loaded("skill_assembler")
|
||||
skill_learning_service = self._require_loaded("skill_learning_service")
|
||||
|
||||
config = loaded.config
|
||||
configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
|
||||
@ -296,16 +326,24 @@ class AgentLoop:
|
||||
self.profile.max_tool_iterations if max_tool_iterations is None else max_tool_iterations
|
||||
)
|
||||
|
||||
# 每次新运行开始前都通过 MemoryService 刷新 live state。
|
||||
# 这样 memory policy 会收口在 service,而不是散在 loop 里。
|
||||
memory_service.reload_for_new_run()
|
||||
# 每个 run 都捕获自己的 frozen snapshot,不能依赖 MemoryService
|
||||
# 上的共享 `_snapshot`,否则 parallel team runs 会互相覆盖。
|
||||
memory_snapshot = memory_service.capture_snapshot_for_run()
|
||||
|
||||
if parent_session_id:
|
||||
session_manager.ensure_session(
|
||||
parent_session_id,
|
||||
source="unknown",
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
session_manager.ensure_session(
|
||||
resolved_session_id,
|
||||
source=source,
|
||||
model=resolved_model,
|
||||
title=title,
|
||||
user_id=user_id,
|
||||
parent_session_id=parent_session_id,
|
||||
)
|
||||
session_manager.append_message(
|
||||
resolved_session_id,
|
||||
@ -316,6 +354,12 @@ class AgentLoop:
|
||||
"source": source,
|
||||
"model": resolved_model,
|
||||
"agent_name": self.profile.name,
|
||||
"task_id": task_id,
|
||||
"task_mode": task_mode,
|
||||
"attempt_index": attempt_index,
|
||||
"parent_session_id": parent_session_id,
|
||||
"pinned_skill_names": list(pinned_skill_names or []),
|
||||
"pinned_skill_context_names": [skill.name for skill in pinned_skill_contexts or []],
|
||||
},
|
||||
content=task,
|
||||
context_visible=False,
|
||||
@ -330,6 +374,8 @@ class AgentLoop:
|
||||
final_usage: dict[str, Any] = {}
|
||||
final_provider_name: str | None = resolved_provider_name
|
||||
final_model: str | None = resolved_model
|
||||
run_started_at = self._utc_now()
|
||||
activated_receipts: list[SkillActivationReceipt] = []
|
||||
try:
|
||||
bundle = provider_bundle or make_provider_bundle(
|
||||
model=resolved_model,
|
||||
@ -356,17 +402,38 @@ class AgentLoop:
|
||||
model=skill_selector_model,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
)
|
||||
skill_activation_messages = context_builder.build_skill_activation_messages(
|
||||
assembled_skills.activated_skills
|
||||
activated_skills = self._merge_skill_contexts(
|
||||
[
|
||||
*(pinned_skill_contexts or []),
|
||||
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
|
||||
],
|
||||
assembled_skills.activated_skills,
|
||||
)
|
||||
skill_activation_messages = context_builder.build_skill_activation_messages(
|
||||
activated_skills
|
||||
)
|
||||
activated_receipts = [
|
||||
SkillActivationReceipt(
|
||||
run_id=resolved_run_id,
|
||||
session_id=resolved_session_id,
|
||||
skill_name=skill.name,
|
||||
skill_version=skill.version,
|
||||
content_hash=skill.content_hash,
|
||||
activated_at=self._utc_now(),
|
||||
activation_reason=skill.activation_reason,
|
||||
tool_hints=list(skill.tool_hints),
|
||||
)
|
||||
for skill in activated_skills
|
||||
]
|
||||
|
||||
if skill_activation_messages:
|
||||
if skill_activation_messages or activated_receipts:
|
||||
session_manager.append_message(
|
||||
resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
role="system",
|
||||
event_type="skill_activation_snapshotted",
|
||||
event_payload={
|
||||
"receipts": [receipt.to_dict() for receipt in activated_receipts],
|
||||
"activation_messages": skill_activation_messages,
|
||||
},
|
||||
content="\n\n".join(message["content"] for message in skill_activation_messages) or None,
|
||||
@ -381,7 +448,7 @@ class AgentLoop:
|
||||
task_description=task,
|
||||
registry=tool_registry,
|
||||
skills_loader=skills_loader,
|
||||
activated_skills=assembled_skills.activated_skills,
|
||||
activated_skills=activated_skills,
|
||||
embedding_runtime=bundle.embedding_runtime,
|
||||
top_k=10,
|
||||
)
|
||||
@ -407,13 +474,14 @@ class AgentLoop:
|
||||
base_system_prompt=self.profile.system_prompt,
|
||||
history=session_manager.get_history(resolved_session_id),
|
||||
current_user_input=task,
|
||||
memory_snapshot=memory_service.get_snapshot(),
|
||||
activated_skills=assembled_skills.activated_skills,
|
||||
memory_snapshot=memory_snapshot,
|
||||
activated_skills=activated_skills,
|
||||
session_context=SessionContext(
|
||||
session_id=resolved_session_id,
|
||||
source=source,
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
parent_session_id=parent_session_id,
|
||||
),
|
||||
execution_context=execution_context,
|
||||
)
|
||||
@ -491,6 +559,7 @@ class AgentLoop:
|
||||
run_id=resolved_run_id,
|
||||
role="assistant",
|
||||
event_type="assistant_message_added",
|
||||
event_payload={"task_id": task_id} if task_id else None,
|
||||
content=response.content,
|
||||
tool_calls=assistant_tool_calls or None,
|
||||
finish_reason=response.finish_reason,
|
||||
@ -520,6 +589,7 @@ class AgentLoop:
|
||||
run_id=resolved_run_id,
|
||||
role="assistant",
|
||||
event_type="assistant_message_added",
|
||||
event_payload={"task_id": task_id} if task_id else None,
|
||||
content=final_text,
|
||||
finish_reason=final_finish_reason,
|
||||
source=source,
|
||||
@ -568,6 +638,9 @@ class AgentLoop:
|
||||
event_payload={
|
||||
"finish_reason": final_finish_reason,
|
||||
"tool_iterations": iterations,
|
||||
"task_id": task_id,
|
||||
"task_mode": task_mode,
|
||||
"attempt_index": attempt_index,
|
||||
},
|
||||
content=final_text,
|
||||
finish_reason=final_finish_reason,
|
||||
@ -577,6 +650,21 @@ class AgentLoop:
|
||||
model=final_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
self._record_skill_learning(
|
||||
skill_learning_service=skill_learning_service,
|
||||
session_manager=session_manager,
|
||||
session_id=resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
task=task,
|
||||
run_started_at=run_started_at,
|
||||
run_ended_at=self._utc_now(),
|
||||
finish_reason=final_finish_reason,
|
||||
activated_receipts=activated_receipts,
|
||||
success=(final_finish_reason == "stop"),
|
||||
task_id=task_id,
|
||||
attempt_index=attempt_index,
|
||||
generate_candidates=learning_candidate_enabled,
|
||||
)
|
||||
return AgentRunResult(
|
||||
session_id=resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
@ -586,6 +674,7 @@ class AgentLoop:
|
||||
provider_name=final_provider_name,
|
||||
model=final_model,
|
||||
usage=final_usage,
|
||||
task_id=task_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
if not user_message_recorded:
|
||||
@ -600,7 +689,7 @@ class AgentLoop:
|
||||
model=resolved_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
return self._build_error_result(
|
||||
result = self._build_error_result(
|
||||
session_manager=session_manager,
|
||||
session_id=resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
@ -612,7 +701,24 @@ class AgentLoop:
|
||||
tool_iterations=iterations,
|
||||
provider_name=final_provider_name,
|
||||
usage=final_usage,
|
||||
task_id=task_id,
|
||||
)
|
||||
self._record_skill_learning(
|
||||
skill_learning_service=skill_learning_service,
|
||||
session_manager=session_manager,
|
||||
session_id=resolved_session_id,
|
||||
run_id=resolved_run_id,
|
||||
task=task,
|
||||
run_started_at=run_started_at,
|
||||
run_ended_at=self._utc_now(),
|
||||
finish_reason="error",
|
||||
activated_receipts=activated_receipts,
|
||||
success=False,
|
||||
task_id=task_id,
|
||||
attempt_index=attempt_index,
|
||||
generate_candidates=learning_candidate_enabled,
|
||||
)
|
||||
return result
|
||||
|
||||
def _require_loaded(self, field_name: str) -> Any:
|
||||
loaded = self.boot()
|
||||
@ -621,6 +727,46 @@ class AgentLoop:
|
||||
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
|
||||
return value
|
||||
|
||||
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
    """Turn a list of pinned skill names into ``SkillContext`` objects.

    Each name is stringified and stripped; blank names and repeats are
    skipped, so the result follows the order of first appearance.

    Raises:
        ValueError: if a pinned skill has no record, or its published
            content is missing or empty once frontmatter is stripped.
    """
    pinned: list[SkillContext] = []
    visited: set[str] = set()
    for raw_name in skill_names:
        skill_name = str(raw_name).strip()
        if not skill_name or skill_name in visited:
            continue
        visited.add(skill_name)

        record = skills_loader.get_skill_record(skill_name)
        published = skills_loader.load_published_skill(skill_name)
        body = strip_frontmatter(published).strip() if published else ""
        if record is None or not body:
            raise ValueError(f"Pinned skill {skill_name!r} is not available for delegated execution")

        context = SkillContext(
            name=skill_name,
            content=body,
            version=record.version,
            content_hash=record.content_hash or "",
            activation_reason="pinned_delegation",
            tool_hints=list(record.tool_hints),
        )
        pinned.append(context)
    return pinned
|
||||
|
||||
@staticmethod
|
||||
def _merge_skill_contexts(
|
||||
pinned_skills: list[SkillContext],
|
||||
open_skills: list[SkillContext],
|
||||
) -> list[SkillContext]:
|
||||
result: list[SkillContext] = []
|
||||
seen: set[str] = set()
|
||||
for skill in [*pinned_skills, *open_skills]:
|
||||
if skill.name in seen:
|
||||
continue
|
||||
seen.add(skill.name)
|
||||
result.append(skill)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _serialize_tool_calls(tool_calls: list[Any]) -> list[dict[str, Any]]:
|
||||
payload: list[dict[str, Any]] = []
|
||||
@ -683,6 +829,7 @@ class AgentLoop:
|
||||
tool_iterations: int,
|
||||
provider_name: str | None,
|
||||
usage: dict[str, Any],
|
||||
task_id: str | None = None,
|
||||
) -> AgentRunResult:
|
||||
"""把主链中的未处理异常收口成可追踪的 assistant error turn。"""
|
||||
|
||||
@ -691,6 +838,7 @@ class AgentLoop:
|
||||
run_id=run_id,
|
||||
role="assistant",
|
||||
event_type="assistant_message_added",
|
||||
event_payload={"task_id": task_id} if task_id else None,
|
||||
content=message,
|
||||
finish_reason="error",
|
||||
source=source,
|
||||
@ -706,6 +854,7 @@ class AgentLoop:
|
||||
event_payload={
|
||||
"tool_iterations": tool_iterations,
|
||||
"provider_name": provider_name,
|
||||
"task_id": task_id,
|
||||
},
|
||||
content=message,
|
||||
finish_reason="error",
|
||||
@ -724,4 +873,87 @@ class AgentLoop:
|
||||
provider_name=provider_name,
|
||||
model=model,
|
||||
usage=usage,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
@staticmethod
def _record_skill_learning(
    *,
    skill_learning_service: Any,
    session_manager: Any,
    session_id: str,
    run_id: str,
    task: str,
    run_started_at: str,
    run_ended_at: str,
    finish_reason: str,
    activated_receipts: list[SkillActivationReceipt],
    success: bool,
    task_id: str | None = None,
    attempt_index: int | None = None,
    generate_candidates: bool = False,
) -> None:
    """Hand one finished run to the skill-learning service and log the outcome.

    A ``RunRecord`` for the run and one ``SkillEffectRecord`` per activated
    receipt are built and passed to
    ``skill_learning_service.collect_run_receipts``. The result is appended
    to the session as a system event with ``context_visible=False``:

    * ``skill_effects_snapshotted`` when the service call returns;
    * ``skill_effects_snapshot_failed`` when it raises. The exception is
      recorded in the event and not re-raised.
    """

    def _log_event(event_type: str, payload: dict[str, Any], text: str) -> None:
        # Both outcomes share the same envelope; only type/payload/text differ.
        session_manager.append_message(
            session_id,
            run_id=run_id,
            role="system",
            event_type=event_type,
            event_payload=payload,
            content=text,
            context_visible=False,
        )

    run_record = RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_id=task_id,
        attempt_index=attempt_index,
        task_text=task,
        started_at=run_started_at,
        ended_at=run_ended_at,
        success=success,
        finish_reason=finish_reason,
        feedback={},
        activated_skills=list(activated_receipts),
    )

    effect_records = []
    for receipt in activated_receipts:
        effect_records.append(
            SkillEffectRecord(
                run_id=run_id,
                skill_name=receipt.skill_name,
                skill_version=receipt.skill_version,
                success=success,
                feedback_score=None,
                notes=finish_reason,
                created_at=run_ended_at,
            )
        )

    try:
        candidates = skill_learning_service.collect_run_receipts(
            RunReceiptContext(run_record=run_record, effect_records=effect_records),
            generate_candidates=generate_candidates,
        )
    except Exception as exc:  # pragma: no cover - defensive hot-path guard
        _log_event(
            "skill_effects_snapshot_failed",
            {
                "run_record": run_record.to_dict(),
                "skill_effects": [item.to_dict() for item in effect_records],
                "error": str(exc),
            },
            f"Skill learning receipt recording failed: {exc}",
        )
    else:
        _log_event(
            "skill_effects_snapshotted",
            {
                "run_record": run_record.to_dict(),
                "skill_effects": [item.to_dict() for item in effect_records],
                "learning_candidates": [candidate.to_dict() for candidate in candidates],
                "learning_candidate_enabled": generate_candidates,
            },
            f"Recorded {len(effect_records)} skill effect record(s).",
        )
|
||||
|
||||
@staticmethod
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
@ -91,6 +91,19 @@ class SessionManager:
|
||||
|
||||
return self.store.get_run_event_records(session_id, run_id)
|
||||
|
||||
def update_latest_assistant_event_payload(
    self,
    session_id: str,
    run_id: str,
    updates: dict[str, Any],
) -> None:
    """Project run-level UI state back onto the latest visible assistant message.

    Thin pass-through to the backing store's method of the same name.
    """
    backing_store = self.store
    backing_store.update_latest_assistant_event_payload(session_id, run_id, updates)
|
||||
|
||||
def set_run_context_visible(self, session_id: str, run_id: str, visible: bool) -> None:
    """Forward a run-level context-visibility change to the backing store."""
    backing_store = self.store
    backing_store.set_run_context_visible(session_id, run_id, visible)
|
||||
|
||||
def list_run_ids(self, session_id: str) -> list[str]:
|
||||
"""按出现顺序列出当前 session 的所有 run_id。"""
|
||||
|
||||
|
||||
@ -75,6 +75,19 @@ class MessageRecord:
|
||||
"role": self.role,
|
||||
"content": self.content,
|
||||
}
|
||||
if self.run_id:
|
||||
payload["run_id"] = self.run_id
|
||||
if self.event_payload:
|
||||
if self.event_payload.get("task_id"):
|
||||
payload["task_id"] = self.event_payload.get("task_id")
|
||||
if self.event_payload.get("task_status"):
|
||||
payload["task_status"] = self.event_payload.get("task_status")
|
||||
if self.event_payload.get("validation_status"):
|
||||
payload["validation_status"] = self.event_payload.get("validation_status")
|
||||
if self.event_payload.get("feedback_state"):
|
||||
payload["feedback_state"] = self.event_payload.get("feedback_state")
|
||||
if self.event_payload.get("feedback_error"):
|
||||
payload["feedback_error"] = self.event_payload.get("feedback_error")
|
||||
if self.tool_name:
|
||||
payload["tool_name"] = self.tool_name
|
||||
if self.tool_calls:
|
||||
|
||||
@ -432,6 +432,71 @@ class SessionStore:
|
||||
)
|
||||
return [MessageRecord.from_row(row) for row in rows]
|
||||
|
||||
def update_latest_assistant_event_payload(
    self,
    session_id: str,
    run_id: str,
    updates: dict[str, Any],
) -> None:
    """Merge ``updates`` into the payload of a run's newest visible assistant message.

    The target row is the most recent (by ``timestamp``, then ``id``)
    ``assistant_message_added`` message of the run with ``context_visible = 1``.
    Nothing happens when ``updates`` is empty or no such row exists. A stored
    payload that is not valid JSON, or is not a JSON object, is treated as
    empty before merging.
    """

    if not updates:
        return

    def _merge_into_latest(conn: sqlite3.Connection) -> None:
        latest = conn.execute(
            """
            SELECT id, event_payload
            FROM messages
            WHERE session_id = ?
            AND run_id = ?
            AND role = 'assistant'
            AND event_type = 'assistant_message_added'
            AND context_visible = 1
            ORDER BY timestamp DESC, id DESC
            LIMIT 1
            """,
            (session_id, run_id),
        ).fetchone()
        if latest is None:
            return

        merged: dict[str, Any] = {}
        stored = latest["event_payload"]
        if stored:
            try:
                decoded = json.loads(stored)
            except json.JSONDecodeError:
                decoded = None
            if isinstance(decoded, dict):
                merged = decoded
        merged.update(updates)

        serialized = json.dumps(merged, ensure_ascii=False, sort_keys=True)
        conn.execute(
            """
            UPDATE messages
            SET event_payload = ?
            WHERE id = ?
            """,
            (serialized, latest["id"]),
        )

    self._execute_write(_merge_into_latest)
|
||||
|
||||
def set_run_context_visible(self, session_id: str, run_id: str, visible: bool) -> None:
    """Set the ``context_visible`` flag on every message of one run.

    Rows whose flag already equals the requested value are excluded by the
    ``WHERE`` clause, so only rows that actually change are written.
    """

    flag = 1 if visible else 0

    def _apply(conn: sqlite3.Connection) -> None:
        conn.execute(
            """
            UPDATE messages
            SET context_visible = ?
            WHERE session_id = ?
            AND run_id = ?
            AND context_visible != ?
            """,
            (flag, session_id, run_id, flag),
        )

    self._execute_write(_apply)
|
||||
|
||||
def get_messages_as_conversation(self, session_id: str) -> list[dict[str, Any]]:
|
||||
messages: list[dict[str, Any]] = []
|
||||
for record in self.get_event_records(session_id):
|
||||
|
||||
@ -21,6 +21,16 @@ from beaver.interfaces.channels import ChannelAdapter, ChannelManager
|
||||
from beaver.services.agent_service import AgentService
|
||||
|
||||
|
||||
def _validate_gateway_service(service: AgentService) -> None:
|
||||
"""Fail fast on injected service objects that do not satisfy gateway needs."""
|
||||
|
||||
handler = getattr(service, "handle_inbound_message", None)
|
||||
if not callable(handler):
|
||||
raise TypeError(
|
||||
"Gateway requires a service with an async 'handle_inbound_message(inbound)' method"
|
||||
)
|
||||
|
||||
|
||||
async def _cleanup_owned_service(
|
||||
service: AgentService,
|
||||
*,
|
||||
@ -125,6 +135,7 @@ async def run_gateway(
|
||||
"""
|
||||
|
||||
attached_service = service or AgentService(workspace=workspace, config_path=config_path)
|
||||
_validate_gateway_service(attached_service)
|
||||
if channel_manager is not None and channels is not None:
|
||||
raise ValueError("Pass either channel_manager or channels, not both")
|
||||
if bus is not None:
|
||||
|
||||
@ -2,16 +2,30 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers.registry import PROVIDERS, find_by_name
|
||||
from beaver.foundation.config import default_config_path, load_config
|
||||
from beaver.services.agent_service import AgentService
|
||||
from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig
|
||||
|
||||
from .deps import get_agent_service
|
||||
from .schemas import WebChatRequest, WebChatResponse, WebErrorResponse, WebStatusResponse
|
||||
from .schemas import (
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
WebChatResponse,
|
||||
WebErrorResponse,
|
||||
WebProviderConfigRequest,
|
||||
WebProviderConfigResponse,
|
||||
WebStatusResponse,
|
||||
)
|
||||
|
||||
try:
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
@ -50,6 +64,24 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
|
||||
|
||||
return decorator
|
||||
|
||||
def put(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the decorated function is handed back as-is.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
|
||||
|
||||
def patch(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the decorated function is handed back as-is.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
|
||||
|
||||
def delete(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the decorated function is handed back as-is.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def _app_lifespan(
|
||||
@ -82,9 +114,28 @@ async def _app_lifespan(
|
||||
else:
|
||||
attached_service.close()
|
||||
raise
|
||||
worker: SkillLearningWorker | None = None
|
||||
worker_task = None
|
||||
worker_config = SkillLearningWorkerConfig.from_env()
|
||||
if owns_service and worker_config.enabled:
|
||||
loaded = attached_service.create_loop().boot()
|
||||
worker = SkillLearningWorker(
|
||||
pipeline=loaded.skill_learning_pipeline, # type: ignore[arg-type]
|
||||
provider_bundle_factory=lambda: attached_service._make_provider_bundle_for_task(loaded, {}), # noqa: SLF001
|
||||
config=worker_config,
|
||||
)
|
||||
worker_task = asyncio.create_task(worker.run_forever())
|
||||
app.state.skill_learning_worker = worker
|
||||
app.state.skill_learning_worker_task = worker_task
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if worker is not None:
|
||||
worker.stop()
|
||||
if worker_task is not None:
|
||||
worker_task.cancel()
|
||||
with suppress(BaseException):
|
||||
await worker_task
|
||||
if owns_service and started:
|
||||
await attached_service.shutdown(
|
||||
timeout_seconds=shutdown_timeout_seconds,
|
||||
@ -133,6 +184,412 @@ def create_app(
|
||||
mode="running" if running else ("direct" if agent_service.has_loop else "idle"),
|
||||
)
|
||||
|
||||
@app.get("/api/status")
async def status(request: Request) -> dict[str, Any]:
    """Report config/workspace paths, profile settings and per-provider status.

    One entry is produced for every spec in ``PROVIDERS``. ``has_key`` means
    "enabled" for OAuth providers, "has an API base" for local/direct
    providers, and "has an API key" for everything else.
    """
    agent_service = get_agent_service(request)
    loaded = agent_service.create_loop().boot()
    config = loaded.config
    config_path = config.config_path or default_config_path(workspace=loaded.workspace)
    default_provider = config.resolve_provider_target().get("provider_name")

    def _provider_entry(spec: Any) -> dict[str, Any]:
        provider_cfg = config.providers.get(spec.name)
        enabled = provider_cfg is not None
        api_key = provider_cfg.api_key if enabled else None
        api_base = provider_cfg.api_base if enabled else None
        # Providers in any of these categories do not need an API key.
        keyless = spec.is_oauth or spec.is_local or spec.is_direct
        if spec.is_oauth:
            has_key = enabled
        elif keyless:
            has_key = bool(api_base)
        else:
            has_key = bool(api_key)
        return {
            "id": spec.name,
            "name": spec.label,
            "label": spec.label,
            "enabled": enabled,
            "active": default_provider == spec.name,
            "has_key": has_key,
            "api_key_masked": _mask_secret(api_key),
            "api_base": api_base or "",
            "default_api_base": spec.default_api_base,
            "detail": api_base or spec.default_api_base or "",
            "requires_api_key": not keyless,
            "is_oauth": spec.is_oauth,
            "is_local": spec.is_local,
        }

    providers_status = [_provider_entry(spec) for spec in PROVIDERS]
    profile = agent_service.profile

    return {
        "config_path": str(config_path),
        "config_exists": config_path.exists(),
        "workspace": str(loaded.workspace),
        "workspace_exists": loaded.workspace.exists(),
        "model": config.default_model or profile.default_model,
        "max_tokens": profile.max_tokens,
        "temperature": profile.temperature,
        "max_tool_iterations": profile.max_tool_iterations,
        "providers": providers_status,
        "channels": [{"name": "web", "enabled": True}],
        "cron": {"enabled": False, "jobs": 0, "next_wake_at_ms": None},
    }
|
||||
|
||||
@app.post("/api/providers/{provider_name}/config", response_model=WebProviderConfigResponse)
async def update_provider_config(
    provider_name: str,
    request: Request,
    payload: WebProviderConfigRequest,
) -> WebProviderConfigResponse:
    """Enable, update or disable one provider in the JSON config file.

    Disabling removes the provider entry and clears ``agents.defaults.provider``
    when it pointed at this provider. Enabling merges the submitted fields
    into the existing entry, makes the provider the default, and optionally
    sets the default model. The config file is rewritten and the agent
    config reloaded afterwards.

    Raises:
        HTTPException: 404 when ``provider_name`` is not a known provider.
    """
    spec = find_by_name(provider_name)
    if spec is None:
        raise HTTPException(status_code=404, detail=f"Unknown provider: {provider_name}")

    agent_service = get_agent_service(request)
    config_path = agent_service.loader.config.config_path or default_config_path(workspace=agent_service.loader.workspace)
    raw = _read_config_json(config_path)
    providers = _ensure_dict(raw, "providers")
    agents = _ensure_dict(raw, "agents")
    defaults = _ensure_dict(agents, "defaults")

    if not payload.enabled:
        providers.pop(spec.name, None)
        if _clean_text(defaults.get("provider")) == spec.name:
            defaults.pop("provider", None)
    else:
        current = providers.get(spec.name)
        provider_payload = dict(current) if isinstance(current, dict) else {}
        api_key = _clean_text(payload.api_key)
        api_base = _clean_text(payload.api_base)

        # An empty key keeps whatever is stored. (The former "else" branch
        # popped "apiKey" only when it was already absent, i.e. did nothing.)
        if api_key:
            provider_payload["apiKey"] = api_key
            # Drop the snake_case alias so the file never carries two
            # conflicting values for the same setting.
            provider_payload.pop("api_key", None)

        if api_base:
            provider_payload["apiBase"] = api_base
            provider_payload.pop("api_base", None)
        elif spec.default_api_base:
            # Fill in the provider default only when no base is stored yet.
            if not provider_payload.get("apiBase") and not provider_payload.get("api_base"):
                provider_payload["apiBase"] = spec.default_api_base
        else:
            # No submitted base and no provider default: clear the stored one.
            provider_payload.pop("apiBase", None)

        if payload.request_timeout_seconds is not None:
            provider_payload["requestTimeoutSeconds"] = payload.request_timeout_seconds
        providers[spec.name] = provider_payload
        defaults["provider"] = spec.name
        model = _clean_text(payload.model)
        if model:
            defaults["model"] = model

    _write_config_json(config_path, raw)
    _reload_agent_config(agent_service, config_path)
    return WebProviderConfigResponse(ok=True, provider=spec.name, enabled=payload.enabled)
|
||||
|
||||
@app.get("/api/sessions")
async def list_sessions(request: Request) -> list[dict[str, Any]]:
    """List up to 100 sessions, excluding those whose source is ``subagent``.

    Each entry exposes the session id as both ``key`` and ``path`` together
    with ISO-formatted creation and last-activity times.
    """
    loaded = get_agent_service(request).create_loop().boot()
    rows = loaded.session_manager.list_sessions_rich(limit=100, exclude_sources=["subagent"])  # type: ignore[union-attr]

    summaries: list[dict[str, Any]] = []
    for row in rows:
        session_key = str(row.get("id"))
        summaries.append(
            {
                "key": session_key,
                "created_at": _iso_from_timestamp(row.get("started_at")),
                "updated_at": _iso_from_timestamp(row.get("last_active")),
                "path": session_key,
            }
        )
    return summaries
|
||||
|
||||
@app.post("/api/sessions/{session_id:path}")
async def create_session(session_id: str, request: Request) -> dict[str, Any]:
    """Fetch or create the session with ``source="web"`` and return its detail view."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    web_session = manager.get_or_create(session_id, source="web")  # type: ignore[union-attr]
    return _session_detail(manager, session_id, web_session)  # type: ignore[arg-type]
|
||||
|
||||
@app.get("/api/sessions/{session_id:path}/process")
async def get_session_process(session_id: str, request: Request) -> dict[str, Any]:
    """Return the process projection of one session.

    The projector is built from the loaded session manager and run memory
    store; its class is imported lazily inside the handler.
    """
    from beaver.services.process_service import SessionProcessProjector

    loaded = get_agent_service(request).create_loop().boot()
    session_manager = loaded.session_manager
    run_memory_store = loaded.run_memory_store
    return SessionProcessProjector(session_manager, run_memory_store).project(session_id)
|
||||
|
||||
@app.get("/api/sessions/{session_id:path}")
async def get_session(session_id: str, request: Request) -> dict[str, Any]:
    """Return the detail view of a session.

    The session is looked up with ``get_or_create``, so an unknown id yields
    a new ``source="web"`` session rather than an error.
    """
    manager = get_agent_service(request).create_loop().boot().session_manager
    web_session = manager.get_or_create(session_id, source="web")  # type: ignore[union-attr]
    return _session_detail(manager, session_id, web_session)  # type: ignore[arg-type]
|
||||
|
||||
@app.delete("/api/sessions/{session_id:path}")
async def delete_session(session_id: str, request: Request) -> dict[str, Any]:
    """End the session via ``end_session(session_id, "deleted")`` and acknowledge."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    manager.end_session(session_id, "deleted")  # type: ignore[union-attr]
    return {"ok": True}
|
||||
|
||||
@app.get("/api/agents")
async def list_agents(request: Request) -> list[dict[str, Any]]:
    """Return every agent in the registry, converted to its UI representation."""
    registry = get_agent_service(request).create_loop().boot().agent_registry
    registered = registry.list_agents()  # type: ignore[union-attr]
    return [_registered_agent_to_ui(entry) for entry in registered]
|
||||
|
||||
@app.post("/api/agents")
async def upsert_agent(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Upsert an agent from a UI payload and return it in UI form."""
    registry = get_agent_service(request).create_loop().boot().agent_registry
    registry_payload = _agent_payload_from_ui(payload)
    stored = registry.upsert_agent(registry_payload)  # type: ignore[union-attr]
    return _registered_agent_to_ui(stored)
|
||||
|
||||
@app.patch("/api/agents/{agent_id}")
async def patch_agent(agent_id: str, request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Overlay UI-supplied fields onto an existing agent and store the result.

    The ``agent_id`` from the URL always wins over any id in the payload.

    Raises:
        HTTPException: 404 when no agent with ``agent_id`` exists.
    """
    registry = get_agent_service(request).create_loop().boot().agent_registry
    existing = registry.get_agent(agent_id)  # type: ignore[union-attr]
    if existing is None:
        raise HTTPException(status_code=404, detail=f"Unknown agent: {agent_id}")

    combined = existing.to_dict()
    combined.update(_agent_payload_from_ui(payload))
    combined["agent_id"] = agent_id  # the path parameter is authoritative
    stored = registry.upsert_agent(combined)  # type: ignore[union-attr]
    return _registered_agent_to_ui(stored)
|
||||
|
||||
@app.post("/api/agents/{agent_id}/disable")
async def disable_agent(agent_id: str, request: Request) -> dict[str, Any]:
    """Disable an agent and return its UI representation.

    Raises:
        HTTPException: 404 when the registry raises ``ValueError``.
    """
    registry = get_agent_service(request).create_loop().boot().agent_registry
    try:
        disabled = registry.disable_agent(agent_id)  # type: ignore[union-attr]
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return _registered_agent_to_ui(disabled)
|
||||
|
||||
@app.get("/api/skills")
async def list_skills(request: Request) -> list[dict[str, Any]]:
    """List all skills, including unavailable ones, with an availability flag.

    Any source other than ``"builtin"`` is reported as ``"workspace"``.
    """
    loader = get_agent_service(request).create_loop().boot().skills_loader
    records = loader.list_skills(filter_unavailable=False)  # type: ignore[union-attr]

    listing: list[dict[str, Any]] = []
    for record in records:
        origin = "builtin" if record.source == "builtin" else "workspace"
        listing.append(
            {
                "name": record.name,
                "description": record.description,
                "source": origin,
                "available": loader._record_available(record),  # type: ignore[union-attr]
                "path": str(record.path),
                "agent_cards": [],
            }
        )
    return listing
|
||||
|
||||
@app.get("/api/skills/candidates")
async def list_skill_candidates(request: Request, status: str | None = None) -> list[dict[str, Any]]:
    """List skill-learning candidates as dicts, passing ``status`` through as a filter."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    matches = pipeline.list_candidates(status=status)  # type: ignore[union-attr]
    return [candidate.to_dict() for candidate in matches]
|
||||
|
||||
@app.get("/api/skills/candidates/{candidate_id}")
async def get_skill_candidate(candidate_id: str, request: Request) -> dict[str, Any]:
    """Return one skill-learning candidate as a dict.

    Raises:
        HTTPException: 404 when the lookup (or ``to_dict``) raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        candidate = pipeline.get_candidate(candidate_id)  # type: ignore[union-attr]
        return candidate.to_dict()
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
|
||||
|
||||
@app.post("/api/skills/candidates/{candidate_id}/draft")
async def synthesize_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]:
    """Synthesize a draft for a candidate, then safety-check and evaluate it.

    The same provider bundle is used for synthesis and evaluation.

    Raises:
        HTTPException: 404 when any of the three pipeline steps raises
            ``ValueError``.
    """
    agent_service = get_agent_service(request)
    loaded = agent_service.create_loop().boot()
    bundle = agent_service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
    pipeline = loaded.skill_learning_pipeline
    try:
        draft = await pipeline.synthesize_draft(  # type: ignore[union-attr]
            candidate_id,
            provider_bundle=bundle,
        )
        pipeline.check_safety(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        await pipeline.evaluate_draft(  # type: ignore[union-attr]
            candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=bundle,
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return draft.to_dict()
|
||||
|
||||
@app.post("/api/skills/candidates/{candidate_id}/regenerate")
async def regenerate_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]:
    """Regenerate a candidate's draft, then safety-check and evaluate it.

    The same provider bundle is used for regeneration and evaluation.

    Raises:
        HTTPException: 404 when any of the three pipeline steps raises
            ``ValueError``.
    """
    agent_service = get_agent_service(request)
    loaded = agent_service.create_loop().boot()
    bundle = agent_service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
    pipeline = loaded.skill_learning_pipeline
    try:
        draft = await pipeline.regenerate_draft(  # type: ignore[union-attr]
            candidate_id,
            provider_bundle=bundle,
        )
        pipeline.check_safety(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        await pipeline.evaluate_draft(  # type: ignore[union-attr]
            candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=bundle,
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return draft.to_dict()
|
||||
|
||||
@app.post("/api/skills/learning/run-once")
async def run_skill_learning_once(request: Request) -> dict[str, Any]:
    """Run a single skill-learning worker pass and return its result as a dict.

    A fresh worker is built per request from the loaded pipeline and the
    environment-derived worker config.
    """
    agent_service = get_agent_service(request)
    loaded = agent_service.create_loop().boot()

    def _bundle_factory() -> Any:
        return agent_service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001

    one_shot_worker = SkillLearningWorker(
        pipeline=loaded.skill_learning_pipeline,  # type: ignore[arg-type]
        provider_bundle_factory=_bundle_factory,
        config=SkillLearningWorkerConfig.from_env(),
    )
    outcome = await one_shot_worker.run_once()
    return outcome.to_dict()
|
||||
|
||||
@app.get("/api/skills/drafts")
async def list_skill_drafts(request: Request) -> list[dict[str, Any]]:
    """List all skill drafts, each with its safety and eval report.

    A report that does not exist is returned as ``None``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline

    listing: list[dict[str, Any]] = []
    for draft in pipeline.list_drafts():  # type: ignore[union-attr]
        safety = pipeline.get_safety_report(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        evaluation = pipeline.get_eval_report(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        entry = {**draft.to_dict()}
        entry["safety_report"] = None if safety is None else safety.to_dict()
        entry["eval_report"] = None if evaluation is None else evaluation.to_dict()
        listing.append(entry)
    return listing
|
||||
|
||||
@app.get("/api/skills/{skill_name}/drafts/{draft_id}")
async def get_skill_draft(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return one skill draft with its reviews, safety report and eval report.

    A report that does not exist is returned as ``None``.

    Raises:
        HTTPException: 404 when the draft lookup raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        draft = pipeline.get_draft(skill_name, draft_id)  # type: ignore[union-attr]
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    reviews = [
        item.to_dict()
        for item in pipeline.reviews_for_draft(skill_name, draft_id)  # type: ignore[union-attr]
    ]
    # Fetch each report exactly once: checking one lookup for None and
    # serialising a second lookup doubles the work and can disagree if the
    # report changes in between.
    safety = pipeline.get_safety_report(skill_name, draft_id)  # type: ignore[union-attr]
    eval_report = pipeline.get_eval_report(skill_name, draft_id)  # type: ignore[union-attr]
    return {
        **draft.to_dict(),
        "reviews": reviews,
        "safety_report": safety.to_dict() if safety is not None else None,
        "eval_report": eval_report.to_dict() if eval_report is not None else None,
    }
|
||||
|
||||
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/safety")
async def get_skill_draft_safety(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return the safety report of a draft.

    Raises:
        HTTPException: 404 when no safety report exists for the draft.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    safety = pipeline.get_safety_report(skill_name, draft_id)  # type: ignore[union-attr]
    if safety is not None:
        return safety.to_dict()
    raise HTTPException(status_code=404, detail="Safety report not found")
|
||||
|
||||
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/eval")
async def get_skill_draft_eval(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return the eval report of a draft.

    Raises:
        HTTPException: 404 when no eval report exists for the draft.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    evaluation = pipeline.get_eval_report(skill_name, draft_id)  # type: ignore[union-attr]
    if evaluation is not None:
        return evaluation.to_dict()
    raise HTTPException(status_code=404, detail="Eval report not found")
|
||||
|
||||
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/submit")
async def submit_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Submit a draft for review and return the resulting review as a dict.

    ``requested_by`` defaults to ``"web"`` and ``notes`` to an empty string
    when they are missing or falsy in the optional body.

    Raises:
        HTTPException: 404 when the pipeline raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    body = payload or {}
    try:
        review = pipeline.submit_review(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            requested_by=str(body.get("requested_by") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
|
||||
|
||||
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/approve")
async def approve_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Approve a draft and return the resulting review as a dict.

    ``reviewer`` defaults to ``"web"`` and ``notes`` to an empty string when
    they are missing or falsy in the optional body.

    Raises:
        HTTPException: 404 when the pipeline raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    body = payload or {}
    try:
        review = pipeline.approve(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            reviewer=str(body.get("reviewer") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
|
||||
|
||||
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/reject")
async def reject_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Reject a draft and return the resulting review as a dict.

    ``reviewer`` defaults to ``"web"`` and ``notes`` to an empty string when
    they are missing or falsy in the optional body.

    Raises:
        HTTPException: 404 when the pipeline raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    body = payload or {}
    try:
        review = pipeline.reject(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            reviewer=str(body.get("reviewer") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
|
||||
|
||||
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/publish")
async def publish_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Publish a draft and return the publish result as a dict.

    ``publisher`` defaults to ``"web"``, ``notes`` to an empty string, and
    ``confirm_high_risk`` to ``False`` when absent from the optional body.

    Raises:
        HTTPException: 400 when the pipeline raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    body = payload or {}
    try:
        outcome = pipeline.publish(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            publisher=str(body.get("publisher") or "web"),
            notes=str(body.get("notes") or ""),
            confirm_high_risk=bool(body.get("confirm_high_risk")),
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return outcome.to_dict()
|
||||
|
||||
@app.post("/api/skills/{skill_name}/disable")
async def disable_skill(skill_name: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Disable a skill and return the resulting spec as a dict.

    ``actor`` defaults to ``"web"`` and ``reason`` to an empty string when
    they are missing or falsy in the optional body.

    Raises:
        HTTPException: 404 when the pipeline raises ``ValueError``.
    """
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    body = payload or {}
    try:
        disabled_spec = pipeline.disable(  # type: ignore[union-attr]
            skill_name,
            actor=str(body.get("actor") or "web"),
            reason=str(body.get("reason") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return disabled_spec.to_dict()
|
||||
|
||||
@app.post("/api/skills/{skill_name}/rollback")
async def rollback_skill(skill_name: str, request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Roll a skill back to ``target_version`` and return the spec as a dict.

    ``target_version`` is required in the JSON body; a missing or blank value
    yields HTTP 400 before the agent loop is booted. ``actor`` falls back to
    ``"web"`` and ``reason`` to ``""``. A ``ValueError`` raised by the
    pipeline is reported as HTTP 400.
    """
    requested = payload.get("target_version") or ""
    target_version = str(requested).strip()
    if target_version == "":
        raise HTTPException(status_code=400, detail="target_version is required")

    loaded = get_agent_service(request).create_loop().boot()
    actor = str(payload.get("actor") or "web")
    reason = str(payload.get("reason") or "")
    try:
        spec = loaded.skill_learning_pipeline.rollback(  # type: ignore[union-attr]
            skill_name,
            target_version,
            actor=actor,
            reason=reason,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return spec.to_dict()
|
||||
|
||||
@app.post(
|
||||
"/api/chat",
|
||||
response_model=WebChatResponse,
|
||||
@ -191,11 +648,132 @@ def create_app(
|
||||
provider_name=result.provider_name,
|
||||
model=result.model,
|
||||
usage=result.usage,
|
||||
task_id=result.task_id,
|
||||
task_status=result.task_status,
|
||||
validation_result=result.validation_result,
|
||||
)
|
||||
|
||||
@app.post(
    "/api/chat/feedback",
    response_model=WebChatFeedbackResponse,
    responses={
        400: {"model": WebErrorResponse},
        404: {"model": WebErrorResponse},
    },
)
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
    """Forward chat feedback to the agent service and wrap its result.

    A ``ValueError`` from the service becomes HTTP 404 when its message
    contains ``"No internal task"`` and HTTP 400 otherwise.
    """
    service = get_agent_service(request)
    try:
        outcome = await service.submit_feedback(
            session_id=payload.session_id,
            run_id=payload.run_id,
            feedback_type=payload.feedback_type,
            comment=payload.comment,
        )
    except ValueError as exc:
        message = str(exc)
        if "No internal task" in message:
            code = 404
        else:
            code = 400
        raise HTTPException(status_code=code, detail=message) from exc

    return WebChatFeedbackResponse(**outcome)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def _session_detail(session_manager: Any, session_id: str, session: dict[str, Any]) -> dict[str, Any]:
    """Build the UI detail payload for one session.

    Only events whose ``role`` is ``"user"`` or ``"assistant"`` are kept.
    Missing content becomes ``""`` and timestamps are passed through
    ``_iso_from_timestamp``.
    """
    visible_roles = {"user", "assistant"}
    messages = []
    for event in session_manager.get_messages_as_conversation(session_id):
        role = event.get("role")
        if role in visible_roles:
            entry = {
                "role": role,
                "content": event.get("content") or "",
                "timestamp": _iso_from_timestamp(event.get("timestamp")),
                "run_id": event.get("run_id"),
                "task_id": event.get("task_id"),
                "task_status": event.get("task_status"),
                "validation_status": event.get("validation_status"),
                "feedback_state": event.get("feedback_state"),
                "feedback_error": event.get("feedback_error"),
            }
            messages.append(entry)

    created_at = _iso_from_timestamp(session.get("started_at"))
    updated_at = _iso_from_timestamp(session.get("last_active"))
    return {
        "key": session_id,
        "messages": messages,
        "created_at": created_at,
        "updated_at": updated_at,
    }
|
||||
|
||||
|
||||
def _iso_from_timestamp(value: Any) -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
if value in (None, ""):
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
try:
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
|
||||
|
||||
def _registered_agent_to_ui(agent: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"id": agent.agent_id,
|
||||
"name": agent.display_name or agent.name,
|
||||
"description": agent.description,
|
||||
"source": agent.source if agent.source in {"workspace", "skill", "builtin"} else "workspace",
|
||||
"kind": "specialist",
|
||||
"protocol": None,
|
||||
"endpoint": None,
|
||||
"base_url": None,
|
||||
"card_url": None,
|
||||
"auth_env": None,
|
||||
"auth_mode": "none",
|
||||
"auth_audience": None,
|
||||
"auth_scopes": [],
|
||||
"tags": list(agent.tags),
|
||||
"aliases": [agent.name],
|
||||
"metadata": {
|
||||
**dict(agent.metadata),
|
||||
"role": agent.role,
|
||||
"capabilities": list(agent.capabilities),
|
||||
"skill_names": list(agent.skill_names),
|
||||
"tool_hints": list(agent.tool_hints),
|
||||
"priority": agent.priority,
|
||||
"status": agent.status,
|
||||
},
|
||||
"support_streaming": False,
|
||||
}
|
||||
|
||||
|
||||
def _agent_payload_from_ui(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
metadata = dict(payload.get("metadata") or {})
|
||||
capabilities = payload.get("capabilities")
|
||||
if capabilities is None and isinstance(metadata.get("capabilities"), list):
|
||||
capabilities = metadata.get("capabilities")
|
||||
role = payload.get("role") or metadata.get("role") or payload.get("kind") or ""
|
||||
return {
|
||||
"agent_id": payload.get("agent_id") or payload.get("id") or payload.get("name"),
|
||||
"name": payload.get("name") or payload.get("id"),
|
||||
"display_name": payload.get("display_name") or payload.get("name") or payload.get("id"),
|
||||
"role": role,
|
||||
"description": payload.get("description") or "",
|
||||
"system_prompt": payload.get("system_prompt") or metadata.get("system_prompt") or "",
|
||||
"capabilities": capabilities or [],
|
||||
"skill_names": payload.get("skill_names") or metadata.get("skill_names") or [],
|
||||
"tool_hints": payload.get("tool_hints") or metadata.get("tool_hints") or [],
|
||||
"model": payload.get("model") or metadata.get("model"),
|
||||
"provider_name": payload.get("provider_name") or metadata.get("provider_name"),
|
||||
"tags": payload.get("tags") or [],
|
||||
"priority": payload.get("priority") or metadata.get("priority") or 0,
|
||||
"status": payload.get("status") or ("active" if payload.get("enabled", True) else "disabled"),
|
||||
"source": payload.get("source") or "workspace",
|
||||
"metadata": metadata,
|
||||
}
|
||||
|
||||
|
||||
def _model_dump(value: Any) -> dict[str, Any] | None:
|
||||
"""兼容 Pydantic v1/v2 的最小导出辅助。"""
|
||||
|
||||
@ -206,3 +784,52 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
|
||||
if hasattr(value, "dict"):
|
||||
return value.dict(exclude_none=True)
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _clean_text(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _mask_secret(value: str | None) -> str:
    """Return a display-safe rendering of a secret.

    Blank or missing secrets render as ``""``. Secrets of eight characters or
    fewer are fully masked; longer ones keep their first and last four
    characters around the mask.
    """
    secret = _clean_text(value)
    if secret is None or secret == "":
        return ""
    if len(secret) > 8:
        return f"{secret[:4]}••••{secret[-4:]}"
    return "••••"
|
||||
|
||||
|
||||
def _read_config_json(path: Path) -> dict[str, Any]:
|
||||
if not path.exists():
|
||||
return {}
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
raise ValueError(f"Config must be a JSON object: {path}")
|
||||
return data
|
||||
|
||||
|
||||
def _ensure_dict(parent: dict[str, Any], key: str) -> dict[str, Any]:
|
||||
value = parent.get(key)
|
||||
if not isinstance(value, dict):
|
||||
value = {}
|
||||
parent[key] = value
|
||||
return value
|
||||
|
||||
|
||||
def _write_config_json(path: Path, data: dict[str, Any]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = path.with_name(f"{path.name}.tmp")
|
||||
tmp_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
|
||||
tmp_path.replace(path)
|
||||
|
||||
|
||||
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
    """Reload config from ``config_path`` and push it into the agent service.

    The service loader always receives the new config. If the service holds a
    ``_loop`` whose ``loaded`` attribute is set, that object's ``config`` is
    updated as well.
    """
    config = load_config(config_path=config_path)
    agent_service.loader.config = config

    loop = getattr(agent_service, "_loop", None)
    if loop is None:
        return
    loaded = getattr(loop, "loaded", None)
    if loaded is None:
        return
    loaded.config = config
|
||||
|
||||
@ -1,11 +1,25 @@
|
||||
"""Web request and response schemas."""
|
||||
|
||||
from .chat import WebChatRequest, WebChatResponse, WebErrorResponse, WebProviderTarget, WebStatusResponse
|
||||
from .chat import (
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
WebChatResponse,
|
||||
WebErrorResponse,
|
||||
WebProviderConfigRequest,
|
||||
WebProviderConfigResponse,
|
||||
WebProviderTarget,
|
||||
WebStatusResponse,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"WebChatFeedbackRequest",
|
||||
"WebChatFeedbackResponse",
|
||||
"WebChatRequest",
|
||||
"WebChatResponse",
|
||||
"WebErrorResponse",
|
||||
"WebProviderConfigRequest",
|
||||
"WebProviderConfigResponse",
|
||||
"WebProviderTarget",
|
||||
"WebStatusResponse",
|
||||
]
|
||||
|
||||
@ -77,6 +77,47 @@ class WebChatResponse(BaseModel):
|
||||
provider_name: str | None = None
|
||||
model: str | None = None
|
||||
usage: dict[str, Any] = Field(default_factory=dict)
|
||||
task_id: str | None = None
|
||||
task_status: str | None = None
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WebChatFeedbackRequest(BaseModel):
    """Request body for feedback on an assistant result in chat."""

    # Identify the session and run the feedback refers to.
    session_id: str
    run_id: str
    # Feedback category; this schema accepts any string.
    feedback_type: str
    # Optional free-form comment.
    comment: str | None = None
|
||||
|
||||
|
||||
class WebChatFeedbackResponse(BaseModel):
    """Response body describing the outcome of recording chat feedback."""

    # Session and run the feedback was recorded against.
    session_id: str
    run_id: str
    # Task the feedback was applied to, and that task's status.
    task_id: str
    task_status: str
    feedback_type: str
    # Learning candidates reported with the feedback; empty by default.
    learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebProviderConfigRequest(BaseModel):
    """Request body for updating a provider's config from the status page."""

    enabled: bool = True
    # Every setting below is optional and defaults to None.
    model: str | None = None
    api_key: str | None = None
    api_base: str | None = None
    request_timeout_seconds: float | None = None
|
||||
|
||||
|
||||
class WebProviderConfigResponse(BaseModel):
    """Response body for a provider config update."""

    ok: bool
    # Provider the update applied to and its enabled flag.
    provider: str
    enabled: bool
|
||||
|
||||
|
||||
class WebStatusResponse(BaseModel):
|
||||
|
||||
@ -1,2 +1,6 @@
|
||||
"""Run records."""
|
||||
|
||||
from .models import RunOutcome, RunRecord, SkillEffectRecord
|
||||
from .store import RunMemoryStore
|
||||
|
||||
__all__ = ["RunMemoryStore", "RunOutcome", "RunRecord", "SkillEffectRecord"]
|
||||
|
||||
142
app-instance/backend/beaver/memory/runs/models.py
Normal file
142
app-instance/backend/beaver/memory/runs/models.py
Normal file
@ -0,0 +1,142 @@
|
||||
"""Run-level receipts and skill effect records."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RunOutcome:
|
||||
success: bool
|
||||
finish_reason: str
|
||||
feedback_score: float | None = None
|
||||
notes: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"success": self.success,
|
||||
"finish_reason": self.finish_reason,
|
||||
"feedback_score": self.feedback_score,
|
||||
"notes": self.notes,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "RunOutcome":
|
||||
return cls(
|
||||
success=bool(payload.get("success")),
|
||||
finish_reason=str(payload.get("finish_reason") or ""),
|
||||
feedback_score=_coerce_optional_float(payload.get("feedback_score")),
|
||||
notes=str(payload.get("notes") or ""),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RunRecord:
|
||||
run_id: str
|
||||
session_id: str
|
||||
task_text: str
|
||||
started_at: str
|
||||
ended_at: str
|
||||
success: bool
|
||||
finish_reason: str
|
||||
feedback: dict[str, Any] = field(default_factory=dict)
|
||||
activated_skills: list[SkillActivationReceipt] = field(default_factory=list)
|
||||
task_id: str | None = None
|
||||
attempt_index: int | None = None
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"run_id": self.run_id,
|
||||
"session_id": self.session_id,
|
||||
"task_id": self.task_id,
|
||||
"attempt_index": self.attempt_index,
|
||||
"task_text": self.task_text,
|
||||
"started_at": self.started_at,
|
||||
"ended_at": self.ended_at,
|
||||
"success": self.success,
|
||||
"finish_reason": self.finish_reason,
|
||||
"feedback": dict(self.feedback),
|
||||
"activated_skills": [receipt.to_dict() for receipt in self.activated_skills],
|
||||
"validation_result": self.validation_result,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "RunRecord":
|
||||
return cls(
|
||||
run_id=str(payload["run_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
task_id=_coerce_optional_str(payload.get("task_id")),
|
||||
attempt_index=_coerce_optional_int(payload.get("attempt_index")),
|
||||
task_text=str(payload.get("task_text") or ""),
|
||||
started_at=str(payload.get("started_at") or ""),
|
||||
ended_at=str(payload.get("ended_at") or ""),
|
||||
success=bool(payload.get("success")),
|
||||
finish_reason=str(payload.get("finish_reason") or ""),
|
||||
feedback=dict(payload.get("feedback") or {}),
|
||||
activated_skills=[
|
||||
SkillActivationReceipt.from_dict(item)
|
||||
for item in payload.get("activated_skills") or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
validation_result=(
|
||||
dict(payload["validation_result"])
|
||||
if isinstance(payload.get("validation_result"), dict)
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillEffectRecord:
|
||||
run_id: str
|
||||
skill_name: str
|
||||
skill_version: str
|
||||
success: bool
|
||||
feedback_score: float | None
|
||||
notes: str
|
||||
created_at: str
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"run_id": self.run_id,
|
||||
"skill_name": self.skill_name,
|
||||
"skill_version": self.skill_version,
|
||||
"success": self.success,
|
||||
"feedback_score": self.feedback_score,
|
||||
"notes": self.notes,
|
||||
"created_at": self.created_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillEffectRecord":
|
||||
return cls(
|
||||
run_id=str(payload["run_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
skill_version=str(payload["skill_version"]),
|
||||
success=bool(payload.get("success")),
|
||||
feedback_score=_coerce_optional_float(payload.get("feedback_score")),
|
||||
notes=str(payload.get("notes") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
)
|
||||
|
||||
|
||||
def _coerce_optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
|
||||
|
||||
def _coerce_optional_int(value: Any) -> int | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
|
||||
def _coerce_optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
98
app-instance/backend/beaver/memory/runs/store.py
Normal file
98
app-instance/backend/beaver/memory/runs/store.py
Normal file
@ -0,0 +1,98 @@
|
||||
"""File-backed run receipt store."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from .models import RunRecord, SkillEffectRecord
|
||||
|
||||
|
||||
class RunMemoryStore:
    """File-backed store for run records and skill effect records.

    Both collections live under ``root`` as JSON Lines files:
    ``runs.jsonl`` and ``skill-effects.jsonl``.
    """

    def __init__(self, root: str | Path) -> None:
        """Create ``root`` if needed and derive the two JSONL file paths."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.runs_path = self.root / "runs.jsonl"
        self.effects_path = self.root / "skill-effects.jsonl"

    def append_run_record(self, record: RunRecord) -> None:
        """Append one run record to ``runs.jsonl``."""
        self._append_jsonl(self.runs_path, record.to_dict())

    def update_run_record(self, run_id: str, **updates: object) -> RunRecord | None:
        """Merge ``updates`` into the first record whose ``run_id`` matches.

        Returns the rebuilt record, or ``None`` (leaving the file untouched)
        when no record matches.
        """
        records = self.list_runs()
        updated: RunRecord | None = None
        for index, record in enumerate(records):
            if record.run_id != run_id:
                continue
            payload = record.to_dict()
            payload.update(updates)
            updated = RunRecord.from_dict(payload)
            records[index] = updated
            break
        if updated is None:
            return None

        self.runs_path.parent.mkdir(parents=True, exist_ok=True)
        # Stage the full rewrite in a sibling temp file and swap it in, so an
        # interrupted write cannot leave runs.jsonl truncated.
        tmp_path = self.runs_path.with_name(f"{self.runs_path.name}.tmp")
        tmp_path.write_text(
            "".join(
                json.dumps(record.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
                for record in records
            ),
            encoding="utf-8",
        )
        tmp_path.replace(self.runs_path)
        return updated

    def append_skill_effect(self, effect: SkillEffectRecord) -> None:
        """Append one skill effect record to ``skill-effects.jsonl``."""
        self._append_jsonl(self.effects_path, effect.to_dict())

    def list_runs(self) -> list[RunRecord]:
        """Return every stored run record in file order."""
        return [RunRecord.from_dict(item) for item in self._read_jsonl(self.runs_path)]

    def list_runs_by_skill(self, skill_name: str, version: str | None = None, limit: int | None = None) -> list[RunRecord]:
        """Return runs that activated ``skill_name`` (optionally a specific version).

        With ``limit`` set, only the last ``limit`` matches are returned; a
        limit of zero or less yields an empty list.
        """
        results: list[RunRecord] = [
            record
            for record in self.list_runs()
            if any(
                receipt.skill_name == skill_name
                and (version is None or receipt.skill_version == version)
                for receipt in record.activated_skills
            )
        ]
        return self._tail(results, limit)

    def list_skill_effects(self, skill_name: str, version: str | None = None, limit: int | None = None) -> list[SkillEffectRecord]:
        """Return effect records for ``skill_name`` (optionally a specific version).

        With ``limit`` set, only the last ``limit`` matches are returned; a
        limit of zero or less yields an empty list.
        """
        results: list[SkillEffectRecord] = []
        for payload in self._read_jsonl(self.effects_path):
            effect = SkillEffectRecord.from_dict(payload)
            if effect.skill_name != skill_name:
                continue
            if version is not None and effect.skill_version != version:
                continue
            results.append(effect)
        return self._tail(results, limit)

    @staticmethod
    def _tail(items: list, limit: int | None) -> list:
        """Return the last ``limit`` items, or all of them when ``limit`` is None."""
        if limit is None:
            return items
        if limit <= 0:
            # items[-0:] would be the whole list, not an empty one.
            return []
        return items[-limit:]

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append ``payload`` to ``path`` as one JSON line, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Read every JSON object line from ``path``.

        A missing file yields ``[]``. Blank lines and lines whose JSON value
        is not an object are skipped.
        """
        if not path.exists():
            return []
        results: list[dict] = []
        for line in path.read_text(encoding="utf-8").splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if isinstance(payload, dict):
                results.append(payload)
        return results
|
||||
@ -1,2 +1,19 @@
|
||||
"""Memory related to skill evolution."""
|
||||
|
||||
from .models import (
|
||||
SkillDraftEvalReport,
|
||||
SkillDraftSafetyReport,
|
||||
SkillLearningAuditEvent,
|
||||
SkillLearningCandidate,
|
||||
SkillPerformanceSnapshot,
|
||||
)
|
||||
from .store import SkillLearningStore
|
||||
|
||||
__all__ = [
|
||||
"SkillDraftEvalReport",
|
||||
"SkillDraftSafetyReport",
|
||||
"SkillLearningAuditEvent",
|
||||
"SkillLearningCandidate",
|
||||
"SkillLearningStore",
|
||||
"SkillPerformanceSnapshot",
|
||||
]
|
||||
|
||||
289
app-instance/backend/beaver/memory/skills/models.py
Normal file
289
app-instance/backend/beaver/memory/skills/models.py
Normal file
@ -0,0 +1,289 @@
|
||||
"""Aggregated skill learning models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Status values accepted for a learning candidate; unrecognized values are
# replaced with "open" when a candidate is loaded.
LEARNING_CANDIDATE_STATUSES = {
    "open", "queued", "synthesizing", "draft_ready",
    "safety_failed", "eval_failed", "review_pending",
    "approved", "rejected", "published",
    "failed", "superseded",
}

# Risk levels accepted on candidates and safety reports; unrecognized values
# are replaced with "medium" on load.
RISK_LEVELS = {
    "low",
    "medium",
    "high",
    "critical",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillPerformanceSnapshot:
|
||||
skill_name: str
|
||||
skill_version: str
|
||||
activation_count: int
|
||||
success_count: int
|
||||
failure_count: int
|
||||
latest_used_at: str
|
||||
last_feedback_score: float | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"skill_name": self.skill_name,
|
||||
"skill_version": self.skill_version,
|
||||
"activation_count": self.activation_count,
|
||||
"success_count": self.success_count,
|
||||
"failure_count": self.failure_count,
|
||||
"latest_used_at": self.latest_used_at,
|
||||
"last_feedback_score": self.last_feedback_score,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
|
||||
value = payload.get("last_feedback_score")
|
||||
return cls(
|
||||
skill_name=str(payload["skill_name"]),
|
||||
skill_version=str(payload["skill_version"]),
|
||||
activation_count=int(payload.get("activation_count", 0) or 0),
|
||||
success_count=int(payload.get("success_count", 0) or 0),
|
||||
failure_count=int(payload.get("failure_count", 0) or 0),
|
||||
latest_used_at=str(payload.get("latest_used_at") or ""),
|
||||
last_feedback_score=None if value in (None, "") else float(value),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningCandidate:
|
||||
candidate_id: str
|
||||
kind: str
|
||||
source_run_ids: list[str]
|
||||
source_session_ids: list[str]
|
||||
related_skill_names: list[str]
|
||||
reason: str
|
||||
evidence: dict[str, Any] = field(default_factory=dict)
|
||||
status: str = "open"
|
||||
priority: int = 0
|
||||
confidence: float = 0.0
|
||||
risk_level: str = "medium"
|
||||
owner: str | None = None
|
||||
retry_count: int = 0
|
||||
last_error: str | None = None
|
||||
trigger_reason: str = ""
|
||||
evidence_summary: str = ""
|
||||
draft_skill_name: str | None = None
|
||||
draft_id: str | None = None
|
||||
safety_report_id: str | None = None
|
||||
eval_report_id: str | None = None
|
||||
created_at: str = ""
|
||||
updated_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"candidate_id": self.candidate_id,
|
||||
"kind": self.kind,
|
||||
"source_run_ids": list(self.source_run_ids),
|
||||
"source_session_ids": list(self.source_session_ids),
|
||||
"related_skill_names": list(self.related_skill_names),
|
||||
"reason": self.reason,
|
||||
"evidence": dict(self.evidence),
|
||||
"status": self.status,
|
||||
"priority": self.priority,
|
||||
"confidence": self.confidence,
|
||||
"risk_level": self.risk_level,
|
||||
"owner": self.owner,
|
||||
"retry_count": self.retry_count,
|
||||
"last_error": self.last_error,
|
||||
"trigger_reason": self.trigger_reason,
|
||||
"evidence_summary": self.evidence_summary,
|
||||
"draft_skill_name": self.draft_skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"safety_report_id": self.safety_report_id,
|
||||
"eval_report_id": self.eval_report_id,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
|
||||
now = _utc_now()
|
||||
status = str(payload.get("status") or "open")
|
||||
risk_level = str(payload.get("risk_level") or "medium")
|
||||
return cls(
|
||||
candidate_id=str(payload["candidate_id"]),
|
||||
kind=str(payload.get("kind") or "revise_skill"),
|
||||
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
|
||||
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
|
||||
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
|
||||
reason=str(payload.get("reason") or ""),
|
||||
evidence=dict(payload.get("evidence") or {}),
|
||||
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
confidence=float(payload.get("confidence", 0.0) or 0.0),
|
||||
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
||||
owner=_optional_str(payload.get("owner")),
|
||||
retry_count=int(payload.get("retry_count", 0) or 0),
|
||||
last_error=_optional_str(payload.get("last_error")),
|
||||
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
|
||||
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
|
||||
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
|
||||
draft_id=_optional_str(payload.get("draft_id")),
|
||||
safety_report_id=_optional_str(payload.get("safety_report_id")),
|
||||
eval_report_id=_optional_str(payload.get("eval_report_id")),
|
||||
created_at=str(payload.get("created_at") or now),
|
||||
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningAuditEvent:
|
||||
event_id: str
|
||||
candidate_id: str
|
||||
event_type: str
|
||||
created_at: str
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"candidate_id": self.candidate_id,
|
||||
"event_type": self.event_type,
|
||||
"created_at": self.created_at,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
|
||||
return cls(
|
||||
event_id=str(payload["event_id"]),
|
||||
candidate_id=str(payload["candidate_id"]),
|
||||
event_type=str(payload.get("event_type") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
payload=dict(payload.get("payload") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillDraftSafetyReport:
|
||||
report_id: str
|
||||
skill_name: str
|
||||
draft_id: str
|
||||
passed: bool
|
||||
risk_level: str
|
||||
issues: list[str] = field(default_factory=list)
|
||||
blocked_reasons: list[str] = field(default_factory=list)
|
||||
suggested_fix: str = ""
|
||||
created_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"report_id": self.report_id,
|
||||
"skill_name": self.skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"passed": self.passed,
|
||||
"risk_level": self.risk_level,
|
||||
"issues": list(self.issues),
|
||||
"blocked_reasons": list(self.blocked_reasons),
|
||||
"suggested_fix": self.suggested_fix,
|
||||
"created_at": self.created_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
|
||||
risk_level = str(payload.get("risk_level") or "medium")
|
||||
return cls(
|
||||
report_id=str(payload["report_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
draft_id=str(payload["draft_id"]),
|
||||
passed=bool(payload.get("passed")),
|
||||
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
|
||||
suggested_fix=str(payload.get("suggested_fix") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillDraftEvalReport:
|
||||
report_id: str
|
||||
skill_name: str
|
||||
draft_id: str
|
||||
candidate_id: str
|
||||
passed: bool
|
||||
baseline_score_avg: float
|
||||
candidate_score_avg: float
|
||||
score_delta: float
|
||||
regression_count: int
|
||||
improved_count: int
|
||||
unchanged_count: int
|
||||
cases: list[dict[str, Any]] = field(default_factory=list)
|
||||
status: str = "completed"
|
||||
created_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"report_id": self.report_id,
|
||||
"skill_name": self.skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"candidate_id": self.candidate_id,
|
||||
"passed": self.passed,
|
||||
"baseline_score_avg": self.baseline_score_avg,
|
||||
"candidate_score_avg": self.candidate_score_avg,
|
||||
"score_delta": self.score_delta,
|
||||
"regression_count": self.regression_count,
|
||||
"improved_count": self.improved_count,
|
||||
"unchanged_count": self.unchanged_count,
|
||||
"cases": [dict(item) for item in self.cases],
|
||||
"status": self.status,
|
||||
"created_at": self.created_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
|
||||
return cls(
|
||||
report_id=str(payload["report_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
draft_id=str(payload["draft_id"]),
|
||||
candidate_id=str(payload.get("candidate_id") or ""),
|
||||
passed=bool(payload.get("passed")),
|
||||
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
|
||||
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
|
||||
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
|
||||
regression_count=int(payload.get("regression_count", 0) or 0),
|
||||
improved_count=int(payload.get("improved_count", 0) or 0),
|
||||
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
|
||||
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
|
||||
status=str(payload.get("status") or "completed"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
)
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
|
||||
def _summarize_evidence(payload: dict[str, Any]) -> str:
|
||||
evidence = payload.get("evidence")
|
||||
if isinstance(evidence, dict):
|
||||
theme = evidence.get("theme")
|
||||
if theme:
|
||||
return f"Theme: {theme}"
|
||||
skill_version = evidence.get("skill_version")
|
||||
if skill_version:
|
||||
return f"Skill version: {skill_version}"
|
||||
source_run_ids = payload.get("source_run_ids") or []
|
||||
return f"{len(source_run_ids)} source run(s)"
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
216
app-instance/backend/beaver/memory/skills/store.py
Normal file
216
app-instance/backend/beaver/memory/skills/store.py
Normal file
@ -0,0 +1,216 @@
|
||||
"""File-backed skill learning store."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import (
|
||||
SkillDraftEvalReport,
|
||||
SkillDraftSafetyReport,
|
||||
SkillLearningAuditEvent,
|
||||
SkillLearningCandidate,
|
||||
SkillPerformanceSnapshot,
|
||||
)
|
||||
|
||||
|
||||
class SkillLearningStore:
    """File-backed store for skill-learning records kept under one root directory.

    Three JSONL files (one JSON object per line) hold performance snapshots,
    learning candidates and audit events. Safety and eval reports are stored
    as one JSON file each under ``<kind>-reports/<skill_name>/<draft_id>/``.

    Files that are rewritten as a whole go through ``_atomic_write_text`` so
    an interrupted write cannot leave a truncated log behind.
    """

    def __init__(self, root: str | Path) -> None:
        """Create *root* if necessary and derive the path of every backing file."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.performance_path = self.root / "performance.jsonl"
        self.candidates_path = self.root / "learning-candidates.jsonl"
        self.audit_path = self.root / "learning-audit.jsonl"
        self.safety_reports_dir = self.root / "safety-reports"
        self.eval_reports_dir = self.root / "eval-reports"

    def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None:
        """Append *candidate* to the candidates log and audit its creation."""
        # Round-trip through the dict form so the stored record reflects
        # whatever SkillLearningCandidate.from_dict produces.
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        self._append_jsonl(self.candidates_path, normalized.to_dict())
        self.append_audit_event(
            normalized.candidate_id,
            "candidate_created",
            {
                "kind": normalized.kind,
                "status": normalized.status,
                "reason": normalized.reason,
            },
        )

    def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None:
        """Apply *updates* to the first candidate whose id matches and persist the log.

        ``updated_at`` is refreshed automatically unless the caller supplies it.

        Returns:
            The updated candidate, or ``None`` when no candidate has that id
            (in which case nothing is written).
        """
        candidates = self.list_learning_candidates()
        updated: SkillLearningCandidate | None = None
        for index, candidate in enumerate(candidates):
            if candidate.candidate_id != candidate_id:
                continue
            payload = candidate.to_dict()
            payload.update(updates)
            if "updated_at" not in updates:
                payload["updated_at"] = _utc_now()
            updated = SkillLearningCandidate.from_dict(payload)
            candidates[index] = updated
            break
        if updated is None:
            return None
        self._rewrite_jsonl(self.candidates_path, [item.to_dict() for item in candidates])
        return updated

    def transition_learning_candidate(
        self,
        candidate_id: str,
        status: str,
        *,
        event_type: str | None = None,
        payload: dict | None = None,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Set a candidate's status and, if it exists, append a matching audit event.

        Args:
            candidate_id: Id of the candidate to transition.
            status: New status value.
            event_type: Audit event name; defaults to ``candidate_<status>``.
            payload: Extra audit payload merged after the ``status`` key.
            **updates: Additional candidate fields to update.

        Returns:
            The updated candidate, or ``None`` when the id is unknown.
        """
        updated = self.update_learning_candidate(candidate_id, status=status, **updates)
        if updated is not None:
            self.append_audit_event(
                candidate_id,
                event_type or f"candidate_{status}",
                {"status": status, **dict(payload or {})},
            )
        return updated

    def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return all stored candidates, optionally restricted to one *status*."""
        candidates = [
            SkillLearningCandidate.from_dict(payload)
            for payload in self._read_jsonl(self.candidates_path)
        ]
        if status is None:
            return candidates
        return [candidate for candidate in candidates if candidate.status == status]

    def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None:
        """Replace the stored snapshot for the same skill name and version, or add it."""
        kept = [
            item
            for item in self.list_performance_snapshots()
            if not (item.skill_name == snapshot.skill_name and item.skill_version == snapshot.skill_version)
        ]
        kept.append(snapshot)
        self._rewrite_jsonl(self.performance_path, [item.to_dict() for item in kept])

    def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]:
        """Return every stored performance snapshot in file order."""
        return [SkillPerformanceSnapshot.from_dict(item) for item in self._read_jsonl(self.performance_path)]

    def list_low_performing_versions(self, *, minimum_activations: int = 2, success_ratio_threshold: float = 0.5) -> list[SkillPerformanceSnapshot]:
        """Return snapshots whose success ratio is at or below the threshold.

        Snapshots with fewer than *minimum_activations* activations are ignored.
        """
        results: list[SkillPerformanceSnapshot] = []
        for snapshot in self.list_performance_snapshots():
            if snapshot.activation_count < minimum_activations:
                continue
            # Still needed when minimum_activations <= 0: avoids dividing by zero.
            if snapshot.activation_count == 0:
                continue
            ratio = snapshot.success_count / snapshot.activation_count
            if ratio <= success_ratio_threshold:
                results.append(snapshot)
        return results

    def list_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates whose kind is ``merge_skills``."""
        return [item for item in self.list_learning_candidates(status="open") if item.kind == "merge_skills"]

    def append_audit_event(self, candidate_id: str, event_type: str, payload: dict | None = None) -> SkillLearningAuditEvent:
        """Append an audit event with a fresh id and timestamp, and return it."""
        event = SkillLearningAuditEvent(
            event_id=uuid4().hex,
            candidate_id=candidate_id,
            event_type=event_type,
            created_at=_utc_now(),
            payload=dict(payload or {}),
        )
        self._append_jsonl(self.audit_path, event.to_dict())
        return event

    def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]:
        """Return audit events in file order, optionally for a single candidate."""
        events = [SkillLearningAuditEvent.from_dict(item) for item in self._read_jsonl(self.audit_path)]
        if candidate_id is None:
            return events
        return [event for event in events if event.candidate_id == candidate_id]

    def write_safety_report(self, report: SkillDraftSafetyReport) -> None:
        """Persist *report* as ``safety-reports/<skill>/<draft>/report-<id>.json``."""
        path = self._report_path(self.safety_reports_dir, report.skill_name, report.draft_id, report.report_id)
        self._atomic_write_text(path, json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def get_safety_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftSafetyReport | None:
        """Return the safety report with *report_id*, or the last listed one.

        NOTE(review): "last" means last in sorted file-name order; this is the
        newest report only if report ids sort chronologically - confirm.
        """
        reports = self.list_safety_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]:
        """Return the safety reports of one draft, sorted by file name."""
        root = self.safety_reports_dir / skill_name / draft_id
        if not root.exists():
            return []
        return [
            SkillDraftSafetyReport.from_dict(self._read_json(path))
            for path in sorted(root.glob("report-*.json"))
        ]

    def write_eval_report(self, report: SkillDraftEvalReport) -> None:
        """Persist *report* as ``eval-reports/<skill>/<draft>/report-<id>.json``."""
        path = self._report_path(self.eval_reports_dir, report.skill_name, report.draft_id, report.report_id)
        self._atomic_write_text(path, json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def get_eval_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftEvalReport | None:
        """Return the eval report with *report_id*, or the last listed one.

        NOTE(review): "last" means last in sorted file-name order; this is the
        newest report only if report ids sort chronologically - confirm.
        """
        reports = self.list_eval_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]:
        """Return the eval reports of one draft, sorted by file name."""
        root = self.eval_reports_dir / skill_name / draft_id
        if not root.exists():
            return []
        return [
            SkillDraftEvalReport.from_dict(self._read_json(path))
            for path in sorted(root.glob("report-*.json"))
        ]

    @staticmethod
    def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path:
        """Return the file path of one report below *root*."""
        return root / skill_name / draft_id / f"report-{report_id}.json"

    @staticmethod
    def _atomic_write_text(path: Path, text: str) -> None:
        """Write *text* to *path* through a sibling temp file and a rename.

        The temp file lives in the same directory so ``Path.replace`` stays on
        one filesystem; its ``.tmp`` suffix keeps it out of ``report-*.json``
        globs. A failed write removes the temp file and leaves *path* untouched.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        scratch = path.with_name(f"{path.name}.{uuid4().hex}.tmp")
        try:
            scratch.write_text(text, encoding="utf-8")
            scratch.replace(path)
        finally:
            # Only present here when the write or the rename failed.
            if scratch.exists():
                scratch.unlink()

    @classmethod
    def _rewrite_jsonl(cls, path: Path, payloads: list[dict]) -> None:
        """Replace the whole JSONL file at *path* with *payloads*, one object per line."""
        cls._atomic_write_text(
            path,
            "".join(json.dumps(item, ensure_ascii=False, sort_keys=True) + "\n" for item in payloads),
        )

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append *payload* as one JSON line, creating parent directories as needed."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Return the JSON objects stored in *path*; a missing file yields ``[]``.

        Blank lines and lines whose JSON value is not an object are skipped.
        A line that is not valid JSON raises ``json.JSONDecodeError``.
        """
        if not path.exists():
            return []
        results: list[dict] = []
        for line in path.read_text(encoding="utf-8").splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if isinstance(payload, dict):
                results.append(payload)
        return results

    @staticmethod
    def _read_json(path: Path) -> dict:
        """Load *path* and return its JSON object.

        Raises:
            ValueError: If the file's top-level JSON value is not an object.
        """
        payload = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise ValueError(f"Expected JSON object in {path}")
        return payload
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current time in UTC as an ISO-8601 string."""
    # Function-scope import: keeps this helper independent of the module's imports.
    import datetime as _dt

    return _dt.datetime.now(tz=_dt.timezone.utc).isoformat()
|
||||
@ -15,9 +15,13 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.coordinator.models import ExecutionNode, TeamRunResult
|
||||
from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
|
||||
from beaver.engine.providers import make_provider_bundle
|
||||
from beaver.foundation.events import InboundMessage, OutboundMessage
|
||||
from beaver.tasks import MainAgentRouter, TaskExecutionPlan, TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class AgentService:
|
||||
@ -45,6 +49,7 @@ class AgentService:
|
||||
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
|
||||
self._loop: AgentLoop | None = None
|
||||
self._run_task: asyncio.Task[None] | None = None
|
||||
self._main_agent_router = MainAgentRouter()
|
||||
|
||||
def create_loop(self) -> AgentLoop:
|
||||
"""创建并缓存当前 service 使用的 AgentLoop。"""
|
||||
@ -176,7 +181,7 @@ class AgentService:
|
||||
"use 'await AgentService.submit_direct(...)' after start()."
|
||||
)
|
||||
loop = self.create_loop()
|
||||
return await loop.process_direct(message, **kwargs)
|
||||
return await self._process_with_main_agent(message, runner=loop.process_direct, kwargs=kwargs)
|
||||
|
||||
async def submit_direct(
|
||||
self,
|
||||
@ -189,7 +194,502 @@ class AgentService:
|
||||
"""
|
||||
|
||||
loop = self.create_loop()
|
||||
return await loop.submit_direct(message, **kwargs)
|
||||
return await self._process_with_main_agent(message, runner=loop.submit_direct, kwargs=kwargs)
|
||||
|
||||
async def submit_feedback(
    self,
    *,
    session_id: str,
    run_id: str,
    feedback_type: str,
    comment: str | None = None,
) -> dict[str, Any]:
    """Record user feedback for the internal task that produced *run_id*.

    The feedback type is normalized (trimmed, lower-cased) and must be one of
    ``satisfied``, ``revise`` or ``abandon``. Re-submitting the same feedback
    for the same run is accepted without recording it again; submitting a
    different feedback type for that run is rejected.

    Side effects for newly recorded feedback: the task is updated through the
    task service, a hidden ``task_feedback_recorded`` session event is
    appended, learning candidates are built when the feedback is
    ``satisfied`` and the task's validation was accepted, and a failure
    memory entry is stored when the feedback is ``abandon``.

    Returns:
        A dict with the session, run and task ids, the task status, the
        normalized feedback type and any generated learning candidates.

    Raises:
        ValueError: If no task in this session is linked to *run_id*, the
            feedback type is unknown, conflicting feedback already exists for
            the run, or the task is already closed or abandoned.
    """
    loaded = self.create_loop().boot()
    tasks = self._require_loaded(loaded, "task_service")
    task = tasks.get_task_by_run_id(run_id)
    if task is None or task.session_id != session_id:
        raise ValueError(f"No internal task found for run_id={run_id!r}")

    normalized = feedback_type.strip().lower()
    if normalized not in {"satisfied", "revise", "abandon"}:
        raise ValueError("feedback_type must be one of: satisfied, revise, abandon")

    # Everything previously recorded against this particular run.
    prior_for_run = [entry for entry in task.feedback if entry.get("run_id") == run_id]
    already_recorded = any(entry.get("feedback_type") == normalized for entry in prior_for_run)
    conflict = next(
        (entry for entry in prior_for_run if entry.get("feedback_type") != normalized),
        None,
    )
    if conflict is not None:
        raise ValueError(
            f"Feedback for run_id={run_id!r} was already recorded as "
            f"{conflict.get('feedback_type')!r}"
        )
    if not already_recorded and task.status in {"closed", "abandoned"}:
        raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")

    if already_recorded:
        updated = task
    else:
        updated = tasks.add_feedback(
            task.task_id,
            feedback_type=normalized,
            comment=comment,
            run_id=run_id,
        )

    sessions = self._require_loaded(loaded, "session_manager")
    sessions.update_latest_assistant_event_payload(
        session_id,
        run_id,
        {
            "task_id": updated.task_id,
            "task_status": updated.status,
            "feedback_state": normalized,
        },
    )
    if not already_recorded:
        sessions.append_message(
            session_id,
            run_id=run_id,
            role="system",
            event_type="task_feedback_recorded",
            event_payload={
                "task_id": task.task_id,
                "feedback_type": normalized,
                "comment": comment,
                "task_status": updated.status,
            },
            content=comment,
            context_visible=False,
        )

    validation = ValidationResult.from_dict(updated.validation_result)
    learning_candidates = []
    # Follow-up actions only run the first time this feedback is recorded.
    if not already_recorded:
        if normalized == "satisfied" and validation is not None and validation.accepted:
            learner = self._require_loaded(loaded, "skill_learning_service")
            learning_candidates = [item.to_dict() for item in learner.build_learning_candidates()]
        elif normalized == "abandon":
            memory = self._require_loaded(loaded, "memory_service")
            memory.get_store().add(
                "memory",
                (
                    f"Failure memory: task {task.task_id} in session {session_id} was abandoned. "
                    f"Reason: {(comment or 'not specified').strip()}"
                ),
            )

    return {
        "session_id": session_id,
        "run_id": run_id,
        "task_id": updated.task_id,
        "task_status": updated.status,
        "feedback_type": normalized,
        "learning_candidates": learning_candidates,
    }
|
||||
|
||||
async def _process_with_main_agent(
    self,
    message: str,
    *,
    runner: Any,
    kwargs: dict[str, Any],
) -> AgentRunResult:
    """Route *message* either straight to *runner* or through task mode.

    A session id is generated when the caller did not provide one. The main
    agent router then classifies the message against the session's latest
    open task: non-task messages go directly to *runner*; task messages
    reuse the open task unless there is none or the router asks for a new one.
    """
    loaded = self.create_loop().boot()
    tasks = self._require_loaded(loaded, "task_service")

    # Copy so the caller's dict is not mutated when the session id is filled in.
    call_kwargs = dict(kwargs)
    session_id = call_kwargs.get("session_id") or uuid4().hex
    call_kwargs["session_id"] = session_id

    open_task = tasks.get_latest_open_task(session_id)
    decision = self._main_agent_router.classify(message, active_task=open_task)
    if not decision.is_task:
        return await runner(message, **call_kwargs)

    if open_task is None or decision.starts_new_task:
        task = tasks.create_task(
            session_id=session_id,
            description=message,
            metadata={"router_reason": decision.reason},
        )
    else:
        task = open_task
    return await self._run_task_mode(message, runner=runner, kwargs=call_kwargs, task=task)
|
||||
|
||||
async def _run_task_mode(
    self,
    message: str,
    *,
    runner: Any,
    kwargs: dict[str, Any],
    task: TaskRecord,
) -> AgentRunResult:
    """Execute *message* as an internal task with planning, validation and one retry.

    Up to two attempts are made. Each attempt:

    1. registers the run with the task service and asks the planner for a plan;
    2. runs the planned team when ``plan.is_team`` and records a hidden
       completed/failed observation;
    3. calls *runner* for the main-agent answer, passing the team outcome (and,
       on the retry, the validator's revision prompt) as ``execution_context``;
    4. validates the answer and stores the verdict on the task, the run record
       and the session.

    The first accepted result is returned. A rejected first attempt is hidden
    from the conversation context and retried; the second attempt is returned
    whether or not it was accepted.

    Args:
        message: The user message driving the task.
        runner: Awaitable callable ``runner(message, **kwargs)`` producing the run result.
        kwargs: Keyword arguments forwarded to *runner*; must contain
            ``session_id``. The caller's dict is not mutated.
        task: The task record this run belongs to.

    Returns:
        The run result of the last executed attempt, annotated with
        ``task_id``, ``task_status`` and ``validation_result``.

    Raises:
        RuntimeError: If the loaded engine lacks a required dependency.
    """
    loaded = self.create_loop().boot()
    task_service = self._require_loaded(loaded, "task_service")
    validation_service = self._require_loaded(loaded, "validation_service")
    task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
    session_manager = self._require_loaded(loaded, "session_manager")
    run_memory_store = self._require_loaded(loaded, "run_memory_store")

    last_result: AgentRunResult | None = None
    latest_validation: ValidationResult | None = None
    base_execution_context = kwargs.get("execution_context")
    provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
    # Work on a copy: the team factory is consumed here and must not reach the runner.
    kwargs = dict(kwargs)
    team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
    kwargs["provider_bundle"] = provider_bundle

    for attempt_index in (1, 2):
        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
        plan = await task_execution_planner.plan(
            task=task,
            user_message=message,
            attempt_index=attempt_index,
            latest_validation=latest_validation,
            provider_bundle=provider_bundle,
        )
        self._append_task_observation(
            session_manager,
            task.session_id,
            event_type="task_execution_planned",
            payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                **plan.to_event_payload(),
            },
        )
        team_summaries: list[str] = []
        team_execution_context = ""
        if plan.is_team:
            team_result, team_error = await self._run_team_for_task(
                plan,
                task=task,
                parent_session_id=kwargs["session_id"],
                provider_bundle_factory=team_provider_bundle_factory
                or self._build_team_provider_bundle_factory(loaded, kwargs),
            )
            if team_result is not None:
                team_summaries = [self._team_summary_for_validation(team_result)]
                team_execution_context = self._team_execution_context(plan, team_result)
                self._append_task_observation(
                    session_manager,
                    task.session_id,
                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
                    payload={
                        "task_id": task.task_id,
                        "attempt_index": attempt_index,
                        "plan_mode": plan.mode,
                        "strategy": plan.graph.strategy if plan.graph else None,
                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                        "team_run_ids": team_result.run_ids,
                        "team_success": team_result.success,
                        "node_results": self._team_node_results_for_event(plan, team_result),
                        "reason": plan.reason,
                        "error": None if team_result.success else "one or more team nodes failed",
                    },
                )
            else:
                # The team could not run at all; the main agent still answers on its own.
                team_summaries = [f"Team execution failed: {team_error}"]
                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
                self._append_task_observation(
                    session_manager,
                    task.session_id,
                    event_type="task_team_run_failed",
                    payload={
                        "task_id": task.task_id,
                        "attempt_index": attempt_index,
                        "plan_mode": plan.mode,
                        "strategy": plan.graph.strategy if plan.graph else None,
                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                        "team_run_ids": [],
                        "team_success": False,
                        "reason": plan.reason,
                        "error": team_error,
                    },
                )

        attempt_kwargs = dict(kwargs)
        attempt_kwargs.update(
            {
                "task_id": task.task_id,
                "task_mode": True,
                "attempt_index": attempt_index,
                "learning_candidate_enabled": False,
            }
        )

        # Assemble the extra context from whichever pieces exist. The revision
        # prompt only applies to the retry; the team context applies to every
        # attempt, including a retry whose validator gave no revision prompt.
        revision_context = ""
        if attempt_index == 2 and latest_validation is not None:
            revision_context = (latest_validation.recommended_revision_prompt or "").strip()
        extra_context = [
            f"Task validation revision request:\n{revision_context}" if revision_context else None,
            team_execution_context,
        ]
        if any(extra_context):
            attempt_kwargs["execution_context"] = self._join_context(base_execution_context, *extra_context)

        result = await runner(message, **attempt_kwargs)
        last_result = result
        self._append_task_observation(
            session_manager,
            task.session_id,
            event_type="task_synthesis_completed",
            payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                "main_run_id": result.run_id,
                "plan_mode": plan.mode,
                "strategy": plan.graph.strategy if plan.graph else None,
            },
        )
        task = task_service.append_run(
            task.task_id,
            result.run_id,
            skill_names=self._skill_names_for_run(loaded, result.run_id),
        )
        validation = await validation_service.validate_task_result(
            task=task,
            user_message=message,
            final_output=result.output_text,
            transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
            tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
            team_summaries=team_summaries,
            provider_bundle=provider_bundle,
        )
        latest_validation = validation
        task = task_service.record_validation(task.task_id, result.run_id, validation)
        run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
        session_manager.update_latest_assistant_event_payload(
            result.session_id,
            result.run_id,
            {
                "task_id": task.task_id,
                "task_status": task.status,
                "validation_status": "passed" if validation.accepted else "failed",
            },
        )
        session_manager.append_message(
            result.session_id,
            run_id=result.run_id,
            role="system",
            event_type="task_validation_snapshotted",
            event_payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                "validation_result": validation.to_dict(),
                "retry_scheduled": not validation.accepted and attempt_index == 1,
            },
            content=validation.recommended_revision_prompt or None,
            context_visible=False,
        )
        if not validation.accepted and attempt_index == 1:
            # Hide the rejected first attempt so the retry does not see it in context.
            session_manager.set_run_context_visible(result.session_id, result.run_id, False)
        result.task_id = task.task_id
        result.task_status = task.status
        result.validation_result = validation.to_dict()
        if validation.accepted or attempt_index == 2:
            return result

    if last_result is None:  # pragma: no cover - defensive
        raise RuntimeError("Task mode did not produce a run result")
    return last_result
|
||||
|
||||
async def _run_team_for_task(
    self,
    plan: TaskExecutionPlan,
    *,
    task: TaskRecord,
    parent_session_id: str,
    provider_bundle_factory: Any,
) -> tuple[TeamRunResult | None, str | None]:
    """Run the plan's execution graph through ``TeamService``, never raising.

    Returns:
        ``(result, None)`` when the team run returned, or ``(None, error)``
        when the plan has no graph or the run raised. ``error`` is never an
        empty string: exceptions without a message are reported by class name.
    """
    if plan.graph is None:
        return None, "team plan did not include an execution graph"
    try:
        # Imported here rather than at module level, as in the surrounding file.
        from beaver.services.team_service import TeamService

        result = await TeamService(self.create_loop()).run_team(
            plan.graph,
            parent_task_id=task.task_id,
            parent_session_id=parent_session_id,
            parent_run_id=None,
            provider_bundle_factory=provider_bundle_factory,
            learning_candidate_enabled=False,
        )
        return result, None
    except Exception as exc:
        # Best-effort boundary: a failed team run must not abort the task.
        # Exceptions such as TimeoutError() stringify to "", so fall back to
        # the class name to keep the recorded failure diagnosable.
        return None, str(exc) or type(exc).__name__
|
||||
|
||||
@staticmethod
|
||||
def _require_loaded(loaded: Any, field_name: str) -> Any:
    """Return ``loaded.<field_name>``, failing loudly when it is ``None``.

    Raises:
        RuntimeError: If the attribute exists but is ``None``.
        AttributeError: If *loaded* has no such attribute.
    """
    dependency = getattr(loaded, field_name)
    if dependency is not None:
        return dependency
    raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
|
||||
|
||||
@staticmethod
|
||||
def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]:
    """Return the names of the skills activated in run *run_id*.

    An empty list is returned when *loaded* has no run memory store or the
    store holds no record for that run.
    """
    run_store = getattr(loaded, "run_memory_store", None)
    if run_store is None:
        return []
    # First record with a matching run id, if any.
    matching = next((entry for entry in run_store.list_runs() if entry.run_id == run_id), None)
    if matching is None:
        return []
    return [receipt.skill_name for receipt in matching.activated_skills]
|
||||
|
||||
@staticmethod
|
||||
def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
    """Return a short ``role: content`` transcript of a run's context-visible events.

    At most the first 12 qualifying events are used and the joined text is
    capped at 2400 characters.
    """
    visible_lines = [
        f"{event.role}: {event.content.strip()}"
        for event in session_manager.get_run_event_records(session_id, run_id)
        if event.context_visible and event.content
    ]
    excerpt = "\n".join(visible_lines[:12])
    return excerpt[:2400]
|
||||
|
||||
@staticmethod
|
||||
def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]:
    """Return up to 12 ``tool: output`` summaries for a run's tool results.

    Only ``tool_result_recorded`` events with non-blank content are included;
    each output is truncated to 500 characters and a missing tool name is
    shown as ``tool``.
    """
    collected = []
    for event in session_manager.get_run_event_records(session_id, run_id):
        if event.event_type == "tool_result_recorded":
            body = (event.content or "").strip()
            if body:
                collected.append(f"{event.tool_name or 'tool'}: {body[:500]}")
    return collected[:12]
|
||||
|
||||
@staticmethod
|
||||
def _append_task_observation(
    session_manager: Any,
    session_id: str,
    *,
    event_type: str,
    payload: dict[str, Any],
) -> None:
    """Append a hidden system event describing a task-mode step.

    The event's content is the payload's ``reason`` if truthy, otherwise its
    ``error``; the event is stored with ``context_visible=False``.
    """
    summary_text = payload.get("reason") or payload.get("error")
    session_manager.append_message(
        session_id,
        role="system",
        event_type=event_type,
        event_payload=payload,
        content=summary_text,
        context_visible=False,
    )
|
||||
|
||||
@staticmethod
|
||||
def _join_context(*parts: str | None) -> str:
    """Join the non-blank *parts*, each stripped, with a blank line between them."""
    cleaned = []
    for part in parts:
        text = part.strip() if part else ""
        if text:
            cleaned.append(text)
    return "\n\n".join(cleaned)
|
||||
|
||||
@staticmethod
|
||||
def _team_summary_for_validation(result: TeamRunResult) -> str:
    """Render a team run as plain text for the validator.

    The text lists the overall success flag, task id and summary, followed by
    one line per node with its outcome and the first 500 characters of output.
    """
    header = [
        f"success={result.success}",
        f"task_id={result.task_id or ''}",
        "summary:",
        result.summary,
        "nodes:",
    ]
    node_lines = [
        f"- {entry.node_id}: success={entry.success} finish_reason={entry.finish_reason} "
        f"error={entry.error or ''} output={entry.output_text[:500]}"
        for entry in result.node_results
    ]
    return "\n".join(header + node_lines)
|
||||
|
||||
@staticmethod
|
||||
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
    """Serialize node results for an event payload, enriched with skill details.

    Each node result's ``to_dict()`` is extended with the skill information of
    the matching plan node (matched by ``node_id``); results without a
    matching plan node are emitted unchanged.
    """
    plan_nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}

    def describe(entry: Any) -> dict[str, Any]:
        record = entry.to_dict()
        planned = plan_nodes.get(entry.node_id)
        if planned is None:
            return record
        agent_meta = planned.agent.metadata
        record.update(
            {
                "selected_skill_names": list(planned.inherited_pinned_skills),
                "ephemeral_skill_names": [
                    skill.name for skill in planned.inherited_pinned_skill_contexts
                ],
                "skill_query": agent_meta.get("skill_query"),
                "generated_skill_draft_id": agent_meta.get("generated_skill_draft_id"),
                "generated_skill_name": agent_meta.get("generated_skill_name"),
                "ephemeral_used": bool(planned.inherited_pinned_skill_contexts),
            }
        )
        return record

    return [describe(entry) for entry in result.node_results]
|
||||
|
||||
@staticmethod
|
||||
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
    """Build the execution-context text that hands team output to the main agent.

    Sections are separated by blank lines: planner reason, strategy, overall
    success, team summary, per-node results, the plan's final synthesis
    instruction (only when set) and a closing instruction for the main agent.
    """
    node_blocks = []
    for entry in result.node_results:
        node_blocks.append(
            f"- {entry.node_id}: success={entry.success}, finish_reason={entry.finish_reason}, "
            f"run_id={entry.run_id or ''}, error={entry.error or ''}\n{entry.output_text}"
        )

    sections = [
        "Task team execution result:",
        f"Planner reason: {plan.reason}",
        f"Strategy: {plan.graph.strategy if plan.graph else ''}",
        f"Team success: {result.success}",
        f"Team summary:\n{result.summary}",
        "Node results:\n" + "\n\n".join(node_blocks),
    ]
    if plan.final_synthesis_instruction:
        sections.append("Final synthesis instruction:\n" + plan.final_synthesis_instruction)
    sections.append("Use the team outputs as internal evidence. Produce the final user-facing answer yourself.")
    return "\n\n".join(sections)
|
||||
|
||||
@staticmethod
|
||||
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
    """Build the execution-context text used when the team run could not complete.

    The text reports the planner reason, strategy and error, and tells the
    main agent to answer on its own.
    """
    strategy = plan.graph.strategy if plan.graph else ""
    sections = (
        "Task team execution failed before final synthesis.",
        f"Planner reason: {plan.reason}",
        f"Strategy: {strategy}",
        f"Error: {error}",
        "Proceed as the main agent and produce the best possible final answer.",
    )
    return "\n\n".join(sections)
|
||||
|
||||
def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
    """Return a callable that builds a provider bundle for one execution node.

    The factory starts from *kwargs* without ``provider_bundle`` and overrides
    ``model`` / ``provider_name`` with the node agent's values when those are
    set, then delegates to ``_make_provider_bundle_for_task``.
    """

    def factory(node: ExecutionNode) -> Any:
        # Built at call time, so later changes to kwargs are picked up.
        per_node = {key: value for key, value in kwargs.items() if key != "provider_bundle"}
        agent = node.agent
        if agent.model:
            per_node["model"] = agent.model
        if agent.provider_name:
            per_node["provider_name"] = agent.provider_name
        return self._make_provider_bundle_for_task(loaded, per_node)

    return factory
|
||||
|
||||
def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
    """Build a provider bundle from the loaded config plus per-call overrides.

    The config resolves the provider target for the requested ``model`` /
    ``provider_name``. The resolved model falls back to the profile's default
    model; ``api_key``, ``api_base`` and ``extra_headers`` prefer the values
    in *kwargs* over the configured ones; embedding settings fall back to the
    config's embedding target and default embedding model.
    """
    config = loaded.config
    target = config.resolve_provider_target(
        model=kwargs.get("model"),
        provider_name=kwargs.get("provider_name"),
    )
    model_name = target.get("model") or self.profile.default_model
    provider = target.get("provider_name") or kwargs.get("provider_name")
    return make_provider_bundle(
        model=model_name,
        provider_name=provider,
        api_key=kwargs.get("api_key") or target.get("api_key"),
        api_base=kwargs.get("api_base") or target.get("api_base"),
        request_timeout_seconds=target.get("request_timeout_seconds"),
        extra_headers=kwargs.get("extra_headers") or target.get("extra_headers"),
        routing=kwargs.get("routing"),
        fallback_target=kwargs.get("fallback_target"),
        auxiliary_target=kwargs.get("auxiliary_target"),
        embedding_target=kwargs.get("embedding_target") or config.resolve_embedding_target(),
        embedding_model=kwargs.get("embedding_model") or config.default_embedding_model,
    )
|
||||
|
||||
async def handle_inbound_message(self, inbound: InboundMessage) -> OutboundMessage:
|
||||
"""把 bus inbound 映射成标准 runtime 调用,并返回结构化 outbound。"""
|
||||
@ -207,9 +707,26 @@ class AgentService:
|
||||
embedding_model=inbound.embedding_model,
|
||||
)
|
||||
except Exception as exc:
|
||||
return self.build_outbound_error(inbound, detail=str(exc))
|
||||
return self.build_outbound_error(
|
||||
inbound,
|
||||
detail=str(exc),
|
||||
finish_reason=self._classify_inbound_failure(exc),
|
||||
)
|
||||
return self.build_outbound_message(inbound, result)
|
||||
|
||||
@staticmethod
|
||||
def _classify_inbound_failure(exc: Exception) -> str:
    """Map a runtime exception to a stable bus finish reason.

    Returns ``"stopped"`` for a ``RuntimeError`` whose message indicates that
    the loop is not running or no longer accepts tasks, otherwise ``"error"``.
    """
    if not isinstance(exc, RuntimeError):
        return "error"
    detail = str(exc)
    stop_markers = (
        "requires an active run() loop",
        "not accepting new tasks after stop()",
    )
    return "stopped" if any(marker in detail for marker in stop_markers) else "error"
|
||||
|
||||
@staticmethod
|
||||
def build_outbound_message(inbound: InboundMessage, result: AgentRunResult) -> OutboundMessage:
|
||||
"""把一次 runtime 正常结果转成 bus outbound。"""
|
||||
@ -224,7 +741,12 @@ class AgentService:
|
||||
provider_name=result.provider_name,
|
||||
model=result.model,
|
||||
usage=dict(result.usage),
|
||||
metadata={"inbound_metadata": dict(inbound.metadata)},
|
||||
metadata={
|
||||
"inbound_metadata": dict(inbound.metadata),
|
||||
"task_id": getattr(result, "task_id", None),
|
||||
"task_status": getattr(result, "task_status", None),
|
||||
"validation_result": getattr(result, "validation_result", None),
|
||||
},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -51,6 +51,13 @@ class MemoryService:
|
||||
self.store.load_from_disk()
|
||||
self._snapshot = capture_memory_snapshot(self.store)
|
||||
|
||||
def capture_snapshot_for_run(self) -> MemorySnapshot:
    """Capture a frozen memory snapshot for a single run.

    A separate ``MemoryStore`` is created from ``self.root`` and loaded from
    disk, so the service's own store and cached snapshot are not touched.
    """
    run_store = MemoryStore(self.root)
    run_store.load_from_disk()
    snapshot = capture_memory_snapshot(run_store)
    return snapshot
|
||||
|
||||
def get_snapshot(self) -> MemorySnapshot:
|
||||
"""获取当前 run 应注入 system prompt 的 frozen snapshot。"""
|
||||
|
||||
|
||||
253
app-instance/backend/beaver/services/process_service.py
Normal file
253
app-instance/backend/beaver/services/process_service.py
Normal file
@ -0,0 +1,253 @@
|
||||
"""Projection of hidden Task/team events into frontend process streams."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
class SessionProcessProjector:
    """Project hidden task/team session events into frontend process streams.

    Event records of one session are grouped under a synthetic root run per
    task attempt (``task:<task_id>:attempt:<n>``). Team node results and the
    final synthesis run are attached as child runs of that root.
    """

    def __init__(self, session_manager: Any, run_memory_store: Any) -> None:
        """Keep the collaborators queried by :meth:`project`.

        Args:
            session_manager: Object exposing ``get_event_records(session_id)``.
            run_memory_store: Object exposing ``list_runs()``.
        """
        self.session_manager = session_manager
        self.run_memory_store = run_memory_store

    def project(self, session_id: str) -> dict[str, Any]:
        """Build the process projection for ``session_id``.

        Records whose payload carries no ``task_id`` are skipped. Only the
        event types ``task_execution_planned``, ``task_team_run_completed``,
        ``task_team_run_failed``, ``task_synthesis_completed`` and
        ``task_validation_snapshotted`` produce events; any other record with
        a ``task_id`` still creates the root run entry.

        Args:
            session_id: Session whose event records are projected.

        Returns:
            A dict with ``runs`` (sorted by ``started_at``), ``events``
            (sorted by ``created_at``) and empty ``artifacts`` / ``agents``
            lists.
        """
        records = self.session_manager.get_event_records(session_id)
        run_records = {record.run_id: record for record in self.run_memory_store.list_runs()}
        runs: dict[str, dict[str, Any]] = {}
        events: list[dict[str, Any]] = []

        def add_event(
            *,
            event_id: str,
            run_id: str,
            kind: str,
            actor_type: str,
            actor_id: str,
            actor_name: str,
            text: str,
            created_at: str,
            status: str | None = None,
            parent_run_id: str | None = None,
            metadata: dict[str, Any] | None = None,
        ) -> None:
            # Metadata is shallow-copied so later edits to the source dict do
            # not leak into already-emitted events.
            events.append(
                {
                    "event_id": event_id,
                    "run_id": run_id,
                    "parent_run_id": parent_run_id,
                    "kind": kind,
                    "actor_type": actor_type,
                    "actor_id": actor_id,
                    "actor_name": actor_name,
                    "text": text,
                    "status": status,
                    "metadata": dict(metadata or {}),
                    "created_at": created_at,
                }
            )

        for record in records:
            payload = dict(record.event_payload or {})
            task_id = payload.get("task_id")
            if not task_id:
                continue
            attempt_index = int(payload.get("attempt_index") or 1)
            root_run_id = f"task:{task_id}:attempt:{attempt_index}"
            created_at = _timestamp(record.timestamp)
            # The first record seen for an attempt creates the root run; later
            # records reuse and update the same dict.
            root = runs.setdefault(
                root_run_id,
                {
                    "run_id": root_run_id,
                    "parent_run_id": None,
                    "session_id": session_id,
                    "actor_type": "system",
                    "actor_id": "task",
                    "actor_name": "Task Planner",
                    "title": f"Task {task_id[:8]} attempt {attempt_index}",
                    "source": "task_mode",
                    "status": "running",
                    "started_at": created_at,
                    "metadata": {"task_id": task_id, "attempt_index": attempt_index},
                },
            )

            if record.event_type == "task_execution_planned":
                strategy = payload.get("strategy") or "single"
                # Resolve the display plan mode once so the title and the
                # event text agree and never render the literal "None".
                plan_mode = payload.get("plan_mode") or "single"
                reason = payload.get("reason") or ""
                node_ids = payload.get("node_ids") or []
                root["title"] = f"{plan_mode} plan: {strategy}"
                root["summary"] = reason
                # Metadata keeps the raw payload values (possibly None).
                root["metadata"] = {
                    **root.get("metadata", {}),
                    "plan_mode": payload.get("plan_mode"),
                    "strategy": payload.get("strategy"),
                    "node_ids": node_ids,
                    "skill_queries": payload.get("skill_queries") or [],
                    "selected_skill_names": payload.get("selected_skill_names") or [],
                    "generated_skill_draft_ids": payload.get("generated_skill_draft_ids") or [],
                    "skill_resolution_report": payload.get("skill_resolution_report") or [],
                    "fallback_error": payload.get("fallback_error"),
                }
                add_event(
                    event_id=_event_id(record, "planned"),
                    run_id=root_run_id,
                    kind="run_started",
                    actor_type="system",
                    actor_id="task",
                    actor_name="Task Planner",
                    text=f"Planned {plan_mode} execution via {strategy}. {reason}".strip(),
                    created_at=created_at,
                    status="running",
                    metadata=root["metadata"],
                )

            elif record.event_type in {"task_team_run_completed", "task_team_run_failed"}:
                team_success = bool(payload.get("team_success"))
                # The root stays "running" here; only the validation event
                # below moves it to "done" or "waiting".
                root["status"] = "running"
                root["metadata"] = {
                    **root.get("metadata", {}),
                    "team_success": team_success,
                    "team_run_ids": payload.get("team_run_ids") or [],
                    "team_error": payload.get("error"),
                }
                add_event(
                    event_id=_event_id(record, "team"),
                    run_id=root_run_id,
                    kind="run_status",
                    actor_type="system",
                    actor_id="team",
                    actor_name="Task Team",
                    text=payload.get("error") or ("Team completed" if team_success else "Team completed with failed nodes"),
                    created_at=created_at,
                    status="done" if team_success else "error",
                    metadata=dict(payload),
                )
                node_results = payload.get("node_results") or []
                for item in node_results:
                    if not isinstance(item, dict):
                        continue
                    node_run_id = item.get("run_id") or f"{root_run_id}:node:{item.get('node_id')}"
                    status = "done" if item.get("success") else "error"
                    # A blocked node is shown as waiting regardless of success.
                    if item.get("finish_reason") == "blocked":
                        status = "waiting"
                    run_record = run_records.get(str(node_run_id))
                    runs[str(node_run_id)] = {
                        "run_id": str(node_run_id),
                        "parent_run_id": root_run_id,
                        "session_id": run_record.session_id if run_record is not None else session_id,
                        "actor_type": "agent",
                        "actor_id": str(item.get("node_id") or "sub-agent"),
                        "actor_name": str(item.get("node_id") or "Sub-agent"),
                        "title": str(item.get("node_id") or "Sub-agent"),
                        "source": "task_team",
                        "status": status,
                        "started_at": run_record.started_at if run_record is not None else created_at,
                        "finished_at": run_record.ended_at if run_record is not None else created_at,
                        "summary": _truncate(str(item.get("output_text") or item.get("error") or "")),
                        "metadata": {
                            "task_id": task_id,
                            "attempt_index": attempt_index,
                            "node_id": item.get("node_id"),
                            "skill_query": item.get("skill_query"),
                            "selected_skill_names": item.get("selected_skill_names") or [],
                            "ephemeral_skill_names": item.get("ephemeral_skill_names") or [],
                            "generated_skill_draft_id": item.get("generated_skill_draft_id"),
                            "generated_skill_name": item.get("generated_skill_name"),
                            "ephemeral_used": bool(item.get("ephemeral_used")),
                            "finish_reason": item.get("finish_reason"),
                            "error": item.get("error"),
                        },
                    }
                    add_event(
                        event_id=f"{_event_id(record, 'node')}:{item.get('node_id')}",
                        run_id=str(node_run_id),
                        parent_run_id=root_run_id,
                        kind="run_finished",
                        actor_type="agent",
                        actor_id=str(item.get("node_id") or "sub-agent"),
                        actor_name=str(item.get("node_id") or "Sub-agent"),
                        text=_truncate(str(item.get("output_text") or item.get("error") or "")),
                        created_at=created_at,
                        status=status,
                        metadata=dict(item),
                    )

            elif record.event_type == "task_synthesis_completed":
                main_run_id = str(payload.get("main_run_id") or "")
                # Without a main run id there is nothing to attach.
                if main_run_id:
                    run_record = run_records.get(main_run_id)
                    runs[main_run_id] = {
                        "run_id": main_run_id,
                        "parent_run_id": root_run_id,
                        "session_id": run_record.session_id if run_record is not None else session_id,
                        "actor_type": "agent",
                        "actor_id": "main-agent",
                        "actor_name": "Main Agent",
                        "title": "Final synthesis",
                        "source": "task_synthesis",
                        "status": "done" if (run_record is None or run_record.success) else "error",
                        "started_at": run_record.started_at if run_record is not None else created_at,
                        "finished_at": run_record.ended_at if run_record is not None else created_at,
                        "summary": _truncate(run_record.task_text if run_record is not None else ""),
                        "metadata": {"task_id": task_id, "attempt_index": attempt_index},
                    }
                    add_event(
                        event_id=_event_id(record, "synthesis"),
                        run_id=main_run_id,
                        parent_run_id=root_run_id,
                        kind="run_finished",
                        actor_type="agent",
                        actor_id="main-agent",
                        actor_name="Main Agent",
                        text="Main Agent synthesized the final user-facing answer.",
                        created_at=created_at,
                        status="done",
                        metadata=dict(payload),
                    )

            elif record.event_type == "task_validation_snapshotted":
                validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
                accepted = bool(validation.get("accepted"))
                # NOTE(review): attempt 2 is treated as terminal even when not
                # accepted; presumably the maximum attempt count - confirm.
                root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
                root["finished_at"] = created_at if root["status"] == "done" else None
                add_event(
                    event_id=_event_id(record, "validation"),
                    run_id=record.run_id or root_run_id,
                    parent_run_id=root_run_id if record.run_id else None,
                    kind="run_status",
                    actor_type="system",
                    actor_id="validator",
                    actor_name="Validator",
                    text=(
                        f"Validation {'passed' if accepted else 'failed'} "
                        f"(score={validation.get('score')})."
                        + (" Retry scheduled." if payload.get("retry_scheduled") else "")
                    ),
                    created_at=created_at,
                    status="done" if accepted else "error",
                    metadata=dict(payload),
                )

        return {
            "runs": sorted(runs.values(), key=lambda item: item.get("started_at") or ""),
            "events": sorted(events, key=lambda item: item.get("created_at") or ""),
            "artifacts": [],
            "agents": [],
        }
|
||||
|
||||
|
||||
def _timestamp(value: float | None) -> str:
|
||||
if value is None:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _event_id(record: Any, suffix: str) -> str:
|
||||
return f"session-event:{record.message_id or record.timestamp}:{suffix}"
|
||||
|
||||
|
||||
def _truncate(text: str, limit: int = 800) -> str:
|
||||
cleaned = text.strip()
|
||||
if len(cleaned) <= limit:
|
||||
return cleaned
|
||||
return cleaned[: limit - 1] + "..."
|
||||
@ -1,10 +1,90 @@
|
||||
"""Application service for coordinated team runs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from beaver.coordinator import ExecutionGraph, ExecutionNode, LocalAgentRunner, TeamGraphScheduler, TeamRunResult
|
||||
from beaver.engine import AgentLoop
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.engine.context import SkillContext
|
||||
|
||||
|
||||
class TeamService:
    """Internal service for Beaver-native multi-agent execution."""

    def __init__(self, loop: AgentLoop) -> None:
        """Wire a local runner and graph scheduler around the shared loop."""
        self.loop = loop
        self.runner = LocalAgentRunner(loop)
        self.scheduler = TeamGraphScheduler(self.runner)

    async def run_team(
        self,
        graph: ExecutionGraph,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None = None,
        provider_bundle: ProviderBundle | None = None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None = None,
        inherited_pinned_skills: list[str] | None = None,
        inherited_pinned_skill_contexts: list["SkillContext"] | None = None,
        learning_candidate_enabled: bool = False,
    ) -> TeamRunResult:
        """Run a team graph inside the parent task context.

        The parent task (when given) is validated first, the graph is handed
        to the scheduler with all keyword options forwarded unchanged, and the
        resulting run ids are then attached to the parent task.

        Raises:
            RuntimeError: ``parent_task_id`` is given but no task service exists.
            ValueError: The parent task is unknown or belongs to another session.
        """
        self._validate_parent_task(parent_task_id, parent_session_id)
        result = await self.scheduler.run(
            graph,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle=provider_bundle,
            provider_bundle_factory=provider_bundle_factory,
            inherited_pinned_skills=inherited_pinned_skills,
            inherited_pinned_skill_contexts=inherited_pinned_skill_contexts,
            learning_candidate_enabled=learning_candidate_enabled,
        )
        self._attach_runs_to_parent_task(result)
        return result

    def run(self, task: str) -> str:
        """Compatibility shim for old callers that only expected a string."""
        return f"team service requires run_team() for coordinated execution: {task}"

    def _validate_parent_task(self, parent_task_id: str | None, parent_session_id: str) -> None:
        """Ensure the parent task exists and belongs to ``parent_session_id``.

        A falsy ``parent_task_id`` skips validation entirely.

        Raises:
            RuntimeError: The booted runtime exposes no ``task_service``.
            ValueError: The task is unknown or bound to a different session.
        """
        if not parent_task_id:
            return
        loaded = self.loop.boot()
        task_service = getattr(loaded, "task_service", None)
        if task_service is None:
            raise RuntimeError("TeamService requires task_service when parent_task_id is provided")
        task = task_service.get_task(parent_task_id)
        if task is None:
            raise ValueError(f"Unknown parent_task_id: {parent_task_id}")
        if task.session_id != parent_session_id:
            raise ValueError(
                f"parent_task_id {parent_task_id!r} belongs to session {task.session_id!r}, "
                f"not {parent_session_id!r}"
            )

    def _attach_runs_to_parent_task(self, result: TeamRunResult) -> None:
        """Append every run of ``result`` to its task, with activated skill names.

        Nothing happens when the result has no task id or run ids, when no
        task service is available, or when the task is unknown. Runs without
        a matching run record are appended with an empty skill list.
        """
        if not result.task_id or not result.run_ids:
            return
        loaded = self.loop.boot()
        task_service = getattr(loaded, "task_service", None)
        if task_service is None or task_service.get_task(result.task_id) is None:
            return
        run_store = getattr(loaded, "run_memory_store", None)
        # Index the run records once instead of rescanning the store for each
        # run id; setdefault keeps the first record per id, as a linear scan
        # with an early break would.
        records_by_id: dict = {}
        if run_store is not None:
            for record in run_store.list_runs():
                records_by_id.setdefault(record.run_id, record)
        for run_id in result.run_ids:
            record = records_by_id.get(run_id)
            skill_names: list[str] = (
                [receipt.skill_name for receipt in record.activated_skills] if record is not None else []
            )
            task_service.append_run(result.task_id, run_id, skill_names=skill_names)
|
||||
|
||||
@ -83,11 +83,21 @@ class SkillAssembler:
|
||||
|
||||
activated_skills: list[SkillContext] = []
|
||||
for name in selected_names:
|
||||
raw_content = self.loader.load_skill(name)
|
||||
record = self.loader.get_skill_record(name)
|
||||
raw_content = self.loader.load_published_skill(name)
|
||||
content = strip_frontmatter(raw_content).strip() if raw_content else ""
|
||||
if not content:
|
||||
continue
|
||||
activated_skills.append(SkillContext(name=name, content=content))
|
||||
activated_skills.append(
|
||||
SkillContext(
|
||||
name=name,
|
||||
content=content,
|
||||
version=record.version if record is not None else "legacy",
|
||||
content_hash=record.content_hash or "" if record is not None else "",
|
||||
activation_reason="llm_selected",
|
||||
tool_hints=list(record.tool_hints) if record is not None else [],
|
||||
)
|
||||
)
|
||||
|
||||
return SkillAssemblyResult(activated_skills=activated_skills)
|
||||
|
||||
|
||||
@ -1,5 +1,18 @@
|
||||
"""Skill catalog and indexing."""
|
||||
|
||||
from .loader import SkillRecord, SkillsLoader
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
__all__ = ["SkillRecord", "SkillsLoader"]
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
|
||||
if name in {"SkillRecord", "SkillsLoader"}:
|
||||
from .loader import SkillRecord, SkillsLoader
|
||||
|
||||
return {
|
||||
"SkillRecord": SkillRecord,
|
||||
"SkillsLoader": SkillsLoader,
|
||||
}[name]
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
@ -17,11 +17,13 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from beaver.skills.specs.storage import SkillSpecStore
|
||||
|
||||
from .utils import (
|
||||
check_requirements,
|
||||
escape_xml,
|
||||
@ -39,6 +41,13 @@ class SkillRecord:
|
||||
name: str
|
||||
path: Path
|
||||
source: str
|
||||
version: str = "legacy"
|
||||
content_hash: str | None = None
|
||||
source_kind: str = "legacy"
|
||||
status: str = "active"
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
frontmatter: dict[str, Any] = field(default_factory=dict)
|
||||
description: str = ""
|
||||
|
||||
|
||||
class SkillsLoader:
|
||||
@ -50,11 +59,13 @@ class SkillsLoader:
|
||||
*,
|
||||
builtin_skills_dir: str | Path | None = None,
|
||||
extra_dirs: list[str | Path] | None = None,
|
||||
skill_store: SkillSpecStore | None = None,
|
||||
) -> None:
|
||||
self.workspace = Path(workspace)
|
||||
self.workspace_skills = self.workspace / "skills"
|
||||
self.builtin_skills = Path(builtin_skills_dir) if builtin_skills_dir is not None else Path(__file__).resolve().parent.parent / "builtin"
|
||||
self.extra_dirs = [Path(item) for item in (extra_dirs or [])]
|
||||
self.skill_store = skill_store or SkillSpecStore(self.workspace)
|
||||
|
||||
def list_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
|
||||
"""列出当前可见的 skills。
|
||||
@ -67,14 +78,19 @@ class SkillsLoader:
|
||||
重名 skill 只保留优先级更高的那一个。
|
||||
"""
|
||||
|
||||
ordered_roots: list[tuple[str, Path]] = [
|
||||
("workspace", self.workspace_skills),
|
||||
*[("plugin", path) for path in self.extra_dirs],
|
||||
("builtin", self.builtin_skills),
|
||||
]
|
||||
found: dict[str, SkillRecord] = {}
|
||||
|
||||
for source, root in ordered_roots:
|
||||
for record in self.list_published_skills():
|
||||
if record.name in found:
|
||||
continue
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
found[record.name] = record
|
||||
|
||||
for source, root in [
|
||||
*[("plugin", path) for path in self.extra_dirs],
|
||||
("builtin", self.builtin_skills),
|
||||
]:
|
||||
if not root.exists():
|
||||
continue
|
||||
for skill_dir in root.iterdir():
|
||||
@ -84,12 +100,62 @@ class SkillsLoader:
|
||||
name = skill_dir.name
|
||||
if name in found:
|
||||
continue
|
||||
record = SkillRecord(name=name, path=skill_file, source=source)
|
||||
frontmatter, body = parse_frontmatter(skill_file.read_text(encoding="utf-8"))
|
||||
normalized_frontmatter = dict(frontmatter)
|
||||
record = SkillRecord(
|
||||
name=name,
|
||||
path=skill_file,
|
||||
source=source,
|
||||
version="legacy",
|
||||
source_kind=source,
|
||||
tool_hints=self._coerce_tool_names(frontmatter.get("tools")),
|
||||
frontmatter=normalized_frontmatter,
|
||||
description=str(frontmatter.get("description") or summarize_body(body) or name),
|
||||
)
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
found[name] = record
|
||||
return list(found.values())
|
||||
|
||||
def list_published_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
    """List only the formally published skills of the workspace catalog.

    Each published name is read from ``self.skill_store``; names that cannot
    be read are skipped. With ``filter_unavailable`` set, records rejected by
    ``self._record_available`` are dropped as well.
    """
    published: list[SkillRecord] = []
    for skill_name in self.skill_store.list_published_skill_names():
        entry = self.skill_store.read_published_skill(skill_name)
        if entry is None:
            continue
        spec = entry.version
        skill_root = self.workspace_skills / skill_name
        # Legacy skills keep SKILL.md at the skill root; versioned ones live
        # under versions/<version>/.
        if spec.version == "legacy":
            skill_path = skill_root / "SKILL.md"
        else:
            skill_path = skill_root / "versions" / spec.version / "SKILL.md"
        candidate = SkillRecord(
            name=skill_name,
            path=skill_path,
            source="workspace",
            version=spec.version,
            content_hash=spec.content_hash,
            source_kind=str(spec.provenance.get("source_kind") or "workspace"),
            status=str(spec.review_state or "published"),
            tool_hints=list(spec.tool_hints),
            frontmatter=dict(spec.frontmatter),
            description=str(spec.frontmatter.get("description") or spec.summary or skill_name),
        )
        if not filter_unavailable or self._record_available(candidate):
            published.append(candidate)
    return published
|
||||
|
||||
def get_current_version(self, name: str) -> str | None:
    """Return the version of the record found for ``name``, or ``None``."""
    found = self._find_record(name)
    if found is None:
        return None
    return found.version
|
||||
|
||||
def load_published_skill(self, name: str, version: str | None = None) -> str | None:
    """Load the published content of a skill, falling back to ``load_skill``.

    The store is asked for ``name`` at ``version``; when it returns nothing,
    the result of ``self.load_skill(name)`` is returned instead.
    """
    published = self.skill_store.read_published_skill(name, version=version)
    if published is None:
        # NOTE(review): the fallback ignores ``version`` - confirm intended.
        return self.load_skill(name)
    return published.content
|
||||
|
||||
def load_skill(self, name: str) -> str | None:
|
||||
"""按名称加载 skill 原始内容。"""
|
||||
|
||||
@ -106,6 +172,9 @@ class SkillsLoader:
|
||||
def get_skill_metadata(self, name: str) -> dict[str, Any] | None:
|
||||
"""读取 skill frontmatter 元数据。"""
|
||||
|
||||
record = self._find_record(name)
|
||||
if record is not None and record.frontmatter:
|
||||
return dict(record.frontmatter)
|
||||
content = self.load_skill(name)
|
||||
if content is None:
|
||||
return None
|
||||
@ -125,6 +194,10 @@ class SkillsLoader:
|
||||
- 兼容 metadata JSON blob 里的 `tools`
|
||||
"""
|
||||
|
||||
record = self._find_record(name)
|
||||
if record is not None and record.tool_hints:
|
||||
return list(record.tool_hints)
|
||||
|
||||
frontmatter = self.get_skill_metadata(name) or {}
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
names = [
|
||||
@ -143,7 +216,7 @@ class SkillsLoader:
|
||||
|
||||
sections: list[str] = []
|
||||
for name in skill_names:
|
||||
content = self.load_skill(name)
|
||||
content = self.load_published_skill(name)
|
||||
if not content:
|
||||
continue
|
||||
body = strip_frontmatter(content).strip()
|
||||
@ -167,14 +240,15 @@ class SkillsLoader:
|
||||
|
||||
lines = ["<skills>"]
|
||||
for record in skills:
|
||||
frontmatter = self.get_skill_metadata(record.name) or {}
|
||||
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
available = check_requirements(meta_blob)
|
||||
description = frontmatter.get("description") or record.name
|
||||
description = frontmatter.get("description") or record.description or record.name
|
||||
load_hint = f'Use skill_view(name="{record.name}") to load the full skill.'
|
||||
lines.append(f' <skill available="{str(available).lower()}">')
|
||||
lines.append(f" <name>{escape_xml(record.name)}</name>")
|
||||
lines.append(f" <description>{escape_xml(description)}</description>")
|
||||
lines.append(f" <version>{escape_xml(record.version)}</version>")
|
||||
lines.append(f" <load_hint>{escape_xml(load_hint)}</load_hint>")
|
||||
support_files = self.list_skill_supporting_files(record.name)
|
||||
if support_files:
|
||||
@ -205,10 +279,10 @@ class SkillsLoader:
|
||||
|
||||
candidates: list[dict[str, str]] = []
|
||||
for record in self.list_skills(filter_unavailable=True):
|
||||
frontmatter = self.get_skill_metadata(record.name) or {}
|
||||
description = str(frontmatter.get("description") or "").strip()
|
||||
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
|
||||
description = str(frontmatter.get("description") or record.description or "").strip()
|
||||
if not description:
|
||||
raw_content = self.load_skill(record.name) or ""
|
||||
raw_content = self.load_published_skill(record.name) or ""
|
||||
body = strip_frontmatter(raw_content).strip()
|
||||
if body:
|
||||
description = " ".join(body.splitlines()[:3])[:240].strip()
|
||||
@ -216,6 +290,8 @@ class SkillsLoader:
|
||||
{
|
||||
"name": record.name,
|
||||
"description": description or record.name,
|
||||
"version": record.version,
|
||||
"content_hash": record.content_hash or "",
|
||||
}
|
||||
)
|
||||
return candidates
|
||||
@ -249,7 +325,7 @@ class SkillsLoader:
|
||||
if record is None:
|
||||
return None
|
||||
if not self._record_available(record):
|
||||
frontmatter = self.get_skill_metadata(name) or {}
|
||||
frontmatter = record.frontmatter or self.get_skill_metadata(name) or {}
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
missing = get_missing_requirements(meta_blob)
|
||||
detail = f" Missing requirements: {missing}." if missing else ""
|
||||
@ -274,7 +350,7 @@ class SkillsLoader:
|
||||
|
||||
result: list[str] = []
|
||||
for record in self.list_skills(filter_unavailable=True):
|
||||
frontmatter = self.get_skill_metadata(record.name) or {}
|
||||
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
if meta_blob.get("always") or str(frontmatter.get("always", "")).lower() == "true":
|
||||
result.append(record.name)
|
||||
@ -326,3 +402,8 @@ class SkillsLoader:
|
||||
if record is None:
|
||||
return False
|
||||
return self._record_available(record)
|
||||
|
||||
|
||||
def summarize_body(body: str) -> str:
    """Summarize a skill body from its first three lines.

    Blank lines among those three are dropped, the rest are stripped and
    joined with single spaces, and the result is capped at 240 characters.
    """
    head_lines = body.splitlines()[:3]
    pieces = [line.strip() for line in head_lines if line.strip()]
    summary = " ".join(pieces).strip()
    return summary[:240]
|
||||
|
||||
@ -1,2 +1,6 @@
|
||||
"""Draft skills generated before review."""
|
||||
"""Skill draft services."""
|
||||
|
||||
from .service import DraftService
|
||||
|
||||
__all__ = ["DraftService"]
|
||||
|
||||
131
app-instance/backend/beaver/skills/drafts/service.py
Normal file
131
app-instance/backend/beaver/skills/drafts/service.py
Normal file
@ -0,0 +1,131 @@
|
||||
"""Draft lifecycle for Beaver skills."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.skills.specs import SkillDraft, SkillSpecStore
|
||||
|
||||
|
||||
class DraftService:
    """Create, persist and look up skill drafts through a ``SkillSpecStore``."""

    def __init__(self, store: SkillSpecStore) -> None:
        """Keep the store that drafts are written to and read from."""
        self.store = store

    def _create_content_draft(
        self,
        *,
        proposal_kind: str,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        trigger_run_id: str | None = None,
        trigger_session_id: str | None = None,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Build a content-carrying draft of ``proposal_kind`` and write it once."""
        draft = SkillDraft(
            draft_id=uuid4().hex,
            skill_name=skill_name,
            base_version=base_version,
            proposed_content=proposed_content,
            # Copy caller-owned containers so later mutation by the caller
            # cannot alter the stored draft.
            proposed_frontmatter=dict(proposed_frontmatter),
            created_at=_utc_now(),
            created_by=created_by,
            trigger_run_id=trigger_run_id,
            trigger_session_id=trigger_session_id,
            reason=reason,
            evidence_refs=list(evidence_refs or []),
            proposal_kind=proposal_kind,
        )
        self.store.write_draft(draft)
        return draft

    def create_new_skill_draft(
        self,
        *,
        skill_name: str,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        trigger_run_id: str | None = None,
        trigger_session_id: str | None = None,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create and persist a ``new_skill`` draft (no base version)."""
        return self._create_content_draft(
            proposal_kind="new_skill",
            skill_name=skill_name,
            base_version=None,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            trigger_run_id=trigger_run_id,
            trigger_session_id=trigger_session_id,
            evidence_refs=evidence_refs,
        )

    def create_revision_draft(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        trigger_run_id: str | None = None,
        trigger_session_id: str | None = None,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create and persist a ``revise_skill`` draft against ``base_version``."""
        return self._create_content_draft(
            proposal_kind="revise_skill",
            skill_name=skill_name,
            base_version=base_version,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            trigger_run_id=trigger_run_id,
            trigger_session_id=trigger_session_id,
            evidence_refs=evidence_refs,
        )

    def create_merge_draft(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create and persist a ``merge_skills`` draft.

        The draft is written exactly once with its final proposal kind, so
        the store never holds it under the ``revise_skill`` kind.
        """
        return self._create_content_draft(
            proposal_kind="merge_skills",
            skill_name=skill_name,
            base_version=base_version,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            evidence_refs=evidence_refs,
        )

    def create_retire_proposal(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        created_by: str,
        reason: str,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create and persist a ``retire_skill`` proposal with empty content."""
        # Built directly (not via the helper) so the trigger fields keep
        # whatever defaults SkillDraft declares for them.
        draft = SkillDraft(
            draft_id=uuid4().hex,
            skill_name=skill_name,
            base_version=base_version,
            proposed_content="",
            proposed_frontmatter={},
            created_at=_utc_now(),
            created_by=created_by,
            reason=reason,
            evidence_refs=list(evidence_refs or []),
            proposal_kind="retire_skill",
        )
        self.store.write_draft(draft)
        return draft

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return the store's drafts, optionally restricted to ``skill_name``."""
        return self.store.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft | None:
        """Return the draft read from the store for ``skill_name``/``draft_id``."""
        return self.store.read_draft(skill_name, draft_id)
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
24
app-instance/backend/beaver/skills/learning/__init__.py
Normal file
24
app-instance/backend/beaver/skills/learning/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Skill learning loop helpers."""
|
||||
|
||||
from .evidence import EvidencePacket, EvidenceSelector
|
||||
from .eval import SkillDraftEvaluator
|
||||
from .missing_skill import MissingSkillDraftResult, MissingSkillSynthesizer
|
||||
from .pipeline import SkillLearningPipelineService
|
||||
from .service import RunReceiptContext, SkillLearningService
|
||||
from .synthesizer import SkillDraftSynthesizer
|
||||
from .worker import SkillLearningWorker, SkillLearningWorkerConfig, SkillLearningWorkerResult
|
||||
|
||||
__all__ = [
|
||||
"EvidencePacket",
|
||||
"EvidenceSelector",
|
||||
"SkillDraftEvaluator",
|
||||
"MissingSkillDraftResult",
|
||||
"MissingSkillSynthesizer",
|
||||
"RunReceiptContext",
|
||||
"SkillLearningPipelineService",
|
||||
"SkillDraftSynthesizer",
|
||||
"SkillLearningService",
|
||||
"SkillLearningWorker",
|
||||
"SkillLearningWorkerConfig",
|
||||
"SkillLearningWorkerResult",
|
||||
]
|
||||
121
app-instance/backend/beaver/skills/learning/eval.py
Normal file
121
app-instance/backend/beaver/skills/learning/eval.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""Lightweight replay/eval reports for skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.runs import RunMemoryStore
|
||||
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
|
||||
from beaver.skills.specs import SkillDraft
|
||||
|
||||
|
||||
class SkillDraftEvaluator:
|
||||
"""Builds a bounded eval report without writing user-visible sessions."""
|
||||
|
||||
def __init__(self, run_store: RunMemoryStore) -> None:
|
||||
self.run_store = run_store
|
||||
|
||||
async def evaluate(
|
||||
self,
|
||||
*,
|
||||
candidate: SkillLearningCandidate,
|
||||
draft: SkillDraft,
|
||||
provider_bundle: ProviderBundle | None,
|
||||
) -> SkillDraftEvalReport:
|
||||
if provider_bundle is None or provider_bundle.main_provider is None:
|
||||
return self._skipped(candidate, draft)
|
||||
|
||||
runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
|
||||
cases: list[dict] = []
|
||||
for run_id in candidate.source_run_ids[:8]:
|
||||
record = runs_by_id.get(run_id)
|
||||
if record is None:
|
||||
continue
|
||||
baseline = _score_from_validation(record.validation_result, record.success)
|
||||
candidate_score = _candidate_score(baseline, draft)
|
||||
cases.append(
|
||||
{
|
||||
"run_id": run_id,
|
||||
"session_id": record.session_id,
|
||||
"baseline_score": baseline,
|
||||
"candidate_score": candidate_score,
|
||||
"delta": round(candidate_score - baseline, 4),
|
||||
}
|
||||
)
|
||||
if not cases:
|
||||
cases.append(
|
||||
{
|
||||
"run_id": "",
|
||||
"session_id": "",
|
||||
"baseline_score": 0.75,
|
||||
"candidate_score": _candidate_score(0.75, draft),
|
||||
"delta": round(_candidate_score(0.75, draft) - 0.75, 4),
|
||||
}
|
||||
)
|
||||
|
||||
baseline_avg = sum(item["baseline_score"] for item in cases) / len(cases)
|
||||
candidate_avg = sum(item["candidate_score"] for item in cases) / len(cases)
|
||||
regressions = [item for item in cases if item["candidate_score"] < item["baseline_score"]]
|
||||
improved = [item for item in cases if item["candidate_score"] > item["baseline_score"]]
|
||||
unchanged = len(cases) - len(regressions) - len(improved)
|
||||
score_delta = candidate_avg - baseline_avg
|
||||
passed = not (len(regressions) > 0 and score_delta <= 0) and candidate_avg >= 0.75
|
||||
return SkillDraftEvalReport(
|
||||
report_id=uuid4().hex,
|
||||
skill_name=draft.skill_name,
|
||||
draft_id=draft.draft_id,
|
||||
candidate_id=candidate.candidate_id,
|
||||
passed=passed,
|
||||
baseline_score_avg=round(baseline_avg, 4),
|
||||
candidate_score_avg=round(candidate_avg, 4),
|
||||
score_delta=round(score_delta, 4),
|
||||
regression_count=len(regressions),
|
||||
improved_count=len(improved),
|
||||
unchanged_count=unchanged,
|
||||
cases=cases,
|
||||
status="completed",
|
||||
created_at=_utc_now(),
|
||||
)
|
||||
|
||||
def _skipped(self, candidate: SkillLearningCandidate, draft: SkillDraft) -> SkillDraftEvalReport:
    """Build a placeholder report for an evaluation that was not run.

    The report is marked ``passed=True`` with status
    ``"skipped_provider_unavailable"``, carries no cases, and has every
    score and counter zeroed.

    Args:
        candidate: Learning candidate the draft belongs to.
        draft: Draft that would have been evaluated.

    Returns:
        A freshly identified ``SkillDraftEvalReport``.
    """
    # Nothing was measured, so all aggregates are reported as zero.
    empty_metrics = {
        "baseline_score_avg": 0.0,
        "candidate_score_avg": 0.0,
        "score_delta": 0.0,
        "regression_count": 0,
        "improved_count": 0,
        "unchanged_count": 0,
    }
    return SkillDraftEvalReport(
        report_id=uuid4().hex,
        skill_name=draft.skill_name,
        draft_id=draft.draft_id,
        candidate_id=candidate.candidate_id,
        passed=True,
        cases=[],
        status="skipped_provider_unavailable",
        created_at=_utc_now(),
        **empty_metrics,
    )
|
||||
|
||||
|
||||
def _score_from_validation(validation: dict | None, success: bool) -> float:
    """Derive a baseline score in ``[0.0, 1.0]`` for a recorded run.

    Args:
        validation: Validation result of the run; may be ``None`` or lack a
            ``"score"`` key.
        success: Whether the run was recorded as successful.

    Returns:
        The clamped ``validation["score"]`` when it is a usable number
        (a falsy score such as ``None`` counts as ``0.0``). Otherwise a
        default of ``0.8`` for successful runs and ``0.4`` for failed ones.
    """
    import math

    if isinstance(validation, dict) and "score" in validation:
        try:
            score = float(validation.get("score") or 0.0)
        except (TypeError, ValueError, OverflowError):
            # Unparseable (or absurdly large integer) score: use the default.
            score = math.nan
        # NaN must not reach the clamp: min(1.0, nan) evaluates to 1.0, which
        # would silently turn an invalid score into a perfect one.
        if not math.isnan(score):
            return max(0.0, min(1.0, score))
    return 0.8 if success else 0.4
|
||||
|
||||
|
||||
def _candidate_score(baseline: float, draft: SkillDraft) -> float:
    """Estimate the score a draft would achieve relative to a baseline.

    Rules, in order:
      * empty content scores ``0.0`` unless the draft is a ``retire_skill``
        proposal;
      * content mentioning "regression" (case-insensitive) is penalised by
        ``0.2``, floored at ``0.0``;
      * anything else gets ``baseline + 0.05``, kept within ``[0.75, 1.0]``.

    Args:
        baseline: Score of the original run.
        draft: Draft whose ``proposed_content`` / ``proposal_kind`` are read.

    Returns:
        The heuristic candidate score.
    """
    body = draft.proposed_content.strip()
    if not body:
        if draft.proposal_kind != "retire_skill":
            return 0.0
    if "regression" in body.lower():
        penalised = baseline - 0.2
        return max(0.0, penalised)
    boosted = max(0.75, baseline + 0.05)
    return min(1.0, boosted)
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current time in UTC as an ISO-8601 string."""
    import datetime as _dt

    moment = _dt.datetime.now(_dt.timezone.utc)
    return moment.isoformat()
|
||||
76
app-instance/backend/beaver/skills/learning/evidence.py
Normal file
76
app-instance/backend/beaver/skills/learning/evidence.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""Evidence selection for skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.session.manager import SessionManager
|
||||
from beaver.memory.runs.store import RunMemoryStore
|
||||
|
||||
|
||||
@dataclass(slots=True)
class EvidencePacket:
    """Container for the evidence handed to skill-learning consumers."""

    # Identifiers of the runs included in this packet.
    run_ids: list[str]
    # Identifiers of the sessions associated with the packet.
    session_ids: list[str]
    # Short task descriptions, one per contributing run.
    task_summaries: list[str]
    # Text excerpts taken from session events.
    session_excerpts: list[str]
    # Free-form extra information about the packet; defaults to an empty dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class EvidenceSelector:
    """Select runs and assemble size-bounded evidence for skill learning."""

    def __init__(self, run_store: RunMemoryStore, session_manager: SessionManager | None = None) -> None:
        """Store the collaborators.

        Args:
            run_store: Source of run records.
            session_manager: Optional source of per-run session events; when
                absent, packets contain no session excerpts.
        """
        self.run_store = run_store
        self.session_manager = session_manager

    def select_runs_for_revision(self, skill_name: str, version: str, limit: int = 5) -> list[str]:
        """Return the ids of runs the store lists for ``skill_name``/``version``."""
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return ids of the last ``limit`` runs whose task text mentions ``theme``.

        Matching is a case-insensitive substring test; an empty theme matches
        every run. A non-positive ``limit`` yields an empty list.
        """
        # Guard explicitly: ``matches[-0:]`` is ``matches[0:]`` and would
        # return every match instead of none.
        if limit <= 0:
            return []
        lowered = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not lowered or lowered in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(self, run_ids: list[str], session_ids: list[str] | None = None) -> EvidencePacket:
        """Assemble an ``EvidencePacket`` for the given runs.

        Unknown run ids are ignored. Session ids are de-duplicated while
        keeping first-seen order, starting from ``session_ids`` and then
        adding each run's own session. Task summaries are cut to 400
        characters each; the packet keeps at most 8 summaries and 6 excerpts.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        task_summaries: list[str] = []
        session_excerpts: list[str] = []
        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)
            if record.session_id not in resolved_session_ids:
                resolved_session_ids.append(record.session_id)
            summary = record.task_text.strip()
            if summary:
                task_summaries.append(summary[:400])
            if self.session_manager is not None:
                excerpt = self._session_excerpt(record.session_id, run_id)
                if excerpt:
                    session_excerpts.append(excerpt)
        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries[:8],
            session_excerpts=session_excerpts[:6],
            metadata={"bounded": True},
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Render up to 12 context-visible events of a run as ``role: content`` lines.

        Returns an empty string without a session manager. The joined text is
        cut to 2000 characters.
        """
        if self.session_manager is None:
            return ""
        events = self.session_manager.get_run_event_records(session_id, run_id)
        visible: list[str] = []
        for event in events:
            # Skip events hidden from context and events without text.
            if not event.context_visible or not event.content:
                continue
            visible.append(f"{event.role}: {event.content.strip()}")
        return "\n".join(visible[:12])[:2000]
|
||||
166
app-instance/backend/beaver/skills/learning/missing_skill.py
Normal file
166
app-instance/backend/beaver/skills/learning/missing_skill.py
Normal file
@ -0,0 +1,166 @@
|
||||
"""Synthesize draft-only skills for missing sub-agent guidance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.specs import SkillDraft
|
||||
from beaver.skills.specs.serialization import canonical_hash
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.tasks.models import TaskRecord
|
||||
|
||||
|
||||
@dataclass(slots=True)
class MissingSkillDraftResult:
    """Pair of objects produced when a missing skill is synthesized."""

    # The persisted draft describing the generated skill.
    draft: SkillDraft
    # The context object built from that draft.
    skill_context: SkillContext
|
||||
|
||||
|
||||
class MissingSkillSynthesizer:
    """Create a draft skill and an ephemeral SkillContext for the current run."""

    async def synthesize(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        node_id: str,
        node_task: str,
        skill_query: str,
        required_capabilities: list[str],
        provider_bundle: ProviderBundle,
        draft_service: DraftService,
    ) -> MissingSkillDraftResult:
        """Generate a draft skill for a node that has no matching skill.

        The auxiliary provider (or the main one) is asked for a JSON payload
        with ``skill_name``, ``description``, ``content`` and ``tags``. Any
        provider failure or unparseable reply falls back to a deterministic
        template, so this method itself is best-effort with respect to the
        model call.

        Args:
            task: Task the node belongs to; ``goal``, ``task_id`` and
                ``session_id`` are read.
            user_message: Current user request, included in the prompt.
            attempt_index: Attempt number, recorded in metadata/evidence.
            node_id: Identifier of the execution node.
            node_task: Description of the node's sub-task.
            skill_query: Query that failed to resolve to a skill.
            required_capabilities: Capabilities the node needs.
            provider_bundle: Providers/runtimes used for the model call.
            draft_service: Service that persists the new draft.

        Returns:
            The stored draft together with a ``SkillContext`` built from it.
        """
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        fallback = self._fallback_payload(
            skill_query=skill_query, node_task=node_task, capabilities=required_capabilities
        )
        payload = fallback
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You create concise Beaver skill drafts. Return only JSON with keys: "
                            "skill_name, description, content, tags."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            "Create a procedural skill draft for this missing Task sub-agent guidance.\n\n"
                            f"Task goal:\n{task.goal}\n\n"
                            f"Current user request:\n{user_message}\n\n"
                            f"Node id: {node_id}\n"
                            f"Node task:\n{node_task}\n\n"
                            f"Skill query:\n{skill_query}\n"
                            f"Required capabilities: {required_capabilities}\n\n"
                            "The content must be actionable guidance for a temporary sub-agent. "
                            "Do not include implementation claims or publish metadata."
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0,
            )
            payload = self._parse_payload(response.content or "") or fallback
        except Exception:
            # Deliberate best-effort: keep going with the template payload,
            # but leave a trace instead of swallowing the failure silently.
            import logging

            logging.getLogger(__name__).warning(
                "Missing-skill synthesis failed; using fallback payload", exc_info=True
            )
            payload = fallback

        skill_name = _slug(str(payload.get("skill_name") or skill_query or node_id))
        content = str(payload.get("content") or "").strip()
        if not content:
            content = str(fallback["content"])
        frontmatter = {
            "description": str(payload.get("description") or f"Draft guidance for {skill_query or node_id}").strip(),
            "tags": self._normalize_tags(payload.get("tags")),
            "metadata": {
                "origin": "missing_task_subagent_skill",
                "task_id": task.task_id,
                "node_id": node_id,
                "attempt_index": attempt_index,
                "skill_query": skill_query,
                "required_capabilities": list(required_capabilities),
            },
        }
        draft = draft_service.create_new_skill_draft(
            skill_name=skill_name,
            proposed_content=content,
            proposed_frontmatter=frontmatter,
            created_by="task-skill-resolver",
            reason="generated_for_missing_task_subagent_skill",
            trigger_session_id=task.session_id,
            evidence_refs=[
                {
                    "task_id": task.task_id,
                    "session_id": task.session_id,
                    "attempt_index": attempt_index,
                    "node_id": node_id,
                    "skill_query": skill_query,
                    "required_capabilities": list(required_capabilities),
                }
            ],
        )
        context = SkillContext(
            name=f"draft:{draft.skill_name}",
            content=draft.proposed_content,
            version=f"draft:{draft.draft_id}",
            content_hash=canonical_hash(draft.proposed_content),
            activation_reason="generated_missing_skill",
            tool_hints=[],
        )
        return MissingSkillDraftResult(draft=draft, skill_context=context)

    @staticmethod
    def _normalize_tags(raw: Any) -> list[str]:
        """Coerce a model-supplied ``tags`` value into a list of strings.

        A comma-separated string is split (iterating it directly would yield
        single characters); lists/tuples are stringified item by item; any
        other or empty value yields the default generated-skill tags.
        """
        if isinstance(raw, str):
            tags = [part.strip() for part in raw.split(",") if part.strip()]
        elif isinstance(raw, (list, tuple)):
            tags = [str(item) for item in raw]
        else:
            tags = []
        return tags or ["generated", "task-sub-agent"]

    @staticmethod
    def _parse_payload(text: str) -> dict[str, Any] | None:
        """Extract a JSON object from a model reply.

        Handles replies wrapped in a Markdown code fence, prefixed with
        ``json``, or surrounded by extra prose. Returns ``None`` when no JSON
        object can be decoded.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        # Keep only the outermost {...} span so surrounding prose is ignored.
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return None
        return payload if isinstance(payload, dict) else None

    @staticmethod
    def _fallback_payload(*, skill_query: str, node_task: str, capabilities: list[str]) -> dict[str, Any]:
        """Build the deterministic template payload used when the model is unusable."""
        title = skill_query or node_task or "task subagent guidance"
        capability_lines = "\n".join(f"- {item}" for item in capabilities) or "- Follow the node task precisely."
        return {
            "skill_name": _slug(title),
            "description": f"Draft guidance for {title}.",
            "tags": ["generated", "task-sub-agent"],
            "content": (
                f"# {title}\n\n"
                "Use this draft guidance only for the current delegated sub-task.\n\n"
                "## Objective\n"
                f"{node_task or title}\n\n"
                "## Capabilities to apply\n"
                f"{capability_lines}\n\n"
                "## Output\n"
                "Return concise evidence, decisions, and unresolved risks for the main Agent to synthesize."
            ),
        }
|
||||
|
||||
|
||||
def _slug(value: str) -> str:
    """Turn arbitrary text into a lowercase, dash-separated ASCII slug.

    Runs of characters other than ASCII letters and digits become a single
    dash, the result is limited to 64 characters with no leading or trailing
    dash, and an empty result is replaced by
    ``"generated-task-subagent-skill"``.
    """
    normalized = value.strip().lower()
    words = [word for word in re.split(r"[^a-zA-Z0-9]+", normalized) if word]
    truncated = "-".join(words)[:64].strip("-")
    if truncated:
        return truncated
    return "generated-task-subagent-skill"
|
||||
354
app-instance/backend/beaver/skills/learning/pipeline.py
Normal file
354
app-instance/backend/beaver/skills/learning/pipeline.py
Normal file
@ -0,0 +1,354 @@
|
||||
"""Manual skill learning pipeline orchestration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningCandidate, SkillLearningStore
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.learning.service import SkillLearningService
|
||||
from beaver.skills.learning.safety import SkillDraftSafetyChecker
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpec, SkillVersion
|
||||
|
||||
|
||||
class SkillLearningPipelineService:
    """Coordinates candidate -> draft -> review -> publish lifecycle."""

    def __init__(
        self,
        *,
        learning_store: SkillLearningStore,
        learning_service: SkillLearningService,
        draft_service: DraftService,
        review_service: ReviewService,
        publisher: SkillPublisher,
        safety_checker: SkillDraftSafetyChecker | None = None,
        evaluator: SkillDraftEvaluator | None = None,
    ) -> None:
        """Wire up the collaborating services.

        A default ``SkillDraftSafetyChecker`` is created when none is given;
        the evaluator stays ``None`` and is built on demand in
        ``evaluate_draft``.
        """
        self.learning_store = learning_store
        self.learning_service = learning_service
        self.draft_service = draft_service
        self.review_service = review_service
        self.publisher = publisher
        self.safety_checker = safety_checker or SkillDraftSafetyChecker()
        self.evaluator = evaluator

    def list_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return learning candidates from the store, optionally filtered by status."""
        return self.learning_store.list_learning_candidates(status=status)

    def get_candidate(self, candidate_id: str) -> SkillLearningCandidate:
        """Return the candidate with the given id.

        Raises:
            ValueError: If no stored candidate has that id.
        """
        for candidate in self.learning_store.list_learning_candidates():
            if candidate.candidate_id == candidate_id:
                return candidate
        raise ValueError(f"Unknown learning candidate: {candidate_id}")

    async def synthesize_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Synthesize a draft for the candidate and mark the candidate ``draft_ready``."""
        draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
        self.mark_draft_synthesized(candidate_id, draft)
        return draft

    async def regenerate_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Move the candidate back to ``synthesizing`` and synthesize a new draft."""
        self.learning_store.transition_learning_candidate(
            candidate_id,
            "synthesizing",
            event_type="draft_synthesis_started",
            last_error=None,
        )
        return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)

    def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``queued`` and clear its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "queued",
                event_type="candidate_queued",
                last_error=None,
            ),
            candidate_id,
        )

    def mark_candidate_synthesizing(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``synthesizing`` and clear its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "synthesizing",
                event_type="draft_synthesis_started",
                last_error=None,
            ),
            candidate_id,
        )

    def mark_draft_synthesized(self, candidate_id: str, draft: SkillDraft) -> SkillLearningCandidate:
        """Link the draft to the candidate and transition it to ``draft_ready``.

        The draft id and skill name are written both into a copy of the
        candidate's evidence and into the dedicated candidate fields.

        Raises:
            ValueError: If the candidate is unknown.
        """
        candidate = self.get_candidate(candidate_id)
        # Copy so the stored candidate's evidence is not mutated in place.
        evidence = dict(candidate.evidence)
        evidence["draft_id"] = draft.draft_id
        evidence["draft_skill_name"] = draft.skill_name
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "draft_ready",
                event_type="draft_synthesis_completed",
                evidence=evidence,
                draft_id=draft.draft_id,
                draft_skill_name=draft.skill_name,
                risk_level=candidate.risk_level,
                last_error=None,
                payload={"draft_id": draft.draft_id, "skill_name": draft.skill_name},
            ),
            candidate_id,
        )

    def mark_candidate_failed(
        self,
        candidate_id: str,
        error: str,
        *,
        retry_count: int,
        terminal: bool,
    ) -> SkillLearningCandidate:
        """Record a failure on the candidate.

        A terminal failure moves the candidate to ``failed``; otherwise it is
        returned to ``open``. The error and retry count are stored either way.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "failed" if terminal else "open",
                event_type="failed",
                retry_count=retry_count,
                last_error=error,
                payload={"error": error, "terminal": terminal, "retry_count": retry_count},
            ),
            candidate_id,
        )

    def mark_candidate_superseded(self, candidate_id: str, reason: str) -> SkillLearningCandidate:
        """Transition the candidate to ``superseded``, storing ``reason`` as its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "superseded",
                event_type="superseded",
                last_error=reason,
                payload={"reason": reason},
            ),
            candidate_id,
        )

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return drafts from the draft service, optionally for one skill."""
        return self.draft_service.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Return the requested draft.

        Raises:
            ValueError: If the draft service has no such draft.
        """
        draft = self.draft_service.get_draft(skill_name, draft_id)
        if draft is None:
            raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
        return draft

    def submit_review(
        self,
        skill_name: str,
        draft_id: str,
        *,
        requested_by: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Submit a draft for review.

        A draft without any safety report is allowed through; a stored report
        that failed or is rated ``critical`` blocks the submission.

        Raises:
            ValueError: If an existing safety report blocks the draft.
        """
        safety = self.get_safety_report(skill_name, draft_id)
        if safety is not None and (not safety.passed or safety.risk_level == "critical"):
            raise ValueError("Draft cannot enter review because safety check failed")
        return self.review_service.submit_for_review(
            skill_name,
            draft_id,
            reviewer_request=notes,
            requested_by=requested_by,
        )

    def approve(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Approve the draft's review and mark the linked candidate ``approved``."""
        review = self.review_service.approve(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "approved", "approved")
        return review

    def reject(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Reject the draft's review and mark the linked candidate ``rejected``."""
        review = self.review_service.reject(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "rejected", "rejected")
        return review

    def publish(
        self,
        skill_name: str,
        draft_id: str,
        *,
        publisher: str = "system",
        notes: str = "",
        confirm_high_risk: bool = False,
    ) -> SkillVersion | SkillSpec:
        """Publish a draft after all publish gates pass.

        ``retire_skill`` proposals are applied through the publisher's retire
        path; every other draft goes through the normal publish path. The
        linked candidate is then marked ``published``.

        Raises:
            ValueError: If the draft is missing or a publish gate fails.
        """
        draft = self.get_draft(skill_name, draft_id)
        self._validate_publish_gates(draft, confirm_high_risk=confirm_high_risk)
        if draft.proposal_kind == "retire_skill":
            result = self.publisher.apply_retire_proposal(skill_name, draft_id, actor=publisher, notes=notes)
        else:
            result = self.publisher.publish(skill_name, draft_id, publisher=publisher, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "published", "published")
        return result

    def rollback(
        self,
        skill_name: str,
        target_version: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Roll the skill back to ``target_version``; an empty reason becomes "manual rollback"."""
        return self.publisher.rollback(skill_name, target_version, actor=actor, reason=reason or "manual rollback")

    def disable(
        self,
        skill_name: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Disable the skill; an empty reason becomes "manual disable"."""
        return self.publisher.disable(skill_name, actor=actor, reason=reason or "manual disable")

    def reviews_for_draft(self, skill_name: str, draft_id: str) -> list[SkillReviewRecord]:
        """Return the review records stored for the draft."""
        return self.review_service.store.list_reviews(skill_name, draft_id=draft_id)

    def check_safety(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport:
        """Run the safety checker on a draft, persist the report, update the candidate.

        The linked candidate becomes ``safety_failed`` when the report failed
        or is rated ``critical``; otherwise ``draft_ready``, except that an
        existing ``eval_failed`` status is kept.

        Raises:
            ValueError: If the draft does not exist.
        """
        draft = self.get_draft(skill_name, draft_id)
        report = self.safety_checker.check(draft)
        self.learning_store.write_safety_report(report)
        status = "safety_failed" if not report.passed or report.risk_level == "critical" else "draft_ready"
        current = self._candidate_by_draft(skill_name, draft_id)
        # A passing safety check must not erase an earlier eval failure.
        if current is not None and current.status == "eval_failed" and status == "draft_ready":
            status = "eval_failed"
        self._mark_candidate_by_draft(
            skill_name,
            draft_id,
            status,
            "safety_checked",
            safety_report_id=report.report_id,
            risk_level=report.risk_level,
            last_error="; ".join(report.blocked_reasons) if status == "safety_failed" else None,
        )
        return report

    def get_safety_report(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport | None:
        """Return the stored safety report for the draft, if any."""
        return self.learning_store.get_safety_report(skill_name, draft_id)

    def get_eval_report(self, skill_name: str, draft_id: str) -> SkillDraftEvalReport | None:
        """Return the stored eval report for the draft, if any."""
        return self.learning_store.get_eval_report(skill_name, draft_id)

    async def evaluate_draft(
        self,
        candidate_id: str,
        skill_name: str,
        draft_id: str,
        *,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Evaluate a draft, persist the report, and update the candidate.

        Uses the injected evaluator, or builds one from the learning service's
        run store. Skipped and passing evaluations lead to ``draft_ready``; a
        failing one leads to ``eval_failed``. A candidate already in
        ``safety_failed`` stays there, with its previous error, when the eval
        outcome would otherwise be ``draft_ready``.

        Raises:
            ValueError: If the draft or the candidate is unknown.
        """
        draft = self.get_draft(skill_name, draft_id)
        candidate = self.get_candidate(candidate_id)
        evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
        report = await evaluator.evaluate(candidate=candidate, draft=draft, provider_bundle=provider_bundle)
        self.learning_store.write_eval_report(report)
        if report.status == "skipped_provider_unavailable":
            status = "draft_ready"
            error = "eval skipped: provider unavailable"
        elif report.passed:
            status = "draft_ready"
            error = None
        else:
            status = "eval_failed"
            error = "eval failed"
        current = self._candidate_by_draft(skill_name, draft_id)
        # A good eval result must not erase an earlier safety failure.
        if current is not None and current.status == "safety_failed" and status == "draft_ready":
            status = "safety_failed"
            error = current.last_error
        self.learning_store.transition_learning_candidate(
            candidate_id,
            status,
            event_type="eval_completed",
            eval_report_id=report.report_id,
            last_error=error,
            payload=report.to_dict(),
        )
        return report

    def _validate_publish_gates(self, draft: SkillDraft, *, confirm_high_risk: bool) -> None:
        """Raise unless the draft satisfies every precondition for publishing.

        Requires an approved review and a passing, non-critical safety report;
        ``high`` risk additionally needs ``confirm_high_risk``. An eval report
        is optional, but one that ran and failed blocks publishing.

        Raises:
            ValueError: Describing the first gate that failed.
        """
        reviews = self.reviews_for_draft(draft.skill_name, draft.draft_id)
        if not any(review.status == SkillReviewState.APPROVED.value for review in reviews):
            raise ValueError("Draft must have an approved review before publish")
        safety = self.get_safety_report(draft.skill_name, draft.draft_id)
        if safety is None:
            raise ValueError("Draft requires a passing safety report before publish")
        if not safety.passed:
            raise ValueError("Draft safety report did not pass")
        if safety.risk_level == "critical":
            raise ValueError("Critical risk drafts cannot be published")
        if safety.risk_level == "high" and not confirm_high_risk:
            raise ValueError("High risk draft publish requires confirm_high_risk=true")
        eval_report = self.get_eval_report(draft.skill_name, draft.draft_id)
        if eval_report is not None and eval_report.status != "skipped_provider_unavailable" and not eval_report.passed:
            raise ValueError("Draft eval report did not pass")

    def _mark_candidate_by_draft(
        self,
        skill_name: str,
        draft_id: str,
        status: str,
        event_type: str,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Transition the candidate linked to a draft, if there is one.

        Returns ``None`` when no candidate references the draft. A candidate
        in ``safety_failed`` or ``eval_failed`` is returned unchanged when the
        requested status is ``review_pending`` or ``approved``.
        """
        candidate = self._candidate_by_draft(skill_name, draft_id)
        if candidate is None:
            return None
        if candidate.status in {"safety_failed", "eval_failed"} and status in {"review_pending", "approved"}:
            return candidate
        return self.learning_store.transition_learning_candidate(
            candidate.candidate_id,
            status,
            event_type=event_type,
            **updates,
        )

    def _candidate_by_draft(self, skill_name: str, draft_id: str) -> SkillLearningCandidate | None:
        """Return the first candidate whose draft fields match, or ``None``."""
        for candidate in self.learning_store.list_learning_candidates():
            if candidate.draft_skill_name == skill_name and candidate.draft_id == draft_id:
                return candidate
        return None

    @staticmethod
    def _require_updated(candidate: SkillLearningCandidate | None, candidate_id: str) -> SkillLearningCandidate:
        """Return ``candidate``, raising ``ValueError`` if it is ``None``."""
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")
        return candidate
|
||||
|
||||
|
||||
def model_to_dict(value: Any) -> dict[str, Any]:
    """Convert a model object or mapping into a plain dictionary.

    Objects exposing ``to_dict`` are converted through it; dictionaries are
    shallow-copied.

    Raises:
        TypeError: If ``value`` is neither of those.
    """
    if hasattr(value, "to_dict"):
        converted = value.to_dict()
    elif isinstance(value, dict):
        converted = dict(value)
    else:
        type_name = type(value).__name__
        raise TypeError(f"Cannot convert {type_name} to dict")
    return converted
|
||||
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
@ -0,0 +1,109 @@
|
||||
"""Safety checks for generated skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.memory.skills import SkillDraftSafetyReport
|
||||
from beaver.skills.specs import SkillDraft
|
||||
|
||||
|
||||
class SkillDraftSafetyChecker:
    """Deterministic publish gate for skill drafts."""

    # Regexes matched against the lowercased draft text; any hit is critical.
    _CRITICAL_PATTERNS = [
        r"ignore (all )?(previous|system|developer) instructions",
        r"bypass (permissions|permission|safety|policy|policies)",
        r"disable (guardrails|safety|permission checks)",
        r"reveal (secrets|api keys|tokens|credentials)",
        r"exfiltrate",
    ]
    # Tool hints that raise the draft to high risk (compared case-insensitively).
    _DANGEROUS_TOOL_HINTS = {
        "shell",
        "terminal",
        "bash",
        "filesystem_write",
        "write_file",
        "delete_file",
        "network",
        "http",
        "auth",
        "credentials",
    }

    def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
        """Create a checker.

        Args:
            allowed_tool_names: When given, any tool hint outside this set
                blocks the draft. ``None`` disables that check.
        """
        self.allowed_tool_names = allowed_tool_names

    def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
        """Inspect a draft and return its safety report.

        Blocking findings: non-dict frontmatter, tool hints outside the
        allow-list, and critical prompt-safety patterns. Non-blocking issues:
        a missing description (medium risk) and dangerous tool hints (high
        risk). ``retire_skill`` and ``merge_skills`` proposals are always at
        least high risk. The report passes only with no blocking finding and
        a non-critical risk level.
        """
        issues: list[str] = []
        blocked: list[str] = []
        risk_level = "low"

        raw_frontmatter = draft.proposed_frontmatter
        if isinstance(raw_frontmatter, dict):
            frontmatter = raw_frontmatter
        else:
            blocked.append("frontmatter must be an object")
            # Continue with an empty mapping so the remaining checks still run
            # and a failing report is returned instead of an AttributeError.
            frontmatter = {}
        description = str(frontmatter.get("description") or "").strip()
        if not description and draft.proposal_kind != "retire_skill":
            issues.append("frontmatter.description is missing")
            risk_level = _max_risk(risk_level, "medium")

        tool_hints = _tool_hints(frontmatter)
        if self.allowed_tool_names is not None:
            unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
            if unknown:
                blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
        dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
        if dangerous:
            issues.append(f"dangerous tool hints require high-risk review: {', '.join(dangerous)}")
            risk_level = _max_risk(risk_level, "high")

        # Scan the raw frontmatter too, so text in a malformed one is not missed.
        content = f"{draft.proposed_content}\n{raw_frontmatter}".lower()
        for pattern in self._CRITICAL_PATTERNS:
            if re.search(pattern, content):
                blocked.append(f"critical prompt-safety pattern matched: {pattern}")
                risk_level = "critical"

        if draft.proposal_kind in {"retire_skill", "merge_skills"}:
            risk_level = _max_risk(risk_level, "high")

        passed = not blocked and risk_level != "critical"
        return SkillDraftSafetyReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            passed=passed,
            risk_level=risk_level,
            issues=issues,
            blocked_reasons=blocked,
            suggested_fix=_suggest_fix(blocked, issues),
            created_at=_utc_now(),
        )
|
||||
|
||||
|
||||
def _tool_hints(frontmatter: dict) -> list[str]:
    """Read the ``tools`` entry of a frontmatter mapping as a list of names.

    A list is stringified item by item, a string is split on commas, and any
    other value yields an empty list. Names are stripped and blanks dropped.
    """
    declared = frontmatter.get("tools")
    if isinstance(declared, str):
        pieces = declared.split(",")
    elif isinstance(declared, list):
        pieces = [str(entry) for entry in declared]
    else:
        return []
    return [piece.strip() for piece in pieces if piece.strip()]
|
||||
|
||||
|
||||
def _max_risk(left: str, right: str) -> str:
    """Return the more severe of two risk levels (``left`` wins ties).

    Severity order is ``low < medium < high < critical``; any other level
    raises ``KeyError``.
    """
    severity = {"low": 0, "medium": 1, "high": 2, "critical": 3}
    # max() keeps the first argument on equal keys, so ties resolve to left.
    return max(left, right, key=severity.__getitem__)
|
||||
|
||||
|
||||
def _suggest_fix(blocked: list[str], issues: list[str]) -> str:
    """Pick the remediation hint for a safety report.

    Blocking reasons take precedence over plain issues; with neither, the
    hint is an empty string.
    """
    if not blocked and not issues:
        return ""
    return (
        "Remove blocked instructions or invalid tool hints before review."
        if blocked
        else "Review the flagged issues before publishing."
    )
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current time in UTC as an ISO-8601 string."""
    import datetime as _dt

    moment = _dt.datetime.now(_dt.timezone.utc)
    return moment.isoformat()
|
||||
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
@ -0,0 +1,293 @@
|
||||
"""Skill learning loop services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from itertools import combinations
|
||||
import re
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
|
||||
from beaver.memory.runs.store import RunMemoryStore
|
||||
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
|
||||
from beaver.memory.skills.store import SkillLearningStore
|
||||
from beaver.skills.drafts.service import DraftService
|
||||
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
|
||||
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
|
||||
|
||||
@dataclass(slots=True)
class RunReceiptContext:
    """A finished run together with the skill-effect records it produced."""

    # Record describing the run itself.
    run_record: RunRecord
    # Skill-effect records belonging to the run; defaults to an empty list.
    effect_records: list[SkillEffectRecord] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillLearningService:
|
||||
def __init__(
    self,
    *,
    run_store: RunMemoryStore,
    learning_store: SkillLearningStore,
    draft_service: DraftService,
    evidence_selector: EvidenceSelector,
    synthesizer: SkillDraftSynthesizer | None = None,
) -> None:
    """Store the collaborators used by the learning loop.

    Args:
        run_store: Store for run and skill-effect records.
        learning_store: Store for learning candidates.
        draft_service: Service that creates skill drafts.
        evidence_selector: Builder of evidence packets.
        synthesizer: Draft synthesizer; a default ``SkillDraftSynthesizer``
            is created when omitted.
    """
    self.run_store, self.learning_store = run_store, learning_store
    self.draft_service, self.evidence_selector = draft_service, evidence_selector
    # Truthiness (not an ``is None`` test) decides whether the default is used.
    self.synthesizer = synthesizer if synthesizer else SkillDraftSynthesizer()
|
||||
|
||||
def collect_run_receipts(
    self,
    run_result_context: RunReceiptContext,
    *,
    generate_candidates: bool = True,
) -> list[SkillLearningCandidate]:
    """Persist a run's receipts, rescore skills, and optionally build candidates.

    Args:
        run_result_context: The run record plus its skill-effect records.
        generate_candidates: When false, candidate generation is skipped.

    Returns:
        The candidates from ``build_learning_candidates``, or an empty list
        when generation is disabled.
    """
    self.run_store.append_run_record(run_result_context.run_record)
    for effect_record in run_result_context.effect_records:
        self.run_store.append_skill_effect(effect_record)
    # Scores are refreshed regardless of whether candidates are generated.
    self.rescore_skill_versions()
    return self.build_learning_candidates() if generate_candidates else []
|
||||
|
||||
def build_learning_candidates(self) -> list[SkillLearningCandidate]:
    """Build all candidate kinds and record the ones not yet stored.

    Candidates come from the revision, new-skill, merge and retire builders,
    in that order. Only candidates whose id is not already known to the
    learning store are recorded, but every built candidate is returned.
    """
    candidates: list[SkillLearningCandidate] = [
        *self._build_revision_candidates(),
        *self._build_new_skill_candidates(),
        *self._build_merge_candidates(),
        *self._build_retire_candidates(),
    ]
    known_ids = {stored.candidate_id for stored in self.learning_store.list_learning_candidates()}
    for candidate in candidates:
        if candidate.candidate_id in known_ids:
            continue
        self.learning_store.record_learning_candidate(candidate)
        # Track the id so a duplicate within this batch is recorded only once.
        known_ids.add(candidate.candidate_id)
    return candidates
|
||||
|
||||
async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
|
||||
candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
|
||||
candidate = candidates.get(candidate_id)
|
||||
if candidate is None:
|
||||
raise ValueError(f"Unknown learning candidate: {candidate_id}")
|
||||
if candidate.kind == "retire_skill":
|
||||
target_skill = candidate.related_skill_names[0]
|
||||
return self.draft_service.create_retire_proposal(
|
||||
skill_name=target_skill,
|
||||
base_version=candidate.evidence.get("skill_version"),
|
||||
created_by="learning-loop",
|
||||
reason=candidate.reason,
|
||||
evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
|
||||
)
|
||||
packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
|
||||
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
model = (
|
||||
provider_bundle.auxiliary_runtime.model
|
||||
if provider_bundle.auxiliary_runtime is not None
|
||||
else provider_bundle.main_runtime.model
|
||||
)
|
||||
if candidate.kind == "new_skill":
|
||||
payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
|
||||
return self.draft_service.create_new_skill_draft(
|
||||
skill_name=self._suggest_skill_name(candidate, packet),
|
||||
proposed_content=payload["content"],
|
||||
proposed_frontmatter=payload["frontmatter"],
|
||||
created_by="learning-loop",
|
||||
reason=payload["change_reason"] or candidate.reason,
|
||||
evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
|
||||
)
|
||||
if candidate.kind == "merge_skills":
|
||||
target_name = self._suggest_skill_name(candidate, packet)
|
||||
payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
|
||||
return self.draft_service.create_merge_draft(
|
||||
skill_name=target_name,
|
||||
base_version=None,
|
||||
proposed_content=payload["content"],
|
||||
proposed_frontmatter=payload["frontmatter"],
|
||||
created_by="learning-loop",
|
||||
reason=payload["change_reason"] or candidate.reason,
|
||||
evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
|
||||
)
|
||||
target_skill = candidate.related_skill_names[0]
|
||||
base_version = candidate.evidence.get("skill_version")
|
||||
payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
|
||||
return self.draft_service.create_revision_draft(
|
||||
skill_name=target_skill,
|
||||
base_version=base_version,
|
||||
proposed_content=payload["content"],
|
||||
proposed_frontmatter=payload["frontmatter"],
|
||||
created_by="learning-loop",
|
||||
reason=payload["change_reason"] or candidate.reason,
|
||||
evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
|
||||
)
|
||||
|
||||
def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
|
||||
snapshots: list[SkillPerformanceSnapshot] = []
|
||||
grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
|
||||
for record in self.run_store.list_runs():
|
||||
for receipt in record.activated_skills:
|
||||
key = (receipt.skill_name, receipt.skill_version)
|
||||
grouped.setdefault(key, [])
|
||||
for effect in self._all_effects():
|
||||
grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)
|
||||
for (skill_name, skill_version), effects in grouped.items():
|
||||
activation_count = len(effects)
|
||||
success_count = sum(1 for item in effects if item.success)
|
||||
failure_count = activation_count - success_count
|
||||
last_feedback = next((item.feedback_score for item in reversed(effects) if item.feedback_score is not None), None)
|
||||
latest_used = effects[-1].created_at if effects else ""
|
||||
snapshot = SkillPerformanceSnapshot(
|
||||
skill_name=skill_name,
|
||||
skill_version=skill_version,
|
||||
activation_count=activation_count,
|
||||
success_count=success_count,
|
||||
failure_count=failure_count,
|
||||
latest_used_at=latest_used,
|
||||
last_feedback_score=last_feedback,
|
||||
)
|
||||
self.learning_store.update_performance_snapshot(snapshot)
|
||||
snapshots.append(snapshot)
|
||||
return snapshots
|
||||
|
||||
def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
for snapshot in self.learning_store.list_low_performing_versions():
|
||||
runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5)
|
||||
if len(runs) < 2:
|
||||
continue
|
||||
candidate = SkillLearningCandidate(
|
||||
candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
|
||||
kind="revise_skill",
|
||||
source_run_ids=[record.run_id for record in runs],
|
||||
source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
|
||||
related_skill_names=[snapshot.skill_name],
|
||||
reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
|
||||
evidence={"skill_version": snapshot.skill_version},
|
||||
status="open",
|
||||
)
|
||||
candidates.append(candidate)
|
||||
return candidates
|
||||
|
||||
def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
|
||||
groups: dict[str, list[RunRecord]] = {}
|
||||
for record in self.run_store.list_runs():
|
||||
key = self._task_theme(record.task_text)
|
||||
if not key:
|
||||
continue
|
||||
groups.setdefault(key, []).append(record)
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
for theme, runs in groups.items():
|
||||
successful = [record for record in runs if record.success]
|
||||
if len(successful) < 2:
|
||||
continue
|
||||
if any(record.activated_skills for record in successful):
|
||||
continue
|
||||
candidate = SkillLearningCandidate(
|
||||
candidate_id=self._candidate_id("new", theme, str(len(successful))),
|
||||
kind="new_skill",
|
||||
source_run_ids=[record.run_id for record in successful[-5:]],
|
||||
source_session_ids=list(dict.fromkeys(record.session_id for record in successful[-5:])),
|
||||
related_skill_names=[],
|
||||
reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
|
||||
evidence={"theme": theme},
|
||||
status="open",
|
||||
)
|
||||
candidates.append(candidate)
|
||||
return candidates
|
||||
|
||||
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
|
||||
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
|
||||
for record in self.run_store.list_runs():
|
||||
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
|
||||
for pair in combinations(unique, 2):
|
||||
pair_counts.setdefault(pair, []).append(record)
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
for pair, runs in pair_counts.items():
|
||||
if len(runs) < 2:
|
||||
continue
|
||||
candidate = SkillLearningCandidate(
|
||||
candidate_id=self._candidate_id("merge", *pair),
|
||||
kind="merge_skills",
|
||||
source_run_ids=[record.run_id for record in runs[-5:]],
|
||||
source_session_ids=list(dict.fromkeys(record.session_id for record in runs[-5:])),
|
||||
related_skill_names=list(pair),
|
||||
reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
|
||||
evidence={"pair": list(pair)},
|
||||
status="open",
|
||||
)
|
||||
candidates.append(candidate)
|
||||
return candidates
|
||||
|
||||
def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
|
||||
for snapshot in self.learning_store.list_performance_snapshots():
|
||||
if snapshot.activation_count == 0 or not snapshot.latest_used_at:
|
||||
continue
|
||||
latest_used = self._parse_timestamp(snapshot.latest_used_at)
|
||||
if latest_used is None or latest_used > cutoff:
|
||||
continue
|
||||
runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3)
|
||||
candidate = SkillLearningCandidate(
|
||||
candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
|
||||
kind="retire_skill",
|
||||
source_run_ids=[record.run_id for record in runs],
|
||||
source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
|
||||
related_skill_names=[snapshot.skill_name],
|
||||
reason=(
|
||||
f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
|
||||
f"since {snapshot.latest_used_at} and may be ready for retirement."
|
||||
),
|
||||
evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
|
||||
status="open",
|
||||
)
|
||||
candidates.append(candidate)
|
||||
return candidates
|
||||
|
||||
def _all_effects(self) -> list[SkillEffectRecord]:
|
||||
effects: list[SkillEffectRecord] = []
|
||||
for candidate in self.learning_store.list_performance_snapshots():
|
||||
effects.extend(self.run_store.list_skill_effects(candidate.skill_name, version=candidate.skill_version))
|
||||
if effects:
|
||||
return effects
|
||||
# Bootstrap from runs when there are no prior snapshots.
|
||||
for record in self.run_store.list_runs():
|
||||
for receipt in record.activated_skills:
|
||||
effects.extend(self.run_store.list_skill_effects(receipt.skill_name, version=receipt.skill_version))
|
||||
return effects
|
||||
|
||||
@staticmethod
|
||||
def _candidate_id(kind: str, *parts: str) -> str:
|
||||
return f"{kind}:{'|'.join(parts)}"
|
||||
|
||||
@staticmethod
|
||||
def _task_theme(task_text: str) -> str:
|
||||
cleaned = re.sub(r"\s+", " ", task_text.strip().lower())
|
||||
if not cleaned:
|
||||
return ""
|
||||
words = cleaned.split(" ")
|
||||
return " ".join(words[:8]).strip()
|
||||
|
||||
@staticmethod
|
||||
def _suggest_skill_name(candidate: SkillLearningCandidate, packet: EvidencePacket) -> str:
|
||||
if candidate.related_skill_names:
|
||||
return candidate.related_skill_names[0]
|
||||
if packet.task_summaries:
|
||||
seed = re.sub(r"[^a-z0-9]+", "-", packet.task_summaries[0].lower()).strip("-")
|
||||
if seed:
|
||||
return seed[:48]
|
||||
return f"generated-skill-{uuid4().hex[:8]}"
|
||||
|
||||
@staticmethod
|
||||
def _parse_timestamp(value: str) -> datetime | None:
|
||||
try:
|
||||
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
return parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed.astimezone(timezone.utc)
|
||||
118
app-instance/backend/beaver/skills/learning/synthesizer.py
Normal file
118
app-instance/backend/beaver/skills/learning/synthesizer.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""LLM-backed draft synthesis for skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider
|
||||
from beaver.skills.learning.evidence import EvidencePacket
|
||||
from beaver.memory.skills.models import SkillLearningCandidate
|
||||
|
||||
|
||||
class SkillDraftSynthesizer:
    """Ask an LLM provider for a skill draft and fall back to an evidence summary.

    Every public method returns a dict with the keys ``frontmatter`` (dict),
    ``content`` (str) and ``change_reason`` (str).
    """

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``revise`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "revise")

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``new`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "new")

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a draft using the ``merge`` action."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "merge")

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
    ) -> dict[str, Any]:
        """Run one chat completion and return the parsed payload, or the fallback payload.

        The fallback is used whenever the response is not a usable JSON
        payload (see ``_parse_payload``). Errors raised by ``provider.chat``
        are not caught here.
        """
        prompt = self._build_prompt(candidate, evidence_packet, action)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You synthesize Beaver skill drafts from execution evidence. "
                        "Return only JSON with keys: frontmatter, content, change_reason."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=1500,
            temperature=0,
        )
        payload = self._parse_payload(response.content or "")
        if payload:
            return payload
        return self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
        """Render the user prompt from the candidate and its evidence packet."""
        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
            f"Reason: {candidate.reason}\n"
            f"Related skills: {candidate.related_skill_names}\n"
            f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
            + "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
            + "\n\nReturn JSON only."
        )

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Parse the model response into a draft payload; return ``{}`` when unusable.

        A response wrapped in a Markdown code fence is unwrapped first. The
        payload is rejected when it is not a JSON object, when ``frontmatter``
        is not a dict, or when ``content`` is not a non-blank string.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return {}
        if not isinstance(payload, dict):
            return {}
        frontmatter = payload.get("frontmatter")
        content_value = payload.get("content")
        if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
            return {}
        body = content_value.strip()
        if not body:
            # A blank body would yield a draft with nothing to publish, so let
            # the caller use the evidence-based fallback instead.
            return {}
        return {
            "frontmatter": frontmatter,
            "content": body,
            "change_reason": str(payload.get("change_reason") or ""),
        }

    @staticmethod
    def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
        """Build a minimal draft payload from up to five task summaries without calling the model."""
        related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
        title = related.replace("_", "-")
        content = "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5]) or "- No evidence captured."
        return {
            "frontmatter": {
                "description": candidate.reason or f"Auto-generated {action} draft for {title}.",
                "tools": [],
            },
            "content": f"# {title}\n\n## Evidence\n\n{content}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
        }
|
||||
175
app-instance/backend/beaver/skills/learning/worker.py
Normal file
175
app-instance/backend/beaver/skills/learning/worker.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""Background worker for assisted skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillLearningCandidate
|
||||
from beaver.skills.learning.pipeline import SkillLearningPipelineService
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningWorkerConfig:
|
||||
enabled: bool = True
|
||||
max_drafts_per_run: int = 5
|
||||
max_retries: int = 3
|
||||
interval_seconds: float = 300.0
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "SkillLearningWorkerConfig":
|
||||
return cls(
|
||||
enabled=_env_bool("BEAVER_SKILL_LEARNING_WORKER_ENABLED", True),
|
||||
max_drafts_per_run=_env_int("BEAVER_SKILL_LEARNING_MAX_DRAFTS_PER_RUN", 5),
|
||||
max_retries=_env_int("BEAVER_SKILL_LEARNING_MAX_RETRIES", 3),
|
||||
interval_seconds=float(os.getenv("BEAVER_SKILL_LEARNING_INTERVAL_SECONDS", "300") or "300"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningWorkerResult:
|
||||
processed: int = 0
|
||||
succeeded: int = 0
|
||||
failed: int = 0
|
||||
skipped: int = 0
|
||||
failures: list[dict[str, str]] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"processed": self.processed,
|
||||
"succeeded": self.succeeded,
|
||||
"failed": self.failed,
|
||||
"skipped": self.skipped,
|
||||
"failures": [dict(item) for item in self.failures],
|
||||
}
|
||||
|
||||
|
||||
class SkillLearningWorker:
    """Synthesizes drafts for open candidates; never approves or publishes."""

    # Candidate statuses that mean a draft for the same skill is already in flight.
    _ACTIVE_DRAFT_STATUSES = {"queued", "synthesizing", "draft_ready", "review_pending", "approved"}

    def __init__(
        self,
        *,
        pipeline: SkillLearningPipelineService,
        provider_bundle_factory: Callable[[], ProviderBundle],
        config: SkillLearningWorkerConfig | None = None,
    ) -> None:
        """Store collaborators; the config is read from the environment when omitted."""
        self.pipeline = pipeline
        self.provider_bundle_factory = provider_bundle_factory
        self.config = config or SkillLearningWorkerConfig.from_env()
        self._running = False
        # Serializes passes so overlapping ``run_once`` calls cannot interleave.
        self._lock = asyncio.Lock()

    async def run_forever(self) -> None:
        """Run passes separated by ``interval_seconds`` until ``stop`` is called.

        Returns immediately when the worker is disabled. ``stop`` is only
        observed between iterations; it does not interrupt a pass or the sleep
        in progress.
        """
        if not self.config.enabled:
            return
        self._running = True
        try:
            while self._running:
                await self.run_once()
                await asyncio.sleep(self.config.interval_seconds)
        finally:
            self._running = False

    def stop(self) -> None:
        """Ask ``run_forever`` to exit after its current iteration."""
        self._running = False

    async def run_once(self) -> SkillLearningWorkerResult:
        """Process up to ``max_drafts_per_run`` selected candidates and report the outcome.

        A candidate that raises is counted as failed, recorded in
        ``failures``, and handed to ``_mark_failure``; the pass then continues
        with the next candidate.
        """
        if not self.config.enabled:
            return SkillLearningWorkerResult()
        async with self._lock:
            result = SkillLearningWorkerResult()
            candidates = self._select_candidates()
            for candidate in candidates[: self._batch_limit()]:
                result.processed += 1
                try:
                    handled = await self._process_candidate(candidate)
                    if handled:
                        result.succeeded += 1
                    else:
                        result.skipped += 1
                except Exception as exc:
                    result.failed += 1
                    result.failures.append({"candidate_id": candidate.candidate_id, "error": str(exc)})
                    self._mark_failure(candidate, str(exc))
            return result

    def _batch_limit(self) -> int:
        """Return the per-pass candidate limit, never below zero.

        A negative limit used directly as a slice bound would drop candidates
        from the end of the list instead of disabling processing.
        """
        return max(self.config.max_drafts_per_run, 0)

    def _select_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates with retries left, highest priority/confidence/newest first."""
        candidates = [
            item
            for item in self.pipeline.list_candidates()
            if item.status == "open" and item.retry_count < self.config.max_retries
        ]
        return sorted(candidates, key=lambda item: (item.priority, item.confidence, item.created_at), reverse=True)

    async def _process_candidate(self, candidate: SkillLearningCandidate) -> bool:
        """Synthesize, safety-check, and evaluate a draft for one candidate.

        Returns ``False`` when the candidate is superseded by an active draft
        for the same skill, and ``True`` otherwise. Evaluation is not run for
        a draft that fails the safety check or is rated ``critical``.
        """
        if self._has_active_draft(candidate):
            self.pipeline.mark_candidate_superseded(candidate.candidate_id, "active draft already exists for this skill")
            return False
        self.pipeline.mark_candidate_queued(candidate.candidate_id)
        self.pipeline.mark_candidate_synthesizing(candidate.candidate_id)
        draft = await self.pipeline.synthesize_draft(
            candidate.candidate_id,
            provider_bundle=self.provider_bundle_factory(),
        )
        self.pipeline.mark_draft_synthesized(candidate.candidate_id, draft)
        safety = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
        if not safety.passed or safety.risk_level == "critical":
            return True
        await self.pipeline.evaluate_draft(
            candidate.candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=self.provider_bundle_factory(),
        )
        return True

    def _has_active_draft(self, candidate: SkillLearningCandidate) -> bool:
        """Tell whether another candidate with an active status shares a skill name with this one.

        Both the related skill names and the draft skill name are compared. A
        candidate that names no skill never conflicts.
        """
        target_names = set(candidate.related_skill_names)
        if candidate.draft_skill_name:
            target_names.add(candidate.draft_skill_name)
        if not target_names:
            return False
        for item in self.pipeline.list_candidates():
            if item.candidate_id == candidate.candidate_id:
                continue
            if item.status not in self._ACTIVE_DRAFT_STATUSES:
                continue
            item_names = set(item.related_skill_names)
            if item.draft_skill_name:
                item_names.add(item.draft_skill_name)
            if target_names.intersection(item_names):
                return True
        return False

    def _mark_failure(self, candidate: SkillLearningCandidate, error: str) -> None:
        """Record a failed attempt; the failure is terminal once ``max_retries`` is reached."""
        retry_count = candidate.retry_count + 1
        status = "failed" if retry_count >= self.config.max_retries else "open"
        self.pipeline.mark_candidate_failed(
            candidate.candidate_id,
            error,
            retry_count=retry_count,
            terminal=(status == "failed"),
        )
|
||||
|
||||
|
||||
def _env_bool(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Returns ``default`` when the variable is unset or blank. Otherwise the
    value is false for ``0``, ``false``, ``no`` or ``off`` (case-insensitive,
    surrounding whitespace ignored) and true for anything else.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    value = raw.strip().lower()
    if not value:
        # A blank value carries no intent; treat it like an unset variable.
        return default
    return value not in {"0", "false", "no", "off"}
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Read an integer from the environment, returning ``default`` when unset, empty, or invalid."""
    raw = os.getenv(name)
    if raw is None or raw == "":
        return default
    try:
        parsed = int(raw)
    except ValueError:
        return default
    return parsed
|
||||
@ -1,2 +1,6 @@
|
||||
"""Skill publishing and version switching."""
|
||||
"""Skill publish and rollback services."""
|
||||
|
||||
from .service import SkillPublisher
|
||||
|
||||
__all__ = ["SkillPublisher"]
|
||||
|
||||
188
app-instance/backend/beaver/skills/publisher/service.py
Normal file
188
app-instance/backend/beaver/skills/publisher/service.py
Normal file
@ -0,0 +1,188 @@
|
||||
"""Publishing, retirement, and rollback flows for Beaver skills."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
from beaver.skills.specs import SkillDraft, SkillReviewState, SkillSpec, SkillSpecStore, SkillStatus, SkillVersion
|
||||
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
|
||||
|
||||
|
||||
class SkillPublisher:
    """Applies approved drafts to the skill store and manages disable / rollback."""

    def __init__(self, store: SkillSpecStore) -> None:
        """Keep the spec store that every operation reads from and writes to."""
        self.store = store

    def publish(self, skill_name: str, draft_id: str, publisher: str, notes: str = "") -> SkillVersion:
        """Publish an approved draft as the next version and make it current.

        Writes the version, updates (or creates) the skill spec as active,
        marks the draft published, and refreshes the indexes.

        Raises:
            ValueError: if the draft is missing, not approved, is a
                ``retire_skill`` proposal, or renders to an empty body.
        """
        draft = self._require_draft(skill_name, draft_id)
        if draft.status != SkillReviewState.APPROVED.value:
            raise ValueError("Draft must be approved before publish")
        if draft.proposal_kind == "retire_skill":
            raise ValueError("Retire proposals must be applied through apply_retire_proposal")

        new_version = self._next_version(skill_name)
        rendered = self._render_skill_content(draft.proposed_frontmatter, draft.proposed_content)
        body = strip_frontmatter(rendered).strip()
        if not body:
            raise ValueError("Published skill content cannot be empty")

        provenance = {
            "draft_id": draft_id,
            "proposal_kind": draft.proposal_kind,
            "trigger_run_id": draft.trigger_run_id,
            "trigger_session_id": draft.trigger_session_id,
        }
        version = SkillVersion(
            skill_name=skill_name,
            version=new_version,
            content_hash=canonical_hash(rendered),
            summary_hash=canonical_hash(body),
            created_at=_utc_now(),
            created_by=publisher,
            change_reason=notes or draft.reason,
            parent_version=draft.base_version,
            review_state=SkillReviewState.PUBLISHED.value,
            frontmatter=normalize_frontmatter(draft.proposed_frontmatter),
            summary=summarize_skill_content(body),
            tool_hints=self.store._extract_tool_hints(normalize_frontmatter(draft.proposed_frontmatter)),
            provenance=provenance,
        )
        self.store.write_skill_version(version, rendered)
        self.store.set_current_version(skill_name, new_version)

        spec = self.store.get_skill_spec(skill_name)
        if spec is not None:
            spec.current_version = new_version
            spec.updated_at = _utc_now()
            spec.status = SkillStatus.ACTIVE.value
            if not spec.description:
                spec.description = str(version.frontmatter.get("description") or skill_name)
        else:
            # First publish of this skill: create its spec from the draft.
            description = str(version.frontmatter.get("description") or skill_name)
            spec = SkillSpec(
                name=skill_name,
                display_name=skill_name,
                description=description,
                created_at=_utc_now(),
                updated_at=_utc_now(),
                current_version=new_version,
                status=SkillStatus.ACTIVE.value,
                tags=[],
                owners=[publisher],
                source_kind="managed",
                lineage=[],
            )
        self.store.write_skill_spec(spec)

        draft.status = SkillReviewState.PUBLISHED.value
        self.store.write_draft(draft)
        self._refresh_indexes(skill_name, spec.status)
        return version

    def apply_retire_proposal(self, skill_name: str, draft_id: str, actor: str, notes: str = "") -> SkillSpec:
        """Disable a skill by applying an approved ``retire_skill`` proposal.

        Raises:
            ValueError: if the draft or spec is missing, the draft is not
                approved, is not a retire proposal, or targets a version other
                than the current one.
        """
        draft = self._require_draft(skill_name, draft_id)
        if draft.status != SkillReviewState.APPROVED.value:
            raise ValueError("Retire proposal must be approved before apply")
        if draft.proposal_kind != "retire_skill":
            raise ValueError("Only retire_skill proposals can be applied as retire proposals")

        spec = self._require_spec(skill_name)
        # Only a mismatch between two known versions blocks the retirement.
        versions_known = bool(draft.base_version and spec.current_version)
        if versions_known and draft.base_version != spec.current_version:
            raise ValueError(
                f"Retire proposal targets {draft.base_version}, but current version is {spec.current_version}"
            )

        reason = notes or draft.reason
        spec.status = SkillStatus.DISABLED.value
        spec.updated_at = _utc_now()
        self._add_owner(spec, actor)
        spec.lineage.append(f"retire_proposal:{draft_id}:{reason}")
        self.store.write_skill_spec(spec)

        draft.status = SkillReviewState.DISABLED.value
        self.store.write_draft(draft)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    def disable(self, skill_name: str, actor: str, reason: str) -> SkillSpec:
        """Mark a skill disabled, recording the actor as owner and the reason in its lineage.

        Raises:
            ValueError: if the skill spec does not exist.
        """
        spec = self._require_spec(skill_name)
        spec.status = SkillStatus.DISABLED.value
        spec.updated_at = _utc_now()
        self._add_owner(spec, actor)
        if reason:
            spec.lineage.append(f"disabled:{reason}")
        self.store.write_skill_spec(spec)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    def rollback(self, skill_name: str, target_version: str, actor: str, reason: str) -> SkillSpec:
        """Point a skill back at a previously published version and mark it active.

        Raises:
            ValueError: if the target version or the skill spec does not exist.
        """
        if self.store.read_published_skill(skill_name, target_version) is None:
            raise ValueError(f"Unknown skill version for rollback: {skill_name}/{target_version}")
        spec = self._require_spec(skill_name)
        spec.current_version = target_version
        spec.updated_at = _utc_now()
        spec.status = SkillStatus.ACTIVE.value
        if reason:
            spec.lineage.append(f"rollback:{target_version}:{reason}")
        self._add_owner(spec, actor)
        self.store.write_skill_spec(spec)
        self.store.set_current_version(skill_name, target_version)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    @staticmethod
    def _add_owner(spec: SkillSpec, actor: str) -> None:
        """Append ``actor`` to the spec's owners unless it is empty or already listed."""
        if actor and actor not in spec.owners:
            spec.owners.append(actor)

    def _next_version(self, skill_name: str) -> str:
        """Return the next ``vNNNN`` label after the highest numeric version in the store.

        Labels that do not start with ``v`` or have a non-numeric suffix are
        ignored; with no numeric label the result is ``v0001``.
        """
        labels = [label for label in self.store.list_versions(skill_name) if label.startswith("v")]
        if not labels:
            return "v0001"
        highest = 0
        numeric = [int(label[1:]) for label in labels if label[1:].isdigit()]
        if numeric:
            highest = max(numeric)
        return f"v{highest + 1:04d}"

    @staticmethod
    def _render_skill_content(frontmatter: dict, body: str) -> str:
        """Render frontmatter and body into one document ending with a newline.

        With empty normalized frontmatter only the stripped body is returned.
        List values become ``- item`` lines; other values are written as
        ``key: value`` using their string form.
        """
        normalized = normalize_frontmatter(frontmatter)
        stripped_body = body.strip()
        if not normalized:
            return stripped_body + ("\n" if stripped_body else "")
        rendered = ["---"]
        for key, value in normalized.items():
            if not isinstance(value, list):
                rendered.append(f"{key}: {value}")
                continue
            rendered.append(f"{key}:")
            rendered.extend(f" - {item}" for item in value)
        rendered.extend(["---", "", stripped_body])
        return "\n".join(rendered).rstrip() + "\n"

    def _refresh_indexes(self, skill_name: str, status: str) -> None:
        """Move the skill between the ``published`` and ``disabled`` indexes to match ``status``."""
        published = self.store.read_index("published")
        disabled = self.store.read_index("disabled")
        is_disabled = status == SkillStatus.DISABLED.value
        if is_disabled:
            if skill_name in published:
                published = [entry for entry in published if entry != skill_name]
            if skill_name not in disabled:
                disabled.append(skill_name)
        else:
            if skill_name not in published:
                published.append(skill_name)
            disabled = [entry for entry in disabled if entry != skill_name]
        self.store.update_index("published", published)
        self.store.update_index("disabled", disabled)

    def _require_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Return the stored draft or raise ``ValueError`` when it does not exist."""
        found = self.store.read_draft(skill_name, draft_id)
        if found is not None:
            return found
        raise ValueError(f"Draft not found: {skill_name}/{draft_id}")

    def _require_spec(self, skill_name: str) -> SkillSpec:
        """Return the stored skill spec or raise ``ValueError`` when it does not exist."""
        found = self.store.get_skill_spec(skill_name)
        if found is not None:
            return found
        raise ValueError(f"Skill spec not found: {skill_name}")
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    # Imported locally, as in the original.
    from datetime import datetime, timezone

    current = datetime.now(timezone.utc)
    return current.isoformat()
|
||||
@ -41,10 +41,20 @@ class RuntimeSkillResolver:
|
||||
|
||||
activated_skills: list[SkillContext] = []
|
||||
for name in selected:
|
||||
raw_content = self.loader.load_skill(name)
|
||||
record = self.loader.get_skill_record(name)
|
||||
raw_content = self.loader.load_published_skill(name)
|
||||
content = strip_frontmatter(raw_content).strip() if raw_content else ""
|
||||
if not content:
|
||||
continue
|
||||
activated_skills.append(SkillContext(name=name, content=content))
|
||||
activated_skills.append(
|
||||
SkillContext(
|
||||
name=name,
|
||||
content=content,
|
||||
version=record.version if record is not None else "legacy",
|
||||
content_hash=(record.content_hash if record is not None and record.content_hash else ""),
|
||||
activation_reason="always_skill",
|
||||
tool_hints=list(record.tool_hints) if record is not None else [],
|
||||
)
|
||||
)
|
||||
|
||||
return ResolvedSkillSet(activated_skills=activated_skills)
|
||||
|
||||
@ -1,2 +1,6 @@
|
||||
"""Skill review workflow."""
|
||||
"""Skill review services."""
|
||||
|
||||
from .service import ReviewService
|
||||
|
||||
__all__ = ["ReviewService"]
|
||||
|
||||
76
app-instance/backend/beaver/skills/reviews/service.py
Normal file
76
app-instance/backend/beaver/skills/reviews/service.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""Review workflow for Beaver skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpecStore
|
||||
|
||||
|
||||
class ReviewService:
|
||||
def __init__(self, store: SkillSpecStore) -> None:
    """Keep the spec store that review operations read drafts from and write to."""
    self.store = store
|
||||
|
||||
def submit_for_review(self, skill_name: str, draft_id: str, reviewer_request: str, requested_by: str = "system") -> SkillReviewRecord:
|
||||
draft = self._require_draft(skill_name, draft_id)
|
||||
draft.status = SkillReviewState.IN_REVIEW.value
|
||||
self.store.write_draft(draft)
|
||||
review = SkillReviewRecord(
|
||||
review_id=uuid4().hex,
|
||||
draft_id=draft_id,
|
||||
skill_name=skill_name,
|
||||
requested_at=_utc_now(),
|
||||
requested_by=requested_by,
|
||||
status=SkillReviewState.IN_REVIEW.value,
|
||||
notes=reviewer_request,
|
||||
)
|
||||
self.store.write_review(review)
|
||||
return review
|
||||
|
||||
def approve(self, skill_name: str, draft_id: str, reviewer: str, notes: str = "") -> SkillReviewRecord:
|
||||
draft = self._require_draft(skill_name, draft_id)
|
||||
draft.status = SkillReviewState.APPROVED.value
|
||||
self.store.write_draft(draft)
|
||||
review = SkillReviewRecord(
|
||||
review_id=uuid4().hex,
|
||||
draft_id=draft_id,
|
||||
skill_name=skill_name,
|
||||
requested_at=_utc_now(),
|
||||
requested_by=reviewer,
|
||||
status=SkillReviewState.APPROVED.value,
|
||||
reviewer=reviewer,
|
||||
reviewed_at=_utc_now(),
|
||||
notes=notes,
|
||||
)
|
||||
self.store.write_review(review)
|
||||
return review
|
||||
|
||||
def reject(self, skill_name: str, draft_id: str, reviewer: str, notes: str = "") -> SkillReviewRecord:
|
||||
draft = self._require_draft(skill_name, draft_id)
|
||||
draft.status = SkillReviewState.REJECTED.value
|
||||
self.store.write_draft(draft)
|
||||
review = SkillReviewRecord(
|
||||
review_id=uuid4().hex,
|
||||
draft_id=draft_id,
|
||||
skill_name=skill_name,
|
||||
requested_at=_utc_now(),
|
||||
requested_by=reviewer,
|
||||
status=SkillReviewState.REJECTED.value,
|
||||
reviewer=reviewer,
|
||||
reviewed_at=_utc_now(),
|
||||
notes=notes,
|
||||
)
|
||||
self.store.write_review(review)
|
||||
return review
|
||||
|
||||
def _require_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
|
||||
draft = self.store.read_draft(skill_name, draft_id)
|
||||
if draft is None:
|
||||
raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
|
||||
return draft
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
23
app-instance/backend/beaver/skills/specs/__init__.py
Normal file
23
app-instance/backend/beaver/skills/specs/__init__.py
Normal file
@ -0,0 +1,23 @@
|
||||
"""Structured skill lifecycle models and storage."""
|
||||
|
||||
from .models import (
|
||||
SkillActivationReceipt,
|
||||
SkillDraft,
|
||||
SkillReviewRecord,
|
||||
SkillReviewState,
|
||||
SkillSpec,
|
||||
SkillStatus,
|
||||
SkillVersion,
|
||||
)
|
||||
from .storage import SkillSpecStore
|
||||
|
||||
__all__ = [
|
||||
"SkillActivationReceipt",
|
||||
"SkillDraft",
|
||||
"SkillReviewRecord",
|
||||
"SkillReviewState",
|
||||
"SkillSpec",
|
||||
"SkillSpecStore",
|
||||
"SkillStatus",
|
||||
"SkillVersion",
|
||||
]
|
||||
267
app-instance/backend/beaver/skills/specs/models.py
Normal file
267
app-instance/backend/beaver/skills/specs/models.py
Normal file
@ -0,0 +1,267 @@
|
||||
"""Structured models for Beaver skill lifecycle."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class SkillReviewState(str, Enum):
    """States a skill draft or version can carry in the review lifecycle.

    The ``str`` mix-in makes each member compare equal to its string value.
    """

    DRAFT = "draft"
    IN_REVIEW = "in_review"
    APPROVED = "approved"
    REJECTED = "rejected"
    PUBLISHED = "published"
    DISABLED = "disabled"
    ARCHIVED = "archived"
|
||||
|
||||
|
||||
class SkillStatus(str, Enum):
    """Status values stored on a skill spec.

    The ``str`` mix-in makes each member compare equal to its string value.
    """

    ACTIVE = "active"
    DISABLED = "disabled"
    ARCHIVED = "archived"
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SkillSpec:
    """Top-level metadata record for one skill, convertible to/from a dict."""

    name: str
    display_name: str
    description: str
    created_at: str
    updated_at: str
    current_version: str | None
    status: str = SkillStatus.ACTIVE.value
    tags: list[str] = field(default_factory=list)
    owners: list[str] = field(default_factory=list)
    source_kind: str = "workspace"
    lineage: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a dict snapshot; list fields are shallow-copied."""
        return dict(
            name=self.name,
            display_name=self.display_name,
            description=self.description,
            created_at=self.created_at,
            updated_at=self.updated_at,
            current_version=self.current_version,
            status=self.status,
            tags=list(self.tags),
            owners=list(self.owners),
            source_kind=self.source_kind,
            lineage=list(self.lineage),
        )

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillSpec":
        """Build a spec from a mapping, substituting fallbacks for gaps.

        ``name`` is required (``KeyError`` when absent).  ``display_name``
        falls back to the name, ``description`` to the display name and then
        the name, and ``updated_at`` to ``created_at``.
        """
        read = payload.get
        name = str(payload["name"])
        display_name = str(read("display_name") or payload["name"])
        description = str(read("description") or read("display_name") or payload["name"])
        created_at = str(read("created_at") or "")
        updated_at = str(read("updated_at") or read("created_at") or "")
        return cls(
            name=name,
            display_name=display_name,
            description=description,
            created_at=created_at,
            updated_at=updated_at,
            current_version=_coerce_optional_str(read("current_version")),
            status=str(read("status") or SkillStatus.ACTIVE.value),
            tags=_coerce_string_list(read("tags")),
            owners=_coerce_string_list(read("owners")),
            source_kind=str(read("source_kind") or "workspace"),
            lineage=_coerce_string_list(read("lineage")),
        )
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SkillVersion:
    """Metadata describing one stored version of a skill."""

    skill_name: str
    version: str
    content_hash: str
    summary_hash: str
    created_at: str
    created_by: str
    change_reason: str
    parent_version: str | None = None
    review_state: str = SkillReviewState.PUBLISHED.value
    frontmatter: dict[str, Any] = field(default_factory=dict)
    summary: str = ""
    tool_hints: list[str] = field(default_factory=list)
    provenance: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a dict snapshot; dict and list fields are shallow-copied."""
        return dict(
            skill_name=self.skill_name,
            version=self.version,
            content_hash=self.content_hash,
            summary_hash=self.summary_hash,
            created_at=self.created_at,
            created_by=self.created_by,
            change_reason=self.change_reason,
            parent_version=self.parent_version,
            review_state=self.review_state,
            frontmatter=dict(self.frontmatter),
            summary=self.summary,
            tool_hints=list(self.tool_hints),
            provenance=dict(self.provenance),
        )

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillVersion":
        """Build a version record from a mapping.

        ``skill_name`` and ``version`` are required (``KeyError`` when
        absent); every other field falls back to an empty/default value.
        """
        read = payload.get
        skill_name = str(payload["skill_name"])
        version = str(payload["version"])
        return cls(
            skill_name=skill_name,
            version=version,
            content_hash=str(read("content_hash") or ""),
            summary_hash=str(read("summary_hash") or ""),
            created_at=str(read("created_at") or ""),
            created_by=str(read("created_by") or "unknown"),
            change_reason=str(read("change_reason") or ""),
            parent_version=_coerce_optional_str(read("parent_version")),
            review_state=str(read("review_state") or SkillReviewState.PUBLISHED.value),
            frontmatter=dict(read("frontmatter") or {}),
            summary=str(read("summary") or ""),
            tool_hints=_coerce_string_list(read("tool_hints")),
            provenance=dict(read("provenance") or {}),
        )
|
||||
|
||||
|
||||
@dataclass(slots=True)
class SkillDraft:
    """A proposed change to a skill, held until it is reviewed."""

    draft_id: str
    skill_name: str
    base_version: str | None
    proposed_content: str
    proposed_frontmatter: dict[str, Any]
    created_at: str
    created_by: str
    trigger_run_id: str | None = None
    trigger_session_id: str | None = None
    reason: str = ""
    status: str = SkillReviewState.DRAFT.value
    evidence_refs: list[dict[str, Any]] = field(default_factory=list)
    proposal_kind: str = "revise_skill"

    def to_dict(self) -> dict[str, Any]:
        """Return a dict snapshot; dict and list fields are shallow-copied."""
        return dict(
            draft_id=self.draft_id,
            skill_name=self.skill_name,
            base_version=self.base_version,
            proposed_content=self.proposed_content,
            proposed_frontmatter=dict(self.proposed_frontmatter),
            created_at=self.created_at,
            created_by=self.created_by,
            trigger_run_id=self.trigger_run_id,
            trigger_session_id=self.trigger_session_id,
            reason=self.reason,
            status=self.status,
            evidence_refs=list(self.evidence_refs),
            proposal_kind=self.proposal_kind,
        )

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillDraft":
        """Build a draft from a mapping.

        ``draft_id`` and ``skill_name`` are required (``KeyError`` when
        absent); every other field falls back to an empty/default value.
        """
        read = payload.get
        draft_id = str(payload["draft_id"])
        skill_name = str(payload["skill_name"])
        return cls(
            draft_id=draft_id,
            skill_name=skill_name,
            base_version=_coerce_optional_str(read("base_version")),
            proposed_content=str(read("proposed_content") or ""),
            proposed_frontmatter=dict(read("proposed_frontmatter") or {}),
            created_at=str(read("created_at") or ""),
            created_by=str(read("created_by") or "unknown"),
            trigger_run_id=_coerce_optional_str(read("trigger_run_id")),
            trigger_session_id=_coerce_optional_str(read("trigger_session_id")),
            reason=str(read("reason") or ""),
            status=str(read("status") or SkillReviewState.DRAFT.value),
            evidence_refs=list(read("evidence_refs") or []),
            proposal_kind=str(read("proposal_kind") or "revise_skill"),
        )
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillReviewRecord:
|
||||
review_id: str
|
||||
draft_id: str
|
||||
skill_name: str
|
||||
requested_at: str
|
||||
requested_by: str
|
||||
status: str
|
||||
reviewer: str | None = None
|
||||
reviewed_at: str | None = None
|
||||
notes: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"review_id": self.review_id,
|
||||
"draft_id": self.draft_id,
|
||||
"skill_name": self.skill_name,
|
||||
"requested_at": self.requested_at,
|
||||
"requested_by": self.requested_by,
|
||||
"status": self.status,
|
||||
"reviewer": self.reviewer,
|
||||
"reviewed_at": self.reviewed_at,
|
||||
"notes": self.notes,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillReviewRecord":
|
||||
return cls(
|
||||
review_id=str(payload["review_id"]),
|
||||
draft_id=str(payload["draft_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
requested_at=str(payload.get("requested_at") or ""),
|
||||
requested_by=str(payload.get("requested_by") or "unknown"),
|
||||
status=str(payload.get("status") or SkillReviewState.IN_REVIEW.value),
|
||||
reviewer=_coerce_optional_str(payload.get("reviewer")),
|
||||
reviewed_at=_coerce_optional_str(payload.get("reviewed_at")),
|
||||
notes=str(payload.get("notes") or ""),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillActivationReceipt:
|
||||
run_id: str
|
||||
session_id: str
|
||||
skill_name: str
|
||||
skill_version: str
|
||||
content_hash: str
|
||||
activated_at: str
|
||||
activation_reason: str
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"run_id": self.run_id,
|
||||
"session_id": self.session_id,
|
||||
"skill_name": self.skill_name,
|
||||
"skill_version": self.skill_version,
|
||||
"content_hash": self.content_hash,
|
||||
"activated_at": self.activated_at,
|
||||
"activation_reason": self.activation_reason,
|
||||
"tool_hints": list(self.tool_hints),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillActivationReceipt":
|
||||
return cls(
|
||||
run_id=str(payload["run_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
skill_version=str(payload["skill_version"]),
|
||||
content_hash=str(payload.get("content_hash") or ""),
|
||||
activated_at=str(payload.get("activated_at") or ""),
|
||||
activation_reason=str(payload.get("activation_reason") or ""),
|
||||
tool_hints=_coerce_string_list(payload.get("tool_hints")),
|
||||
)
|
||||
|
||||
|
||||
def _coerce_optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
|
||||
def _coerce_string_list(value: Any) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
result: list[str] = []
|
||||
for item in value:
|
||||
text = str(item).strip()
|
||||
if text:
|
||||
result.append(text)
|
||||
return result
|
||||
42
app-instance/backend/beaver/skills/specs/serialization.py
Normal file
42
app-instance/backend/beaver/skills/specs/serialization.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""Serialization helpers for structured skill lifecycle objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from hashlib import sha256
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
def json_dumps(payload: Any) -> str:
    """Serialise *payload* as JSON: 2-space indent, sorted keys, raw non-ASCII."""
    options = {"ensure_ascii": False, "indent": 2, "sort_keys": True}
    return json.dumps(payload, **options)
|
||||
|
||||
|
||||
def canonical_hash(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    digest = sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def normalize_frontmatter(frontmatter: dict[str, Any] | None) -> dict[str, Any]:
|
||||
raw = dict(frontmatter or {})
|
||||
normalized: dict[str, Any] = {}
|
||||
for key, value in raw.items():
|
||||
if value is None:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
normalized[str(key)] = cleaned
|
||||
continue
|
||||
if isinstance(value, list):
|
||||
items = [str(item).strip() for item in value if str(item).strip()]
|
||||
normalized[str(key)] = items
|
||||
continue
|
||||
normalized[str(key)] = value
|
||||
return normalized
|
||||
|
||||
|
||||
def summarize_skill_content(content: str, *, max_lines: int = 3, max_chars: int = 240) -> str:
    """Join the first non-blank lines of *content* into a short summary.

    Lines are stripped, blank ones skipped, the first *max_lines* joined with
    single spaces, and the result cut to *max_chars* and stripped again.
    """
    meaningful = [text for text in (line.strip() for line in content.splitlines()) if text]
    if not meaningful:
        return ""
    joined = " ".join(meaningful[:max_lines]).strip()
    return joined[:max_chars].strip()
|
||||
268
app-instance/backend/beaver/skills/specs/storage.py
Normal file
268
app-instance/backend/beaver/skills/specs/storage.py
Normal file
@ -0,0 +1,268 @@
|
||||
"""File-backed storage for Beaver skill lifecycle artifacts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from beaver.skills.catalog.utils import parse_frontmatter
|
||||
|
||||
from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillVersion
|
||||
from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LoadedSkillVersion:
|
||||
version: SkillVersion
|
||||
content: str
|
||||
|
||||
|
||||
class SkillSpecStore:
    """Manage structured skill lifecycle state inside the workspace.

    Files used under ``<workspace>/skills/<name>/``:

    * ``skill.json`` - the skill spec
    * ``current.json`` - pointer to the current version
    * ``SKILL.md`` - legacy single-file skill (reported as version ``"legacy"``)
    * ``versions/<version>/version.json`` and ``versions/<version>/SKILL.md``
    * ``drafts/draft-<id>.json`` and ``reviews/review-<id>.json``
    * ``archive/<version>/`` - versions moved out of ``versions/``

    Directories whose name starts with ``_`` (such as ``_index``) are not
    treated as skills.
    """

    def __init__(self, workspace: str | Path) -> None:
        """Bind to *workspace* and create ``skills/`` and ``skills/_index/``."""
        self.workspace = Path(workspace)
        self.root = self.workspace / "skills"
        self.index_dir = self.root / "_index"
        self.root.mkdir(parents=True, exist_ok=True)
        self.index_dir.mkdir(parents=True, exist_ok=True)

    def list_published_skill_names(self) -> list[str]:
        """Return names of skills that have a published form and are not inactive.

        A skill whose spec cannot be resolved is still listed; only a spec
        with a status other than ``"active"`` excludes it.
        """
        names: list[str] = []
        for child in self._iter_skill_dirs():
            if not self._has_published_representation(child):
                continue
            spec = self.get_skill_spec(child.name)
            if spec is not None and spec.status != "active":
                continue
            names.append(child.name)
        return names

    def list_skill_specs(self) -> list[SkillSpec]:
        """Return the spec of every skill directory that has a resolvable spec."""
        candidates = (self.get_skill_spec(name) for name in self.list_skill_names())
        return [spec for spec in candidates if spec is not None]

    def list_skill_names(self) -> list[str]:
        """Return the names of all skill directories, sorted."""
        return [child.name for child in self._iter_skill_dirs()]

    def get_skill_spec(self, name: str) -> SkillSpec | None:
        """Return the spec for *name*.

        Reads ``skill.json`` when present.  Otherwise, if a published version
        can be read, a spec is synthesised from it with
        ``source_kind="legacy"``.  Returns ``None`` when neither is possible.
        """
        directory = self._skill_dir(name)
        path = directory / "skill.json"
        if path.exists():
            return SkillSpec.from_dict(self._read_json(path))
        if not self._has_published_representation(directory):
            return None
        legacy = self.read_published_skill(name)
        if legacy is None:
            return None
        return SkillSpec(
            name=name,
            display_name=name,
            description=str(legacy.version.frontmatter.get("description") or name),
            created_at=legacy.version.created_at,
            updated_at=legacy.version.created_at,
            current_version=legacy.version.version,
            status="active",
            tags=[],
            owners=[],
            source_kind="legacy",
            lineage=[],
        )

    def write_skill_spec(self, spec: SkillSpec) -> None:
        """Write *spec* to ``skill.json``, creating the skill directory."""
        directory = self._skill_dir(spec.name)
        directory.mkdir(parents=True, exist_ok=True)
        self._write_json(directory / "skill.json", spec.to_dict())

    def get_current_version(self, name: str) -> str | None:
        """Return the current version id for *name*, or ``None`` if unknown.

        Precedence: ``current.json``, then ``"legacy"`` when a top-level
        ``SKILL.md`` exists, then ``current_version`` from ``skill.json``.
        """
        directory = self._skill_dir(name)
        current_path = directory / "current.json"
        if current_path.exists():
            return str(self._read_json(current_path).get("current_version") or "") or None
        if (directory / "SKILL.md").exists():
            return "legacy"
        # Read skill.json directly rather than calling get_skill_spec(): that
        # method falls back to read_published_skill(), which calls back into
        # this one, so a directory containing only ``versions/`` used to
        # recurse until RecursionError.
        spec_path = directory / "skill.json"
        if spec_path.exists():
            spec = SkillSpec.from_dict(self._read_json(spec_path))
            if spec.current_version:
                return spec.current_version
        return None

    def set_current_version(self, name: str, version: str) -> None:
        """Point ``current.json`` at *version* and mirror it into the spec."""
        directory = self._skill_dir(name)
        directory.mkdir(parents=True, exist_ok=True)
        self._write_json(directory / "current.json", {"current_version": version})
        spec = self.get_skill_spec(name)
        if spec is not None:
            spec.current_version = version
            self.write_skill_spec(spec)

    def list_versions(self, name: str) -> list[str]:
        """Return sorted version directory names under ``versions/``.

        Without a ``versions/`` directory the result is the current version
        alone, or an empty list when there is none.
        """
        directory = self._skill_dir(name) / "versions"
        if not directory.exists():
            current = self.get_current_version(name)
            return [current] if current else []
        return [child.name for child in sorted(directory.iterdir()) if child.is_dir()]

    def read_published_skill(self, name: str, version: str | None = None) -> LoadedSkillVersion | None:
        """Load a version's metadata and SKILL.md text.

        Args:
            name: Skill name.
            version: Version id; defaults to the current version.

        Returns:
            The loaded version, or ``None`` when no version can be determined
            or the required files are missing.  For ``"legacy"`` the metadata
            is derived from the top-level ``SKILL.md`` itself.
        """
        requested_version = version or self.get_current_version(name)
        if requested_version is None:
            return None

        directory = self._skill_dir(name)
        if requested_version == "legacy":
            skill_file = directory / "SKILL.md"
            if not skill_file.exists():
                return None
            content = skill_file.read_text(encoding="utf-8")
            frontmatter, body = parse_frontmatter(content)
            normalized_frontmatter = normalize_frontmatter(frontmatter)
            tool_hints = self._extract_tool_hints(normalized_frontmatter)
            loaded = SkillVersion(
                skill_name=name,
                version="legacy",
                content_hash=canonical_hash(content),
                summary_hash=canonical_hash(body),
                created_at="legacy",
                created_by="legacy",
                change_reason="legacy_import",
                review_state="published",
                frontmatter=normalized_frontmatter,
                summary=summarize_skill_content(body),
                tool_hints=tool_hints,
                provenance={"source_kind": "legacy"},
            )
            return LoadedSkillVersion(version=loaded, content=content)

        version_dir = directory / "versions" / requested_version
        version_file = version_dir / "version.json"
        skill_file = version_dir / "SKILL.md"
        if not version_file.exists() or not skill_file.exists():
            return None
        payload = self._read_json(version_file)
        loaded = SkillVersion.from_dict(payload)
        content = skill_file.read_text(encoding="utf-8")
        return LoadedSkillVersion(version=loaded, content=content)

    def write_skill_version(self, version: SkillVersion, content: str) -> None:
        """Write ``version.json`` and ``SKILL.md`` for *version*."""
        version_dir = self._skill_dir(version.skill_name) / "versions" / version.version
        version_dir.mkdir(parents=True, exist_ok=True)
        self._write_json(version_dir / "version.json", version.to_dict())
        self._write_text(version_dir / "SKILL.md", content)

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return drafts for one skill, or for every skill when none is given."""
        results: list[SkillDraft] = []
        names = [skill_name] if skill_name else self.list_skill_names()
        for name in names:
            if not name:
                continue
            drafts_dir = self._skill_dir(name) / "drafts"
            if not drafts_dir.exists():
                continue
            for path in sorted(drafts_dir.glob("draft-*.json")):
                results.append(SkillDraft.from_dict(self._read_json(path)))
        return results

    def read_draft(self, skill_name: str, draft_id: str) -> SkillDraft | None:
        """Return the stored draft, or ``None`` when its file does not exist."""
        path = self._skill_dir(skill_name) / "drafts" / f"draft-{draft_id}.json"
        if not path.exists():
            return None
        return SkillDraft.from_dict(self._read_json(path))

    def write_draft(self, draft: SkillDraft) -> None:
        """Write *draft* to ``drafts/draft-<id>.json``, replacing any old copy."""
        drafts_dir = self._skill_dir(draft.skill_name) / "drafts"
        drafts_dir.mkdir(parents=True, exist_ok=True)
        self._write_json(drafts_dir / f"draft-{draft.draft_id}.json", draft.to_dict())

    def list_reviews(self, skill_name: str, draft_id: str | None = None) -> list[SkillReviewRecord]:
        """Return review records for a skill, optionally limited to one draft."""
        reviews_dir = self._skill_dir(skill_name) / "reviews"
        if not reviews_dir.exists():
            return []
        results: list[SkillReviewRecord] = []
        for path in sorted(reviews_dir.glob("review-*.json")):
            record = SkillReviewRecord.from_dict(self._read_json(path))
            if draft_id and record.draft_id != draft_id:
                continue
            results.append(record)
        return results

    def write_review(self, review: SkillReviewRecord) -> None:
        """Write *review* to ``reviews/review-<id>.json``."""
        reviews_dir = self._skill_dir(review.skill_name) / "reviews"
        reviews_dir.mkdir(parents=True, exist_ok=True)
        self._write_json(reviews_dir / f"review-{review.review_id}.json", review.to_dict())

    def update_index(self, index_name: str, values: list[str]) -> None:
        """Write an index file holding *values* de-duplicated in first-seen order."""
        self._write_json(self.index_dir / f"{index_name}.json", {"items": list(dict.fromkeys(values))})

    def read_index(self, index_name: str) -> list[str]:
        """Return the items of an index file, or ``[]`` when it is missing.

        Items whose string form is blank are skipped.  A file whose top level
        is not a JSON object raises ``ValueError`` (from ``_read_json``).
        """
        path = self.index_dir / f"{index_name}.json"
        if not path.exists():
            return []
        # _read_json guarantees a dict, so no separate type check is needed.
        items = self._read_json(path).get("items")
        if not isinstance(items, list):
            return []
        return [str(item) for item in items if str(item).strip()]

    def archive_current_version(self, skill_name: str, version: str) -> None:
        """Move ``versions/<version>`` to ``archive/<version>``.

        Does nothing when the version directory is missing or the archive
        target already exists.
        """
        version_dir = self._skill_dir(skill_name) / "versions" / version
        if not version_dir.exists():
            return
        archive_dir = self._skill_dir(skill_name) / "archive" / version
        archive_dir.parent.mkdir(parents=True, exist_ok=True)
        if archive_dir.exists():
            return
        version_dir.rename(archive_dir)

    def _has_published_representation(self, directory: Path) -> bool:
        """Return True if *directory* has ``SKILL.md``, ``current.json`` or ``versions``."""
        return (
            (directory / "SKILL.md").exists()
            or (directory / "current.json").exists()
            or (directory / "versions").exists()
        )

    def _skill_dir(self, name: str) -> Path:
        """Return the directory for skill *name* (the name is not validated here)."""
        return self.root / name

    def _iter_skill_dirs(self) -> list[Path]:
        """Return sorted child directories of the root, skipping ``_``-prefixed ones."""
        return [
            child
            for child in sorted(self.root.iterdir())
            if child.is_dir() and not child.name.startswith("_")
        ]

    @staticmethod
    def _extract_tool_hints(frontmatter: dict[str, Any]) -> list[str]:
        """Read ``tools`` from frontmatter as a list or a comma-separated string."""
        raw = frontmatter.get("tools")
        if isinstance(raw, list):
            return [str(item).strip() for item in raw if str(item).strip()]
        if isinstance(raw, str):
            return [item.strip() for item in raw.split(",") if item.strip()]
        return []

    @staticmethod
    def _read_json(path: Path) -> dict[str, Any]:
        """Parse *path* as JSON; raise ``ValueError`` unless it holds an object."""
        payload = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise ValueError(f"Expected JSON object in {path}")
        return payload

    @staticmethod
    def _write_json(path: Path, payload: dict[str, Any]) -> None:
        """Write *payload* as JSON plus a trailing newline, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json_dumps(payload) + "\n", encoding="utf-8")

    @staticmethod
    def _write_text(path: Path, content: str) -> None:
        """Write *content* as UTF-8 text, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
|
||||
22
app-instance/backend/beaver/tasks/__init__.py
Normal file
22
app-instance/backend/beaver/tasks/__init__.py
Normal file
@ -0,0 +1,22 @@
|
||||
"""Internal task tracking for automatic Main Agent task mode."""
|
||||
|
||||
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult
|
||||
from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||
from .router import MainAgentRouter
|
||||
from .service import TaskService
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .validation import ValidationService
|
||||
|
||||
__all__ = [
|
||||
"MainAgentDecision",
|
||||
"MainAgentRouter",
|
||||
"TaskEvent",
|
||||
"TaskExecutionPlan",
|
||||
"TaskExecutionPlanner",
|
||||
"TaskRecord",
|
||||
"TaskService",
|
||||
"SkillResolutionReport",
|
||||
"TaskSkillResolver",
|
||||
"ValidationResult",
|
||||
"ValidationService",
|
||||
]
|
||||
178
app-instance/backend/beaver/tasks/models.py
Normal file
178
app-instance/backend/beaver/tasks/models.py
Normal file
@ -0,0 +1,178 @@
|
||||
"""Models for internal task tracking and validation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Status values for which TaskRecord.is_open reports True.
TASK_OPEN_STATUSES = {
    "open",
    "running",
    "validating",
    "awaiting_feedback",
    "needs_revision",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ValidationResult:
|
||||
passed: bool
|
||||
score: float
|
||||
issues: list[str] = field(default_factory=list)
|
||||
missing_requirements: list[str] = field(default_factory=list)
|
||||
recommended_revision_prompt: str = ""
|
||||
validator: str = "heuristic"
|
||||
|
||||
@property
|
||||
def accepted(self) -> bool:
|
||||
return self.passed and self.score >= 0.75
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"passed": self.passed,
|
||||
"score": self.score,
|
||||
"issues": list(self.issues),
|
||||
"missing_requirements": list(self.missing_requirements),
|
||||
"recommended_revision_prompt": self.recommended_revision_prompt,
|
||||
"validator": self.validator,
|
||||
"accepted": self.accepted,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
return cls(
|
||||
passed=bool(payload.get("passed")),
|
||||
score=float(payload.get("score", 0.0) or 0.0),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator=str(payload.get("validator") or "unknown"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskRecord:
|
||||
task_id: str
|
||||
session_id: str
|
||||
description: str
|
||||
goal: str
|
||||
constraints: list[str]
|
||||
priority: int
|
||||
status: str
|
||||
creator: str
|
||||
created_at: str
|
||||
updated_at: str
|
||||
parent_task_id: str | None = None
|
||||
closed_at: str | None = None
|
||||
close_reason: str | None = None
|
||||
satisfaction: float | None = None
|
||||
run_ids: list[str] = field(default_factory=list)
|
||||
skill_names: list[str] = field(default_factory=list)
|
||||
feedback: list[dict[str, Any]] = field(default_factory=list)
|
||||
validation_result: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def is_open(self) -> bool:
|
||||
return self.status in TASK_OPEN_STATUSES
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"task_id": self.task_id,
|
||||
"session_id": self.session_id,
|
||||
"parent_task_id": self.parent_task_id,
|
||||
"description": self.description,
|
||||
"goal": self.goal,
|
||||
"constraints": list(self.constraints),
|
||||
"priority": self.priority,
|
||||
"status": self.status,
|
||||
"creator": self.creator,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"closed_at": self.closed_at,
|
||||
"close_reason": self.close_reason,
|
||||
"satisfaction": self.satisfaction,
|
||||
"run_ids": list(self.run_ids),
|
||||
"skill_names": list(self.skill_names),
|
||||
"feedback": list(self.feedback),
|
||||
"validation_result": self.validation_result,
|
||||
"metadata": dict(self.metadata),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "TaskRecord":
|
||||
return cls(
|
||||
task_id=str(payload["task_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
parent_task_id=_optional_str(payload.get("parent_task_id")),
|
||||
description=str(payload.get("description") or ""),
|
||||
goal=str(payload.get("goal") or payload.get("description") or ""),
|
||||
constraints=[str(item) for item in payload.get("constraints") or []],
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status=str(payload.get("status") or "open"),
|
||||
creator=str(payload.get("creator") or "main-agent"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
updated_at=str(payload.get("updated_at") or ""),
|
||||
closed_at=_optional_str(payload.get("closed_at")),
|
||||
close_reason=_optional_str(payload.get("close_reason")),
|
||||
satisfaction=_optional_float(payload.get("satisfaction")),
|
||||
run_ids=[str(item) for item in payload.get("run_ids") or []],
|
||||
skill_names=[str(item) for item in payload.get("skill_names") or []],
|
||||
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
|
||||
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskEvent:
|
||||
event_id: str
|
||||
task_id: str
|
||||
session_id: str
|
||||
event_type: str
|
||||
created_at: str
|
||||
run_id: str | None = None
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"task_id": self.task_id,
|
||||
"session_id": self.session_id,
|
||||
"run_id": self.run_id,
|
||||
"event_type": self.event_type,
|
||||
"created_at": self.created_at,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "TaskEvent":
|
||||
return cls(
|
||||
event_id=str(payload["event_id"]),
|
||||
task_id=str(payload["task_id"]),
|
||||
session_id=str(payload["session_id"]),
|
||||
run_id=_optional_str(payload.get("run_id")),
|
||||
event_type=str(payload.get("event_type") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
payload=dict(payload.get("payload") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MainAgentDecision:
|
||||
mode: str
|
||||
reason: str
|
||||
starts_new_task: bool = False
|
||||
|
||||
@property
|
||||
def is_task(self) -> bool:
|
||||
return self.mode == "task"
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
|
||||
def _optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
288
app-instance/backend/beaver/tasks/planner.py
Normal file
288
app-instance/backend/beaver/tasks/planner.py
Normal file
@ -0,0 +1,288 @@
|
||||
"""Internal Task execution planner for single-agent vs team execution."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
|
||||
|
||||
# The two modes a plan can carry: "single" or "team".
TaskExecutionMode = Literal["single", "team"]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class TaskExecutionPlan:
|
||||
mode: TaskExecutionMode
|
||||
reason: str = ""
|
||||
graph: ExecutionGraph | None = None
|
||||
final_synthesis_instruction: str = ""
|
||||
fallback_error: str | None = None
|
||||
skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def is_team(self) -> bool:
|
||||
return self.mode == "team" and self.graph is not None
|
||||
|
||||
@classmethod
|
||||
def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
|
||||
return cls(mode="single", reason=reason, fallback_error=fallback_error)
|
||||
|
||||
def to_event_payload(self) -> dict[str, Any]:
|
||||
strategy = self.graph.strategy if self.graph is not None else None
|
||||
nodes = self.graph.nodes if self.graph is not None else []
|
||||
return {
|
||||
"plan_mode": self.mode,
|
||||
"reason": self.reason,
|
||||
"strategy": strategy,
|
||||
"node_ids": [node.node_id for node in nodes],
|
||||
"skill_queries": [
|
||||
str(node.agent.metadata.get("skill_query") or "")
|
||||
for node in nodes
|
||||
],
|
||||
"selected_skill_names": [
|
||||
name
|
||||
for node in nodes
|
||||
for name in node.inherited_pinned_skills
|
||||
],
|
||||
"generated_skill_draft_ids": [
|
||||
item.generated_skill_draft_id
|
||||
for item in self.skill_resolution_report
|
||||
if item.generated_skill_draft_id
|
||||
],
|
||||
"skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
|
||||
"fallback_error": self.fallback_error,
|
||||
}
|
||||
|
||||
|
||||
class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first."""

    # Upper bound on nodes accepted from a model-produced team plan.
    _MAX_NODES = 6
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}

    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
        """Store the optional resolver used to pin skills onto team nodes."""
        self.task_skill_resolver = task_skill_resolver

    async def plan(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> TaskExecutionPlan:
        """Ask a provider to choose single vs team execution for this attempt.

        The auxiliary provider/runtime is preferred, falling back to the main
        one. With no provider the result is a single-mode plan
        (``planner_provider_unavailable``). Any exception raised while calling
        the provider, parsing its reply, or resolving skills is turned into a
        single-mode plan (``planner_failed``) carrying the error text.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You choose whether an internal Beaver Task attempt should run as a single "
                            "main-agent pass or use a small sub-agent team first. Return only compact JSON."
                        ),
                    },
                    {
                        "role": "user",
                        "content": self._prompt(
                            task=task,
                            user_message=user_message,
                            attempt_index=attempt_index,
                            latest_validation=latest_validation,
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0.0,
            )
            plan = self.from_json(response.content or "")
            return await self._resolve_plan(
                plan,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
        except Exception as exc:
            # Planning is best-effort: degrade to a single-agent attempt.
            return TaskExecutionPlan.single("planner_failed", fallback_error=str(exc))

    async def _resolve_plan(
        self,
        plan: TaskExecutionPlan,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle | None,
    ) -> TaskExecutionPlan:
        """Run the skill resolver over a team plan's graph.

        Non-team plans, or planners without a resolver, are returned untouched.
        A missing provider bundle or any resolver/validation error yields a
        single-mode fallback plan whose ``fallback_error`` names the cause.
        """
        if not plan.is_team or self.task_skill_resolver is None:
            return plan
        if provider_bundle is None:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error="task_skill_resolver_provider_unavailable")
        try:
            # ``is_team`` already implies a graph; the assert only narrows the type.
            assert plan.graph is not None
            graph, reports = await self.task_skill_resolver.resolve_graph(
                plan.graph,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            graph.validate()
            plan.graph = graph
            plan.skill_resolution_report = reports
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")

    def from_json(self, text: str) -> TaskExecutionPlan:
        """Parse a planner reply into a plan.

        Any mode other than ``"team"`` gives a single-mode plan. A team reply
        is converted to a validated graph; any parsing or validation error
        yields a single-mode fallback carrying the error text.
        """
        try:
            payload = self._parse_json_object(text)
            mode = str(payload.get("mode") or "single").strip().lower()
            reason = str(payload.get("reason") or "")
            if mode != "team":
                return TaskExecutionPlan.single(reason or "planner_selected_single")

            graph = self._graph_from_payload(payload)
            graph.validate()
            return TaskExecutionPlan(
                mode="team",
                reason=reason or "planner_selected_team",
                graph=graph,
                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
            )
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
        """Build an ``ExecutionGraph`` from the parsed planner payload.

        Raises:
            ValueError: for an unsupported strategy, a missing/empty/oversized
                node list, a non-object node, or a node without id or task.
        """
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
        raw_nodes = payload.get("nodes")
        if not isinstance(raw_nodes, list) or not raw_nodes:
            raise ValueError("Team plan requires at least one node")
        if len(raw_nodes) > self._MAX_NODES:
            raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")

        nodes: list[ExecutionNode] = []
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
            skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip()
            requested_capabilities = _string_list(
                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
            )
            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
            node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
                raise ValueError("Each team node requires node_id/id and task")

            # Model output is loosely shaped: a lone dependency or skill may
            # arrive as a bare string. Iterating a string would split it into
            # characters, so identifier lists go through _string_list, which
            # also strips entries the same way node ids are stripped.
            depends_on = _string_list(item.get("depends_on"))
            pinned_skills = _string_list(item.get("pinned_skills"))
            # Constraints are free text (commas are legitimate), so a bare
            # string is kept whole instead of being comma-split.
            raw_constraints = item.get("constraints") or []
            if isinstance(raw_constraints, str):
                raw_constraints = [raw_constraints]

            nodes.append(
                ExecutionNode(
                    node_id=node_id,
                    task=task,
                    agent=AgentDescriptor(
                        name=node_id,
                        role="",
                        system_prompt="",
                        metadata={
                            "skill_query": skill_query,
                            "required_capabilities": requested_capabilities,
                            "requested_tags": requested_tags,
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    depends_on=depends_on,
                    inherited_pinned_skills=pinned_skills,
                    constraints=[str(value) for value in raw_constraints],
                    expected_output=str(item.get("expected_output") or "") or None,
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None,
    ) -> str:
        """Render the user-role planning prompt, including the reply schema.

        When a previous validation result is supplied, its dict form is
        appended as JSON under a "Previous validation issues" heading.
        """
        validation_note = ""
        if latest_validation is not None:
            validation_note = (
                "\nPrevious validation issues:\n"
                + json.dumps(latest_validation.to_dict(), ensure_ascii=False)
            )
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
            "would materially improve the result. Otherwise use mode=single.\n\n"
            "JSON schema:\n"
            "{\n"
            ' "mode": "single" | "team",\n'
            ' "reason": "short reason",\n'
            ' "strategy": "sequence" | "parallel" | "dag",\n'
            ' "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
            ' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
            f"{validation_note}"
        )

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and decode the JSON object embedded in *text*.

        Markdown code fences and a leading ``json`` tag are removed, then the
        span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            ValueError: when the text is not valid JSON (``JSONDecodeError``
                is a ``ValueError``) or decodes to something other than a dict.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("planner response must be a JSON object")
        return payload
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
if isinstance(value, str):
|
||||
value = [item.strip() for item in value.split(",")]
|
||||
else:
|
||||
return []
|
||||
result: list[str] = []
|
||||
for item in value:
|
||||
text = str(item).strip()
|
||||
if text and text not in result:
|
||||
result.append(text)
|
||||
return result
|
||||
40
app-instance/backend/beaver/tasks/router.py
Normal file
40
app-instance/backend/beaver/tasks/router.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""Main Agent routing between simple chat and internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from .models import MainAgentDecision, TaskRecord
|
||||
|
||||
|
||||
class MainAgentRouter:
    """Deterministic pre-classifier that runs ahead of the main AgentLoop.

    This first version deliberately makes no model call, so routing keeps
    working during provider outages. The rules are conservative: any message
    hinting at execution, files, tools, iteration, or validation is routed to
    Task mode.
    """

    _TASK_PATTERNS = [
        r"\b(implement|fix|debug|refactor|migrate|build|create|write|edit|update|test|validate|deploy)\b",
        r"\b(file|repo|code|project|backend|frontend|api|database|migration|pull request|ci|bug)\b",
        r"\b(step|multi-step|workflow|plan and|then)\b",
        r"(实现|修复|调试|重构|迁移|构建|创建|编写|修改|更新|测试|验证|部署|文件|代码|项目|前端|后端|接口|数据库|多步|任务)",
    ]
    _NEW_TASK_PATTERNS = [
        r"\b(new task|another task|different task|start over)\b",
        r"(新任务|另一个任务|换个任务|重新开始)",
    ]

    def classify(self, message: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
        """Route *message* to ``"task"`` or ``"simple"`` mode.

        Rules are checked in order: continuation of an active task that is
        awaiting feedback or needs revision (unless the message asks for a new
        task), a task-keyword match, a message longer than 240 characters, and
        finally the simple-question default.
        """
        text = message.strip()
        lowered = text.lower()

        def matches_any(patterns: list[str]) -> bool:
            # True as soon as one pattern is found in the lowered message.
            for pattern in patterns:
                if re.search(pattern, lowered, re.IGNORECASE):
                    return True
            return False

        starts_new = matches_any(self._NEW_TASK_PATTERNS)

        continuing = (
            active_task is not None
            and active_task.status in {"awaiting_feedback", "needs_revision"}
            and not starts_new
        )
        if continuing:
            return MainAgentDecision(mode="task", reason="continuing_open_task", starts_new_task=False)

        if matches_any(self._TASK_PATTERNS):
            return MainAgentDecision(mode="task", reason="task_pattern_matched", starts_new_task=starts_new)

        if len(text) > 240:
            return MainAgentDecision(mode="task", reason="long_request", starts_new_task=starts_new)

        return MainAgentDecision(mode="simple", reason="simple_question", starts_new_task=False)
|
||||
167
app-instance/backend/beaver/tasks/service.py
Normal file
167
app-instance/backend/beaver/tasks/service.py
Normal file
@ -0,0 +1,167 @@
|
||||
"""Internal service for automatic Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import TaskEvent, TaskRecord, ValidationResult
|
||||
from .store import TaskStore
|
||||
|
||||
|
||||
class TaskService:
    """Task lifecycle operations persisted through a ``TaskStore``.

    Every mutating method saves the task via the store and then appends a
    matching event.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the backing ``TaskStore`` rooted at *root*."""
        self.store = TaskStore(root)

    def create_task(
        self,
        *,
        session_id: str,
        description: str,
        creator: str = "main-agent",
        metadata: dict[str, Any] | None = None,
    ) -> TaskRecord:
        """Create, persist and return a new ``open`` task.

        The description doubles as the task goal; a ``created`` event is
        recorded.
        """
        timestamp = self._now()
        record = TaskRecord(
            task_id=uuid4().hex,
            session_id=session_id,
            description=description,
            goal=description,
            constraints=[],
            priority=0,
            status="open",
            creator=creator,
            created_at=timestamp,
            updated_at=timestamp,
            metadata=dict(metadata or {}),
        )
        self.store.upsert_task(record)
        self._event(record, "created", payload={"description": description})
        return record

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Look a task up by id; delegates to the store."""
        return self.store.get_task(task_id)

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Look a task up by one of its run ids; delegates to the store."""
        return self.store.get_task_by_run_id(run_id)

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the session's latest open task; delegates to the store."""
        return self.store.get_latest_open_task(session_id)

    def start_run(self, task_id: str, *, user_message: str, attempt_index: int) -> TaskRecord:
        """Mark the task ``running`` and remember the triggering message/attempt.

        Raises:
            ValueError: if *task_id* is unknown.
        """
        task = self._require(task_id)
        task.status = "running"
        task.updated_at = self._now()
        task.metadata.update(
            {
                "latest_user_message": user_message,
                "latest_attempt_index": attempt_index,
            }
        )
        self.store.upsert_task(task)
        event_payload = {"user_message": user_message, "attempt_index": attempt_index}
        self._event(task, "run_started", payload=event_payload)
        return task

    def append_run(self, task_id: str, run_id: str, *, skill_names: list[str] | None = None) -> TaskRecord:
        """Attach a run id and any new skill names to the task.

        Ids and names already present are not duplicated. A ``run_completed``
        event is recorded.

        Raises:
            ValueError: if *task_id* is unknown.
        """
        task = self._require(task_id)
        names = skill_names or []
        if run_id not in task.run_ids:
            task.run_ids.append(run_id)
        for name in names:
            if name in task.skill_names:
                continue
            task.skill_names.append(name)
        task.updated_at = self._now()
        self.store.upsert_task(task)
        self._event(task, "run_completed", run_id=run_id, payload={"skill_names": names})
        return task

    def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
        """Store the validation result and move the task to ``awaiting_feedback``.

        Raises:
            ValueError: if *task_id* is unknown.
        """
        task = self._require(task_id)
        task.status = "awaiting_feedback"
        task.updated_at = self._now()
        task.validation_result = validation.to_dict()
        self.store.upsert_task(task)
        self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
        return task

    def add_feedback(
        self,
        task_id: str,
        *,
        feedback_type: str,
        comment: str | None = None,
        run_id: str | None = None,
    ) -> TaskRecord:
        """Record user feedback for a run and apply its status transition.

        ``revise`` moves the task to ``needs_revision``; ``abandon`` and
        ``satisfied`` finalize it (``abandoned`` / ``closed``). Repeating
        feedback already recorded for the same run and type is a no-op that
        returns the task unchanged.

        Raises:
            ValueError: if *task_id* is unknown, if the run already has
                feedback of a different type, or if the task is already
                finalized and this feedback is not a repeat.
        """
        task = self._require(task_id)
        now = self._now()

        # Only feedback recorded against the same run id is relevant.
        same_run = [item for item in task.feedback if item.get("run_id") == run_id]
        matching_feedback = any(item.get("feedback_type") == feedback_type for item in same_run)
        conflicting_feedback = next(
            (item for item in same_run if item.get("feedback_type") != feedback_type),
            None,
        )

        if conflicting_feedback is not None:
            raise ValueError(
                f"Feedback for run_id={run_id!r} was already recorded as "
                f"{conflicting_feedback.get('feedback_type')!r}"
            )
        if task.status in {"closed", "abandoned"} and not matching_feedback:
            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
        if matching_feedback:
            return task

        entry = {
            "feedback_type": feedback_type,
            "comment": comment or "",
            "run_id": run_id,
            "created_at": now,
        }
        task.feedback.append(entry)

        if feedback_type == "revise":
            task.status = "needs_revision"
        elif feedback_type in ("abandon", "satisfied"):
            # Both finalizing outcomes stamp the close time.
            task.closed_at = now
            if feedback_type == "abandon":
                task.status = "abandoned"
                task.close_reason = comment or "abandoned"
            else:
                task.status = "closed"
                task.close_reason = "satisfied"
                task.satisfaction = 1.0

        task.updated_at = now
        self.store.upsert_task(task)
        self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
        return task

    def _require(self, task_id: str) -> TaskRecord:
        """Return the stored task or raise ``ValueError`` for an unknown id."""
        found = self.store.get_task(task_id)
        if found is not None:
            return found
        raise ValueError(f"Unknown task_id: {task_id}")

    def _event(
        self,
        task: TaskRecord,
        event_type: str,
        *,
        run_id: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> None:
        """Append a ``TaskEvent`` for *task* with a fresh id and timestamp."""
        event = TaskEvent(
            event_id=uuid4().hex,
            task_id=task.task_id,
            session_id=task.session_id,
            run_id=run_id,
            event_type=event_type,
            created_at=self._now(),
            payload=dict(payload or {}),
        )
        self.store.append_event(event)

    @staticmethod
    def _now() -> str:
        """Current UTC time as an ISO-8601 string."""
        current = datetime.now(timezone.utc)
        return current.isoformat()
|
||||
286
app-instance/backend/beaver/tasks/skill_resolver.py
Normal file
286
app-instance/backend/beaver/tasks/skill_resolver.py
Normal file
@ -0,0 +1,286 @@
|
||||
"""Resolve Task team nodes to pinned skills for generic sub-agents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field, replace
|
||||
from typing import Any
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import MissingSkillSynthesizer
|
||||
from beaver.tasks.models import TaskRecord
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillResolutionReport:
|
||||
node_id: str
|
||||
skill_query: str
|
||||
required_capabilities: list[str] = field(default_factory=list)
|
||||
selected_skill_names: list[str] = field(default_factory=list)
|
||||
generated_skill_draft_id: str | None = None
|
||||
generated_skill_name: str | None = None
|
||||
ephemeral_used: bool = False
|
||||
reason: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"node_id": self.node_id,
|
||||
"skill_query": self.skill_query,
|
||||
"required_capabilities": list(self.required_capabilities),
|
||||
"selected_skill_names": list(self.selected_skill_names),
|
||||
"generated_skill_draft_id": self.generated_skill_draft_id,
|
||||
"generated_skill_name": self.generated_skill_name,
|
||||
"ephemeral_used": self.ephemeral_used,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
class TaskSkillResolver:
    """Pins published or draft-only skills onto generic team nodes."""

    def __init__(
        self,
        *,
        skills_loader: SkillsLoader,
        draft_service: DraftService,
        retriever: SkillEmbeddingRetriever | None = None,
        missing_skill_synthesizer: MissingSkillSynthesizer | None = None,
    ) -> None:
        """Store collaborators, creating default retriever/synthesizer if omitted."""
        self.skills_loader = skills_loader
        self.draft_service = draft_service
        self.retriever = retriever or SkillEmbeddingRetriever()
        self.missing_skill_synthesizer = missing_skill_synthesizer or MissingSkillSynthesizer()

    async def resolve_graph(
        self,
        graph: ExecutionGraph,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionGraph, list[SkillResolutionReport]]:
        """Resolve every node in order; return a new graph plus per-node reports."""
        new_nodes: list[ExecutionNode] = []
        node_reports: list[SkillResolutionReport] = []
        for original in graph.nodes:
            outcome = await self.resolve_node(
                original,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            resolved_node, node_report = outcome
            new_nodes.append(resolved_node)
            node_reports.append(node_report)
        rebuilt = ExecutionGraph(strategy=graph.strategy, nodes=new_nodes)
        return rebuilt, node_reports

    async def resolve_node(
        self,
        node: ExecutionNode,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionNode, SkillResolutionReport]:
        """Pin skills onto one node.

        Published skills selected for the node are merged into its pinned
        names. When none are selected, the missing-skill synthesizer is asked
        for a draft and its skill context is attached to the node instead.
        """
        agent_metadata = node.agent.metadata
        skill_query = str(agent_metadata.get("skill_query") or node.task or node.node_id).strip()

        required_capabilities: list[str] = []
        for raw in agent_metadata.get("required_capabilities", []):
            capability = str(raw).strip()
            if capability:
                required_capabilities.append(capability)

        # Retrieval query: every non-empty piece of context, one per line.
        query_parts = [
            skill_query,
            node.task,
            " ".join(required_capabilities),
            task.goal,
            user_message,
        ]
        selected = await self._select_published_skills(
            query="\n".join(filter(None, query_parts)),
            provider_bundle=provider_bundle,
        )

        if selected:
            matched_metadata = {
                **agent_metadata,
                "skill_query": skill_query,
                "required_capabilities": required_capabilities,
                "selected_skill_names": selected,
                "ephemeral_skill_names": [],
            }
            matched_node = self._generic_node(
                node,
                pinned_skill_names=_merge_names(node.inherited_pinned_skills, selected),
                metadata=matched_metadata,
            )
            matched_report = SkillResolutionReport(
                node_id=node.node_id,
                skill_query=skill_query,
                required_capabilities=required_capabilities,
                selected_skill_names=selected,
                ephemeral_used=False,
                reason="matched published skill",
            )
            return matched_node, matched_report

        missing = await self.missing_skill_synthesizer.synthesize(
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
            node_id=node.node_id,
            node_task=node.task,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            provider_bundle=provider_bundle,
            draft_service=self.draft_service,
        )
        generated_metadata = {
            **agent_metadata,
            "skill_query": skill_query,
            "required_capabilities": required_capabilities,
            "selected_skill_names": [],
            "generated_skill_draft_id": missing.draft.draft_id,
            "generated_skill_name": missing.draft.skill_name,
            "ephemeral_skill_names": [missing.skill_context.name],
        }
        generated_node = self._generic_node(
            node,
            pinned_skill_names=list(node.inherited_pinned_skills),
            pinned_skill_contexts=[*node.inherited_pinned_skill_contexts, missing.skill_context],
            metadata=generated_metadata,
        )
        generated_report = SkillResolutionReport(
            node_id=node.node_id,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            generated_skill_draft_id=missing.draft.draft_id,
            generated_skill_name=missing.draft.skill_name,
            ephemeral_used=True,
            reason="generated draft-only skill for missing sub-agent guidance",
        )
        return generated_node, generated_report

    async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
        """Pick published skill names for *query*.

        Candidates from the loader are narrowed by the retriever, then a
        provider chooses among them. Names the provider returns that are not
        among the candidates are dropped, as are duplicates. Any failure in
        the provider call or in parsing its reply results in an empty list.
        """
        candidates = self.skills_loader.build_selection_candidates()
        if not candidates:
            return []

        embedding = provider_bundle.embedding_runtime
        has_embedding = embedding is not None
        candidates = await self.retriever.retrieve(
            query=query,
            candidates=candidates,
            top_k=8,
            api_key=embedding.api_key if has_embedding else None,
            api_base=embedding.api_base if has_embedding else None,
            model=embedding.model if has_embedding else None,
            extra_headers=embedding.extra_headers if has_embedding else None,
            timeout_seconds=embedding.request_timeout_seconds if has_embedding else None,
            fallback_top_k=8,
        )
        if not candidates:
            return []

        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        candidate_names = {item["name"] for item in candidates}

        try:
            # Message construction stays inside the try so a rendering error
            # is handled the same way as a provider failure.
            system_message = {
                "role": "system",
                "content": (
                    "Select published Beaver skills for one generic sub-agent node. "
                    "Return only a JSON array of skill names. Do not invent names. "
                    "If none of the candidates directly match the required guidance, return []."
                ),
            }
            user_message = {
                "role": "user",
                "content": (
                    f"Node skill query:\n{query}\n\n"
                    f"Candidate skills:\n{self._render_candidates(candidates)}\n\n"
                    "Return only JSON, for example: [\"skill-a\"] or []"
                ),
            }
            response = await provider.chat(
                messages=[system_message, user_message],
                tools=None,
                model=model,
                max_tokens=512,
                temperature=0,
            )
            parsed = self._parse_names(response.content or "")
        except Exception:
            parsed = []

        selected: list[str] = []
        for name in parsed:
            if name not in candidate_names or name in selected:
                continue
            selected.append(name)
        return selected

    @staticmethod
    def _generic_node(
        node: ExecutionNode,
        *,
        pinned_skill_names: list[str],
        metadata: dict[str, Any],
        pinned_skill_contexts: list[Any] | None = None,
    ) -> ExecutionNode:
        """Copy *node* with a generic-worker agent and the given pinned skills.

        The new agent is named after the node and has an empty role and system
        prompt. When no contexts are passed (or the list is empty) the node's
        existing contexts are kept.
        """
        agent_metadata = {**metadata, "sub_agent_kind": "generic_skill_worker"}
        generic_agent = AgentDescriptor(
            name=node.node_id,
            role="",
            system_prompt="",
            metadata=agent_metadata,
        )
        contexts = list(pinned_skill_contexts or node.inherited_pinned_skill_contexts)
        return replace(
            node,
            agent=generic_agent,
            inherited_pinned_skills=pinned_skill_names,
            inherited_pinned_skill_contexts=contexts,
        )

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as ``- name: description`` lines."""
        lines = [f"- {item['name']}: {item['description']}" for item in candidates]
        return "\n".join(lines)

    @staticmethod
    def _parse_names(content: str) -> list[str]:
        """Parse a provider reply into a list of non-empty, stripped names.

        Accepts a JSON array, or a JSON object holding the array under
        ``skills``, ``selected_skills`` or ``selected``. A surrounding
        Markdown fence and a leading ``json`` tag are removed first. Anything
        unparseable gives an empty list.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            fence_lines = cleaned.splitlines()
            fenced = (
                len(fence_lines) >= 3
                and fence_lines[0].startswith("```")
                and fence_lines[-1].startswith("```")
            )
            if fenced:
                cleaned = "\n".join(fence_lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()

        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return []

        if isinstance(payload, dict):
            # Use the first known wrapper key that actually holds a list.
            for key in ("skills", "selected_skills", "selected"):
                wrapped = payload.get(key)
                if isinstance(wrapped, list):
                    payload = wrapped
                    break
        if not isinstance(payload, list):
            return []

        names: list[str] = []
        for item in payload:
            text = str(item).strip()
            if text:
                names.append(text)
        return names
|
||||
|
||||
|
||||
def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
for name in [*parent, *selected]:
|
||||
if name and name not in result:
|
||||
result.append(name)
|
||||
return result
|
||||
100
app-instance/backend/beaver/tasks/store.py
Normal file
100
app-instance/backend/beaver/tasks/store.py
Normal file
@ -0,0 +1,100 @@
|
||||
"""File-backed internal task store."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .models import TaskEvent, TaskRecord
|
||||
|
||||
|
||||
class TaskStore:
    """File-backed store: tasks in ``tasks.json``, events in ``events.jsonl``.

    A per-instance ``threading.Lock`` serialises file access made through this
    instance; it does not coordinate with other instances or processes.
    """

    def __init__(self, root: str | Path) -> None:
        """Create *root* if needed and set up the file paths and lock."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.tasks_path = self.root / "tasks.json"
        self.events_path = self.root / "events.jsonl"
        self._lock = threading.Lock()

    def list_tasks(self) -> list[TaskRecord]:
        """Return every stored task as a ``TaskRecord``."""
        with self._lock:
            payload = self._read_tasks_unlocked()
        return [TaskRecord.from_dict(item) for item in payload.values()]

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the task with *task_id*, or ``None`` when it is not stored."""
        with self._lock:
            payload = self._read_tasks_unlocked().get(task_id)
        return TaskRecord.from_dict(payload) if isinstance(payload, dict) else None

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the first task whose ``run_ids`` contains *run_id*, if any."""
        for task in self.list_tasks():
            if run_id in task.run_ids:
                return task
        return None

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the session's non-finalized task with the greatest ``updated_at``.

        Tasks count as open while their status is ``awaiting_feedback``,
        ``needs_revision``, ``open`` or ``running``. ``updated_at`` values are
        compared as strings.
        """
        tasks = [
            task
            for task in self.list_tasks()
            if task.session_id == session_id and task.status in {"awaiting_feedback", "needs_revision", "open", "running"}
        ]
        if not tasks:
            return None
        return sorted(tasks, key=lambda item: item.updated_at)[-1]

    def upsert_task(self, task: TaskRecord) -> None:
        """Insert or overwrite *task* in ``tasks.json`` under the lock."""
        with self._lock:
            payload = self._read_tasks_unlocked()
            payload[task.task_id] = task.to_dict()
            self._write_tasks_unlocked(payload)

    def append_event(self, event: TaskEvent) -> None:
        """Append *event* as one JSON line to ``events.jsonl`` under the lock."""
        self.events_path.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            with self.events_path.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(event.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def list_events(self, task_id: str | None = None) -> list[TaskEvent]:
        """Return stored events in file order, optionally only those of *task_id*.

        Blank lines and lines that decode to non-dict JSON are skipped. A line
        that is not valid JSON still raises ``json.JSONDecodeError``.
        """
        # Read under the same lock append_event takes, so a reader sharing
        # this store instance cannot observe a line that is still being
        # written. Parsing happens after release to keep the critical
        # section short.
        with self._lock:
            if not self.events_path.exists():
                return []
            raw = self.events_path.read_text(encoding="utf-8")
        results: list[TaskEvent] = []
        for line in raw.splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if not isinstance(payload, dict):
                continue
            event = TaskEvent.from_dict(payload)
            if task_id is not None and event.task_id != task_id:
                continue
            results.append(event)
        return results

    def _read_tasks_unlocked(self) -> dict[str, dict[str, Any]]:
        """Load the task mapping from disk; the caller must hold the lock.

        Accepts either ``{"tasks": {...}}`` or a bare ``{id: task}`` mapping.
        A missing file or unexpected top-level shape gives an empty dict, and
        non-dict entries are dropped.
        """
        if not self.tasks_path.exists():
            return {}
        payload = json.loads(self.tasks_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return {}
        tasks = payload.get("tasks", payload)
        if not isinstance(tasks, dict):
            return {}
        return {str(key): dict(value) for key, value in tasks.items() if isinstance(value, dict)}

    def _write_tasks_unlocked(self, payload: dict[str, dict[str, Any]]) -> None:
        """Write the task mapping to disk; the caller must hold the lock.

        The data goes to a temp file in the same directory and is then swapped
        in with ``os.replace``; a leftover temp file is removed on failure.
        """
        self.tasks_path.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp_name = tempfile.mkstemp(prefix=".tasks-", suffix=".json", dir=str(self.tasks_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump({"tasks": payload}, handle, ensure_ascii=False, indent=2, sort_keys=True)
                handle.write("\n")
            os.replace(tmp_path, self.tasks_path)
        finally:
            # After a successful replace the temp path no longer exists.
            if tmp_path.exists():
                tmp_path.unlink()
|
||||
138
app-instance/backend/beaver/tasks/validation.py
Normal file
138
app-instance/backend/beaver/tasks/validation.py
Normal file
@ -0,0 +1,138 @@
|
||||
"""Automatic validation for internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class ValidationService:
    """Judge whether a finished task run satisfied its goal.

    When a provider bundle supplies a provider, the judgement is delegated to
    that LLM; otherwise a small text heuristic on the final output is used.
    """

    # Spellings accepted as a true ``passed`` flag when the model returns a
    # string instead of a JSON boolean.
    _TRUE_STRINGS = frozenset({"true", "yes", "y", "1", "pass", "passed"})

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate a task result and return a ``ValidationResult``.

        Args:
            task: Task being validated; its ``goal`` is shown to the validator.
            user_message: The user request that produced this run.
            final_output: The assistant's final answer.
            transcript_excerpt: Optional transcript context for the validator.
            tool_summaries: Optional tool-call summaries (``None`` means none).
            team_summaries: Optional team-run summaries (``None`` means none).
            provider_bundle: Source of the validator provider and model. The
                auxiliary provider/runtime are preferred over the main ones.

        Returns:
            The LLM verdict when a provider is available; a failing result with
            ``validator="llm_error"`` when that call or its parsing raises; the
            heuristic verdict when no provider is available.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is None:
            return self._heuristic_validate(final_output)
        try:
            return await self._validate_with_provider(
                provider=provider,
                model=model,
                task=task,
                user_message=user_message,
                final_output=final_output,
                transcript_excerpt=transcript_excerpt,
                tool_summaries=tool_summaries or [],
                team_summaries=team_summaries or [],
            )
        except Exception as exc:
            # A validator failure must not be mistaken for a pass: report it
            # as a failed validation that asks for a revision.
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=[f"Validator failed: {exc}"],
                missing_requirements=["A valid automatic validation result is required before accepting the task."],
                recommended_revision_prompt=(
                    "Review the task result again because automatic validation failed, "
                    "then provide a corrected final answer that explicitly satisfies the task goal."
                ),
                validator="llm_error",
            )

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` for a JSON verdict and normalize it.

        Long inputs are truncated before being placed in the prompt
        (transcript to 2500 characters, final output to 4000, summaries to the
        first 12 entries each).

        Raises:
            ValueError: If the response contains no JSON object or the score
                cannot be converted to a number. Errors raised by
                ``provider.chat`` propagate unchanged.
        """
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Transcript excerpt:\n{transcript_excerpt[:2500]}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries[:12], ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries[:12], ensure_ascii=False)}\n\n"
            f"Assistant final output:\n{final_output[:4000]}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=800,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")
        return ValidationResult(
            passed=self._coerce_bool(payload.get("passed")),
            score=self._coerce_score(payload.get("score")),
            issues=self._coerce_str_list(payload.get("issues")),
            missing_requirements=self._coerce_str_list(payload.get("missing_requirements")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @classmethod
    def _coerce_bool(cls, value: Any) -> bool:
        """Interpret a model-supplied ``passed`` value.

        Strings are matched against ``_TRUE_STRINGS`` (case-insensitive) so
        that ``"false"`` is not treated as truthy; anything else uses ``bool``.
        """
        if isinstance(value, str):
            return value.strip().lower() in cls._TRUE_STRINGS
        return bool(value)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert a model-supplied score to a float clamped to ``[0.0, 1.0]``.

        Missing or falsy values become ``0.0``, as does NaN.

        Raises:
            ValueError: If ``value`` is a string that is not a number.
            TypeError: If ``value`` cannot be converted to ``float`` at all.
        """
        score = float(value or 0.0)
        if score != score:  # NaN is the only value unequal to itself.
            return 0.0
        return max(0.0, min(1.0, score))

    @staticmethod
    def _coerce_str_list(value: Any) -> list[str]:
        """Normalize a model-supplied list field to a list of strings.

        A bare string becomes a one-element list (instead of being split into
        characters); lists and tuples are converted element-wise; any other
        non-empty value is wrapped as a single string.
        """
        if not value:
            return []
        if isinstance(value, str):
            stripped = value.strip()
            return [stripped] if stripped else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Validate without an LLM by inspecting the final output text.

        Empty output fails with score 0.0; output containing one of the known
        failure phrases fails with score 0.35; everything else passes at 0.85.
        """
        text = final_output.strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract the first JSON object embedded in ``text``.

        Decoding is attempted from each ``{`` in turn, so code fences and
        surrounding prose (even prose that itself contains braces) are
        tolerated.

        Raises:
            ValueError: If no JSON object can be decoded. When at least one
                candidate failed to parse, the first ``json.JSONDecodeError``
                (a ``ValueError`` subclass) is raised.
        """
        decoder = json.JSONDecoder()
        first_error: json.JSONDecodeError | None = None
        position = text.find("{")
        while position >= 0:
            try:
                payload, _ = decoder.raw_decode(text, position)
            except json.JSONDecodeError as exc:
                if first_error is None:
                    first_error = exc
            else:
                if isinstance(payload, dict):
                    return payload
            position = text.find("{", position + 1)
        if first_error is not None:
            raise first_error
        raise ValueError("validator response must be a JSON object")
|
||||
@ -100,7 +100,8 @@ class ToolAssembler:
|
||||
|
||||
result: list[str] = []
|
||||
for skill in activated_skills:
|
||||
for name in skills_loader.get_skill_tool_hints(skill.name):
|
||||
names = list(skill.tool_hints) if getattr(skill, "tool_hints", None) else skills_loader.get_skill_tool_hints(skill.name)
|
||||
for name in names:
|
||||
if name not in result:
|
||||
result.append(name)
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user