feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证
(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
通过Task验证通过且用户满意才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -1,2 +1,34 @@
"""Multi-agent coordination layer."""
from .models import (
AgentDescriptor,
DelegationEnvelope,
ExecutionGraph,
ExecutionNode,
NodeRunResult,
TeamRunResult,
)
def __getattr__(name: str):
    """Lazily resolve heavyweight public names on first attribute access.

    ``LocalAgentRunner`` and ``TeamGraphScheduler`` are imported only when
    they are actually requested from this package.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported names.
    """
    if name == "LocalAgentRunner":
        from .local import LocalAgentRunner

        return LocalAgentRunner
    if name == "TeamGraphScheduler":
        from .execution import TeamGraphScheduler

        return TeamGraphScheduler
    # Mirror the interpreter's own wording so tracebacks stay recognizable.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
# Public names of this package. "LocalAgentRunner" and "TeamGraphScheduler"
# are not imported eagerly above; they are resolved on first access by the
# module-level __getattr__ defined in this file.
__all__ = [
"AgentDescriptor",
"DelegationEnvelope",
"ExecutionGraph",
"ExecutionNode",
"LocalAgentRunner",
"NodeRunResult",
"TeamGraphScheduler",
"TeamRunResult",
]

View File

@ -1,2 +1,5 @@
"""Execution control, retry, and aggregation."""
from .scheduler import TeamGraphScheduler
# The scheduler is the only name re-exported from this subpackage.
__all__ = ["TeamGraphScheduler"]

View File

@ -0,0 +1,256 @@
"""Minimal scheduler for Beaver-native team execution graphs."""
from __future__ import annotations
import asyncio
from collections.abc import Callable
from typing import TYPE_CHECKING
from beaver.engine.providers import ProviderBundle
from ..local import LocalAgentRunner
from ..models import DelegationEnvelope, ExecutionGraph, ExecutionNode, NodeRunResult, TeamRunResult
if TYPE_CHECKING:
from beaver.engine.context import SkillContext
class TeamGraphScheduler:
    """Execute sequence, parallel, and DAG team graphs.

    Every node is delegated to ``runner``. A node that raises is converted
    into a failed ``NodeRunResult`` rather than aborting the whole graph, and
    nodes that depend on a failed node are reported as ``blocked``.
    """

    def __init__(self, runner: LocalAgentRunner) -> None:
        self.runner = runner

    async def run(
        self,
        graph: ExecutionGraph,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None = None,
        provider_bundle: ProviderBundle | None = None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None = None,
        inherited_pinned_skills: list[str] | None = None,
        inherited_pinned_skill_contexts: list["SkillContext"] | None = None,
        learning_candidate_enabled: bool = False,
    ) -> TeamRunResult:
        """Validate ``graph``, run it with its strategy, and summarize the results.

        Raises:
            ValueError: if a single ``provider_bundle`` is supplied for a graph
                with more than one node, or if ``graph.validate()`` rejects
                the graph.
        """
        graph.validate()
        if provider_bundle is not None and len(graph.nodes) > 1:
            raise ValueError("provider_bundle can only be used for single-node team graphs; use provider_bundle_factory")

        # All three strategies take the same keyword arguments, so pick the
        # executor once instead of repeating the call three times.
        if graph.strategy == "sequence":
            execute = self._run_sequence
        elif graph.strategy == "parallel":
            execute = self._run_parallel
        else:
            execute = self._run_dag

        results = await execute(
            graph.nodes,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle=provider_bundle,
            provider_bundle_factory=provider_bundle_factory,
            # Copy so per-run merging never mutates the caller's lists.
            inherited_pinned_skills=list(inherited_pinned_skills or []),
            inherited_pinned_skill_contexts=list(inherited_pinned_skill_contexts or []),
            learning_candidate_enabled=learning_candidate_enabled,
        )
        return self._summarize(results, task_id=parent_task_id)

    async def _run_sequence(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run nodes one after another; once any node fails, block the rest."""
        results: list[NodeRunResult] = []
        for node in nodes:
            if any(not item.success for item in results):
                results.append(self._blocked(node, results))
                continue
            # Every earlier successful node is exposed as a dependency output.
            dependency_outputs = {item.node_id: item.output_text for item in results if item.success}
            results.append(await self._run_node(node, dependency_outputs=dependency_outputs, **kwargs))
        return results

    async def _run_parallel(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run all nodes concurrently with no dependency outputs."""
        return list(await asyncio.gather(*(self._run_node(node, dependency_outputs={}, **kwargs) for node in nodes)))

    async def _run_dag(
        self,
        nodes: list[ExecutionNode],
        **kwargs,
    ) -> list[NodeRunResult]:
        """Run nodes in dependency order, executing each ready batch concurrently.

        Nodes whose dependency failed (directly or transitively) are returned
        as ``blocked`` results.

        Raises:
            ValueError: if nodes remain that can be neither run nor blocked.
        """
        pending = {node.node_id: node for node in nodes}
        completed: dict[str, NodeRunResult] = {}
        ordered: list[NodeRunResult] = []
        while pending:
            blocked_ids = sorted(
                node_id
                for node_id, node in pending.items()
                if any(dep in completed and not completed[dep].success for dep in node.depends_on)
            )
            for node_id in blocked_ids:
                node = pending.pop(node_id)
                # Report only this node's own dependencies, not every failure
                # seen so far in unrelated branches of the graph.
                dependency_results = [completed[dep] for dep in node.depends_on if dep in completed]
                result = self._blocked(node, dependency_results)
                completed[node_id] = result
                ordered.append(result)

            ready = [
                node
                for node in pending.values()
                if all(dep in completed and completed[dep].success for dep in node.depends_on)
            ]
            if not ready:
                if blocked_ids:
                    # Nodes blocked in this pass may in turn block their own
                    # dependents; loop again instead of reporting a cycle.
                    continue
                if pending:
                    unresolved = ", ".join(sorted(pending))
                    raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies: {unresolved}")
                break

            batch = await asyncio.gather(
                *(
                    self._run_node(
                        node,
                        dependency_outputs={
                            dep: completed[dep].output_text
                            for dep in node.depends_on
                            if dep in completed
                        },
                        **kwargs,
                    )
                    for node in ready
                )
            )
            for result in batch:
                pending.pop(result.node_id, None)
                completed[result.node_id] = result
                ordered.append(result)
        return ordered

    async def _run_node(
        self,
        node: ExecutionNode,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None,
        provider_bundle: ProviderBundle | None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None,
        inherited_pinned_skills: list[str],
        inherited_pinned_skill_contexts: list["SkillContext"],
        learning_candidate_enabled: bool,
        dependency_outputs: dict[str, str],
    ) -> NodeRunResult:
        """Build the delegation envelope for ``node`` and run it.

        Any exception other than cancellation is turned into a failed result
        with ``finish_reason="error"``.
        """
        try:
            pinned = self._merge_pinned(inherited_pinned_skills, node.inherited_pinned_skills)
            pinned_contexts = self._merge_skill_contexts(
                inherited_pinned_skill_contexts,
                node.inherited_pinned_skill_contexts,
            )
            envelope = DelegationEnvelope(
                parent_task_id=parent_task_id,
                parent_session_id=parent_session_id,
                parent_run_id=parent_run_id,
                agent=node.agent,
                task=node.task,
                inherited_pinned_skills=pinned,
                inherited_pinned_skill_contexts=pinned_contexts,
                constraints=list(node.constraints),
                expected_output=node.expected_output,
                node_id=node.node_id,
                dependency_outputs=dict(dependency_outputs),
            )
            # A factory, when given, takes precedence over the shared bundle.
            node_provider_bundle = provider_bundle_factory(node) if provider_bundle_factory is not None else provider_bundle
            return await self.runner.run(
                envelope,
                provider_bundle=node_provider_bundle,
                learning_candidate_enabled=learning_candidate_enabled,
            )
        except asyncio.CancelledError:
            # Cancellation must propagate so the surrounding gather unwinds.
            raise
        except Exception as exc:
            return NodeRunResult(
                node_id=node.node_id,
                success=False,
                output_text="",
                finish_reason="error",
                error=str(exc),
            )

    @staticmethod
    def _merge_pinned(parent: list[str], local: list[str]) -> list[str]:
        """Return parent then local skill names, skipping blanks and duplicates."""
        result: list[str] = []
        seen: set[str] = set()
        for name in [*parent, *local]:
            if not name or name in seen:
                continue
            seen.add(name)
            result.append(name)
        return result

    @staticmethod
    def _merge_skill_contexts(parent: list["SkillContext"], local: list["SkillContext"]) -> list["SkillContext"]:
        """Return parent then local skill contexts, keeping the first per ``name``."""
        result: list["SkillContext"] = []
        seen: set[str] = set()
        for skill in [*parent, *local]:
            name = getattr(skill, "name", "")
            if not name or name in seen:
                continue
            seen.add(name)
            result.append(skill)
        return result

    @staticmethod
    def _blocked(node: ExecutionNode, prior_results: list[NodeRunResult]) -> NodeRunResult:
        """Build the failed result for a node skipped because of earlier failures."""
        failed = [item.node_id for item in prior_results if not item.success]
        detail = ", ".join(failed) or "unknown dependency"
        return NodeRunResult(
            node_id=node.node_id,
            success=False,
            output_text="",
            finish_reason="blocked",
            error=f"Blocked by failed dependency: {detail}",
        )

    @staticmethod
    def _summarize(results: list[NodeRunResult], *, task_id: str | None) -> TeamRunResult:
        """Fold node results into one ``TeamRunResult``.

        The summary is the non-empty outputs of successful nodes followed by a
        "Failed nodes" section when at least one node did not succeed.
        """
        summary_parts: list[str] = []
        for item in results:
            # Tolerate a missing output instead of crashing on ``None.strip()``.
            text = (item.output_text or "").strip()
            if item.success and text:
                summary_parts.append(text)

        failed = [item for item in results if not item.success]
        if failed:
            failure_lines = [
                f"- {item.node_id}: {item.error or item.finish_reason}"
                for item in failed
            ]
            summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines))

        return TeamRunResult(
            success=not failed,
            summary="\n\n".join(summary_parts),
            node_results=results,
            run_ids=[item.run_id for item in results if item.run_id],
            session_ids=[item.session_id for item in results if item.session_id],
            task_id=task_id,
        )

View File

@ -0,0 +1,92 @@
"""Local delegated-agent runner built on the shared AgentLoop."""
from __future__ import annotations
from uuid import uuid4
from beaver.engine import AgentLoop
from beaver.engine.providers import ProviderBundle
from .models import DelegationEnvelope, NodeRunResult
class LocalAgentRunner:
    """Run delegated agents through the same AgentLoop implementation."""

    def __init__(self, loop: AgentLoop) -> None:
        self.loop = loop

    async def run(
        self,
        envelope: DelegationEnvelope,
        *,
        provider_bundle: ProviderBundle | None = None,
        learning_candidate_enabled: bool = False,
    ) -> NodeRunResult:
        """Execute one delegated task and normalize the outcome.

        The run counts as successful only when the loop reports
        ``finish_reason == "stop"``.

        Raises:
            ValueError: if ``provider_bundle`` is combined with a model or
                provider name on the agent descriptor.
        """
        agent = envelope.agent
        if provider_bundle is not None and (agent.model or agent.provider_name):
            raise ValueError(
                "provider_bundle cannot be combined with AgentDescriptor.model/provider_name; "
                "build a node-specific provider bundle instead."
            )

        # Submit to the loop when it is already running, otherwise process inline.
        if self.loop.is_running:
            dispatch = self.loop.submit_direct
        else:
            dispatch = self.loop.process_direct

        outcome = await dispatch(
            envelope.task,
            session_id=self._child_session_id(envelope),
            parent_session_id=envelope.parent_session_id,
            source=f"team:{agent.name}",
            title=agent.role or agent.name,
            execution_context=self._execution_context(envelope),
            model=agent.model,
            provider_name=agent.provider_name,
            provider_bundle=provider_bundle,
            task_id=envelope.parent_task_id,
            task_mode=bool(envelope.parent_task_id),
            pinned_skill_names=envelope.inherited_pinned_skills,
            pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
            learning_candidate_enabled=learning_candidate_enabled,
        )

        finished_cleanly = outcome.finish_reason == "stop"
        if finished_cleanly:
            failure = None
        else:
            failure = outcome.output_text or outcome.finish_reason
        return NodeRunResult(
            node_id=envelope.node_id or agent.name,
            success=finished_cleanly,
            output_text=outcome.output_text,
            run_id=outcome.run_id,
            session_id=outcome.session_id,
            finish_reason=outcome.finish_reason,
            error=failure,
        )

    @staticmethod
    def _child_session_id(envelope: DelegationEnvelope) -> str:
        """Derive a fresh child session id from the parent session and node label."""
        label = envelope.node_id or envelope.agent.name or "node"
        suffix = uuid4().hex[:8]
        return f"{envelope.parent_session_id}:team:{label}:{suffix}"

    @staticmethod
    def _execution_context(envelope: DelegationEnvelope) -> str:
        """Render the envelope as blank-line separated text sections."""
        parts: list[str] = []

        def bullets(items) -> str:
            return "\n".join(f"- {entry}" for entry in items)

        if envelope.parent_task_id:
            parts.append(f"Parent task ID: {envelope.parent_task_id}")
        if envelope.parent_run_id:
            parts.append(f"Parent run ID: {envelope.parent_run_id}")
        parts.append("Delegated worker: generic task sub-agent. Follow active pinned skills as the primary guidance.")
        if envelope.agent.system_prompt:
            parts.append(f"Additional delegated instructions:\n{envelope.agent.system_prompt}")
        if envelope.constraints:
            parts.append("Constraints:\n" + bullets(envelope.constraints))
        if envelope.expected_output:
            parts.append(f"Expected output:\n{envelope.expected_output}")
        if envelope.dependency_outputs:
            blocks = [
                f"Dependency {dep_id} output:\n{text}"
                for dep_id, text in envelope.dependency_outputs.items()
            ]
            parts.append("Dependency outputs:\n" + "\n\n".join(blocks))
        if envelope.inherited_pinned_skills:
            parts.append("Pinned inherited skills:\n" + bullets(envelope.inherited_pinned_skills))
        if envelope.inherited_pinned_skill_contexts:
            drafts = [f"- {ctx.name} ({ctx.version})" for ctx in envelope.inherited_pinned_skill_contexts]
            parts.append("Ephemeral pinned skill drafts:\n" + "\n".join(drafts))
        return "\n\n".join(parts)

View File

@ -0,0 +1,151 @@
"""Core models for Beaver team coordination."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal
if TYPE_CHECKING:
from beaver.engine.context import SkillContext
# Every strategy name a graph may declare. ExecutionGraph.validate() in this
# module accepts only "sequence", "parallel", and "dag"; any other value makes
# it raise NotImplementedError.
TeamStrategy = Literal[
"sequence",
"parallel",
"dag",
"moa",
"hierarchy",
"heavy",
"group_chat",
"forest",
"maker",
"router",
]
@dataclass(slots=True)
class AgentDescriptor:
"""Runtime identity for a delegated local agent."""
name: str
role: str = ""
system_prompt: str = ""
model: str | None = None
provider_name: str | None = None
metadata: dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class DelegationEnvelope:
"""All context passed from a parent agent run to one delegated run."""
parent_task_id: str | None
parent_session_id: str
parent_run_id: str | None
agent: AgentDescriptor
task: str
inherited_pinned_skills: list[str] = field(default_factory=list)
inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
constraints: list[str] = field(default_factory=list)
expected_output: str | None = None
node_id: str | None = None
dependency_outputs: dict[str, str] = field(default_factory=dict)
@dataclass(slots=True)
class ExecutionNode:
"""One node in a team execution graph."""
node_id: str
task: str
agent: AgentDescriptor
depends_on: list[str] = field(default_factory=list)
inherited_pinned_skills: list[str] = field(default_factory=list)
inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list)
constraints: list[str] = field(default_factory=list)
expected_output: str | None = None
@dataclass(slots=True)
class ExecutionGraph:
"""A lightweight team graph built from Beaver-native execution nodes."""
strategy: TeamStrategy
nodes: list[ExecutionNode]
def validate(self) -> None:
if self.strategy not in {"sequence", "parallel", "dag"}:
raise NotImplementedError(f"Team strategy {self.strategy!r} is reserved but not implemented in v1")
if not self.nodes:
raise ValueError("ExecutionGraph requires at least one node")
node_ids = [node.node_id for node in self.nodes]
if len(node_ids) != len(set(node_ids)):
raise ValueError("ExecutionGraph node_id values must be unique")
known = set(node_ids)
for node in self.nodes:
missing = [item for item in node.depends_on if item not in known]
if missing:
raise ValueError(f"ExecutionNode {node.node_id!r} depends on unknown node(s): {missing}")
visiting: set[str] = set()
visited: set[str] = set()
deps = {node.node_id: list(node.depends_on) for node in self.nodes}
def visit(node_id: str) -> None:
if node_id in visited:
return
if node_id in visiting:
raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies involving {node_id!r}")
visiting.add(node_id)
for dep in deps[node_id]:
visit(dep)
visiting.remove(node_id)
visited.add(node_id)
for node_id in node_ids:
visit(node_id)
@dataclass(slots=True)
class NodeRunResult:
"""Normalized result for one team node."""
node_id: str
success: bool
output_text: str
run_id: str | None = None
session_id: str | None = None
finish_reason: str = "stop"
error: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"node_id": self.node_id,
"success": self.success,
"output_text": self.output_text,
"run_id": self.run_id,
"session_id": self.session_id,
"finish_reason": self.finish_reason,
"error": self.error,
}
@dataclass(slots=True)
class TeamRunResult:
"""Normalized result returned by a Beaver team run."""
success: bool
summary: str
node_results: list[NodeRunResult] = field(default_factory=list)
run_ids: list[str] = field(default_factory=list)
session_ids: list[str] = field(default_factory=list)
task_id: str | None = None
def to_dict(self) -> dict[str, Any]:
return {
"success": self.success,
"summary": self.summary,
"node_results": [item.to_dict() for item in self.node_results],
"run_ids": list(self.run_ids),
"session_ids": list(self.session_ids),
"task_id": self.task_id,
}

View File

@ -1,2 +1,14 @@
"""Agent registry and descriptors."""
"""Workspace specialist agent registry."""
from .models import AgentMatch, RegisteredAgent, TargetResolutionReport
from .resolver import TargetResolver
from .store import AgentRegistry
# Public names of the registry package, all imported eagerly above.
__all__ = [
"AgentMatch",
"AgentRegistry",
"RegisteredAgent",
"TargetResolutionReport",
"TargetResolver",
]

View File

@ -0,0 +1,184 @@
"""Workspace agent registry models."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor
# Allowed values for RegisteredAgent.status.
AgentRegistryStatus = Literal["active", "disabled"]
# Allowed values for RegisteredAgent.source; _source() in this module maps any
# other input to "workspace".
AgentRegistrySource = Literal["builtin", "workspace", "learned"]
@dataclass(slots=True)
class RegisteredAgent:
agent_id: str
name: str
display_name: str
role: str
description: str
system_prompt: str
capabilities: list[str] = field(default_factory=list)
skill_names: list[str] = field(default_factory=list)
tool_hints: list[str] = field(default_factory=list)
model: str | None = None
provider_name: str | None = None
tags: list[str] = field(default_factory=list)
priority: int = 0
status: AgentRegistryStatus = "active"
source: AgentRegistrySource = "workspace"
metadata: dict[str, Any] = field(default_factory=dict)
created_at: str = field(default_factory=lambda: _utc_now())
updated_at: str = field(default_factory=lambda: _utc_now())
def to_descriptor(self) -> AgentDescriptor:
return AgentDescriptor(
name=self.name,
role=self.role,
system_prompt=self.system_prompt,
model=self.model,
provider_name=self.provider_name,
metadata={
**self.metadata,
"agent_id": self.agent_id,
"display_name": self.display_name,
"description": self.description,
"capabilities": list(self.capabilities),
"skill_names": list(self.skill_names),
"tool_hints": list(self.tool_hints),
"tags": list(self.tags),
"source": self.source,
"resolution": "registered",
},
)
def to_dict(self) -> dict[str, Any]:
return {
"agent_id": self.agent_id,
"name": self.name,
"display_name": self.display_name,
"role": self.role,
"description": self.description,
"system_prompt": self.system_prompt,
"capabilities": list(self.capabilities),
"skill_names": list(self.skill_names),
"tool_hints": list(self.tool_hints),
"model": self.model,
"provider_name": self.provider_name,
"tags": list(self.tags),
"priority": self.priority,
"status": self.status,
"source": self.source,
"metadata": dict(self.metadata),
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "RegisteredAgent":
now = _utc_now()
agent_id = str(payload.get("agent_id") or payload.get("id") or payload.get("name") or "").strip()
if not agent_id:
raise ValueError("RegisteredAgent requires agent_id")
name = str(payload.get("name") or agent_id).strip()
return cls(
agent_id=agent_id,
name=name,
display_name=str(payload.get("display_name") or payload.get("displayName") or name).strip(),
role=str(payload.get("role") or "").strip(),
description=str(payload.get("description") or "").strip(),
system_prompt=str(payload.get("system_prompt") or payload.get("systemPrompt") or "").strip(),
capabilities=_string_list(payload.get("capabilities")),
skill_names=_string_list(payload.get("skill_names") or payload.get("skillNames")),
tool_hints=_string_list(payload.get("tool_hints") or payload.get("toolHints")),
model=_optional_str(payload.get("model")),
provider_name=_optional_str(payload.get("provider_name") or payload.get("providerName")),
tags=_string_list(payload.get("tags")),
priority=int(payload.get("priority", 0) or 0),
status="disabled" if str(payload.get("status") or "active") == "disabled" else "active",
source=_source(payload.get("source")),
metadata=dict(payload.get("metadata") or {}),
created_at=str(payload.get("created_at") or payload.get("createdAt") or now),
updated_at=str(payload.get("updated_at") or payload.get("updatedAt") or now),
)
@dataclass(slots=True)
class AgentMatch:
agent_id: str
score: float
reasons: list[str]
matched_capabilities: list[str]
resolved_descriptor: AgentDescriptor
def to_dict(self) -> dict[str, Any]:
return {
"agent_id": self.agent_id,
"score": self.score,
"reasons": list(self.reasons),
"matched_capabilities": list(self.matched_capabilities),
"resolved_descriptor": {
"name": self.resolved_descriptor.name,
"role": self.resolved_descriptor.role,
"model": self.resolved_descriptor.model,
"provider_name": self.resolved_descriptor.provider_name,
"metadata": dict(self.resolved_descriptor.metadata),
},
}
@dataclass(slots=True)
class TargetResolutionReport:
node_id: str
requested_role: str
requested_capabilities: list[str]
selected_agent_id: str | None
fallback_used: bool
score: float
reason: str
def to_dict(self) -> dict[str, Any]:
return {
"node_id": self.node_id,
"requested_role": self.requested_role,
"requested_capabilities": list(self.requested_capabilities),
"selected_agent_id": self.selected_agent_id,
"fallback_used": self.fallback_used,
"score": self.score,
"reason": self.reason,
}
def _utc_now() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as trimmed text, or ``None`` when it is missing or blank."""
    if value not in (None, ""):
        cleaned = str(value).strip()
        if cleaned:
            return cleaned
    return None
def _string_list(value: Any) -> list[str]:
    """Normalize ``value`` into a list of unique, non-empty, trimmed strings.

    A string is split on commas; a list or tuple is taken item by item;
    anything else yields an empty list. ``None`` items are skipped rather
    than turned into the literal text ``"None"``. First-seen order is kept.
    """
    if isinstance(value, str):
        items = value.split(",")
    elif isinstance(value, (list, tuple)):
        items = value
    else:
        return []
    cleaned = (str(item).strip() for item in items if item is not None)
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(text for text in cleaned if text))
def _source(value: Any) -> AgentRegistrySource:
    """Return ``value`` as a known registry source, defaulting to ``"workspace"``."""
    candidate = str(value or "workspace").strip()
    if candidate not in {"builtin", "workspace", "learned"}:
        return "workspace"
    return candidate  # type: ignore[return-value]

View File

@ -0,0 +1,208 @@
"""Resolve planner node requirements to registered specialist agents."""
from __future__ import annotations
from dataclasses import replace
from typing import Any, TYPE_CHECKING
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from .models import AgentMatch, RegisteredAgent, TargetResolutionReport
from .store import AgentRegistry
if TYPE_CHECKING:
from beaver.tasks.models import TaskRecord
class TargetResolver:
    """Map planner-produced execution nodes onto registered specialist agents."""

    def __init__(self, registry: AgentRegistry) -> None:
        self.registry = registry

    def resolve_graph(
        self,
        graph: ExecutionGraph,
        *,
        task: "TaskRecord",
        user_message: str,
        attempt_index: int,
    ) -> tuple[ExecutionGraph, list[TargetResolutionReport]]:
        """Resolve every node of ``graph``.

        Returns a new graph with the same strategy whose nodes carry the
        resolved descriptors, plus one report per node in node order.
        """
        reports: list[TargetResolutionReport] = []
        resolved_nodes: list[ExecutionNode] = []
        for node in graph.nodes:
            descriptor, report = self.resolve_node(
                node,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
            )
            resolved_nodes.append(replace(node, agent=descriptor))
            reports.append(report)
        return ExecutionGraph(strategy=graph.strategy, nodes=resolved_nodes), reports

    def resolve_node(
        self,
        node: ExecutionNode,
        *,
        task: "TaskRecord",
        user_message: str,
        attempt_index: int,
    ) -> tuple[AgentDescriptor, TargetResolutionReport]:
        """Pick the best registered agent for ``node`` or fall back to its own descriptor.

        The fallback is a copy of the node's descriptor whose metadata is
        tagged ``resolution="fallback_ephemeral"``.
        """
        requested_role = (node.agent.role or node.agent.name or node.node_id).strip()
        requested_capabilities = self._requested_list(node.agent.metadata, "requested_capabilities")
        requested_tags = self._requested_list(node.agent.metadata, "requested_tags")
        pinned_skills = list(node.inherited_pinned_skills)
        # TaskRecord is not visible from this module; guard each part so a
        # missing (None) field cannot break the join.
        task_text = " ".join(
            str(part or "") for part in (task.goal, task.description, user_message, node.task)
        )
        match = self.best_match(
            requested_role=requested_role,
            requested_capabilities=requested_capabilities,
            requested_tags=requested_tags,
            pinned_skills=pinned_skills,
            task_text=task_text,
        )
        if match is not None and match.score > 0:
            descriptor = match.resolved_descriptor
            descriptor.metadata.update(
                {
                    "node_id": node.node_id,
                    "attempt_index": attempt_index,
                    "requested_role": requested_role,
                    "requested_capabilities": requested_capabilities,
                }
            )
            return descriptor, TargetResolutionReport(
                node_id=node.node_id,
                requested_role=requested_role,
                requested_capabilities=requested_capabilities,
                selected_agent_id=match.agent_id,
                fallback_used=False,
                score=match.score,
                reason="; ".join(match.reasons),
            )

        fallback = AgentDescriptor(
            name=node.agent.name or node.node_id,
            role=node.agent.role,
            system_prompt=node.agent.system_prompt,
            model=node.agent.model,
            provider_name=node.agent.provider_name,
            metadata={
                **node.agent.metadata,
                "node_id": node.node_id,
                "attempt_index": attempt_index,
                "requested_role": requested_role,
                "requested_capabilities": requested_capabilities,
                "resolution": "fallback_ephemeral",
            },
        )
        return fallback, TargetResolutionReport(
            node_id=node.node_id,
            requested_role=requested_role,
            requested_capabilities=requested_capabilities,
            selected_agent_id=None,
            fallback_used=True,
            score=0.0,
            reason="no active registered specialist matched planner requirements",
        )

    def best_match(
        self,
        *,
        requested_role: str,
        requested_capabilities: list[str],
        requested_tags: list[str],
        pinned_skills: list[str],
        task_text: str,
    ) -> AgentMatch | None:
        """Return the highest-scoring active agent, or ``None`` if none scores above zero.

        Ties on score are broken by the agent's priority.
        """
        matches = [
            self._score_agent(
                agent,
                requested_role=requested_role,
                requested_capabilities=requested_capabilities,
                requested_tags=requested_tags,
                pinned_skills=pinned_skills,
                task_text=task_text,
            )
            for agent in self.registry.list_active_agents()
        ]
        matches = [match for match in matches if match.score > 0]
        if not matches:
            return None
        matches.sort(key=lambda item: (item.score, item.resolved_descriptor.metadata.get("priority", 0)), reverse=True)
        return matches[0]

    @staticmethod
    def _requested_list(metadata: dict[str, Any], key: str) -> list[str]:
        """Read a list of non-empty trimmed strings from planner metadata.

        A missing key or a ``None`` value yields an empty list; a bare string
        is treated as a single entry instead of being iterated per character.
        """
        raw = metadata.get(key) or []
        if isinstance(raw, str):
            raw = [raw]
        return [text for text in (str(item).strip() for item in raw) if text]

    def _score_agent(
        self,
        agent: RegisteredAgent,
        *,
        requested_role: str,
        requested_capabilities: list[str],
        requested_tags: list[str],
        pinned_skills: list[str],
        task_text: str,
    ) -> AgentMatch:
        """Score ``agent`` by term overlap with the request.

        Role, capability, tag, and skill overlaps each add a base score plus a
        per-term bonus; task-text overlap adds at most 20; the agent's
        priority adds ``priority / 100``.
        """
        score = 0.0
        reasons: list[str] = []
        requested_role_terms = _terms(requested_role)
        capability_terms = _terms(" ".join(requested_capabilities))
        tag_terms = _terms(" ".join(requested_tags))
        skill_terms = _terms(" ".join(pinned_skills))
        task_terms = _terms(task_text)
        agent_role_terms = _terms(agent.role + " " + agent.name + " " + agent.display_name)
        agent_capability_terms = _terms(" ".join(agent.capabilities))
        agent_tag_terms = _terms(" ".join(agent.tags))
        agent_skill_terms = _terms(" ".join(agent.skill_names))
        agent_all_terms = (
            agent_role_terms
            | agent_capability_terms
            | agent_tag_terms
            | agent_skill_terms
            | _terms(agent.description)
        )

        role_hits = requested_role_terms & agent_role_terms
        if role_hits:
            score += 60 + 5 * len(role_hits)
            reasons.append(f"role matched: {', '.join(sorted(role_hits))}")
        capability_hits = capability_terms & agent_capability_terms
        if capability_hits:
            score += 30 + 5 * len(capability_hits)
            reasons.append(f"capabilities matched: {', '.join(sorted(capability_hits))}")
        tag_hits = tag_terms & agent_tag_terms
        if tag_hits:
            score += 10 + 3 * len(tag_hits)
            reasons.append(f"tags matched: {', '.join(sorted(tag_hits))}")
        skill_hits = skill_terms & agent_skill_terms
        if skill_hits:
            score += 25 + 5 * len(skill_hits)
            reasons.append(f"skills matched: {', '.join(sorted(skill_hits))}")
        task_hits = task_terms & agent_all_terms
        if task_hits:
            score += min(20, len(task_hits) * 2)
            reasons.append("task text matched registry profile")

        # NOTE(review): priority alone yields a positive score, so an agent
        # with priority > 0 is selected even with no term overlap - confirm
        # this "priority fallback" is intended.
        score += agent.priority / 100.0
        descriptor = agent.to_descriptor()
        descriptor.metadata["priority"] = agent.priority
        return AgentMatch(
            agent_id=agent.agent_id,
            score=round(score, 3),
            reasons=reasons or ["priority fallback"],
            matched_capabilities=sorted(capability_hits),
            resolved_descriptor=descriptor,
        )
def _terms(value: Any) -> set[str]:
    """Split ``value`` into lowercase alphanumeric terms; falsy input yields no terms."""
    raw = str(value or "")
    # Every non-alphanumeric character acts as a separator.
    spaced = [symbol.lower() if symbol.isalnum() else " " for symbol in raw]
    return set("".join(spaced).split())

View File

@ -0,0 +1,185 @@
"""File-backed workspace agent registry."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from .models import RegisteredAgent
class AgentRegistry:
    """JSON-file registry of agents stored at ``<workspace>/agents/registry.json``.

    Every operation re-reads the file, and every mutation rewrites it in full.
    """

    def __init__(self, workspace: str | Path) -> None:
        self.workspace = Path(workspace)
        self.path = self.workspace / "agents" / "registry.json"
        self.path.parent.mkdir(parents=True, exist_ok=True)
        if not self.path.exists():
            # Seed a fresh workspace with the builtin agents.
            self._write_agents(_builtin_agents())

    def list_agents(self, *, include_disabled: bool = True) -> list[RegisteredAgent]:
        """Return all agents, optionally restricted to those with status ``active``."""
        agents = self._read_agents()
        if include_disabled:
            return agents
        return [agent for agent in agents if agent.status == "active"]

    def list_active_agents(self) -> list[RegisteredAgent]:
        """Return only agents whose status is ``active``."""
        return self.list_agents(include_disabled=False)

    def get_agent(self, agent_id: str) -> RegisteredAgent | None:
        """Return the agent with the given id (whitespace-trimmed), or ``None``."""
        needle = agent_id.strip()
        for agent in self.list_agents():
            if agent.agent_id == needle:
                return agent
        return None

    def upsert_agent(self, payload: dict[str, Any] | RegisteredAgent) -> RegisteredAgent:
        """Insert the agent, or replace the stored agent with the same id.

        On replacement the original ``created_at`` is kept, and a builtin
        agent stays ``builtin`` when the update is marked ``workspace``.
        """
        agent = payload if isinstance(payload, RegisteredAgent) else RegisteredAgent.from_dict(payload)
        agents = self.list_agents()
        for index, existing in enumerate(agents):
            if existing.agent_id != agent.agent_id:
                continue
            if existing.source == "builtin" and agent.source == "workspace":
                agent.source = "builtin"
            agent.created_at = existing.created_at
            agents[index] = agent
            break
        else:
            agents.append(agent)
        self._write_agents(agents)
        return agent

    def disable_agent(self, agent_id: str) -> RegisteredAgent:
        """Mark the agent as disabled and persist the change.

        Raises:
            ValueError: if no agent has the given id.
        """
        from datetime import datetime, timezone

        # Trim the id the same way get_agent() does.
        needle = agent_id.strip()
        agents = self.list_agents()
        for agent in agents:
            if agent.agent_id != needle:
                continue
            agent.status = "disabled"
            # Record the state change so updated_at does not go stale.
            agent.updated_at = datetime.now(timezone.utc).isoformat()
            self._write_agents(agents)
            return agent
        raise ValueError(f"Unknown agent_id: {agent_id}")

    def search(
        self,
        *,
        role: str = "",
        capabilities: list[str] | None = None,
        tags: list[str] | None = None,
        skills: list[str] | None = None,
    ) -> list[RegisteredAgent]:
        """Return active agents whose profile shares a term with every non-empty filter.

        Each filter is matched against the agent's whole profile (id, names,
        role, description, capabilities, tags, skills), not just the
        same-named field.
        """
        filters = [
            terms
            for terms in (
                _terms(role),
                _terms(" ".join(capabilities or [])),
                _terms(" ".join(tags or [])),
                _terms(" ".join(skills or [])),
            )
            if terms
        ]
        matches: list[RegisteredAgent] = []
        for agent in self.list_active_agents():
            haystack = _terms(
                " ".join(
                    [
                        agent.agent_id,
                        agent.name,
                        agent.display_name,
                        agent.role,
                        agent.description,
                        " ".join(agent.capabilities),
                        " ".join(agent.tags),
                        " ".join(agent.skill_names),
                    ]
                )
            )
            if all(not terms.isdisjoint(haystack) for terms in filters):
                matches.append(agent)
        return matches

    def _read_agents(self) -> list[RegisteredAgent]:
        """Load agents from disk; a missing file or unexpected shape yields ``[]``."""
        if not self.path.exists():
            return []
        payload = json.loads(self.path.read_text(encoding="utf-8"))
        # Accept either {"agents": [...]} or a bare list.
        raw_agents = payload.get("agents") if isinstance(payload, dict) else payload
        if not isinstance(raw_agents, list):
            return []
        return [RegisteredAgent.from_dict(item) for item in raw_agents if isinstance(item, dict)]

    def _write_agents(self, agents: list[RegisteredAgent]) -> None:
        """Persist ``agents`` as the complete registry file."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = {"version": 1, "agents": [agent.to_dict() for agent in agents]}
        text = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
        # Write to a sibling file and swap it in, so an interrupted write
        # cannot leave a truncated registry behind.
        staging = self.path.with_name(self.path.name + ".tmp")
        staging.write_text(text, encoding="utf-8")
        staging.replace(self.path)
def _terms(text: str) -> set[str]:
    """Split ``text`` into lowercase alphanumeric terms."""
    # Every non-alphanumeric character acts as a separator.
    spaced = [symbol.lower() if symbol.isalnum() else " " for symbol in text]
    return set("".join(spaced).split())
def _builtin_agents() -> list[RegisteredAgent]:
    """Build fresh records for the five built-in agents.

    The agents are, in order: researcher, implementer, reviewer, tester and
    documenter. Each record uses its ``agent_id`` as ``name`` and is tagged
    with ``source="builtin"``. New objects (and new lists) are created on
    every call.
    """
    profiles = (
        {
            "agent_id": "researcher",
            "display_name": "Researcher",
            "role": "research",
            "description": "Finds facts, references, constraints, and implementation options.",
            "system_prompt": "You are a research specialist. Gather concise evidence and tradeoffs for the parent task.",
            "capabilities": ["research", "analysis", "source review", "requirements"],
            "tags": ["planning", "research"],
            "priority": 50,
        },
        {
            "agent_id": "implementer",
            "display_name": "Implementer",
            "role": "implementation",
            "description": "Builds scoped implementation slices and proposes concrete changes.",
            "system_prompt": "You are an implementation specialist. Produce practical, scoped implementation output.",
            "capabilities": ["implementation", "coding", "refactor", "integration"],
            "tags": ["coding", "build"],
            "priority": 45,
        },
        {
            "agent_id": "reviewer",
            "display_name": "Reviewer",
            "role": "review",
            "description": "Reviews plans, code, outputs, and risks before final synthesis.",
            "system_prompt": "You are a review specialist. Focus on defects, missing requirements, and risks.",
            "capabilities": ["review", "quality", "risk", "verification"],
            "tags": ["review", "quality"],
            "priority": 45,
        },
        {
            "agent_id": "tester",
            "display_name": "Tester",
            "role": "testing",
            "description": "Designs and executes verification checks for task outputs.",
            "system_prompt": "You are a testing specialist. Identify focused checks and report pass/fail evidence.",
            "capabilities": ["testing", "verification", "regression", "qa"],
            "tags": ["test", "quality"],
            "priority": 40,
        },
        {
            "agent_id": "documenter",
            "display_name": "Documenter",
            "role": "documentation",
            "description": "Writes and reconciles user-facing and internal documentation updates.",
            "system_prompt": "You are a documentation specialist. Produce concise docs aligned with the implementation.",
            "capabilities": ["documentation", "explanation", "migration notes", "release notes"],
            "tags": ["docs", "communication"],
            "priority": 35,
        },
    )
    return [
        RegisteredAgent(name=profile["agent_id"], source="builtin", **profile)
        for profile in profiles
    ]

View File

@ -1,2 +1,19 @@
"""Team models and orchestration objects."""
from ..models import (
AgentDescriptor,
DelegationEnvelope,
ExecutionGraph,
ExecutionNode,
NodeRunResult,
TeamRunResult,
)
__all__ = [
"AgentDescriptor",
"DelegationEnvelope",
"ExecutionGraph",
"ExecutionNode",
"NodeRunResult",
"TeamRunResult",
]

View File

@ -42,6 +42,10 @@ class SkillContext:
name: str
content: str
version: str = "legacy"
content_hash: str = ""
activation_reason: str = "selected"
tool_hints: list[str] = field(default_factory=list)
@dataclass(slots=True)
@ -197,7 +201,7 @@ class ContextBuilder:
# 如果上游 history 已经混入 system 消息,这里要主动跳过,避免双 system。
if message.get("role") == "system":
continue
messages.append(dict(message))
messages.append(self._provider_history_message(message))
if build_input.current_user_input is not None:
messages.append(
@ -212,6 +216,16 @@ class ContextBuilder:
messages=messages,
)
@staticmethod
def _provider_history_message(message: dict[str, Any]) -> dict[str, Any]:
"""Keep persisted UI/audit fields out of provider message payloads."""
allowed = {"role", "content", "tool_calls", "tool_call_id", "name"}
clean = {key: value for key, value in message.items() if key in allowed}
if "name" not in clean and message.get("tool_name"):
clean["name"] = message.get("tool_name")
return clean
def add_tool_result(
self,
messages: list[dict[str, Any]],
@ -322,7 +336,7 @@ class ContextBuilder:
{
"role": "user",
"content": (
f'[SYSTEM: The "{skill.name}" skill is active for this run. '
f'[SYSTEM: The "{skill.name}" skill (version {skill.version}) is active for this run. '
"Follow its instructions as active guidance unless the user overrides them.]\n\n"
f"{content}"
),

View File

@ -7,11 +7,23 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable
from beaver.coordinator.registry import AgentRegistry
from beaver.engine.context import ContextBuilder
from beaver.engine.session import SessionManager
from beaver.foundation.config import BeaverConfig, load_config
from beaver.memory.curated.store import MemoryStore
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillLearningStore
from beaver.services.memory_service import MemoryService
from beaver.skills.drafts import DraftService
from beaver.skills.learning import EvidenceSelector, SkillDraftSynthesizer, SkillLearningPipelineService, SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
from beaver.tasks.skill_resolver import TaskSkillResolver
from beaver.skills import SkillAssembler, SkillsLoader
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
from beaver.tools.builtins import (
@ -45,12 +57,25 @@ class EngineLoadResult:
session_manager: SessionManager | None = None
curated_memory_store: MemoryStore | None = None
memory_service: MemoryService | None = None
run_memory_store: RunMemoryStore | None = None
skill_learning_store: SkillLearningStore | None = None
tool_registry: ToolRegistry | None = None
tool_assembler: ToolAssembler | None = None
tool_executor: ToolExecutor | None = None
context_builder: ContextBuilder | None = None
skills_loader: SkillsLoader | None = None
skill_assembler: SkillAssembler | None = None
skill_spec_store: SkillSpecStore | None = None
draft_service: DraftService | None = None
review_service: ReviewService | None = None
skill_publisher: SkillPublisher | None = None
skill_learning_service: SkillLearningService | None = None
skill_learning_pipeline: SkillLearningPipelineService | None = None
agent_registry: AgentRegistry | None = None
task_skill_resolver: TaskSkillResolver | None = None
task_service: TaskService | None = None
task_execution_planner: TaskExecutionPlanner | None = None
validation_service: ValidationService | None = None
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
closed: bool = False
@ -106,11 +131,24 @@ class EngineLoader:
session_manager: SessionManager | None = None,
curated_memory_store: MemoryStore | None = None,
memory_service: MemoryService | None = None,
run_memory_store: RunMemoryStore | None = None,
skill_learning_store: SkillLearningStore | None = None,
tool_registry: ToolRegistry | None = None,
tool_assembler: ToolAssembler | None = None,
context_builder: ContextBuilder | None = None,
skills_loader: SkillsLoader | None = None,
skill_assembler: SkillAssembler | None = None,
skill_spec_store: SkillSpecStore | None = None,
draft_service: DraftService | None = None,
review_service: ReviewService | None = None,
skill_publisher: SkillPublisher | None = None,
skill_learning_service: SkillLearningService | None = None,
skill_learning_pipeline: SkillLearningPipelineService | None = None,
agent_registry: AgentRegistry | None = None,
task_skill_resolver: TaskSkillResolver | None = None,
task_service: TaskService | None = None,
task_execution_planner: TaskExecutionPlanner | None = None,
validation_service: ValidationService | None = None,
) -> None:
self.config = config or load_config(workspace=workspace, config_path=config_path)
configured_workspace = self.config.agents_defaults.workspace
@ -119,11 +157,24 @@ class EngineLoader:
self._session_manager = session_manager
self._curated_memory_store = curated_memory_store
self._memory_service = memory_service
self._run_memory_store = run_memory_store
self._skill_learning_store = skill_learning_store
self._tool_registry = tool_registry
self._tool_assembler = tool_assembler
self._context_builder = context_builder
self._skills_loader = skills_loader
self._skill_assembler = skill_assembler
self._skill_spec_store = skill_spec_store
self._draft_service = draft_service
self._review_service = review_service
self._skill_publisher = skill_publisher
self._skill_learning_service = skill_learning_service
self._skill_learning_pipeline = skill_learning_pipeline
self._agent_registry = agent_registry
self._task_skill_resolver = task_skill_resolver
self._task_service = task_service
self._task_execution_planner = task_execution_planner
self._validation_service = validation_service
def load(self) -> EngineLoadResult:
"""装配当前主链需要的最小 runtime 对象。"""
@ -135,9 +186,12 @@ class EngineLoader:
curated_memory_store = self._curated_memory_store or MemoryStore(curated_root)
memory_service = self._memory_service or MemoryService(curated_root, store=curated_memory_store)
memory_service.initialize()
run_memory_store = self._run_memory_store or RunMemoryStore(workspace / "memory" / "runs")
skill_learning_store = self._skill_learning_store or SkillLearningStore(workspace / "memory" / "skills")
tool_registry = self._tool_registry or ToolRegistry()
skills_loader = self._skills_loader or SkillsLoader(workspace)
skill_spec_store = self._skill_spec_store or SkillSpecStore(workspace)
skills_loader = self._skills_loader or SkillsLoader(workspace, skill_store=skill_spec_store)
if self._tool_registry is None:
# 这里先注册最小工具集,满足主链的 tool loop。
tool_registry.register_many(
@ -156,6 +210,36 @@ class EngineLoader:
tool_assembler = self._tool_assembler or ToolAssembler()
tool_executor = ToolExecutor(tool_registry)
skill_assembler = self._skill_assembler or SkillAssembler(skills_loader)
draft_service = self._draft_service or DraftService(skill_spec_store)
review_service = self._review_service or ReviewService(skill_spec_store)
skill_publisher = self._skill_publisher or SkillPublisher(skill_spec_store)
evidence_selector = EvidenceSelector(run_memory_store, session_manager=session_manager)
skill_learning_service = self._skill_learning_service or SkillLearningService(
run_store=run_memory_store,
learning_store=skill_learning_store,
draft_service=draft_service,
evidence_selector=evidence_selector,
synthesizer=SkillDraftSynthesizer(),
)
skill_learning_pipeline = self._skill_learning_pipeline or SkillLearningPipelineService(
learning_store=skill_learning_store,
learning_service=skill_learning_service,
draft_service=draft_service,
review_service=review_service,
publisher=skill_publisher,
safety_checker=SkillDraftSafetyChecker(
allowed_tool_names={spec.name for spec in tool_registry.list_specs()}
),
evaluator=SkillDraftEvaluator(run_memory_store),
)
agent_registry = self._agent_registry or AgentRegistry(workspace)
task_skill_resolver = self._task_skill_resolver or TaskSkillResolver(
skills_loader=skills_loader,
draft_service=draft_service,
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
validation_service = self._validation_service or ValidationService()
result = EngineLoadResult(
workspace=workspace,
@ -167,12 +251,25 @@ class EngineLoader:
session_manager=session_manager,
curated_memory_store=memory_service.get_store(),
memory_service=memory_service,
run_memory_store=run_memory_store,
skill_learning_store=skill_learning_store,
tool_registry=tool_registry,
tool_assembler=tool_assembler,
tool_executor=tool_executor,
context_builder=context_builder,
skills_loader=skills_loader,
skill_assembler=skill_assembler,
skill_spec_store=skill_spec_store,
draft_service=draft_service,
review_service=review_service,
skill_publisher=skill_publisher,
skill_learning_service=skill_learning_service,
skill_learning_pipeline=skill_learning_pipeline,
agent_registry=agent_registry,
task_skill_resolver=task_skill_resolver,
task_service=task_service,
task_execution_planner=task_execution_planner,
validation_service=validation_service,
)
if self._session_manager is None:
result.register_closeable("session_manager", session_manager.close)

View File

@ -4,10 +4,15 @@ from __future__ import annotations
import asyncio
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from beaver.engine.context import ContextBuildInput, SessionContext
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
from beaver.memory.runs import RunRecord, SkillEffectRecord
from beaver.skills.learning import RunReceiptContext
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.specs import SkillActivationReceipt
from beaver.engine.providers import ProviderBundle, make_provider_bundle
from beaver.tools import ToolContext
@ -38,6 +43,9 @@ class AgentRunResult:
provider_name: str | None = None
model: str | None = None
usage: dict[str, Any] = field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
validation_result: dict[str, Any] | None = None
@dataclass(slots=True)
@ -196,6 +204,13 @@ class AgentLoop:
temperature: float | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
task_id: str | None = None,
task_mode: bool = False,
attempt_index: int | None = None,
pinned_skill_names: list[str] | None = None,
pinned_skill_contexts: list[SkillContext] | None = None,
learning_candidate_enabled: bool = False,
) -> AgentRunResult:
"""跑通最小 direct run 主链。
@ -233,6 +248,13 @@ class AgentLoop:
temperature=temperature,
max_tool_iterations=max_tool_iterations,
provider_bundle=provider_bundle,
parent_session_id=parent_session_id,
task_id=task_id,
task_mode=task_mode,
attempt_index=attempt_index,
pinned_skill_names=pinned_skill_names,
pinned_skill_contexts=pinned_skill_contexts,
learning_candidate_enabled=learning_candidate_enabled,
)
async def _process_direct_impl(
@ -258,6 +280,13 @@ class AgentLoop:
temperature: float | None = None,
max_tool_iterations: int | None = None,
provider_bundle: ProviderBundle | None = None,
parent_session_id: str | None = None,
task_id: str | None = None,
task_mode: bool = False,
attempt_index: int | None = None,
pinned_skill_names: list[str] | None = None,
pinned_skill_contexts: list[SkillContext] | None = None,
learning_candidate_enabled: bool = False,
) -> AgentRunResult:
"""真正执行一轮 direct run 的内部实现。
@ -276,6 +305,7 @@ class AgentLoop:
tool_executor = self._require_loaded("tool_executor")
skills_loader = self._require_loaded("skills_loader")
skill_assembler = self._require_loaded("skill_assembler")
skill_learning_service = self._require_loaded("skill_learning_service")
config = loaded.config
configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name)
@ -296,16 +326,24 @@ class AgentLoop:
self.profile.max_tool_iterations if max_tool_iterations is None else max_tool_iterations
)
# 每次新运行开始前都通过 MemoryService 刷新 live state。
# 这样 memory policy 会收口在 service而不是散在 loop 里
memory_service.reload_for_new_run()
# Each run captures its own frozen snapshot rather than relying on the shared
# `_snapshot` held by MemoryService; otherwise parallel team runs would overwrite each other.
memory_snapshot = memory_service.capture_snapshot_for_run()
if parent_session_id:
session_manager.ensure_session(
parent_session_id,
source="unknown",
model=resolved_model,
user_id=user_id,
)
session_manager.ensure_session(
resolved_session_id,
source=source,
model=resolved_model,
title=title,
user_id=user_id,
parent_session_id=parent_session_id,
)
session_manager.append_message(
resolved_session_id,
@ -316,6 +354,12 @@ class AgentLoop:
"source": source,
"model": resolved_model,
"agent_name": self.profile.name,
"task_id": task_id,
"task_mode": task_mode,
"attempt_index": attempt_index,
"parent_session_id": parent_session_id,
"pinned_skill_names": list(pinned_skill_names or []),
"pinned_skill_context_names": [skill.name for skill in pinned_skill_contexts or []],
},
content=task,
context_visible=False,
@ -330,6 +374,8 @@ class AgentLoop:
final_usage: dict[str, Any] = {}
final_provider_name: str | None = resolved_provider_name
final_model: str | None = resolved_model
run_started_at = self._utc_now()
activated_receipts: list[SkillActivationReceipt] = []
try:
bundle = provider_bundle or make_provider_bundle(
model=resolved_model,
@ -356,17 +402,38 @@ class AgentLoop:
model=skill_selector_model,
embedding_runtime=bundle.embedding_runtime,
)
skill_activation_messages = context_builder.build_skill_activation_messages(
assembled_skills.activated_skills
activated_skills = self._merge_skill_contexts(
[
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
],
assembled_skills.activated_skills,
)
skill_activation_messages = context_builder.build_skill_activation_messages(
activated_skills
)
activated_receipts = [
SkillActivationReceipt(
run_id=resolved_run_id,
session_id=resolved_session_id,
skill_name=skill.name,
skill_version=skill.version,
content_hash=skill.content_hash,
activated_at=self._utc_now(),
activation_reason=skill.activation_reason,
tool_hints=list(skill.tool_hints),
)
for skill in activated_skills
]
if skill_activation_messages:
if skill_activation_messages or activated_receipts:
session_manager.append_message(
resolved_session_id,
run_id=resolved_run_id,
role="system",
event_type="skill_activation_snapshotted",
event_payload={
"receipts": [receipt.to_dict() for receipt in activated_receipts],
"activation_messages": skill_activation_messages,
},
content="\n\n".join(message["content"] for message in skill_activation_messages) or None,
@ -381,7 +448,7 @@ class AgentLoop:
task_description=task,
registry=tool_registry,
skills_loader=skills_loader,
activated_skills=assembled_skills.activated_skills,
activated_skills=activated_skills,
embedding_runtime=bundle.embedding_runtime,
top_k=10,
)
@ -407,13 +474,14 @@ class AgentLoop:
base_system_prompt=self.profile.system_prompt,
history=session_manager.get_history(resolved_session_id),
current_user_input=task,
memory_snapshot=memory_service.get_snapshot(),
activated_skills=assembled_skills.activated_skills,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
session_context=SessionContext(
session_id=resolved_session_id,
source=source,
model=resolved_model,
user_id=user_id,
parent_session_id=parent_session_id,
),
execution_context=execution_context,
)
@ -491,6 +559,7 @@ class AgentLoop:
run_id=resolved_run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=response.content,
tool_calls=assistant_tool_calls or None,
finish_reason=response.finish_reason,
@ -520,6 +589,7 @@ class AgentLoop:
run_id=resolved_run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=final_text,
finish_reason=final_finish_reason,
source=source,
@ -568,6 +638,9 @@ class AgentLoop:
event_payload={
"finish_reason": final_finish_reason,
"tool_iterations": iterations,
"task_id": task_id,
"task_mode": task_mode,
"attempt_index": attempt_index,
},
content=final_text,
finish_reason=final_finish_reason,
@ -577,6 +650,21 @@ class AgentLoop:
model=final_model,
user_id=user_id,
)
self._record_skill_learning(
skill_learning_service=skill_learning_service,
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
task=task,
run_started_at=run_started_at,
run_ended_at=self._utc_now(),
finish_reason=final_finish_reason,
activated_receipts=activated_receipts,
success=(final_finish_reason == "stop"),
task_id=task_id,
attempt_index=attempt_index,
generate_candidates=learning_candidate_enabled,
)
return AgentRunResult(
session_id=resolved_session_id,
run_id=resolved_run_id,
@ -586,6 +674,7 @@ class AgentLoop:
provider_name=final_provider_name,
model=final_model,
usage=final_usage,
task_id=task_id,
)
except Exception as exc:
if not user_message_recorded:
@ -600,7 +689,7 @@ class AgentLoop:
model=resolved_model,
user_id=user_id,
)
return self._build_error_result(
result = self._build_error_result(
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
@ -612,7 +701,24 @@ class AgentLoop:
tool_iterations=iterations,
provider_name=final_provider_name,
usage=final_usage,
task_id=task_id,
)
self._record_skill_learning(
skill_learning_service=skill_learning_service,
session_manager=session_manager,
session_id=resolved_session_id,
run_id=resolved_run_id,
task=task,
run_started_at=run_started_at,
run_ended_at=self._utc_now(),
finish_reason="error",
activated_receipts=activated_receipts,
success=False,
task_id=task_id,
attempt_index=attempt_index,
generate_candidates=learning_candidate_enabled,
)
return result
def _require_loaded(self, field_name: str) -> Any:
loaded = self.boot()
@ -621,6 +727,46 @@ class AgentLoop:
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
return value
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
contexts: list[SkillContext] = []
seen: set[str] = set()
for name in skill_names:
normalized = str(name).strip()
if not normalized or normalized in seen:
continue
seen.add(normalized)
record = skills_loader.get_skill_record(normalized)
raw_content = skills_loader.load_published_skill(normalized)
content = strip_frontmatter(raw_content).strip() if raw_content else ""
if record is None or not content:
raise ValueError(f"Pinned skill {normalized!r} is not available for delegated execution")
contexts.append(
SkillContext(
name=normalized,
content=content,
version=record.version,
content_hash=record.content_hash or "",
activation_reason="pinned_delegation",
tool_hints=list(record.tool_hints),
)
)
return contexts
@staticmethod
def _merge_skill_contexts(
pinned_skills: list[SkillContext],
open_skills: list[SkillContext],
) -> list[SkillContext]:
result: list[SkillContext] = []
seen: set[str] = set()
for skill in [*pinned_skills, *open_skills]:
if skill.name in seen:
continue
seen.add(skill.name)
result.append(skill)
return result
@staticmethod
def _serialize_tool_calls(tool_calls: list[Any]) -> list[dict[str, Any]]:
payload: list[dict[str, Any]] = []
@ -683,6 +829,7 @@ class AgentLoop:
tool_iterations: int,
provider_name: str | None,
usage: dict[str, Any],
task_id: str | None = None,
) -> AgentRunResult:
"""把主链中的未处理异常收口成可追踪的 assistant error turn。"""
@ -691,6 +838,7 @@ class AgentLoop:
run_id=run_id,
role="assistant",
event_type="assistant_message_added",
event_payload={"task_id": task_id} if task_id else None,
content=message,
finish_reason="error",
source=source,
@ -706,6 +854,7 @@ class AgentLoop:
event_payload={
"tool_iterations": tool_iterations,
"provider_name": provider_name,
"task_id": task_id,
},
content=message,
finish_reason="error",
@ -724,4 +873,87 @@ class AgentLoop:
provider_name=provider_name,
model=model,
usage=usage,
task_id=task_id,
)
@staticmethod
def _record_skill_learning(
    *,
    skill_learning_service: Any,
    session_manager: Any,
    session_id: str,
    run_id: str,
    task: str,
    run_started_at: str,
    run_ended_at: str,
    finish_reason: str,
    activated_receipts: list[SkillActivationReceipt],
    success: bool,
    task_id: str | None = None,
    attempt_index: int | None = None,
    generate_candidates: bool = False,
) -> None:
    """Hand the run receipt to the skill learning service and log the outcome.

    Builds one ``RunRecord`` for the run and one ``SkillEffectRecord`` per
    activated skill receipt, then passes both to
    ``skill_learning_service.collect_run_receipts``. The result is appended
    to the session as a non-context-visible system event:
    ``skill_effects_snapshotted`` on success (including any returned learning
    candidates) or ``skill_effects_snapshot_failed`` when the service raises.
    Service exceptions are caught and recorded rather than propagated.
    """
    run_record = RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_id=task_id,
        attempt_index=attempt_index,
        task_text=task,
        started_at=run_started_at,
        ended_at=run_ended_at,
        success=success,
        finish_reason=finish_reason,
        feedback={},
        activated_skills=list(activated_receipts),
    )
    effect_records = []
    for receipt in activated_receipts:
        effect_records.append(
            SkillEffectRecord(
                run_id=run_id,
                skill_name=receipt.skill_name,
                skill_version=receipt.skill_version,
                success=success,
                feedback_score=None,
                notes=finish_reason,
                created_at=run_ended_at,
            )
        )

    def _base_payload() -> dict[str, Any]:
        # Serialised lazily so the payload reflects the records as they are
        # at the moment the session event is written.
        return {
            "run_record": run_record.to_dict(),
            "skill_effects": [effect.to_dict() for effect in effect_records],
        }

    try:
        candidates = skill_learning_service.collect_run_receipts(
            RunReceiptContext(run_record=run_record, effect_records=effect_records),
            generate_candidates=generate_candidates,
        )
    except Exception as exc:  # pragma: no cover - defensive hot-path guard
        failure_payload = _base_payload()
        failure_payload["error"] = str(exc)
        session_manager.append_message(
            session_id,
            run_id=run_id,
            role="system",
            event_type="skill_effects_snapshot_failed",
            event_payload=failure_payload,
            content=f"Skill learning receipt recording failed: {exc}",
            context_visible=False,
        )
        return

    success_payload = _base_payload()
    success_payload["learning_candidates"] = [candidate.to_dict() for candidate in candidates]
    success_payload["learning_candidate_enabled"] = generate_candidates
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="skill_effects_snapshotted",
        event_payload=success_payload,
        content=f"Recorded {len(effect_records)} skill effect record(s).",
        context_visible=False,
    )
@staticmethod
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()

View File

@ -91,6 +91,19 @@ class SessionManager:
return self.store.get_run_event_records(session_id, run_id)
def update_latest_assistant_event_payload(
    self,
    session_id: str,
    run_id: str,
    updates: dict[str, Any],
) -> None:
    """Project run-level UI state back onto the latest visible assistant message.

    Thin pass-through to the underlying store's method of the same name.
    """
    backing_store = self.store
    backing_store.update_latest_assistant_event_payload(session_id, run_id, updates)
def set_run_context_visible(self, session_id: str, run_id: str, visible: bool) -> None:
    """Forward a run-wide context-visibility change to the underlying store."""
    backing_store = self.store
    backing_store.set_run_context_visible(session_id, run_id, visible)
def list_run_ids(self, session_id: str) -> list[str]:
"""按出现顺序列出当前 session 的所有 run_id。"""

View File

@ -75,6 +75,19 @@ class MessageRecord:
"role": self.role,
"content": self.content,
}
if self.run_id:
payload["run_id"] = self.run_id
if self.event_payload:
if self.event_payload.get("task_id"):
payload["task_id"] = self.event_payload.get("task_id")
if self.event_payload.get("task_status"):
payload["task_status"] = self.event_payload.get("task_status")
if self.event_payload.get("validation_status"):
payload["validation_status"] = self.event_payload.get("validation_status")
if self.event_payload.get("feedback_state"):
payload["feedback_state"] = self.event_payload.get("feedback_state")
if self.event_payload.get("feedback_error"):
payload["feedback_error"] = self.event_payload.get("feedback_error")
if self.tool_name:
payload["tool_name"] = self.tool_name
if self.tool_calls:

View File

@ -432,6 +432,71 @@ class SessionStore:
)
return [MessageRecord.from_row(row) for row in rows]
def update_latest_assistant_event_payload(
    self,
    session_id: str,
    run_id: str,
    updates: dict[str, Any],
) -> None:
    """Merge ``updates`` into the payload of a run's latest visible assistant message.

    The target row is the most recent (by ``timestamp``, then ``id``)
    ``assistant_message_added`` event of the run with ``context_visible = 1``.
    Its stored payload is decoded as JSON; a payload that is missing, is not
    valid JSON or is not a JSON object is treated as empty. Nothing happens
    when ``updates`` is empty or no such row exists.
    """
    if not updates:
        return

    def _merge_into_latest(conn: sqlite3.Connection) -> None:
        latest = conn.execute(
            """
            SELECT id, event_payload
            FROM messages
            WHERE session_id = ?
            AND run_id = ?
            AND role = 'assistant'
            AND event_type = 'assistant_message_added'
            AND context_visible = 1
            ORDER BY timestamp DESC, id DESC
            LIMIT 1
            """,
            (session_id, run_id),
        ).fetchone()
        if latest is None:
            return

        merged: dict[str, Any] = {}
        stored = latest["event_payload"]
        if stored:
            try:
                decoded = json.loads(stored)
            except json.JSONDecodeError:
                decoded = None
            if isinstance(decoded, dict):
                merged = decoded
        merged.update(updates)

        conn.execute(
            """
            UPDATE messages
            SET event_payload = ?
            WHERE id = ?
            """,
            (json.dumps(merged, ensure_ascii=False, sort_keys=True), latest["id"]),
        )

    self._execute_write(_merge_into_latest)
def set_run_context_visible(self, session_id: str, run_id: str, visible: bool) -> None:
    """Set ``context_visible`` on every event of one run whose flag differs from the target.

    ``visible`` is stored as ``1``/``0``; rows of the given session and run
    that already hold the target value are left untouched.
    """
    target_flag = 1 if visible else 0

    def _apply_flag(conn: sqlite3.Connection) -> None:
        conn.execute(
            """
            UPDATE messages
            SET context_visible = ?
            WHERE session_id = ?
            AND run_id = ?
            AND context_visible != ?
            """,
            (target_flag, session_id, run_id, target_flag),
        )

    self._execute_write(_apply_flag)
def get_messages_as_conversation(self, session_id: str) -> list[dict[str, Any]]:
messages: list[dict[str, Any]] = []
for record in self.get_event_records(session_id):

View File

@ -21,6 +21,16 @@ from beaver.interfaces.channels import ChannelAdapter, ChannelManager
from beaver.services.agent_service import AgentService
def _validate_gateway_service(service: AgentService) -> None:
    """Reject injected service objects the gateway cannot drive.

    The only check performed is that ``service.handle_inbound_message``
    exists and is callable.

    Raises:
        TypeError: If the attribute is missing or not callable.
    """
    if callable(getattr(service, "handle_inbound_message", None)):
        return
    raise TypeError(
        "Gateway requires a service with an async 'handle_inbound_message(inbound)' method"
    )
async def _cleanup_owned_service(
service: AgentService,
*,
@ -125,6 +135,7 @@ async def run_gateway(
"""
attached_service = service or AgentService(workspace=workspace, config_path=config_path)
_validate_gateway_service(attached_service)
if channel_manager is not None and channels is not None:
raise ValueError("Pass either channel_manager or channels, not both")
if bus is not None:

View File

@ -2,16 +2,30 @@
from __future__ import annotations
import json
import asyncio
from collections.abc import AsyncIterator, Callable
from contextlib import asynccontextmanager, suppress
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from beaver.engine.providers.registry import PROVIDERS, find_by_name
from beaver.foundation.config import default_config_path, load_config
from beaver.services.agent_service import AgentService
from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig
from .deps import get_agent_service
from .schemas import WebChatRequest, WebChatResponse, WebErrorResponse, WebStatusResponse
from .schemas import (
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
WebChatResponse,
WebErrorResponse,
WebProviderConfigRequest,
WebProviderConfigResponse,
WebStatusResponse,
)
try:
from fastapi import FastAPI, HTTPException, Request
@ -50,6 +64,24 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
return decorator
def put(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the handler is handed back unchanged.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
def patch(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the handler is handed back unchanged.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
def delete(self, _path: str, **_kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Return a no-op route decorator: the handler is handed back unchanged.

    The path and keyword options are accepted and ignored.
    """

    def passthrough(handler: Callable[..., Any]) -> Callable[..., Any]:
        return handler

    return passthrough
@asynccontextmanager
async def _app_lifespan(
@ -82,9 +114,28 @@ async def _app_lifespan(
else:
attached_service.close()
raise
worker: SkillLearningWorker | None = None
worker_task = None
worker_config = SkillLearningWorkerConfig.from_env()
if owns_service and worker_config.enabled:
loaded = attached_service.create_loop().boot()
worker = SkillLearningWorker(
pipeline=loaded.skill_learning_pipeline, # type: ignore[arg-type]
provider_bundle_factory=lambda: attached_service._make_provider_bundle_for_task(loaded, {}), # noqa: SLF001
config=worker_config,
)
worker_task = asyncio.create_task(worker.run_forever())
app.state.skill_learning_worker = worker
app.state.skill_learning_worker_task = worker_task
try:
yield
finally:
if worker is not None:
worker.stop()
if worker_task is not None:
worker_task.cancel()
with suppress(BaseException):
await worker_task
if owns_service and started:
await attached_service.shutdown(
timeout_seconds=shutdown_timeout_seconds,
@ -133,6 +184,412 @@ def create_app(
mode="running" if running else ("direct" if agent_service.has_loop else "idle"),
)
@app.get("/api/status")
async def status(request: Request) -> dict[str, Any]:
    """Return a status snapshot for the attached agent service.

    The response contains the config and workspace paths (and whether they
    exist), the default model and run parameters from the agent profile, one
    entry per known provider spec, and fixed ``channels`` / ``cron`` entries.

    For each provider, ``has_key`` means: the provider is configured (OAuth
    providers), an ``api_base`` is set (local or direct providers), or an
    ``api_key`` is set (all others). API keys are only reported masked.
    """
    agent_service = get_agent_service(request)
    loaded = agent_service.create_loop().boot()
    config = loaded.config
    config_path = config.config_path or default_config_path(workspace=loaded.workspace)
    default_provider = config.resolve_provider_target().get("provider_name")

    def describe_provider(spec: Any) -> dict[str, Any]:
        provider_cfg = config.providers.get(spec.name)
        enabled = provider_cfg is not None
        api_key = provider_cfg.api_key if enabled else None
        api_base = provider_cfg.api_base if enabled else None
        if spec.is_oauth:
            has_key = enabled
        else:
            has_key = bool(api_base if (spec.is_local or spec.is_direct) else api_key)
        return {
            "id": spec.name,
            "name": spec.label,
            "label": spec.label,
            "enabled": enabled,
            "active": default_provider == spec.name,
            "has_key": has_key,
            "api_key_masked": _mask_secret(api_key),
            "api_base": api_base or "",
            "default_api_base": spec.default_api_base,
            "detail": api_base or spec.default_api_base or "",
            "requires_api_key": not (spec.is_oauth or spec.is_local or spec.is_direct),
            "is_oauth": spec.is_oauth,
            "is_local": spec.is_local,
        }

    providers_status = [describe_provider(spec) for spec in PROVIDERS]
    return {
        "config_path": str(config_path),
        "config_exists": config_path.exists(),
        "workspace": str(loaded.workspace),
        "workspace_exists": loaded.workspace.exists(),
        "model": config.default_model or agent_service.profile.default_model,
        "max_tokens": agent_service.profile.max_tokens,
        "temperature": agent_service.profile.temperature,
        "max_tool_iterations": agent_service.profile.max_tool_iterations,
        "providers": providers_status,
        "channels": [{"name": "web", "enabled": True}],
        "cron": {"enabled": False, "jobs": 0, "next_wake_at_ms": None},
    }
# Enable, update or disable one provider in the on-disk JSON config, then
# reload the agent service's in-memory config.
# Responds 404 when find_by_name() does not know the provider name.
@app.post("/api/providers/{provider_name}/config", response_model=WebProviderConfigResponse)
async def update_provider_config(
provider_name: str,
request: Request,
payload: WebProviderConfigRequest,
) -> WebProviderConfigResponse:
spec = find_by_name(provider_name)
if spec is None:
raise HTTPException(status_code=404, detail=f"Unknown provider: {provider_name}")
agent_service = get_agent_service(request)
config_path = agent_service.loader.config.config_path or default_config_path(workspace=agent_service.loader.workspace)
# Work on the raw JSON document; make sure the nested sections exist as dicts.
raw = _read_config_json(config_path)
providers = _ensure_dict(raw, "providers")
agents = _ensure_dict(raw, "agents")
defaults = _ensure_dict(agents, "defaults")
if not payload.enabled:
# Disabling: drop the provider entry and stop using it as the default provider.
providers.pop(spec.name, None)
if _clean_text(defaults.get("provider")) == spec.name:
defaults.pop("provider", None)
else:
# Enabling/updating: start from a copy of the stored entry (if it is a dict).
current = providers.get(spec.name) if isinstance(providers.get(spec.name), dict) else {}
provider_payload = dict(current)
api_key = _clean_text(payload.api_key)
api_base = _clean_text(payload.api_base)
if api_key:
provider_payload["apiKey"] = api_key
# NOTE(review): this branch pops "apiKey" only when it is already absent, so it has no effect.
elif "apiKey" not in provider_payload and "api_key" not in provider_payload:
provider_payload.pop("apiKey", None)
# Base URL: an explicit value wins; otherwise fall back to the spec default
# when nothing is stored; with no default at all, "apiBase" is removed.
if api_base:
provider_payload["apiBase"] = api_base
elif spec.default_api_base and not provider_payload.get("apiBase") and not provider_payload.get("api_base"):
provider_payload["apiBase"] = spec.default_api_base
elif not api_base and not spec.default_api_base:
provider_payload.pop("apiBase", None)
if payload.request_timeout_seconds is not None:
provider_payload["requestTimeoutSeconds"] = payload.request_timeout_seconds
providers[spec.name] = provider_payload
defaults["provider"] = spec.name
# A non-blank model is stored under the agents defaults section.
model = _clean_text(payload.model)
if model:
defaults["model"] = model
# Persist the document, then refresh the config held by the running service.
_write_config_json(config_path, raw)
_reload_agent_config(agent_service, config_path)
return WebProviderConfigResponse(ok=True, provider=spec.name, enabled=payload.enabled)
@app.get("/api/sessions")
async def list_sessions(request: Request) -> list[dict[str, Any]]:
    """List up to 100 sessions, excluding those whose source is "subagent"."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    rows = manager.list_sessions_rich(limit=100, exclude_sources=["subagent"])  # type: ignore[union-attr]
    summaries: list[dict[str, Any]] = []
    for row in rows:
        summaries.append(
            {
                "key": str(row.get("id")),
                "created_at": _iso_from_timestamp(row.get("started_at")),
                "updated_at": _iso_from_timestamp(row.get("last_active")),
                # The session id doubles as the path shown by the UI.
                "path": str(row.get("id")),
            }
        )
    return summaries
@app.post("/api/sessions/{session_id:path}")
async def create_session(session_id: str, request: Request) -> dict[str, Any]:
    """Get or create the session with source "web" and return its detail view."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    record = manager.get_or_create(session_id, source="web")  # type: ignore[union-attr]
    return _session_detail(manager, session_id, record)  # type: ignore[arg-type]
@app.get("/api/sessions/{session_id:path}/process")
async def get_session_process(session_id: str, request: Request) -> dict[str, Any]:
    """Return the process projection of a session built by SessionProcessProjector."""
    # Imported at call time, as in the rest of this handler's history.
    from beaver.services.process_service import SessionProcessProjector

    loaded = get_agent_service(request).create_loop().boot()
    return SessionProcessProjector(loaded.session_manager, loaded.run_memory_store).project(session_id)
@app.get("/api/sessions/{session_id:path}")
async def get_session(session_id: str, request: Request) -> dict[str, Any]:
    """Return the detail view of a session (fetched via get_or_create, source "web")."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    record = manager.get_or_create(session_id, source="web")  # type: ignore[union-attr]
    return _session_detail(manager, session_id, record)  # type: ignore[arg-type]
@app.delete("/api/sessions/{session_id:path}")
async def delete_session(session_id: str, request: Request) -> dict[str, Any]:
    """End the session with reason "deleted" and acknowledge."""
    manager = get_agent_service(request).create_loop().boot().session_manager
    manager.end_session(session_id, "deleted")  # type: ignore[union-attr]
    return {"ok": True}
@app.get("/api/agents")
async def list_agents(request: Request) -> list[dict[str, Any]]:
    """Return every registered agent converted to its UI representation."""
    registry = get_agent_service(request).create_loop().boot().agent_registry
    return [_registered_agent_to_ui(entry) for entry in registry.list_agents()]  # type: ignore[union-attr]
@app.post("/api/agents")
async def upsert_agent(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Create or replace an agent from a UI payload and return its UI representation."""
    registry = get_agent_service(request).create_loop().boot().agent_registry
    normalized = _agent_payload_from_ui(payload)
    stored = registry.upsert_agent(normalized)  # type: ignore[union-attr]
    return _registered_agent_to_ui(stored)
@app.patch("/api/agents/{agent_id}")
async def patch_agent(agent_id: str, request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Partially update a registered agent and return its UI representation.

    Only fields the client addressed, or for which the payload normaliser
    derived a real value, are merged over the stored agent. The agent id from
    the URL always wins over any id in the body.

    Raises:
        HTTPException: 404 when no agent with ``agent_id`` is registered.
    """
    loaded = get_agent_service(request).create_loop().boot()
    registry = loaded.agent_registry
    current = registry.get_agent(agent_id)  # type: ignore[union-attr]
    if current is None:
        raise HTTPException(status_code=404, detail=f"Unknown agent: {agent_id}")

    normalized = _agent_payload_from_ui(payload)
    # The normaliser emits every key, filling gaps with defaults. Merging that
    # wholesale would wipe fields the client never mentioned (name -> None,
    # status -> "active", metadata -> {}), so default-filled gaps are dropped.
    addressed = set(payload)
    if "enabled" in payload:
        addressed.add("status")  # "enabled" is the UI spelling of status
    always_defaulted = {"status", "source"}  # these receive non-empty defaults
    updates = {
        key: value
        for key, value in normalized.items()
        if key in addressed
        or (key not in always_defaulted and value not in (None, "", [], {}, 0))
    }

    merged = current.to_dict()
    merged.update(updates)
    merged["agent_id"] = agent_id
    agent = registry.upsert_agent(merged)  # type: ignore[union-attr]
    return _registered_agent_to_ui(agent)
@app.post("/api/agents/{agent_id}/disable")
async def disable_agent(agent_id: str, request: Request) -> dict[str, Any]:
    """Disable an agent; a ValueError from the registry becomes a 404."""
    registry = get_agent_service(request).create_loop().boot().agent_registry
    try:
        disabled = registry.disable_agent(agent_id)  # type: ignore[union-attr]
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return _registered_agent_to_ui(disabled)
@app.get("/api/skills")
async def list_skills(request: Request) -> list[dict[str, Any]]:
    """List all skills, including unavailable ones, with an availability flag."""
    loaded = get_agent_service(request).create_loop().boot()
    listing: list[dict[str, Any]] = []
    for record in loaded.skills_loader.list_skills(filter_unavailable=False):  # type: ignore[union-attr]
        # Anything that is not a builtin is reported as a workspace skill.
        origin = "builtin" if record.source == "builtin" else "workspace"
        listing.append(
            {
                "name": record.name,
                "description": record.description,
                "source": origin,
                "available": loaded.skills_loader._record_available(record),  # type: ignore[union-attr]
                "path": str(record.path),
                "agent_cards": [],
            }
        )
    return listing
@app.get("/api/skills/candidates")
async def list_skill_candidates(request: Request, status: str | None = None) -> list[dict[str, Any]]:
    """List skill learning candidates, optionally filtered by status."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    candidates = pipeline.list_candidates(status=status)  # type: ignore[union-attr]
    return [candidate.to_dict() for candidate in candidates]
@app.get("/api/skills/candidates/{candidate_id}")
async def get_skill_candidate(candidate_id: str, request: Request) -> dict[str, Any]:
    """Return one learning candidate; a ValueError from the pipeline becomes a 404."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        # Serialisation stays inside the try so its ValueErrors map to 404 too.
        detail = pipeline.get_candidate(candidate_id).to_dict()  # type: ignore[union-attr]
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return detail
@app.post("/api/skills/candidates/{candidate_id}/draft")
async def synthesize_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]:
    """Synthesize a draft for a candidate, then run its safety check and evaluation.

    Any ValueError raised by the three pipeline steps is reported as a 404.
    """
    service = get_agent_service(request)
    loaded = service.create_loop().boot()
    bundle = service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
    pipeline = loaded.skill_learning_pipeline
    try:
        draft = await pipeline.synthesize_draft(candidate_id, provider_bundle=bundle)  # type: ignore[union-attr]
        pipeline.check_safety(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        await pipeline.evaluate_draft(  # type: ignore[union-attr]
            candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=bundle,
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return draft.to_dict()
@app.post("/api/skills/candidates/{candidate_id}/regenerate")
async def regenerate_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]:
    """Regenerate a candidate's draft, then run its safety check and evaluation.

    Any ValueError raised by the three pipeline steps is reported as a 404.
    """
    service = get_agent_service(request)
    loaded = service.create_loop().boot()
    bundle = service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
    pipeline = loaded.skill_learning_pipeline
    try:
        draft = await pipeline.regenerate_draft(candidate_id, provider_bundle=bundle)  # type: ignore[union-attr]
        pipeline.check_safety(draft.skill_name, draft.draft_id)  # type: ignore[union-attr]
        await pipeline.evaluate_draft(  # type: ignore[union-attr]
            candidate_id,
            draft.skill_name,
            draft.draft_id,
            provider_bundle=bundle,
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return draft.to_dict()
@app.post("/api/skills/learning/run-once")
async def run_skill_learning_once(request: Request) -> dict[str, Any]:
    """Run a single pass of a freshly built SkillLearningWorker and return its result."""
    service = get_agent_service(request)
    loaded = service.create_loop().boot()

    def _bundle_factory() -> Any:
        # Built lazily so the worker creates a bundle only when it asks for one.
        return service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001

    one_shot = SkillLearningWorker(
        pipeline=loaded.skill_learning_pipeline,  # type: ignore[arg-type]
        provider_bundle_factory=_bundle_factory,
        config=SkillLearningWorkerConfig.from_env(),
    )
    outcome = await one_shot.run_once()
    return outcome.to_dict()
@app.get("/api/skills/drafts")
async def list_skill_drafts(request: Request) -> list[dict[str, Any]]:
    """List all skill drafts, each with its safety and eval report (or None)."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline

    def _with_reports(item: Any) -> dict[str, Any]:
        safety = pipeline.get_safety_report(item.skill_name, item.draft_id)  # type: ignore[union-attr]
        eval_report = pipeline.get_eval_report(item.skill_name, item.draft_id)  # type: ignore[union-attr]
        return {
            **item.to_dict(),
            "safety_report": None if safety is None else safety.to_dict(),
            "eval_report": None if eval_report is None else eval_report.to_dict(),
        }

    return [_with_reports(item) for item in pipeline.list_drafts()]  # type: ignore[union-attr]
@app.get("/api/skills/{skill_name}/drafts/{draft_id}")
async def get_skill_draft(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return one skill draft with its reviews, safety report and eval report.

    Each report is ``None`` when the pipeline has none for the draft.

    Raises:
        HTTPException: 404 when the pipeline raises ValueError for the draft lookup.
    """
    loaded = get_agent_service(request).create_loop().boot()
    pipeline = loaded.skill_learning_pipeline
    try:
        draft = pipeline.get_draft(skill_name, draft_id)  # type: ignore[union-attr]
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    payload = draft.to_dict()
    reviews = [
        item.to_dict()
        for item in pipeline.reviews_for_draft(skill_name, draft_id)  # type: ignore[union-attr]
    ]
    # Fetch each report exactly once: checking one lookup and serialising a
    # second one could dereference None if the report changed in between.
    safety = pipeline.get_safety_report(skill_name, draft_id)  # type: ignore[union-attr]
    eval_report = pipeline.get_eval_report(skill_name, draft_id)  # type: ignore[union-attr]
    return {
        **payload,
        "reviews": reviews,
        "safety_report": safety.to_dict() if safety is not None else None,
        "eval_report": eval_report.to_dict() if eval_report is not None else None,
    }
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/safety")
async def get_skill_draft_safety(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return the safety report of a draft, or 404 when there is none."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    report = pipeline.get_safety_report(skill_name, draft_id)  # type: ignore[union-attr]
    if report is not None:
        return report.to_dict()
    raise HTTPException(status_code=404, detail="Safety report not found")
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/eval")
async def get_skill_draft_eval(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Return the eval report of a draft, or 404 when there is none."""
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    report = pipeline.get_eval_report(skill_name, draft_id)  # type: ignore[union-attr]
    if report is not None:
        return report.to_dict()
    raise HTTPException(status_code=404, detail="Eval report not found")
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/submit")
async def submit_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Submit a draft for review; the requester defaults to "web", notes to empty."""
    body = payload or {}
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        review = pipeline.submit_review(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            requested_by=str(body.get("requested_by") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/approve")
async def approve_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Approve a draft; the reviewer defaults to "web", notes to empty."""
    body = payload or {}
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        review = pipeline.approve(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            reviewer=str(body.get("reviewer") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/reject")
async def reject_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Reject a draft; the reviewer defaults to "web", notes to empty."""
    body = payload or {}
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        review = pipeline.reject(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            reviewer=str(body.get("reviewer") or "web"),
            notes=str(body.get("notes") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return review.to_dict()
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/publish")
async def publish_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Publish a draft; a ValueError from the pipeline is reported as a 400."""
    body = payload or {}
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        result = pipeline.publish(  # type: ignore[union-attr]
            skill_name,
            draft_id,
            publisher=str(body.get("publisher") or "web"),
            notes=str(body.get("notes") or ""),
            # Truthiness of the supplied value decides the confirmation flag.
            confirm_high_risk=bool(body.get("confirm_high_risk")),
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return result.to_dict()
@app.post("/api/skills/{skill_name}/disable")
async def disable_skill(skill_name: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Disable a skill; the actor defaults to "web", the reason to empty."""
    body = payload or {}
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        spec = pipeline.disable(  # type: ignore[union-attr]
            skill_name,
            actor=str(body.get("actor") or "web"),
            reason=str(body.get("reason") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return spec.to_dict()
@app.post("/api/skills/{skill_name}/rollback")
async def rollback_skill(skill_name: str, request: Request, payload: dict[str, Any]) -> dict[str, Any]:
    """Roll a skill back to ``target_version``.

    Responds 400 when ``target_version`` is missing or blank, and when the
    pipeline raises ValueError.
    """
    requested = payload.get("target_version") or ""
    target_version = str(requested).strip()
    if target_version == "":
        raise HTTPException(status_code=400, detail="target_version is required")
    pipeline = get_agent_service(request).create_loop().boot().skill_learning_pipeline
    try:
        spec = pipeline.rollback(  # type: ignore[union-attr]
            skill_name,
            target_version,
            actor=str(payload.get("actor") or "web"),
            reason=str(payload.get("reason") or ""),
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return spec.to_dict()
@app.post(
"/api/chat",
response_model=WebChatResponse,
@ -191,11 +648,132 @@ def create_app(
provider_name=result.provider_name,
model=result.model,
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
)
@app.post(
    "/api/chat/feedback",
    response_model=WebChatFeedbackResponse,
    responses={
        400: {"model": WebErrorResponse},
        404: {"model": WebErrorResponse},
    },
)
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
    """Forward chat feedback to the agent service and wrap its result.

    A ValueError whose message contains "No internal task" is reported as a
    404; any other ValueError as a 400.
    """
    service = get_agent_service(request)
    try:
        outcome = await service.submit_feedback(
            session_id=payload.session_id,
            run_id=payload.run_id,
            feedback_type=payload.feedback_type,
            comment=payload.comment,
        )
    except ValueError as exc:
        message = str(exc)
        if "No internal task" in message:
            code = 404
        else:
            code = 400
        raise HTTPException(status_code=code, detail=message) from exc
    return WebChatFeedbackResponse(**outcome)
return app
def _session_detail(session_manager: Any, session_id: str, session: dict[str, Any]) -> dict[str, Any]:
    """Build the UI detail view of a session: its user/assistant messages and timestamps."""
    # Event fields copied through unchanged after role, content and timestamp.
    passthrough = (
        "run_id",
        "task_id",
        "task_status",
        "validation_status",
        "feedback_state",
        "feedback_error",
    )
    visible_roles = {"user", "assistant"}
    messages = []
    for event in session_manager.get_messages_as_conversation(session_id):
        role = event.get("role")
        if role in visible_roles:
            entry = {
                "role": role,
                "content": event.get("content") or "",
                "timestamp": _iso_from_timestamp(event.get("timestamp")),
            }
            for field_name in passthrough:
                entry[field_name] = event.get(field_name)
            messages.append(entry)
    return {
        "key": session_id,
        "messages": messages,
        "created_at": _iso_from_timestamp(session.get("started_at")),
        "updated_at": _iso_from_timestamp(session.get("last_active")),
    }
def _iso_from_timestamp(value: Any) -> str:
from datetime import datetime, timezone
if value in (None, ""):
return datetime.now(timezone.utc).isoformat()
try:
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
except (TypeError, ValueError):
return str(value)
def _registered_agent_to_ui(agent: Any) -> dict[str, Any]:
return {
"id": agent.agent_id,
"name": agent.display_name or agent.name,
"description": agent.description,
"source": agent.source if agent.source in {"workspace", "skill", "builtin"} else "workspace",
"kind": "specialist",
"protocol": None,
"endpoint": None,
"base_url": None,
"card_url": None,
"auth_env": None,
"auth_mode": "none",
"auth_audience": None,
"auth_scopes": [],
"tags": list(agent.tags),
"aliases": [agent.name],
"metadata": {
**dict(agent.metadata),
"role": agent.role,
"capabilities": list(agent.capabilities),
"skill_names": list(agent.skill_names),
"tool_hints": list(agent.tool_hints),
"priority": agent.priority,
"status": agent.status,
},
"support_streaming": False,
}
def _agent_payload_from_ui(payload: dict[str, Any]) -> dict[str, Any]:
metadata = dict(payload.get("metadata") or {})
capabilities = payload.get("capabilities")
if capabilities is None and isinstance(metadata.get("capabilities"), list):
capabilities = metadata.get("capabilities")
role = payload.get("role") or metadata.get("role") or payload.get("kind") or ""
return {
"agent_id": payload.get("agent_id") or payload.get("id") or payload.get("name"),
"name": payload.get("name") or payload.get("id"),
"display_name": payload.get("display_name") or payload.get("name") or payload.get("id"),
"role": role,
"description": payload.get("description") or "",
"system_prompt": payload.get("system_prompt") or metadata.get("system_prompt") or "",
"capabilities": capabilities or [],
"skill_names": payload.get("skill_names") or metadata.get("skill_names") or [],
"tool_hints": payload.get("tool_hints") or metadata.get("tool_hints") or [],
"model": payload.get("model") or metadata.get("model"),
"provider_name": payload.get("provider_name") or metadata.get("provider_name"),
"tags": payload.get("tags") or [],
"priority": payload.get("priority") or metadata.get("priority") or 0,
"status": payload.get("status") or ("active" if payload.get("enabled", True) else "disabled"),
"source": payload.get("source") or "workspace",
"metadata": metadata,
}
def _model_dump(value: Any) -> dict[str, Any] | None:
"""兼容 Pydantic v1/v2 的最小导出辅助。"""
@ -206,3 +784,52 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
if hasattr(value, "dict"):
return value.dict(exclude_none=True)
return dict(value)
def _clean_text(value: Any) -> str | None:
if value is None:
return None
text = str(value).strip()
return text or None
def _mask_secret(value: str | None) -> str:
    """Mask a secret for display.

    Blank or missing secrets yield an empty string, secrets of up to eight
    characters are fully masked, and longer ones keep their first and last
    four characters around the mask.
    """
    secret = _clean_text(value)
    if not secret:
        return ""
    return "••••" if len(secret) <= 8 else f"{secret[:4]}••••{secret[-4:]}"
def _read_config_json(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
data = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(data, dict):
raise ValueError(f"Config must be a JSON object: {path}")
return data
def _ensure_dict(parent: dict[str, Any], key: str) -> dict[str, Any]:
value = parent.get(key)
if not isinstance(value, dict):
value = {}
parent[key] = value
return value
def _write_config_json(path: Path, data: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = path.with_name(f"{path.name}.tmp")
tmp_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
tmp_path.replace(path)
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
    """Reload the config from disk and install it on the service's loader and live loop.

    The loop's loaded state is updated only when the service has a ``_loop``
    attribute whose ``loaded`` attribute is set.
    """
    fresh = load_config(config_path=config_path)
    agent_service.loader.config = fresh
    loop = getattr(agent_service, "_loop", None)
    if loop is None:
        return
    loaded = getattr(loop, "loaded", None)
    if loaded is None:
        return
    loaded.config = fresh

View File

@ -1,11 +1,25 @@
"""Web request and response schemas."""
from .chat import WebChatRequest, WebChatResponse, WebErrorResponse, WebProviderTarget, WebStatusResponse
from .chat import (
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
WebChatResponse,
WebErrorResponse,
WebProviderConfigRequest,
WebProviderConfigResponse,
WebProviderTarget,
WebStatusResponse,
)
__all__ = [
"WebChatFeedbackRequest",
"WebChatFeedbackResponse",
"WebChatRequest",
"WebChatResponse",
"WebErrorResponse",
"WebProviderConfigRequest",
"WebProviderConfigResponse",
"WebProviderTarget",
"WebStatusResponse",
]

View File

@ -77,6 +77,47 @@ class WebChatResponse(BaseModel):
provider_name: str | None = None
model: str | None = None
usage: dict[str, Any] = Field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
validation_result: dict[str, Any] | None = None
class WebChatFeedbackRequest(BaseModel):
"""Feedback on the latest assistant result in chat."""
# Request body of POST /api/chat/feedback; all four fields are forwarded
# unchanged to the agent service's submit_feedback().
# session_id / run_id identify the run the feedback refers to.
session_id: str
run_id: str
# Free-form string at the schema level; presumably one of
# satisfied / revise / abandon per the commit description (TODO confirm).
feedback_type: str
# Optional free-text remark.
comment: str | None = None
class WebChatFeedbackResponse(BaseModel):
"""Feedback recording result."""
# Built by the feedback route from the mapping returned by the agent
# service's submit_feedback() (unpacked as keyword arguments).
session_id: str
run_id: str
task_id: str
task_status: str
feedback_type: str
# Defaults to an empty list when the result carries no candidates.
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebProviderConfigRequest(BaseModel):
"""Provider config update from the status page."""
# Request body of POST /api/providers/{provider_name}/config.
# False removes the provider entry from the config file.
enabled: bool = True
# Blank or missing text fields are treated as "not supplied" by the route.
model: str | None = None
api_key: str | None = None
api_base: str | None = None
# Written to the provider entry only when it is not None.
request_timeout_seconds: float | None = None
class WebProviderConfigResponse(BaseModel):
"""Provider config update result."""
# The route returns ok=True together with the canonical provider name from
# its spec and the enabled flag echoed from the request.
ok: bool
provider: str
enabled: bool
class WebStatusResponse(BaseModel):

View File

@ -1,2 +1,6 @@
"""Run records."""
from .models import RunOutcome, RunRecord, SkillEffectRecord
from .store import RunMemoryStore
__all__ = ["RunMemoryStore", "RunOutcome", "RunRecord", "SkillEffectRecord"]

View File

@ -0,0 +1,142 @@
"""Run-level receipts and skill effect records."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from beaver.skills.specs import SkillActivationReceipt
@dataclass(slots=True)
class RunOutcome:
success: bool
finish_reason: str
feedback_score: float | None = None
notes: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"success": self.success,
"finish_reason": self.finish_reason,
"feedback_score": self.feedback_score,
"notes": self.notes,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "RunOutcome":
return cls(
success=bool(payload.get("success")),
finish_reason=str(payload.get("finish_reason") or ""),
feedback_score=_coerce_optional_float(payload.get("feedback_score")),
notes=str(payload.get("notes") or ""),
)
@dataclass(slots=True)
class RunRecord:
run_id: str
session_id: str
task_text: str
started_at: str
ended_at: str
success: bool
finish_reason: str
feedback: dict[str, Any] = field(default_factory=dict)
activated_skills: list[SkillActivationReceipt] = field(default_factory=list)
task_id: str | None = None
attempt_index: int | None = None
validation_result: dict[str, Any] | None = None
def to_dict(self) -> dict[str, Any]:
return {
"run_id": self.run_id,
"session_id": self.session_id,
"task_id": self.task_id,
"attempt_index": self.attempt_index,
"task_text": self.task_text,
"started_at": self.started_at,
"ended_at": self.ended_at,
"success": self.success,
"finish_reason": self.finish_reason,
"feedback": dict(self.feedback),
"activated_skills": [receipt.to_dict() for receipt in self.activated_skills],
"validation_result": self.validation_result,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "RunRecord":
return cls(
run_id=str(payload["run_id"]),
session_id=str(payload["session_id"]),
task_id=_coerce_optional_str(payload.get("task_id")),
attempt_index=_coerce_optional_int(payload.get("attempt_index")),
task_text=str(payload.get("task_text") or ""),
started_at=str(payload.get("started_at") or ""),
ended_at=str(payload.get("ended_at") or ""),
success=bool(payload.get("success")),
finish_reason=str(payload.get("finish_reason") or ""),
feedback=dict(payload.get("feedback") or {}),
activated_skills=[
SkillActivationReceipt.from_dict(item)
for item in payload.get("activated_skills") or []
if isinstance(item, dict)
],
validation_result=(
dict(payload["validation_result"])
if isinstance(payload.get("validation_result"), dict)
else None
),
)
@dataclass(slots=True)
class SkillEffectRecord:
run_id: str
skill_name: str
skill_version: str
success: bool
feedback_score: float | None
notes: str
created_at: str
def to_dict(self) -> dict[str, Any]:
return {
"run_id": self.run_id,
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"success": self.success,
"feedback_score": self.feedback_score,
"notes": self.notes,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillEffectRecord":
return cls(
run_id=str(payload["run_id"]),
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
success=bool(payload.get("success")),
feedback_score=_coerce_optional_float(payload.get("feedback_score")),
notes=str(payload.get("notes") or ""),
created_at=str(payload.get("created_at") or ""),
)
def _coerce_optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _coerce_optional_int(value: Any) -> int | None:
if value in (None, ""):
return None
return int(value)
def _coerce_optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)

View File

@ -0,0 +1,98 @@
"""File-backed run receipt store."""
from __future__ import annotations
import json
from pathlib import Path
from .models import RunRecord, SkillEffectRecord
class RunMemoryStore:
    """File-backed store of run records and skill-effect records.

    Both collections live under ``root`` as JSON Lines files
    (``runs.jsonl`` and ``skill-effects.jsonl``), one JSON object per line.
    """

    def __init__(self, root: str | Path) -> None:
        """Create ``root`` if needed and derive the two file paths."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.runs_path = self.root / "runs.jsonl"
        self.effects_path = self.root / "skill-effects.jsonl"

    def append_run_record(self, record: RunRecord) -> None:
        """Append one run record to the runs file."""
        self._append_jsonl(self.runs_path, record.to_dict())

    def update_run_record(self, run_id: str, **updates: object) -> RunRecord | None:
        """Apply ``updates`` to the first record with ``run_id`` and rewrite the file.

        Returns:
            The updated record, or ``None`` when no record matches (the file is
            then left untouched).
        """
        records = self.list_runs()
        updated: RunRecord | None = None
        for index, record in enumerate(records):
            if record.run_id != run_id:
                continue
            payload = record.to_dict()
            payload.update(updates)
            updated = RunRecord.from_dict(payload)
            records[index] = updated
            break
        if updated is None:
            return None
        self.runs_path.parent.mkdir(parents=True, exist_ok=True)
        # Write to a sibling file and swap it in, so a failure mid-write cannot
        # leave the only copy of the run history truncated.
        tmp_path = self.runs_path.with_name(f"{self.runs_path.name}.tmp")
        tmp_path.write_text(
            "".join(
                json.dumps(record.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
                for record in records
            ),
            encoding="utf-8",
        )
        tmp_path.replace(self.runs_path)
        return updated

    def append_skill_effect(self, effect: SkillEffectRecord) -> None:
        """Append one skill-effect record to the effects file."""
        self._append_jsonl(self.effects_path, effect.to_dict())

    def list_runs(self) -> list[RunRecord]:
        """Return every stored run record in file order."""
        return [RunRecord.from_dict(item) for item in self._read_jsonl(self.runs_path)]

    def list_runs_by_skill(self, skill_name: str, version: str | None = None, limit: int | None = None) -> list[RunRecord]:
        """Return runs that activated ``skill_name`` (optionally a given version).

        With ``limit`` set, only the last ``limit`` matches are returned; a
        limit of zero or less yields an empty list.
        """
        results = [
            record
            for record in self.list_runs()
            if any(
                receipt.skill_name == skill_name
                and (version is None or receipt.skill_version == version)
                for receipt in record.activated_skills
            )
        ]
        return self._tail(results, limit)

    def list_skill_effects(self, skill_name: str, version: str | None = None, limit: int | None = None) -> list[SkillEffectRecord]:
        """Return effect records for ``skill_name`` (optionally a given version).

        With ``limit`` set, only the last ``limit`` matches are returned; a
        limit of zero or less yields an empty list.
        """
        results: list[SkillEffectRecord] = []
        for payload in self._read_jsonl(self.effects_path):
            effect = SkillEffectRecord.from_dict(payload)
            if effect.skill_name != skill_name:
                continue
            if version is not None and effect.skill_version != version:
                continue
            results.append(effect)
        return self._tail(results, limit)

    @staticmethod
    def _tail(items: list, limit: int | None) -> list:
        """Return the last ``limit`` items; ``None`` means no limit."""
        if limit is None:
            return items
        # items[-0:] would be the whole list, so non-positive limits are
        # handled explicitly.
        if limit <= 0:
            return []
        return items[-limit:]

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append ``payload`` as one JSON line, creating parent directories as needed."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Read all JSON objects from ``path``; a missing file yields an empty list.

        Blank lines and lines whose JSON value is not an object are skipped.
        """
        if not path.exists():
            return []
        results: list[dict] = []
        # Iterate the file rather than str.splitlines(): splitlines also breaks
        # on U+2028/U+2029/U+0085, which json.dumps(ensure_ascii=False) writes
        # unescaped inside strings and would therefore cut a record in two.
        with path.open("r", encoding="utf-8") as handle:
            for line in handle:
                cleaned = line.strip()
                if not cleaned:
                    continue
                payload = json.loads(cleaned)
                if isinstance(payload, dict):
                    results.append(payload)
        return results

View File

@ -1,2 +1,19 @@
"""Memory related to skill evolution."""
from .models import (
SkillDraftEvalReport,
SkillDraftSafetyReport,
SkillLearningAuditEvent,
SkillLearningCandidate,
SkillPerformanceSnapshot,
)
from .store import SkillLearningStore
__all__ = [
"SkillDraftEvalReport",
"SkillDraftSafetyReport",
"SkillLearningAuditEvent",
"SkillLearningCandidate",
"SkillLearningStore",
"SkillPerformanceSnapshot",
]

View File

@ -0,0 +1,289 @@
"""Aggregated skill learning models."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
# Status names for skill learning candidates. The dataclass default for
# SkillLearningCandidate.status ("open") is a member; presumably this is the
# full set of valid statuses — where it is enforced is not visible here.
LEARNING_CANDIDATE_STATUSES = {
"open",
"queued",
"synthesizing",
"draft_ready",
"safety_failed",
"eval_failed",
"review_pending",
"approved",
"rejected",
"published",
"failed",
"superseded",
}
# Risk level names; SkillLearningCandidate.risk_level defaults to "medium".
RISK_LEVELS = {"low", "medium", "high", "critical"}
@dataclass(slots=True)
class SkillPerformanceSnapshot:
skill_name: str
skill_version: str
activation_count: int
success_count: int
failure_count: int
latest_used_at: str
last_feedback_score: float | None = None
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"activation_count": self.activation_count,
"success_count": self.success_count,
"failure_count": self.failure_count,
"latest_used_at": self.latest_used_at,
"last_feedback_score": self.last_feedback_score,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
value = payload.get("last_feedback_score")
return cls(
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
activation_count=int(payload.get("activation_count", 0) or 0),
success_count=int(payload.get("success_count", 0) or 0),
failure_count=int(payload.get("failure_count", 0) or 0),
latest_used_at=str(payload.get("latest_used_at") or ""),
last_feedback_score=None if value in (None, "") else float(value),
)
@dataclass(slots=True)
class SkillLearningCandidate:
candidate_id: str
kind: str
source_run_ids: list[str]
source_session_ids: list[str]
related_skill_names: list[str]
reason: str
evidence: dict[str, Any] = field(default_factory=dict)
status: str = "open"
priority: int = 0
confidence: float = 0.0
risk_level: str = "medium"
owner: str | None = None
retry_count: int = 0
last_error: str | None = None
trigger_reason: str = ""
evidence_summary: str = ""
draft_skill_name: str | None = None
draft_id: str | None = None
safety_report_id: str | None = None
eval_report_id: str | None = None
created_at: str = ""
updated_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"candidate_id": self.candidate_id,
"kind": self.kind,
"source_run_ids": list(self.source_run_ids),
"source_session_ids": list(self.source_session_ids),
"related_skill_names": list(self.related_skill_names),
"reason": self.reason,
"evidence": dict(self.evidence),
"status": self.status,
"priority": self.priority,
"confidence": self.confidence,
"risk_level": self.risk_level,
"owner": self.owner,
"retry_count": self.retry_count,
"last_error": self.last_error,
"trigger_reason": self.trigger_reason,
"evidence_summary": self.evidence_summary,
"draft_skill_name": self.draft_skill_name,
"draft_id": self.draft_id,
"safety_report_id": self.safety_report_id,
"eval_report_id": self.eval_report_id,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
now = _utc_now()
status = str(payload.get("status") or "open")
risk_level = str(payload.get("risk_level") or "medium")
return cls(
candidate_id=str(payload["candidate_id"]),
kind=str(payload.get("kind") or "revise_skill"),
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
reason=str(payload.get("reason") or ""),
evidence=dict(payload.get("evidence") or {}),
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
priority=int(payload.get("priority", 0) or 0),
confidence=float(payload.get("confidence", 0.0) or 0.0),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
owner=_optional_str(payload.get("owner")),
retry_count=int(payload.get("retry_count", 0) or 0),
last_error=_optional_str(payload.get("last_error")),
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
draft_id=_optional_str(payload.get("draft_id")),
safety_report_id=_optional_str(payload.get("safety_report_id")),
eval_report_id=_optional_str(payload.get("eval_report_id")),
created_at=str(payload.get("created_at") or now),
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
)
@dataclass(slots=True)
class SkillLearningAuditEvent:
event_id: str
candidate_id: str
event_type: str
created_at: str
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"candidate_id": self.candidate_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
return cls(
event_id=str(payload["event_id"]),
candidate_id=str(payload["candidate_id"]),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class SkillDraftSafetyReport:
report_id: str
skill_name: str
draft_id: str
passed: bool
risk_level: str
issues: list[str] = field(default_factory=list)
blocked_reasons: list[str] = field(default_factory=list)
suggested_fix: str = ""
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"passed": self.passed,
"risk_level": self.risk_level,
"issues": list(self.issues),
"blocked_reasons": list(self.blocked_reasons),
"suggested_fix": self.suggested_fix,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
risk_level = str(payload.get("risk_level") or "medium")
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
passed=bool(payload.get("passed")),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
issues=[str(item) for item in payload.get("issues") or []],
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
suggested_fix=str(payload.get("suggested_fix") or ""),
created_at=str(payload.get("created_at") or ""),
)
@dataclass(slots=True)
class SkillDraftEvalReport:
report_id: str
skill_name: str
draft_id: str
candidate_id: str
passed: bool
baseline_score_avg: float
candidate_score_avg: float
score_delta: float
regression_count: int
improved_count: int
unchanged_count: int
cases: list[dict[str, Any]] = field(default_factory=list)
status: str = "completed"
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"candidate_id": self.candidate_id,
"passed": self.passed,
"baseline_score_avg": self.baseline_score_avg,
"candidate_score_avg": self.candidate_score_avg,
"score_delta": self.score_delta,
"regression_count": self.regression_count,
"improved_count": self.improved_count,
"unchanged_count": self.unchanged_count,
"cases": [dict(item) for item in self.cases],
"status": self.status,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
candidate_id=str(payload.get("candidate_id") or ""),
passed=bool(payload.get("passed")),
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
regression_count=int(payload.get("regression_count", 0) or 0),
improved_count=int(payload.get("improved_count", 0) or 0),
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
status=str(payload.get("status") or "completed"),
created_at=str(payload.get("created_at") or ""),
)
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _summarize_evidence(payload: dict[str, Any]) -> str:
evidence = payload.get("evidence")
if isinstance(evidence, dict):
theme = evidence.get("theme")
if theme:
return f"Theme: {theme}"
skill_version = evidence.get("skill_version")
if skill_version:
return f"Skill version: {skill_version}"
source_run_ids = payload.get("source_run_ids") or []
return f"{len(source_run_ids)} source run(s)"
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,216 @@
"""File-backed skill learning store."""
from __future__ import annotations
import json
from pathlib import Path
from uuid import uuid4
from .models import (
SkillDraftEvalReport,
SkillDraftSafetyReport,
SkillLearningAuditEvent,
SkillLearningCandidate,
SkillPerformanceSnapshot,
)
class SkillLearningStore:
def __init__(self, root: str | Path) -> None:
self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True)
self.performance_path = self.root / "performance.jsonl"
self.candidates_path = self.root / "learning-candidates.jsonl"
self.audit_path = self.root / "learning-audit.jsonl"
self.safety_reports_dir = self.root / "safety-reports"
self.eval_reports_dir = self.root / "eval-reports"
def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None:
normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
self._append_jsonl(self.candidates_path, normalized.to_dict())
self.append_audit_event(
normalized.candidate_id,
"candidate_created",
{
"kind": normalized.kind,
"status": normalized.status,
"reason": normalized.reason,
},
)
def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None:
candidates = self.list_learning_candidates()
updated: SkillLearningCandidate | None = None
for index, candidate in enumerate(candidates):
if candidate.candidate_id != candidate_id:
continue
payload = candidate.to_dict()
payload.update(updates)
if "updated_at" not in updates:
payload["updated_at"] = _utc_now()
updated = SkillLearningCandidate.from_dict(payload)
candidates[index] = updated
break
if updated is None:
return None
self.candidates_path.parent.mkdir(parents=True, exist_ok=True)
self.candidates_path.write_text(
"".join(
json.dumps(candidate.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
for candidate in candidates
),
encoding="utf-8",
)
return updated
def transition_learning_candidate(
self,
candidate_id: str,
status: str,
*,
event_type: str | None = None,
payload: dict | None = None,
**updates: object,
) -> SkillLearningCandidate | None:
updated = self.update_learning_candidate(candidate_id, status=status, **updates)
if updated is not None:
self.append_audit_event(
candidate_id,
event_type or f"candidate_{status}",
{"status": status, **dict(payload or {})},
)
return updated
def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
results: list[SkillLearningCandidate] = []
for payload in self._read_jsonl(self.candidates_path):
candidate = SkillLearningCandidate.from_dict(payload)
if status is not None and candidate.status != status:
continue
results.append(candidate)
return results
def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None:
snapshots = self.list_performance_snapshots()
filtered = [
item
for item in snapshots
if not (item.skill_name == snapshot.skill_name and item.skill_version == snapshot.skill_version)
]
filtered.append(snapshot)
self.performance_path.write_text(
"".join(json.dumps(item.to_dict(), ensure_ascii=False, sort_keys=True) + "\n" for item in filtered),
encoding="utf-8",
)
def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]:
return [SkillPerformanceSnapshot.from_dict(item) for item in self._read_jsonl(self.performance_path)]
def list_low_performing_versions(self, *, minimum_activations: int = 2, success_ratio_threshold: float = 0.5) -> list[SkillPerformanceSnapshot]:
results: list[SkillPerformanceSnapshot] = []
for snapshot in self.list_performance_snapshots():
if snapshot.activation_count < minimum_activations:
continue
if snapshot.activation_count == 0:
continue
ratio = snapshot.success_count / snapshot.activation_count
if ratio <= success_ratio_threshold:
results.append(snapshot)
return results
def list_merge_candidates(self) -> list[SkillLearningCandidate]:
return [item for item in self.list_learning_candidates(status="open") if item.kind == "merge_skills"]
def append_audit_event(self, candidate_id: str, event_type: str, payload: dict | None = None) -> SkillLearningAuditEvent:
event = SkillLearningAuditEvent(
event_id=uuid4().hex,
candidate_id=candidate_id,
event_type=event_type,
created_at=_utc_now(),
payload=dict(payload or {}),
)
self._append_jsonl(self.audit_path, event.to_dict())
return event
def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]:
events = [SkillLearningAuditEvent.from_dict(item) for item in self._read_jsonl(self.audit_path)]
if candidate_id is None:
return events
return [event for event in events if event.candidate_id == candidate_id]
def write_safety_report(self, report: SkillDraftSafetyReport) -> None:
path = self._report_path(self.safety_reports_dir, report.skill_name, report.draft_id, report.report_id)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n", encoding="utf-8")
def get_safety_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftSafetyReport | None:
reports = self.list_safety_reports(skill_name, draft_id)
if report_id is not None:
return next((item for item in reports if item.report_id == report_id), None)
return reports[-1] if reports else None
def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]:
root = self.safety_reports_dir / skill_name / draft_id
if not root.exists():
return []
return [
SkillDraftSafetyReport.from_dict(self._read_json(path))
for path in sorted(root.glob("report-*.json"))
]
def write_eval_report(self, report: SkillDraftEvalReport) -> None:
path = self._report_path(self.eval_reports_dir, report.skill_name, report.draft_id, report.report_id)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n", encoding="utf-8")
def get_eval_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftEvalReport | None:
reports = self.list_eval_reports(skill_name, draft_id)
if report_id is not None:
return next((item for item in reports if item.report_id == report_id), None)
return reports[-1] if reports else None
def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]:
root = self.eval_reports_dir / skill_name / draft_id
if not root.exists():
return []
return [
SkillDraftEvalReport.from_dict(self._read_json(path))
for path in sorted(root.glob("report-*.json"))
]
@staticmethod
def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path:
return root / skill_name / draft_id / f"report-{report_id}.json"
@staticmethod
def _append_jsonl(path: Path, payload: dict) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("a", encoding="utf-8") as handle:
handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")
@staticmethod
def _read_jsonl(path: Path) -> list[dict]:
if not path.exists():
return []
results: list[dict] = []
for line in path.read_text(encoding="utf-8").splitlines():
cleaned = line.strip()
if not cleaned:
continue
payload = json.loads(cleaned)
if isinstance(payload, dict):
results.append(payload)
return results
@staticmethod
def _read_json(path: Path) -> dict:
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"Expected JSON object in {path}")
return payload
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -15,9 +15,13 @@ from __future__ import annotations
import asyncio
from pathlib import Path
from typing import Any
from uuid import uuid4
from beaver.coordinator.models import ExecutionNode, TeamRunResult
from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader
from beaver.engine.providers import make_provider_bundle
from beaver.foundation.events import InboundMessage, OutboundMessage
from beaver.tasks import MainAgentRouter, TaskExecutionPlan, TaskRecord, ValidationResult
class AgentService:
@ -45,6 +49,7 @@ class AgentService:
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
self._loop: AgentLoop | None = None
self._run_task: asyncio.Task[None] | None = None
self._main_agent_router = MainAgentRouter()
def create_loop(self) -> AgentLoop:
"""创建并缓存当前 service 使用的 AgentLoop。"""
@ -176,7 +181,7 @@ class AgentService:
"use 'await AgentService.submit_direct(...)' after start()."
)
loop = self.create_loop()
return await loop.process_direct(message, **kwargs)
return await self._process_with_main_agent(message, runner=loop.process_direct, kwargs=kwargs)
async def submit_direct(
self,
@ -189,7 +194,502 @@ class AgentService:
"""
loop = self.create_loop()
return await loop.submit_direct(message, **kwargs)
return await self._process_with_main_agent(message, runner=loop.submit_direct, kwargs=kwargs)
async def submit_feedback(
    self,
    *,
    session_id: str,
    run_id: str,
    feedback_type: str,
    comment: str | None = None,
) -> dict[str, Any]:
    """Record chat feedback for the internal task linked to a run.

    Args:
        session_id: Session the run belongs to; must match the task's session.
        run_id: Run whose task receives the feedback.
        feedback_type: ``satisfied``, ``revise`` or ``abandon`` (case and
            surrounding whitespace are ignored).
        comment: Optional free-text comment stored with the feedback.

    Returns:
        A dict with the session/run/task ids, the resulting task status, the
        normalized feedback type and any generated learning candidates.

    Raises:
        ValueError: If no task matches the run and session, the feedback type
            is unknown, a different feedback was already recorded for the run,
            or the task is already finalized.
    """
    loaded = self.create_loop().boot()
    task_service = self._require_loaded(loaded, "task_service")
    task = task_service.get_task_by_run_id(run_id)
    if task is None or task.session_id != session_id:
        raise ValueError(f"No internal task found for run_id={run_id!r}")

    normalized = feedback_type.strip().lower()
    if normalized not in {"satisfied", "revise", "abandon"}:
        raise ValueError("feedback_type must be one of: satisfied, revise, abandon")

    # Feedback previously stored for this run: identical feedback makes the
    # call a no-op replay, different feedback is rejected.
    prior_for_run = [entry for entry in task.feedback if entry.get("run_id") == run_id]
    already_recorded = any(entry.get("feedback_type") == normalized for entry in prior_for_run)
    conflicting_feedback = next(
        (entry for entry in prior_for_run if entry.get("feedback_type") != normalized),
        None,
    )
    if conflicting_feedback is not None:
        raise ValueError(
            f"Feedback for run_id={run_id!r} was already recorded as "
            f"{conflicting_feedback.get('feedback_type')!r}"
        )
    if task.status in {"closed", "abandoned"} and not already_recorded:
        raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")

    if already_recorded:
        updated = task
    else:
        updated = task_service.add_feedback(
            task.task_id,
            feedback_type=normalized,
            comment=comment,
            run_id=run_id,
        )

    session_manager = self._require_loaded(loaded, "session_manager")
    session_manager.update_latest_assistant_event_payload(
        session_id,
        run_id,
        {
            "task_id": updated.task_id,
            "task_status": updated.status,
            "feedback_state": normalized,
        },
    )
    if not already_recorded:
        session_manager.append_message(
            session_id,
            run_id=run_id,
            role="system",
            event_type="task_feedback_recorded",
            event_payload={
                "task_id": task.task_id,
                "feedback_type": normalized,
                "comment": comment,
                "task_status": updated.status,
            },
            content=comment,
            context_visible=False,
        )

    generated_candidates = []
    validation = ValidationResult.from_dict(updated.validation_result)
    if not already_recorded:
        if normalized == "satisfied" and validation is not None and validation.accepted:
            # Learning candidates are only built for validated, satisfied tasks.
            skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
            generated_candidates = [item.to_dict() for item in skill_learning_service.build_learning_candidates()]
        elif normalized == "abandon":
            memory_service = self._require_loaded(loaded, "memory_service")
            memory_service.get_store().add(
                "memory",
                (
                    f"Failure memory: task {task.task_id} in session {session_id} was abandoned. "
                    f"Reason: {(comment or 'not specified').strip()}"
                ),
            )

    return {
        "session_id": session_id,
        "run_id": run_id,
        "task_id": updated.task_id,
        "task_status": updated.status,
        "feedback_type": normalized,
        "learning_candidates": generated_candidates,
    }
async def _process_with_main_agent(
    self,
    message: str,
    *,
    runner: Any,
    kwargs: dict[str, Any],
) -> AgentRunResult:
    """Route ``message`` either straight to ``runner`` or through task mode.

    A session id is generated when the caller did not supply one. The main
    agent router decides whether the message is a task; task messages reuse
    the session's latest open task unless the router asks for a new one.
    """
    loaded = self.create_loop().boot()
    task_service = self._require_loaded(loaded, "task_service")

    session_id = kwargs.get("session_id") or uuid4().hex
    call_kwargs = {**kwargs, "session_id": session_id}

    active_task = task_service.get_latest_open_task(session_id)
    decision = self._main_agent_router.classify(message, active_task=active_task)
    if not decision.is_task:
        return await runner(message, **call_kwargs)

    if active_task is None or decision.starts_new_task:
        task = task_service.create_task(
            session_id=session_id,
            description=message,
            metadata={"router_reason": decision.reason},
        )
    else:
        task = active_task
    return await self._run_task_mode(message, runner=runner, kwargs=call_kwargs, task=task)
async def _run_task_mode(
    self,
    message: str,
    *,
    runner: Any,
    kwargs: dict[str, Any],
    task: TaskRecord,
) -> AgentRunResult:
    """Run ``message`` as an internal task with planning, team execution and validation.

    At most two attempts are made. Each attempt plans the execution, runs the
    team graph when the plan asks for one, runs the main agent via ``runner``
    and validates the result. A rejected first attempt is hidden from the
    session context and retried once with the validator's revision prompt.

    Args:
        message: The user message driving the task.
        runner: Awaitable callable invoked as ``runner(message, **kwargs)``.
        kwargs: Runner keyword arguments; must contain ``session_id``. An
            optional ``team_provider_bundle_factory`` entry is consumed here
            and not forwarded to ``runner``.
        task: The task record this run belongs to.

    Returns:
        The result of the accepted attempt, or of the second attempt, with
        ``task_id``, ``task_status`` and ``validation_result`` attached.
    """
    loaded = self.create_loop().boot()
    task_service = self._require_loaded(loaded, "task_service")
    validation_service = self._require_loaded(loaded, "validation_service")
    task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
    session_manager = self._require_loaded(loaded, "session_manager")
    run_memory_store = self._require_loaded(loaded, "run_memory_store")
    last_result: AgentRunResult | None = None
    latest_validation: ValidationResult | None = None
    base_execution_context = kwargs.get("execution_context")
    provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
    # Work on a copy so the caller's dict is not mutated.
    kwargs = dict(kwargs)
    team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
    kwargs["provider_bundle"] = provider_bundle
    for attempt_index in (1, 2):
        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
        plan = await task_execution_planner.plan(
            task=task,
            user_message=message,
            attempt_index=attempt_index,
            latest_validation=latest_validation,
            provider_bundle=provider_bundle,
        )
        self._append_task_observation(
            session_manager,
            task.session_id,
            event_type="task_execution_planned",
            payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                **plan.to_event_payload(),
            },
        )
        team_summaries: list[str] = []
        team_execution_context = ""
        if plan.is_team:
            team_result, team_error = await self._run_team_for_task(
                plan,
                task=task,
                parent_session_id=kwargs["session_id"],
                provider_bundle_factory=team_provider_bundle_factory
                or self._build_team_provider_bundle_factory(loaded, kwargs),
            )
            if team_result is not None:
                team_summaries = [self._team_summary_for_validation(team_result)]
                team_execution_context = self._team_execution_context(plan, team_result)
                self._append_task_observation(
                    session_manager,
                    task.session_id,
                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
                    payload={
                        "task_id": task.task_id,
                        "attempt_index": attempt_index,
                        "plan_mode": plan.mode,
                        "strategy": plan.graph.strategy if plan.graph else None,
                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                        "team_run_ids": team_result.run_ids,
                        "team_success": team_result.success,
                        "node_results": self._team_node_results_for_event(plan, team_result),
                        "reason": plan.reason,
                        "error": None if team_result.success else "one or more team nodes failed",
                    },
                )
            else:
                # The team run could not be started or raised; the main agent
                # still runs and is told about the failure.
                team_summaries = [f"Team execution failed: {team_error}"]
                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
                self._append_task_observation(
                    session_manager,
                    task.session_id,
                    event_type="task_team_run_failed",
                    payload={
                        "task_id": task.task_id,
                        "attempt_index": attempt_index,
                        "plan_mode": plan.mode,
                        "strategy": plan.graph.strategy if plan.graph else None,
                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                        "team_run_ids": [],
                        "team_success": False,
                        "reason": plan.reason,
                        "error": team_error,
                    },
                )
        attempt_kwargs = dict(kwargs)
        attempt_kwargs.update(
            {
                "task_id": task.task_id,
                "task_mode": True,
                "attempt_index": attempt_index,
                "learning_candidate_enabled": False,
            }
        )
        # Gather the extra context for this attempt. The revision prompt and
        # the team context are collected independently so the team output
        # still reaches the main agent on a retry whose revision prompt is
        # empty.
        context_parts: list[str] = []
        if attempt_index == 2 and latest_validation is not None:
            revision_context = latest_validation.recommended_revision_prompt.strip()
            if revision_context:
                context_parts.append(f"Task validation revision request:\n{revision_context}")
        if team_execution_context:
            context_parts.append(team_execution_context)
        if context_parts:
            attempt_kwargs["execution_context"] = self._join_context(base_execution_context, *context_parts)
        result = await runner(message, **attempt_kwargs)
        last_result = result
        self._append_task_observation(
            session_manager,
            task.session_id,
            event_type="task_synthesis_completed",
            payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                "main_run_id": result.run_id,
                "plan_mode": plan.mode,
                "strategy": plan.graph.strategy if plan.graph else None,
            },
        )
        task = task_service.append_run(
            task.task_id,
            result.run_id,
            skill_names=self._skill_names_for_run(loaded, result.run_id),
        )
        validation = await validation_service.validate_task_result(
            task=task,
            user_message=message,
            final_output=result.output_text,
            transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
            tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
            team_summaries=team_summaries,
            provider_bundle=provider_bundle,
        )
        latest_validation = validation
        task = task_service.record_validation(task.task_id, result.run_id, validation)
        run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
        session_manager.update_latest_assistant_event_payload(
            result.session_id,
            result.run_id,
            {
                "task_id": task.task_id,
                "task_status": task.status,
                "validation_status": "passed" if validation.accepted else "failed",
            },
        )
        session_manager.append_message(
            result.session_id,
            run_id=result.run_id,
            role="system",
            event_type="task_validation_snapshotted",
            event_payload={
                "task_id": task.task_id,
                "attempt_index": attempt_index,
                "validation_result": validation.to_dict(),
                "retry_scheduled": not validation.accepted and attempt_index == 1,
            },
            content=validation.recommended_revision_prompt or None,
            context_visible=False,
        )
        if not validation.accepted and attempt_index == 1:
            # Hide the rejected first attempt from the context used by the retry.
            session_manager.set_run_context_visible(result.session_id, result.run_id, False)
        result.task_id = task.task_id
        result.task_status = task.status
        result.validation_result = validation.to_dict()
        if validation.accepted or attempt_index == 2:
            return result
    if last_result is None:  # pragma: no cover - defensive
        raise RuntimeError("Task mode did not produce a run result")
    return last_result
async def _run_team_for_task(
    self,
    plan: TaskExecutionPlan,
    *,
    task: TaskRecord,
    parent_session_id: str,
    provider_bundle_factory: Any,
) -> tuple[TeamRunResult | None, str | None]:
    """Run the plan's team graph and report the outcome without raising.

    Args:
        plan: Execution plan whose ``graph`` describes the team run.
        task: Parent task; its id is passed on as ``parent_task_id``.
        parent_session_id: Session id of the parent run.
        provider_bundle_factory: Passed through to ``TeamService.run_team``.

    Returns:
        ``(result, None)`` when the team ran, or ``(None, error_text)`` when
        the plan has no graph or the run raised.
    """
    if plan.graph is None:
        return None, "team plan did not include an execution graph"
    try:
        # Imported lazily; an import failure is reported like any other team
        # failure instead of propagating to the caller.
        from beaver.services.team_service import TeamService

        result = await TeamService(self.create_loop()).run_team(
            plan.graph,
            parent_task_id=task.task_id,
            parent_session_id=parent_session_id,
            parent_run_id=None,
            provider_bundle_factory=provider_bundle_factory,
            learning_candidate_enabled=False,
        )
    except Exception as exc:
        # Best-effort boundary: the caller falls back to the main agent.
        # Exceptions raised without a message stringify to "", so fall back
        # to the exception type name to keep the failure cause visible.
        return None, str(exc) or type(exc).__name__
    return result, None
@staticmethod
def _require_loaded(loaded: Any, field_name: str) -> Any:
value = getattr(loaded, field_name)
if value is None:
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
return value
@staticmethod
def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]:
store = getattr(loaded, "run_memory_store", None)
if store is None:
return []
for record in store.list_runs():
if record.run_id == run_id:
return [receipt.skill_name for receipt in record.activated_skills]
return []
@staticmethod
def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str:
lines = []
for event in session_manager.get_run_event_records(session_id, run_id):
if event.context_visible and event.content:
lines.append(f"{event.role}: {event.content.strip()}")
return "\n".join(lines[:12])[:2400]
@staticmethod
def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]:
summaries = []
for event in session_manager.get_run_event_records(session_id, run_id):
if event.event_type != "tool_result_recorded":
continue
text = (event.content or "").strip()
if text:
summaries.append(f"{event.tool_name or 'tool'}: {text[:500]}")
return summaries[:12]
@staticmethod
def _append_task_observation(
session_manager: Any,
session_id: str,
*,
event_type: str,
payload: dict[str, Any],
) -> None:
session_manager.append_message(
session_id,
role="system",
event_type=event_type,
event_payload=payload,
content=payload.get("reason") or payload.get("error"),
context_visible=False,
)
@staticmethod
def _join_context(*parts: str | None) -> str:
return "\n\n".join(part.strip() for part in parts if part and part.strip())
@staticmethod
def _team_summary_for_validation(result: TeamRunResult) -> str:
lines = [
f"success={result.success}",
f"task_id={result.task_id or ''}",
"summary:",
result.summary,
"nodes:",
]
for node in result.node_results:
lines.append(
f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
f"error={node.error or ''} output={node.output_text[:500]}"
)
return "\n".join(lines)
@staticmethod
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
payloads: list[dict[str, Any]] = []
for item in result.node_results:
payload = item.to_dict()
node = nodes.get(item.node_id)
if node is not None:
payload["selected_skill_names"] = list(node.inherited_pinned_skills)
payload["ephemeral_skill_names"] = [
skill.name for skill in node.inherited_pinned_skill_contexts
]
payload["skill_query"] = node.agent.metadata.get("skill_query")
payload["generated_skill_draft_id"] = node.agent.metadata.get("generated_skill_draft_id")
payload["generated_skill_name"] = node.agent.metadata.get("generated_skill_name")
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
payloads.append(payload)
return payloads
@staticmethod
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
node_lines = [
(
f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
)
for node in result.node_results
]
return "\n\n".join(
item
for item in [
"Task team execution result:",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Team success: {result.success}",
f"Team summary:\n{result.summary}",
"Node results:\n" + "\n\n".join(node_lines),
(
"Final synthesis instruction:\n" + plan.final_synthesis_instruction
if plan.final_synthesis_instruction
else None
),
"Use the team outputs as internal evidence. Produce the final user-facing answer yourself.",
]
if item
)
@staticmethod
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
return "\n\n".join(
[
"Task team execution failed before final synthesis.",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Error: {error}",
"Proceed as the main agent and produce the best possible final answer.",
]
)
def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
    """Return a factory that builds a provider bundle for one team node.

    The factory starts from ``kwargs`` without ``provider_bundle`` and lets
    the node's agent override ``model`` and ``provider_name`` when it sets
    them.
    """

    def factory(node: ExecutionNode) -> Any:
        # Copied on every call so node-specific overrides never leak between nodes.
        node_kwargs = {key: value for key, value in kwargs.items() if key != "provider_bundle"}
        overrides = (
            ("model", node.agent.model),
            ("provider_name", node.agent.provider_name),
        )
        for option, override in overrides:
            if override:
                node_kwargs[option] = override
        return self._make_provider_bundle_for_task(loaded, node_kwargs)

    return factory
def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any:
    """Build a provider bundle from call-time ``kwargs`` and the loaded config.

    The provider target is resolved from the requested ``model`` and
    ``provider_name``. For the API key, API base, extra headers and the
    embedding settings an explicit ``kwargs`` value wins over the configured
    one. The request timeout always comes from the configured provider.
    """
    config = loaded.config
    target = config.resolve_provider_target(
        model=kwargs.get("model"),
        provider_name=kwargs.get("provider_name"),
    )
    model = target.get("model") or self.profile.default_model
    provider_name = target.get("provider_name") or kwargs.get("provider_name")
    api_key = kwargs.get("api_key") or target.get("api_key")
    api_base = kwargs.get("api_base") or target.get("api_base")
    timeout = target.get("request_timeout_seconds")
    extra_headers = kwargs.get("extra_headers") or target.get("extra_headers")
    routing = kwargs.get("routing")
    fallback_target = kwargs.get("fallback_target")
    auxiliary_target = kwargs.get("auxiliary_target")
    embedding_target = kwargs.get("embedding_target") or config.resolve_embedding_target()
    embedding_model = kwargs.get("embedding_model") or config.default_embedding_model
    return make_provider_bundle(
        model=model,
        provider_name=provider_name,
        api_key=api_key,
        api_base=api_base,
        request_timeout_seconds=timeout,
        extra_headers=extra_headers,
        routing=routing,
        fallback_target=fallback_target,
        auxiliary_target=auxiliary_target,
        embedding_target=embedding_target,
        embedding_model=embedding_model,
    )
async def handle_inbound_message(self, inbound: InboundMessage) -> OutboundMessage:
"""把 bus inbound 映射成标准 runtime 调用,并返回结构化 outbound。"""
@ -207,9 +707,26 @@ class AgentService:
embedding_model=inbound.embedding_model,
)
except Exception as exc:
return self.build_outbound_error(inbound, detail=str(exc))
return self.build_outbound_error(
inbound,
detail=str(exc),
finish_reason=self._classify_inbound_failure(exc),
)
return self.build_outbound_message(inbound, result)
@staticmethod
def _classify_inbound_failure(exc: Exception) -> str:
"""把 runtime 异常收口为更稳定的 bus finish reason。"""
if isinstance(exc, RuntimeError):
detail = str(exc)
if (
"requires an active run() loop" in detail
or "not accepting new tasks after stop()" in detail
):
return "stopped"
return "error"
@staticmethod
def build_outbound_message(inbound: InboundMessage, result: AgentRunResult) -> OutboundMessage:
"""把一次 runtime 正常结果转成 bus outbound。"""
@ -224,7 +741,12 @@ class AgentService:
provider_name=result.provider_name,
model=result.model,
usage=dict(result.usage),
metadata={"inbound_metadata": dict(inbound.metadata)},
metadata={
"inbound_metadata": dict(inbound.metadata),
"task_id": getattr(result, "task_id", None),
"task_status": getattr(result, "task_status", None),
"validation_result": getattr(result, "validation_result", None),
},
)
@staticmethod

View File

@ -51,6 +51,13 @@ class MemoryService:
self.store.load_from_disk()
self._snapshot = capture_memory_snapshot(self.store)
def capture_snapshot_for_run(self) -> MemorySnapshot:
    """Capture a frozen per-run snapshot from a freshly loaded store.

    A new ``MemoryStore`` rooted at ``self.root`` is loaded from disk for the
    capture, so neither ``self.store`` nor ``self._snapshot`` is touched.
    """
    fresh_store = MemoryStore(self.root)
    fresh_store.load_from_disk()
    snapshot = capture_memory_snapshot(fresh_store)
    return snapshot
def get_snapshot(self) -> MemorySnapshot:
"""获取当前 run 应注入 system prompt 的 frozen snapshot。"""

View File

@ -0,0 +1,253 @@
"""Projection of hidden Task/team events into frontend process streams."""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
class SessionProcessProjector:
    """Project a session's Task/team event records into a process-stream view.

    ``project`` reads the session's event records plus the run-memory records
    and returns a dict with ``runs``, ``events``, ``artifacts`` and ``agents``.
    """

    def __init__(self, session_manager: Any, run_memory_store: Any) -> None:
        # Duck-typed collaborators: ``project`` only calls
        # ``session_manager.get_event_records`` and ``run_memory_store.list_runs``.
        self.session_manager = session_manager
        self.run_memory_store = run_memory_store

    def project(self, session_id: str) -> dict[str, Any]:
        """Build the run/event projection for ``session_id``.

        Records whose payload has no ``task_id`` are ignored. Every remaining
        record is attached to a root run keyed by task id and attempt index;
        team node results and the synthesis run become child runs of that root.

        Returns:
            ``{"runs": [...], "events": [...], "artifacts": [], "agents": []}``
            with runs sorted by ``started_at`` and events by ``created_at``.
        """
        records = self.session_manager.get_event_records(session_id)
        # Index run-memory records once so child runs can pick up their timing.
        run_records = {record.run_id: record for record in self.run_memory_store.list_runs()}
        runs: dict[str, dict[str, Any]] = {}
        events: list[dict[str, Any]] = []

        def add_event(
            *,
            event_id: str,
            run_id: str,
            kind: str,
            actor_type: str,
            actor_id: str,
            actor_name: str,
            text: str,
            created_at: str,
            status: str | None = None,
            parent_run_id: str | None = None,
            metadata: dict[str, Any] | None = None,
        ) -> None:
            # Append one event dict; metadata is copied so later mutation of the
            # caller's dict does not leak into the stored event.
            events.append(
                {
                    "event_id": event_id,
                    "run_id": run_id,
                    "parent_run_id": parent_run_id,
                    "kind": kind,
                    "actor_type": actor_type,
                    "actor_id": actor_id,
                    "actor_name": actor_name,
                    "text": text,
                    "status": status,
                    "metadata": dict(metadata or {}),
                    "created_at": created_at,
                }
            )

        for record in records:
            payload = dict(record.event_payload or {})
            task_id = payload.get("task_id")
            if not task_id:
                continue
            # A missing or falsy attempt index is treated as the first attempt.
            attempt_index = int(payload.get("attempt_index") or 1)
            root_run_id = f"task:{task_id}:attempt:{attempt_index}"
            created_at = _timestamp(record.timestamp)
            # The first record seen for a task attempt creates its root run;
            # later records reuse and update the same dict.
            root = runs.setdefault(
                root_run_id,
                {
                    "run_id": root_run_id,
                    "parent_run_id": None,
                    "session_id": session_id,
                    "actor_type": "system",
                    "actor_id": "task",
                    "actor_name": "Task Planner",
                    "title": f"Task {task_id[:8]} attempt {attempt_index}",
                    "source": "task_mode",
                    "status": "running",
                    "started_at": created_at,
                    "metadata": {"task_id": task_id, "attempt_index": attempt_index},
                },
            )

            if record.event_type == "task_execution_planned":
                # Planning event: retitle the root run and record the plan details.
                strategy = payload.get("strategy") or "single"
                node_ids = payload.get("node_ids") or []
                root["title"] = f"{payload.get('plan_mode', 'single')} plan: {strategy}"
                root["summary"] = payload.get("reason") or ""
                root["metadata"] = {
                    **root.get("metadata", {}),
                    "plan_mode": payload.get("plan_mode"),
                    "strategy": payload.get("strategy"),
                    "node_ids": node_ids,
                    "skill_queries": payload.get("skill_queries") or [],
                    "selected_skill_names": payload.get("selected_skill_names") or [],
                    "generated_skill_draft_ids": payload.get("generated_skill_draft_ids") or [],
                    "skill_resolution_report": payload.get("skill_resolution_report") or [],
                    "fallback_error": payload.get("fallback_error"),
                }
                add_event(
                    event_id=_event_id(record, "planned"),
                    run_id=root_run_id,
                    kind="run_started",
                    actor_type="system",
                    actor_id="task",
                    actor_name="Task Planner",
                    text=f"Planned {payload.get('plan_mode')} execution via {strategy}. {payload.get('reason') or ''}".strip(),
                    created_at=created_at,
                    status="running",
                    metadata=root["metadata"],
                )
            elif record.event_type in {"task_team_run_completed", "task_team_run_failed"}:
                # Team finished (or failed): the root run stays "running"; the
                # team outcome is recorded in metadata and as a status event.
                team_success = bool(payload.get("team_success"))
                root["status"] = "running"
                root["metadata"] = {
                    **root.get("metadata", {}),
                    "team_success": team_success,
                    "team_run_ids": payload.get("team_run_ids") or [],
                    "team_error": payload.get("error"),
                }
                add_event(
                    event_id=_event_id(record, "team"),
                    run_id=root_run_id,
                    kind="run_status",
                    actor_type="system",
                    actor_id="team",
                    actor_name="Task Team",
                    text=payload.get("error") or ("Team completed" if team_success else "Team completed with failed nodes"),
                    created_at=created_at,
                    status="done" if team_success else "error",
                    metadata=dict(payload),
                )
                node_results = payload.get("node_results") or []
                for item in node_results:
                    # Each node result becomes a child run plus a finish event.
                    if not isinstance(item, dict):
                        continue
                    # Nodes without their own run id get a synthetic one under the root.
                    node_run_id = item.get("run_id") or f"{root_run_id}:node:{item.get('node_id')}"
                    status = "done" if item.get("success") else "error"
                    # A "blocked" finish reason overrides success/failure.
                    if item.get("finish_reason") == "blocked":
                        status = "waiting"
                    run_record = run_records.get(str(node_run_id))
                    runs[str(node_run_id)] = {
                        "run_id": str(node_run_id),
                        "parent_run_id": root_run_id,
                        "session_id": run_record.session_id if run_record is not None else session_id,
                        "actor_type": "agent",
                        "actor_id": str(item.get("node_id") or "sub-agent"),
                        "actor_name": str(item.get("node_id") or "Sub-agent"),
                        "title": str(item.get("node_id") or "Sub-agent"),
                        "source": "task_team",
                        "status": status,
                        "started_at": run_record.started_at if run_record is not None else created_at,
                        "finished_at": run_record.ended_at if run_record is not None else created_at,
                        "summary": _truncate(str(item.get("output_text") or item.get("error") or "")),
                        "metadata": {
                            "task_id": task_id,
                            "attempt_index": attempt_index,
                            "node_id": item.get("node_id"),
                            "skill_query": item.get("skill_query"),
                            "selected_skill_names": item.get("selected_skill_names") or [],
                            "ephemeral_skill_names": item.get("ephemeral_skill_names") or [],
                            "generated_skill_draft_id": item.get("generated_skill_draft_id"),
                            "generated_skill_name": item.get("generated_skill_name"),
                            "ephemeral_used": bool(item.get("ephemeral_used")),
                            "finish_reason": item.get("finish_reason"),
                            "error": item.get("error"),
                        },
                    }
                    add_event(
                        event_id=f"{_event_id(record, 'node')}:{item.get('node_id')}",
                        run_id=str(node_run_id),
                        parent_run_id=root_run_id,
                        kind="run_finished",
                        actor_type="agent",
                        actor_id=str(item.get("node_id") or "sub-agent"),
                        actor_name=str(item.get("node_id") or "Sub-agent"),
                        text=_truncate(str(item.get("output_text") or item.get("error") or "")),
                        created_at=created_at,
                        status=status,
                        metadata=dict(item),
                    )
            elif record.event_type == "task_synthesis_completed":
                # Final synthesis by the main agent; skipped when the payload
                # names no main run id.
                main_run_id = str(payload.get("main_run_id") or "")
                if main_run_id:
                    run_record = run_records.get(main_run_id)
                    runs[main_run_id] = {
                        "run_id": main_run_id,
                        "parent_run_id": root_run_id,
                        "session_id": run_record.session_id if run_record is not None else session_id,
                        "actor_type": "agent",
                        "actor_id": "main-agent",
                        "actor_name": "Main Agent",
                        "title": "Final synthesis",
                        "source": "task_synthesis",
                        # With no run record available the run is reported as done.
                        "status": "done" if (run_record is None or run_record.success) else "error",
                        "started_at": run_record.started_at if run_record is not None else created_at,
                        "finished_at": run_record.ended_at if run_record is not None else created_at,
                        "summary": _truncate(run_record.task_text if run_record is not None else ""),
                        "metadata": {"task_id": task_id, "attempt_index": attempt_index},
                    }
                    add_event(
                        event_id=_event_id(record, "synthesis"),
                        run_id=main_run_id,
                        parent_run_id=root_run_id,
                        kind="run_finished",
                        actor_type="agent",
                        actor_id="main-agent",
                        actor_name="Main Agent",
                        text="Main Agent synthesized the final user-facing answer.",
                        created_at=created_at,
                        status="done",
                        metadata=dict(payload),
                    )
            elif record.event_type == "task_validation_snapshotted":
                # Validation verdict: a non-dict validation payload is treated as empty.
                validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
                accepted = bool(validation.get("accepted"))
                # The root run is closed when validation accepted the result or
                # this is attempt 2; otherwise it is left waiting.
                # NOTE(review): the literal 2 presumably mirrors the task retry
                # limit - confirm against the task runner.
                root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
                root["finished_at"] = created_at if root["status"] == "done" else None
                add_event(
                    event_id=_event_id(record, "validation"),
                    run_id=record.run_id or root_run_id,
                    parent_run_id=root_run_id if record.run_id else None,
                    kind="run_status",
                    actor_type="system",
                    actor_id="validator",
                    actor_name="Validator",
                    text=(
                        f"Validation {'passed' if accepted else 'failed'} "
                        f"(score={validation.get('score')})."
                        + (" Retry scheduled." if payload.get("retry_scheduled") else "")
                    ),
                    created_at=created_at,
                    status="done" if accepted else "error",
                    metadata=dict(payload),
                )

        return {
            "runs": sorted(runs.values(), key=lambda item: item.get("started_at") or ""),
            "events": sorted(events, key=lambda item: item.get("created_at") or ""),
            "artifacts": [],
            "agents": [],
        }
def _timestamp(value: float | None) -> str:
if value is None:
return datetime.now(timezone.utc).isoformat()
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
def _event_id(record: Any, suffix: str) -> str:
return f"session-event:{record.message_id or record.timestamp}:{suffix}"
def _truncate(text: str, limit: int = 800) -> str:
cleaned = text.strip()
if len(cleaned) <= limit:
return cleaned
return cleaned[: limit - 1] + "..."

View File

@ -1,10 +1,90 @@
"""Application service for coordinated team runs."""
from __future__ import annotations
from collections.abc import Callable
from typing import TYPE_CHECKING
from beaver.coordinator import ExecutionGraph, ExecutionNode, LocalAgentRunner, TeamGraphScheduler, TeamRunResult
from beaver.engine import AgentLoop
from beaver.engine.providers import ProviderBundle
if TYPE_CHECKING:
from beaver.engine.context import SkillContext
class TeamService:
    """Internal service for Beaver-native multi-agent execution."""

    def __init__(self, loop: AgentLoop) -> None:
        """Wire a local runner and a graph scheduler around ``loop``."""
        self.loop = loop
        self.runner = LocalAgentRunner(loop)
        self.scheduler = TeamGraphScheduler(self.runner)

    async def run_team(
        self,
        graph: ExecutionGraph,
        *,
        parent_task_id: str | None,
        parent_session_id: str,
        parent_run_id: str | None = None,
        provider_bundle: ProviderBundle | None = None,
        provider_bundle_factory: Callable[[ExecutionNode], ProviderBundle | None] | None = None,
        inherited_pinned_skills: list[str] | None = None,
        inherited_pinned_skill_contexts: list["SkillContext"] | None = None,
        learning_candidate_enabled: bool = False,
    ) -> TeamRunResult:
        """Run a team graph inside the parent task context.

        The parent task (when given) is validated first, the graph is handed
        to the scheduler with all arguments passed through unchanged, and the
        resulting run ids are then attached to the parent task.

        Raises:
            RuntimeError: ``parent_task_id`` is given but no task service exists.
            ValueError: the parent task is unknown or belongs to another session.
        """
        self._validate_parent_task(parent_task_id, parent_session_id)
        result = await self.scheduler.run(
            graph,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle=provider_bundle,
            provider_bundle_factory=provider_bundle_factory,
            inherited_pinned_skills=inherited_pinned_skills,
            inherited_pinned_skill_contexts=inherited_pinned_skill_contexts,
            learning_candidate_enabled=learning_candidate_enabled,
        )
        self._attach_runs_to_parent_task(result)
        return result

    def run(self, task: str) -> str:
        """Compatibility shim for old callers that only expected a string."""
        return f"team service requires run_team() for coordinated execution: {task}"

    def _validate_parent_task(self, parent_task_id: str | None, parent_session_id: str) -> None:
        """Check that ``parent_task_id`` exists and belongs to ``parent_session_id``.

        A missing ``parent_task_id`` means there is nothing to validate.
        """
        if not parent_task_id:
            return
        loaded = self.loop.boot()
        task_service = getattr(loaded, "task_service", None)
        if task_service is None:
            raise RuntimeError("TeamService requires task_service when parent_task_id is provided")
        task = task_service.get_task(parent_task_id)
        if task is None:
            raise ValueError(f"Unknown parent_task_id: {parent_task_id}")
        if task.session_id != parent_session_id:
            raise ValueError(
                f"parent_task_id {parent_task_id!r} belongs to session {task.session_id!r}, "
                f"not {parent_session_id!r}"
            )

    def _attach_runs_to_parent_task(self, result: TeamRunResult) -> None:
        """Append every run id of ``result`` to its parent task.

        Each run is appended together with the names of the skills activated
        in that run, as found in the run-memory store (an empty list when the
        store or the run record is unavailable). This is a no-op when the
        result has no task id or run ids, or when the task cannot be found.
        """
        if not result.task_id or not result.run_ids:
            return
        loaded = self.loop.boot()
        task_service = getattr(loaded, "task_service", None)
        if task_service is None or task_service.get_task(result.task_id) is None:
            return
        run_store = getattr(loaded, "run_memory_store", None)

        # Scan the run store once instead of once per run id.
        skills_by_run: dict[str, list[str]] = {}
        if run_store is not None:
            wanted = set(result.run_ids)
            for record in run_store.list_runs():
                # The first record for a run id wins.
                if record.run_id in wanted and record.run_id not in skills_by_run:
                    skills_by_run[record.run_id] = [
                        receipt.skill_name for receipt in record.activated_skills
                    ]

        for run_id in result.run_ids:
            skill_names = list(skills_by_run.get(run_id, []))
            task_service.append_run(result.task_id, run_id, skill_names=skill_names)

View File

@ -83,11 +83,21 @@ class SkillAssembler:
activated_skills: list[SkillContext] = []
for name in selected_names:
raw_content = self.loader.load_skill(name)
record = self.loader.get_skill_record(name)
raw_content = self.loader.load_published_skill(name)
content = strip_frontmatter(raw_content).strip() if raw_content else ""
if not content:
continue
activated_skills.append(SkillContext(name=name, content=content))
activated_skills.append(
SkillContext(
name=name,
content=content,
version=record.version if record is not None else "legacy",
content_hash=record.content_hash or "" if record is not None else "",
activation_reason="llm_selected",
tool_hints=list(record.tool_hints) if record is not None else [],
)
)
return SkillAssemblyResult(activated_skills=activated_skills)

View File

@ -1,5 +1,18 @@
"""Skill catalog and indexing."""
from .loader import SkillRecord, SkillsLoader
from __future__ import annotations
from typing import Any
__all__ = ["SkillRecord", "SkillsLoader"]
def __getattr__(name: str) -> Any:
    """Resolve ``SkillRecord`` / ``SkillsLoader`` lazily on attribute access.

    The ``.loader`` import is deferred until one of the two names is
    requested. Any other name raises ``AttributeError``.
    """
    if name not in ("SkillRecord", "SkillsLoader"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from .loader import SkillRecord, SkillsLoader

    return SkillRecord if name == "SkillRecord" else SkillsLoader

View File

@ -17,11 +17,13 @@
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import dataclass, field
import json
from pathlib import Path
from typing import Any
from beaver.skills.specs.storage import SkillSpecStore
from .utils import (
check_requirements,
escape_xml,
@ -39,6 +41,13 @@ class SkillRecord:
name: str
path: Path
source: str
version: str = "legacy"
content_hash: str | None = None
source_kind: str = "legacy"
status: str = "active"
tool_hints: list[str] = field(default_factory=list)
frontmatter: dict[str, Any] = field(default_factory=dict)
description: str = ""
class SkillsLoader:
@ -50,11 +59,13 @@ class SkillsLoader:
*,
builtin_skills_dir: str | Path | None = None,
extra_dirs: list[str | Path] | None = None,
skill_store: SkillSpecStore | None = None,
) -> None:
self.workspace = Path(workspace)
self.workspace_skills = self.workspace / "skills"
self.builtin_skills = Path(builtin_skills_dir) if builtin_skills_dir is not None else Path(__file__).resolve().parent.parent / "builtin"
self.extra_dirs = [Path(item) for item in (extra_dirs or [])]
self.skill_store = skill_store or SkillSpecStore(self.workspace)
def list_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
"""列出当前可见的 skills。
@ -67,14 +78,19 @@ class SkillsLoader:
重名 skill 只保留优先级更高的那一个。
"""
ordered_roots: list[tuple[str, Path]] = [
("workspace", self.workspace_skills),
*[("plugin", path) for path in self.extra_dirs],
("builtin", self.builtin_skills),
]
found: dict[str, SkillRecord] = {}
for source, root in ordered_roots:
for record in self.list_published_skills():
if record.name in found:
continue
if filter_unavailable and not self._record_available(record):
continue
found[record.name] = record
for source, root in [
*[("plugin", path) for path in self.extra_dirs],
("builtin", self.builtin_skills),
]:
if not root.exists():
continue
for skill_dir in root.iterdir():
@ -84,12 +100,62 @@ class SkillsLoader:
name = skill_dir.name
if name in found:
continue
record = SkillRecord(name=name, path=skill_file, source=source)
frontmatter, body = parse_frontmatter(skill_file.read_text(encoding="utf-8"))
normalized_frontmatter = dict(frontmatter)
record = SkillRecord(
name=name,
path=skill_file,
source=source,
version="legacy",
source_kind=source,
tool_hints=self._coerce_tool_names(frontmatter.get("tools")),
frontmatter=normalized_frontmatter,
description=str(frontmatter.get("description") or summarize_body(body) or name),
)
if filter_unavailable and not self._record_available(record):
continue
found[name] = record
return list(found.values())
def list_published_skills(self, *, filter_unavailable: bool = True) -> list[SkillRecord]:
    """List only the skills that are published in the workspace skill store.

    Names whose published content cannot be read are skipped. With
    ``filter_unavailable`` set, records that fail ``_record_available`` are
    left out as well.
    """
    published: list[SkillRecord] = []
    for name in self.skill_store.list_published_skill_names():
        loaded = self.skill_store.read_published_skill(name)
        if loaded is None:
            continue

        spec = loaded.version
        skill_root = self.workspace_skills / name
        # Legacy skills live directly under the skill directory; versioned
        # ones live under ``versions/<version>/``.
        if spec.version == "legacy":
            path = skill_root / "SKILL.md"
        else:
            path = skill_root / "versions" / spec.version / "SKILL.md"

        record = SkillRecord(
            name=name,
            path=path,
            source="workspace",
            version=spec.version,
            content_hash=spec.content_hash,
            source_kind=str(spec.provenance.get("source_kind") or "workspace"),
            status=str(spec.review_state or "published"),
            tool_hints=list(spec.tool_hints),
            frontmatter=dict(spec.frontmatter),
            description=str(spec.frontmatter.get("description") or spec.summary or name),
        )
        if not filter_unavailable or self._record_available(record):
            published.append(record)
    return published
def get_current_version(self, name: str) -> str | None:
    """Return the version of the record found for ``name``, or ``None`` if none is found."""
    record = self._find_record(name)
    if record is None:
        return None
    return record.version
def load_published_skill(self, name: str, version: str | None = None) -> str | None:
    """Load a skill's content, preferring the published store.

    When the store has no published entry for ``name`` / ``version`` this
    falls back to ``load_skill(name)``, which does not take ``version``.
    """
    published = self.skill_store.read_published_skill(name, version=version)
    if published is None:
        return self.load_skill(name)
    return published.content
def load_skill(self, name: str) -> str | None:
"""按名称加载 skill 原始内容。"""
@ -106,6 +172,9 @@ class SkillsLoader:
def get_skill_metadata(self, name: str) -> dict[str, Any] | None:
"""读取 skill frontmatter 元数据。"""
record = self._find_record(name)
if record is not None and record.frontmatter:
return dict(record.frontmatter)
content = self.load_skill(name)
if content is None:
return None
@ -125,6 +194,10 @@ class SkillsLoader:
- 兼容 metadata JSON blob 里的 `tools`
"""
record = self._find_record(name)
if record is not None and record.tool_hints:
return list(record.tool_hints)
frontmatter = self.get_skill_metadata(name) or {}
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
names = [
@ -143,7 +216,7 @@ class SkillsLoader:
sections: list[str] = []
for name in skill_names:
content = self.load_skill(name)
content = self.load_published_skill(name)
if not content:
continue
body = strip_frontmatter(content).strip()
@ -167,14 +240,15 @@ class SkillsLoader:
lines = ["<skills>"]
for record in skills:
frontmatter = self.get_skill_metadata(record.name) or {}
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
available = check_requirements(meta_blob)
description = frontmatter.get("description") or record.name
description = frontmatter.get("description") or record.description or record.name
load_hint = f'Use skill_view(name="{record.name}") to load the full skill.'
lines.append(f' <skill available="{str(available).lower()}">')
lines.append(f" <name>{escape_xml(record.name)}</name>")
lines.append(f" <description>{escape_xml(description)}</description>")
lines.append(f" <version>{escape_xml(record.version)}</version>")
lines.append(f" <load_hint>{escape_xml(load_hint)}</load_hint>")
support_files = self.list_skill_supporting_files(record.name)
if support_files:
@ -205,10 +279,10 @@ class SkillsLoader:
candidates: list[dict[str, str]] = []
for record in self.list_skills(filter_unavailable=True):
frontmatter = self.get_skill_metadata(record.name) or {}
description = str(frontmatter.get("description") or "").strip()
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
description = str(frontmatter.get("description") or record.description or "").strip()
if not description:
raw_content = self.load_skill(record.name) or ""
raw_content = self.load_published_skill(record.name) or ""
body = strip_frontmatter(raw_content).strip()
if body:
description = " ".join(body.splitlines()[:3])[:240].strip()
@ -216,6 +290,8 @@ class SkillsLoader:
{
"name": record.name,
"description": description or record.name,
"version": record.version,
"content_hash": record.content_hash or "",
}
)
return candidates
@ -249,7 +325,7 @@ class SkillsLoader:
if record is None:
return None
if not self._record_available(record):
frontmatter = self.get_skill_metadata(name) or {}
frontmatter = record.frontmatter or self.get_skill_metadata(name) or {}
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
missing = get_missing_requirements(meta_blob)
detail = f" Missing requirements: {missing}." if missing else ""
@ -274,7 +350,7 @@ class SkillsLoader:
result: list[str] = []
for record in self.list_skills(filter_unavailable=True):
frontmatter = self.get_skill_metadata(record.name) or {}
frontmatter = record.frontmatter or self.get_skill_metadata(record.name) or {}
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
if meta_blob.get("always") or str(frontmatter.get("always", "")).lower() == "true":
result.append(record.name)
@ -326,3 +402,8 @@ class SkillsLoader:
if record is None:
return False
return self._record_available(record)
def summarize_body(body: str) -> str:
    """Summarize a skill body as its first three non-blank lines.

    Lines are stripped and joined with single spaces, and the result is capped
    at 240 characters. Blank lines are skipped before the three-line budget is
    applied, so a body that begins with blank lines still gets a summary.
    """
    meaningful = [line.strip() for line in body.splitlines() if line.strip()]
    return " ".join(meaningful[:3])[:240]

View File

@ -1,2 +1,6 @@
"""Draft skills generated before review."""
"""Skill draft services."""
from .service import DraftService
__all__ = ["DraftService"]

View File

@ -0,0 +1,131 @@
"""Draft lifecycle for Beaver skills."""
from __future__ import annotations
from uuid import uuid4
from beaver.skills.specs import SkillDraft, SkillSpecStore
class DraftService:
    """Create and look up skill drafts backed by a ``SkillSpecStore``.

    Every ``create_*`` method builds a ``SkillDraft`` with a fresh id and a
    creation timestamp, writes it to the store exactly once, and returns it.
    """

    def __init__(self, store: SkillSpecStore) -> None:
        self.store = store

    def _create_draft(
        self,
        *,
        proposal_kind: str,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        evidence_refs: list[dict] | None,
        **trigger_fields: str | None,
    ) -> SkillDraft:
        """Build one draft of ``proposal_kind``, persist it once, and return it.

        ``trigger_fields`` carries the optional ``trigger_run_id`` /
        ``trigger_session_id`` values. They are forwarded only when the caller
        supplies them, so other callers keep the ``SkillDraft`` defaults.
        """
        draft = SkillDraft(
            draft_id=uuid4().hex,
            skill_name=skill_name,
            base_version=base_version,
            # Copy caller-owned containers so later mutation cannot leak in.
            proposed_content=proposed_content,
            proposed_frontmatter=dict(proposed_frontmatter),
            created_at=_utc_now(),
            created_by=created_by,
            reason=reason,
            evidence_refs=list(evidence_refs or []),
            proposal_kind=proposal_kind,
            **trigger_fields,
        )
        self.store.write_draft(draft)
        return draft

    def create_new_skill_draft(
        self,
        *,
        skill_name: str,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        trigger_run_id: str | None = None,
        trigger_session_id: str | None = None,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create a ``new_skill`` draft (no base version)."""
        return self._create_draft(
            proposal_kind="new_skill",
            skill_name=skill_name,
            base_version=None,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            evidence_refs=evidence_refs,
            trigger_run_id=trigger_run_id,
            trigger_session_id=trigger_session_id,
        )

    def create_revision_draft(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        trigger_run_id: str | None = None,
        trigger_session_id: str | None = None,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create a ``revise_skill`` draft against ``base_version``."""
        return self._create_draft(
            proposal_kind="revise_skill",
            skill_name=skill_name,
            base_version=base_version,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            evidence_refs=evidence_refs,
            trigger_run_id=trigger_run_id,
            trigger_session_id=trigger_session_id,
        )

    def create_merge_draft(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        proposed_content: str,
        proposed_frontmatter: dict,
        created_by: str,
        reason: str,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create a ``merge_skills`` draft against ``base_version``.

        The draft is written once with its final ``proposal_kind`` rather than
        being stored first as a revision and then rewritten.
        """
        return self._create_draft(
            proposal_kind="merge_skills",
            skill_name=skill_name,
            base_version=base_version,
            proposed_content=proposed_content,
            proposed_frontmatter=proposed_frontmatter,
            created_by=created_by,
            reason=reason,
            evidence_refs=evidence_refs,
            trigger_run_id=None,
            trigger_session_id=None,
        )

    def create_retire_proposal(
        self,
        *,
        skill_name: str,
        base_version: str | None,
        created_by: str,
        reason: str,
        evidence_refs: list[dict] | None = None,
    ) -> SkillDraft:
        """Create a ``retire_skill`` proposal with empty content and frontmatter."""
        return self._create_draft(
            proposal_kind="retire_skill",
            skill_name=skill_name,
            base_version=base_version,
            proposed_content="",
            proposed_frontmatter={},
            created_by=created_by,
            reason=reason,
            evidence_refs=evidence_refs,
        )

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return the store's drafts, optionally restricted to ``skill_name``."""
        return self.store.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft | None:
        """Return the draft the store holds for ``skill_name`` / ``draft_id``."""
        return self.store.read_draft(skill_name, draft_id)
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,24 @@
"""Skill learning loop helpers."""
from .evidence import EvidencePacket, EvidenceSelector
from .eval import SkillDraftEvaluator
from .missing_skill import MissingSkillDraftResult, MissingSkillSynthesizer
from .pipeline import SkillLearningPipelineService
from .service import RunReceiptContext, SkillLearningService
from .synthesizer import SkillDraftSynthesizer
from .worker import SkillLearningWorker, SkillLearningWorkerConfig, SkillLearningWorkerResult
__all__ = [
"EvidencePacket",
"EvidenceSelector",
"SkillDraftEvaluator",
"MissingSkillDraftResult",
"MissingSkillSynthesizer",
"RunReceiptContext",
"SkillLearningPipelineService",
"SkillDraftSynthesizer",
"SkillLearningService",
"SkillLearningWorker",
"SkillLearningWorkerConfig",
"SkillLearningWorkerResult",
]

View File

@ -0,0 +1,121 @@
"""Lightweight replay/eval reports for skill drafts."""
from __future__ import annotations
from uuid import uuid4
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
from beaver.skills.specs import SkillDraft
class SkillDraftEvaluator:
    """Builds a bounded eval report without writing user-visible sessions."""

    def __init__(self, run_store: RunMemoryStore) -> None:
        self.run_store = run_store

    async def evaluate(
        self,
        *,
        candidate: SkillLearningCandidate,
        draft: SkillDraft,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Score ``draft`` against the candidate's source runs.

        Without a usable main provider a "skipped" report is returned. Otherwise
        at most the first eight source runs are looked up in the run store, and
        each known run contributes one case comparing its baseline score with
        the heuristic candidate score. If no run is known, a single synthetic
        case with a 0.75 baseline is used. The report passes when the candidate
        average reaches 0.75 and there is no regression without a net gain.
        """
        provider_ready = provider_bundle is not None and provider_bundle.main_provider is not None
        if not provider_ready:
            return self._skipped(candidate, draft)

        known_runs = {record.run_id: record for record in self.run_store.list_runs()}
        cases: list[dict] = []
        for run_id in candidate.source_run_ids[:8]:
            record = known_runs.get(run_id)
            if record is None:
                continue
            baseline = _score_from_validation(record.validation_result, record.success)
            cases.append(self._build_case(run_id, record.session_id, baseline, draft))
        if not cases:
            # No replayable run: fall back to one synthetic case.
            cases.append(self._build_case("", "", 0.75, draft))

        total = len(cases)
        baseline_avg = sum(case["baseline_score"] for case in cases) / total
        candidate_avg = sum(case["candidate_score"] for case in cases) / total
        regression_count = sum(
            1 for case in cases if case["candidate_score"] < case["baseline_score"]
        )
        improved_count = sum(
            1 for case in cases if case["candidate_score"] > case["baseline_score"]
        )
        unchanged_count = total - regression_count - improved_count
        score_delta = candidate_avg - baseline_avg
        # Regressions are tolerated only when the average still improved.
        no_net_regression = regression_count == 0 or score_delta > 0
        passed = no_net_regression and candidate_avg >= 0.75

        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=passed,
            baseline_score_avg=round(baseline_avg, 4),
            candidate_score_avg=round(candidate_avg, 4),
            score_delta=round(score_delta, 4),
            regression_count=regression_count,
            improved_count=improved_count,
            unchanged_count=unchanged_count,
            cases=cases,
            status="completed",
            created_at=_utc_now(),
        )

    @staticmethod
    def _build_case(run_id: str, session_id: str, baseline: float, draft: SkillDraft) -> dict:
        """Return one eval case comparing ``baseline`` with the candidate score."""
        candidate_score = _candidate_score(baseline, draft)
        return {
            "run_id": run_id,
            "session_id": session_id,
            "baseline_score": baseline,
            "candidate_score": candidate_score,
            "delta": round(candidate_score - baseline, 4),
        }

    def _skipped(self, candidate: SkillLearningCandidate, draft: SkillDraft) -> SkillDraftEvalReport:
        """Return the all-zero, passing report used when no provider is available."""
        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=True,
            baseline_score_avg=0.0,
            candidate_score_avg=0.0,
            score_delta=0.0,
            regression_count=0,
            improved_count=0,
            unchanged_count=0,
            cases=[],
            status="skipped_provider_unavailable",
            created_at=_utc_now(),
        )
def _score_from_validation(validation: dict | None, success: bool) -> float:
if isinstance(validation, dict) and "score" in validation:
try:
return max(0.0, min(1.0, float(validation.get("score") or 0.0)))
except (TypeError, ValueError):
pass
return 0.8 if success else 0.4
def _candidate_score(baseline: float, draft: SkillDraft) -> float:
content = draft.proposed_content.strip()
if not content and draft.proposal_kind != "retire_skill":
return 0.0
if "regression" in content.lower():
return max(0.0, baseline - 0.2)
return min(1.0, max(0.75, baseline + 0.05))
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,76 @@
"""Evidence selection for skill learning."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from beaver.engine.session.manager import SessionManager
from beaver.memory.runs.store import RunMemoryStore
@dataclass(slots=True)
class EvidencePacket:
    """Bundle of run and session evidence returned by ``EvidenceSelector.build_evidence_packet``."""

    # Requested run ids that were found in the run store, in request order.
    run_ids: list[str]
    # Caller-supplied session ids followed by the sessions of the resolved runs.
    session_ids: list[str]
    # Stripped task texts of the resolved runs, each cut to 400 characters.
    task_summaries: list[str]
    # Per-run excerpts of context-visible session events.
    session_excerpts: list[str]
    # Free-form extras; the selector sets {"bounded": True}.
    metadata: dict[str, Any] = field(default_factory=dict)
class EvidenceSelector:
    """Select runs and assemble bounded evidence for skill learning."""

    def __init__(self, run_store: RunMemoryStore, session_manager: SessionManager | None = None) -> None:
        self.run_store = run_store
        # Optional: without a session manager no session excerpts are produced.
        self.session_manager = session_manager

    def select_runs_for_revision(self, skill_name: str, version: str, limit: int = 5) -> list[str]:
        """Return the ids of runs the store reports for ``skill_name`` at ``version``."""
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return up to ``limit`` ids of the last runs whose task text contains ``theme``.

        Matching is case-insensitive, and a blank theme matches every run.
        A non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            # ``matches[-0:]`` would return every match rather than none.
            return []
        lowered = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not lowered or lowered in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(self, run_ids: list[str], session_ids: list[str] | None = None) -> EvidencePacket:
        """Assemble an ``EvidencePacket`` for the given runs.

        Unknown run ids are dropped. Session ids are de-duplicated with order
        preserved: caller-supplied ids first, then the sessions of the resolved
        runs. At most eight task summaries (400 characters each) and six
        session excerpts are kept.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        task_summaries: list[str] = []
        session_excerpts: list[str] = []
        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)
            if record.session_id not in resolved_session_ids:
                resolved_session_ids.append(record.session_id)
            summary = record.task_text.strip()
            if summary:
                task_summaries.append(summary[:400])
            if self.session_manager is not None:
                excerpt = self._session_excerpt(record.session_id, run_id)
                if excerpt:
                    session_excerpts.append(excerpt)
        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries[:8],
            session_excerpts=session_excerpts[:6],
            metadata={"bounded": True},
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Return up to twelve context-visible event lines for a run, capped at 2000 characters."""
        if self.session_manager is None:
            return ""
        events = self.session_manager.get_run_event_records(session_id, run_id)
        visible = [
            f"{event.role}: {event.content.strip()}"
            for event in events
            if event.context_visible and event.content
        ]
        return "\n".join(visible[:12])[:2000]

View File

@ -0,0 +1,166 @@
"""Synthesize draft-only skills for missing sub-agent guidance."""
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from beaver.engine.context import SkillContext
from beaver.engine.providers import ProviderBundle
from beaver.skills.drafts import DraftService
from beaver.skills.specs import SkillDraft
from beaver.skills.specs.serialization import canonical_hash
if TYPE_CHECKING:
from beaver.tasks.models import TaskRecord
@dataclass(slots=True)
class MissingSkillDraftResult:
draft: SkillDraft
skill_context: SkillContext
class MissingSkillSynthesizer:
    """Create a draft skill and an ephemeral SkillContext for the current run."""

    async def synthesize(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        node_id: str,
        node_task: str,
        skill_query: str,
        required_capabilities: list[str],
        provider_bundle: ProviderBundle,
        draft_service: DraftService,
    ) -> MissingSkillDraftResult:
        """Ask the provider for a skill draft, store it, and wrap it as a SkillContext.

        The auxiliary provider/runtime is preferred over the main one. When
        the provider call fails or its reply cannot be parsed as a JSON
        object, a deterministic fallback payload is used instead, so this
        method does not fail because of the provider.

        Returns:
            The created draft together with a ``SkillContext`` whose name and
            version are prefixed with ``draft:``.
        """
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        fallback = self._fallback_payload(
            skill_query=skill_query,
            node_task=node_task,
            capabilities=required_capabilities,
        )
        payload = fallback
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You create concise Beaver skill drafts. Return only JSON with keys: "
                            "skill_name, description, content, tags."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            "Create a procedural skill draft for this missing Task sub-agent guidance.\n\n"
                            f"Task goal:\n{task.goal}\n\n"
                            f"Current user request:\n{user_message}\n\n"
                            f"Node id: {node_id}\n"
                            f"Node task:\n{node_task}\n\n"
                            f"Skill query:\n{skill_query}\n"
                            f"Required capabilities: {required_capabilities}\n\n"
                            "The content must be actionable guidance for a temporary sub-agent. "
                            "Do not include implementation claims or publish metadata."
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0,
            )
            payload = self._parse_payload(response.content or "") or fallback
        except Exception:
            # Deliberate best-effort: any provider failure degrades to the
            # fallback payload, but it is logged rather than silently dropped.
            import logging

            logging.getLogger(__name__).warning(
                "Missing-skill draft synthesis failed; using fallback payload",
                exc_info=True,
            )
            payload = fallback

        skill_name = _slug(str(payload.get("skill_name") or skill_query or node_id))
        content = str(payload.get("content") or "").strip()
        if not content:
            content = str(fallback["content"])
        frontmatter = {
            "description": str(payload.get("description") or f"Draft guidance for {skill_query or node_id}").strip(),
            "tags": self._normalize_tags(payload.get("tags")),
            "metadata": {
                "origin": "missing_task_subagent_skill",
                "task_id": task.task_id,
                "node_id": node_id,
                "attempt_index": attempt_index,
                "skill_query": skill_query,
                "required_capabilities": list(required_capabilities),
            },
        }
        draft = draft_service.create_new_skill_draft(
            skill_name=skill_name,
            proposed_content=content,
            proposed_frontmatter=frontmatter,
            created_by="task-skill-resolver",
            reason="generated_for_missing_task_subagent_skill",
            trigger_session_id=task.session_id,
            evidence_refs=[
                {
                    "task_id": task.task_id,
                    "session_id": task.session_id,
                    "attempt_index": attempt_index,
                    "node_id": node_id,
                    "skill_query": skill_query,
                    "required_capabilities": list(required_capabilities),
                }
            ],
        )
        context = SkillContext(
            name=f"draft:{draft.skill_name}",
            content=draft.proposed_content,
            version=f"draft:{draft.draft_id}",
            content_hash=canonical_hash(draft.proposed_content),
            activation_reason="generated_missing_skill",
            tool_hints=[],
        )
        return MissingSkillDraftResult(draft=draft, skill_context=context)

    @staticmethod
    def _normalize_tags(raw: Any) -> list[str]:
        """Coerce a model-supplied ``tags`` value into a list of non-empty strings.

        A comma-separated string is split instead of being iterated character
        by character; lists and tuples are stringified item by item; any other
        value, or an empty result, yields the default tags.
        """
        if isinstance(raw, str):
            items: list[Any] = raw.split(",")
        elif isinstance(raw, (list, tuple)):
            items = list(raw)
        else:
            items = []
        tags = [text for text in (str(item).strip() for item in items) if text]
        return tags or ["generated", "task-sub-agent"]

    @staticmethod
    def _parse_payload(text: str) -> dict[str, Any] | None:
        """Extract a JSON object from a model reply.

        Strips a surrounding Markdown code fence and a leading ``json`` tag,
        then parses the span from the first ``{`` to the last ``}``.

        Returns:
            The parsed dict, or ``None`` when the text is not valid JSON or
            the JSON value is not an object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return None
        return payload if isinstance(payload, dict) else None

    @staticmethod
    def _fallback_payload(*, skill_query: str, node_task: str, capabilities: list[str]) -> dict[str, Any]:
        """Build a deterministic draft payload that needs no provider call.

        The title is the skill query, else the node task, else a fixed
        placeholder; capabilities are rendered as a bullet list.
        """
        title = skill_query or node_task or "task subagent guidance"
        capability_lines = "\n".join(f"- {item}" for item in capabilities) or "- Follow the node task precisely."
        return {
            "skill_name": _slug(title),
            "description": f"Draft guidance for {title}.",
            "tags": ["generated", "task-sub-agent"],
            "content": (
                f"# {title}\n\n"
                "Use this draft guidance only for the current delegated sub-task.\n\n"
                "## Objective\n"
                f"{node_task or title}\n\n"
                "## Capabilities to apply\n"
                f"{capability_lines}\n\n"
                "## Output\n"
                "Return concise evidence, decisions, and unresolved risks for the main Agent to synthesize."
            ),
        }
def _slug(value: str) -> str:
cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", value.strip().lower()).strip("-")
return cleaned[:64].strip("-") or "generated-task-subagent-skill"

View File

@ -0,0 +1,354 @@
"""Manual skill learning pipeline orchestration."""
from __future__ import annotations
from typing import Any
from beaver.engine.providers import ProviderBundle
from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningCandidate, SkillLearningStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.learning.service import SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpec, SkillVersion
class SkillLearningPipelineService:
    """Coordinates candidate -> draft -> review -> publish lifecycle.

    Candidate status changes are written through ``learning_store``; draft,
    review, and publish work is delegated to the injected services.
    """

    def __init__(
        self,
        *,
        learning_store: SkillLearningStore,
        learning_service: SkillLearningService,
        draft_service: DraftService,
        review_service: ReviewService,
        publisher: SkillPublisher,
        safety_checker: SkillDraftSafetyChecker | None = None,
        evaluator: SkillDraftEvaluator | None = None,
    ) -> None:
        self.learning_store = learning_store
        self.learning_service = learning_service
        self.draft_service = draft_service
        self.review_service = review_service
        self.publisher = publisher
        # A default checker is created when none (or a falsy one) is supplied.
        self.safety_checker = safety_checker or SkillDraftSafetyChecker()
        # The evaluator stays optional; evaluate_draft builds one on demand.
        self.evaluator = evaluator

    # ------------------------------------------------------------------
    # Candidates
    # ------------------------------------------------------------------

    def list_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return stored candidates, passing ``status`` through to the store."""
        return self.learning_store.list_learning_candidates(status=status)

    def get_candidate(self, candidate_id: str) -> SkillLearningCandidate:
        """Return the candidate with ``candidate_id``.

        Raises:
            ValueError: If no stored candidate has that id.
        """
        found = next(
            (
                item
                for item in self.learning_store.list_learning_candidates()
                if item.candidate_id == candidate_id
            ),
            None,
        )
        if found is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")
        return found

    async def synthesize_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Synthesize a draft for the candidate and record it as ``draft_ready``."""
        draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
        self.mark_draft_synthesized(candidate_id, draft)
        return draft

    async def regenerate_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Move the candidate back to ``synthesizing`` and synthesize a new draft."""
        self.learning_store.transition_learning_candidate(
            candidate_id,
            "synthesizing",
            event_type="draft_synthesis_started",
            last_error=None,
        )
        return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)

    def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``queued`` and clear its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "queued",
            event_type="candidate_queued",
            last_error=None,
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_synthesizing(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``synthesizing`` and clear its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "synthesizing",
            event_type="draft_synthesis_started",
            last_error=None,
        )
        return self._require_updated(updated, candidate_id)

    def mark_draft_synthesized(self, candidate_id: str, draft: SkillDraft) -> SkillLearningCandidate:
        """Record ``draft`` on the candidate and transition it to ``draft_ready``.

        The draft id and skill name are stored both as dedicated fields and
        inside a copy of the candidate's evidence mapping.

        Raises:
            ValueError: If the candidate is unknown.
        """
        candidate = self.get_candidate(candidate_id)
        evidence = dict(candidate.evidence)
        evidence.update(draft_id=draft.draft_id, draft_skill_name=draft.skill_name)
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "draft_ready",
            event_type="draft_synthesis_completed",
            evidence=evidence,
            draft_id=draft.draft_id,
            draft_skill_name=draft.skill_name,
            risk_level=candidate.risk_level,
            last_error=None,
            payload={"draft_id": draft.draft_id, "skill_name": draft.skill_name},
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_failed(
        self,
        candidate_id: str,
        error: str,
        *,
        retry_count: int,
        terminal: bool,
    ) -> SkillLearningCandidate:
        """Record a failure; the candidate becomes ``failed`` if terminal, else ``open``.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        next_status = "failed" if terminal else "open"
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            next_status,
            event_type="failed",
            retry_count=retry_count,
            last_error=error,
            payload={"error": error, "terminal": terminal, "retry_count": retry_count},
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_superseded(self, candidate_id: str, reason: str) -> SkillLearningCandidate:
        """Transition the candidate to ``superseded``, storing ``reason`` as its last error.

        Raises:
            ValueError: If the store reports no such candidate.
        """
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "superseded",
            event_type="superseded",
            last_error=reason,
            payload={"reason": reason},
        )
        return self._require_updated(updated, candidate_id)

    # ------------------------------------------------------------------
    # Drafts and reviews
    # ------------------------------------------------------------------

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return drafts from the draft service, passing ``skill_name`` through."""
        return self.draft_service.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Return the requested draft.

        Raises:
            ValueError: If the draft service returns ``None``.
        """
        draft = self.draft_service.get_draft(skill_name, draft_id)
        if draft is not None:
            return draft
        raise ValueError(f"Draft not found: {skill_name}/{draft_id}")

    def submit_review(
        self,
        skill_name: str,
        draft_id: str,
        *,
        requested_by: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Submit the draft for review unless a stored safety report blocks it.

        A missing safety report does not block submission.

        Raises:
            ValueError: If a stored safety report failed or is ``critical``.
        """
        safety = self.get_safety_report(skill_name, draft_id)
        blocked = safety is not None and (not safety.passed or safety.risk_level == "critical")
        if blocked:
            raise ValueError("Draft cannot enter review because safety check failed")
        return self.review_service.submit_for_review(
            skill_name,
            draft_id,
            reviewer_request=notes,
            requested_by=requested_by,
        )

    def approve(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Approve the draft and mark its candidate (if any) as ``approved``."""
        review = self.review_service.approve(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "approved", "approved")
        return review

    def reject(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Reject the draft and mark its candidate (if any) as ``rejected``."""
        review = self.review_service.reject(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "rejected", "rejected")
        return review

    def publish(
        self,
        skill_name: str,
        draft_id: str,
        *,
        publisher: str = "system",
        notes: str = "",
        confirm_high_risk: bool = False,
    ) -> SkillVersion | SkillSpec:
        """Publish the draft after all publish gates pass.

        Drafts whose ``proposal_kind`` is ``retire_skill`` are applied as a
        retire proposal; every other draft goes through the regular publish
        call. The linked candidate (if any) is then marked ``published``.

        Raises:
            ValueError: If the draft is missing or a publish gate fails.
        """
        draft = self.get_draft(skill_name, draft_id)
        self._validate_publish_gates(draft, confirm_high_risk=confirm_high_risk)
        is_retirement = draft.proposal_kind == "retire_skill"
        result = (
            self.publisher.apply_retire_proposal(skill_name, draft_id, actor=publisher, notes=notes)
            if is_retirement
            else self.publisher.publish(skill_name, draft_id, publisher=publisher, notes=notes)
        )
        self._mark_candidate_by_draft(skill_name, draft_id, "published", "published")
        return result

    def rollback(
        self,
        skill_name: str,
        target_version: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Roll the skill back to ``target_version``; an empty reason becomes ``manual rollback``."""
        effective_reason = reason or "manual rollback"
        return self.publisher.rollback(skill_name, target_version, actor=actor, reason=effective_reason)

    def disable(
        self,
        skill_name: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Disable the skill; an empty reason becomes ``manual disable``."""
        effective_reason = reason or "manual disable"
        return self.publisher.disable(skill_name, actor=actor, reason=effective_reason)

    def reviews_for_draft(self, skill_name: str, draft_id: str) -> list[SkillReviewRecord]:
        """Return the review records stored for the draft."""
        return self.review_service.store.list_reviews(skill_name, draft_id=draft_id)

    # ------------------------------------------------------------------
    # Safety and evaluation
    # ------------------------------------------------------------------

    def check_safety(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport:
        """Run the safety checker on the draft, persist the report, and update the candidate.

        A failing or ``critical`` report moves the candidate to
        ``safety_failed``. A passing report moves it to ``draft_ready``,
        except that a candidate currently in ``eval_failed`` stays there.

        Raises:
            ValueError: If the draft does not exist.
        """
        draft = self.get_draft(skill_name, draft_id)
        report = self.safety_checker.check(draft)
        self.learning_store.write_safety_report(report)

        unsafe = (not report.passed) or report.risk_level == "critical"
        status = "safety_failed" if unsafe else "draft_ready"
        current = self._candidate_by_draft(skill_name, draft_id)
        # A clean safety result must not hide an earlier evaluation failure.
        if status == "draft_ready" and current is not None and current.status == "eval_failed":
            status = "eval_failed"

        failure_text = "; ".join(report.blocked_reasons) if status == "safety_failed" else None
        self._mark_candidate_by_draft(
            skill_name,
            draft_id,
            status,
            "safety_checked",
            safety_report_id=report.report_id,
            risk_level=report.risk_level,
            last_error=failure_text,
        )
        return report

    def get_safety_report(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport | None:
        """Return the stored safety report for the draft, if the store has one."""
        return self.learning_store.get_safety_report(skill_name, draft_id)

    def get_eval_report(self, skill_name: str, draft_id: str) -> SkillDraftEvalReport | None:
        """Return the stored evaluation report for the draft, if the store has one."""
        return self.learning_store.get_eval_report(skill_name, draft_id)

    async def evaluate_draft(
        self,
        candidate_id: str,
        skill_name: str,
        draft_id: str,
        *,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Evaluate the draft, persist the report, and update the candidate status.

        A passing or provider-skipped evaluation yields ``draft_ready``
        unless the draft's candidate is currently ``safety_failed``, in which
        case that status and its error are kept. Any other outcome yields
        ``eval_failed``.

        Raises:
            ValueError: If the draft or the candidate does not exist.
        """
        draft = self.get_draft(skill_name, draft_id)
        candidate = self.get_candidate(candidate_id)
        evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
        report = await evaluator.evaluate(candidate=candidate, draft=draft, provider_bundle=provider_bundle)
        self.learning_store.write_eval_report(report)

        if report.status == "skipped_provider_unavailable":
            status, error = "draft_ready", "eval skipped: provider unavailable"
        elif report.passed:
            status, error = "draft_ready", None
        else:
            status, error = "eval_failed", "eval failed"

        current = self._candidate_by_draft(skill_name, draft_id)
        # A clean evaluation must not hide an earlier safety failure.
        if status == "draft_ready" and current is not None and current.status == "safety_failed":
            status = "safety_failed"
            error = current.last_error

        self.learning_store.transition_learning_candidate(
            candidate_id,
            status,
            event_type="eval_completed",
            eval_report_id=report.report_id,
            last_error=error,
            payload=report.to_dict(),
        )
        return report

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _validate_publish_gates(self, draft: SkillDraft, *, confirm_high_risk: bool) -> None:
        """Raise ``ValueError`` unless the draft satisfies every publish gate.

        Gates, checked in order: an approved review exists; a safety report
        exists, passed, and is not ``critical``; a ``high`` risk report needs
        ``confirm_high_risk``; a stored evaluation report must have passed
        unless it was skipped for an unavailable provider.
        """
        reviews = self.reviews_for_draft(draft.skill_name, draft.draft_id)
        has_approval = any(review.status == SkillReviewState.APPROVED.value for review in reviews)
        if not has_approval:
            raise ValueError("Draft must have an approved review before publish")

        safety = self.get_safety_report(draft.skill_name, draft.draft_id)
        if safety is None:
            raise ValueError("Draft requires a passing safety report before publish")
        if not safety.passed:
            raise ValueError("Draft safety report did not pass")
        if safety.risk_level == "critical":
            raise ValueError("Critical risk drafts cannot be published")
        if safety.risk_level == "high" and not confirm_high_risk:
            raise ValueError("High risk draft publish requires confirm_high_risk=true")

        eval_report = self.get_eval_report(draft.skill_name, draft.draft_id)
        if eval_report is None:
            return
        if eval_report.status != "skipped_provider_unavailable" and not eval_report.passed:
            raise ValueError("Draft eval report did not pass")

    def _mark_candidate_by_draft(
        self,
        skill_name: str,
        draft_id: str,
        status: str,
        event_type: str,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Transition the candidate linked to the draft, if one exists.

        Returns ``None`` when no candidate references the draft. A candidate
        in ``safety_failed`` or ``eval_failed`` is returned unchanged when the
        requested status is ``review_pending`` or ``approved``.
        """
        candidate = self._candidate_by_draft(skill_name, draft_id)
        if candidate is None:
            return None
        gate_failed = candidate.status in {"safety_failed", "eval_failed"}
        if gate_failed and status in {"review_pending", "approved"}:
            return candidate
        return self.learning_store.transition_learning_candidate(
            candidate.candidate_id,
            status,
            event_type=event_type,
            **updates,
        )

    def _candidate_by_draft(self, skill_name: str, draft_id: str) -> SkillLearningCandidate | None:
        """Return the first candidate whose draft name and id both match, else ``None``."""
        for item in self.learning_store.list_learning_candidates():
            if (item.draft_skill_name, item.draft_id) == (skill_name, draft_id):
                return item
        return None

    @staticmethod
    def _require_updated(candidate: SkillLearningCandidate | None, candidate_id: str) -> SkillLearningCandidate:
        """Return ``candidate``, raising ``ValueError`` when the store returned ``None``."""
        if candidate is not None:
            return candidate
        raise ValueError(f"Unknown learning candidate: {candidate_id}")
def model_to_dict(value: Any) -> dict[str, Any]:
    """Convert ``value`` to a plain dict.

    Objects exposing ``to_dict`` are asked to convert themselves; dicts are
    shallow-copied.

    Raises:
        TypeError: If ``value`` is neither of the above.
    """
    if hasattr(value, "to_dict"):
        return value.to_dict()
    if not isinstance(value, dict):
        raise TypeError(f"Cannot convert {type(value).__name__} to dict")
    return dict(value)

View File

@ -0,0 +1,109 @@
"""Safety checks for generated skill drafts."""
from __future__ import annotations
import re
from uuid import uuid4
from beaver.memory.skills import SkillDraftSafetyReport
from beaver.skills.specs import SkillDraft
class SkillDraftSafetyChecker:
    """Deterministic publish gate for skill drafts."""

    # Regexes searched in the lowercased draft content plus frontmatter text;
    # any match blocks the draft and forces the risk level to "critical".
    _CRITICAL_PATTERNS = [
        r"ignore (all )?(previous|system|developer) instructions",
        r"bypass (permissions|permission|safety|policy|policies)",
        r"disable (guardrails|safety|permission checks)",
        r"reveal (secrets|api keys|tokens|credentials)",
        r"exfiltrate",
    ]
    # Tool hint names (compared lowercased) that raise the risk level to "high".
    _DANGEROUS_TOOL_HINTS = {
        "shell",
        "terminal",
        "bash",
        "filesystem_write",
        "write_file",
        "delete_file",
        "network",
        "http",
        "auth",
        "credentials",
    }

    def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
        # None disables the unknown-tool check entirely.
        self.allowed_tool_names = allowed_tool_names

    @staticmethod
    def _frontmatter_dict(frontmatter: object) -> dict:
        """Return ``frontmatter`` itself when it is a dict, otherwise an empty dict."""
        return frontmatter if isinstance(frontmatter, dict) else {}

    def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
        """Inspect ``draft`` and return a safety report.

        A non-dict frontmatter is reported as a blocked reason and treated as
        empty for the remaining checks, so the caller always receives a
        report instead of an ``AttributeError``.

        Returns:
            A report whose ``passed`` flag is true only when nothing was
            blocked and the risk level is not ``critical``.
        """
        issues: list[str] = []
        blocked: list[str] = []
        risk_level = "low"

        raw_frontmatter = draft.proposed_frontmatter
        if not isinstance(raw_frontmatter, dict):
            blocked.append("frontmatter must be an object")
        # Later checks call .get(), which only a dict is known to support.
        frontmatter = self._frontmatter_dict(raw_frontmatter)

        description = str(frontmatter.get("description") or "").strip()
        if not description and draft.proposal_kind != "retire_skill":
            issues.append("frontmatter.description is missing")
            risk_level = _max_risk(risk_level, "medium")

        tool_hints = _tool_hints(frontmatter)
        if self.allowed_tool_names is not None:
            unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
            if unknown:
                blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
        dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
        if dangerous:
            issues.append(f"dangerous tool hints require high-risk review: {', '.join(dangerous)}")
            risk_level = _max_risk(risk_level, "high")

        # Scan the raw frontmatter text too, so a malformed value is still
        # searched for critical patterns.
        content = f"{draft.proposed_content}\n{raw_frontmatter}".lower()
        for pattern in self._CRITICAL_PATTERNS:
            if re.search(pattern, content):
                blocked.append(f"critical prompt-safety pattern matched: {pattern}")
                risk_level = "critical"

        if draft.proposal_kind in {"retire_skill", "merge_skills"}:
            risk_level = _max_risk(risk_level, "high")

        passed = not blocked and risk_level != "critical"
        return SkillDraftSafetyReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            passed=passed,
            risk_level=risk_level,
            issues=issues,
            blocked_reasons=blocked,
            suggested_fix=_suggest_fix(blocked, issues),
            created_at=_utc_now(),
        )
def _tool_hints(frontmatter: dict) -> list[str]:
raw = frontmatter.get("tools")
if isinstance(raw, list):
return [str(item).strip() for item in raw if str(item).strip()]
if isinstance(raw, str):
return [item.strip() for item in raw.split(",") if item.strip()]
return []
def _max_risk(left: str, right: str) -> str:
order = {"low": 0, "medium": 1, "high": 2, "critical": 3}
return left if order[left] >= order[right] else right
def _suggest_fix(blocked: list[str], issues: list[str]) -> str:
if blocked:
return "Remove blocked instructions or invalid tool hints before review."
if issues:
return "Review the flagged issues before publishing."
return ""
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,293 @@
"""Skill learning loop services."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from itertools import combinations
import re
from typing import Any
from uuid import uuid4
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
from beaver.memory.runs.store import RunMemoryStore
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
from beaver.memory.skills.store import SkillLearningStore
from beaver.skills.drafts.service import DraftService
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
from beaver.skills.specs import SkillActivationReceipt
@dataclass(slots=True)
class RunReceiptContext:
run_record: RunRecord
effect_records: list[SkillEffectRecord] = field(default_factory=list)
class SkillLearningService:
    """Record run outcomes, score skill versions, and propose learning candidates.

    Runs and skill effects live in ``run_store``; performance snapshots and
    candidates live in ``learning_store``. Drafts are created through
    ``draft_service`` from evidence assembled by ``evidence_selector``.
    """

    def __init__(
        self,
        *,
        run_store: RunMemoryStore,
        learning_store: SkillLearningStore,
        draft_service: DraftService,
        evidence_selector: EvidenceSelector,
        synthesizer: SkillDraftSynthesizer | None = None,
    ) -> None:
        self.run_store = run_store
        self.learning_store = learning_store
        self.draft_service = draft_service
        self.evidence_selector = evidence_selector
        self.synthesizer = synthesizer or SkillDraftSynthesizer()

    def collect_run_receipts(
        self,
        run_result_context: RunReceiptContext,
        *,
        generate_candidates: bool = True,
    ) -> list[SkillLearningCandidate]:
        """Persist a run and its skill effects, rescore, and optionally build candidates.

        Returns:
            The candidates from ``build_learning_candidates``, or an empty
            list when ``generate_candidates`` is false.
        """
        self.run_store.append_run_record(run_result_context.run_record)
        for effect in run_result_context.effect_records:
            self.run_store.append_skill_effect(effect)
        self.rescore_skill_versions()
        if not generate_candidates:
            return []
        return self.build_learning_candidates()

    def build_learning_candidates(self) -> list[SkillLearningCandidate]:
        """Compute revise/new/merge/retire candidates and store the unseen ones.

        Returns:
            Every computed candidate, including ones that were already stored.
        """
        candidates: list[SkillLearningCandidate] = [
            *self._build_revision_candidates(),
            *self._build_new_skill_candidates(),
            *self._build_merge_candidates(),
            *self._build_retire_candidates(),
        ]
        existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
        for candidate in candidates:
            if candidate.candidate_id not in existing_ids:
                self.learning_store.record_learning_candidate(candidate)
                existing_ids.add(candidate.candidate_id)
        return candidates

    async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
        """Create a draft for the candidate according to its kind.

        ``retire_skill`` candidates become a retire proposal without any
        provider call. ``new_skill`` and ``merge_skills`` candidates, and any
        other kind (treated as a revision), are synthesized through the
        auxiliary provider when present, else the main provider.

        Raises:
            ValueError: If ``candidate_id`` is unknown.
        """
        candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
        candidate = candidates.get(candidate_id)
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")

        evidence_refs = [{"run_id": item} for item in candidate.source_run_ids]
        if candidate.kind == "retire_skill":
            target_skill = candidate.related_skill_names[0]
            return self.draft_service.create_retire_proposal(
                skill_name=target_skill,
                base_version=candidate.evidence.get("skill_version"),
                created_by="learning-loop",
                reason=candidate.reason,
                evidence_refs=evidence_refs,
            )

        packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        model = (
            provider_bundle.auxiliary_runtime.model
            if provider_bundle.auxiliary_runtime is not None
            else provider_bundle.main_runtime.model
        )

        if candidate.kind == "new_skill":
            payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
            return self.draft_service.create_new_skill_draft(
                skill_name=self._suggest_skill_name(candidate, packet),
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                # change_reason is optional in the payload: the candidate's
                # own reason is the fallback, so a missing key must not raise.
                reason=payload.get("change_reason") or candidate.reason,
                evidence_refs=evidence_refs,
            )

        if candidate.kind == "merge_skills":
            target_name = self._suggest_skill_name(candidate, packet)
            payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
            return self.draft_service.create_merge_draft(
                skill_name=target_name,
                base_version=None,
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload.get("change_reason") or candidate.reason,
                evidence_refs=evidence_refs,
            )

        target_skill = candidate.related_skill_names[0]
        base_version = candidate.evidence.get("skill_version")
        payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
        return self.draft_service.create_revision_draft(
            skill_name=target_skill,
            base_version=base_version,
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            reason=payload.get("change_reason") or candidate.reason,
            evidence_refs=evidence_refs,
        )

    def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
        """Recompute and store one performance snapshot per skill version.

        Every version seen in a run receipt gets a snapshot, even with zero
        recorded effects. Counts are derived from the effect records.
        """
        snapshots: list[SkillPerformanceSnapshot] = []
        grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
        # Seed every activated version so it is scored even without effects.
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                grouped.setdefault((receipt.skill_name, receipt.skill_version), [])
        for effect in self._all_effects():
            grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)
        for (skill_name, skill_version), effects in grouped.items():
            activation_count = len(effects)
            success_count = sum(1 for item in effects if item.success)
            failure_count = activation_count - success_count
            # Most recent effect that carries a feedback score, if any.
            last_feedback = next(
                (item.feedback_score for item in reversed(effects) if item.feedback_score is not None),
                None,
            )
            latest_used = effects[-1].created_at if effects else ""
            snapshot = SkillPerformanceSnapshot(
                skill_name=skill_name,
                skill_version=skill_version,
                activation_count=activation_count,
                success_count=success_count,
                failure_count=failure_count,
                latest_used_at=latest_used,
                last_feedback_score=last_feedback,
            )
            self.learning_store.update_performance_snapshot(snapshot)
            snapshots.append(snapshot)
        return snapshots

    def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a revision for each low-performing version with at least two runs."""
        candidates: list[SkillLearningCandidate] = []
        for snapshot in self.learning_store.list_low_performing_versions():
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5)
            if len(runs) < 2:
                continue
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
                kind="revise_skill",
                source_run_ids=[record.run_id for record in runs],
                source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                related_skill_names=[snapshot.skill_name],
                reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
                evidence={"skill_version": snapshot.skill_version},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a new skill for each task theme that succeeded repeatedly without skills.

        Runs are grouped by ``_task_theme``. A theme qualifies when at least
        two of its runs succeeded and none of those activated a skill.
        """
        groups: dict[str, list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            key = self._task_theme(record.task_text)
            if not key:
                continue
            groups.setdefault(key, []).append(record)
        candidates: list[SkillLearningCandidate] = []
        for theme, runs in groups.items():
            successful = [record for record in runs if record.success]
            if len(successful) < 2:
                continue
            if any(record.activated_skills for record in successful):
                continue
            recent = successful[-5:]
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("new", theme, str(len(successful))),
                kind="new_skill",
                source_run_ids=[record.run_id for record in recent],
                source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                related_skill_names=[],
                reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
                evidence={"theme": theme},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a merge for each pair of skills activated together in at least two runs."""
        pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            # Sorting makes the pair key independent of activation order.
            unique = sorted({receipt.skill_name for receipt in record.activated_skills})
            for pair in combinations(unique, 2):
                pair_counts.setdefault(pair, []).append(record)
        candidates: list[SkillLearningCandidate] = []
        for pair, runs in pair_counts.items():
            if len(runs) < 2:
                continue
            recent = runs[-5:]
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("merge", *pair),
                kind="merge_skills",
                source_run_ids=[record.run_id for record in recent],
                source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                related_skill_names=list(pair),
                reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
                evidence={"pair": list(pair)},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
        """Propose retirement for versions last used more than ``stale_days`` ago.

        Snapshots with no activations, no timestamp, or an unparsable
        timestamp are skipped.
        """
        candidates: list[SkillLearningCandidate] = []
        cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
        for snapshot in self.learning_store.list_performance_snapshots():
            if snapshot.activation_count == 0 or not snapshot.latest_used_at:
                continue
            latest_used = self._parse_timestamp(snapshot.latest_used_at)
            if latest_used is None or latest_used > cutoff:
                continue
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3)
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
                kind="retire_skill",
                source_run_ids=[record.run_id for record in runs],
                source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                related_skill_names=[snapshot.skill_name],
                reason=(
                    f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
                    f"since {snapshot.latest_used_at} and may be ready for retirement."
                ),
                evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _all_effects(self) -> list[SkillEffectRecord]:
        """Return the effect records of every known skill version, each fetched once.

        Known versions are the union of those with a performance snapshot and
        those in any run's activation receipts. Fetching per unique version
        keeps a version activated in several runs from being counted once per
        run, and lets a newly activated version be scored on the same pass.
        """
        # A dict is used as an insertion-ordered set of (name, version) keys.
        versions: dict[tuple[str, str], None] = {}
        for snapshot in self.learning_store.list_performance_snapshots():
            versions.setdefault((snapshot.skill_name, snapshot.skill_version), None)
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                versions.setdefault((receipt.skill_name, receipt.skill_version), None)
        effects: list[SkillEffectRecord] = []
        for skill_name, skill_version in versions:
            effects.extend(self.run_store.list_skill_effects(skill_name, version=skill_version))
        return effects

    @staticmethod
    def _candidate_id(kind: str, *parts: str) -> str:
        """Build a candidate id of the form ``kind:part1|part2|...``."""
        return f"{kind}:{'|'.join(parts)}"

    @staticmethod
    def _task_theme(task_text: str) -> str:
        """Return the first eight whitespace-normalized, lowercased words of ``task_text``."""
        cleaned = re.sub(r"\s+", " ", task_text.strip().lower())
        if not cleaned:
            return ""
        words = cleaned.split(" ")
        return " ".join(words[:8]).strip()

    @staticmethod
    def _suggest_skill_name(candidate: SkillLearningCandidate, packet: EvidencePacket) -> str:
        """Choose a skill name for a draft.

        Preference order: the candidate's first related skill, a slug of the
        first task summary (at most 48 characters, no leading or trailing
        hyphen), then a random ``generated-skill-`` name.
        """
        if candidate.related_skill_names:
            return candidate.related_skill_names[0]
        if packet.task_summaries:
            seed = re.sub(r"[^a-z0-9]+", "-", packet.task_summaries[0].lower()).strip("-")
            if seed:
                # Truncation can land on a hyphen; strip it again.
                return seed[:48].strip("-")
        return f"generated-skill-{uuid4().hex[:8]}"

    @staticmethod
    def _parse_timestamp(value: str) -> datetime | None:
        """Parse an ISO-8601 timestamp into an aware UTC datetime.

        A trailing ``Z`` is accepted, naive values are taken to be UTC, and
        unparsable input returns ``None``.
        """
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)

View File

@ -0,0 +1,118 @@
"""LLM-backed draft synthesis for skill learning."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers.base import LLMProvider
from beaver.skills.learning.evidence import EvidencePacket
from beaver.memory.skills.models import SkillLearningCandidate
class SkillDraftSynthesizer:
async def synthesize_revision(
    self,
    candidate: SkillLearningCandidate,
    evidence_packet: EvidencePacket,
    provider: LLMProvider,
    model: str,
) -> dict[str, Any]:
    """Synthesize a payload for revising an existing skill (action ``revise``)."""
    action = "revise"
    return await self._synthesize(candidate, evidence_packet, provider, model, action)
async def synthesize_new_skill(
self,
candidate: SkillLearningCandidate,
evidence_packet: EvidencePacket,
provider: LLMProvider,
model: str,
) -> dict[str, Any]:
return await self._synthesize(candidate, evidence_packet, provider, model, "new")
async def synthesize_merge(
self,
candidate: SkillLearningCandidate,
evidence_packet: EvidencePacket,
provider: LLMProvider,
model: str,
) -> dict[str, Any]:
return await self._synthesize(candidate, evidence_packet, provider, model, "merge")
async def _synthesize(
self,
candidate: SkillLearningCandidate,
evidence_packet: EvidencePacket,
provider: LLMProvider,
model: str,
action: str,
) -> dict[str, Any]:
prompt = self._build_prompt(candidate, evidence_packet, action)
response = await provider.chat(
messages=[
{
"role": "system",
"content": (
"You synthesize Beaver skill drafts from execution evidence. "
"Return only JSON with keys: frontmatter, content, change_reason."
),
},
{"role": "user", "content": prompt},
],
tools=None,
model=model,
max_tokens=1500,
temperature=0,
)
payload = self._parse_payload(response.content or "")
if payload:
return payload
return self._fallback_payload(candidate, evidence_packet, action)
@staticmethod
def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
return (
f"Action: {action}\n"
f"Candidate kind: {candidate.kind}\n"
f"Reason: {candidate.reason}\n"
f"Related skills: {candidate.related_skill_names}\n"
f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
+ "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
+ "\n\nReturn JSON only."
)
@staticmethod
def _parse_payload(content: str) -> dict[str, Any]:
cleaned = content.strip()
if cleaned.startswith("```"):
lines = cleaned.splitlines()
if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
cleaned = "\n".join(lines[1:-1]).strip()
try:
payload = json.loads(cleaned)
except json.JSONDecodeError:
return {}
if not isinstance(payload, dict):
return {}
frontmatter = payload.get("frontmatter")
content_value = payload.get("content")
if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
return {}
return {
"frontmatter": frontmatter,
"content": content_value.strip(),
"change_reason": str(payload.get("change_reason") or ""),
}
@staticmethod
def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
title = related.replace("_", "-")
content = "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5]) or "- No evidence captured."
return {
"frontmatter": {
"description": candidate.reason or f"Auto-generated {action} draft for {title}.",
"tools": [],
},
"content": f"# {title}\n\n## Evidence\n\n{content}\n",
"change_reason": candidate.reason or f"Fallback {action} synthesis.",
}

View File

@ -0,0 +1,175 @@
"""Background worker for assisted skill learning."""
from __future__ import annotations
import asyncio
import os
from dataclasses import dataclass, field
from typing import Callable
from beaver.engine.providers import ProviderBundle
from beaver.memory.skills import SkillLearningCandidate
from beaver.skills.learning.pipeline import SkillLearningPipelineService
@dataclass(slots=True)
class SkillLearningWorkerConfig:
enabled: bool = True
max_drafts_per_run: int = 5
max_retries: int = 3
interval_seconds: float = 300.0
@classmethod
def from_env(cls) -> "SkillLearningWorkerConfig":
return cls(
enabled=_env_bool("BEAVER_SKILL_LEARNING_WORKER_ENABLED", True),
max_drafts_per_run=_env_int("BEAVER_SKILL_LEARNING_MAX_DRAFTS_PER_RUN", 5),
max_retries=_env_int("BEAVER_SKILL_LEARNING_MAX_RETRIES", 3),
interval_seconds=float(os.getenv("BEAVER_SKILL_LEARNING_INTERVAL_SECONDS", "300") or "300"),
)
@dataclass(slots=True)
class SkillLearningWorkerResult:
processed: int = 0
succeeded: int = 0
failed: int = 0
skipped: int = 0
failures: list[dict[str, str]] = field(default_factory=list)
def to_dict(self) -> dict:
return {
"processed": self.processed,
"succeeded": self.succeeded,
"failed": self.failed,
"skipped": self.skipped,
"failures": [dict(item) for item in self.failures],
}
class SkillLearningWorker:
    """Synthesizes drafts for open candidates; never approves or publishes.

    The worker drives the learning pipeline for each selected candidate:
    queue, synthesize, safety-check and (when the safety check allows it)
    evaluate. Passes are serialized through an ``asyncio.Lock``.
    """

    # Candidate statuses that count as "a draft is already in flight".
    _ACTIVE_DRAFT_STATUSES = {"queued", "synthesizing", "draft_ready", "review_pending", "approved"}

    def __init__(
        self,
        *,
        pipeline: SkillLearningPipelineService,
        provider_bundle_factory: Callable[[], ProviderBundle],
        config: SkillLearningWorkerConfig | None = None,
    ) -> None:
        """Store collaborators; ``config`` defaults to the environment config.

        Args:
            pipeline: Pipeline service used for every candidate operation.
            provider_bundle_factory: Called each time a provider bundle is
                needed (once for synthesis, once for evaluation).
            config: Worker settings; built via ``from_env`` when falsy.
        """
        self.pipeline = pipeline
        self.provider_bundle_factory = provider_bundle_factory
        self.config = config or SkillLearningWorkerConfig.from_env()
        self._running = False
        self._lock = asyncio.Lock()

    async def run_forever(self) -> None:
        """Run passes until ``stop`` is called, sleeping between passes.

        Returns immediately when the worker is disabled. The running flag is
        only checked at the top of each iteration, so ``stop`` takes effect
        after the current pass and sleep complete.
        """
        if not self.config.enabled:
            return
        self._running = True
        try:
            while self._running:
                await self.run_once()
                await asyncio.sleep(self.config.interval_seconds)
        finally:
            self._running = False

    def stop(self) -> None:
        """Ask ``run_forever`` to exit at its next loop check."""
        self._running = False

    async def run_once(self) -> SkillLearningWorkerResult:
        """Process up to ``max_drafts_per_run`` candidates and report counts.

        An exception from one candidate is recorded in the result, the
        candidate is marked failed, and the pass continues with the next one.
        """
        if not self.config.enabled:
            return SkillLearningWorkerResult()
        async with self._lock:
            summary = SkillLearningWorkerResult()
            batch = self._select_candidates()[: self.config.max_drafts_per_run]
            for candidate in batch:
                summary.processed += 1
                try:
                    handled = await self._process_candidate(candidate)
                except Exception as exc:
                    message = str(exc)
                    summary.failed += 1
                    summary.failures.append({"candidate_id": candidate.candidate_id, "error": message})
                    self._mark_failure(candidate, message)
                    continue
                if handled:
                    summary.succeeded += 1
                else:
                    summary.skipped += 1
            return summary

    def _select_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates under the retry limit, best first.

        Ordering is descending by ``(priority, confidence, created_at)``.
        """
        retry_limit = self.config.max_retries
        eligible = [
            entry
            for entry in self.pipeline.list_candidates()
            if entry.status == "open" and entry.retry_count < retry_limit
        ]
        eligible.sort(key=lambda entry: (entry.priority, entry.confidence, entry.created_at), reverse=True)
        return eligible

    async def _process_candidate(self, candidate: SkillLearningCandidate) -> bool:
        """Drive one candidate through synthesis, safety check and evaluation.

        Returns:
            ``False`` when the candidate was superseded by an existing active
            draft; ``True`` otherwise, including when evaluation is skipped
            because the safety check failed or reported ``critical`` risk.
        """
        candidate_id = candidate.candidate_id
        if self._has_active_draft(candidate):
            self.pipeline.mark_candidate_superseded(candidate_id, "active draft already exists for this skill")
            return False

        self.pipeline.mark_candidate_queued(candidate_id)
        self.pipeline.mark_candidate_synthesizing(candidate_id)
        draft = await self.pipeline.synthesize_draft(
            candidate_id,
            provider_bundle=self.provider_bundle_factory(),
        )
        self.pipeline.mark_draft_synthesized(candidate_id, draft)

        safety = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
        if safety.passed and safety.risk_level != "critical":
            await self.pipeline.evaluate_draft(
                candidate_id,
                draft.skill_name,
                draft.draft_id,
                provider_bundle=self.provider_bundle_factory(),
            )
        return True

    def _has_active_draft(self, candidate: SkillLearningCandidate) -> bool:
        """Tell whether another in-flight candidate targets the same skill.

        Two candidates target the same skill when their sets of related skill
        names plus draft skill name overlap.
        """

        def target_names(entry: SkillLearningCandidate) -> set[str]:
            names = set(entry.related_skill_names)
            if entry.draft_skill_name:
                names.add(entry.draft_skill_name)
            return names

        wanted = target_names(candidate)
        if not wanted:
            return False
        return any(
            not wanted.isdisjoint(target_names(other))
            for other in self.pipeline.list_candidates()
            if other.candidate_id != candidate.candidate_id and other.status in self._ACTIVE_DRAFT_STATUSES
        )

    def _mark_failure(self, candidate: SkillLearningCandidate, error: str) -> None:
        """Record a failed attempt; terminal once the retry limit is reached."""
        attempts = candidate.retry_count + 1
        exhausted = attempts >= self.config.max_retries
        self.pipeline.mark_candidate_failed(
            candidate.candidate_id,
            error,
            retry_count=attempts,
            terminal=exhausted,
        )
def _env_bool(name: str, default: bool) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return raw.strip().lower() not in {"0", "false", "no", "off"}
def _env_int(name: str, default: int) -> int:
raw = os.getenv(name)
if raw in (None, ""):
return default
try:
return int(raw)
except ValueError:
return default

View File

@ -1,2 +1,6 @@
"""Skill publishing and version switching."""
"""Skill publish and rollback services."""
from .service import SkillPublisher
__all__ = ["SkillPublisher"]

View File

@ -0,0 +1,188 @@
"""Publishing, retirement, and rollback flows for Beaver skills."""
from __future__ import annotations
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.specs import SkillDraft, SkillReviewState, SkillSpec, SkillSpecStore, SkillStatus, SkillVersion
from beaver.skills.specs.serialization import canonical_hash, normalize_frontmatter, summarize_skill_content
class SkillPublisher:
    """Apply approved skill drafts to the store and manage activation state.

    Covers publishing a draft as a new version, applying retire proposals,
    disabling a skill, and rolling back to an earlier published version.
    Every operation persists through the injected ``SkillSpecStore`` and then
    refreshes the ``published`` / ``disabled`` indexes.
    """

    def __init__(self, store: SkillSpecStore) -> None:
        """Keep a reference to the backing store."""
        self.store = store

    def publish(self, skill_name: str, draft_id: str, publisher: str, notes: str = "") -> SkillVersion:
        """Publish an approved draft as the next version of ``skill_name``.

        Writes the version, makes it current, creates or updates the skill
        spec (status set to active), and marks the draft as published.

        Args:
            skill_name: Skill the draft belongs to.
            draft_id: Identifier of the draft to publish.
            publisher: Recorded as ``created_by`` and, for a new spec, as owner.
            notes: Change reason; falls back to the draft's own reason.

        Returns:
            The ``SkillVersion`` that was written.

        Raises:
            ValueError: If the draft is missing, not approved, is a
                ``retire_skill`` proposal, or renders to an empty body.
        """
        draft = self._require_draft(skill_name, draft_id)
        if draft.status != SkillReviewState.APPROVED.value:
            raise ValueError("Draft must be approved before publish")
        if draft.proposal_kind == "retire_skill":
            raise ValueError("Retire proposals must be applied through apply_retire_proposal")

        new_version = self._next_version(skill_name)
        rendered = self._render_skill_content(draft.proposed_frontmatter, draft.proposed_content)
        body = strip_frontmatter(rendered).strip()
        if not body:
            raise ValueError("Published skill content cannot be empty")

        frontmatter = normalize_frontmatter(draft.proposed_frontmatter)
        provenance = {
            "draft_id": draft_id,
            "proposal_kind": draft.proposal_kind,
            "trigger_run_id": draft.trigger_run_id,
            "trigger_session_id": draft.trigger_session_id,
        }
        version = SkillVersion(
            skill_name=skill_name,
            version=new_version,
            content_hash=canonical_hash(rendered),
            summary_hash=canonical_hash(body),
            created_at=_utc_now(),
            created_by=publisher,
            change_reason=notes or draft.reason,
            parent_version=draft.base_version,
            review_state=SkillReviewState.PUBLISHED.value,
            frontmatter=frontmatter,
            summary=summarize_skill_content(body),
            tool_hints=self.store._extract_tool_hints(frontmatter),
            provenance=provenance,
        )
        self.store.write_skill_version(version, rendered)
        self.store.set_current_version(skill_name, new_version)

        description = str(version.frontmatter.get("description") or skill_name)
        spec = self.store.get_skill_spec(skill_name)
        if spec is not None:
            spec.current_version = new_version
            spec.updated_at = _utc_now()
            spec.status = SkillStatus.ACTIVE.value
            if not spec.description:
                spec.description = description
        else:
            spec = SkillSpec(
                name=skill_name,
                display_name=skill_name,
                description=description,
                created_at=_utc_now(),
                updated_at=_utc_now(),
                current_version=new_version,
                status=SkillStatus.ACTIVE.value,
                tags=[],
                owners=[publisher],
                source_kind="managed",
                lineage=[],
            )
        self.store.write_skill_spec(spec)

        draft.status = SkillReviewState.PUBLISHED.value
        self.store.write_draft(draft)
        self._refresh_indexes(skill_name, spec.status)
        return version

    def apply_retire_proposal(self, skill_name: str, draft_id: str, actor: str, notes: str = "") -> SkillSpec:
        """Disable a skill on behalf of an approved ``retire_skill`` draft.

        Raises:
            ValueError: If the draft or spec is missing, the draft is not
                approved or not a retire proposal, or the draft's base version
                no longer matches the skill's current version.
        """
        draft = self._require_draft(skill_name, draft_id)
        if draft.status != SkillReviewState.APPROVED.value:
            raise ValueError("Retire proposal must be approved before apply")
        if draft.proposal_kind != "retire_skill":
            raise ValueError("Only retire_skill proposals can be applied as retire proposals")

        spec = self._require_spec(skill_name)
        targeted, current = draft.base_version, spec.current_version
        # Only enforce the match when both sides actually name a version.
        if targeted and current and targeted != current:
            raise ValueError(
                f"Retire proposal targets {draft.base_version}, but current version is {spec.current_version}"
            )

        reason = notes or draft.reason
        spec.status = SkillStatus.DISABLED.value
        spec.updated_at = _utc_now()
        if actor and actor not in spec.owners:
            spec.owners.append(actor)
        spec.lineage.append(f"retire_proposal:{draft_id}:{reason}")
        self.store.write_skill_spec(spec)

        draft.status = SkillReviewState.DISABLED.value
        self.store.write_draft(draft)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    def disable(self, skill_name: str, actor: str, reason: str) -> SkillSpec:
        """Mark a skill as disabled, recording the actor and optional reason.

        Raises:
            ValueError: If the skill spec does not exist.
        """
        spec = self._require_spec(skill_name)
        spec.status = SkillStatus.DISABLED.value
        spec.updated_at = _utc_now()
        if actor and actor not in spec.owners:
            spec.owners.append(actor)
        if reason:
            spec.lineage.append(f"disabled:{reason}")
        self.store.write_skill_spec(spec)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    def rollback(self, skill_name: str, target_version: str, actor: str, reason: str) -> SkillSpec:
        """Make ``target_version`` current again and reactivate the skill.

        Raises:
            ValueError: If the target version cannot be read from the store or
                the skill spec does not exist.
        """
        known = self.store.read_published_skill(skill_name, target_version)
        if known is None:
            raise ValueError(f"Unknown skill version for rollback: {skill_name}/{target_version}")

        spec = self._require_spec(skill_name)
        spec.current_version = target_version
        spec.updated_at = _utc_now()
        spec.status = SkillStatus.ACTIVE.value
        if reason:
            spec.lineage.append(f"rollback:{target_version}:{reason}")
        if actor and actor not in spec.owners:
            spec.owners.append(actor)
        self.store.write_skill_spec(spec)
        self.store.set_current_version(skill_name, target_version)
        self._refresh_indexes(skill_name, spec.status)
        return spec

    def _next_version(self, skill_name: str) -> str:
        """Return the next ``vNNNN`` label after the highest numbered version.

        Labels that are not ``v`` followed by digits are ignored; with none
        present the result is ``v0001``.
        """
        numbered = [
            int(label[1:])
            for label in self.store.list_versions(skill_name)
            if label.startswith("v") and label[1:].isdigit()
        ]
        return f"v{max(numbered, default=0) + 1:04d}"

    @staticmethod
    def _render_skill_content(frontmatter: dict, body: str) -> str:
        """Render normalized frontmatter plus body as skill file text.

        Without frontmatter the stripped body is returned (newline-terminated
        when non-empty). Otherwise a ``---`` delimited header is emitted with
        ``key: value`` lines and list values as `` - item`` lines.
        """
        normalized = normalize_frontmatter(frontmatter)
        stripped_body = body.strip()
        if not normalized:
            return f"{stripped_body}\n" if stripped_body else ""

        rendered = ["---"]
        for key, value in normalized.items():
            if isinstance(value, list):
                rendered.append(f"{key}:")
                rendered.extend(f" - {item}" for item in value)
            else:
                rendered.append(f"{key}: {value}")
        rendered += ["---", "", stripped_body]
        return "\n".join(rendered).rstrip() + "\n"

    def _refresh_indexes(self, skill_name: str, status: str) -> None:
        """Move ``skill_name`` between the published and disabled indexes."""
        published = self.store.read_index("published")
        disabled = self.store.read_index("disabled")
        if status != SkillStatus.DISABLED.value:
            if skill_name not in published:
                published.append(skill_name)
            disabled = [entry for entry in disabled if entry != skill_name]
        else:
            if skill_name in published:
                published = [entry for entry in published if entry != skill_name]
            if skill_name not in disabled:
                disabled.append(skill_name)
        self.store.update_index("published", published)
        self.store.update_index("disabled", disabled)

    def _require_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Load a draft or raise ``ValueError`` when it does not exist."""
        draft = self.store.read_draft(skill_name, draft_id)
        if draft is not None:
            return draft
        raise ValueError(f"Draft not found: {skill_name}/{draft_id}")

    def _require_spec(self, skill_name: str) -> SkillSpec:
        """Load a skill spec or raise ``ValueError`` when it does not exist."""
        spec = self.store.get_skill_spec(skill_name)
        if spec is not None:
            return spec
        raise ValueError(f"Skill spec not found: {skill_name}")
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -41,10 +41,20 @@ class RuntimeSkillResolver:
activated_skills: list[SkillContext] = []
for name in selected:
raw_content = self.loader.load_skill(name)
record = self.loader.get_skill_record(name)
raw_content = self.loader.load_published_skill(name)
content = strip_frontmatter(raw_content).strip() if raw_content else ""
if not content:
continue
activated_skills.append(SkillContext(name=name, content=content))
activated_skills.append(
SkillContext(
name=name,
content=content,
version=record.version if record is not None else "legacy",
content_hash=(record.content_hash if record is not None and record.content_hash else ""),
activation_reason="always_skill",
tool_hints=list(record.tool_hints) if record is not None else [],
)
)
return ResolvedSkillSet(activated_skills=activated_skills)

View File

@ -1,2 +1,6 @@
"""Skill review workflow."""
"""Skill review services."""
from .service import ReviewService
__all__ = ["ReviewService"]

View File

@ -0,0 +1,76 @@
"""Review workflow for Beaver skill drafts."""
from __future__ import annotations
from uuid import uuid4
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpecStore
class ReviewService:
    """Record review requests and decisions for skill drafts.

    Each operation updates the draft's ``status``, persists the draft, and
    writes a new ``SkillReviewRecord`` with a fresh ``review_id``. No check is
    made on the draft's previous status.
    """

    def __init__(self, store: SkillSpecStore) -> None:
        """Keep a reference to the backing store."""
        self.store = store

    def submit_for_review(self, skill_name: str, draft_id: str, reviewer_request: str, requested_by: str = "system") -> SkillReviewRecord:
        """Move a draft to ``in_review`` and log the request.

        Args:
            skill_name: Skill the draft belongs to.
            draft_id: Identifier of the draft.
            reviewer_request: Free text stored as the record's notes.
            requested_by: Who asked for the review.

        Returns:
            The persisted review record.

        Raises:
            ValueError: If the draft does not exist.
        """
        draft = self._require_draft(skill_name, draft_id)
        draft.status = SkillReviewState.IN_REVIEW.value
        self.store.write_draft(draft)
        review = SkillReviewRecord(
            review_id=uuid4().hex,
            draft_id=draft_id,
            skill_name=skill_name,
            requested_at=_utc_now(),
            requested_by=requested_by,
            status=SkillReviewState.IN_REVIEW.value,
            notes=reviewer_request,
        )
        self.store.write_review(review)
        return review

    def approve(self, skill_name: str, draft_id: str, reviewer: str, notes: str = "") -> SkillReviewRecord:
        """Mark a draft as approved and log the decision.

        Raises:
            ValueError: If the draft does not exist.
        """
        return self._record_decision(skill_name, draft_id, reviewer, notes, SkillReviewState.APPROVED)

    def reject(self, skill_name: str, draft_id: str, reviewer: str, notes: str = "") -> SkillReviewRecord:
        """Mark a draft as rejected and log the decision.

        Raises:
            ValueError: If the draft does not exist.
        """
        return self._record_decision(skill_name, draft_id, reviewer, notes, SkillReviewState.REJECTED)

    def _record_decision(
        self,
        skill_name: str,
        draft_id: str,
        reviewer: str,
        notes: str,
        state: SkillReviewState,
    ) -> SkillReviewRecord:
        """Shared body of ``approve`` and ``reject``.

        The reviewer is recorded as both requester and reviewer, and a single
        timestamp is used for ``requested_at`` and ``reviewed_at`` so the two
        fields of one decision cannot drift apart.
        """
        draft = self._require_draft(skill_name, draft_id)
        draft.status = state.value
        self.store.write_draft(draft)
        decided_at = _utc_now()
        review = SkillReviewRecord(
            review_id=uuid4().hex,
            draft_id=draft_id,
            skill_name=skill_name,
            requested_at=decided_at,
            requested_by=reviewer,
            status=state.value,
            reviewer=reviewer,
            reviewed_at=decided_at,
            notes=notes,
        )
        self.store.write_review(review)
        return review

    def _require_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Load a draft or raise ``ValueError`` when it does not exist."""
        draft = self.store.read_draft(skill_name, draft_id)
        if draft is None:
            raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
        return draft
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,23 @@
"""Structured skill lifecycle models and storage."""
from .models import (
SkillActivationReceipt,
SkillDraft,
SkillReviewRecord,
SkillReviewState,
SkillSpec,
SkillStatus,
SkillVersion,
)
from .storage import SkillSpecStore
__all__ = [
"SkillActivationReceipt",
"SkillDraft",
"SkillReviewRecord",
"SkillReviewState",
"SkillSpec",
"SkillSpecStore",
"SkillStatus",
"SkillVersion",
]

View File

@ -0,0 +1,267 @@
"""Structured models for Beaver skill lifecycle."""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
class SkillReviewState(str, Enum):
    """States used in draft ``status``, review ``status`` and version ``review_state``.

    Members are also ``str`` instances; the models in this module store the
    plain ``.value`` strings.
    """

    DRAFT = "draft"  # default status of a newly created SkillDraft
    IN_REVIEW = "in_review"
    APPROVED = "approved"
    REJECTED = "rejected"
    PUBLISHED = "published"  # default review_state of a SkillVersion
    DISABLED = "disabled"
    ARCHIVED = "archived"
class SkillStatus(str, Enum):
    """Activation status stored in ``SkillSpec.status``.

    Members are also ``str`` instances; specs store the plain ``.value``.
    """

    ACTIVE = "active"  # default status of a SkillSpec
    DISABLED = "disabled"
    ARCHIVED = "archived"
@dataclass(slots=True)
class SkillSpec:
    """Top-level metadata record for one skill.

    Round-trips through ``to_dict`` / ``from_dict``; timestamps are kept as
    strings and ``status`` holds a ``SkillStatus`` value string.
    """

    name: str
    display_name: str
    description: str
    created_at: str
    updated_at: str
    current_version: str | None
    status: str = SkillStatus.ACTIVE.value
    tags: list[str] = field(default_factory=list)
    owners: list[str] = field(default_factory=list)
    source_kind: str = "workspace"
    lineage: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dict; list fields are shallow-copied."""
        scalar_fields = (
            "name",
            "display_name",
            "description",
            "created_at",
            "updated_at",
            "current_version",
            "status",
        )
        data: dict[str, Any] = {key: getattr(self, key) for key in scalar_fields}
        data["tags"] = list(self.tags)
        data["owners"] = list(self.owners)
        data["source_kind"] = self.source_kind
        data["lineage"] = list(self.lineage)
        return data

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillSpec":
        """Build a spec from a dict, filling gaps with fallbacks.

        ``name`` is required (``KeyError`` when absent). ``display_name``
        falls back to the name, ``description`` to the display name then the
        name, and ``updated_at`` to ``created_at``.
        """
        name = payload["name"]
        display_name = payload.get("display_name")
        created_at = payload.get("created_at")
        return cls(
            name=str(name),
            display_name=str(display_name or name),
            description=str(payload.get("description") or display_name or name),
            created_at=str(created_at or ""),
            updated_at=str(payload.get("updated_at") or created_at or ""),
            current_version=_coerce_optional_str(payload.get("current_version")),
            status=str(payload.get("status") or SkillStatus.ACTIVE.value),
            tags=_coerce_string_list(payload.get("tags")),
            owners=_coerce_string_list(payload.get("owners")),
            source_kind=str(payload.get("source_kind") or "workspace"),
            lineage=_coerce_string_list(payload.get("lineage")),
        )
@dataclass(slots=True)
class SkillVersion:
    """Metadata describing one version of a skill's content.

    Carries content and summary hashes, authorship, the parent version it was
    derived from, normalized frontmatter, a short summary, tool hints and a
    free-form provenance mapping.
    """

    skill_name: str
    version: str
    content_hash: str
    summary_hash: str
    created_at: str
    created_by: str
    change_reason: str
    parent_version: str | None = None
    review_state: str = SkillReviewState.PUBLISHED.value
    frontmatter: dict[str, Any] = field(default_factory=dict)
    summary: str = ""
    tool_hints: list[str] = field(default_factory=list)
    provenance: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dict; containers are shallow-copied."""
        scalar_fields = (
            "skill_name",
            "version",
            "content_hash",
            "summary_hash",
            "created_at",
            "created_by",
            "change_reason",
            "parent_version",
            "review_state",
        )
        data: dict[str, Any] = {key: getattr(self, key) for key in scalar_fields}
        data["frontmatter"] = dict(self.frontmatter)
        data["summary"] = self.summary
        data["tool_hints"] = list(self.tool_hints)
        data["provenance"] = dict(self.provenance)
        return data

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillVersion":
        """Build a version from a dict.

        ``skill_name`` and ``version`` are required (``KeyError`` when
        absent); every other field has a fallback.
        """

        def text(key: str, fallback: str = "") -> str:
            # Missing and falsy values both collapse to the fallback.
            return str(payload.get(key) or fallback)

        return cls(
            skill_name=str(payload["skill_name"]),
            version=str(payload["version"]),
            content_hash=text("content_hash"),
            summary_hash=text("summary_hash"),
            created_at=text("created_at"),
            created_by=text("created_by", "unknown"),
            change_reason=text("change_reason"),
            parent_version=_coerce_optional_str(payload.get("parent_version")),
            review_state=text("review_state", SkillReviewState.PUBLISHED.value),
            frontmatter=dict(payload.get("frontmatter") or {}),
            summary=text("summary"),
            tool_hints=_coerce_string_list(payload.get("tool_hints")),
            provenance=dict(payload.get("provenance") or {}),
        )
@dataclass(slots=True)
class SkillDraft:
    """A proposed change to a skill, awaiting review.

    Holds the proposed content and frontmatter, the version it is based on,
    what triggered it, supporting evidence references, and the kind of
    proposal (``revise_skill`` by default).
    """

    draft_id: str
    skill_name: str
    base_version: str | None
    proposed_content: str
    proposed_frontmatter: dict[str, Any]
    created_at: str
    created_by: str
    trigger_run_id: str | None = None
    trigger_session_id: str | None = None
    reason: str = ""
    status: str = SkillReviewState.DRAFT.value
    evidence_refs: list[dict[str, Any]] = field(default_factory=list)
    proposal_kind: str = "revise_skill"

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dict; containers are shallow-copied."""
        data: dict[str, Any] = {
            key: getattr(self, key)
            for key in ("draft_id", "skill_name", "base_version", "proposed_content")
        }
        data["proposed_frontmatter"] = dict(self.proposed_frontmatter)
        for key in (
            "created_at",
            "created_by",
            "trigger_run_id",
            "trigger_session_id",
            "reason",
            "status",
        ):
            data[key] = getattr(self, key)
        data["evidence_refs"] = list(self.evidence_refs)
        data["proposal_kind"] = self.proposal_kind
        return data

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "SkillDraft":
        """Build a draft from a dict.

        ``draft_id`` and ``skill_name`` are required (``KeyError`` when
        absent); every other field has a fallback.
        """

        def text(key: str, fallback: str = "") -> str:
            # Missing and falsy values both collapse to the fallback.
            return str(payload.get(key) or fallback)

        return cls(
            draft_id=str(payload["draft_id"]),
            skill_name=str(payload["skill_name"]),
            base_version=_coerce_optional_str(payload.get("base_version")),
            proposed_content=text("proposed_content"),
            proposed_frontmatter=dict(payload.get("proposed_frontmatter") or {}),
            created_at=text("created_at"),
            created_by=text("created_by", "unknown"),
            trigger_run_id=_coerce_optional_str(payload.get("trigger_run_id")),
            trigger_session_id=_coerce_optional_str(payload.get("trigger_session_id")),
            reason=text("reason"),
            status=text("status", SkillReviewState.DRAFT.value),
            evidence_refs=list(payload.get("evidence_refs") or []),
            proposal_kind=text("proposal_kind", "revise_skill"),
        )
@dataclass(slots=True)
class SkillReviewRecord:
review_id: str
draft_id: str
skill_name: str
requested_at: str
requested_by: str
status: str
reviewer: str | None = None
reviewed_at: str | None = None
notes: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"review_id": self.review_id,
"draft_id": self.draft_id,
"skill_name": self.skill_name,
"requested_at": self.requested_at,
"requested_by": self.requested_by,
"status": self.status,
"reviewer": self.reviewer,
"reviewed_at": self.reviewed_at,
"notes": self.notes,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillReviewRecord":
return cls(
review_id=str(payload["review_id"]),
draft_id=str(payload["draft_id"]),
skill_name=str(payload["skill_name"]),
requested_at=str(payload.get("requested_at") or ""),
requested_by=str(payload.get("requested_by") or "unknown"),
status=str(payload.get("status") or SkillReviewState.IN_REVIEW.value),
reviewer=_coerce_optional_str(payload.get("reviewer")),
reviewed_at=_coerce_optional_str(payload.get("reviewed_at")),
notes=str(payload.get("notes") or ""),
)
@dataclass(slots=True)
class SkillActivationReceipt:
run_id: str
session_id: str
skill_name: str
skill_version: str
content_hash: str
activated_at: str
activation_reason: str
tool_hints: list[str] = field(default_factory=list)
def to_dict(self) -> dict[str, Any]:
return {
"run_id": self.run_id,
"session_id": self.session_id,
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"content_hash": self.content_hash,
"activated_at": self.activated_at,
"activation_reason": self.activation_reason,
"tool_hints": list(self.tool_hints),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillActivationReceipt":
return cls(
run_id=str(payload["run_id"]),
session_id=str(payload["session_id"]),
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
content_hash=str(payload.get("content_hash") or ""),
activated_at=str(payload.get("activated_at") or ""),
activation_reason=str(payload.get("activation_reason") or ""),
tool_hints=_coerce_string_list(payload.get("tool_hints")),
)
def _coerce_optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _coerce_string_list(value: Any) -> list[str]:
if not isinstance(value, list):
return []
result: list[str] = []
for item in value:
text = str(item).strip()
if text:
result.append(text)
return result

View File

@ -0,0 +1,42 @@
"""Serialization helpers for structured skill lifecycle objects."""
from __future__ import annotations
from hashlib import sha256
import json
from typing import Any
def json_dumps(payload: Any) -> str:
    """Serialize ``payload`` to JSON with stable, human-readable formatting.

    Output keeps non-ASCII characters as-is, indents by two spaces and sorts
    object keys.
    """
    options = {"ensure_ascii": False, "indent": 2, "sort_keys": True}
    return json.dumps(payload, **options)
def canonical_hash(text: str) -> str:
    """Return the SHA-256 hex digest of ``text`` encoded as UTF-8."""
    digest = sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
def normalize_frontmatter(frontmatter: dict[str, Any] | None) -> dict[str, Any]:
    """Return a cleaned copy of a frontmatter mapping.

    Rules, applied per entry (keys are converted with ``str``):

    * ``None`` values are dropped.
    * String values are stripped; blank strings are dropped.
    * List values become lists of stripped, non-blank strings (an empty list
      is kept).
    * Any other value is kept unchanged.

    Args:
        frontmatter: Source mapping, or ``None`` for an empty result.
    """
    cleaned: dict[str, Any] = {}
    for key, value in dict(frontmatter or {}).items():
        if value is None:
            continue
        if isinstance(value, str):
            text = value.strip()
            if text:
                cleaned[str(key)] = text
        elif isinstance(value, list):
            stripped_items = (str(item).strip() for item in value)
            cleaned[str(key)] = [item for item in stripped_items if item]
        else:
            cleaned[str(key)] = value
    return cleaned
def summarize_skill_content(content: str, *, max_lines: int = 3, max_chars: int = 240) -> str:
    """Build a one-line summary from the first non-blank lines of ``content``.

    Args:
        content: Skill body text.
        max_lines: Number of non-blank lines to join with single spaces.
        max_chars: Hard cap on the summary length, applied after joining.

    Returns:
        The summary, or ``""`` when ``content`` has no non-blank line.
    """
    stripped_lines = (line.strip() for line in content.splitlines())
    meaningful = [line for line in stripped_lines if line]
    if not meaningful:
        return ""
    joined = " ".join(meaningful[:max_lines]).strip()
    return joined[:max_chars].strip()

View File

@ -0,0 +1,268 @@
"""File-backed storage for Beaver skill lifecycle artifacts."""
from __future__ import annotations
from dataclasses import dataclass
import json
from pathlib import Path
from typing import Any
from beaver.skills.catalog.utils import parse_frontmatter
from .models import SkillDraft, SkillReviewRecord, SkillSpec, SkillVersion
from .serialization import canonical_hash, json_dumps, normalize_frontmatter, summarize_skill_content
@dataclass(slots=True)
class LoadedSkillVersion:
version: SkillVersion
content: str
class SkillSpecStore:
"""Manage structured skill lifecycle state inside the workspace."""
def __init__(self, workspace: str | Path) -> None:
self.workspace = Path(workspace)
self.root = self.workspace / "skills"
self.index_dir = self.root / "_index"
self.root.mkdir(parents=True, exist_ok=True)
self.index_dir.mkdir(parents=True, exist_ok=True)
def list_published_skill_names(self) -> list[str]:
names: list[str] = []
for child in self._iter_skill_dirs():
if not self._has_published_representation(child):
continue
spec = self.get_skill_spec(child.name)
if spec is not None and spec.status != "active":
continue
names.append(child.name)
return names
def list_skill_specs(self) -> list[SkillSpec]:
specs: list[SkillSpec] = []
for name in self.list_skill_names():
spec = self.get_skill_spec(name)
if spec is not None:
specs.append(spec)
return specs
def list_skill_names(self) -> list[str]:
return [child.name for child in self._iter_skill_dirs()]
def get_skill_spec(self, name: str) -> SkillSpec | None:
directory = self._skill_dir(name)
path = directory / "skill.json"
if path.exists():
return SkillSpec.from_dict(self._read_json(path))
if not self._has_published_representation(directory):
return None
legacy = self.read_published_skill(name)
if legacy is None:
return None
return SkillSpec(
name=name,
display_name=name,
description=str(legacy.version.frontmatter.get("description") or name),
created_at=legacy.version.created_at,
updated_at=legacy.version.created_at,
current_version=legacy.version.version,
status="active",
tags=[],
owners=[],
source_kind="legacy",
lineage=[],
)
def write_skill_spec(self, spec: SkillSpec) -> None:
directory = self._skill_dir(spec.name)
directory.mkdir(parents=True, exist_ok=True)
self._write_json(directory / "skill.json", spec.to_dict())
def get_current_version(self, name: str) -> str | None:
directory = self._skill_dir(name)
current_path = directory / "current.json"
if current_path.exists():
return str(self._read_json(current_path).get("current_version") or "") or None
if (directory / "SKILL.md").exists():
return "legacy"
spec = self.get_skill_spec(name)
if spec is not None and spec.current_version:
return spec.current_version
return None
def set_current_version(self, name: str, version: str) -> None:
directory = self._skill_dir(name)
directory.mkdir(parents=True, exist_ok=True)
self._write_json(directory / "current.json", {"current_version": version})
spec = self.get_skill_spec(name)
if spec is not None:
spec.current_version = version
self.write_skill_spec(spec)
def list_versions(self, name: str) -> list[str]:
directory = self._skill_dir(name) / "versions"
if not directory.exists():
current = self.get_current_version(name)
return [current] if current else []
versions: list[str] = []
for child in sorted(directory.iterdir()):
if child.is_dir():
versions.append(child.name)
return versions
def read_published_skill(self, name: str, version: str | None = None) -> LoadedSkillVersion | None:
requested_version = version or self.get_current_version(name)
if requested_version is None:
return None
directory = self._skill_dir(name)
if requested_version == "legacy":
skill_file = directory / "SKILL.md"
if not skill_file.exists():
return None
content = skill_file.read_text(encoding="utf-8")
frontmatter, body = parse_frontmatter(content)
normalized_frontmatter = normalize_frontmatter(frontmatter)
tool_hints = self._extract_tool_hints(normalized_frontmatter)
loaded = SkillVersion(
skill_name=name,
version="legacy",
content_hash=canonical_hash(content),
summary_hash=canonical_hash(body),
created_at="legacy",
created_by="legacy",
change_reason="legacy_import",
review_state="published",
frontmatter=normalized_frontmatter,
summary=summarize_skill_content(body),
tool_hints=tool_hints,
provenance={"source_kind": "legacy"},
)
return LoadedSkillVersion(version=loaded, content=content)
version_dir = directory / "versions" / requested_version
version_file = version_dir / "version.json"
skill_file = version_dir / "SKILL.md"
if not version_file.exists() or not skill_file.exists():
return None
payload = self._read_json(version_file)
loaded = SkillVersion.from_dict(payload)
content = skill_file.read_text(encoding="utf-8")
return LoadedSkillVersion(version=loaded, content=content)
def write_skill_version(self, version: SkillVersion, content: str) -> None:
    """Persist a skill version as ``version.json`` plus ``SKILL.md``.

    Both files are written under ``versions/<version>/`` inside the skill
    directory, which is created when missing.
    """
    skill_root = self._skill_dir(version.skill_name)
    target_dir = skill_root / "versions" / version.version
    target_dir.mkdir(parents=True, exist_ok=True)

    metadata = version.to_dict()
    self._write_json(target_dir / "version.json", metadata)
    self._write_text(target_dir / "SKILL.md", content)
def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
    """Return the drafts stored for one skill, or for every known skill.

    With *skill_name* given only that skill is inspected; otherwise every
    name from ``list_skill_names()`` is. Draft files (``draft-*.json``) are
    read in sorted path order per skill.
    """
    candidate_names = [skill_name] if skill_name else self.list_skill_names()
    collected: list[SkillDraft] = []
    # filter(None, ...) drops empty names.
    for candidate in filter(None, candidate_names):
        drafts_root = self._skill_dir(candidate) / "drafts"
        if drafts_root.exists():
            collected.extend(
                SkillDraft.from_dict(self._read_json(draft_path))
                for draft_path in sorted(drafts_root.glob("draft-*.json"))
            )
    return collected
def read_draft(self, skill_name: str, draft_id: str) -> SkillDraft | None:
    """Load draft *draft_id* of skill *skill_name*, or ``None`` when its file is absent."""
    draft_path = self._skill_dir(skill_name) / "drafts" / f"draft-{draft_id}.json"
    if draft_path.exists():
        return SkillDraft.from_dict(self._read_json(draft_path))
    return None
def write_draft(self, draft: SkillDraft) -> None:
    """Persist *draft* as ``drafts/draft-<draft_id>.json`` inside its skill directory."""
    drafts_root = self._skill_dir(draft.skill_name) / "drafts"
    drafts_root.mkdir(parents=True, exist_ok=True)
    destination = drafts_root / f"draft-{draft.draft_id}.json"
    self._write_json(destination, draft.to_dict())
def list_reviews(self, skill_name: str, draft_id: str | None = None) -> list[SkillReviewRecord]:
    """Return the review records stored for *skill_name*.

    Review files (``review-*.json``) are read in sorted path order. When
    *draft_id* is given, only records whose ``draft_id`` matches are kept.
    """
    reviews_root = self._skill_dir(skill_name) / "reviews"
    if not reviews_root.exists():
        return []

    matches: list[SkillReviewRecord] = []
    for review_path in sorted(reviews_root.glob("review-*.json")):
        review = SkillReviewRecord.from_dict(self._read_json(review_path))
        # No filter requested, or the record belongs to the requested draft.
        if not draft_id or review.draft_id == draft_id:
            matches.append(review)
    return matches
def write_review(self, review: SkillReviewRecord) -> None:
    """Persist *review* as ``reviews/review-<review_id>.json`` inside its skill directory."""
    reviews_root = self._skill_dir(review.skill_name) / "reviews"
    reviews_root.mkdir(parents=True, exist_ok=True)
    destination = reviews_root / f"review-{review.review_id}.json"
    self._write_json(destination, review.to_dict())
def update_index(self, index_name: str, values: list[str]) -> None:
    """Overwrite the ``<index_name>.json`` index with *values*.

    Duplicates are removed while keeping the first occurrence of each value.
    """
    unique_values = list(dict.fromkeys(values))
    index_path = self.index_dir / f"{index_name}.json"
    self._write_json(index_path, {"items": unique_values})
def read_index(self, index_name: str) -> list[str]:
    """Return the entries stored in the ``<index_name>.json`` index file.

    Returns an empty list when the index file does not exist, when the decoded
    payload is not a dict, or when its ``items`` value is not a list. Entries
    are converted to strings, surrounding whitespace is removed and blank
    entries are dropped.
    """
    path = self.index_dir / f"{index_name}.json"
    if not path.exists():
        return []
    payload = self._read_json(path)
    if not isinstance(payload, dict):
        return []
    items = payload.get("items")
    if not isinstance(items, list):
        return []
    # Return the stripped text: previously the filter looked at the stripped
    # value but the unstripped one was returned, so padded entries such as
    # " name " leaked through to callers.
    cleaned = (str(item).strip() for item in items)
    return [entry for entry in cleaned if entry]
def archive_current_version(self, skill_name: str, version: str) -> None:
    """Move ``versions/<version>`` of a skill to ``archive/<version>``.

    Does nothing when the version directory is missing. When the archive
    target already exists the version directory is left where it is.
    """
    skill_root = self._skill_dir(skill_name)
    source = skill_root / "versions" / version
    if not source.exists():
        return

    destination = skill_root / "archive" / version
    destination.parent.mkdir(parents=True, exist_ok=True)
    if not destination.exists():
        source.rename(destination)
def _has_published_representation(self, directory: Path) -> bool:
    """Tell whether *directory* contains ``SKILL.md``, ``current.json`` or ``versions``."""
    markers = ("SKILL.md", "current.json", "versions")
    return any((directory / marker).exists() for marker in markers)
def _skill_dir(self, name: str) -> Path:
    """Return the directory of skill *name* directly below the store root."""
    return self.root.joinpath(name)
def _iter_skill_dirs(self) -> list[Path]:
    """Return the sub-directories of the store root in sorted order.

    Directories whose name starts with ``_`` are skipped.
    """
    visible: list[Path] = []
    for entry in sorted(self.root.iterdir()):
        if not entry.is_dir():
            continue
        if entry.name.startswith("_"):
            continue
        visible.append(entry)
    return visible
@staticmethod
def _extract_tool_hints(frontmatter: dict[str, Any]) -> list[str]:
raw = frontmatter.get("tools")
if isinstance(raw, list):
return [str(item).strip() for item in raw if str(item).strip()]
if isinstance(raw, str):
return [item.strip() for item in raw.split(",") if item.strip()]
return []
@staticmethod
def _read_json(path: Path) -> dict[str, Any]:
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"Expected JSON object in {path}")
return payload
@staticmethod
def _write_json(path: Path, payload: dict[str, Any]) -> None:
    """Serialize *payload* with ``json_dumps`` and write it to *path* as UTF-8.

    Missing parent directories are created and a trailing newline is appended.
    """
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    serialized = json_dumps(payload) + "\n"
    path.write_text(serialized, encoding="utf-8")
@staticmethod
def _write_text(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content, encoding="utf-8")

View File

@ -0,0 +1,22 @@
"""Internal task tracking for automatic Main Agent task mode."""
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult
from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
__all__ = [
"MainAgentDecision",
"MainAgentRouter",
"TaskEvent",
"TaskExecutionPlan",
"TaskExecutionPlanner",
"TaskRecord",
"TaskService",
"SkillResolutionReport",
"TaskSkillResolver",
"ValidationResult",
"ValidationService",
]

View File

@ -0,0 +1,178 @@
"""Models for internal task tracking and validation."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_revision"}
@dataclass(slots=True)
class ValidationResult:
passed: bool
score: float
issues: list[str] = field(default_factory=list)
missing_requirements: list[str] = field(default_factory=list)
recommended_revision_prompt: str = ""
validator: str = "heuristic"
@property
def accepted(self) -> bool:
return self.passed and self.score >= 0.75
def to_dict(self) -> dict[str, Any]:
return {
"passed": self.passed,
"score": self.score,
"issues": list(self.issues),
"missing_requirements": list(self.missing_requirements),
"recommended_revision_prompt": self.recommended_revision_prompt,
"validator": self.validator,
"accepted": self.accepted,
}
@classmethod
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
if not isinstance(payload, dict):
return None
return cls(
passed=bool(payload.get("passed")),
score=float(payload.get("score", 0.0) or 0.0),
issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator=str(payload.get("validator") or "unknown"),
)
@dataclass(slots=True)
class TaskRecord:
task_id: str
session_id: str
description: str
goal: str
constraints: list[str]
priority: int
status: str
creator: str
created_at: str
updated_at: str
parent_task_id: str | None = None
closed_at: str | None = None
close_reason: str | None = None
satisfaction: float | None = None
run_ids: list[str] = field(default_factory=list)
skill_names: list[str] = field(default_factory=list)
feedback: list[dict[str, Any]] = field(default_factory=list)
validation_result: dict[str, Any] | None = None
metadata: dict[str, Any] = field(default_factory=dict)
@property
def is_open(self) -> bool:
return self.status in TASK_OPEN_STATUSES
def to_dict(self) -> dict[str, Any]:
return {
"task_id": self.task_id,
"session_id": self.session_id,
"parent_task_id": self.parent_task_id,
"description": self.description,
"goal": self.goal,
"constraints": list(self.constraints),
"priority": self.priority,
"status": self.status,
"creator": self.creator,
"created_at": self.created_at,
"updated_at": self.updated_at,
"closed_at": self.closed_at,
"close_reason": self.close_reason,
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "TaskRecord":
return cls(
task_id=str(payload["task_id"]),
session_id=str(payload["session_id"]),
parent_task_id=_optional_str(payload.get("parent_task_id")),
description=str(payload.get("description") or ""),
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
closed_at=_optional_str(payload.get("closed_at")),
close_reason=_optional_str(payload.get("close_reason")),
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@dataclass(slots=True)
class TaskEvent:
event_id: str
task_id: str
session_id: str
event_type: str
created_at: str
run_id: str | None = None
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"task_id": self.task_id,
"session_id": self.session_id,
"run_id": self.run_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "TaskEvent":
return cls(
event_id=str(payload["event_id"]),
task_id=str(payload["task_id"]),
session_id=str(payload["session_id"]),
run_id=_optional_str(payload.get("run_id")),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class MainAgentDecision:
mode: str
reason: str
starts_new_task: bool = False
@property
def is_task(self) -> bool:
return self.mode == "task"
def _optional_str(value: Any) -> str | None:
    """Return ``str(value)``, or ``None`` when *value* is ``None`` or an empty string."""
    return None if value in (None, "") else str(value)
def _optional_float(value: Any) -> float | None:
    """Return ``float(value)``, or ``None`` when *value* is ``None`` or an empty string."""
    return None if value in (None, "") else float(value)

View File

@ -0,0 +1,288 @@
"""Internal Task execution planner for single-agent vs team execution."""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
TaskExecutionMode = Literal["single", "team"]
@dataclass(slots=True)
class TaskExecutionPlan:
mode: TaskExecutionMode
reason: str = ""
graph: ExecutionGraph | None = None
final_synthesis_instruction: str = ""
fallback_error: str | None = None
skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
@property
def is_team(self) -> bool:
return self.mode == "team" and self.graph is not None
@classmethod
def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
return cls(mode="single", reason=reason, fallback_error=fallback_error)
def to_event_payload(self) -> dict[str, Any]:
strategy = self.graph.strategy if self.graph is not None else None
nodes = self.graph.nodes if self.graph is not None else []
return {
"plan_mode": self.mode,
"reason": self.reason,
"strategy": strategy,
"node_ids": [node.node_id for node in nodes],
"skill_queries": [
str(node.agent.metadata.get("skill_query") or "")
for node in nodes
],
"selected_skill_names": [
name
for node in nodes
for name in node.inherited_pinned_skills
],
"generated_skill_draft_ids": [
item.generated_skill_draft_id
for item in self.skill_resolution_report
if item.generated_skill_draft_id
],
"skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
"fallback_error": self.fallback_error,
}
class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first.

    The planner asks a model for a compact JSON plan and converts it into a
    ``TaskExecutionPlan``. Every failure (no provider, provider error, invalid
    JSON, invalid graph, skill resolution error) degrades to a single-agent
    plan whose ``fallback_error`` carries the reason.
    """

    _MAX_NODES = 6
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}

    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
        """Store the optional resolver used to pin skills onto team nodes."""
        self.task_skill_resolver = task_skill_resolver

    async def plan(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> TaskExecutionPlan:
        """Ask the model for an execution plan for this task attempt.

        The auxiliary provider/runtime is preferred over the main one. Returns
        a single-agent plan when no provider is available or anything fails.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")

        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You choose whether an internal Beaver Task attempt should run as a single "
                            "main-agent pass or use a small sub-agent team first. Return only compact JSON."
                        ),
                    },
                    {
                        "role": "user",
                        "content": self._prompt(
                            task=task,
                            user_message=user_message,
                            attempt_index=attempt_index,
                            latest_validation=latest_validation,
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0.0,
            )
            plan = self.from_json(response.content or "")
            return await self._resolve_plan(
                plan,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
        except Exception as exc:
            # Deliberate catch-all: planning is optional, so any failure falls
            # back to a single-agent pass and the error is kept for the event log.
            return TaskExecutionPlan.single("planner_failed", fallback_error=str(exc))

    async def _resolve_plan(
        self,
        plan: TaskExecutionPlan,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle | None,
    ) -> TaskExecutionPlan:
        """Pin skills onto the nodes of a team plan via the configured resolver.

        Non-team plans, and team plans when no resolver is configured, are
        returned unchanged. On success *plan* is updated in place and returned.
        """
        graph = plan.graph
        # An explicit None check replaces the former ``assert``, which is
        # stripped when Python runs with -O.
        if not plan.is_team or graph is None or self.task_skill_resolver is None:
            return plan
        if provider_bundle is None:
            return TaskExecutionPlan.single(
                "planner_fallback_single",
                fallback_error="task_skill_resolver_provider_unavailable",
            )
        try:
            resolved_graph, reports = await self.task_skill_resolver.resolve_graph(
                graph,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            resolved_graph.validate()
            plan.graph = resolved_graph
            plan.skill_resolution_report = reports
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single(
                "planner_fallback_single",
                fallback_error=f"task_skill_resolver_failed: {exc}",
            )

    def from_json(self, text: str) -> TaskExecutionPlan:
        """Turn the model's JSON reply into a plan.

        Any mode other than ``"team"`` yields a single-agent plan. Parsing or
        graph validation errors also yield a single-agent plan, with the error
        text stored in ``fallback_error``.
        """
        try:
            payload = self._parse_json_object(text)
            mode = str(payload.get("mode") or "single").strip().lower()
            reason = str(payload.get("reason") or "")
            if mode != "team":
                return TaskExecutionPlan.single(reason or "planner_selected_single")
            graph = self._graph_from_payload(payload)
            graph.validate()
            return TaskExecutionPlan(
                mode="team",
                reason=reason or "planner_selected_team",
                graph=graph,
                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
            )
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
        """Build the execution graph described by a team plan payload.

        Raises:
            ValueError: for an unsupported strategy, a missing/empty/oversized
                node list, a node that is not an object, a node without id or
                task, or a list-valued node field of an unsupported type.
        """
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
        raw_nodes = payload.get("nodes")
        if not isinstance(raw_nodes, list) or not raw_nodes:
            raise ValueError("Team plan requires at least one node")
        if len(raw_nodes) > self._MAX_NODES:
            raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")

        nodes: list[ExecutionNode] = []
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
            skill_query = str(
                item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or ""
            ).strip()
            requested_capabilities = _string_list(
                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
            )
            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
            node_id = str(
                item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}"
            ).strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
                raise ValueError("Each team node requires node_id/id and task")
            nodes.append(
                ExecutionNode(
                    node_id=node_id,
                    task=task,
                    agent=AgentDescriptor(
                        name=node_id,
                        role="",
                        system_prompt="",
                        metadata={
                            "skill_query": skill_query,
                            "required_capabilities": requested_capabilities,
                            "requested_tags": requested_tags,
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    # Models sometimes emit a bare string where a list is
                    # expected; _text_items keeps it as one item instead of
                    # letting it be iterated character by character.
                    depends_on=self._text_items(item.get("depends_on"), "depends_on"),
                    inherited_pinned_skills=self._text_items(item.get("pinned_skills"), "pinned_skills"),
                    constraints=self._text_items(item.get("constraints"), "constraints"),
                    expected_output=str(item.get("expected_output") or "") or None,
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

    @staticmethod
    def _text_items(value: Any, field_name: str) -> list[str]:
        """Coerce an optional list-valued node field into a list of strings.

        ``None`` and other falsy values give ``[]``; a bare string becomes a
        single stripped item (dropped when blank); list/tuple items are
        converted with ``str``.

        Raises:
            ValueError: when *value* is any other truthy type.
        """
        if not value:
            return []
        if isinstance(value, str):
            stripped = value.strip()
            return [stripped] if stripped else []
        if isinstance(value, (list, tuple)):
            return [str(entry) for entry in value]
        raise ValueError(f"Team node field {field_name!r} must be a list of strings")

    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None,
    ) -> str:
        """Render the user prompt: instructions, JSON schema and the task context."""
        validation_note = ""
        if latest_validation is not None:
            validation_note = (
                "\nPrevious validation issues:\n"
                + json.dumps(latest_validation.to_dict(), ensure_ascii=False)
            )
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
            "would materially improve the result. Otherwise use mode=single.\n\n"
            "JSON schema:\n"
            "{\n"
            '  "mode": "single" | "team",\n'
            '  "reason": "short reason",\n'
            '  "strategy": "sequence" | "parallel" | "dag",\n'
            '  "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
            '  "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
            f"{validation_note}"
        )

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and decode the JSON object contained in a model reply.

        Code-fence backticks and a leading ``json`` tag are removed, then the
        span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            ValueError: when the text is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or does not decode to an object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("planner response must be a JSON object")
        return payload
def _optional_str(value: Any) -> str | None:
    """Return the stripped text of *value*, or ``None`` when nothing is left.

    ``None``, an empty string and whitespace-only text all give ``None``.
    """
    cleaned = "" if value in (None, "") else str(value).strip()
    return cleaned or None
def _string_list(value: Any) -> list[str]:
    """Normalize *value* into a list of unique, non-empty, stripped strings.

    A string is split on commas; a list is taken item by item; any other
    value gives an empty list. The first occurrence of each entry is kept.
    """
    if isinstance(value, str):
        value = [piece.strip() for piece in value.split(",")]
    elif not isinstance(value, list):
        return []

    unique: list[str] = []
    for entry in value:
        cleaned = str(entry).strip()
        if not cleaned or cleaned in unique:
            continue
        unique.append(cleaned)
    return unique

View File

@ -0,0 +1,40 @@
"""Main Agent routing between simple chat and internal Task mode."""
from __future__ import annotations
import re
from .models import MainAgentDecision, TaskRecord
class MainAgentRouter:
    """Small deterministic classifier used before the main AgentLoop.

    The first version intentionally avoids a mandatory model call so the router
    stays reliable during provider outages. The rule set is conservative:
    anything that implies execution, files, tools, iteration, or validation
    becomes Task mode.
    """

    _TASK_PATTERNS = [
        r"\b(implement|fix|debug|refactor|migrate|build|create|write|edit|update|test|validate|deploy)\b",
        r"\b(file|repo|code|project|backend|frontend|api|database|migration|pull request|ci|bug)\b",
        r"\b(step|multi-step|workflow|plan and|then)\b",
        r"(实现|修复|调试|重构|迁移|构建|创建|编写|修改|更新|测试|验证|部署|文件|代码|项目|前端|后端|接口|数据库|多步|任务)",
    ]
    _NEW_TASK_PATTERNS = [
        r"\b(new task|another task|different task|start over)\b",
        r"(新任务|另一个任务|换个任务|重新开始)",
    ]

    def classify(self, message: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
        """Decide whether *message* is handled as a task or as simple chat.

        Rules are checked in order: continue an active task that is awaiting
        feedback or needs revision (unless the message asks for a new task),
        then keyword patterns, then message length (more than 240 characters),
        otherwise simple chat.
        """
        stripped = message.strip()
        haystack = stripped.lower()

        def hits(patterns: list[str]) -> bool:
            # True as soon as one pattern is found in the lowered message.
            for pattern in patterns:
                if re.search(pattern, haystack, re.IGNORECASE):
                    return True
            return False

        wants_new_task = hits(self._NEW_TASK_PATTERNS)
        task_pending = active_task is not None and active_task.status in {
            "awaiting_feedback",
            "needs_revision",
        }
        if task_pending and not wants_new_task:
            return MainAgentDecision(mode="task", reason="continuing_open_task", starts_new_task=False)

        if hits(self._TASK_PATTERNS):
            reason = "task_pattern_matched"
        elif len(stripped) > 240:
            reason = "long_request"
        else:
            return MainAgentDecision(mode="simple", reason="simple_question", starts_new_task=False)
        return MainAgentDecision(mode="task", reason=reason, starts_new_task=wants_new_task)

View File

@ -0,0 +1,167 @@
"""Internal service for automatic Task mode."""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .store import TaskStore
class TaskService:
    """Lifecycle operations on internal tasks, persisted through a ``TaskStore``.

    Every mutating operation saves the task and appends a matching event.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the backing ``TaskStore`` rooted at *root*."""
        self.store = TaskStore(root)

    def create_task(
        self,
        *,
        session_id: str,
        description: str,
        creator: str = "main-agent",
        metadata: dict[str, Any] | None = None,
    ) -> TaskRecord:
        """Create, store and return a new open task; the goal starts equal to the description."""
        timestamp = self._now()
        record = TaskRecord(
            task_id=uuid4().hex,
            session_id=session_id,
            description=description,
            goal=description,
            constraints=[],
            priority=0,
            status="open",
            creator=creator,
            created_at=timestamp,
            updated_at=timestamp,
            metadata=dict(metadata or {}),
        )
        self.store.upsert_task(record)
        self._event(record, "created", payload={"description": description})
        return record

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Look up a task by id via the store."""
        return self.store.get_task(task_id)

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Look up the task for *run_id* via the store."""
        return self.store.get_task_by_run_id(run_id)

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the store's latest open task for *session_id*."""
        return self.store.get_latest_open_task(session_id)

    def start_run(self, task_id: str, *, user_message: str, attempt_index: int) -> TaskRecord:
        """Mark the task as running and remember the triggering message and attempt index."""
        record = self._require(task_id)
        record.status = "running"
        record.updated_at = self._now()
        record.metadata["latest_user_message"] = user_message
        record.metadata["latest_attempt_index"] = attempt_index
        self.store.upsert_task(record)
        started = {"user_message": user_message, "attempt_index": attempt_index}
        self._event(record, "run_started", payload=started)
        return record

    def append_run(self, task_id: str, run_id: str, *, skill_names: list[str] | None = None) -> TaskRecord:
        """Attach a run id and the skills it used to the task, skipping duplicates."""
        record = self._require(task_id)
        if run_id not in record.run_ids:
            record.run_ids.append(run_id)
        for skill in skill_names or []:
            if skill in record.skill_names:
                continue
            record.skill_names.append(skill)
        record.updated_at = self._now()
        self.store.upsert_task(record)
        self._event(record, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
        return record

    def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
        """Store the validation outcome and move the task to ``awaiting_feedback``."""
        record = self._require(task_id)
        record.status = "awaiting_feedback"
        record.updated_at = self._now()
        record.validation_result = validation.to_dict()
        self.store.upsert_task(record)
        self._event(record, "validated", run_id=run_id, payload=validation.to_dict())
        return record

    def add_feedback(
        self,
        task_id: str,
        *,
        feedback_type: str,
        comment: str | None = None,
        run_id: str | None = None,
    ) -> TaskRecord:
        """Record user feedback for a run and update the task status.

        ``revise`` moves the task to ``needs_revision``, ``abandon`` to
        ``abandoned`` and ``satisfied`` to ``closed`` (satisfaction ``1.0``).
        Repeating feedback already recorded for the run returns the task
        unchanged.

        Raises:
            ValueError: when the run already has feedback of another type, or
                the task is finalized and this feedback is not a repeat.
        """
        record = self._require(task_id)
        timestamp = self._now()

        for_this_run = [item for item in record.feedback if item.get("run_id") == run_id]
        already_recorded = any(item.get("feedback_type") == feedback_type for item in for_this_run)
        conflict = next(
            (item for item in for_this_run if item.get("feedback_type") != feedback_type),
            None,
        )
        if conflict is not None:
            raise ValueError(
                f"Feedback for run_id={run_id!r} was already recorded as "
                f"{conflict.get('feedback_type')!r}"
            )
        if not already_recorded and record.status in {"closed", "abandoned"}:
            raise ValueError(f"Task {record.task_id} is already finalized as {record.status!r}")
        if already_recorded:
            # Idempotent repeat: nothing is stored and no event is emitted.
            return record

        entry = {
            "feedback_type": feedback_type,
            "comment": comment or "",
            "run_id": run_id,
            "created_at": timestamp,
        }
        record.feedback.append(entry)

        if feedback_type == "revise":
            record.status = "needs_revision"
        elif feedback_type == "abandon":
            record.status = "abandoned"
            record.closed_at = timestamp
            record.close_reason = comment or "abandoned"
        elif feedback_type == "satisfied":
            record.status = "closed"
            record.closed_at = timestamp
            record.close_reason = "satisfied"
            record.satisfaction = 1.0

        record.updated_at = timestamp
        self.store.upsert_task(record)
        self._event(record, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
        return record

    def _require(self, task_id: str) -> TaskRecord:
        """Return the stored task or raise ``ValueError`` for an unknown id."""
        record = self.store.get_task(task_id)
        if record is not None:
            return record
        raise ValueError(f"Unknown task_id: {task_id}")

    def _event(
        self,
        task: TaskRecord,
        event_type: str,
        *,
        run_id: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> None:
        """Append a new event for *task* to the store with a copy of *payload*."""
        event = TaskEvent(
            event_id=uuid4().hex,
            task_id=task.task_id,
            session_id=task.session_id,
            run_id=run_id,
            event_type=event_type,
            created_at=self._now(),
            payload=dict(payload or {}),
        )
        self.store.append_event(event)

    @staticmethod
    def _now() -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,286 @@
"""Resolve Task team nodes to pinned skills for generic sub-agents."""
from __future__ import annotations
import json
from dataclasses import dataclass, field, replace
from typing import Any
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.drafts import DraftService
from beaver.skills.learning import MissingSkillSynthesizer
from beaver.tasks.models import TaskRecord
@dataclass(slots=True)
class SkillResolutionReport:
node_id: str
skill_query: str
required_capabilities: list[str] = field(default_factory=list)
selected_skill_names: list[str] = field(default_factory=list)
generated_skill_draft_id: str | None = None
generated_skill_name: str | None = None
ephemeral_used: bool = False
reason: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"node_id": self.node_id,
"skill_query": self.skill_query,
"required_capabilities": list(self.required_capabilities),
"selected_skill_names": list(self.selected_skill_names),
"generated_skill_draft_id": self.generated_skill_draft_id,
"generated_skill_name": self.generated_skill_name,
"ephemeral_used": self.ephemeral_used,
"reason": self.reason,
}
class TaskSkillResolver:
    """Pins published or draft-only skills onto generic team nodes.

    For every node the resolver first tries to select published skills
    (embedding retrieval followed by a model choice). When nothing matches, a
    draft-only skill is synthesized and attached as an extra skill context.
    """

    def __init__(
        self,
        *,
        skills_loader: SkillsLoader,
        draft_service: DraftService,
        retriever: SkillEmbeddingRetriever | None = None,
        missing_skill_synthesizer: MissingSkillSynthesizer | None = None,
    ) -> None:
        """Store the collaborators, creating a default retriever/synthesizer when omitted."""
        self.skills_loader = skills_loader
        self.draft_service = draft_service
        self.retriever = retriever or SkillEmbeddingRetriever()
        self.missing_skill_synthesizer = missing_skill_synthesizer or MissingSkillSynthesizer()

    async def resolve_graph(
        self,
        graph: ExecutionGraph,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionGraph, list[SkillResolutionReport]]:
        """Resolve every node of *graph* in order.

        Returns a new graph with the same strategy plus one report per node.
        """
        resolved_nodes: list[ExecutionNode] = []
        reports: list[SkillResolutionReport] = []
        for node in graph.nodes:
            resolved, report = await self.resolve_node(
                node,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            resolved_nodes.append(resolved)
            reports.append(report)
        return ExecutionGraph(strategy=graph.strategy, nodes=resolved_nodes), reports

    async def resolve_node(
        self,
        node: ExecutionNode,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionNode, SkillResolutionReport]:
        """Resolve one node to published skills, or to a generated draft skill.

        Returns the rewritten node together with a report describing the outcome.
        """
        skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()
        required_capabilities = [
            str(item).strip()
            for item in node.agent.metadata.get("required_capabilities", [])
            if str(item).strip()
        ]
        # The retrieval query combines the node-level and task-level context.
        query_parts = [
            skill_query,
            node.task,
            " ".join(required_capabilities),
            task.goal,
            user_message,
        ]
        selected = await self._select_published_skills(
            query="\n".join(part for part in query_parts if part),
            provider_bundle=provider_bundle,
        )
        if selected:
            pinned = _merge_names(node.inherited_pinned_skills, selected)
            resolved = self._generic_node(
                node,
                pinned_skill_names=pinned,
                metadata={
                    **node.agent.metadata,
                    "skill_query": skill_query,
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": selected,
                    "ephemeral_skill_names": [],
                },
            )
            return resolved, SkillResolutionReport(
                node_id=node.node_id,
                skill_query=skill_query,
                required_capabilities=required_capabilities,
                selected_skill_names=selected,
                ephemeral_used=False,
                reason="matched published skill",
            )

        # No published skill matched: synthesize a draft-only skill for this node.
        missing = await self.missing_skill_synthesizer.synthesize(
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
            node_id=node.node_id,
            node_task=node.task,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            provider_bundle=provider_bundle,
            draft_service=self.draft_service,
        )
        resolved = self._generic_node(
            node,
            pinned_skill_names=list(node.inherited_pinned_skills),
            pinned_skill_contexts=[*node.inherited_pinned_skill_contexts, missing.skill_context],
            metadata={
                **node.agent.metadata,
                "skill_query": skill_query,
                "required_capabilities": required_capabilities,
                "selected_skill_names": [],
                "generated_skill_draft_id": missing.draft.draft_id,
                "generated_skill_name": missing.draft.skill_name,
                "ephemeral_skill_names": [missing.skill_context.name],
            },
        )
        return resolved, SkillResolutionReport(
            node_id=node.node_id,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            generated_skill_draft_id=missing.draft.draft_id,
            generated_skill_name=missing.draft.skill_name,
            ephemeral_used=True,
            reason="generated draft-only skill for missing sub-agent guidance",
        )

    async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
        """Pick the published skills that match *query*.

        Candidates are narrowed by the retriever, then a model chooses among
        them. Only names that are actual candidates are returned, without
        duplicates. Returns ``[]`` when there are no candidates, no provider,
        or the model call fails.
        """
        candidates = self.skills_loader.build_selection_candidates()
        if not candidates:
            return []

        embedding = provider_bundle.embedding_runtime
        has_embedding = embedding is not None
        candidates = await self.retriever.retrieve(
            query=query,
            candidates=candidates,
            top_k=8,
            api_key=embedding.api_key if has_embedding else None,
            api_base=embedding.api_base if has_embedding else None,
            model=embedding.model if has_embedding else None,
            extra_headers=embedding.extra_headers if has_embedding else None,
            timeout_seconds=embedding.request_timeout_seconds if has_embedding else None,
            fallback_top_k=8,
        )
        if not candidates:
            return []

        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        if provider is None:
            # Without a provider no selection can be made; previously this was
            # only handled indirectly by the blanket except below.
            return []
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        candidate_names = {item["name"] for item in candidates}
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "Select published Beaver skills for one generic sub-agent node. "
                            "Return only a JSON array of skill names. Do not invent names. "
                            "If none of the candidates directly match the required guidance, return []."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            f"Node skill query:\n{query}\n\n"
                            f"Candidate skills:\n{self._render_candidates(candidates)}\n\n"
                            "Return only JSON, for example: [\"skill-a\"] or []"
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=512,
                temperature=0,
            )
            parsed = self._parse_names(response.content or "")
        except Exception:
            # Best effort: a failed selection call means "no published skill
            # matched", which lets the caller fall back to a draft skill.
            parsed = []

        selected: list[str] = []
        for name in parsed:
            if name in candidate_names and name not in selected:
                selected.append(name)
        return selected

    @staticmethod
    def _generic_node(
        node: ExecutionNode,
        *,
        pinned_skill_names: list[str],
        metadata: dict[str, Any],
        pinned_skill_contexts: list[Any] | None = None,
    ) -> ExecutionNode:
        """Copy *node* with a generic skill-worker agent and the given pinned skills.

        When *pinned_skill_contexts* is empty or omitted, the node's existing
        contexts are kept.
        """
        return replace(
            node,
            agent=AgentDescriptor(
                name=node.node_id,
                role="",
                system_prompt="",
                metadata={
                    **metadata,
                    "sub_agent_kind": "generic_skill_worker",
                },
            ),
            inherited_pinned_skills=pinned_skill_names,
            inherited_pinned_skill_contexts=list(pinned_skill_contexts or node.inherited_pinned_skill_contexts),
        )

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as ``- name: description`` lines for the prompt."""
        return "\n".join(f"- {item['name']}: {item['description']}" for item in candidates)

    @staticmethod
    def _parse_names(content: str) -> list[str]:
        """Extract a list of skill names from a model reply.

        Accepts a JSON array, or a JSON object holding the array under
        ``skills``, ``selected_skills`` or ``selected``. A multi-line code
        fence and a leading ``json`` tag are removed first. When the reply as
        a whole is not JSON, the first bracketed fragment is tried, so replies
        such as ``Here you go: ["a"]`` still work. Returns ``[]`` when no name
        list can be recovered.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()

        payload = TaskSkillResolver._decode_json_fragment(cleaned)
        if isinstance(payload, dict):
            for key in ("skills", "selected_skills", "selected"):
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break
        if not isinstance(payload, list):
            return []
        return [str(item).strip() for item in payload if str(item).strip()]

    @staticmethod
    def _decode_json_fragment(text: str) -> Any:
        """Decode *text* as JSON, falling back to an embedded array or object.

        Returns the decoded value, or ``None`` when neither the whole text nor
        a bracketed fragment of it is valid JSON.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass
        # Models often wrap the JSON in prose or a one-line fence. The array
        # form is tried first because it is the requested reply format.
        for opener, closer in (("[", "]"), ("{", "}")):
            start = text.find(opener)
            end = text.rfind(closer)
            if 0 <= start < end:
                try:
                    return json.loads(text[start : end + 1])
                except json.JSONDecodeError:
                    continue
        return None
def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
result: list[str] = []
for name in [*parent, *selected]:
if name and name not in result:
result.append(name)
return result

View File

@ -0,0 +1,100 @@
"""File-backed internal task store."""
from __future__ import annotations
import json
import os
import tempfile
import threading
from pathlib import Path
from typing import Any
from .models import TaskEvent, TaskRecord
class TaskStore:
    """Persist task records and task events as files under one directory.

    Task records are kept in ``tasks.json`` as ``{"tasks": {task_id: record}}``
    and rewritten as a whole on every upsert. Events are appended one JSON
    object per line to ``events.jsonl``. Every file access made through an
    instance is guarded by that instance's ``threading.Lock``.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the store directory ``root`` (and parents) if it is missing."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.tasks_path = self.root / "tasks.json"
        self.events_path = self.root / "events.jsonl"
        self._lock = threading.Lock()

    def list_tasks(self) -> list[TaskRecord]:
        """Return every stored task, rebuilt via ``TaskRecord.from_dict``."""
        with self._lock:
            payload = self._read_tasks_unlocked()
        return [TaskRecord.from_dict(item) for item in payload.values()]

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the task stored under ``task_id``, or ``None`` if absent."""
        with self._lock:
            payload = self._read_tasks_unlocked().get(task_id)
        return TaskRecord.from_dict(payload) if isinstance(payload, dict) else None

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the first task whose ``run_ids`` contains ``run_id``, else ``None``."""
        for task in self.list_tasks():
            if run_id in task.run_ids:
                return task
        return None

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the most recently updated unfinished task of a session.

        A task counts as unfinished when its status is one of
        ``awaiting_feedback``, ``needs_revision``, ``open`` or ``running``.
        Returns ``None`` when the session has no such task.
        """
        tasks = [
            task
            for task in self.list_tasks()
            if task.session_id == session_id
            and task.status in {"awaiting_feedback", "needs_revision", "open", "running"}
        ]
        if not tasks:
            return None
        return sorted(tasks, key=lambda item: item.updated_at)[-1]

    def upsert_task(self, task: TaskRecord) -> None:
        """Insert or overwrite ``task`` (keyed by ``task.task_id``) and persist."""
        with self._lock:
            payload = self._read_tasks_unlocked()
            payload[task.task_id] = task.to_dict()
            self._write_tasks_unlocked(payload)

    def append_event(self, event: TaskEvent) -> None:
        """Append ``event`` as one JSON line to the event log."""
        self.events_path.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            with self.events_path.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(event.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def list_events(self, task_id: str | None = None) -> list[TaskEvent]:
        """Return logged events in file order, optionally filtered by task.

        Args:
            task_id: When given, only events whose ``task_id`` matches are
                returned.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        # Read under the same lock append_event writes under, so a reader can
        # never observe a partially written trailing line.
        with self._lock:
            if not self.events_path.exists():
                return []
            raw = self.events_path.read_text(encoding="utf-8")
        # Parsing happens outside the lock to keep the critical section short.
        results: list[TaskEvent] = []
        for line in raw.splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if not isinstance(payload, dict):
                continue
            event = TaskEvent.from_dict(payload)
            if task_id is not None and event.task_id != task_id:
                continue
            results.append(event)
        return results

    def _read_tasks_unlocked(self) -> dict[str, dict[str, Any]]:
        """Load the task mapping from disk; the caller must hold ``_lock``.

        Accepts both the wrapped ``{"tasks": {...}}`` layout and a bare
        ``{task_id: record}`` mapping. Anything that is not a mapping of
        mappings yields an empty result, and non-dict records are dropped.
        """
        if not self.tasks_path.exists():
            return {}
        payload = json.loads(self.tasks_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return {}
        tasks = payload.get("tasks", payload)
        if not isinstance(tasks, dict):
            return {}
        return {str(key): dict(value) for key, value in tasks.items() if isinstance(value, dict)}

    def _write_tasks_unlocked(self, payload: dict[str, dict[str, Any]]) -> None:
        """Write the task mapping to disk; the caller must hold ``_lock``.

        The data is written to a temporary file in the same directory and
        then moved over ``tasks.json`` with ``os.replace``.
        """
        self.tasks_path.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp_name = tempfile.mkstemp(prefix=".tasks-", suffix=".json", dir=str(self.tasks_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump({"tasks": payload}, handle, ensure_ascii=False, indent=2, sort_keys=True)
                handle.write("\n")
            os.replace(tmp_path, self.tasks_path)
        finally:
            # Only still present when the write or the replace failed.
            if tmp_path.exists():
                tmp_path.unlink()

View File

@ -0,0 +1,138 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
    """Judge whether a task's final output is acceptable.

    When a provider is available the judgement is delegated to a chat model;
    otherwise a small text heuristic is used.
    """

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate ``final_output`` for ``task``.

        The auxiliary provider/runtime of ``provider_bundle`` is preferred
        over the main one. If a provider is found, the model-based validator
        runs; any exception it raises is converted into a failed result
        tagged ``validator="llm_error"`` instead of propagating. Without a
        provider the heuristic validator is used.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is not None:
            try:
                return await self._validate_with_provider(
                    provider=provider,
                    model=model,
                    task=task,
                    user_message=user_message,
                    final_output=final_output,
                    transcript_excerpt=transcript_excerpt,
                    tool_summaries=tool_summaries or [],
                    team_summaries=team_summaries or [],
                )
            except Exception as exc:
                # Deliberate boundary: a broken validator must fail the task
                # visibly rather than crash the caller or silently pass.
                return ValidationResult(
                    passed=False,
                    score=0.0,
                    issues=[f"Validator failed: {exc}"],
                    missing_requirements=["A valid automatic validation result is required before accepting the task."],
                    recommended_revision_prompt=(
                        "Review the task result again because automatic validation failed, "
                        "then provide a corrected final answer that explicitly satisfies the task goal."
                    ),
                    validator="llm_error",
                )
        return self._heuristic_validate(final_output)

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` to grade the output and map its JSON reply.

        The prompt includes the task goal, the user request, at most 2500
        characters of transcript, the first 12 tool and team summaries and
        at most 4000 characters of the final output.

        Raises:
            ValueError: If the reply is not a JSON object or the score is
                not numeric (``json.JSONDecodeError`` is a ``ValueError``).
        """
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Transcript excerpt:\n{transcript_excerpt[:2500]}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries[:12], ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries[:12], ensure_ascii=False)}\n\n"
            f"Assistant final output:\n{final_output[:4000]}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=800,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")
        return ValidationResult(
            passed=self._coerce_passed(payload.get("passed")),
            score=self._coerce_score(payload.get("score", 0.0)),
            issues=self._coerce_str_list(payload.get("issues")),
            missing_requirements=self._coerce_str_list(payload.get("missing_requirements")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _coerce_passed(value: Any) -> bool:
        """Interpret the model's ``passed`` field as a boolean.

        Strings are matched against a small set of affirmative words so that
        ``"false"`` is not mistaken for a pass merely because it is a
        non-empty string; any unrecognised string counts as not passed.
        """
        if isinstance(value, str):
            return value.strip().lower() in {"true", "yes", "y", "1", "pass", "passed"}
        return bool(value)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert the model's ``score`` field to a float clamped to [0, 1].

        Falsy values become ``0.0``. NaN is mapped to ``0.0`` because a plain
        ``min``/``max`` clamp would turn it into a perfect score.

        Raises:
            ValueError, TypeError: If the value cannot be converted by ``float``.
        """
        import math

        score = float(value or 0.0)
        if math.isnan(score):
            return 0.0
        return max(0.0, min(1.0, score))

    @staticmethod
    def _coerce_str_list(value: Any) -> list[str]:
        """Normalise a model field that should be a list of strings.

        A single string is wrapped in a list instead of being split into
        characters; lists and tuples have each item stringified; falsy values
        give an empty list; any other value is stringified as one entry.
        """
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Grade the output without a model.

        Empty output fails with score 0.0; output containing one of the known
        failure phrases fails with score 0.35; anything else passes with
        score 0.85.
        """
        text = final_output.strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Parse a JSON object out of a model reply.

        Surrounding backticks and a leading ``json`` tag are removed, then
        the span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            ValueError: If the text is not valid JSON or does not decode to
                an object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("validator response must be a JSON object")
        return payload

View File

@ -100,7 +100,8 @@ class ToolAssembler:
result: list[str] = []
for skill in activated_skills:
for name in skills_loader.get_skill_tool_hints(skill.name):
names = list(skill.tool_hints) if getattr(skill, "tool_hints", None) else skills_loader.get_skill_tool_hints(skill.name)
for name in names:
if name not in result:
result.append(name)
return result