feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证与反馈门控机制,实现自动质量验证
(验证通过且得分>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
仅当Task验证通过且用户满意时才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -0,0 +1,22 @@
"""Internal task tracking for automatic Main Agent task mode."""
from .models import MainAgentDecision, TaskEvent, TaskRecord, ValidationResult
from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
# Names re-exported as the public surface of this package; each one is
# imported from a sibling module directly above.
__all__ = [
"MainAgentDecision",
"MainAgentRouter",
"TaskEvent",
"TaskExecutionPlan",
"TaskExecutionPlanner",
"TaskRecord",
"TaskService",
"SkillResolutionReport",
"TaskSkillResolver",
"ValidationResult",
"ValidationService",
]

View File

@ -0,0 +1,178 @@
"""Models for internal task tracking and validation."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
# Status values for which a task still counts as open; TaskRecord.is_open
# tests membership in this set.
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_revision"}
@dataclass(slots=True)
class ValidationResult:
passed: bool
score: float
issues: list[str] = field(default_factory=list)
missing_requirements: list[str] = field(default_factory=list)
recommended_revision_prompt: str = ""
validator: str = "heuristic"
@property
def accepted(self) -> bool:
return self.passed and self.score >= 0.75
def to_dict(self) -> dict[str, Any]:
return {
"passed": self.passed,
"score": self.score,
"issues": list(self.issues),
"missing_requirements": list(self.missing_requirements),
"recommended_revision_prompt": self.recommended_revision_prompt,
"validator": self.validator,
"accepted": self.accepted,
}
@classmethod
def from_dict(cls, payload: dict[str, Any] | None) -> "ValidationResult | None":
if not isinstance(payload, dict):
return None
return cls(
passed=bool(payload.get("passed")),
score=float(payload.get("score", 0.0) or 0.0),
issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator=str(payload.get("validator") or "unknown"),
)
@dataclass(slots=True)
class TaskRecord:
task_id: str
session_id: str
description: str
goal: str
constraints: list[str]
priority: int
status: str
creator: str
created_at: str
updated_at: str
parent_task_id: str | None = None
closed_at: str | None = None
close_reason: str | None = None
satisfaction: float | None = None
run_ids: list[str] = field(default_factory=list)
skill_names: list[str] = field(default_factory=list)
feedback: list[dict[str, Any]] = field(default_factory=list)
validation_result: dict[str, Any] | None = None
metadata: dict[str, Any] = field(default_factory=dict)
@property
def is_open(self) -> bool:
return self.status in TASK_OPEN_STATUSES
def to_dict(self) -> dict[str, Any]:
return {
"task_id": self.task_id,
"session_id": self.session_id,
"parent_task_id": self.parent_task_id,
"description": self.description,
"goal": self.goal,
"constraints": list(self.constraints),
"priority": self.priority,
"status": self.status,
"creator": self.creator,
"created_at": self.created_at,
"updated_at": self.updated_at,
"closed_at": self.closed_at,
"close_reason": self.close_reason,
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "TaskRecord":
return cls(
task_id=str(payload["task_id"]),
session_id=str(payload["session_id"]),
parent_task_id=_optional_str(payload.get("parent_task_id")),
description=str(payload.get("description") or ""),
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
closed_at=_optional_str(payload.get("closed_at")),
close_reason=_optional_str(payload.get("close_reason")),
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@dataclass(slots=True)
class TaskEvent:
event_id: str
task_id: str
session_id: str
event_type: str
created_at: str
run_id: str | None = None
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"task_id": self.task_id,
"session_id": self.session_id,
"run_id": self.run_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "TaskEvent":
return cls(
event_id=str(payload["event_id"]),
task_id=str(payload["task_id"]),
session_id=str(payload["session_id"]),
run_id=_optional_str(payload.get("run_id")),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class MainAgentDecision:
mode: str
reason: str
starts_new_task: bool = False
@property
def is_task(self) -> bool:
return self.mode == "task"
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)

View File

@ -0,0 +1,288 @@
"""Internal Task execution planner for single-agent vs team execution."""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
# The two execution modes a plan can carry; used as the type of
# TaskExecutionPlan.mode.
TaskExecutionMode = Literal["single", "team"]
@dataclass(slots=True)
class TaskExecutionPlan:
mode: TaskExecutionMode
reason: str = ""
graph: ExecutionGraph | None = None
final_synthesis_instruction: str = ""
fallback_error: str | None = None
skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list)
@property
def is_team(self) -> bool:
return self.mode == "team" and self.graph is not None
@classmethod
def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan":
return cls(mode="single", reason=reason, fallback_error=fallback_error)
def to_event_payload(self) -> dict[str, Any]:
strategy = self.graph.strategy if self.graph is not None else None
nodes = self.graph.nodes if self.graph is not None else []
return {
"plan_mode": self.mode,
"reason": self.reason,
"strategy": strategy,
"node_ids": [node.node_id for node in nodes],
"skill_queries": [
str(node.agent.metadata.get("skill_query") or "")
for node in nodes
],
"selected_skill_names": [
name
for node in nodes
for name in node.inherited_pinned_skills
],
"generated_skill_draft_ids": [
item.generated_skill_draft_id
for item in self.skill_resolution_report
if item.generated_skill_draft_id
],
"skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report],
"fallback_error": self.fallback_error,
}
class TaskExecutionPlanner:
    """Plan whether a Task attempt should run through a team first.

    The planner asks a chat provider for a compact JSON plan, turns it into a
    ``TaskExecutionPlan`` and, for team plans, lets the optional
    ``TaskSkillResolver`` resolve the graph. Every failure along the way
    degrades to a single-mode plan carrying the error text.
    """

    _MAX_NODES = 6
    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}

    def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None:
        """Store the optional resolver used to post-process team graphs."""
        self.task_skill_resolver = task_skill_resolver

    async def plan(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> TaskExecutionPlan:
        """Ask the provider for an execution plan for this attempt.

        The auxiliary provider/runtime is preferred over the main one. Returns
        a single-mode plan with reason ``"planner_provider_unavailable"`` when
        no provider can be found, and with reason ``"planner_failed"`` when the
        provider call (or anything after it) raises.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is None:
            return TaskExecutionPlan.single("planner_provider_unavailable")
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You choose whether an internal Beaver Task attempt should run as a single "
                            "main-agent pass or use a small sub-agent team first. Return only compact JSON."
                        ),
                    },
                    {
                        "role": "user",
                        "content": self._prompt(
                            task=task,
                            user_message=user_message,
                            attempt_index=attempt_index,
                            latest_validation=latest_validation,
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0.0,
            )
            plan = self.from_json(response.content or "")
            return await self._resolve_plan(
                plan,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
        except Exception as exc:
            # Deliberate catch-all: planning is best-effort and must never
            # block the attempt, so any failure falls back to single mode.
            return TaskExecutionPlan.single("planner_failed", fallback_error=str(exc))

    async def _resolve_plan(
        self,
        plan: TaskExecutionPlan,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle | None,
    ) -> TaskExecutionPlan:
        """Run the skill resolver over a team plan's graph.

        Non-team plans, and any plan when no resolver is configured, are
        returned untouched. On success the plan is updated in place with the
        resolved graph and the per-node reports. A missing provider bundle or
        a resolver/validation error yields a single-mode fallback plan.
        """
        graph = plan.graph
        # ``is_team`` already implies a graph; the explicit None test replaces
        # the former ``assert`` so the guarantee also holds under ``python -O``.
        if not plan.is_team or graph is None or self.task_skill_resolver is None:
            return plan
        if provider_bundle is None:
            return TaskExecutionPlan.single(
                "planner_fallback_single",
                fallback_error="task_skill_resolver_provider_unavailable",
            )
        try:
            resolved_graph, reports = await self.task_skill_resolver.resolve_graph(
                graph,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            resolved_graph.validate()
            plan.graph = resolved_graph
            plan.skill_resolution_report = reports
            return plan
        except Exception as exc:
            return TaskExecutionPlan.single(
                "planner_fallback_single",
                fallback_error=f"task_skill_resolver_failed: {exc}",
            )

    def from_json(self, text: str) -> TaskExecutionPlan:
        """Convert the provider's raw text into a plan; never raises.

        Any mode other than ``"team"`` produces a single-mode plan. A team
        payload is converted to a graph and validated; parse or validation
        errors produce a ``"planner_fallback_single"`` plan with the error.
        """
        try:
            payload = self._parse_json_object(text)
            mode = str(payload.get("mode") or "single").strip().lower()
            reason = str(payload.get("reason") or "")
            if mode != "team":
                return TaskExecutionPlan.single(reason or "planner_selected_single")
            graph = self._graph_from_payload(payload)
            graph.validate()
            return TaskExecutionPlan(
                mode="team",
                reason=reason or "planner_selected_team",
                graph=graph,
                final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
            )
        except Exception as exc:
            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))

    def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph:
        """Build an ``ExecutionGraph`` from a team payload.

        Raises:
            ValueError: for an unsupported strategy, a missing/empty/oversized
                node list, a non-object node, or a node without id or task.
        """
        strategy = str(payload.get("strategy") or "sequence").strip().lower()
        if strategy not in self._SUPPORTED_STRATEGIES:
            raise ValueError(f"Unsupported team strategy: {strategy}")
        raw_nodes = payload.get("nodes")
        if not isinstance(raw_nodes, list) or not raw_nodes:
            raise ValueError("Team plan requires at least one node")
        if len(raw_nodes) > self._MAX_NODES:
            raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")
        nodes: list[ExecutionNode] = []
        for index, item in enumerate(raw_nodes, start=1):
            if not isinstance(item, dict):
                raise ValueError("Each team node must be an object")
            agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {}
            skill_query = str(
                item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or ""
            ).strip()
            requested_capabilities = _string_list(
                item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities")
            )
            requested_tags = _string_list(item.get("tags") or agent_payload.get("tags"))
            node_id = str(
                item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}"
            ).strip()
            task = str(item.get("task") or "").strip()
            if not node_id or not task:
                raise ValueError("Each team node requires node_id/id and task")
            nodes.append(
                ExecutionNode(
                    node_id=node_id,
                    task=task,
                    agent=AgentDescriptor(
                        name=node_id,
                        role="",
                        system_prompt="",
                        metadata={
                            "skill_query": skill_query,
                            "required_capabilities": requested_capabilities,
                            "requested_tags": requested_tags,
                            "sub_agent_kind": "generic_skill_worker",
                        },
                    ),
                    # The model sometimes emits a bare string where a list is
                    # expected; _str_items keeps it whole instead of letting
                    # iteration split it into single characters.
                    depends_on=self._str_items(item.get("depends_on")),
                    inherited_pinned_skills=self._str_items(item.get("pinned_skills")),
                    constraints=self._str_items(item.get("constraints")),
                    expected_output=str(item.get("expected_output") or "") or None,
                )
            )
        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]

    @staticmethod
    def _str_items(value: Any) -> list[str]:
        """Coerce a JSON field to a list of strings.

        Falsy values give ``[]``, a bare string becomes a one-element list,
        and any other iterable is stringified element by element.
        """
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return [str(entry) for entry in value]

    @staticmethod
    def _prompt(
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        latest_validation: ValidationResult | None,
    ) -> str:
        """Render the user prompt: instructions, JSON schema, goal, request and attempt.

        The previous validation result, when given, is appended as JSON.
        """
        validation_note = ""
        if latest_validation is not None:
            validation_note = (
                "\nPrevious validation issues:\n"
                + json.dumps(latest_validation.to_dict(), ensure_ascii=False)
            )
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
            "would materially improve the result. Otherwise use mode=single.\n\n"
            "JSON schema:\n"
            "{\n"
            ' "mode": "single" | "team",\n'
            ' "reason": "short reason",\n'
            ' "strategy": "sequence" | "parallel" | "dag",\n'
            ' "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", '
            '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n'
            ' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
            "}\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
            f"{validation_note}"
        )

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and decode the JSON object embedded in ``text``.

        Code-fence backticks and a leading ``json`` tag are stripped, then the
        span from the first ``{`` to the last ``}`` is decoded.

        Raises:
            ValueError: when the text is not valid JSON
                (``json.JSONDecodeError``) or decodes to a non-object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("planner response must be a JSON object")
        return payload
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
text = str(value).strip()
return text or None
def _string_list(value: Any) -> list[str]:
if not isinstance(value, list):
if isinstance(value, str):
value = [item.strip() for item in value.split(",")]
else:
return []
result: list[str] = []
for item in value:
text = str(item).strip()
if text and text not in result:
result.append(text)
return result

View File

@ -0,0 +1,40 @@
"""Main Agent routing between simple chat and internal Task mode."""
from __future__ import annotations
import re
from .models import MainAgentDecision, TaskRecord
class MainAgentRouter:
    """Small deterministic classifier used before the main AgentLoop.

    The first version intentionally avoids a mandatory model call so the
    router stays reliable during provider outages. The rule set is
    conservative: anything that implies execution, files, tools, iteration,
    or validation becomes Task mode.
    """

    _TASK_PATTERNS = [
        r"\b(implement|fix|debug|refactor|migrate|build|create|write|edit|update|test|validate|deploy)\b",
        r"\b(file|repo|code|project|backend|frontend|api|database|migration|pull request|ci|bug)\b",
        r"\b(step|multi-step|workflow|plan and|then)\b",
        r"(实现|修复|调试|重构|迁移|构建|创建|编写|修改|更新|测试|验证|部署|文件|代码|项目|前端|后端|接口|数据库|多步|任务)",
    ]
    _NEW_TASK_PATTERNS = [
        r"\b(new task|another task|different task|start over)\b",
        r"(新任务|另一个任务|换个任务|重新开始)",
    ]

    @staticmethod
    def _matches_any(patterns: list[str], text: str) -> bool:
        """Report whether at least one pattern is found in ``text`` (case-insensitive)."""
        for pattern in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return True
        return False

    def classify(self, message: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
        """Route ``message`` to ``"task"`` or ``"simple"`` mode.

        Rules, in order:

        1. An active task that is awaiting feedback or needs revision is
           continued, unless the message asks for a new task.
        2. A message matching any task pattern goes to task mode.
        3. A message longer than 240 characters (after stripping) goes to
           task mode.
        4. Everything else is a simple question.
        """
        text = message.strip()
        lowered = text.lower()
        starts_new = self._matches_any(self._NEW_TASK_PATTERNS, lowered)

        waiting_on_user = active_task is not None and active_task.status in {
            "awaiting_feedback",
            "needs_revision",
        }
        if waiting_on_user and not starts_new:
            return MainAgentDecision(mode="task", reason="continuing_open_task", starts_new_task=False)

        if self._matches_any(self._TASK_PATTERNS, lowered):
            return MainAgentDecision(mode="task", reason="task_pattern_matched", starts_new_task=starts_new)

        if len(text) > 240:
            return MainAgentDecision(mode="task", reason="long_request", starts_new_task=starts_new)

        return MainAgentDecision(mode="simple", reason="simple_question", starts_new_task=False)

View File

@ -0,0 +1,167 @@
"""Internal service for automatic Task mode."""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .store import TaskStore
class TaskService:
    """Lifecycle operations for internal tasks, persisted through a ``TaskStore``.

    Every mutating method saves the updated record and then appends a
    matching event to the store.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the backing ``TaskStore`` rooted at ``root``."""
        self.store = TaskStore(root)

    def create_task(
        self,
        *,
        session_id: str,
        description: str,
        creator: str = "main-agent",
        metadata: dict[str, Any] | None = None,
    ) -> TaskRecord:
        """Create and persist a new ``"open"`` task and emit a ``created`` event.

        The description doubles as the initial goal; constraints start empty
        and priority starts at 0.
        """
        now = self._now()
        task = TaskRecord(
            task_id=uuid4().hex,
            session_id=session_id,
            description=description,
            goal=description,
            constraints=[],
            priority=0,
            status="open",
            creator=creator,
            created_at=now,
            updated_at=now,
            metadata=dict(metadata or {}),
        )
        self.store.upsert_task(task)
        self._event(task, "created", payload={"description": description})
        return task

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the task with this id, or ``None``."""
        return self.store.get_task(task_id)

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the task that recorded ``run_id``, or ``None``."""
        return self.store.get_task_by_run_id(run_id)

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the session's most recently updated open task, or ``None``."""
        return self.store.get_latest_open_task(session_id)

    def start_run(self, task_id: str, *, user_message: str, attempt_index: int) -> TaskRecord:
        """Mark the task ``"running"`` and remember the latest message and attempt index.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        task = self._require(task_id)
        task.status = "running"
        task.updated_at = self._now()
        task.metadata["latest_user_message"] = user_message
        task.metadata["latest_attempt_index"] = attempt_index
        self.store.upsert_task(task)
        self._event(task, "run_started", payload={"user_message": user_message, "attempt_index": attempt_index})
        return task

    def append_run(self, task_id: str, run_id: str, *, skill_names: list[str] | None = None) -> TaskRecord:
        """Attach a finished run and its skills to the task, skipping duplicates.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        task = self._require(task_id)
        if run_id not in task.run_ids:
            task.run_ids.append(run_id)
        for name in skill_names or []:
            if name not in task.skill_names:
                task.skill_names.append(name)
        task.updated_at = self._now()
        self.store.upsert_task(task)
        self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
        return task

    def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
        """Store the validation result and move the task to ``"awaiting_feedback"``.

        The status changes regardless of whether the validation was accepted.

        Raises:
            ValueError: if ``task_id`` is unknown.
        """
        task = self._require(task_id)
        task.status = "awaiting_feedback"
        task.updated_at = self._now()
        task.validation_result = validation.to_dict()
        self.store.upsert_task(task)
        self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
        return task

    def add_feedback(
        self,
        task_id: str,
        *,
        feedback_type: str,
        comment: str | None = None,
        run_id: str | None = None,
    ) -> TaskRecord:
        """Record user feedback and apply its status transition.

        ``"revise"`` moves the task to ``"needs_revision"``, ``"abandon"`` and
        ``"satisfied"`` finalize it; any other type is recorded without a
        status change.

        Feedback is idempotent per ``run_id``: repeating the same type for a
        run returns the task unchanged, and a different type for the same run
        is rejected. Feedback without a ``run_id`` cannot be attributed to a
        run, so on an open task it is always recorded. Previously ``None``
        matched ``None``, which silently dropped a second un-attributed
        "revise" and rejected a later "satisfied" as a conflict. Finalized
        tasks keep the original matching rules.

        Raises:
            ValueError: if ``task_id`` is unknown, if conflicting feedback
                exists for the run, or if the task is already finalized.
        """
        task = self._require(task_id)
        now = self._now()
        finalized = task.status in {"closed", "abandoned"}

        # Run-scoped matching needs a real run id, except on finalized tasks
        # where replayed feedback must still be recognised.
        if run_id is not None or finalized:
            same_run = [item for item in task.feedback if item.get("run_id") == run_id]
        else:
            same_run = []

        conflicting_feedback = next(
            (item for item in same_run if item.get("feedback_type") != feedback_type),
            None,
        )
        if conflicting_feedback is not None:
            raise ValueError(
                f"Feedback for run_id={run_id!r} was already recorded as "
                f"{conflicting_feedback.get('feedback_type')!r}"
            )
        matching_feedback = any(item.get("feedback_type") == feedback_type for item in same_run)
        if finalized and not matching_feedback:
            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
        if matching_feedback:
            return task

        entry = {
            "feedback_type": feedback_type,
            "comment": comment or "",
            "run_id": run_id,
            "created_at": now,
        }
        task.feedback.append(entry)
        if feedback_type == "revise":
            task.status = "needs_revision"
        elif feedback_type == "abandon":
            task.status = "abandoned"
            task.closed_at = now
            task.close_reason = comment or "abandoned"
        elif feedback_type == "satisfied":
            task.status = "closed"
            task.closed_at = now
            task.close_reason = "satisfied"
            task.satisfaction = 1.0
        task.updated_at = now
        self.store.upsert_task(task)
        self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
        return task

    def _require(self, task_id: str) -> TaskRecord:
        """Load a task or raise ``ValueError`` when the id is unknown."""
        task = self.store.get_task(task_id)
        if task is None:
            raise ValueError(f"Unknown task_id: {task_id}")
        return task

    def _event(
        self,
        task: TaskRecord,
        event_type: str,
        *,
        run_id: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> None:
        """Append an event for ``task`` with a fresh id and timestamp."""
        self.store.append_event(
            TaskEvent(
                event_id=uuid4().hex,
                task_id=task.task_id,
                session_id=task.session_id,
                run_id=run_id,
                event_type=event_type,
                created_at=self._now(),
                payload=dict(payload or {}),
            )
        )

    @staticmethod
    def _now() -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,286 @@
"""Resolve Task team nodes to pinned skills for generic sub-agents."""
from __future__ import annotations
import json
from dataclasses import dataclass, field, replace
from typing import Any
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.drafts import DraftService
from beaver.skills.learning import MissingSkillSynthesizer
from beaver.tasks.models import TaskRecord
@dataclass(slots=True)
class SkillResolutionReport:
node_id: str
skill_query: str
required_capabilities: list[str] = field(default_factory=list)
selected_skill_names: list[str] = field(default_factory=list)
generated_skill_draft_id: str | None = None
generated_skill_name: str | None = None
ephemeral_used: bool = False
reason: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"node_id": self.node_id,
"skill_query": self.skill_query,
"required_capabilities": list(self.required_capabilities),
"selected_skill_names": list(self.selected_skill_names),
"generated_skill_draft_id": self.generated_skill_draft_id,
"generated_skill_name": self.generated_skill_name,
"ephemeral_used": self.ephemeral_used,
"reason": self.reason,
}
class TaskSkillResolver:
    """Pins published or draft-only skills onto generic team nodes.

    For each node the resolver first tries to select published skills. When
    none match, it asks the missing-skill synthesizer for a draft skill and
    attaches that draft's skill context to the node instead.
    """

    def __init__(
        self,
        *,
        skills_loader: SkillsLoader,
        draft_service: DraftService,
        retriever: SkillEmbeddingRetriever | None = None,
        missing_skill_synthesizer: MissingSkillSynthesizer | None = None,
    ) -> None:
        """Store collaborators, creating a default retriever/synthesizer when omitted."""
        self.skills_loader = skills_loader
        self.draft_service = draft_service
        self.retriever = retriever or SkillEmbeddingRetriever()
        self.missing_skill_synthesizer = missing_skill_synthesizer or MissingSkillSynthesizer()

    async def resolve_graph(
        self,
        graph: ExecutionGraph,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionGraph, list[SkillResolutionReport]]:
        """Resolve every node in order.

        Returns a new graph with the same strategy and the resolved nodes,
        plus one report per node.
        """
        resolved_nodes: list[ExecutionNode] = []
        reports: list[SkillResolutionReport] = []
        for node in graph.nodes:
            resolved, report = await self.resolve_node(
                node,
                task=task,
                user_message=user_message,
                attempt_index=attempt_index,
                provider_bundle=provider_bundle,
            )
            resolved_nodes.append(resolved)
            reports.append(report)
        return ExecutionGraph(strategy=graph.strategy, nodes=resolved_nodes), reports

    async def resolve_node(
        self,
        node: ExecutionNode,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        provider_bundle: ProviderBundle,
    ) -> tuple[ExecutionNode, SkillResolutionReport]:
        """Resolve one node to pinned published skills or to a generated draft skill.

        The skill query falls back to the node task, then to the node id.
        """
        skill_query = str(node.agent.metadata.get("skill_query") or node.task or node.node_id).strip()
        # ``or []`` also covers a key that is present but stored as None,
        # which the plain ``get(..., [])`` default would try to iterate.
        required_capabilities = [
            str(item).strip()
            for item in node.agent.metadata.get("required_capabilities") or []
            if str(item).strip()
        ]
        selected = await self._select_published_skills(
            query="\n".join(
                part
                for part in [
                    skill_query,
                    node.task,
                    " ".join(required_capabilities),
                    task.goal,
                    user_message,
                ]
                if part
            ),
            provider_bundle=provider_bundle,
        )
        if selected:
            pinned = _merge_names(node.inherited_pinned_skills, selected)
            resolved = self._generic_node(
                node,
                pinned_skill_names=pinned,
                metadata={
                    **node.agent.metadata,
                    "skill_query": skill_query,
                    "required_capabilities": required_capabilities,
                    "selected_skill_names": selected,
                    "ephemeral_skill_names": [],
                },
            )
            return resolved, SkillResolutionReport(
                node_id=node.node_id,
                skill_query=skill_query,
                required_capabilities=required_capabilities,
                selected_skill_names=selected,
                ephemeral_used=False,
                reason="matched published skill",
            )

        # No published skill matched: synthesize a draft skill and attach its
        # context to the node alongside any inherited contexts.
        missing = await self.missing_skill_synthesizer.synthesize(
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
            node_id=node.node_id,
            node_task=node.task,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            provider_bundle=provider_bundle,
            draft_service=self.draft_service,
        )
        resolved = self._generic_node(
            node,
            pinned_skill_names=list(node.inherited_pinned_skills),
            pinned_skill_contexts=[*node.inherited_pinned_skill_contexts, missing.skill_context],
            metadata={
                **node.agent.metadata,
                "skill_query": skill_query,
                "required_capabilities": required_capabilities,
                "selected_skill_names": [],
                "generated_skill_draft_id": missing.draft.draft_id,
                "generated_skill_name": missing.draft.skill_name,
                "ephemeral_skill_names": [missing.skill_context.name],
            },
        )
        return resolved, SkillResolutionReport(
            node_id=node.node_id,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
            generated_skill_draft_id=missing.draft.draft_id,
            generated_skill_name=missing.draft.skill_name,
            ephemeral_used=True,
            reason="generated draft-only skill for missing sub-agent guidance",
        )

    async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]:
        """Pick published skill names for ``query``.

        Candidates from the loader are narrowed by the retriever (top 8), then
        a chat provider chooses among them. Only names present in the narrowed
        candidate set are returned, without duplicates. Returns ``[]`` when
        there are no candidates or the provider call fails.
        """
        candidates = self.skills_loader.build_selection_candidates()
        if not candidates:
            return []
        embedding = provider_bundle.embedding_runtime
        has_embedding = embedding is not None
        candidates = await self.retriever.retrieve(
            query=query,
            candidates=candidates,
            top_k=8,
            api_key=embedding.api_key if has_embedding else None,
            api_base=embedding.api_base if has_embedding else None,
            model=embedding.model if has_embedding else None,
            extra_headers=embedding.extra_headers if has_embedding else None,
            timeout_seconds=embedding.request_timeout_seconds if has_embedding else None,
            fallback_top_k=8,
        )
        if not candidates:
            return []
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        candidate_names = {item["name"] for item in candidates}
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "Select published Beaver skills for one generic sub-agent node. "
                            "Return only a JSON array of skill names. Do not invent names. "
                            "If none of the candidates directly match the required guidance, return []."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            f"Node skill query:\n{query}\n\n"
                            f"Candidate skills:\n{self._render_candidates(candidates)}\n\n"
                            "Return only JSON, for example: [\"skill-a\"] or []"
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=512,
                temperature=0,
            )
            parsed = self._parse_names(response.content or "")
        except Exception:
            # Best-effort: a failed selection is treated as "no match", which
            # sends the caller down the draft-synthesis path.
            # NOTE(review): a transient provider error therefore triggers
            # draft generation - confirm this is intended.
            parsed = []
        selected: list[str] = []
        for name in parsed:
            if name in candidate_names and name not in selected:
                selected.append(name)
        return selected

    @staticmethod
    def _generic_node(
        node: ExecutionNode,
        *,
        pinned_skill_names: list[str],
        metadata: dict[str, Any],
        pinned_skill_contexts: list[Any] | None = None,
    ) -> ExecutionNode:
        """Return a copy of ``node`` with a generic-worker agent and the given pins.

        When ``pinned_skill_contexts`` is omitted or empty, the node's own
        inherited contexts are kept.
        """
        return replace(
            node,
            agent=AgentDescriptor(
                name=node.node_id,
                role="",
                system_prompt="",
                metadata={
                    **metadata,
                    "sub_agent_kind": "generic_skill_worker",
                },
            ),
            inherited_pinned_skills=pinned_skill_names,
            inherited_pinned_skill_contexts=list(pinned_skill_contexts or node.inherited_pinned_skill_contexts),
        )

    @staticmethod
    def _render_candidates(candidates: list[dict[str, str]]) -> str:
        """Render candidates as one ``- name: description`` line each."""
        return "\n".join(f"- {item['name']}: {item['description']}" for item in candidates)

    @staticmethod
    def _parse_names(content: str) -> list[str]:
        """Parse the provider reply into a list of non-empty skill names.

        Accepts a JSON array, or a JSON object holding the array under
        ``skills``, ``selected_skills`` or ``selected``. A surrounding
        multi-line code fence and a leading ``json`` tag are removed first.
        If the reply still is not valid JSON, the span from the first ``[`` to
        the last ``]`` is tried, which covers arrays wrapped in prose or a
        one-line fence. Returns ``[]`` when nothing usable is found.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            start = cleaned.find("[")
            end = cleaned.rfind("]")
            if start < 0 or end <= start:
                return []
            try:
                payload = json.loads(cleaned[start : end + 1])
            except json.JSONDecodeError:
                return []
        if isinstance(payload, dict):
            for key in ("skills", "selected_skills", "selected"):
                value = payload.get(key)
                if isinstance(value, list):
                    payload = value
                    break
        if not isinstance(payload, list):
            return []
        return [str(item).strip() for item in payload if str(item).strip()]
def _merge_names(parent: list[str], selected: list[str]) -> list[str]:
result: list[str] = []
for name in [*parent, *selected]:
if name and name not in result:
result.append(name)
return result

View File

@ -0,0 +1,100 @@
"""File-backed internal task store."""
from __future__ import annotations
import json
import os
import tempfile
import threading
from pathlib import Path
from typing import Any
from .models import TaskEvent, TaskRecord
class TaskStore:
    """File-backed store: tasks in ``tasks.json``, events in ``events.jsonl``.

    A single in-process lock guards every read and write of both files. The
    lock is a plain ``threading.Lock``; it does not coordinate separate
    ``TaskStore`` instances or processes.
    """

    def __init__(self, root: str | Path) -> None:
        """Create ``root`` if needed and set up the file paths and the lock."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.tasks_path = self.root / "tasks.json"
        self.events_path = self.root / "events.jsonl"
        self._lock = threading.Lock()

    def list_tasks(self) -> list[TaskRecord]:
        """Return every stored task."""
        with self._lock:
            payload = self._read_tasks_unlocked()
        return [TaskRecord.from_dict(item) for item in payload.values()]

    def get_task(self, task_id: str) -> TaskRecord | None:
        """Return the task with this id, or ``None`` when it is not stored."""
        with self._lock:
            payload = self._read_tasks_unlocked().get(task_id)
        return TaskRecord.from_dict(payload) if isinstance(payload, dict) else None

    def get_task_by_run_id(self, run_id: str) -> TaskRecord | None:
        """Return the first task whose ``run_ids`` contains ``run_id``, or ``None``."""
        for task in self.list_tasks():
            if run_id in task.run_ids:
                return task
        return None

    def get_latest_open_task(self, session_id: str) -> TaskRecord | None:
        """Return the session's open task with the greatest ``updated_at``, or ``None``.

        "Open" is decided by ``TaskRecord.is_open`` so this query agrees with
        the model's own definition instead of keeping a second hard-coded
        status list (which had omitted ``"validating"``).
        """
        tasks = [
            task
            for task in self.list_tasks()
            if task.session_id == session_id and task.is_open
        ]
        if not tasks:
            return None
        # Stable sort + last element: among equal timestamps the task stored
        # later wins, as before.
        return sorted(tasks, key=lambda item: item.updated_at)[-1]

    def upsert_task(self, task: TaskRecord) -> None:
        """Insert or replace ``task`` and rewrite the tasks file."""
        with self._lock:
            payload = self._read_tasks_unlocked()
            payload[task.task_id] = task.to_dict()
            self._write_tasks_unlocked(payload)

    def append_event(self, event: TaskEvent) -> None:
        """Append ``event`` as one JSON line to the events file."""
        self.events_path.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            with self.events_path.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(event.to_dict(), ensure_ascii=False, sort_keys=True) + "\n")

    def list_events(self, task_id: str | None = None) -> list[TaskEvent]:
        """Return stored events in file order, optionally filtered by ``task_id``.

        The file is read under the same lock ``append_event`` holds, so a
        concurrent append in this process cannot expose a half-written line.
        Blank lines and non-object JSON lines are skipped; a line that is not
        valid JSON raises ``json.JSONDecodeError``.
        """
        with self._lock:
            if not self.events_path.exists():
                return []
            raw = self.events_path.read_text(encoding="utf-8")
        results: list[TaskEvent] = []
        for line in raw.splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if not isinstance(payload, dict):
                continue
            event = TaskEvent.from_dict(payload)
            if task_id is not None and event.task_id != task_id:
                continue
            results.append(event)
        return results

    def _read_tasks_unlocked(self) -> dict[str, dict[str, Any]]:
        """Load the task mapping; the caller must hold the lock.

        Accepts both the ``{"tasks": {...}}`` wrapper and a bare mapping.
        Returns ``{}`` when the file is missing or has an unexpected shape,
        and drops entries that are not dicts.
        """
        if not self.tasks_path.exists():
            return {}
        payload = json.loads(self.tasks_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return {}
        tasks = payload.get("tasks", payload)
        if not isinstance(tasks, dict):
            return {}
        return {str(key): dict(value) for key, value in tasks.items() if isinstance(value, dict)}

    def _write_tasks_unlocked(self, payload: dict[str, dict[str, Any]]) -> None:
        """Write the task mapping; the caller must hold the lock.

        The data goes to a temporary file in the same directory, which is then
        moved over ``tasks.json`` with ``os.replace`` so readers never see a
        partially written file.
        """
        self.tasks_path.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp_name = tempfile.mkstemp(prefix=".tasks-", suffix=".json", dir=str(self.tasks_path.parent))
        tmp_path = Path(tmp_name)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                json.dump({"tasks": payload}, handle, ensure_ascii=False, indent=2, sort_keys=True)
                handle.write("\n")
            os.replace(tmp_path, self.tasks_path)
        finally:
            # Only present if the write or the replace failed.
            if tmp_path.exists():
                tmp_path.unlink()

View File

@ -0,0 +1,138 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
    """Validate a task's final output, using an LLM provider when one is given.

    With a provider the verdict comes from a JSON reply to a validation
    prompt; without one a small text heuristic is used instead.
    """

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Return a validation verdict for ``final_output``.

        The auxiliary provider/runtime of ``provider_bundle`` is preferred,
        falling back to the main one. If the provider call or the parsing of
        its reply raises, a failed result with ``validator="llm_error"`` is
        returned rather than propagating the exception. When no provider is
        available the heuristic validator is used.
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)
        if provider is not None:
            try:
                return await self._validate_with_provider(
                    provider=provider,
                    model=model,
                    task=task,
                    user_message=user_message,
                    final_output=final_output,
                    transcript_excerpt=transcript_excerpt,
                    tool_summaries=tool_summaries or [],
                    team_summaries=team_summaries or [],
                )
            except Exception as exc:
                # Deliberate boundary: any validator failure becomes a
                # rejected result so the task is never accepted unvalidated.
                return ValidationResult(
                    passed=False,
                    score=0.0,
                    issues=[f"Validator failed: {exc}"],
                    missing_requirements=["A valid automatic validation result is required before accepting the task."],
                    recommended_revision_prompt=(
                        "Review the task result again because automatic validation failed, "
                        "then provide a corrected final answer that explicitly satisfies the task goal."
                    ),
                    validator="llm_error",
                )
        return self._heuristic_validate(final_output)

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` for a JSON verdict and convert it to a result.

        The prompt includes at most 2500 characters of transcript, the first
        12 tool and team summaries, and 4000 characters of the final output.
        Raises whatever ``provider.chat`` raises, and ``ValueError`` /
        ``TypeError`` when the reply cannot be interpreted.
        """
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Transcript excerpt:\n{transcript_excerpt[:2500]}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries[:12], ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries[:12], ensure_ascii=False)}\n\n"
            f"Assistant final output:\n{final_output[:4000]}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=800,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")
        return ValidationResult(
            passed=self._coerce_bool(payload.get("passed")),
            score=self._coerce_score(payload.get("score", 0.0)),
            issues=self._coerce_str_list(payload.get("issues")),
            missing_requirements=self._coerce_str_list(payload.get("missing_requirements")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret a model-supplied ``passed`` value as a boolean.

        Strings are matched against affirmative words, because ``bool("false")``
        is ``True`` and would silently turn a failing verdict into a pass.
        Every other type keeps normal truthiness.
        """
        if isinstance(value, str):
            return value.strip().lower() in {"true", "yes", "y", "1", "pass", "passed"}
        return bool(value)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert ``value`` to a float clamped to ``[0.0, 1.0]``.

        Falsy values become ``0.0``. NaN also becomes ``0.0``; without this
        check ``min(1.0, nan)`` evaluates to ``1.0``. Raises ``ValueError`` or
        ``TypeError`` when ``value`` is not convertible by ``float``.
        """
        score = float(value or 0.0)
        if score != score:  # NaN is the only float that differs from itself.
            return 0.0
        return max(0.0, min(1.0, score))

    @staticmethod
    def _coerce_str_list(value: Any) -> list[str]:
        """Normalize a model-supplied list field to ``list[str]``.

        A bare string is wrapped as one entry instead of being iterated
        character by character; falsy values and blank strings give ``[]``;
        any other non-list value is wrapped as its ``str()`` form.
        """
        if not value:
            return []
        if isinstance(value, str):
            text = value.strip()
            return [text] if text else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Judge ``final_output`` from its text alone.

        Empty output fails with score 0.0, output containing a known failure
        phrase fails with score 0.35, and anything else passes with score 0.85.
        """
        text = final_output.strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and decode the JSON object contained in ``text``.

        Surrounding code fences, a leading ``json`` tag and any text outside
        the outermost ``{`` ... ``}`` pair are discarded before decoding.
        Raises ``ValueError`` (``json.JSONDecodeError`` is a subclass) when the
        text is not valid JSON or does not decode to an object.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        payload = json.loads(cleaned)
        if not isinstance(payload, dict):
            raise ValueError("validator response must be a JSON object")
        return payload