feat(coordinator): 添加团队节点默认最大工具迭代次数配置

添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,
并修改 LocalAgentRunner 中的逻辑,在 envelope 未指定该值时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件:finish_reason 为 "stop" 或 "max_tool_iterations_finalized" 时视为运行成功,
但若最终输出看起来是原始工具调用文本,则判定为运行失败,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,
为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,
同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,
提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,
简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理,改进团队运行完成消息显示,
并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码,将所有任务统一按单步执行处理,
简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,
只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
This commit is contained in:
2026-06-26 16:36:29 +08:00
parent 53b13e8eac
commit 520a21a027
360 changed files with 13271 additions and 1848 deletions

View File

@ -9,6 +9,7 @@ from beaver.engine.providers import ProviderBundle
from beaver.tasks.evidence import EvidenceBuilder, evaluate_node_evidence
from .models import DelegationEnvelope, NodeRunResult
from .runtime_defaults import DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS
class LocalAgentRunner:
@ -55,7 +56,11 @@ class LocalAgentRunner:
pinned_skill_names=envelope.inherited_pinned_skills,
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
allowed_tool_names=envelope.allowed_tool_names,
max_tool_iterations=envelope.max_tool_iterations,
max_tool_iterations=(
envelope.max_tool_iterations
if envelope.max_tool_iterations is not None
else DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS
),
allow_candidate_generation=allow_candidate_generation,
)
loaded = target_loop.boot()
@ -70,7 +75,8 @@ class LocalAgentRunner:
envelope.required_evidence,
result.output_text,
)
run_succeeded = result.finish_reason == "stop"
raw_tool_call_output = self._looks_like_raw_tool_call(result.output_text)
run_succeeded = result.finish_reason in {"stop", "max_tool_iterations_finalized"} and not raw_tool_call_output
if not run_succeeded:
completion_status = "failed"
elif evidence_gaps:
@ -81,7 +87,10 @@ class LocalAgentRunner:
if completion_status == "partial":
error = "; ".join(evidence_gaps)
else:
error = None if success else (result.output_text or result.finish_reason)
if raw_tool_call_output:
error = "finalized output is a raw tool call"
else:
error = None if success else (result.output_text or result.finish_reason)
return NodeRunResult(
node_id=envelope.node_id or envelope.agent.name,
success=success,
@ -169,3 +178,16 @@ class LocalAgentRunner:
"If no published skill matches, return [] and let the node continue without skills."
)
return "\n\n".join(sections)
@staticmethod
def _looks_like_raw_tool_call(output_text: str | None) -> bool:
text = (output_text or "").strip()
if not text:
return False
markers = (
"<DSMLtool_calls>",
"<DSMLinvoke",
"<tool_call",
"<function=",
)
return any(marker in text for marker in markers)

View File

@ -0,0 +1,3 @@
"""Runtime defaults shared by Beaver team planning and execution."""
# Tool-iteration cap applied to a team node when its delegation envelope
# leaves ``max_tool_iterations`` unset (None).
DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS = 100

View File

@ -48,8 +48,6 @@ class SkillContext:
content_hash: str = ""
activation_reason: str = "selected"
tool_hints: list[str] = field(default_factory=list)
team_template: dict[str, Any] | None = None
team_template_warnings: list[str] = field(default_factory=list)
@dataclass(slots=True)

View File

@ -106,6 +106,7 @@ class EngineLoadResult:
task_execution_planner: TaskExecutionPlanner | None = None
mcp_manager: MCPConnectionManager | None = None
mcp_report: dict[str, dict] = field(default_factory=dict)
mcp_connected: bool = False
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
closed: bool = False
@ -317,10 +318,7 @@ class EngineLoader:
draft_service=draft_service,
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(
task_skill_resolver=task_skill_resolver,
tool_registry=tool_registry,
)
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner()
mcp_manager = MCPConnectionManager(
self.config.tools.mcp_servers,
authz_config=self.config.authz,

View File

@ -53,7 +53,7 @@ class AgentProfile:
max_tokens: int | None = None
max_context_messages: int = 1000
temperature: float = 0.2
max_tool_iterations: int = 30
max_tool_iterations: int = 100
@dataclass(slots=True)
@ -99,8 +99,8 @@ class _WebSearchLoopGuard:
return None
query = str(payload.get("query") or self._last_query or "").strip()
is_low_quality = payload.get("success") is False or payload.get("quality") == "low"
if not is_low_quality:
is_failed_search = payload.get("success") is False
if not is_failed_search:
self._reset()
self._last_query = query
return None
@ -435,7 +435,9 @@ class AgentLoop:
if include_tools and mcp_manager is not None:
started_at = perf_counter()
try:
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
if not loaded.mcp_connected:
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
loaded.mcp_connected = True
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
finally:
add_latency("mcp_ms", started_at)
@ -752,6 +754,11 @@ class AgentLoop:
"memory_store": memory_service.get_store(),
"tool_registry": tool_registry,
"skills_loader": skills_loader,
"loaded": loaded,
"agent_loop": self,
"provider_bundle": bundle,
"user_message": task,
"attempt_index": attempt_index,
"draft_service": getattr(loaded, "draft_service", None),
"beaver_config": loaded.config,
"task_id": task_id,
@ -764,6 +771,7 @@ class AgentLoop:
"session_id": resolved_session_id,
"task_id": task_id,
"run_id": resolved_run_id,
"parent_session_id": parent_session_id,
"allowed_tool_names": (
None if allowed_tool_names is None else list(allowed_tool_names)
),

View File

@ -29,6 +29,7 @@ LOCAL_MCP_CATEGORIES: dict[str, dict[str, str]] = {
"local_coordination_mcp": {"category": "coordination", "display_name": "本地协作工具"},
"local_scheduler_mcp": {"category": "scheduler", "display_name": "本地定时工具"},
"local_web_mcp": {"category": "web", "display_name": "本地联网工具"},
"local_team_workflow_mcp": {"category": "team_workflow", "display_name": "本地 Agent Team Workflow 工具"},
}

View File

@ -56,6 +56,7 @@ LOCAL_TOOL_CATEGORIES = {
"coordination": "Beaver Local Coordination Tools",
"scheduler": "Beaver Local Scheduler Tools",
"web": "Beaver Local Web Tools",
"team_workflow": "Beaver Local Team Workflow Tools",
}
@ -129,6 +130,10 @@ def _category_tools(category: str, workspace: Path) -> tuple[list[BaseTool], Too
ObjectBackedTool(WebFetchTool()),
ObjectBackedTool(WebSearchTool()),
]
elif category == "team_workflow":
from beaver.team_workflows.mcp_tools import create_team_workflow_tools
tools = create_team_workflow_tools()
else:
raise ValueError(f"Unknown local tool category: {category}")
return tools, context

View File

@ -68,7 +68,7 @@ class AgentService:
self.profile.max_tokens = None
self.profile.temperature = 0.2
self.profile.max_context_messages = 1000
self.profile.max_tool_iterations = 30
self.profile.max_tool_iterations = 100
if defaults.max_tokens is not None:
self.profile.max_tokens = max(1, defaults.max_tokens)
if defaults.temperature is not None:

View File

@ -17,6 +17,7 @@ class SessionProcessProjector:
runs: dict[str, dict[str, Any]] = {}
events: list[dict[str, Any]] = []
artifacts: list[dict[str, Any]] = []
projected_skill_activation_run_ids: set[str] = set()
def add_event(
*,
@ -186,6 +187,38 @@ class SessionProcessProjector:
},
)
elif record.event_type == "skill_activation_snapshotted":
run_id = record.run_id or root_run_id
parent_run_id = root_run_id if run_id != root_run_id else None
receipts = [
item
for item in payload.get("receipts") or []
if isinstance(item, dict)
]
selected_skill_names = _receipt_skill_names(receipts)
if selected_skill_names:
projected_skill_activation_run_ids.add(str(run_id))
add_event(
event_id=_event_id(record, "skill-activation"),
run_id=str(run_id),
parent_run_id=parent_run_id,
kind="skill_selected",
actor_type="system",
actor_id="skill-selector",
actor_name="Skill Selector",
text=f"Selected skill guidance: {', '.join(selected_skill_names)}.",
created_at=_receipt_started_at(receipts) or created_at,
status="done",
metadata={
"task_id": task_id,
"attempt_index": attempt_index,
"timeline_type": "skill",
"skill_names": selected_skill_names,
"activation_reasons": _receipt_reasons(receipts),
"receipts": receipts,
},
)
elif record.event_type in {"task_team_run_completed", "task_team_run_failed"}:
team_success = bool(payload.get("team_success"))
root["status"] = "running"
@ -203,7 +236,7 @@ class SessionProcessProjector:
actor_type="system",
actor_id="team",
actor_name="Task Team",
text=payload.get("error") or ("Team completed" if team_success else "Team completed with failed nodes"),
text="Team completed" if team_success else "Team 执行未完成 / 子节点失败",
created_at=created_at,
status="done" if team_success else "error",
metadata={**dict(payload), "timeline_type": "agent_team", "team_run_ids": team_run_ids},
@ -316,7 +349,10 @@ class SessionProcessProjector:
"skill_names": activated_skill_names,
},
}
if activated_skill_names:
if activated_skill_names and main_run_id not in projected_skill_activation_run_ids:
skill_created_at = _activated_skill_started_at(run_record) or (
run_record.started_at if run_record is not None else None
) or created_at
add_event(
event_id=_event_id(record, "synthesis-skills"),
run_id=main_run_id,
@ -326,7 +362,7 @@ class SessionProcessProjector:
actor_id="skill-selector",
actor_name="Skill Selector",
text=f"Selected skill guidance: {', '.join(activated_skill_names)}.",
created_at=created_at,
created_at=skill_created_at,
status="done",
metadata={
"task_id": task_id,
@ -439,6 +475,48 @@ def _activated_skill_reasons(run_record: Any | None) -> list[str]:
return reasons
def _activated_skill_started_at(run_record: Any | None) -> str | None:
if run_record is None:
return None
timestamps = [
str(getattr(receipt, "activated_at", "") or "").strip()
for receipt in getattr(run_record, "activated_skills", []) or []
]
timestamps = [value for value in timestamps if value]
if not timestamps:
return None
return sorted(timestamps)[0]
def _receipt_skill_names(receipts: list[dict[str, Any]]) -> list[str]:
names = []
for receipt in receipts:
skill_name = str(receipt.get("skill_name") or "").strip()
if skill_name:
names.append(skill_name)
return list(dict.fromkeys(names))
def _receipt_reasons(receipts: list[dict[str, Any]]) -> list[str]:
reasons = []
for receipt in receipts:
reason = str(receipt.get("activation_reason") or "").strip()
if reason:
reasons.append(reason)
return reasons
def _receipt_started_at(receipts: list[dict[str, Any]]) -> str | None:
timestamps = [
str(receipt.get("activated_at") or "").strip()
for receipt in receipts
]
timestamps = [value for value in timestamps if value]
if not timestamps:
return None
return sorted(timestamps)[0]
def _tool_call_name(tool_call: dict[str, Any]) -> str:
function_payload = tool_call.get("function")
if isinstance(function_payload, dict):

View File

@ -140,8 +140,6 @@ class SkillAssembler:
content_hash=record.content_hash or "" if record is not None else "",
activation_reason="llm_selected",
tool_hints=list(record.tool_hints) if record is not None else [],
team_template=getattr(record, "team_template", None) if record is not None else None,
team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [],
)
)
return activated_skills

View File

@ -28,7 +28,6 @@ from .utils import (
check_requirements,
escape_xml,
extract_required_tool_names,
extract_skill_team_template,
get_missing_requirements,
parse_frontmatter,
parse_skill_metadata_blob,
@ -50,8 +49,6 @@ class SkillRecord:
tool_hints: list[str] = field(default_factory=list)
frontmatter: dict[str, Any] = field(default_factory=dict)
description: str = ""
team_template: dict[str, Any] | None = None
team_template_warnings: list[str] = field(default_factory=list)
class SkillsLoader:
@ -116,7 +113,6 @@ class SkillsLoader:
continue
normalized_frontmatter = dict(frontmatter)
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
template_result = extract_skill_team_template(body)
record = SkillRecord(
name=name,
path=skill_file,
@ -131,8 +127,6 @@ class SkillsLoader:
),
frontmatter=normalized_frontmatter,
description=str(frontmatter.get("description") or summarize_body(body) or name),
team_template=template_result.template,
team_template_warnings=template_result.warnings,
)
if filter_unavailable and not self._record_available(record):
continue
@ -152,7 +146,6 @@ class SkillsLoader:
else:
path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md"
_frontmatter, body = parse_frontmatter(loaded.content)
template_result = extract_skill_team_template(body)
record = SkillRecord(
name=name,
path=path,
@ -167,8 +160,6 @@ class SkillsLoader:
),
frontmatter=dict(loaded.version.frontmatter),
description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name),
team_template=template_result.template,
team_template_warnings=template_result.warnings,
)
if filter_unavailable and not self._record_available(record):
continue

View File

@ -17,7 +17,6 @@ import json
import os
import re
import shutil
from dataclasses import dataclass, field
from typing import Any
@ -85,27 +84,6 @@ def strip_frontmatter(content: str) -> str:
return body
@dataclass(slots=True)
class SkillTeamTemplateParseResult:
template: dict[str, Any] | None = None
warnings: list[str] = field(default_factory=list)
def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult:
matches = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL)
if not matches:
return SkillTeamTemplateParseResult()
if len(matches) != 1:
return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"])
try:
template = json.loads(matches[0])
except json.JSONDecodeError:
return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"])
if not isinstance(template, dict) or not isinstance(template.get("nodes", []), list):
return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"])
return SkillTeamTemplateParseResult(template=template)
def extract_required_tool_names(body: str) -> list[str]:
"""从 canonical skill 正文的 `## Required Tools` 段落提取工具名。

View File

@ -5,12 +5,11 @@ from __future__ import annotations
from time import perf_counter
from typing import Any, Callable
from beaver.coordinator.models import ExecutionNode, TeamRunResult
from beaver.engine import AgentRunResult
from beaver.engine.context import SkillContext
from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
from .evidence import EvidenceBuilder, TaskEvidencePacket, render_task_evidence
from .models import TaskRecord
from .planner import TaskExecutionPlan
@ -46,7 +45,7 @@ class TaskAttemptOrchestrator:
output_language_instruction = self._output_language_instruction(prompt_locale)
provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
kwargs = dict(kwargs)
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
kwargs.pop("team_provider_bundle_factory", None)
kwargs["provider_bundle"] = provider_bundle
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
@ -87,75 +86,17 @@ class TaskAttemptOrchestrator:
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(kwargs),
plan = TaskExecutionPlan.single(
"legacy_planner_team_ignored",
planner_adaptation=plan.planner_adaptation,
)
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
plan,
team_result,
prompt_locale=prompt_locale,
)
if plan.is_team:
team_execution_context = self._join_context(outcome_context, team_execution_context)
outcome_metadata = {
"task_outcome": "single",
"incomplete_node_ids": [],
"node_statuses": {},
"evidence_gaps": {},
}
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
@ -171,22 +112,15 @@ class TaskAttemptOrchestrator:
attempt_kwargs["execution_context"] = self._join_context(
base_execution_context,
output_language_instruction,
team_execution_context,
)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
plan=plan,
team_summaries=team_summaries,
)
result = await runner(message, **attempt_kwargs)
if outcome_metadata["task_outcome"] == "incomplete":
result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
self._append_task_observation(
session_manager,
task.session_id,
@ -210,7 +144,6 @@ class TaskAttemptOrchestrator:
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
evidence_debug = {
@ -256,31 +189,6 @@ class TaskAttemptOrchestrator:
result.validation_result = None
return result
async def _run_team_for_task(
self,
plan: TaskExecutionPlan,
*,
task: TaskRecord,
parent_session_id: str,
provider_bundle_factory: Any,
) -> tuple[TeamRunResult | None, str | None]:
if plan.graph is None:
return None, "team plan did not include an execution graph"
try:
from beaver.services.team_service import TeamService
result = await TeamService(self.create_loop()).run_team(
plan.graph,
parent_task_id=task.task_id,
parent_session_id=parent_session_id,
parent_run_id=None,
provider_bundle_factory=provider_bundle_factory,
allow_candidate_generation=False,
)
return result, None
except Exception as exc:
return None, str(exc)
async def _assemble_task_attempt_skills(
self,
*,
@ -396,7 +304,6 @@ class TaskAttemptOrchestrator:
user_message: str,
attempt_index: int,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
@ -445,8 +352,6 @@ class TaskAttemptOrchestrator:
)
)
sections.append("Execution plan:\n" + "\n".join(plan_lines))
if team_summaries:
sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
sections.append(
"Skill selection instruction:\n"
"Prefer reusing previously activated skills when they still match the Task. "
@ -476,140 +381,6 @@ class TaskAttemptOrchestrator:
def _join_context(*parts: str | None) -> str:
return "\n\n".join(part.strip() for part in parts if part and part.strip())
@staticmethod
def _team_summary_for_validation(result: TeamRunResult) -> str:
lines = [
f"success={result.success}",
f"task_id={result.task_id or ''}",
"summary:",
result.summary,
"nodes:",
]
for node in result.node_results:
lines.append(
f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
f"error={node.error or ''} output={node.output_text[:500]}"
)
return "\n".join(lines)
@staticmethod
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
payloads: list[dict[str, Any]] = []
for item in result.node_results:
payload = item.to_dict()
node = nodes.get(item.node_id)
if node is not None:
payload["selected_skill_names"] = list(node.inherited_pinned_skills)
payload["ephemeral_skill_names"] = [
skill.name for skill in node.inherited_pinned_skill_contexts
]
payload["skill_query"] = node.agent.metadata.get("skill_query")
payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
payloads.append(payload)
return payloads
@staticmethod
def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
if result is None:
return []
return [node.evidence for node in result.node_results if node.evidence is not None]
@staticmethod
def _team_synthesis_outcome(
plan: TaskExecutionPlan,
result: TeamRunResult | None,
*,
prompt_locale: str | None = None,
) -> tuple[str, str, dict[str, Any]]:
if not plan.is_team or plan.graph is None:
metadata = {
"task_outcome": "single",
"incomplete_node_ids": [],
"node_statuses": {},
"evidence_gaps": {},
}
return "Task outcome: single", "", metadata
result_by_node = {
item.node_id: item
for item in (result.node_results if result is not None else [])
}
node_statuses: dict[str, str] = {}
evidence_gaps: dict[str, list[str]] = {}
incomplete_node_ids: list[str] = []
detail_lines: list[str] = []
successful_lines: list[str] = []
for node in plan.graph.nodes:
node_result = result_by_node.get(node.node_id)
status = node_result.completion_status if node_result is not None else "not_run"
node_statuses[node.node_id] = status
gaps = list(node_result.evidence_gaps) if node_result is not None else []
if gaps:
evidence_gaps[node.node_id] = gaps
if node.required_for_completion and status != "succeeded":
incomplete_node_ids.append(node.node_id)
detail_lines.append(
f"- {node.node_id}: status={status}, "
f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, "
f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, "
f"evidence_gaps={gaps}"
)
elif node_result is not None and status == "succeeded":
successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}")
task_outcome = "incomplete" if incomplete_node_ids else "complete"
metadata = {
"task_outcome": task_outcome,
"incomplete_node_ids": incomplete_node_ids,
"node_statuses": node_statuses,
"evidence_gaps": evidence_gaps,
}
context_parts = [
f"Task outcome: {task_outcome}",
"Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"),
]
if detail_lines:
context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines))
if successful_lines:
context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines))
if task_outcome == "incomplete":
context_parts.append(
"Synthesis requirement: produce a partial report from available evidence and explicitly state "
"that the task is incomplete, partially completed, or missing required evidence."
)
prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else ""
return "\n\n".join(context_parts), prefix, metadata
@staticmethod
def _incomplete_prefix(prompt_locale: str | None) -> str:
locale = normalize_main_agent_prompt_locale(prompt_locale)
if locale == "en":
return "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
if locale == "zh-Hant":
return "任務未完成:部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
return "任务未完成:部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"
@staticmethod
def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
normalized = output_text.lower()
notices = (
"任务未完成",
"任務未完成",
"部分完成",
"缺少证据",
"缺少證據",
"task incomplete",
"incomplete task",
"partially complete",
"missing evidence",
)
if any(notice in normalized for notice in notices):
return output_text
return prefix + output_text.lstrip()
def _build_task_evidence_packet(
self,
*,
@ -617,7 +388,6 @@ class TaskAttemptOrchestrator:
task: TaskRecord,
attempt_index: int,
result: AgentRunResult,
team_result: TeamRunResult | None,
) -> TaskEvidencePacket:
main_run = EvidenceBuilder(session_manager).build_run_evidence(
result.session_id,
@ -629,67 +399,7 @@ class TaskAttemptOrchestrator:
task_id=task.task_id,
attempt_index=attempt_index,
main_run=main_run,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results) if team_result is not None else [],
team_runs=[],
team_node_results=[],
final_output=result.output_text,
)
@staticmethod
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
node_lines = [
(
f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
)
for node in result.node_results
]
return "\n\n".join(
item
for item in [
"Task team execution result:",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Team success: {result.success}",
f"Team summary:\n{result.summary}",
"Node results:\n" + "\n\n".join(node_lines),
(
"Final synthesis instruction:\n" + plan.final_synthesis_instruction
if plan.final_synthesis_instruction
else None
),
(
"Use successful team outputs as internal evidence. If one or more nodes failed, "
"do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
"with available evidence and clearly state any missing or uncertain data."
),
]
if item
)
@staticmethod
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
return "\n\n".join(
[
"Task team execution failed before final synthesis.",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Error: {error}",
(
"Proceed as the main agent. Do not blindly repeat failed tool calls; "
"produce a user-visible fallback answer with available evidence and clearly "
"state any missing or uncertain data."
),
]
)
def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
def factory(node: ExecutionNode) -> Any:
node_kwargs = dict(kwargs)
node_kwargs.pop("provider_bundle", None)
if node.agent.model:
node_kwargs["model"] = node.agent.model
if node.agent.provider_name:
node_kwargs["provider_name"] = node.agent.provider_name
return self.make_provider_bundle_for_task(self.loaded, node_kwargs)
return factory

View File

@ -155,7 +155,10 @@ def evaluate_node_evidence(
if not output_text.strip():
_append_unique(gaps, "missing required evidence: output")
else:
_append_unique(gaps, f"unsupported evidence requirement: {requirement}")
# v1 only enforces the coarse machine-readable requirements above.
# Natural-language evidence requirements are preserved for later
# LLM-based validation and must not fail a node deterministically.
continue
return gaps

View File

@ -1,39 +1,27 @@
"""Internal Task execution planner for single-agent vs team execution."""
"""Internal Task execution planner for single-agent task attempts.
Team execution is now started explicitly through local Team Workflow MCP tools.
This planner only records why the normal Task attempt should continue as a
single root-agent run.
"""
from __future__ import annotations
import asyncio
import json
import os
from dataclasses import dataclass, field
from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.coordinator.models import ExecutionGraph
from beaver.engine.context import SkillContext
from beaver.engine.providers import ProviderBundle
from beaver.tools.registry import ToolRegistry
from .models import TaskRecord
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .skill_resolver import SkillResolutionReport
TaskExecutionMode = Literal["single", "team"]
# Temporary name-based denylist until high-risk tool approval is implemented.
# Keep this policy centralized so planner behavior cannot drift by call site.
HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
{
"delete_file",
"execute_command",
"external_send",
"send_email",
"terminal",
"write_file",
}
)
def _agent_team_enabled() -> bool:
return os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"}
@ -96,37 +84,7 @@ class TaskExecutionPlan:
class TaskExecutionPlanner:
"""Plan whether a Task attempt should run through a team first."""
_MAX_NODES = 6
_MAX_DEPTH = 4
_SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
_ALLOWED_NODE_FIELDS = {
"node_id",
"task",
"use_skill",
"skill_query",
"depends_on",
"input_contract",
"output_contract",
"requested_tools",
"required_evidence",
"evidence_contract",
"validation_rules",
"required_for_completion",
"block_downstream_on_partial",
"max_tool_iterations",
"constraints",
}
def __init__(
self,
*,
task_skill_resolver: TaskSkillResolver | None = None,
tool_registry: ToolRegistry | None = None,
) -> None:
self.task_skill_resolver = task_skill_resolver
self.tool_registry = tool_registry
"""Return the current Task execution mode for the root AgentLoop."""
async def plan(
self,
@ -144,122 +102,7 @@ class TaskExecutionPlanner:
return TaskExecutionPlan.single("planner_disabled_by_environment")
if not self._needs_team_planning(task=task, user_message=user_message):
return TaskExecutionPlan.single("planner_skipped_simple_task")
provider = None
model = None
if provider_bundle is not None:
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
model = getattr(runtime, "model", None)
if provider is None:
return TaskExecutionPlan.single("planner_provider_unavailable")
selected_template, base_adaptation = self._select_team_template(activated_skills or [])
try:
response = await asyncio.wait_for(
provider.chat(
messages=[
{
"role": "system",
"content": (
"You choose whether an internal Beaver Task attempt should run as a single "
"main-agent pass or use a small sub-agent team first. Return only compact JSON."
),
},
{
"role": "user",
"content": self._prompt(
task=task,
user_message=user_message,
attempt_index=attempt_index,
skill_summaries=skill_summaries or [],
tool_hints=tool_hints or [],
activated_skills=activated_skills or [],
selected_template=selected_template,
),
},
],
tools=None,
model=model,
max_tokens=4096,
temperature=0.0,
),
timeout=timeout_seconds,
)
try:
plan = self._from_json_or_raise(response.content or "")
except Exception as first_error:
repair_response = await asyncio.wait_for(
provider.chat(
messages=[
{
"role": "system",
"content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
},
{
"role": "user",
"content": (
"Repair the invalid planner JSON using the task-only schema from the original "
f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
),
},
],
tools=None,
model=model,
max_tokens=4096,
temperature=0.0,
),
timeout=timeout_seconds,
)
try:
plan = self._from_json_or_raise(repair_response.content or "")
except Exception as repair_error:
return TaskExecutionPlan.single(
"planner_fallback_single",
fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
planner_adaptation=base_adaptation,
)
self._merge_adaptation(plan, base_adaptation)
return await self._resolve_plan(
plan,
task=task,
user_message=user_message,
attempt_index=attempt_index,
provider_bundle=provider_bundle,
)
except Exception as exc:
detail = str(exc)
error = f"{type(exc).__name__}: {detail}" if detail else type(exc).__name__
return TaskExecutionPlan.single("planner_failed", fallback_error=error)
async def _resolve_plan(
    self,
    plan: TaskExecutionPlan,
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    provider_bundle: ProviderBundle | None,
) -> TaskExecutionPlan:
    """Run skill resolution over a team plan's graph, degrading to a single plan on failure.

    Args:
        plan: Plan produced by the planner; returned unchanged when it is not a
            team plan or when no ``task_skill_resolver`` is configured.
        task: Task record forwarded to the resolver.
        user_message: Current user request forwarded to the resolver.
        attempt_index: Attempt counter forwarded to the resolver.
        provider_bundle: Provider bundle forwarded to the resolver; when it is
            ``None`` a single-mode fallback plan is returned instead.

    Returns:
        The same ``plan`` object with ``graph`` and ``skill_resolution_report``
        replaced by the resolver's output, or a ``planner_fallback_single``
        plan carrying the failure reason.
    """
    # Nothing to resolve for single-mode plans or when no resolver is wired in.
    if not plan.is_team or self.task_skill_resolver is None:
        return plan
    if provider_bundle is None:
        return TaskExecutionPlan.single("planner_fallback_single", fallback_error="task_skill_resolver_provider_unavailable")
    try:
        # NOTE(review): ``assert`` is stripped under ``python -O``; a missing graph would
        # then be passed straight to the resolver.
        assert plan.graph is not None
        graph, reports = await self.task_skill_resolver.resolve_graph(
            plan.graph,
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
            provider_bundle=provider_bundle,
        )
        # Re-validate the graph returned by the resolver before adopting it.
        graph.validate()
        plan.graph = graph
        plan.skill_resolution_report = reports
        self._merge_skill_resolution_adaptation(plan, reports)
        return plan
    except Exception as exc:
        # Any resolver or validation failure degrades to a single-mode plan instead of propagating.
        return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")
    # NOTE(review): every path above returns, so this statement is unreachable where it
    # sits — TODO confirm whether it belongs to a different method.
    return TaskExecutionPlan.single("planner_team_replaced_by_workflow_tools")
@staticmethod
def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
@ -306,307 +149,3 @@ class TaskExecutionPlanner:
"端到端",
)
return any(marker in text for marker in complex_markers)
def from_json(self, text: str) -> TaskExecutionPlan:
    """Parse planner output leniently.

    Any parsing or validation error is converted into a single-mode
    ``planner_fallback_single`` plan whose ``fallback_error`` is the error text.
    """
    try:
        parsed_plan = self._from_json_or_raise(text)
    except Exception as exc:
        return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))
    return parsed_plan
def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
    """Parse planner output strictly, raising on malformed or invalid plans.

    Any ``mode`` other than ``"team"`` (after stripping and lower-casing) yields
    a single-mode plan. Team plans have their graph built and validated against
    ``self._MAX_DEPTH`` before being returned.
    """
    payload = self._parse_json_object(text)
    requested_mode = str(payload.get("mode") or "single").strip().lower()
    reason = str(payload.get("reason") or "")
    adaptation = self._adaptation_from_payload(payload)

    if requested_mode == "team":
        team_graph = self._graph_from_payload(payload, adaptation=adaptation)
        team_graph.validate(max_depth=self._MAX_DEPTH)
        synthesis = str(payload.get("final_synthesis_instruction") or "")
        return TaskExecutionPlan(
            mode="team",
            reason=reason or "planner_selected_team",
            graph=team_graph,
            final_synthesis_instruction=synthesis,
            planner_adaptation=adaptation,
        )

    return TaskExecutionPlan.single(
        reason or "planner_selected_single",
        planner_adaptation=adaptation,
    )
def _graph_from_payload(
    self,
    payload: dict[str, Any],
    *,
    adaptation: dict[str, Any],
) -> ExecutionGraph:
    """Build an ``ExecutionGraph`` from a planner payload that selected team mode.

    Args:
        payload: Parsed planner JSON object; ``strategy`` and ``nodes`` are read.
        adaptation: Mutable adaptation record. Tool warnings are appended to
            ``adaptation["warnings"]`` (the key must already exist) and per-node
            skill bindings are recorded under ``adaptation["node_skill_bindings"]``.

    Returns:
        A graph with one ``ExecutionNode`` per entry of ``payload["nodes"]``.

    Raises:
        ValueError: If the strategy is unsupported, the node list is missing,
            empty or longer than ``self._MAX_NODES``, a node is not an object,
            a node carries fields outside ``self._ALLOWED_NODE_FIELDS``, or a
            node lacks a ``node_id``/``task``.
    """
    strategy = str(payload.get("strategy") or "sequence").strip().lower()
    if strategy not in self._SUPPORTED_STRATEGIES:
        raise ValueError(f"Unsupported team strategy: {strategy}")
    raw_nodes = payload.get("nodes")
    if not isinstance(raw_nodes, list) or not raw_nodes:
        raise ValueError("Team plan requires at least one node")
    if len(raw_nodes) > self._MAX_NODES:
        raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")
    nodes: list[ExecutionNode] = []
    for index, item in enumerate(raw_nodes, start=1):
        if not isinstance(item, dict):
            raise ValueError("Each team node must be an object")
        unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS)
        if unsupported:
            raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}")
        # A missing node_id falls back to a positional name so the node stays addressable.
        node_id = str(item.get("node_id") or f"node_{index}").strip()
        task = str(item.get("task") or "").strip()
        if not node_id or not task:
            raise ValueError("Each team node requires node_id and task")
        allowed_tool_names = self._resolve_requested_tools(
            item.get("requested_tools"),
            warnings=adaptation["warnings"],
        )
        use_skill = _optional_str(item.get("use_skill"))
        # Without an explicit query the task text itself is used as the skill query.
        skill_query = _optional_str(item.get("skill_query")) or task
        if use_skill is not None or "skill_query" in item:
            adaptation.setdefault("node_skill_bindings", []).append(
                {
                    "node_id": node_id,
                    "use_skill": use_skill,
                    "skill_query": skill_query,
                }
            )
        nodes.append(
            ExecutionNode(
                node_id=node_id,
                task=task,
                agent=AgentDescriptor(
                    name=node_id,
                    role="",
                    system_prompt="",
                    metadata={
                        "use_skill": use_skill,
                        "skill_query": skill_query,
                        "required_capabilities": [],
                        "requested_tags": [],
                        "sub_agent_kind": "generic_skill_worker",
                    },
                ),
                # Normalised through _string_list like the other list fields: iterating the
                # raw value would split a bare string such as "collect" into characters.
                depends_on=_string_list(item.get("depends_on")),
                constraints=_string_list(item.get("constraints")),
                input_contract=_dict_value(item.get("input_contract")),
                output_contract=_dict_value(item.get("output_contract")),
                allowed_tool_names=allowed_tool_names,
                required_evidence=_string_list(item.get("required_evidence")),
                evidence_contract=_dict_value(item.get("evidence_contract")),
                validation_rules=_string_list(item.get("validation_rules")),
                required_for_completion=bool(item.get("required_for_completion", True)),
                block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)),
                max_tool_iterations=_optional_int(item.get("max_tool_iterations")),
            )
        )
    return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]
def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
    """Filter the tools a planner node asked for down to the ones it may use.

    ``None`` means the node made no request and is passed through. High-risk
    names and names missing from ``self.tool_registry`` are dropped, and a
    warning for each is appended (once) to ``warnings``.
    """
    if value is None:
        return None

    accepted: list[str] = []
    for tool_name in _string_list(value):
        if tool_name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
            _append_unique(warnings, f"requires_high_risk_review: {tool_name}")
        elif self.tool_registry is None or self.tool_registry.get(tool_name) is None:
            _append_unique(warnings, f"unknown tool removed: {tool_name}")
        else:
            accepted.append(tool_name)
    return accepted
@staticmethod
def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Copy ``payload["adaptation"]`` (when it is a dict) and normalise its ``warnings`` list."""
    declared = payload.get("adaptation")
    adaptation: dict[str, Any] = {}
    if isinstance(declared, dict):
        adaptation.update(declared)
    adaptation["warnings"] = _string_list(adaptation.get("warnings"))
    return adaptation
@staticmethod
def _select_team_template(
    activated_skills: list[SkillContext],
) -> tuple[SkillContext | None, dict[str, Any]]:
    """Pick the primary team template among the activated skills.

    Returns the first skill whose ``team_template`` is a dict with a list under
    ``"nodes"`` (or ``None``), together with a base adaptation record naming the
    selection, the ignored candidates and every skill's template warnings.
    """
    templated: list[SkillContext] = []
    for skill in activated_skills:
        if isinstance(skill.team_template, dict) and isinstance(skill.team_template.get("nodes"), list):
            templated.append(skill)

    warnings: list[str] = []
    for skill in activated_skills:
        for warning in skill.team_template_warnings:
            _append_unique(warnings, f"{skill.name}: {warning}")

    selected = templated[0] if templated else None
    if selected:
        selected_name = selected.name
        selection_reason = "first activated skill with a valid team template"
    else:
        selected_name = None
        selection_reason = "no activated skill has a valid team template"

    adaptation = {
        "template_used": False,
        "selected_template": selected_name,
        "selection_reason": selection_reason,
        "ignored_templates": [skill.name for skill in templated[1:]],
        "warnings": warnings,
    }
    return selected, adaptation
@staticmethod
def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
    """Replace ``plan.planner_adaptation`` with a merge of template selection and planner data.

    Selection fields come from ``base``; ``template_used`` and
    ``node_skill_bindings`` come from the plan's own adaptation; warnings from
    both are combined (base first) without duplicates.
    """
    planner_side = dict(plan.planner_adaptation)
    base_warnings = base.get("warnings", [])
    planner_warnings = planner_side.get("warnings", [])

    combined_warnings: list[str] = []
    for warning in [*base_warnings, *planner_warnings]:
        _append_unique(combined_warnings, str(warning))

    merged: dict[str, Any] = {
        "template_used": bool(planner_side.get("template_used", False)),
        "selected_template": base.get("selected_template"),
        "selection_reason": base.get("selection_reason"),
        "ignored_templates": list(base.get("ignored_templates", [])),
        "warnings": combined_warnings,
    }
    bindings = planner_side.get("node_skill_bindings")
    if isinstance(bindings, list):
        merged["node_skill_bindings"] = [dict(entry) for entry in bindings if isinstance(entry, dict)]
    plan.planner_adaptation = merged
@staticmethod
def _merge_skill_resolution_adaptation(
    plan: TaskExecutionPlan,
    reports: list[SkillResolutionReport],
) -> None:
    """Fold skill-resolution reports into ``plan.planner_adaptation`` in place.

    Report warnings are appended (de-duplicated) to the adaptation's warnings.
    A node binding whose requested skill was not bound exactly gains a
    ``fallback_reason`` entry.
    """
    adaptation = plan.planner_adaptation
    warnings = adaptation.setdefault("warnings", [])

    binding_by_node: dict[str, Any] = {}
    for entry in adaptation.get("node_skill_bindings") or []:
        if isinstance(entry, dict):
            binding_by_node[str(entry.get("node_id"))] = entry

    for report in reports:
        for warning in report.warnings:
            _append_unique(warnings, warning)
        binding = binding_by_node.get(report.node_id)
        if binding is None:
            continue
        if report.requested_skill_name and not report.exact_binding_used:
            binding["fallback_reason"] = f"use_skill unresolved; {report.reason}"
@staticmethod
def _prompt(
*,
task: TaskRecord,
user_message: str,
attempt_index: int,
skill_summaries: list[str] | None = None,
tool_hints: list[str] | None = None,
activated_skills: list[SkillContext] | None = None,
selected_template: SkillContext | None = None,
) -> str:
history_note = ""
if task.feedback:
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
skill_note = ""
if skill_summaries:
skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries)
guidance_note = ""
if activated_skills:
guidance_note = "\nActivated Skill guidance:\n" + "\n".join(
f"[{skill.name}]\n{skill.content}" for skill in activated_skills
)
template_note = ""
if selected_template is not None:
template_note = "\nPrimary Skill team template:\n" + json.dumps(
{
"skill_name": selected_template.name,
"skill_version": selected_template.version,
"template": selected_template.team_template,
},
ensure_ascii=False,
indent=2,
)
tool_note = ""
if tool_hints:
tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints)
return (
"Decide execution mode for this internal Task attempt.\n"
"Use mode=team only when independent research, review, implementation slices, or staged checks "
"would materially improve the result. Otherwise use mode=single.\n\n"
"JSON schema:\n"
"{\n"
' "mode": "single" | "team",\n'
' "reason": "short reason",\n'
' "strategy": "sequence" | "parallel" | "dag",\n'
' "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
'"skill_query": "optional dynamic skill query", "depends_on": [], '
'"input_contract": {}, "output_contract": {}, "requested_tools": [], '
'"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
'"required_for_completion": true, "block_downstream_on_partial": false, '
'"max_tool_iterations": 3, "constraints": []}],\n'
' "adaptation": {"template_used": true, "warnings": []},\n'
' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
"}\n\n"
"Node definitions are task-only. Never output agent or role fields. Use at most one primary "
"Skill template; treat all other activated Skills as guidance.\n\n"
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Attempt index: {attempt_index}\n"
f"{skill_note}"
f"{guidance_note}"
f"{template_note}"
f"{tool_note}"
f"{history_note}"
)
@staticmethod
def _parse_json_object(text: str) -> dict[str, Any]:
cleaned = text.strip()
if cleaned.startswith("```"):
cleaned = cleaned.strip("`")
if cleaned.lower().startswith("json"):
cleaned = cleaned[4:].strip()
start = cleaned.find("{")
end = cleaned.rfind("}")
if start >= 0 and end >= start:
cleaned = cleaned[start : end + 1]
payload = json.loads(cleaned)
if not isinstance(payload, dict):
raise ValueError("planner response must be a JSON object")
return payload
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
text = str(value).strip()
return text or None
def _optional_int(value: Any) -> int | None:
if value in (None, ""):
return None
if isinstance(value, bool):
raise ValueError("max_tool_iterations must be an integer")
result = int(value)
if result < 0:
raise ValueError("max_tool_iterations must be non-negative")
return result
def _dict_value(value: Any) -> dict[str, Any]:
return dict(value) if isinstance(value, dict) else {}
def _append_unique(values: list[str], value: str) -> None:
if value and value not in values:
values.append(value)
def _string_list(value: Any) -> list[str]:
if not isinstance(value, list):
if isinstance(value, str):
value = [item.strip() for item in value.split(",")]
else:
return []
result: list[str] = []
for item in value:
text = str(item).strip()
if text and text not in result:
result.append(text)
return result

View File

@ -0,0 +1,2 @@
"""Local team workflow graph builders."""

View File

@ -0,0 +1,70 @@
"""AgentRearrange graph builder using arrow/comma flow syntax."""
from __future__ import annotations
from typing import Any, Iterable
from beaver.coordinator.models import ExecutionGraph
from .base import (
WorkflowAgentSpec,
agent_name_set,
build_graph_from_dependencies,
edges_to_dependencies,
parse_agents,
validate_no_disconnected_agents,
)
WORKFLOW_NAME = "AgentRearrange"
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    flow: str,
) -> ExecutionGraph:
    """Build a DAG from agents and an arrow/comma ``flow`` string.

    ``task`` is accepted for a uniform builder signature but is not used. Every
    agent must take part in at least one edge derived from ``flow``.
    """
    del task
    specs = parse_agents(agents)
    flow_edges = parse_flow(flow, known_agents=agent_name_set(specs))
    dependency_map = edges_to_dependencies(agents=specs, edges=flow_edges)
    validate_no_disconnected_agents(agents=specs, dependencies=dependency_map)
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=specs,
        dependencies=dependency_map,
    )
def parse_flow(flow: str, *, known_agents: set[str]) -> list[tuple[str, str]]:
    """Translate a flow string into ``(source, target)`` edges.

    Every agent of one stage is connected to every agent of the next stage.
    Edges are returned in first-seen order without duplicates.

    Raises:
        ValueError: If the flow is malformed or names an agent outside ``known_agents``.
    """
    stages = _parse_stages(flow)

    # Validate every name before producing any edge.
    for stage in stages:
        unknown = [name for name in stage if name not in known_agents]
        if unknown:
            raise ValueError(f"workflow flow references unknown agent: {unknown[0]}")

    edges: list[tuple[str, str]] = []
    for position in range(len(stages) - 1):
        upstream_stage = stages[position]
        downstream_stage = stages[position + 1]
        for source in upstream_stage:
            for target in downstream_stage:
                if (source, target) in edges:
                    continue
                edges.append((source, target))
    return edges
def _parse_stages(flow: str) -> list[list[str]]:
raw_flow = str(flow or "").strip()
if not raw_flow:
raise ValueError("workflow flow is required")
stages: list[list[str]] = []
for raw_stage in raw_flow.split("->"):
names = [name.strip() for name in raw_stage.split(",") if name.strip()]
if not names:
raise ValueError("workflow flow contains an empty stage")
if len(names) != len(set(names)):
raise ValueError("workflow flow contains duplicate agent names in a stage")
stages.append(names)
if len(stages) < 2:
raise ValueError("workflow flow must contain at least two stages")
return stages

View File

@ -0,0 +1,273 @@
"""Shared builders for local team workflow graph construction."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Iterable, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
GraphStrategy = Literal["sequence", "parallel", "dag"]
@dataclass(slots=True)
class WorkflowAgentSpec:
    """Declarative description of one agent slot in a team workflow.

    ``build_node`` converts a spec into an ``ExecutionNode``; the comments below
    note where each field ends up on that node.
    """

    # Unique agent name; becomes the node_id and the AgentDescriptor name.
    name: str
    # Instruction text; becomes the node's task.
    instruction: str
    # Optional skill hints; copied into the agent metadata only when non-empty.
    use_skill: str | None = None
    skill_query: str | None = None
    # Tool names for the node; None is passed through as None.
    allowed_tool_names: list[str] | None = None
    # Evidence and validation settings, copied onto the node as-is.
    required_evidence: list[str] = field(default_factory=list)
    evidence_contract: dict[str, Any] = field(default_factory=dict)
    validation_rules: list[str] = field(default_factory=list)
    # Completion and blocking flags, copied onto the node as-is.
    required_for_completion: bool = True
    block_downstream_on_partial: bool = False
    # Per-node tool-iteration limit; None leaves the choice to the runtime.
    max_tool_iterations: int | None = None
    # Constraints, expected output and I/O contracts, copied onto the node as-is.
    constraints: list[str] = field(default_factory=list)
    expected_output: str | None = None
    input_contract: dict[str, Any] = field(default_factory=dict)
    output_contract: dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class WorkflowBuildResult:
    """Container holding an ``ExecutionGraph`` together with a workflow name."""

    # The constructed execution graph.
    graph: ExecutionGraph
    # Presumably the workflow builder that produced ``graph`` — TODO confirm against callers.
    workflow_name: str
def parse_agents(raw_agents: Iterable[WorkflowAgentSpec | dict[str, Any]]) -> list[WorkflowAgentSpec]:
    """Normalise raw agent entries into validated ``WorkflowAgentSpec`` objects.

    Dict entries are converted, existing specs are kept as-is, and the final
    list is checked for missing or duplicate names.

    Raises:
        ValueError: If an entry is neither a spec nor a dict, or validation fails.
    """
    specs: list[WorkflowAgentSpec] = []
    for position, entry in enumerate(raw_agents, start=1):
        if isinstance(entry, dict):
            specs.append(_agent_from_dict(entry, index=position))
        elif isinstance(entry, WorkflowAgentSpec):
            specs.append(entry)
        else:
            raise ValueError("workflow agents must be objects")
    validate_agent_names(specs)
    return specs
def validate_agent_names(agents: list[WorkflowAgentSpec]) -> None:
    """Check that the agent list is non-empty and every agent is named, instructed and unique.

    Raises:
        ValueError: On an empty list, a missing name or instruction, or a duplicate name.
    """
    if not agents:
        raise ValueError("workflow requires at least one agent")

    used_names: set[str] = set()
    for spec in agents:
        current = spec.name
        if not current:
            raise ValueError("workflow agent name is required")
        if not spec.instruction:
            raise ValueError(f"workflow agent {current!r} requires instruction")
        if current in used_names:
            raise ValueError(f"workflow agent names must be unique: {current}")
        used_names.add(current)
def agent_name_set(agents: list[WorkflowAgentSpec]) -> set[str]:
    """Return the set of names carried by ``agents``."""
    names: set[str] = set()
    for spec in agents:
        names.add(spec.name)
    return names
def build_graph_from_dependencies(
    *,
    workflow_name: str,
    strategy: GraphStrategy,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
) -> ExecutionGraph:
    """Create and validate an ``ExecutionGraph`` with one node per agent.

    Agents without an entry in ``dependencies`` get no upstream nodes.
    """
    nodes = []
    for spec in agents:
        upstream = dependencies.get(spec.name, [])
        nodes.append(build_node(workflow_name=workflow_name, agent=spec, depends_on=upstream))

    built = ExecutionGraph(strategy=strategy, nodes=nodes)
    built.validate()
    return built
def build_node(
    *,
    workflow_name: str,
    agent: WorkflowAgentSpec,
    depends_on: list[str],
) -> ExecutionNode:
    """Convert one agent spec into an ``ExecutionNode``.

    The node id and descriptor name are the agent name and the task is the
    agent instruction. List and dict fields are copied so the node does not
    share containers with the spec.
    """
    metadata = {
        "sub_agent_kind": "generic_skill_worker",
        "workflow_tool": workflow_name,
        "workflow_agent_name": agent.name,
    }
    # Skill hints are recorded only when they carry a value.
    for hint_key, hint_value in (("use_skill", agent.use_skill), ("skill_query", agent.skill_query)):
        if hint_value:
            metadata[hint_key] = hint_value

    descriptor = AgentDescriptor(
        name=agent.name,
        role="",
        system_prompt="",
        metadata=metadata,
    )

    tool_names = agent.allowed_tool_names
    if tool_names is not None:
        tool_names = list(tool_names)

    return ExecutionNode(
        node_id=agent.name,
        task=agent.instruction,
        agent=descriptor,
        depends_on=list(depends_on),
        constraints=list(agent.constraints),
        expected_output=agent.expected_output,
        input_contract=dict(agent.input_contract),
        output_contract=dict(agent.output_contract),
        allowed_tool_names=tool_names,
        required_evidence=list(agent.required_evidence),
        evidence_contract=dict(agent.evidence_contract),
        validation_rules=list(agent.validation_rules),
        required_for_completion=agent.required_for_completion,
        block_downstream_on_partial=agent.block_downstream_on_partial,
        max_tool_iterations=agent.max_tool_iterations,
    )
def edges_to_dependencies(
    *,
    agents: list[WorkflowAgentSpec],
    edges: Iterable[tuple[str, str] | list[str]],
) -> dict[str, list[str]]:
    """Turn ``(source, target)`` edges into a ``target -> [sources]`` mapping.

    Every agent gets an entry, even with no incoming edges. Repeated edges are
    recorded once.

    Raises:
        ValueError: If an edge is malformed, names an unknown agent, or links
            an agent to itself.
    """
    known = agent_name_set(agents)
    dependencies: dict[str, list[str]] = {spec.name: [] for spec in agents}

    for raw_edge in edges:
        source, target = _parse_edge(raw_edge)
        for endpoint in (source, target):
            if endpoint not in known:
                raise ValueError(f"workflow edge references unknown agent: {endpoint}")
        if source == target:
            raise ValueError(f"workflow edge creates a self-cycle: {source}")
        upstream = dependencies[target]
        if source not in upstream:
            upstream.append(source)
    return dependencies
def validate_output_agent(
    *,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
    output_agent: str,
    allow_disconnected: bool = False,
) -> None:
    """Check that ``output_agent`` exists, has upstream agents and, by default, reaches everyone.

    Raises:
        ValueError: If the output agent is unknown, has no upstream agents, or
            (unless ``allow_disconnected``) some agent is neither the output
            agent nor upstream of it.
    """
    known = agent_name_set(agents)
    if output_agent not in known:
        raise ValueError(f"workflow output_agent references unknown agent: {output_agent}")

    upstream = _upstream_nodes(output_agent, dependencies)
    if not upstream:
        raise ValueError(f"workflow output_agent {output_agent!r} must be reachable from upstream agents")

    if not allow_disconnected:
        reachable = set(upstream) | {output_agent}
        disconnected = sorted(known - reachable)
        if disconnected:
            raise ValueError(f"workflow has disconnected agent(s): {', '.join(disconnected)}")
def validate_no_disconnected_agents(
    *,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
) -> None:
    """Reject workflows in which some agent takes part in no edge at all.

    Raises:
        ValueError: Listing, in sorted order, the agents that are neither a
            source nor a target of any dependency.
    """
    known = agent_name_set(agents)

    linked: set[str] = {target for target, sources in dependencies.items() if sources}
    for sources in dependencies.values():
        if sources:
            linked.update(sources)

    isolated = sorted(known - linked)
    if isolated:
        raise ValueError(f"workflow has disconnected agent(s): {', '.join(isolated)}")
def _agent_from_dict(raw: dict[str, Any], *, index: int) -> WorkflowAgentSpec:
    """Build a ``WorkflowAgentSpec`` from one raw agent dict.

    ``index`` is the 1-based position used in error labels such as
    ``agents[2].name``.

    Raises:
        ValueError: If ``name`` or ``instruction`` is missing, or a field has
            the wrong shape.
    """
    read = raw.get
    label = f"agents[{index}]"
    agent_name = _required_str(read("name"), f"{label}.name")
    agent_instruction = _required_str(read("instruction"), f"{label}.instruction")
    return WorkflowAgentSpec(
        name=agent_name,
        instruction=agent_instruction,
        use_skill=_optional_str(read("use_skill")),
        skill_query=_optional_str(read("skill_query")),
        allowed_tool_names=_optional_string_list(read("allowed_tool_names")),
        required_evidence=_string_list(read("required_evidence")),
        evidence_contract=_dict(read("evidence_contract")),
        validation_rules=_string_list(read("validation_rules")),
        required_for_completion=bool(read("required_for_completion", True)),
        block_downstream_on_partial=bool(read("block_downstream_on_partial", False)),
        max_tool_iterations=_optional_int(read("max_tool_iterations")),
        constraints=_string_list(read("constraints")),
        expected_output=_optional_str(read("expected_output")),
        input_contract=_dict(read("input_contract")),
        output_contract=_dict(read("output_contract")),
    )
def _parse_edge(raw_edge: tuple[str, str] | list[str]) -> tuple[str, str]:
    """Validate one raw edge and return it as a ``(source, target)`` tuple of stripped names.

    Raises:
        ValueError: If the edge is not a two-item list/tuple or an endpoint is blank.
    """
    well_formed = isinstance(raw_edge, (list, tuple)) and len(raw_edge) == 2
    if not well_formed:
        raise ValueError("workflow edges must be [source, target] pairs")
    first, second = raw_edge
    return _required_str(first, "edge source"), _required_str(second, "edge target")
def _upstream_nodes(node_id: str, dependencies: dict[str, list[str]]) -> set[str]:
result: set[str] = set()
def visit(current: str) -> None:
for dependency in dependencies.get(current, []):
if dependency in result:
continue
result.add(dependency)
visit(dependency)
visit(node_id)
return result
def _required_str(value: Any, label: str) -> str:
text = str(value or "").strip()
if not text:
raise ValueError(f"{label} is required")
return text
def _optional_str(value: Any) -> str | None:
text = str(value or "").strip()
return text or None
def _string_list(value: Any) -> list[str]:
if value is None:
return []
if not isinstance(value, list):
raise ValueError("expected a list of strings")
return [str(item).strip() for item in value if str(item).strip()]
def _optional_string_list(value: Any) -> list[str] | None:
    """Like ``_string_list`` but keeps ``None`` as ``None`` instead of an empty list."""
    return None if value is None else _string_list(value)
def _dict(value: Any) -> dict[str, Any]:
return dict(value) if isinstance(value, dict) else {}
def _optional_int(value: Any) -> int | None:
if value is None:
return None
try:
return int(value)
except (TypeError, ValueError) as exc:
raise ValueError("max_tool_iterations must be an integer") from exc

View File

@ -0,0 +1,26 @@
"""ConcurrentWorkflow graph builder."""
from __future__ import annotations
from typing import Any, Iterable
from beaver.coordinator.models import ExecutionGraph
from .base import WorkflowAgentSpec, build_graph_from_dependencies, parse_agents
WORKFLOW_NAME = "ConcurrentWorkflow"
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
) -> ExecutionGraph:
    """Build a parallel graph in which no agent depends on any other.

    ``task`` is accepted for a uniform builder signature but is not used.
    """
    del task
    specs = parse_agents(agents)
    no_dependencies: dict[str, list[str]] = {}
    for spec in specs:
        no_dependencies[spec.name] = []
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="parallel",
        agents=specs,
        dependencies=no_dependencies,
    )

View File

@ -0,0 +1,174 @@
"""Runtime bridge for local team workflow MCP tools."""
from __future__ import annotations
import json
from typing import Any, Callable
from beaver.coordinator.models import ExecutionGraph, TeamRunResult
from beaver.tools.base import ToolContext, ToolResult
from . import agent_rearrange, concurrent, graph, mixture_of_agents, sequential
GraphBuilder = Callable[..., ExecutionGraph]
class TeamWorkflowExecutor:
    """Execute workflow MCP calls inside the current Beaver runtime.

    Each supported workflow name maps to a graph builder. ``execute`` builds
    the graph from the tool arguments, runs it as a team and reports the
    outcome as a ``ToolResult`` whose content is a JSON payload.
    """

    # Workflow tool name -> builder that turns the tool arguments into a graph.
    _BUILDERS: dict[str, GraphBuilder] = {
        "SequentialWorkflow": sequential.build_graph,
        "ConcurrentWorkflow": concurrent.build_graph,
        "MixtureOfAgents": mixture_of_agents.build_graph,
        "AgentRearrange": agent_rearrange.build_graph,
        "GraphWorkflow": graph.build_graph,
    }

    async def execute(
        self,
        workflow_name: str,
        arguments: dict[str, Any],
        context: ToolContext,
        *,
        tool_name: str | None = None,
    ) -> ToolResult:
        """Build and run the named workflow, never raising to the caller.

        Args:
            workflow_name: Key into ``_BUILDERS``.
            arguments: Keyword arguments forwarded to the graph builder.
            context: Tool context supplying metadata, services and session id.
            tool_name: Name reported on the ``ToolResult``; defaults to
                ``workflow_name``.

        Returns:
            A ``ToolResult`` with ``success=True`` and the run payload when the
            graph was built and executed, or ``success=False`` with an error
            payload when any step raised.
        """
        exposed_name = tool_name or workflow_name
        try:
            # Presumably a "team:" source marks a call made from inside a team node;
            # such calls are refused so that teams cannot nest.
            if str(context.metadata.get("source") or "").startswith("team:"):
                raise ValueError("nested_team_workflow_not_allowed")
            builder = self._BUILDERS.get(workflow_name)
            if builder is None:
                raise ValueError(f"unknown team workflow tool: {workflow_name}")
            # Named team_graph so the local does not shadow the imported ``graph`` module.
            team_graph = builder(**dict(arguments or {}))
            parent_task_id = _task_id(context)
            parent_session_id = _session_id(context)
            result = await self._run_team(
                context=context,
                graph=team_graph,
                parent_task_id=parent_task_id,
                parent_session_id=parent_session_id,
            )
            payload = _success_payload(
                workflow_name=workflow_name,
                graph=team_graph,
                result=result,
            )
            return ToolResult(
                success=True,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=exposed_name,
                raw_output=payload,
            )
        except Exception as exc:
            # Tool boundary: report the failure instead of propagating it. Exceptions raised
            # without a message stringify to "", so fall back to the exception type name to
            # keep the reported error non-empty.
            detail = str(exc) or type(exc).__name__
            payload = {
                "success": False,
                "workflow": workflow_name,
                "error": detail,
            }
            return ToolResult(
                success=False,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=exposed_name,
                error=detail,
                raw_output=payload,
            )

    async def _run_team(
        self,
        *,
        context: ToolContext,
        graph: ExecutionGraph,
        parent_task_id: str,
        parent_session_id: str,
    ) -> TeamRunResult:
        """Run ``graph`` with the injected team runner, or with a ``TeamService`` built here.

        Raises:
            ValueError: If the context services provide neither
                ``agent_team_runner`` nor ``agent_loop``.
        """
        runner = context.services.get("agent_team_runner")
        parent_run_id = _run_id(context)
        # An injected runner takes precedence over building a TeamService here.
        if runner is not None:
            return await runner(
                graph,
                parent_task_id=parent_task_id,
                parent_session_id=parent_session_id,
                parent_run_id=parent_run_id,
            )
        agent_loop = context.services.get("agent_loop")
        if agent_loop is None:
            raise ValueError("team workflow execution requires agent_loop or agent_team_runner")
        provider_bundle = context.services.get("provider_bundle")

        def provider_bundle_factory(_node: Any) -> Any:
            # Every node receives the same bundle taken from the context services.
            return provider_bundle

        # Imported at call time rather than at module import time.
        from beaver.engine import AgentLoop
        from beaver.services.team_service import TeamService

        loaded = context.services.get("loaded")
        # A fresh loop built from the current loop's profile and loader, given the
        # "loaded" object from the context services.
        team_loop = AgentLoop(profile=agent_loop.profile, loader=agent_loop.loader)
        team_loop.loaded = loaded
        return await TeamService(team_loop).run_team(
            graph,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle_factory=provider_bundle_factory if provider_bundle is not None else None,
            allow_candidate_generation=False,
        )
def _task_id(context: ToolContext) -> str:
value = str(context.services.get("task_id") or context.metadata.get("task_id") or "").strip()
if not value:
raise ValueError("team workflow execution requires task_id")
return value
def _session_id(context: ToolContext) -> str:
value = str(context.session_id or context.services.get("session_id") or "").strip()
if not value:
raise ValueError("team workflow execution requires session_id")
return value
def _run_id(context: ToolContext) -> str | None:
return str(context.services.get("run_id") or context.metadata.get("run_id") or "").strip() or None
def _success_payload(
    *,
    workflow_name: str,
    graph: ExecutionGraph,
    result: TeamRunResult,
) -> dict[str, Any]:
    """Assemble the JSON-ready payload describing a finished team run.

    ``success`` mirrors ``result.success``, so the payload can report a failed
    team run even though the workflow call itself completed.
    """
    payload: dict[str, Any] = {"success": result.success, "workflow": workflow_name}
    payload["summary"] = result.summary
    payload["run_ids"] = list(result.run_ids)
    payload["session_ids"] = list(result.session_ids)
    payload["node_results"] = [node_result.to_dict() for node_result in result.node_results]
    payload["graph"] = _graph_to_dict(graph)
    return payload
def _graph_to_dict(graph: ExecutionGraph) -> dict[str, Any]:
return {
"strategy": graph.strategy,
"nodes": [
{
"node_id": node.node_id,
"task": node.task,
"depends_on": list(node.depends_on),
"allowed_tool_names": (
None if node.allowed_tool_names is None else list(node.allowed_tool_names)
),
"required_evidence": list(node.required_evidence),
"evidence_contract": dict(node.evidence_contract),
"validation_rules": list(node.validation_rules),
"required_for_completion": node.required_for_completion,
"block_downstream_on_partial": node.block_downstream_on_partial,
"max_tool_iterations": node.max_tool_iterations,
"metadata": dict(node.agent.metadata),
}
for node in graph.nodes
],
}

View File

@ -0,0 +1,45 @@
"""GraphWorkflow explicit DAG builder."""
from __future__ import annotations
from typing import Any, Iterable
from beaver.coordinator.models import ExecutionGraph
from .base import (
WorkflowAgentSpec,
build_graph_from_dependencies,
edges_to_dependencies,
parse_agents,
validate_output_agent,
)
WORKFLOW_NAME = "GraphWorkflow"
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    edges: Iterable[tuple[str, str] | list[str]],
    output_agent: str,
    allow_disconnected: bool = False,
) -> ExecutionGraph:
    """Build a DAG from explicit ``[source, target]`` edges.

    ``task`` is accepted for a uniform builder signature but is not used.

    Raises:
        ValueError: If no edges are given, or agent, edge or output-agent
            validation fails.
    """
    del task
    specs = parse_agents(agents)
    declared_edges = list(edges or [])
    if len(declared_edges) == 0:
        raise ValueError("GraphWorkflow requires edges")

    dependency_map = edges_to_dependencies(agents=specs, edges=declared_edges)
    final_agent = str(output_agent or "").strip()
    validate_output_agent(
        agents=specs,
        dependencies=dependency_map,
        output_agent=final_agent,
        allow_disconnected=allow_disconnected,
    )
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=specs,
        dependencies=dependency_map,
    )

View File

@ -0,0 +1,261 @@
"""MCP schema tools for local team workflow graph builders."""
from __future__ import annotations
import json
from typing import Any, Callable
from beaver.coordinator.models import ExecutionGraph
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
from . import agent_rearrange, concurrent, graph, mixture_of_agents, sequential
GraphBuilder = Callable[..., ExecutionGraph]
def create_team_workflow_tools() -> list[BaseTool]:
    """Create one schema tool per supported team workflow, in a fixed order."""
    # (tool name, description, input-schema factory, graph builder)
    catalog = (
        (
            "SequentialWorkflow",
            (
                "Build a sequential Beaver team workflow graph. Use this for staged work "
                "where each agent depends on the previous agent's output."
            ),
            _sequential_schema,
            sequential.build_graph,
        ),
        (
            "ConcurrentWorkflow",
            (
                "Build a concurrent Beaver team workflow graph. Use this only when agents "
                "can work independently on the same task."
            ),
            _concurrent_schema,
            concurrent.build_graph,
        ),
        (
            "MixtureOfAgents",
            (
                "Build a mixture-of-agents Beaver team workflow graph where independent "
                "expert agents feed one aggregator agent."
            ),
            _mixture_schema,
            mixture_of_agents.build_graph,
        ),
        (
            "AgentRearrange",
            (
                "Build a Beaver team workflow graph from strict flow syntax. Use '->' for "
                "stage order and ',' for agents in the same parallel stage."
            ),
            _agent_rearrange_schema,
            agent_rearrange.build_graph,
        ),
        (
            "GraphWorkflow",
            (
                "Build an explicit Beaver DAG workflow graph. Use this advanced tool only "
                "when the dependency edges must be specified directly."
            ),
            _graph_schema,
            graph.build_graph,
        ),
    )
    tools: list[BaseTool] = []
    for tool_name, tool_description, schema_factory, graph_builder in catalog:
        tools.append(
            TeamWorkflowSchemaTool(
                name=tool_name,
                description=tool_description,
                input_schema=schema_factory(),
                builder=graph_builder,
            )
        )
    return tools
class TeamWorkflowSchemaTool(BaseTool):
    """Tool that validates workflow arguments by building the graph and returning it as JSON.

    ``invoke`` only constructs the graph; it does not run it.
    """

    def __init__(
        self,
        *,
        name: str,
        description: str,
        input_schema: dict[str, Any],
        builder: GraphBuilder,
    ) -> None:
        self._builder = builder
        self._spec = ToolSpec(
            name=name,
            description=description,
            input_schema=input_schema,
            toolset="team_workflow",
            always_available=False,
            metadata={"category": "team_workflow"},
        )

    @property
    def spec(self) -> ToolSpec:
        """The tool specification created at construction time."""
        return self._spec

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Build the workflow graph from ``arguments`` and report it, never raising on build errors.

        ``context`` is accepted for the tool interface but is not used.
        """
        del context
        try:
            built_graph = self._builder(**dict(arguments or {}))
            success_payload = {
                "success": True,
                "workflow": self.spec.name,
                "graph": _graph_to_dict(built_graph),
            }
            return ToolResult(
                success=True,
                content=json.dumps(success_payload, ensure_ascii=False),
                tool_name=self.spec.name,
                raw_output=success_payload,
            )
        except Exception as exc:
            message = str(exc)
            failure_payload = {"success": False, "workflow": self.spec.name, "error": message}
            return ToolResult(
                success=False,
                content=json.dumps(failure_payload, ensure_ascii=False),
                tool_name=self.spec.name,
                error=message,
                raw_output=failure_payload,
            )
def _graph_to_dict(graph: ExecutionGraph) -> dict[str, Any]:
return {
"strategy": graph.strategy,
"nodes": [
{
"node_id": node.node_id,
"task": node.task,
"depends_on": list(node.depends_on),
"allowed_tool_names": (
None if node.allowed_tool_names is None else list(node.allowed_tool_names)
),
"required_evidence": list(node.required_evidence),
"evidence_contract": dict(node.evidence_contract),
"validation_rules": list(node.validation_rules),
"required_for_completion": node.required_for_completion,
"block_downstream_on_partial": node.block_downstream_on_partial,
"max_tool_iterations": node.max_tool_iterations,
"metadata": dict(node.agent.metadata),
}
for node in graph.nodes
],
}
def _sequential_schema() -> dict[str, Any]:
    """Input schema for the SequentialWorkflow tool: a task plus an agent list."""
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents"]
    schema["additionalProperties"] = False
    return schema
def _concurrent_schema() -> dict[str, Any]:
    """Input schema for the ConcurrentWorkflow tool: a task plus an agent list."""
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents"]
    schema["additionalProperties"] = False
    return schema
def _mixture_schema() -> dict[str, Any]:
    """Input schema for the MixtureOfAgents tool: task, expert agents and one aggregator."""
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(description="Expert agents that run independently before aggregation."),
        "aggregator": _agent_schema(description="Aggregator agent that synthesizes expert outputs."),
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents", "aggregator"]
    schema["additionalProperties"] = False
    return schema
def _agent_rearrange_schema() -> dict[str, Any]:
    """Return the argument schema for the agent-rearrange workflow.

    The schema describes an object that requires ``task``, ``agents`` and a
    ``flow`` string, and rejects any other property.
    """
    properties: dict[str, Any] = {}
    properties["task"] = _task_schema()
    properties["agents"] = _agents_schema()
    properties["flow"] = {
        "type": "string",
        "description": "Strict flow syntax, e.g. 'collector -> tactics, players -> synthesizer'.",
    }
    return {
        "type": "object",
        "properties": properties,
        "required": ["task", "agents", "flow"],
        "additionalProperties": False,
    }
def _graph_schema() -> dict[str, Any]:
    """Return the argument schema for the graph workflow.

    The schema describes an object that requires ``task``, ``agents``,
    ``edges`` and ``output_agent``, optionally accepts ``allow_disconnected``,
    and rejects any other property.
    """
    # Each edge is exactly two strings: [source_agent, target_agent].
    edge_pair = {
        "type": "array",
        "minItems": 2,
        "maxItems": 2,
        "items": {"type": "string"},
    }
    edges = {
        "type": "array",
        "description": "Directed dependency edges as [source_agent, target_agent] pairs.",
        "items": edge_pair,
    }
    output_agent = {
        "type": "string",
        "description": "Final output/synthesis agent. Must be reachable from upstream agents.",
    }
    allow_disconnected = {
        "type": "boolean",
        "description": "Allow agents that are not connected to output_agent. Defaults to false.",
    }
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
        "edges": edges,
        "output_agent": output_agent,
        "allow_disconnected": allow_disconnected,
    }
    return {
        "type": "object",
        "properties": properties,
        "required": ["task", "agents", "edges", "output_agent"],
        "additionalProperties": False,
    }
def _task_schema() -> dict[str, Any]:
    """Return a new schema fragment for the string-valued ``task`` property."""
    return dict(
        type="string",
        description="Overall user task this workflow supports.",
    )
def _agents_schema(
    *,
    description: str = "Workflow agents in the order or set used by this workflow.",
) -> dict[str, Any]:
    """Return the schema fragment for a non-empty array of workflow agents.

    Args:
        description: Text placed in the fragment's ``description`` field.

    Returns:
        An array schema whose items follow ``_agent_schema()`` and which
        requires at least one item.
    """
    schema: dict[str, Any] = {"type": "array", "description": description}
    schema["items"] = _agent_schema()
    schema["minItems"] = 1
    return schema
def _agent_schema(*, description: str = "One workflow agent slot.") -> dict[str, Any]:
    """Return the schema fragment describing a single workflow agent.

    Args:
        description: Text placed in the fragment's ``description`` field.

    Returns:
        An object schema that requires ``name`` and ``instruction``, lists the
        optional agent settings, and rejects any other property.
    """

    def scalar(kind: str) -> dict[str, Any]:
        # New dict on every call so no two properties share one object.
        return {"type": kind}

    def string_list() -> dict[str, Any]:
        return {"type": "array", "items": {"type": "string"}}

    properties = {
        "name": scalar("string"),
        "instruction": scalar("string"),
        "use_skill": scalar("string"),
        "skill_query": scalar("string"),
        "allowed_tool_names": string_list(),
        "required_evidence": string_list(),
        "evidence_contract": scalar("object"),
        "validation_rules": string_list(),
        "required_for_completion": scalar("boolean"),
        "block_downstream_on_partial": scalar("boolean"),
        "max_tool_iterations": scalar("integer"),
        "constraints": string_list(),
        "expected_output": scalar("string"),
        "input_contract": scalar("object"),
        "output_contract": scalar("object"),
    }
    return {
        "type": "object",
        "description": description,
        "properties": properties,
        "required": ["name", "instruction"],
        "additionalProperties": False,
    }

View File

@ -0,0 +1,37 @@
"""MixtureOfAgents graph builder."""
from __future__ import annotations
from typing import Any, Iterable
from beaver.coordinator.models import ExecutionGraph
from .base import (
WorkflowAgentSpec,
build_graph_from_dependencies,
parse_agents,
validate_agent_names,
)
WORKFLOW_NAME = "MixtureOfAgents"
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    aggregator: WorkflowAgentSpec | dict[str, Any],
) -> ExecutionGraph:
    """Build the MixtureOfAgents graph: every expert feeds one aggregator.

    Args:
        task: Overall task text; accepted but discarded by this builder.
        agents: Expert agents, given as specs or dicts, parsed with
            ``parse_agents``.
        aggregator: The single agent that depends on all experts.

    Returns:
        The graph produced by ``build_graph_from_dependencies`` with the
        ``"dag"`` strategy.
    """
    del task  # Unused when wiring the graph.
    expert_specs = parse_agents(agents)
    aggregator_spec = parse_agents([aggregator])[0]
    roster = list(expert_specs) + [aggregator_spec]
    validate_agent_names(roster)

    # Start every agent with no upstream dependency, then point the aggregator
    # at all of the experts.
    dependencies = {}
    for spec in roster:
        dependencies[spec.name] = []
    dependencies[aggregator_spec.name] = [expert.name for expert in expert_specs]

    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=roster,
        dependencies=dependencies,
    )

View File

@ -0,0 +1,29 @@
"""SequentialWorkflow graph builder."""
from __future__ import annotations
from typing import Any, Iterable
from beaver.coordinator.models import ExecutionGraph
from .base import WorkflowAgentSpec, build_graph_from_dependencies, parse_agents
WORKFLOW_NAME = "SequentialWorkflow"
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
) -> ExecutionGraph:
    """Build the SequentialWorkflow graph: each agent depends on the one before it.

    Args:
        task: Overall task text; accepted but discarded by this builder.
        agents: Agents in execution order, given as specs or dicts, parsed
            with ``parse_agents``.

    Returns:
        The graph produced by ``build_graph_from_dependencies`` with the
        ``"sequence"`` strategy.

    Raises:
        Whatever ``validate_agent_names`` raises for an unacceptable set of
        agent names.
    """
    # Imported locally because this module's top-level ``.base`` import does
    # not include this helper.
    from .base import validate_agent_names

    del task  # Unused when wiring the graph.
    parsed = parse_agents(agents)

    # The dependency map below is keyed by agent name, so a repeated name would
    # silently merge two agents into one entry and could chain an agent to
    # itself. Validate first, as the MixtureOfAgents builder does.
    # NOTE(review): assumes validate_agent_names rejects duplicate names - confirm.
    validate_agent_names(parsed)

    dependencies = {agent.name: [] for agent in parsed}
    # Link each agent to its immediate predecessor; the first has no dependency.
    for previous, current in zip(parsed, parsed[1:], strict=False):
        dependencies[current.name].append(previous.name)

    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="sequence",
        agents=parsed,
        dependencies=dependencies,
    )

View File

@ -68,6 +68,15 @@ class MCPToolWrapper(BaseTool):
)
async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
if self.category == "team_workflow":
from beaver.team_workflows.executor import TeamWorkflowExecutor
return await TeamWorkflowExecutor().execute(
self.original_name,
dict(arguments or {}),
context,
tool_name=self.spec.name,
)
try:
result = await asyncio.wait_for(
self.call_tool(self.original_name, dict(arguments or {})),