feat(coordinator): 添加团队节点默认最大工具迭代次数配置
添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,并修改 LocalAgentRunner 中的逻辑,在 envelope 未指定 max_tool_iterations 时使用此默认值。
fix(runtime): 修复团队节点运行成功判断逻辑 更新运行成功判断条件:finish_reason 为 "stop" 或 "max_tool_iterations_finalized" 时均视为运行成功;同时添加对原始工具调用输出的检测,若最终输出是未解析的原始工具调用文本,则判定为运行失败,避免将其误判为成功完成。
feat(mcp): 添加团队工作流 MCP 工具类别支持 增加新的本地 MCP 工具类别 "team_workflow" 及其对应的工具创建功能,为团队工作流提供本地工具支持。
refactor(engine): 调整 AgentLoop 最大工具迭代次数设置 将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。
perf(mcp): 优化 MCP 连接管理,避免重复连接 添加 mcp_connected 标志来跟踪 MCP 连接状态,确保 connect_all 只执行一次,提高性能并避免不必要的重复连接。
refactor(skills): 移除技能团队模板相关功能 移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,简化技能记录结构和加载流程。
feat(process): 增强会话过程投影器功能 添加技能激活快照事件处理,改进团队运行完成消息显示,并增强技能激活事件的时间戳记录功能。
refactor(tasks): 简化任务尝试编排器团队执行逻辑 移除团队执行相关代码,将所有任务统一按单步执行处理,简化任务编排器的复杂度并提升执行效率。
fix(evidence): 修复节点证据评估中需求验证逻辑 更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
This commit is contained in:
@ -9,6 +9,7 @@ from beaver.engine.providers import ProviderBundle
|
||||
from beaver.tasks.evidence import EvidenceBuilder, evaluate_node_evidence
|
||||
|
||||
from .models import DelegationEnvelope, NodeRunResult
|
||||
from .runtime_defaults import DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS
|
||||
|
||||
|
||||
class LocalAgentRunner:
|
||||
@ -55,7 +56,11 @@ class LocalAgentRunner:
|
||||
pinned_skill_names=envelope.inherited_pinned_skills,
|
||||
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
|
||||
allowed_tool_names=envelope.allowed_tool_names,
|
||||
max_tool_iterations=envelope.max_tool_iterations,
|
||||
max_tool_iterations=(
|
||||
envelope.max_tool_iterations
|
||||
if envelope.max_tool_iterations is not None
|
||||
else DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS
|
||||
),
|
||||
allow_candidate_generation=allow_candidate_generation,
|
||||
)
|
||||
loaded = target_loop.boot()
|
||||
@ -70,7 +75,8 @@ class LocalAgentRunner:
|
||||
envelope.required_evidence,
|
||||
result.output_text,
|
||||
)
|
||||
run_succeeded = result.finish_reason == "stop"
|
||||
raw_tool_call_output = self._looks_like_raw_tool_call(result.output_text)
|
||||
run_succeeded = result.finish_reason in {"stop", "max_tool_iterations_finalized"} and not raw_tool_call_output
|
||||
if not run_succeeded:
|
||||
completion_status = "failed"
|
||||
elif evidence_gaps:
|
||||
@ -81,7 +87,10 @@ class LocalAgentRunner:
|
||||
if completion_status == "partial":
|
||||
error = "; ".join(evidence_gaps)
|
||||
else:
|
||||
error = None if success else (result.output_text or result.finish_reason)
|
||||
if raw_tool_call_output:
|
||||
error = "finalized output is a raw tool call"
|
||||
else:
|
||||
error = None if success else (result.output_text or result.finish_reason)
|
||||
return NodeRunResult(
|
||||
node_id=envelope.node_id or envelope.agent.name,
|
||||
success=success,
|
||||
@ -169,3 +178,16 @@ class LocalAgentRunner:
|
||||
"If no published skill matches, return [] and let the node continue without skills."
|
||||
)
|
||||
return "\n\n".join(sections)
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_raw_tool_call(output_text: str | None) -> bool:
|
||||
text = (output_text or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
markers = (
|
||||
"<||DSML||tool_calls>",
|
||||
"<||DSML||invoke",
|
||||
"<tool_call",
|
||||
"<function=",
|
||||
)
|
||||
return any(marker in text for marker in markers)
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
"""Runtime defaults shared by Beaver team planning and execution."""
|
||||
|
||||
DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS = 100
|
||||
@ -48,8 +48,6 @@ class SkillContext:
|
||||
content_hash: str = ""
|
||||
activation_reason: str = "selected"
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
team_template: dict[str, Any] | None = None
|
||||
team_template_warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -106,6 +106,7 @@ class EngineLoadResult:
|
||||
task_execution_planner: TaskExecutionPlanner | None = None
|
||||
mcp_manager: MCPConnectionManager | None = None
|
||||
mcp_report: dict[str, dict] = field(default_factory=dict)
|
||||
mcp_connected: bool = False
|
||||
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
|
||||
closed: bool = False
|
||||
|
||||
@ -317,10 +318,7 @@ class EngineLoader:
|
||||
draft_service=draft_service,
|
||||
)
|
||||
task_service = self._task_service or TaskService(workspace / "tasks")
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(
|
||||
task_skill_resolver=task_skill_resolver,
|
||||
tool_registry=tool_registry,
|
||||
)
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner()
|
||||
mcp_manager = MCPConnectionManager(
|
||||
self.config.tools.mcp_servers,
|
||||
authz_config=self.config.authz,
|
||||
|
||||
@ -53,7 +53,7 @@ class AgentProfile:
|
||||
max_tokens: int | None = None
|
||||
max_context_messages: int = 1000
|
||||
temperature: float = 0.2
|
||||
max_tool_iterations: int = 30
|
||||
max_tool_iterations: int = 100
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -99,8 +99,8 @@ class _WebSearchLoopGuard:
|
||||
return None
|
||||
|
||||
query = str(payload.get("query") or self._last_query or "").strip()
|
||||
is_low_quality = payload.get("success") is False or payload.get("quality") == "low"
|
||||
if not is_low_quality:
|
||||
is_failed_search = payload.get("success") is False
|
||||
if not is_failed_search:
|
||||
self._reset()
|
||||
self._last_query = query
|
||||
return None
|
||||
@ -435,7 +435,9 @@ class AgentLoop:
|
||||
if include_tools and mcp_manager is not None:
|
||||
started_at = perf_counter()
|
||||
try:
|
||||
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
|
||||
if not loaded.mcp_connected:
|
||||
loaded.mcp_report = await mcp_manager.connect_all(tool_registry)
|
||||
loaded.mcp_connected = True
|
||||
loaded.tools = [spec.name for spec in tool_registry.list_specs()]
|
||||
finally:
|
||||
add_latency("mcp_ms", started_at)
|
||||
@ -752,6 +754,11 @@ class AgentLoop:
|
||||
"memory_store": memory_service.get_store(),
|
||||
"tool_registry": tool_registry,
|
||||
"skills_loader": skills_loader,
|
||||
"loaded": loaded,
|
||||
"agent_loop": self,
|
||||
"provider_bundle": bundle,
|
||||
"user_message": task,
|
||||
"attempt_index": attempt_index,
|
||||
"draft_service": getattr(loaded, "draft_service", None),
|
||||
"beaver_config": loaded.config,
|
||||
"task_id": task_id,
|
||||
@ -764,6 +771,7 @@ class AgentLoop:
|
||||
"session_id": resolved_session_id,
|
||||
"task_id": task_id,
|
||||
"run_id": resolved_run_id,
|
||||
"parent_session_id": parent_session_id,
|
||||
"allowed_tool_names": (
|
||||
None if allowed_tool_names is None else list(allowed_tool_names)
|
||||
),
|
||||
|
||||
@ -29,6 +29,7 @@ LOCAL_MCP_CATEGORIES: dict[str, dict[str, str]] = {
|
||||
"local_coordination_mcp": {"category": "coordination", "display_name": "本地协作工具"},
|
||||
"local_scheduler_mcp": {"category": "scheduler", "display_name": "本地定时工具"},
|
||||
"local_web_mcp": {"category": "web", "display_name": "本地联网工具"},
|
||||
"local_team_workflow_mcp": {"category": "team_workflow", "display_name": "本地 Agent Team Workflow 工具"},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -56,6 +56,7 @@ LOCAL_TOOL_CATEGORIES = {
|
||||
"coordination": "Beaver Local Coordination Tools",
|
||||
"scheduler": "Beaver Local Scheduler Tools",
|
||||
"web": "Beaver Local Web Tools",
|
||||
"team_workflow": "Beaver Local Team Workflow Tools",
|
||||
}
|
||||
|
||||
|
||||
@ -129,6 +130,10 @@ def _category_tools(category: str, workspace: Path) -> tuple[list[BaseTool], Too
|
||||
ObjectBackedTool(WebFetchTool()),
|
||||
ObjectBackedTool(WebSearchTool()),
|
||||
]
|
||||
elif category == "team_workflow":
|
||||
from beaver.team_workflows.mcp_tools import create_team_workflow_tools
|
||||
|
||||
tools = create_team_workflow_tools()
|
||||
else:
|
||||
raise ValueError(f"Unknown local tool category: {category}")
|
||||
return tools, context
|
||||
|
||||
@ -68,7 +68,7 @@ class AgentService:
|
||||
self.profile.max_tokens = None
|
||||
self.profile.temperature = 0.2
|
||||
self.profile.max_context_messages = 1000
|
||||
self.profile.max_tool_iterations = 30
|
||||
self.profile.max_tool_iterations = 100
|
||||
if defaults.max_tokens is not None:
|
||||
self.profile.max_tokens = max(1, defaults.max_tokens)
|
||||
if defaults.temperature is not None:
|
||||
|
||||
@ -17,6 +17,7 @@ class SessionProcessProjector:
|
||||
runs: dict[str, dict[str, Any]] = {}
|
||||
events: list[dict[str, Any]] = []
|
||||
artifacts: list[dict[str, Any]] = []
|
||||
projected_skill_activation_run_ids: set[str] = set()
|
||||
|
||||
def add_event(
|
||||
*,
|
||||
@ -186,6 +187,38 @@ class SessionProcessProjector:
|
||||
},
|
||||
)
|
||||
|
||||
elif record.event_type == "skill_activation_snapshotted":
|
||||
run_id = record.run_id or root_run_id
|
||||
parent_run_id = root_run_id if run_id != root_run_id else None
|
||||
receipts = [
|
||||
item
|
||||
for item in payload.get("receipts") or []
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
selected_skill_names = _receipt_skill_names(receipts)
|
||||
if selected_skill_names:
|
||||
projected_skill_activation_run_ids.add(str(run_id))
|
||||
add_event(
|
||||
event_id=_event_id(record, "skill-activation"),
|
||||
run_id=str(run_id),
|
||||
parent_run_id=parent_run_id,
|
||||
kind="skill_selected",
|
||||
actor_type="system",
|
||||
actor_id="skill-selector",
|
||||
actor_name="Skill Selector",
|
||||
text=f"Selected skill guidance: {', '.join(selected_skill_names)}.",
|
||||
created_at=_receipt_started_at(receipts) or created_at,
|
||||
status="done",
|
||||
metadata={
|
||||
"task_id": task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"timeline_type": "skill",
|
||||
"skill_names": selected_skill_names,
|
||||
"activation_reasons": _receipt_reasons(receipts),
|
||||
"receipts": receipts,
|
||||
},
|
||||
)
|
||||
|
||||
elif record.event_type in {"task_team_run_completed", "task_team_run_failed"}:
|
||||
team_success = bool(payload.get("team_success"))
|
||||
root["status"] = "running"
|
||||
@ -203,7 +236,7 @@ class SessionProcessProjector:
|
||||
actor_type="system",
|
||||
actor_id="team",
|
||||
actor_name="Task Team",
|
||||
text=payload.get("error") or ("Team completed" if team_success else "Team completed with failed nodes"),
|
||||
text="Team completed" if team_success else "Team 执行未完成 / 子节点失败",
|
||||
created_at=created_at,
|
||||
status="done" if team_success else "error",
|
||||
metadata={**dict(payload), "timeline_type": "agent_team", "team_run_ids": team_run_ids},
|
||||
@ -316,7 +349,10 @@ class SessionProcessProjector:
|
||||
"skill_names": activated_skill_names,
|
||||
},
|
||||
}
|
||||
if activated_skill_names:
|
||||
if activated_skill_names and main_run_id not in projected_skill_activation_run_ids:
|
||||
skill_created_at = _activated_skill_started_at(run_record) or (
|
||||
run_record.started_at if run_record is not None else None
|
||||
) or created_at
|
||||
add_event(
|
||||
event_id=_event_id(record, "synthesis-skills"),
|
||||
run_id=main_run_id,
|
||||
@ -326,7 +362,7 @@ class SessionProcessProjector:
|
||||
actor_id="skill-selector",
|
||||
actor_name="Skill Selector",
|
||||
text=f"Selected skill guidance: {', '.join(activated_skill_names)}.",
|
||||
created_at=created_at,
|
||||
created_at=skill_created_at,
|
||||
status="done",
|
||||
metadata={
|
||||
"task_id": task_id,
|
||||
@ -439,6 +475,48 @@ def _activated_skill_reasons(run_record: Any | None) -> list[str]:
|
||||
return reasons
|
||||
|
||||
|
||||
def _activated_skill_started_at(run_record: Any | None) -> str | None:
|
||||
if run_record is None:
|
||||
return None
|
||||
timestamps = [
|
||||
str(getattr(receipt, "activated_at", "") or "").strip()
|
||||
for receipt in getattr(run_record, "activated_skills", []) or []
|
||||
]
|
||||
timestamps = [value for value in timestamps if value]
|
||||
if not timestamps:
|
||||
return None
|
||||
return sorted(timestamps)[0]
|
||||
|
||||
|
||||
def _receipt_skill_names(receipts: list[dict[str, Any]]) -> list[str]:
|
||||
names = []
|
||||
for receipt in receipts:
|
||||
skill_name = str(receipt.get("skill_name") or "").strip()
|
||||
if skill_name:
|
||||
names.append(skill_name)
|
||||
return list(dict.fromkeys(names))
|
||||
|
||||
|
||||
def _receipt_reasons(receipts: list[dict[str, Any]]) -> list[str]:
|
||||
reasons = []
|
||||
for receipt in receipts:
|
||||
reason = str(receipt.get("activation_reason") or "").strip()
|
||||
if reason:
|
||||
reasons.append(reason)
|
||||
return reasons
|
||||
|
||||
|
||||
def _receipt_started_at(receipts: list[dict[str, Any]]) -> str | None:
|
||||
timestamps = [
|
||||
str(receipt.get("activated_at") or "").strip()
|
||||
for receipt in receipts
|
||||
]
|
||||
timestamps = [value for value in timestamps if value]
|
||||
if not timestamps:
|
||||
return None
|
||||
return sorted(timestamps)[0]
|
||||
|
||||
|
||||
def _tool_call_name(tool_call: dict[str, Any]) -> str:
|
||||
function_payload = tool_call.get("function")
|
||||
if isinstance(function_payload, dict):
|
||||
|
||||
@ -140,8 +140,6 @@ class SkillAssembler:
|
||||
content_hash=record.content_hash or "" if record is not None else "",
|
||||
activation_reason="llm_selected",
|
||||
tool_hints=list(record.tool_hints) if record is not None else [],
|
||||
team_template=getattr(record, "team_template", None) if record is not None else None,
|
||||
team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [],
|
||||
)
|
||||
)
|
||||
return activated_skills
|
||||
|
||||
@ -28,7 +28,6 @@ from .utils import (
|
||||
check_requirements,
|
||||
escape_xml,
|
||||
extract_required_tool_names,
|
||||
extract_skill_team_template,
|
||||
get_missing_requirements,
|
||||
parse_frontmatter,
|
||||
parse_skill_metadata_blob,
|
||||
@ -50,8 +49,6 @@ class SkillRecord:
|
||||
tool_hints: list[str] = field(default_factory=list)
|
||||
frontmatter: dict[str, Any] = field(default_factory=dict)
|
||||
description: str = ""
|
||||
team_template: dict[str, Any] | None = None
|
||||
team_template_warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillsLoader:
|
||||
@ -116,7 +113,6 @@ class SkillsLoader:
|
||||
continue
|
||||
normalized_frontmatter = dict(frontmatter)
|
||||
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
|
||||
template_result = extract_skill_team_template(body)
|
||||
record = SkillRecord(
|
||||
name=name,
|
||||
path=skill_file,
|
||||
@ -131,8 +127,6 @@ class SkillsLoader:
|
||||
),
|
||||
frontmatter=normalized_frontmatter,
|
||||
description=str(frontmatter.get("description") or summarize_body(body) or name),
|
||||
team_template=template_result.template,
|
||||
team_template_warnings=template_result.warnings,
|
||||
)
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
@ -152,7 +146,6 @@ class SkillsLoader:
|
||||
else:
|
||||
path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md"
|
||||
_frontmatter, body = parse_frontmatter(loaded.content)
|
||||
template_result = extract_skill_team_template(body)
|
||||
record = SkillRecord(
|
||||
name=name,
|
||||
path=path,
|
||||
@ -167,8 +160,6 @@ class SkillsLoader:
|
||||
),
|
||||
frontmatter=dict(loaded.version.frontmatter),
|
||||
description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name),
|
||||
team_template=template_result.template,
|
||||
team_template_warnings=template_result.warnings,
|
||||
)
|
||||
if filter_unavailable and not self._record_available(record):
|
||||
continue
|
||||
|
||||
@ -17,7 +17,6 @@ import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@ -85,27 +84,6 @@ def strip_frontmatter(content: str) -> str:
|
||||
return body
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillTeamTemplateParseResult:
|
||||
template: dict[str, Any] | None = None
|
||||
warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult:
|
||||
matches = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL)
|
||||
if not matches:
|
||||
return SkillTeamTemplateParseResult()
|
||||
if len(matches) != 1:
|
||||
return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"])
|
||||
try:
|
||||
template = json.loads(matches[0])
|
||||
except json.JSONDecodeError:
|
||||
return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"])
|
||||
if not isinstance(template, dict) or not isinstance(template.get("nodes", []), list):
|
||||
return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"])
|
||||
return SkillTeamTemplateParseResult(template=template)
|
||||
|
||||
|
||||
def extract_required_tool_names(body: str) -> list[str]:
|
||||
"""从 canonical skill 正文的 `## Required Tools` 段落提取工具名。
|
||||
|
||||
|
||||
@ -5,12 +5,11 @@ from __future__ import annotations
|
||||
from time import perf_counter
|
||||
from typing import Any, Callable
|
||||
|
||||
from beaver.coordinator.models import ExecutionNode, TeamRunResult
|
||||
from beaver.engine import AgentRunResult
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
|
||||
|
||||
from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
|
||||
from .evidence import EvidenceBuilder, TaskEvidencePacket, render_task_evidence
|
||||
from .models import TaskRecord
|
||||
from .planner import TaskExecutionPlan
|
||||
|
||||
@ -46,7 +45,7 @@ class TaskAttemptOrchestrator:
|
||||
output_language_instruction = self._output_language_instruction(prompt_locale)
|
||||
provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
|
||||
kwargs = dict(kwargs)
|
||||
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs["provider_bundle"] = provider_bundle
|
||||
|
||||
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
|
||||
@ -87,75 +86,17 @@ class TaskAttemptOrchestrator:
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(kwargs),
|
||||
plan = TaskExecutionPlan.single(
|
||||
"legacy_planner_team_ignored",
|
||||
planner_adaptation=plan.planner_adaptation,
|
||||
)
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
|
||||
outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
|
||||
plan,
|
||||
team_result,
|
||||
prompt_locale=prompt_locale,
|
||||
)
|
||||
if plan.is_team:
|
||||
team_execution_context = self._join_context(outcome_context, team_execution_context)
|
||||
outcome_metadata = {
|
||||
"task_outcome": "single",
|
||||
"incomplete_node_ids": [],
|
||||
"node_statuses": {},
|
||||
"evidence_gaps": {},
|
||||
}
|
||||
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
@ -171,22 +112,15 @@ class TaskAttemptOrchestrator:
|
||||
attempt_kwargs["execution_context"] = self._join_context(
|
||||
base_execution_context,
|
||||
output_language_instruction,
|
||||
team_execution_context,
|
||||
)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
if outcome_metadata["task_outcome"] == "incomplete":
|
||||
result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
@ -210,7 +144,6 @@ class TaskAttemptOrchestrator:
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
evidence_debug = {
|
||||
@ -256,31 +189,6 @@ class TaskAttemptOrchestrator:
|
||||
result.validation_result = None
|
||||
return result
|
||||
|
||||
async def _run_team_for_task(
|
||||
self,
|
||||
plan: TaskExecutionPlan,
|
||||
*,
|
||||
task: TaskRecord,
|
||||
parent_session_id: str,
|
||||
provider_bundle_factory: Any,
|
||||
) -> tuple[TeamRunResult | None, str | None]:
|
||||
if plan.graph is None:
|
||||
return None, "team plan did not include an execution graph"
|
||||
try:
|
||||
from beaver.services.team_service import TeamService
|
||||
|
||||
result = await TeamService(self.create_loop()).run_team(
|
||||
plan.graph,
|
||||
parent_task_id=task.task_id,
|
||||
parent_session_id=parent_session_id,
|
||||
parent_run_id=None,
|
||||
provider_bundle_factory=provider_bundle_factory,
|
||||
allow_candidate_generation=False,
|
||||
)
|
||||
return result, None
|
||||
except Exception as exc:
|
||||
return None, str(exc)
|
||||
|
||||
async def _assemble_task_attempt_skills(
|
||||
self,
|
||||
*,
|
||||
@ -396,7 +304,6 @@ class TaskAttemptOrchestrator:
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
plan: TaskExecutionPlan | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
) -> str:
|
||||
phase = f"attempt_{attempt_index}"
|
||||
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
|
||||
@ -445,8 +352,6 @@ class TaskAttemptOrchestrator:
|
||||
)
|
||||
)
|
||||
sections.append("Execution plan:\n" + "\n".join(plan_lines))
|
||||
if team_summaries:
|
||||
sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
|
||||
sections.append(
|
||||
"Skill selection instruction:\n"
|
||||
"Prefer reusing previously activated skills when they still match the Task. "
|
||||
@ -476,140 +381,6 @@ class TaskAttemptOrchestrator:
|
||||
def _join_context(*parts: str | None) -> str:
|
||||
return "\n\n".join(part.strip() for part in parts if part and part.strip())
|
||||
|
||||
@staticmethod
|
||||
def _team_summary_for_validation(result: TeamRunResult) -> str:
|
||||
lines = [
|
||||
f"success={result.success}",
|
||||
f"task_id={result.task_id or ''}",
|
||||
"summary:",
|
||||
result.summary,
|
||||
"nodes:",
|
||||
]
|
||||
for node in result.node_results:
|
||||
lines.append(
|
||||
f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
|
||||
f"error={node.error or ''} output={node.output_text[:500]}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
|
||||
nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
|
||||
payloads: list[dict[str, Any]] = []
|
||||
for item in result.node_results:
|
||||
payload = item.to_dict()
|
||||
node = nodes.get(item.node_id)
|
||||
if node is not None:
|
||||
payload["selected_skill_names"] = list(node.inherited_pinned_skills)
|
||||
payload["ephemeral_skill_names"] = [
|
||||
skill.name for skill in node.inherited_pinned_skill_contexts
|
||||
]
|
||||
payload["skill_query"] = node.agent.metadata.get("skill_query")
|
||||
payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
|
||||
payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
|
||||
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
|
||||
payloads.append(payload)
|
||||
return payloads
|
||||
|
||||
@staticmethod
|
||||
def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
|
||||
if result is None:
|
||||
return []
|
||||
return [node.evidence for node in result.node_results if node.evidence is not None]
|
||||
|
||||
@staticmethod
|
||||
def _team_synthesis_outcome(
|
||||
plan: TaskExecutionPlan,
|
||||
result: TeamRunResult | None,
|
||||
*,
|
||||
prompt_locale: str | None = None,
|
||||
) -> tuple[str, str, dict[str, Any]]:
|
||||
if not plan.is_team or plan.graph is None:
|
||||
metadata = {
|
||||
"task_outcome": "single",
|
||||
"incomplete_node_ids": [],
|
||||
"node_statuses": {},
|
||||
"evidence_gaps": {},
|
||||
}
|
||||
return "Task outcome: single", "", metadata
|
||||
|
||||
result_by_node = {
|
||||
item.node_id: item
|
||||
for item in (result.node_results if result is not None else [])
|
||||
}
|
||||
node_statuses: dict[str, str] = {}
|
||||
evidence_gaps: dict[str, list[str]] = {}
|
||||
incomplete_node_ids: list[str] = []
|
||||
detail_lines: list[str] = []
|
||||
successful_lines: list[str] = []
|
||||
for node in plan.graph.nodes:
|
||||
node_result = result_by_node.get(node.node_id)
|
||||
status = node_result.completion_status if node_result is not None else "not_run"
|
||||
node_statuses[node.node_id] = status
|
||||
gaps = list(node_result.evidence_gaps) if node_result is not None else []
|
||||
if gaps:
|
||||
evidence_gaps[node.node_id] = gaps
|
||||
if node.required_for_completion and status != "succeeded":
|
||||
incomplete_node_ids.append(node.node_id)
|
||||
detail_lines.append(
|
||||
f"- {node.node_id}: status={status}, "
|
||||
f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, "
|
||||
f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, "
|
||||
f"evidence_gaps={gaps}"
|
||||
)
|
||||
elif node_result is not None and status == "succeeded":
|
||||
successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}")
|
||||
|
||||
task_outcome = "incomplete" if incomplete_node_ids else "complete"
|
||||
metadata = {
|
||||
"task_outcome": task_outcome,
|
||||
"incomplete_node_ids": incomplete_node_ids,
|
||||
"node_statuses": node_statuses,
|
||||
"evidence_gaps": evidence_gaps,
|
||||
}
|
||||
context_parts = [
|
||||
f"Task outcome: {task_outcome}",
|
||||
"Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"),
|
||||
]
|
||||
if detail_lines:
|
||||
context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines))
|
||||
if successful_lines:
|
||||
context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines))
|
||||
if task_outcome == "incomplete":
|
||||
context_parts.append(
|
||||
"Synthesis requirement: produce a partial report from available evidence and explicitly state "
|
||||
"that the task is incomplete, partially completed, or missing required evidence."
|
||||
)
|
||||
prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else ""
|
||||
return "\n\n".join(context_parts), prefix, metadata
|
||||
|
||||
@staticmethod
|
||||
def _incomplete_prefix(prompt_locale: str | None) -> str:
|
||||
locale = normalize_main_agent_prompt_locale(prompt_locale)
|
||||
if locale == "en":
|
||||
return "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
|
||||
if locale == "zh-Hant":
|
||||
return "任務未完成:部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
|
||||
return "任务未完成:部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"
|
||||
|
||||
@staticmethod
|
||||
def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
|
||||
normalized = output_text.lower()
|
||||
notices = (
|
||||
"任务未完成",
|
||||
"任務未完成",
|
||||
"部分完成",
|
||||
"缺少证据",
|
||||
"缺少證據",
|
||||
"task incomplete",
|
||||
"incomplete task",
|
||||
"partially complete",
|
||||
"missing evidence",
|
||||
)
|
||||
if any(notice in normalized for notice in notices):
|
||||
return output_text
|
||||
return prefix + output_text.lstrip()
|
||||
|
||||
def _build_task_evidence_packet(
|
||||
self,
|
||||
*,
|
||||
@ -617,7 +388,6 @@ class TaskAttemptOrchestrator:
|
||||
task: TaskRecord,
|
||||
attempt_index: int,
|
||||
result: AgentRunResult,
|
||||
team_result: TeamRunResult | None,
|
||||
) -> TaskEvidencePacket:
|
||||
main_run = EvidenceBuilder(session_manager).build_run_evidence(
|
||||
result.session_id,
|
||||
@ -629,67 +399,7 @@ class TaskAttemptOrchestrator:
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=main_run,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results) if team_result is not None else [],
|
||||
team_runs=[],
|
||||
team_node_results=[],
|
||||
final_output=result.output_text,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
|
||||
node_lines = [
|
||||
(
|
||||
f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
|
||||
f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
|
||||
)
|
||||
for node in result.node_results
|
||||
]
|
||||
return "\n\n".join(
|
||||
item
|
||||
for item in [
|
||||
"Task team execution result:",
|
||||
f"Planner reason: {plan.reason}",
|
||||
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
|
||||
f"Team success: {result.success}",
|
||||
f"Team summary:\n{result.summary}",
|
||||
"Node results:\n" + "\n\n".join(node_lines),
|
||||
(
|
||||
"Final synthesis instruction:\n" + plan.final_synthesis_instruction
|
||||
if plan.final_synthesis_instruction
|
||||
else None
|
||||
),
|
||||
(
|
||||
"Use successful team outputs as internal evidence. If one or more nodes failed, "
|
||||
"do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
|
||||
"with available evidence and clearly state any missing or uncertain data."
|
||||
),
|
||||
]
|
||||
if item
|
||||
)
|
||||
|
||||
@staticmethod
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
    """Render context text for a team run that failed before final synthesis.

    The text names the planner reason, the graph strategy (empty when the
    plan has no graph) and the error, followed by fixed guidance telling
    the main agent to continue with a fallback answer.
    """
    strategy = plan.graph.strategy if plan.graph else ""
    guidance = (
        "Proceed as the main agent. Do not blindly repeat failed tool calls; "
        "produce a user-visible fallback answer with available evidence and clearly "
        "state any missing or uncertain data."
    )
    sections = (
        "Task team execution failed before final synthesis.",
        f"Planner reason: {plan.reason}",
        f"Strategy: {strategy}",
        f"Error: {error}",
        guidance,
    )
    return "\n\n".join(sections)
|
||||
|
||||
def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
    """Return a per-node factory that builds provider bundles for team nodes.

    Each call of the returned factory copies ``kwargs`` without the
    ``provider_bundle`` entry, overrides ``model`` / ``provider_name``
    with the node agent's values when those are truthy, and passes the
    result to ``self.make_provider_bundle_for_task`` together with
    ``self.loaded``.
    """

    def factory(node: ExecutionNode) -> Any:
        # Work on a copy so the shared kwargs are never mutated.
        node_kwargs = {key: value for key, value in kwargs.items() if key != "provider_bundle"}
        model_override = node.agent.model
        if model_override:
            node_kwargs["model"] = model_override
        provider_override = node.agent.provider_name
        if provider_override:
            node_kwargs["provider_name"] = provider_override
        return self.make_provider_bundle_for_task(self.loaded, node_kwargs)

    return factory
|
||||
|
||||
@ -155,7 +155,10 @@ def evaluate_node_evidence(
|
||||
if not output_text.strip():
|
||||
_append_unique(gaps, "missing required evidence: output")
|
||||
else:
|
||||
_append_unique(gaps, f"unsupported evidence requirement: {requirement}")
|
||||
# v1 only enforces the coarse machine-readable requirements above.
|
||||
# Natural-language evidence requirements are preserved for later
|
||||
# LLM-based validation and must not fail a node deterministically.
|
||||
continue
|
||||
return gaps
|
||||
|
||||
|
||||
|
||||
@ -1,39 +1,27 @@
|
||||
"""Internal Task execution planner for single-agent vs team execution."""
|
||||
"""Internal Task execution planner for single-agent task attempts.
|
||||
|
||||
Team execution is now started explicitly through local Team Workflow MCP tools.
|
||||
This planner only records why the normal Task attempt should continue as a
|
||||
single root-agent run.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.tools.registry import ToolRegistry
|
||||
|
||||
from .models import TaskRecord
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .skill_resolver import SkillResolutionReport
|
||||
|
||||
|
||||
TaskExecutionMode = Literal["single", "team"]
|
||||
|
||||
|
||||
# Temporary name-based denylist until high-risk tool approval is implemented.
|
||||
# Keep this policy centralized so planner behavior cannot drift by call site.
|
||||
HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"delete_file",
|
||||
"execute_command",
|
||||
"external_send",
|
||||
"send_email",
|
||||
"terminal",
|
||||
"write_file",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _agent_team_enabled() -> bool:
    """Report whether the ``BEAVER_AGENT_TEAM_ENABLED`` flag is switched on.

    The flag defaults to ``"1"``. It counts as disabled only when its
    trimmed, lower-cased value is one of ``0``, ``false``, ``no`` or ``off``.
    """
    raw_flag = os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1")
    normalized = raw_flag.strip().lower()
    disabled_values = {"0", "false", "no", "off"}
    return normalized not in disabled_values
|
||||
|
||||
@ -96,37 +84,7 @@ class TaskExecutionPlan:
|
||||
|
||||
|
||||
class TaskExecutionPlanner:
|
||||
"""Plan whether a Task attempt should run through a team first."""
|
||||
|
||||
_MAX_NODES = 6
|
||||
_MAX_DEPTH = 4
|
||||
_SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
|
||||
_ALLOWED_NODE_FIELDS = {
|
||||
"node_id",
|
||||
"task",
|
||||
"use_skill",
|
||||
"skill_query",
|
||||
"depends_on",
|
||||
"input_contract",
|
||||
"output_contract",
|
||||
"requested_tools",
|
||||
"required_evidence",
|
||||
"evidence_contract",
|
||||
"validation_rules",
|
||||
"required_for_completion",
|
||||
"block_downstream_on_partial",
|
||||
"max_tool_iterations",
|
||||
"constraints",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
task_skill_resolver: TaskSkillResolver | None = None,
|
||||
tool_registry: ToolRegistry | None = None,
|
||||
) -> None:
|
||||
self.task_skill_resolver = task_skill_resolver
|
||||
self.tool_registry = tool_registry
|
||||
"""Return the current Task execution mode for the root AgentLoop."""
|
||||
|
||||
async def plan(
|
||||
self,
|
||||
@ -144,122 +102,7 @@ class TaskExecutionPlanner:
|
||||
return TaskExecutionPlan.single("planner_disabled_by_environment")
|
||||
if not self._needs_team_planning(task=task, user_message=user_message):
|
||||
return TaskExecutionPlan.single("planner_skipped_simple_task")
|
||||
|
||||
provider = None
|
||||
model = None
|
||||
if provider_bundle is not None:
|
||||
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
model = getattr(runtime, "model", None)
|
||||
if provider is None:
|
||||
return TaskExecutionPlan.single("planner_provider_unavailable")
|
||||
selected_template, base_adaptation = self._select_team_template(activated_skills or [])
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
provider.chat(
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You choose whether an internal Beaver Task attempt should run as a single "
|
||||
"main-agent pass or use a small sub-agent team first. Return only compact JSON."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": self._prompt(
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
skill_summaries=skill_summaries or [],
|
||||
tool_hints=tool_hints or [],
|
||||
activated_skills=activated_skills or [],
|
||||
selected_template=selected_template,
|
||||
),
|
||||
},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
),
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
try:
|
||||
plan = self._from_json_or_raise(response.content or "")
|
||||
except Exception as first_error:
|
||||
repair_response = await asyncio.wait_for(
|
||||
provider.chat(
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"Repair the invalid planner JSON using the task-only schema from the original "
|
||||
f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
|
||||
),
|
||||
},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
),
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
try:
|
||||
plan = self._from_json_or_raise(repair_response.content or "")
|
||||
except Exception as repair_error:
|
||||
return TaskExecutionPlan.single(
|
||||
"planner_fallback_single",
|
||||
fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
|
||||
planner_adaptation=base_adaptation,
|
||||
)
|
||||
self._merge_adaptation(plan, base_adaptation)
|
||||
return await self._resolve_plan(
|
||||
plan,
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
except Exception as exc:
|
||||
detail = str(exc)
|
||||
error = f"{type(exc).__name__}: {detail}" if detail else type(exc).__name__
|
||||
return TaskExecutionPlan.single("planner_failed", fallback_error=error)
|
||||
|
||||
async def _resolve_plan(
    self,
    plan: TaskExecutionPlan,
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    provider_bundle: ProviderBundle | None,
) -> TaskExecutionPlan:
    """Resolve skills for a team plan's graph, falling back to single mode.

    Non-team plans, and any plan when no skill resolver is configured,
    are returned unchanged. A missing provider bundle, or any exception
    raised while resolving or validating the graph, yields a
    ``planner_fallback_single`` plan carrying the error text. On success
    the plan is updated in place with the resolved graph and reports.
    """
    if not plan.is_team:
        return plan
    resolver = self.task_skill_resolver
    if resolver is None:
        return plan
    if provider_bundle is None:
        return TaskExecutionPlan.single(
            "planner_fallback_single",
            fallback_error="task_skill_resolver_provider_unavailable",
        )
    try:
        # Kept inside the try so a missing graph also degrades to single mode.
        assert plan.graph is not None
        resolved_graph, reports = await resolver.resolve_graph(
            plan.graph,
            task=task,
            user_message=user_message,
            attempt_index=attempt_index,
            provider_bundle=provider_bundle,
        )
        resolved_graph.validate()
        plan.graph = resolved_graph
        plan.skill_resolution_report = reports
        self._merge_skill_resolution_adaptation(plan, reports)
        return plan
    except Exception as exc:
        # Best-effort by design: a resolver failure must not abort the attempt.
        return TaskExecutionPlan.single(
            "planner_fallback_single",
            fallback_error=f"task_skill_resolver_failed: {exc}",
        )
|
||||
return TaskExecutionPlan.single("planner_team_replaced_by_workflow_tools")
|
||||
|
||||
@staticmethod
|
||||
def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
|
||||
@ -306,307 +149,3 @@ class TaskExecutionPlanner:
|
||||
"端到端",
|
||||
)
|
||||
return any(marker in text for marker in complex_markers)
|
||||
|
||||
def from_json(self, text: str) -> TaskExecutionPlan:
    """Parse planner JSON into a plan without ever raising.

    Delegates to ``_from_json_or_raise``. Any exception is converted into
    a ``planner_fallback_single`` plan whose ``fallback_error`` is the
    exception text.
    """
    try:
        parsed_plan = self._from_json_or_raise(text)
    except Exception as exc:
        # Deliberate catch-all: invalid planner output degrades to single mode.
        return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))
    return parsed_plan
|
||||
|
||||
def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
    """Parse planner JSON into a plan, raising on invalid team graphs.

    Any ``mode`` other than ``"team"`` (including a missing one) produces
    a single-mode plan. For ``"team"`` the graph is built from the payload
    and validated against ``_MAX_DEPTH`` before the plan is returned.
    """
    payload = self._parse_json_object(text)
    mode = str(payload.get("mode") or "single").strip().lower()
    reason = str(payload.get("reason") or "")
    adaptation = self._adaptation_from_payload(payload)

    if mode == "team":
        graph = self._graph_from_payload(payload, adaptation=adaptation)
        graph.validate(max_depth=self._MAX_DEPTH)
        synthesis = str(payload.get("final_synthesis_instruction") or "")
        return TaskExecutionPlan(
            mode="team",
            reason=reason or "planner_selected_team",
            graph=graph,
            final_synthesis_instruction=synthesis,
            planner_adaptation=adaptation,
        )

    return TaskExecutionPlan.single(
        reason or "planner_selected_single",
        planner_adaptation=adaptation,
    )
|
||||
|
||||
def _graph_from_payload(
    self,
    payload: dict[str, Any],
    *,
    adaptation: dict[str, Any],
) -> ExecutionGraph:
    """Build an ``ExecutionGraph`` from a planner payload.

    Validates the strategy, the node count (1 to ``_MAX_NODES``) and each
    node's field names, then converts every node object into an
    ``ExecutionNode``. Tool warnings and per-node skill bindings are
    recorded on ``adaptation`` as a side effect.

    Raises:
        ValueError: on an unsupported strategy, a missing, empty or
            oversized node list, a non-object node, unsupported node
            fields, or a node without ``node_id`` / ``task``.
    """
    strategy = str(payload.get("strategy") or "sequence").strip().lower()
    if strategy not in self._SUPPORTED_STRATEGIES:
        raise ValueError(f"Unsupported team strategy: {strategy}")

    raw_nodes = payload.get("nodes")
    if not (isinstance(raw_nodes, list) and raw_nodes):
        raise ValueError("Team plan requires at least one node")
    if len(raw_nodes) > self._MAX_NODES:
        raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")

    nodes: list[ExecutionNode] = []
    for position, spec in enumerate(raw_nodes, start=1):
        if not isinstance(spec, dict):
            raise ValueError("Each team node must be an object")
        unknown_fields = sorted(set(spec) - self._ALLOWED_NODE_FIELDS)
        if unknown_fields:
            raise ValueError(f"Unsupported team node field(s): {', '.join(unknown_fields)}")

        # A missing node_id falls back to a positional name; a task is mandatory.
        node_id = str(spec.get("node_id") or f"node_{position}").strip()
        task = str(spec.get("task") or "").strip()
        if not (node_id and task):
            raise ValueError("Each team node requires node_id and task")

        allowed_tool_names = self._resolve_requested_tools(
            spec.get("requested_tools"),
            warnings=adaptation["warnings"],
        )
        use_skill = _optional_str(spec.get("use_skill"))
        skill_query = _optional_str(spec.get("skill_query")) or task
        # Record a binding only when the planner mentioned a skill for this node.
        if use_skill is not None or "skill_query" in spec:
            adaptation.setdefault("node_skill_bindings", []).append(
                {
                    "node_id": node_id,
                    "use_skill": use_skill,
                    "skill_query": skill_query,
                }
            )

        descriptor = AgentDescriptor(
            name=node_id,
            role="",
            system_prompt="",
            metadata={
                "use_skill": use_skill,
                "skill_query": skill_query,
                "required_capabilities": [],
                "requested_tags": [],
                "sub_agent_kind": "generic_skill_worker",
            },
        )
        node = ExecutionNode(
            node_id=node_id,
            task=task,
            agent=descriptor,
            depends_on=[str(dep) for dep in spec.get("depends_on") or []],
            constraints=[str(value) for value in spec.get("constraints") or []],
            input_contract=_dict_value(spec.get("input_contract")),
            output_contract=_dict_value(spec.get("output_contract")),
            allowed_tool_names=allowed_tool_names,
            required_evidence=_string_list(spec.get("required_evidence")),
            evidence_contract=_dict_value(spec.get("evidence_contract")),
            validation_rules=_string_list(spec.get("validation_rules")),
            required_for_completion=bool(spec.get("required_for_completion", True)),
            block_downstream_on_partial=bool(spec.get("block_downstream_on_partial", False)),
            max_tool_iterations=_optional_int(spec.get("max_tool_iterations")),
        )
        nodes.append(node)

    return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]
|
||||
|
||||
def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
    """Filter a node's requested tool names down to the usable ones.

    Returns ``None`` when ``value`` is ``None``. Otherwise names on the
    high-risk denylist and names not found in ``self.tool_registry`` are
    dropped, each adding a warning to ``warnings``; the remaining names
    are returned in order.
    """
    if value is None:
        return None

    accepted: list[str] = []
    for tool_name in _string_list(value):
        if tool_name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
            _append_unique(warnings, f"requires_high_risk_review: {tool_name}")
        elif self.tool_registry is None or self.tool_registry.get(tool_name) is None:
            _append_unique(warnings, f"unknown tool removed: {tool_name}")
        else:
            accepted.append(tool_name)
    return accepted
|
||||
|
||||
@staticmethod
def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of the payload's ``adaptation`` object with normalized warnings.

    A missing or non-dict ``adaptation`` yields a fresh dict. The
    ``warnings`` entry is always replaced by the result of ``_string_list``.
    """
    candidate = payload.get("adaptation")
    if isinstance(candidate, dict):
        normalized = dict(candidate)
    else:
        normalized = {}
    normalized["warnings"] = _string_list(normalized.get("warnings"))
    return normalized
|
||||
|
||||
@staticmethod
def _select_team_template(
    activated_skills: list[SkillContext],
) -> tuple[SkillContext | None, dict[str, Any]]:
    """Pick the primary team template among the activated skills.

    A skill qualifies when its ``team_template`` is a dict whose ``nodes``
    entry is a list. The first qualifying skill is selected and the rest
    are reported as ignored. Template warnings of every activated skill
    are collected, prefixed with the skill name.

    Returns:
        The selected skill (or ``None``) and the base adaptation dict.
    """
    qualifying = []
    for skill in activated_skills:
        template = skill.team_template
        if isinstance(template, dict) and isinstance(template.get("nodes"), list):
            qualifying.append(skill)
    primary = qualifying[0] if qualifying else None

    collected_warnings: list[str] = []
    for skill in activated_skills:
        for warning in skill.team_template_warnings:
            _append_unique(collected_warnings, f"{skill.name}: {warning}")

    if primary:
        primary_name = primary.name
        selection_reason = "first activated skill with a valid team template"
    else:
        primary_name = None
        selection_reason = "no activated skill has a valid team template"

    base_adaptation = {
        "template_used": False,
        "selected_template": primary_name,
        "selection_reason": selection_reason,
        "ignored_templates": [skill.name for skill in qualifying[1:]],
        "warnings": collected_warnings,
    }
    return primary, base_adaptation
|
||||
|
||||
@staticmethod
def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
    """Replace ``plan.planner_adaptation`` with a merge of ``base`` and itself.

    Template selection fields come from ``base``; ``template_used`` and
    ``node_skill_bindings`` come from the plan's own adaptation; warnings
    of both are combined (base first) without duplicates.
    """
    own = dict(plan.planner_adaptation)

    combined_warnings: list[str] = []
    for source in (base.get("warnings", []), own.get("warnings", [])):
        for warning in source:
            _append_unique(combined_warnings, str(warning))

    merged = {
        "template_used": bool(own.get("template_used", False)),
        "selected_template": base.get("selected_template"),
        "selection_reason": base.get("selection_reason"),
        "ignored_templates": list(base.get("ignored_templates", [])),
        "warnings": combined_warnings,
    }
    bindings = own.get("node_skill_bindings")
    if isinstance(bindings, list):
        # Copy each binding and drop entries that are not objects.
        merged["node_skill_bindings"] = [dict(entry) for entry in bindings if isinstance(entry, dict)]
    plan.planner_adaptation = merged
|
||||
|
||||
@staticmethod
def _merge_skill_resolution_adaptation(
    plan: TaskExecutionPlan,
    reports: list[SkillResolutionReport],
) -> None:
    """Fold skill resolution reports into ``plan.planner_adaptation``.

    Report warnings are appended (without duplicates) to the adaptation's
    ``warnings`` list. When a report requested a skill by name but no
    exact binding was used, the matching node binding gets a
    ``fallback_reason`` entry.
    """
    adaptation = plan.planner_adaptation
    warnings = adaptation.setdefault("warnings", [])

    binding_by_node = {}
    for entry in adaptation.get("node_skill_bindings") or []:
        if isinstance(entry, dict):
            binding_by_node[str(entry.get("node_id"))] = entry

    for report in reports:
        for warning in report.warnings:
            _append_unique(warnings, warning)
        binding = binding_by_node.get(report.node_id)
        if binding is None:
            continue
        if report.requested_skill_name and not report.exact_binding_used:
            binding["fallback_reason"] = f"use_skill unresolved; {report.reason}"
|
||||
|
||||
@staticmethod
def _prompt(
    *,
    task: TaskRecord,
    user_message: str,
    attempt_index: int,
    skill_summaries: list[str] | None = None,
    tool_hints: list[str] | None = None,
    activated_skills: list[SkillContext] | None = None,
    selected_template: SkillContext | None = None,
) -> str:
    """Compose the user prompt asking the planner to choose single or team mode.

    The prompt consists of fixed instructions and the JSON schema, the
    task goal, the current user request and the attempt index, followed
    by optional notes: skill summaries, full skill guidance, the primary
    skill team template, tool hints, and the last five task feedback
    entries.
    """

    def bullet_list(items: list[str]) -> str:
        return "\n".join(f"- {item}" for item in items)

    history_note = (
        "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
        if task.feedback
        else ""
    )
    skill_note = (
        "\nActivated skill summaries:\n" + bullet_list(skill_summaries)
        if skill_summaries
        else ""
    )
    guidance_note = (
        "\nActivated Skill guidance:\n"
        + "\n".join(f"[{skill.name}]\n{skill.content}" for skill in activated_skills)
        if activated_skills
        else ""
    )
    if selected_template is None:
        template_note = ""
    else:
        template_payload = {
            "skill_name": selected_template.name,
            "skill_version": selected_template.version,
            "template": selected_template.team_template,
        }
        template_note = "\nPrimary Skill team template:\n" + json.dumps(
            template_payload,
            ensure_ascii=False,
            indent=2,
        )
    tool_note = (
        "\nActivated skill tool hints:\n" + bullet_list(tool_hints)
        if tool_hints
        else ""
    )

    instructions = (
        "Decide execution mode for this internal Task attempt.\n"
        "Use mode=team only when independent research, review, implementation slices, or staged checks "
        "would materially improve the result. Otherwise use mode=single.\n\n"
    )
    schema = (
        "JSON schema:\n"
        "{\n"
        ' "mode": "single" | "team",\n'
        ' "reason": "short reason",\n'
        ' "strategy": "sequence" | "parallel" | "dag",\n'
        ' "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
        '"skill_query": "optional dynamic skill query", "depends_on": [], '
        '"input_contract": {}, "output_contract": {}, "requested_tools": [], '
        '"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
        '"required_for_completion": true, "block_downstream_on_partial": false, '
        '"max_tool_iterations": 3, "constraints": []}],\n'
        ' "adaptation": {"template_used": true, "warnings": []},\n'
        ' "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
        "}\n\n"
    )
    node_rules = (
        "Node definitions are task-only. Never output agent or role fields. Use at most one primary "
        "Skill template; treat all other activated Skills as guidance.\n\n"
    )
    request = (
        f"Task goal:\n{task.goal}\n\n"
        f"Current user request:\n{user_message}\n\n"
        f"Attempt index: {attempt_index}\n"
    )
    # Notes are appended in a fixed order regardless of the order they were computed in.
    segments = (
        instructions,
        schema,
        node_rules,
        request,
        skill_note,
        guidance_note,
        template_note,
        tool_note,
        history_note,
    )
    return "".join(segments)
|
||||
|
||||
@staticmethod
def _parse_json_object(text: str) -> dict[str, Any]:
    """Extract and decode the JSON object contained in a planner response.

    Surrounding whitespace, backtick fences and a leading ``json`` tag are
    stripped, then the text is narrowed to the span from the first ``{``
    to the last ``}`` before decoding.

    Raises:
        ValueError: if the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or decodes to something other than an object.
    """
    candidate = text.strip()
    if candidate.startswith("```"):
        candidate = candidate.strip("`")
        if candidate.lower().startswith("json"):
            candidate = candidate[4:].strip()

    first_brace = candidate.find("{")
    last_brace = candidate.rfind("}")
    if not (first_brace < 0 or last_brace < first_brace):
        candidate = candidate[first_brace : last_brace + 1]

    decoded = json.loads(candidate)
    if isinstance(decoded, dict):
        return decoded
    raise ValueError("planner response must be a JSON object")
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as a stripped string, or ``None`` when it is blank.

    ``None``, the empty string and whitespace-only text all map to ``None``.
    """
    if value in (None, ""):
        return None
    stripped = str(value).strip()
    return stripped if stripped else None
|
||||
|
||||
|
||||
def _optional_int(value: Any) -> int | None:
    """Convert an optional ``max_tool_iterations`` value to a non-negative int.

    ``None`` and the empty string mean "not specified" and return ``None``.

    Raises:
        ValueError: for booleans, negative values, and text that ``int()``
            cannot parse.
    """
    if value in (None, ""):
        return None
    # bool is a subclass of int, so it must be rejected explicitly.
    if isinstance(value, bool):
        raise ValueError("max_tool_iterations must be an integer")
    number = int(value)
    if number >= 0:
        return number
    raise ValueError("max_tool_iterations must be non-negative")
|
||||
|
||||
|
||||
def _dict_value(value: Any) -> dict[str, Any]:
    """Return a shallow copy of ``value`` when it is a dict, else an empty dict."""
    if isinstance(value, dict):
        return dict(value)
    return {}
|
||||
|
||||
|
||||
def _append_unique(values: list[str], value: str) -> None:
    """Append ``value`` to ``values`` in place unless it is empty or already present."""
    if not value:
        return
    if value in values:
        return
    values.append(value)
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
    """Normalize ``value`` into a list of unique, non-empty, stripped strings.

    A list is used as is, a string is split on commas, and anything else
    yields an empty list. Items are converted with ``str()``; first
    occurrence order is kept.
    """
    if isinstance(value, list):
        items = value
    elif isinstance(value, str):
        items = value.split(",")
    else:
        return []
    stripped = (str(item).strip() for item in items)
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(text for text in stripped if text))
|
||||
|
||||
2
app-instance/backend/beaver/team_workflows/__init__.py
Normal file
2
app-instance/backend/beaver/team_workflows/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Local team workflow graph builders."""
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
"""AgentRearrange graph builder using arrow/comma flow syntax."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
|
||||
from .base import (
|
||||
WorkflowAgentSpec,
|
||||
agent_name_set,
|
||||
build_graph_from_dependencies,
|
||||
edges_to_dependencies,
|
||||
parse_agents,
|
||||
validate_no_disconnected_agents,
|
||||
)
|
||||
|
||||
WORKFLOW_NAME = "AgentRearrange"
|
||||
|
||||
|
||||
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    flow: str,
) -> ExecutionGraph:
    """Build the AgentRearrange execution graph from agents and a flow string.

    The agents are parsed, the flow is turned into edges between
    consecutive stages, and the edges into per-agent dependencies. Agents
    that take part in no edge are rejected. The graph always uses the
    ``"dag"`` strategy.
    """
    # ``task`` is part of the signature but is not used by this builder.
    del task
    specs = parse_agents(agents)
    flow_edges = parse_flow(flow, known_agents=agent_name_set(specs))
    dependency_map = edges_to_dependencies(agents=specs, edges=flow_edges)
    validate_no_disconnected_agents(agents=specs, dependencies=dependency_map)
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=specs,
        dependencies=dependency_map,
    )
|
||||
|
||||
|
||||
def parse_flow(flow: str, *, known_agents: set[str]) -> list[tuple[str, str]]:
    """Turn an arrow/comma flow string into a list of unique edges.

    Every agent in a stage is connected to every agent in the next stage.
    Edges are returned in first-seen order without duplicates.

    Raises:
        ValueError: if the flow is malformed (see ``_parse_stages``) or
            names an agent that is not in ``known_agents``.
    """
    stages = _parse_stages(flow)

    # Validate all names before building any edge.
    unknown = next(
        (name for stage in stages for name in stage if name not in known_agents),
        None,
    )
    if unknown is not None:
        raise ValueError(f"workflow flow references unknown agent: {unknown}")

    candidate_edges = (
        (source, target)
        for upstream, downstream in zip(stages, stages[1:], strict=False)
        for source in upstream
        for target in downstream
    )
    # dict.fromkeys drops repeated edges while keeping first-seen order.
    return list(dict.fromkeys(candidate_edges))
|
||||
|
||||
|
||||
def _parse_stages(flow: str) -> list[list[str]]:
    """Split a flow string into stages of agent names.

    Stages are separated by ``->`` and the names inside a stage by commas.
    Names are stripped and empty names are ignored.

    Raises:
        ValueError: if the flow is blank, a stage has no names, a stage
            repeats a name, or there are fewer than two stages.
    """
    text = str(flow or "").strip()
    if not text:
        raise ValueError("workflow flow is required")

    stages: list[list[str]] = []
    for segment in text.split("->"):
        trimmed = (part.strip() for part in segment.split(","))
        names = [part for part in trimmed if part]
        if not names:
            raise ValueError("workflow flow contains an empty stage")
        if len(set(names)) != len(names):
            raise ValueError("workflow flow contains duplicate agent names in a stage")
        stages.append(names)

    if len(stages) < 2:
        raise ValueError("workflow flow must contain at least two stages")
    return stages
|
||||
273
app-instance/backend/beaver/team_workflows/base.py
Normal file
273
app-instance/backend/beaver/team_workflows/base.py
Normal file
@ -0,0 +1,273 @@
|
||||
"""Shared builders for local team workflow graph construction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Iterable, Literal
|
||||
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
|
||||
|
||||
GraphStrategy = Literal["sequence", "parallel", "dag"]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class WorkflowAgentSpec:
|
||||
name: str
|
||||
instruction: str
|
||||
use_skill: str | None = None
|
||||
skill_query: str | None = None
|
||||
allowed_tool_names: list[str] | None = None
|
||||
required_evidence: list[str] = field(default_factory=list)
|
||||
evidence_contract: dict[str, Any] = field(default_factory=dict)
|
||||
validation_rules: list[str] = field(default_factory=list)
|
||||
required_for_completion: bool = True
|
||||
block_downstream_on_partial: bool = False
|
||||
max_tool_iterations: int | None = None
|
||||
constraints: list[str] = field(default_factory=list)
|
||||
expected_output: str | None = None
|
||||
input_contract: dict[str, Any] = field(default_factory=dict)
|
||||
output_contract: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class WorkflowBuildResult:
|
||||
graph: ExecutionGraph
|
||||
workflow_name: str
|
||||
|
||||
|
||||
def parse_agents(raw_agents: Iterable[WorkflowAgentSpec | dict[str, Any]]) -> list[WorkflowAgentSpec]:
    """Convert raw agent entries into validated ``WorkflowAgentSpec`` objects.

    Existing specs are passed through and dicts are converted with
    ``_agent_from_dict`` (using a 1-based index for error labels). The
    resulting list is checked with ``validate_agent_names``.

    Raises:
        ValueError: if an entry is neither a spec nor a dict, or if
            conversion or name validation fails.
    """

    def coerce(position: int, entry: Any) -> WorkflowAgentSpec:
        if isinstance(entry, WorkflowAgentSpec):
            return entry
        if isinstance(entry, dict):
            return _agent_from_dict(entry, index=position)
        raise ValueError("workflow agents must be objects")

    agents = [coerce(position, entry) for position, entry in enumerate(raw_agents, start=1)]
    validate_agent_names(agents)
    return agents
|
||||
|
||||
|
||||
def validate_agent_names(agents: list[WorkflowAgentSpec]) -> None:
    """Check that the agent list is non-empty, fully specified and uniquely named.

    Raises:
        ValueError: if there are no agents, an agent has an empty name or
            instruction, or two agents share a name.
    """
    if not agents:
        raise ValueError("workflow requires at least one agent")

    names_so_far: set[str] = set()
    for spec in agents:
        current = spec.name
        if not current:
            raise ValueError("workflow agent name is required")
        if not spec.instruction:
            raise ValueError(f"workflow agent {current!r} requires instruction")
        if current in names_so_far:
            raise ValueError(f"workflow agent names must be unique: {current}")
        names_so_far.add(current)
|
||||
|
||||
|
||||
def agent_name_set(agents: list[WorkflowAgentSpec]) -> set[str]:
    """Return the set of names of the given agents."""
    names: set[str] = set()
    for spec in agents:
        names.add(spec.name)
    return names
|
||||
|
||||
|
||||
def build_graph_from_dependencies(
    *,
    workflow_name: str,
    strategy: GraphStrategy,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
) -> ExecutionGraph:
    """Build and validate an ``ExecutionGraph`` with one node per agent.

    Each node's ``depends_on`` comes from ``dependencies``; agents missing
    from that mapping get no dependencies. ``graph.validate()`` is called
    before the graph is returned.
    """
    nodes = []
    for spec in agents:
        upstream = dependencies.get(spec.name, [])
        nodes.append(build_node(workflow_name=workflow_name, agent=spec, depends_on=upstream))

    graph = ExecutionGraph(strategy=strategy, nodes=nodes)
    graph.validate()
    return graph
|
||||
|
||||
|
||||
def build_node(
    *,
    workflow_name: str,
    agent: WorkflowAgentSpec,
    depends_on: list[str],
) -> ExecutionNode:
    """Create the ``ExecutionNode`` for one workflow agent.

    The node id and agent name are both the spec's name, and the node task
    is the spec's instruction. Agent metadata records the worker kind, the
    workflow name and agent name, plus ``use_skill`` / ``skill_query``
    when they are set. List and dict fields are copied so the node does
    not share containers with the spec.
    """
    metadata = {
        "sub_agent_kind": "generic_skill_worker",
        "workflow_tool": workflow_name,
        "workflow_agent_name": agent.name,
    }
    # Skill hints are only recorded when they carry a value.
    for key, hint in (("use_skill", agent.use_skill), ("skill_query", agent.skill_query)):
        if hint:
            metadata[key] = hint

    descriptor = AgentDescriptor(
        name=agent.name,
        role="",
        system_prompt="",
        metadata=metadata,
    )
    tool_names = agent.allowed_tool_names
    return ExecutionNode(
        node_id=agent.name,
        task=agent.instruction,
        agent=descriptor,
        depends_on=list(depends_on),
        constraints=list(agent.constraints),
        expected_output=agent.expected_output,
        input_contract=dict(agent.input_contract),
        output_contract=dict(agent.output_contract),
        # None is kept distinct from an empty list.
        allowed_tool_names=list(tool_names) if tool_names is not None else None,
        required_evidence=list(agent.required_evidence),
        evidence_contract=dict(agent.evidence_contract),
        validation_rules=list(agent.validation_rules),
        required_for_completion=agent.required_for_completion,
        block_downstream_on_partial=agent.block_downstream_on_partial,
        max_tool_iterations=agent.max_tool_iterations,
    )
|
||||
|
||||
|
||||
def edges_to_dependencies(
    *,
    agents: list[WorkflowAgentSpec],
    edges: Iterable[tuple[str, str] | list[str]],
) -> dict[str, list[str]]:
    """Convert ``(source, target)`` edges into a target-to-sources mapping.

    Every agent gets an entry, possibly empty. Sources are listed in
    first-seen order without duplicates.

    Raises:
        ValueError: if an edge is malformed, references an unknown agent,
            or connects an agent to itself.
    """
    dependencies: dict[str, list[str]] = {}
    for spec in agents:
        dependencies[spec.name] = []
    known = set(dependencies)

    for raw_edge in edges:
        source, target = _parse_edge(raw_edge)
        for endpoint in (source, target):
            if endpoint not in known:
                raise ValueError(f"workflow edge references unknown agent: {endpoint}")
        if source == target:
            raise ValueError(f"workflow edge creates a self-cycle: {source}")
        sources = dependencies[target]
        if source not in sources:
            sources.append(source)
    return dependencies
|
||||
|
||||
|
||||
def validate_output_agent(
    *,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
    output_agent: str,
    allow_disconnected: bool = False,
) -> None:
    """Check that ``output_agent`` exists and is fed by upstream agents.

    Unless ``allow_disconnected`` is set, every agent must be the output
    agent or one of its transitive dependencies.

    Raises:
        ValueError: if the output agent is unknown, has no upstream
            agents, or (when disconnected agents are not allowed) some
            agent is not upstream of the output agent.
    """
    known = agent_name_set(agents)
    if output_agent not in known:
        raise ValueError(f"workflow output_agent references unknown agent: {output_agent}")

    upstream = _upstream_nodes(output_agent, dependencies)
    if not upstream:
        raise ValueError(f"workflow output_agent {output_agent!r} must be reachable from upstream agents")

    if not allow_disconnected:
        reachable = set(upstream) | {output_agent}
        stray = sorted(known - reachable)
        if stray:
            raise ValueError(f"workflow has disconnected agent(s): {', '.join(stray)}")
|
||||
|
||||
|
||||
def validate_no_disconnected_agents(
    *,
    agents: list[WorkflowAgentSpec],
    dependencies: dict[str, list[str]],
) -> None:
    """Reject workflows containing an agent that takes part in no edge.

    An agent counts as connected when it has at least one dependency or
    is listed as a dependency of another agent.

    Raises:
        ValueError: naming the disconnected agents in sorted order.
    """
    known = {spec.name for spec in agents}

    linked: set[str] = set()
    for target, sources in dependencies.items():
        if not sources:
            continue
        linked.add(target)
        linked.update(sources)

    stray = sorted(known - linked)
    if stray:
        raise ValueError(f"workflow has disconnected agent(s): {', '.join(stray)}")
|
||||
|
||||
|
||||
def _agent_from_dict(raw: dict[str, Any], *, index: int) -> WorkflowAgentSpec:
    """Build a ``WorkflowAgentSpec`` from a raw dict entry.

    ``name`` and ``instruction`` are required; ``index`` only appears in
    their error labels (``agents[<index>].name``). Every other field is
    optional and normalized by this module's helper functions.

    Raises:
        ValueError: if a required field is blank or a helper rejects a value.
    """
    read = raw.get
    label = f"agents[{index}]"
    name = _required_str(read("name"), f"{label}.name")
    instruction = _required_str(read("instruction"), f"{label}.instruction")
    # Keyword values are evaluated top to bottom, so validation errors
    # surface in this field order.
    return WorkflowAgentSpec(
        name=name,
        instruction=instruction,
        use_skill=_optional_str(read("use_skill")),
        skill_query=_optional_str(read("skill_query")),
        allowed_tool_names=_optional_string_list(read("allowed_tool_names")),
        required_evidence=_string_list(read("required_evidence")),
        evidence_contract=_dict(read("evidence_contract")),
        validation_rules=_string_list(read("validation_rules")),
        required_for_completion=bool(read("required_for_completion", True)),
        block_downstream_on_partial=bool(read("block_downstream_on_partial", False)),
        max_tool_iterations=_optional_int(read("max_tool_iterations")),
        constraints=_string_list(read("constraints")),
        expected_output=_optional_str(read("expected_output")),
        input_contract=_dict(read("input_contract")),
        output_contract=_dict(read("output_contract")),
    )
|
||||
|
||||
|
||||
def _parse_edge(raw_edge: tuple[str, str] | list[str]) -> tuple[str, str]:
    """Validate a raw ``[source, target]`` edge and return it as a tuple of clean names.

    Raises:
        ValueError: if ``raw_edge`` is not a two-item list/tuple, or if either
            endpoint is empty after stripping.
    """
    is_pair = isinstance(raw_edge, (list, tuple)) and len(raw_edge) == 2
    if not is_pair:
        raise ValueError("workflow edges must be [source, target] pairs")
    raw_source, raw_target = raw_edge
    # The source is validated before the target, so its error is reported first.
    return (
        _required_str(raw_source, "edge source"),
        _required_str(raw_target, "edge target"),
    )
|
||||
|
||||
|
||||
def _upstream_nodes(node_id: str, dependencies: dict[str, list[str]]) -> set[str]:
|
||||
result: set[str] = set()
|
||||
|
||||
def visit(current: str) -> None:
|
||||
for dependency in dependencies.get(current, []):
|
||||
if dependency in result:
|
||||
continue
|
||||
result.add(dependency)
|
||||
visit(dependency)
|
||||
|
||||
visit(node_id)
|
||||
return result
|
||||
|
||||
|
||||
def _required_str(value: Any, label: str) -> str:
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
raise ValueError(f"{label} is required")
|
||||
return text
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
text = str(value or "").strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _string_list(value: Any) -> list[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if not isinstance(value, list):
|
||||
raise ValueError("expected a list of strings")
|
||||
return [str(item).strip() for item in value if str(item).strip()]
|
||||
|
||||
|
||||
def _optional_string_list(value: Any) -> list[str] | None:
    """Like ``_string_list`` but keep ``None`` as ``None`` instead of an empty list."""
    return None if value is None else _string_list(value)
|
||||
|
||||
|
||||
def _dict(value: Any) -> dict[str, Any]:
|
||||
return dict(value) if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _optional_int(value: Any) -> int | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError) as exc:
|
||||
raise ValueError("max_tool_iterations must be an integer") from exc
|
||||
26
app-instance/backend/beaver/team_workflows/concurrent.py
Normal file
26
app-instance/backend/beaver/team_workflows/concurrent.py
Normal file
@ -0,0 +1,26 @@
|
||||
"""ConcurrentWorkflow graph builder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
|
||||
from .base import WorkflowAgentSpec, build_graph_from_dependencies, parse_agents
|
||||
|
||||
WORKFLOW_NAME = "ConcurrentWorkflow"
|
||||
|
||||
|
||||
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
) -> ExecutionGraph:
    """Build a ``ConcurrentWorkflow`` graph in which no agent depends on another.

    Args:
        task: Accepted for interface parity with the other builders; unused.
        agents: Agent specs or raw dicts, parsed with ``parse_agents``.
    """
    del task
    specs = parse_agents(agents)
    independent: dict[str, list[str]] = {}
    for spec in specs:
        # Every agent gets an empty dependency list.
        independent[spec.name] = []
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="parallel",
        agents=specs,
        dependencies=independent,
    )
|
||||
174
app-instance/backend/beaver/team_workflows/executor.py
Normal file
174
app-instance/backend/beaver/team_workflows/executor.py
Normal file
@ -0,0 +1,174 @@
|
||||
"""Runtime bridge for local team workflow MCP tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Callable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph, TeamRunResult
|
||||
from beaver.tools.base import ToolContext, ToolResult
|
||||
|
||||
from . import agent_rearrange, concurrent, graph, mixture_of_agents, sequential
|
||||
|
||||
GraphBuilder = Callable[..., ExecutionGraph]
|
||||
|
||||
|
||||
class TeamWorkflowExecutor:
    """Execute workflow MCP calls inside the current Beaver runtime."""

    # Graph builders keyed by the workflow name passed to ``execute``.
    _BUILDERS: dict[str, GraphBuilder] = {
        "SequentialWorkflow": sequential.build_graph,
        "ConcurrentWorkflow": concurrent.build_graph,
        "MixtureOfAgents": mixture_of_agents.build_graph,
        "AgentRearrange": agent_rearrange.build_graph,
        "GraphWorkflow": graph.build_graph,
    }

    async def execute(
        self,
        workflow_name: str,
        arguments: dict[str, Any],
        context: ToolContext,
        *,
        tool_name: str | None = None,
    ) -> ToolResult:
        """Build the named workflow graph, run it as a team, and wrap the outcome.

        Args:
            workflow_name: Key into ``_BUILDERS``.
            arguments: Keyword arguments forwarded to the graph builder.
            context: Tool context providing metadata, services and the session id.
            tool_name: Name reported on the ``ToolResult``; defaults to ``workflow_name``.

        Returns:
            A ``ToolResult`` with ``success=True`` whenever the team run returns;
            the payload's own ``"success"`` key carries the team result's flag.
            Any ``Exception`` raised along the way is converted into a
            ``ToolResult`` with ``success=False`` instead of propagating.
        """
        exposed_name = tool_name or workflow_name
        try:
            origin = str(context.metadata.get("source") or "")
            if origin.startswith("team:"):
                # A call whose source is already a team node may not start another team.
                raise ValueError("nested_team_workflow_not_allowed")
            builder = self._BUILDERS.get(workflow_name)
            if builder is None:
                raise ValueError(f"unknown team workflow tool: {workflow_name}")
            built_graph = builder(**dict(arguments or {}))
            team_result = await self._run_team(
                context=context,
                graph=built_graph,
                parent_task_id=_task_id(context),
                parent_session_id=_session_id(context),
            )
            payload = _success_payload(
                workflow_name=workflow_name,
                graph=built_graph,
                result=team_result,
            )
            return ToolResult(
                success=True,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=exposed_name,
                raw_output=payload,
            )
        except Exception as exc:
            reason = str(exc)
            payload = {
                "success": False,
                "workflow": workflow_name,
                "error": reason,
            }
            return ToolResult(
                success=False,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=exposed_name,
                error=reason,
                raw_output=payload,
            )

    async def _run_team(
        self,
        *,
        context: ToolContext,
        graph: ExecutionGraph,
        parent_task_id: str,
        parent_session_id: str,
    ) -> TeamRunResult:
        """Run ``graph`` through an injected team runner, or a freshly built ``TeamService``.

        Uses the ``agent_team_runner`` service when present; otherwise builds an
        ``AgentLoop`` from the ``agent_loop`` service's profile and loader.

        Raises:
            ValueError: when neither ``agent_team_runner`` nor ``agent_loop`` is
                present in ``context.services``.
        """
        services = context.services
        runner = services.get("agent_team_runner")
        parent_run_id = _run_id(context)
        if runner is not None:
            return await runner(
                graph,
                parent_task_id=parent_task_id,
                parent_session_id=parent_session_id,
                parent_run_id=parent_run_id,
            )

        agent_loop = services.get("agent_loop")
        if agent_loop is None:
            raise ValueError("team workflow execution requires agent_loop or agent_team_runner")
        provider_bundle = services.get("provider_bundle")

        # Every node gets the same provider bundle; no factory is passed at all
        # when the context does not supply one.
        bundle_factory = None
        if provider_bundle is not None:

            def bundle_factory(_node: Any) -> Any:
                return provider_bundle

        from beaver.engine import AgentLoop
        from beaver.services.team_service import TeamService

        team_loop = AgentLoop(profile=agent_loop.profile, loader=agent_loop.loader)
        team_loop.loaded = services.get("loaded")
        team_service = TeamService(team_loop)
        return await team_service.run_team(
            graph,
            parent_task_id=parent_task_id,
            parent_session_id=parent_session_id,
            parent_run_id=parent_run_id,
            provider_bundle_factory=bundle_factory,
            allow_candidate_generation=False,
        )
|
||||
|
||||
|
||||
def _task_id(context: ToolContext) -> str:
|
||||
value = str(context.services.get("task_id") or context.metadata.get("task_id") or "").strip()
|
||||
if not value:
|
||||
raise ValueError("team workflow execution requires task_id")
|
||||
return value
|
||||
|
||||
|
||||
def _session_id(context: ToolContext) -> str:
|
||||
value = str(context.session_id or context.services.get("session_id") or "").strip()
|
||||
if not value:
|
||||
raise ValueError("team workflow execution requires session_id")
|
||||
return value
|
||||
|
||||
|
||||
def _run_id(context: ToolContext) -> str | None:
|
||||
return str(context.services.get("run_id") or context.metadata.get("run_id") or "").strip() or None
|
||||
|
||||
|
||||
def _success_payload(
    *,
    workflow_name: str,
    graph: ExecutionGraph,
    result: TeamRunResult,
) -> dict[str, Any]:
    """Assemble the payload dict describing a finished team run.

    The ``"success"`` key mirrors ``result.success``, so it can be ``False``.
    """
    payload: dict[str, Any] = {"success": result.success, "workflow": workflow_name}
    payload["summary"] = result.summary
    payload["run_ids"] = list(result.run_ids)
    payload["session_ids"] = list(result.session_ids)
    payload["node_results"] = [node_result.to_dict() for node_result in result.node_results]
    payload["graph"] = _graph_to_dict(graph)
    return payload
|
||||
|
||||
|
||||
def _graph_to_dict(graph: ExecutionGraph) -> dict[str, Any]:
|
||||
return {
|
||||
"strategy": graph.strategy,
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": node.node_id,
|
||||
"task": node.task,
|
||||
"depends_on": list(node.depends_on),
|
||||
"allowed_tool_names": (
|
||||
None if node.allowed_tool_names is None else list(node.allowed_tool_names)
|
||||
),
|
||||
"required_evidence": list(node.required_evidence),
|
||||
"evidence_contract": dict(node.evidence_contract),
|
||||
"validation_rules": list(node.validation_rules),
|
||||
"required_for_completion": node.required_for_completion,
|
||||
"block_downstream_on_partial": node.block_downstream_on_partial,
|
||||
"max_tool_iterations": node.max_tool_iterations,
|
||||
"metadata": dict(node.agent.metadata),
|
||||
}
|
||||
for node in graph.nodes
|
||||
],
|
||||
}
|
||||
45
app-instance/backend/beaver/team_workflows/graph.py
Normal file
45
app-instance/backend/beaver/team_workflows/graph.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""GraphWorkflow explicit DAG builder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
|
||||
from .base import (
|
||||
WorkflowAgentSpec,
|
||||
build_graph_from_dependencies,
|
||||
edges_to_dependencies,
|
||||
parse_agents,
|
||||
validate_output_agent,
|
||||
)
|
||||
|
||||
WORKFLOW_NAME = "GraphWorkflow"
|
||||
|
||||
|
||||
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    edges: Iterable[tuple[str, str] | list[str]],
    output_agent: str,
    allow_disconnected: bool = False,
) -> ExecutionGraph:
    """Build a ``GraphWorkflow`` DAG from explicit ``[source, target]`` edges.

    Args:
        task: Accepted for interface parity with the other builders; unused.
        agents: Agent specs or raw dicts, parsed with ``parse_agents``.
        edges: Dependency edges; at least one is required.
        output_agent: Name of the final agent, stripped before validation.
        allow_disconnected: Forwarded to ``validate_output_agent``.

    Raises:
        ValueError: when ``edges`` is empty; other validation errors propagate
            from the helpers.
    """
    del task
    specs = parse_agents(agents)
    edge_list = list(edges or [])
    if not edge_list:
        raise ValueError("GraphWorkflow requires edges")
    dependency_map = edges_to_dependencies(agents=specs, edges=edge_list)
    final_agent = str(output_agent or "").strip()
    validate_output_agent(
        agents=specs,
        dependencies=dependency_map,
        output_agent=final_agent,
        allow_disconnected=allow_disconnected,
    )
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=specs,
        dependencies=dependency_map,
    )
|
||||
261
app-instance/backend/beaver/team_workflows/mcp_tools.py
Normal file
261
app-instance/backend/beaver/team_workflows/mcp_tools.py
Normal file
@ -0,0 +1,261 @@
|
||||
"""MCP schema tools for local team workflow graph builders."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Callable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
|
||||
|
||||
from . import agent_rearrange, concurrent, graph, mixture_of_agents, sequential
|
||||
|
||||
GraphBuilder = Callable[..., ExecutionGraph]
|
||||
|
||||
|
||||
def create_team_workflow_tools() -> list[BaseTool]:
    """Return one ``TeamWorkflowSchemaTool`` per supported workflow, in a fixed order."""
    # Each row: (tool name, description, input-schema factory, graph builder).
    catalog = (
        (
            "SequentialWorkflow",
            "Build a sequential Beaver team workflow graph. Use this for staged work "
            "where each agent depends on the previous agent's output.",
            _sequential_schema,
            sequential.build_graph,
        ),
        (
            "ConcurrentWorkflow",
            "Build a concurrent Beaver team workflow graph. Use this only when agents "
            "can work independently on the same task.",
            _concurrent_schema,
            concurrent.build_graph,
        ),
        (
            "MixtureOfAgents",
            "Build a mixture-of-agents Beaver team workflow graph where independent "
            "expert agents feed one aggregator agent.",
            _mixture_schema,
            mixture_of_agents.build_graph,
        ),
        (
            "AgentRearrange",
            "Build a Beaver team workflow graph from strict flow syntax. Use '->' for "
            "stage order and ',' for agents in the same parallel stage.",
            _agent_rearrange_schema,
            agent_rearrange.build_graph,
        ),
        (
            "GraphWorkflow",
            "Build an explicit Beaver DAG workflow graph. Use this advanced tool only "
            "when the dependency edges must be specified directly.",
            _graph_schema,
            graph.build_graph,
        ),
    )
    return [
        TeamWorkflowSchemaTool(
            name=tool_name,
            description=summary,
            input_schema=schema_factory(),
            builder=graph_builder,
        )
        for tool_name, summary, schema_factory, graph_builder in catalog
    ]
|
||||
|
||||
|
||||
class TeamWorkflowSchemaTool(BaseTool):
    """Tool that builds a workflow graph from its arguments and returns it as JSON."""

    def __init__(
        self,
        *,
        name: str,
        description: str,
        input_schema: dict[str, Any],
        builder: GraphBuilder,
    ) -> None:
        """Store the graph builder and a ``ToolSpec`` in the ``team_workflow`` toolset."""
        self._builder = builder
        self._spec = ToolSpec(
            name=name,
            description=description,
            input_schema=input_schema,
            toolset="team_workflow",
            always_available=False,
            metadata={"category": "team_workflow"},
        )

    @property
    def spec(self) -> ToolSpec:
        """The ``ToolSpec`` created in ``__init__``."""
        return self._spec

    async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
        """Build the graph from ``arguments`` and report it; ``context`` is unused.

        Any ``Exception`` raised while building or serializing is converted into
        a ``ToolResult`` with ``success=False`` instead of propagating.
        """
        del context
        workflow = self.spec.name
        try:
            built_graph = self._builder(**dict(arguments or {}))
            payload = {
                "success": True,
                "workflow": workflow,
                "graph": _graph_to_dict(built_graph),
            }
            return ToolResult(
                success=True,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=workflow,
                raw_output=payload,
            )
        except Exception as exc:
            reason = str(exc)
            payload = {"success": False, "workflow": workflow, "error": reason}
            return ToolResult(
                success=False,
                content=json.dumps(payload, ensure_ascii=False),
                tool_name=workflow,
                error=reason,
                raw_output=payload,
            )
|
||||
|
||||
|
||||
def _graph_to_dict(graph: ExecutionGraph) -> dict[str, Any]:
|
||||
return {
|
||||
"strategy": graph.strategy,
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": node.node_id,
|
||||
"task": node.task,
|
||||
"depends_on": list(node.depends_on),
|
||||
"allowed_tool_names": (
|
||||
None if node.allowed_tool_names is None else list(node.allowed_tool_names)
|
||||
),
|
||||
"required_evidence": list(node.required_evidence),
|
||||
"evidence_contract": dict(node.evidence_contract),
|
||||
"validation_rules": list(node.validation_rules),
|
||||
"required_for_completion": node.required_for_completion,
|
||||
"block_downstream_on_partial": node.block_downstream_on_partial,
|
||||
"max_tool_iterations": node.max_tool_iterations,
|
||||
"metadata": dict(node.agent.metadata),
|
||||
}
|
||||
for node in graph.nodes
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _sequential_schema() -> dict[str, Any]:
    """Input schema for ``SequentialWorkflow``: a required ``task`` and ``agents``."""
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents"]
    schema["additionalProperties"] = False
    return schema
|
||||
|
||||
|
||||
def _concurrent_schema() -> dict[str, Any]:
    """Input schema for ``ConcurrentWorkflow``: a required ``task`` and ``agents``."""
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents"]
    schema["additionalProperties"] = False
    return schema
|
||||
|
||||
|
||||
def _mixture_schema() -> dict[str, Any]:
    """Input schema for ``MixtureOfAgents``: ``task``, expert ``agents`` and one ``aggregator``."""
    experts = _agents_schema(description="Expert agents that run independently before aggregation.")
    aggregator = _agent_schema(description="Aggregator agent that synthesizes expert outputs.")
    properties = {
        "task": _task_schema(),
        "agents": experts,
        "aggregator": aggregator,
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents", "aggregator"]
    schema["additionalProperties"] = False
    return schema
|
||||
|
||||
|
||||
def _agent_rearrange_schema() -> dict[str, Any]:
    """Input schema for ``AgentRearrange``: ``task``, ``agents`` and a ``flow`` string."""
    flow = {
        "type": "string",
        "description": "Strict flow syntax, e.g. 'collector -> tactics, players -> synthesizer'.",
    }
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
        "flow": flow,
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents", "flow"]
    schema["additionalProperties"] = False
    return schema
|
||||
|
||||
|
||||
def _graph_schema() -> dict[str, Any]:
    """Input schema for ``GraphWorkflow``.

    Requires ``task``, ``agents``, ``edges`` and ``output_agent``;
    ``allow_disconnected`` is optional.
    """
    # Each edge is an array of exactly two strings.
    edge_pair = {
        "type": "array",
        "minItems": 2,
        "maxItems": 2,
        "items": {"type": "string"},
    }
    edges = {
        "type": "array",
        "description": "Directed dependency edges as [source_agent, target_agent] pairs.",
        "items": edge_pair,
    }
    output_agent = {
        "type": "string",
        "description": "Final output/synthesis agent. Must be reachable from upstream agents.",
    }
    allow_disconnected = {
        "type": "boolean",
        "description": "Allow agents that are not connected to output_agent. Defaults to false.",
    }
    properties = {
        "task": _task_schema(),
        "agents": _agents_schema(),
        "edges": edges,
        "output_agent": output_agent,
        "allow_disconnected": allow_disconnected,
    }
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    schema["required"] = ["task", "agents", "edges", "output_agent"]
    schema["additionalProperties"] = False
    return schema
|
||||
|
||||
|
||||
def _task_schema() -> dict[str, Any]:
|
||||
return {
|
||||
"type": "string",
|
||||
"description": "Overall user task this workflow supports.",
|
||||
}
|
||||
|
||||
|
||||
def _agents_schema(*, description: str = "Workflow agents in the order or set used by this workflow.") -> dict[str, Any]:
    """Schema fragment for a non-empty array of agent objects.

    Args:
        description: Text placed in the array's ``description`` field.
    """
    schema: dict[str, Any] = {"type": "array", "description": description}
    schema["items"] = _agent_schema()
    schema["minItems"] = 1
    return schema
|
||||
|
||||
|
||||
def _agent_schema(*, description: str = "One workflow agent slot.") -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"description": description,
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"instruction": {"type": "string"},
|
||||
"use_skill": {"type": "string"},
|
||||
"skill_query": {"type": "string"},
|
||||
"allowed_tool_names": {"type": "array", "items": {"type": "string"}},
|
||||
"required_evidence": {"type": "array", "items": {"type": "string"}},
|
||||
"evidence_contract": {"type": "object"},
|
||||
"validation_rules": {"type": "array", "items": {"type": "string"}},
|
||||
"required_for_completion": {"type": "boolean"},
|
||||
"block_downstream_on_partial": {"type": "boolean"},
|
||||
"max_tool_iterations": {"type": "integer"},
|
||||
"constraints": {"type": "array", "items": {"type": "string"}},
|
||||
"expected_output": {"type": "string"},
|
||||
"input_contract": {"type": "object"},
|
||||
"output_contract": {"type": "object"},
|
||||
},
|
||||
"required": ["name", "instruction"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
@ -0,0 +1,37 @@
|
||||
"""MixtureOfAgents graph builder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
|
||||
from .base import (
|
||||
WorkflowAgentSpec,
|
||||
build_graph_from_dependencies,
|
||||
parse_agents,
|
||||
validate_agent_names,
|
||||
)
|
||||
|
||||
WORKFLOW_NAME = "MixtureOfAgents"
|
||||
|
||||
|
||||
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
    aggregator: WorkflowAgentSpec | dict[str, Any],
) -> ExecutionGraph:
    """Build a ``MixtureOfAgents`` DAG: independent experts feeding one aggregator.

    Args:
        task: Accepted for interface parity with the other builders; unused.
        agents: Expert agent specs or raw dicts; they get no dependencies.
        aggregator: The agent that depends on every expert.
    """
    del task
    experts = parse_agents(agents)
    (parsed_aggregator,) = parse_agents([aggregator])[:1]
    all_agents = experts + [parsed_aggregator] if isinstance(experts, list) else [*experts, parsed_aggregator]
    validate_agent_names(all_agents)
    dependencies: dict[str, list[str]] = {}
    for member in all_agents:
        dependencies[member.name] = []
    # Assigned last so the aggregator's entry lists every expert.
    dependencies[parsed_aggregator.name] = [expert.name for expert in experts]
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="dag",
        agents=all_agents,
        dependencies=dependencies,
    )
|
||||
29
app-instance/backend/beaver/team_workflows/sequential.py
Normal file
29
app-instance/backend/beaver/team_workflows/sequential.py
Normal file
@ -0,0 +1,29 @@
|
||||
"""SequentialWorkflow graph builder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable
|
||||
|
||||
from beaver.coordinator.models import ExecutionGraph
|
||||
|
||||
from .base import WorkflowAgentSpec, build_graph_from_dependencies, parse_agents
|
||||
|
||||
WORKFLOW_NAME = "SequentialWorkflow"
|
||||
|
||||
|
||||
def build_graph(
    *,
    task: str,
    agents: Iterable[WorkflowAgentSpec | dict[str, Any]],
) -> ExecutionGraph:
    """Build a ``SequentialWorkflow`` graph where each agent depends on the one before it.

    Args:
        task: Accepted for interface parity with the other builders; unused.
        agents: Agent specs or raw dicts, in execution order.
    """
    del task
    specs = parse_agents(agents)
    chain: dict[str, list[str]] = {spec.name: [] for spec in specs}
    # Pair each agent from the second onward with its immediate predecessor.
    for position in range(1, len(specs)):
        chain[specs[position].name].append(specs[position - 1].name)
    return build_graph_from_dependencies(
        workflow_name=WORKFLOW_NAME,
        strategy="sequence",
        agents=specs,
        dependencies=chain,
    )
|
||||
@ -68,6 +68,15 @@ class MCPToolWrapper(BaseTool):
|
||||
)
|
||||
|
||||
async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult:
|
||||
if self.category == "team_workflow":
|
||||
from beaver.team_workflows.executor import TeamWorkflowExecutor
|
||||
|
||||
return await TeamWorkflowExecutor().execute(
|
||||
self.original_name,
|
||||
dict(arguments or {}),
|
||||
context,
|
||||
tool_name=self.spec.name,
|
||||
)
|
||||
try:
|
||||
result = await asyncio.wait_for(
|
||||
self.call_tool(self.original_name, dict(arguments or {})),
|
||||
|
||||
@ -1,10 +1,13 @@
|
||||
import asyncio
|
||||
import json
|
||||
from contextlib import suppress
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
|
||||
from beaver.engine import loop as loop_module
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
|
||||
|
||||
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
|
||||
@ -49,7 +52,7 @@ def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
|
||||
asyncio.run(run_case())
|
||||
|
||||
|
||||
def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> None:
|
||||
def test_web_search_loop_guard_keeps_successful_low_quality_results_available() -> None:
|
||||
guard = loop_module._WebSearchLoopGuard()
|
||||
low_quality = json.dumps(
|
||||
{
|
||||
@ -63,21 +66,106 @@ def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> Non
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
|
||||
guidance = guard.observe_result("web_search", low_quality)
|
||||
|
||||
assert guidance is not None
|
||||
assert guidance["finish_reason"] == "web_search_low_quality_budget"
|
||||
assert "weather beijing" in guidance["message"]
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
|
||||
|
||||
def test_web_search_loop_guard_resets_after_useful_result() -> None:
|
||||
guard = loop_module._WebSearchLoopGuard()
|
||||
low_quality = json.dumps({"success": True, "query": "weather", "quality": "low", "results": []})
|
||||
failed_search = json.dumps({"success": False, "query": "weather", "error": "timeout"})
|
||||
useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []})
|
||||
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
assert guard.observe_result("web_search", failed_search) is None
|
||||
assert guard.observe_result("web_search", useful) is None
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
assert guard.observe_result("web_search", low_quality) is None
|
||||
assert guard.observe_result("web_search", failed_search) is None
|
||||
assert guard.observe_result("web_search", failed_search) is None
|
||||
|
||||
assert guard.observe_result("web_search", low_quality) is not None
|
||||
assert guard.observe_result("web_search", failed_search) is not None
|
||||
|
||||
|
||||
class RecordingProvider(LLMProvider):
    """Stub provider that records the tool names offered on each ``chat`` call."""

    def __init__(self) -> None:
        super().__init__()
        # One inner list of tool names per ``chat`` invocation, in call order.
        self.tool_names_by_call: list[list[str]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int | None = None,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the offered tool names and reply with a fixed ``"done"`` response."""
        offered: list[str] = []
        for tool in tools or []:
            # Prefer the nested ``function.name`` and fall back to a top-level ``name``.
            label = tool.get("function", {}).get("name") or tool.get("name")
            offered.append(str(label))
        self.tool_names_by_call.append(offered)
        return LLMResponse("done", provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
|
||||
|
||||
|
||||
def _bundle(provider: RecordingProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ``ProviderBundle`` with a stub main runtime."""
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)
|
||||
|
||||
|
||||
def test_task_mode_root_does_not_expose_agent_team_tool(tmp_path) -> None:
    """A root task-mode run must not offer ``run_agent_team`` to the provider."""
    recorder = RecordingProvider()
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
    pending_run = agent_loop.process_direct(
        "compare financial reports",
        session_id="session",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(recorder),
    )

    asyncio.run(pending_run)

    first_call_tools = recorder.tool_names_by_call[0]
    assert "run_agent_team" not in first_call_tools
|
||||
|
||||
|
||||
def test_default_engine_registry_does_not_register_agent_team_tool(tmp_path) -> None:
    """A freshly booted engine has no ``run_agent_team`` in its registry or tools."""
    engine_loader = EngineLoader(workspace=tmp_path)
    loaded = AgentLoop(loader=engine_loader).boot()
    registry = loaded.tool_registry

    assert registry is not None
    assert registry.get("run_agent_team") is None
    assert "run_agent_team" not in loaded.tools
|
||||
|
||||
|
||||
def test_non_task_and_team_node_do_not_expose_agent_team_tool(tmp_path) -> None:
    """Neither an ordinary chat run nor a team child run offers ``run_agent_team``."""
    agent_loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))

    # Ordinary chat: no task id and no task mode.
    chat_recorder = RecordingProvider()
    chat_run = agent_loop.process_direct(
        "ordinary chat",
        session_id="session",
        include_skill_assembly=False,
        provider_bundle=_bundle(chat_recorder),
    )
    asyncio.run(chat_run)

    # Team child: task mode with a ``team:`` source and a parent session.
    child_recorder = RecordingProvider()
    child_run = agent_loop.process_direct(
        "team child",
        session_id="session:team:child",
        parent_session_id="session",
        source="team:child",
        task_id="task-1",
        task_mode=True,
        include_skill_assembly=False,
        provider_bundle=_bundle(child_recorder),
    )
    asyncio.run(child_run)

    assert "run_agent_team" not in chat_recorder.tool_names_by_call[0]
    assert "run_agent_team" not in child_recorder.tool_names_by_call[0]
|
||||
|
||||
@ -15,6 +15,7 @@ from beaver.engine import AgentLoop, EngineLoader
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.engine.session.manager import SessionManager
|
||||
from beaver.services.team_service import TeamService
|
||||
from beaver.skills.assembler import SkillAssemblyResult
|
||||
from beaver.skills.drafts import DraftService
|
||||
@ -232,9 +233,9 @@ def test_unknown_evidence_requirement_makes_node_partial(tmp_path: Path) -> None
|
||||
|
||||
result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider)))
|
||||
|
||||
assert result.success is False
|
||||
assert result.completion_status == "partial"
|
||||
assert result.evidence_gaps == ["unsupported evidence requirement: unknown_type"]
|
||||
assert result.success is True
|
||||
assert result.completion_status == "succeeded"
|
||||
assert result.evidence_gaps == []
|
||||
|
||||
|
||||
def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None:
|
||||
@ -257,6 +258,90 @@ def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: P
|
||||
assert result.evidence.finish_reason == "max_tool_iterations"
|
||||
|
||||
|
||||
def test_team_node_accepts_finalized_tool_budget_output(tmp_path: Path) -> None:
    """A plain-text answer finishing with ``max_tool_iterations_finalized`` counts as success."""
    agent_loop = _loop(tmp_path)
    finalized = _response("usable finalized output", finish_reason="max_tool_iterations_finalized")
    recorder = RecordingProvider([finalized])
    researcher = AgentDescriptor(name="researcher", role="research")
    envelope = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    node_run = LocalAgentRunner(agent_loop).run(envelope, provider_bundle=_bundle(recorder))
    result = asyncio.run(node_run)

    assert result.success is True
    assert result.completion_status == "succeeded"
    assert result.finish_reason == "max_tool_iterations_finalized"
|
||||
|
||||
|
||||
def test_team_node_rejects_finalized_raw_tool_call_output(tmp_path: Path) -> None:
    """A finalized answer that is only raw tool-call markup is reported as a failure."""
    agent_loop = _loop(tmp_path)
    raw_tool_call = _response(
        '<||DSML||tool_calls><||DSML||invoke name="web_fetch"></||DSML||invoke></||DSML||tool_calls>',
        finish_reason="max_tool_iterations_finalized",
    )
    recorder = RecordingProvider([raw_tool_call])
    researcher = AgentDescriptor(name="researcher", role="research")
    envelope = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    node_run = LocalAgentRunner(agent_loop).run(envelope, provider_bundle=_bundle(recorder))
    result = asyncio.run(node_run)

    assert result.success is False
    assert result.completion_status == "failed"
    assert result.error == "finalized output is a raw tool call"
|
||||
|
||||
|
||||
def test_team_node_defaults_to_larger_tool_iteration_budget(tmp_path: Path) -> None:
    """With no budget set on the envelope, the runner passes ``max_tool_iterations=100``."""
    sessions = SessionManager(tmp_path)
    seen_kwargs: dict[str, object] = {}

    class CapturingLoop:
        """Minimal loop stand-in that records the kwargs given to ``process_direct``."""

        profile = SimpleNamespace()
        loader = None
        is_running = False

        async def process_direct(self, task: str, **kwargs: object) -> SimpleNamespace:
            seen_kwargs.update(kwargs)
            child_session = str(kwargs["session_id"])
            # Create the session before returning the canned run result.
            sessions.ensure_session(child_session, source="test")
            return SimpleNamespace(
                session_id=child_session,
                run_id="run-captured",
                output_text="done",
                finish_reason="stop",
            )

        def boot(self) -> SimpleNamespace:
            return SimpleNamespace(session_manager=sessions)

    researcher = AgentDescriptor(name="researcher", role="research")
    envelope = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    result = asyncio.run(LocalAgentRunner(CapturingLoop()).run(envelope))

    assert result.success is True
    assert seen_kwargs["max_tool_iterations"] == 100
|
||||
|
||||
|
||||
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
|
||||
_publish_skill(
|
||||
tmp_path,
|
||||
|
||||
@ -323,6 +323,14 @@ def test_agent_defaults_include_runtime_controls(tmp_path) -> None:
|
||||
service.close()
|
||||
|
||||
|
||||
def test_agent_default_tool_iteration_budget_is_100(tmp_path) -> None:
    """An ``AgentService`` whose config path this test never writes reports a tool budget of 100."""
    service = AgentService(config_path=tmp_path / "config.json")
    try:
        assert service.profile.max_tool_iterations == 100
    finally:
        # Close the service even when the assertion fails, so a failing run does not
        # leave it open for the tests that follow.
        service.close()
|
||||
|
||||
|
||||
def test_agent_config_api_persists_and_reloads_defaults(tmp_path) -> None:
|
||||
config_path = tmp_path / "config.json"
|
||||
config_path.write_text(json.dumps({"agents": {"defaults": {}}}), encoding="utf-8")
|
||||
@ -514,3 +522,16 @@ def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
|
||||
assert local.managed is True
|
||||
assert local.display_name == "个人智能体文件系统工具"
|
||||
assert "beaver.interfaces.mcp.tools_server" in local.args
|
||||
|
||||
team_workflow = config.tools.mcp_servers["local_team_workflow_mcp"]
|
||||
assert team_workflow.transport == "stdio"
|
||||
assert team_workflow.kind == "local"
|
||||
assert team_workflow.category == "team_workflow"
|
||||
assert team_workflow.managed is True
|
||||
assert team_workflow.display_name == "本地 Agent Team Workflow 工具"
|
||||
assert team_workflow.args == [
|
||||
"-m",
|
||||
"beaver.interfaces.mcp.tools_server",
|
||||
"--category",
|
||||
"team_workflow",
|
||||
]
|
||||
|
||||
120
app-instance/backend/tests/unit/test_legacy_team_cleanup.py
Normal file
120
app-instance/backend/tests/unit/test_legacy_team_cleanup.py
Normal file
@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine import AgentRunResult
|
||||
from beaver.tasks import TaskExecutionPlan, TaskRecord
|
||||
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
|
||||
|
||||
|
||||
class FakeTaskService:
    """Minimal task-service double.

    ``append_run`` returns ``self.task``. This class never assigns that attribute itself,
    so it has to be attached to the instance before ``append_run`` is called.
    """

    def start_run(self, task_id: str, **_ignored: Any) -> None:
        """Accept a run start and do nothing."""

    def append_run(self, task_id: str, run_id: str, **_ignored: Any) -> TaskRecord:
        """Return the task attached to this fake, ignoring every argument."""
        attached_task = self.task
        return attached_task
|
||||
|
||||
|
||||
class FakeSessionManager:
    """In-memory session-manager double that records every appended message."""

    def __init__(self) -> None:
        # One dict per append_message call, in call order.
        self.events: list[dict[str, Any]] = []

    def append_message(self, session_id: str, **kwargs: Any) -> None:
        """Store the message keyword arguments together with their session id."""
        recorded: dict[str, Any] = {"session_id": session_id}
        recorded.update(kwargs)
        self.events.append(recorded)

    def update_latest_assistant_event_payload(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def get_run_event_records(self, session_id: str, run_id: str) -> list[Any]:
        """Report that no run events exist."""
        return list()
|
||||
|
||||
|
||||
class LegacyTeamPlanner:
    """Planner double whose ``plan`` always answers with a one-node team-mode plan."""

    async def plan(self, **_ignored: Any) -> TaskExecutionPlan:
        """Build the fixed team plan, ignoring all keyword arguments."""
        collect_node = ExecutionNode("collect", "Collect", AgentDescriptor(name="collect"))
        team_graph = ExecutionGraph(strategy="sequence", nodes=[collect_node])
        return TaskExecutionPlan(
            mode="team",
            reason="legacy plan should be ignored by orchestrator",
            graph=team_graph,
        )
|
||||
|
||||
|
||||
def _task() -> TaskRecord:
    """Return the open ``TaskRecord`` fixture used by the orchestrator test."""
    summary = "finance comparison"
    timestamp = "now"
    record = TaskRecord(
        task_id="task-1",
        session_id="session-1",
        description=summary,
        goal=summary,
        constraints=[],
        priority=0,
        status="open",
        creator="test",
        created_at=timestamp,
        updated_at=timestamp,
    )
    return record
|
||||
|
||||
|
||||
def test_builtin_tools_do_not_export_legacy_agent_team_tool() -> None:
    """``beaver.tools.builtins`` neither lists nor defines ``AgentTeamTool``."""
    import beaver.tools.builtins as builtin_tools

    exported_names = builtin_tools.__all__

    assert "AgentTeamTool" not in exported_names
    assert not hasattr(builtin_tools, "AgentTeamTool")
|
||||
|
||||
|
||||
def test_task_orchestrator_does_not_execute_legacy_planner_team_graph() -> None:
    """A team-mode plan from the planner must still be executed on the single-run path.

    The planner double returns a team plan. ``_run_team_for_task`` is replaced with a
    coroutine that raises, so any attempt to start team execution fails the test. The
    run must return the stub runner's output and record a ``task_synthesis_completed``
    event whose ``task_outcome`` is ``"single"``.
    """
    task = _task()
    task_service = FakeTaskService()
    # FakeTaskService.append_run returns self.task, which the fake never sets itself.
    task_service.task = task
    session_manager = FakeSessionManager()
    loaded = SimpleNamespace(
        task_service=task_service,
        task_execution_planner=LegacyTeamPlanner(),
        session_manager=session_manager,
        run_memory_store=None,
    )
    orchestrator = TaskAttemptOrchestrator(
        loaded=loaded,
        create_loop=lambda: None,
        make_provider_bundle_for_task=lambda *_: None,
    )

    async def fail_if_called(*args: Any, **kwargs: Any) -> Any:
        raise AssertionError("legacy planner team graph must not start TeamService")

    async def runner(message: str, **kwargs: Any) -> AgentRunResult:
        return AgentRunResult(
            session_id="session-1",
            run_id="main-run",
            output_text="single path",
            finish_reason="stop",
            tool_iterations=0,
        )

    orchestrator._run_team_for_task = fail_if_called  # type: ignore[method-assign]
    result = asyncio.run(
        orchestrator.run(
            message="compare finance",
            runner=runner,
            kwargs={
                "session_id": "session-1",
                "provider_bundle": SimpleNamespace(),
                "include_skill_assembly": False,
            },
            task=task,
        )
    )

    assert result.output_text == "single path"
    synthesis_events = [
        event
        for event in session_manager.events
        if event.get("event_type") == "task_synthesis_completed"
    ]
    # Guard the index below: a missing event should fail with a readable message
    # rather than a bare IndexError.
    assert synthesis_events, "no task_synthesis_completed event was recorded"
    assert synthesis_events[0]["event_payload"]["task_outcome"] == "single"
|
||||
@ -20,3 +20,30 @@ def test_local_filesystem_mcp_exposes_personal_user_file_tools_only(tmp_path) ->
|
||||
assert "search_files" not in names
|
||||
assert "list_directory" not in names
|
||||
assert all("personal agent file system" in tool.spec.description for tool in tools)
|
||||
|
||||
|
||||
def test_team_workflow_mcp_exposes_workflow_tool_schemas(tmp_path) -> None:
    """The ``team_workflow`` category yields five workflow tools with the expected schemas."""
    tools, _ = _category_tools("team_workflow", tmp_path)

    specs = {}
    for tool in tools:
        specs[tool.spec.name] = tool.spec

    # Dict insertion order mirrors the order in which the tools were returned.
    expected_names = [
        "SequentialWorkflow",
        "ConcurrentWorkflow",
        "MixtureOfAgents",
        "AgentRearrange",
        "GraphWorkflow",
    ]
    assert list(specs) == expected_names

    sequential_schema = specs["SequentialWorkflow"].input_schema
    assert sequential_schema["required"] == ["task", "agents"]
    assert sequential_schema["properties"]["agents"]["items"]["required"] == [
        "name",
        "instruction",
    ]

    graph_schema = specs["GraphWorkflow"].input_schema
    assert graph_schema["required"] == [
        "task",
        "agents",
        "edges",
        "output_agent",
    ]
    assert graph_schema["properties"]["edges"]["items"]["minItems"] == 2

    assert specs["AgentRearrange"].input_schema["required"] == ["task", "agents", "flow"]
|
||||
|
||||
@ -205,6 +205,7 @@ def test_process_projection_maps_failed_task_team_events(tmp_path: Path) -> None
|
||||
|
||||
team_event = next(event for event in projection["events"] if event["kind"] == "agent_team_created")
|
||||
assert team_event["status"] == "error"
|
||||
assert team_event["text"] == "Team 执行未完成 / 子节点失败"
|
||||
assert team_event["metadata"]["timeline_type"] == "agent_team"
|
||||
assert team_event["metadata"]["team_run_ids"] == ["failed-sub-run"]
|
||||
|
||||
@ -297,6 +298,101 @@ def test_process_projection_emits_skill_card_from_main_run_receipts(tmp_path: Pa
|
||||
assert skill_events
|
||||
assert skill_events[0]["metadata"]["timeline_type"] == "skill"
|
||||
assert skill_events[0]["metadata"]["skill_names"] == ["web-operation"]
|
||||
assert skill_events[0]["created_at"] == "2026-01-01T00:00:03+00:00"
|
||||
|
||||
|
||||
def test_process_projection_prefers_skill_activation_snapshot_over_synthesis_fallback(tmp_path: Path) -> None:
    """A snapshotted skill activation yields exactly one skill card, placed before the tool call.

    The run record and the ``skill_activation_snapshotted`` event both carry the same
    receipt. The projection must contain a single ``skill_selected`` event for the main
    run, with an id ending in ``:skill-activation``, the receipt's activation timestamp,
    and a position ahead of the ``tool_call_started`` event.
    """
    session_id = "web:test"
    main_run_id = "main-run"
    activated_at = "2026-01-01T00:00:03+00:00"
    task_ref = {"task_id": "task-1", "attempt_index": 1}
    # The same receipt fields feed both the stored run record and the snapshot event.
    receipt_fields = {
        "run_id": main_run_id,
        "session_id": session_id,
        "skill_name": "web-operation",
        "skill_version": "1",
        "content_hash": "hash",
        "activated_at": activated_at,
        "activation_reason": "Needs live web lookup.",
    }

    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    main_run = RunRecord(
        run_id=main_run_id,
        session_id=session_id,
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
        activated_skills=[SkillActivationReceipt(**receipt_fields)],
    )
    run_store.append_run_record(main_run)

    session.append_message(
        session_id,
        role="system",
        event_type="task_execution_planned",
        event_payload={
            **task_ref,
            "plan_mode": "single",
            "strategy": "single",
            "selected_skill_names": [],
        },
        context_visible=False,
    )
    session.append_message(
        session_id,
        run_id=main_run_id,
        role="system",
        event_type="skill_activation_snapshotted",
        event_payload={**task_ref, "receipts": [dict(receipt_fields)]},
        context_visible=False,
    )
    session.append_message(
        session_id,
        run_id=main_run_id,
        role="assistant",
        event_type="assistant_message_added",
        event_payload=dict(task_ref),
        content="Searching",
        tool_calls=[{"id": "call-1", "name": "web_fetch", "arguments": {"url": "https://example.com"}}],
        context_visible=False,
    )
    session.append_message(
        session_id,
        role="system",
        event_type="task_synthesis_completed",
        event_payload={**task_ref, "main_run_id": main_run_id},
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project(session_id)
    projected = projection["events"]

    skill_events = []
    for event in projected:
        if event["kind"] == "skill_selected" and event["run_id"] == main_run_id:
            skill_events.append(event)

    assert len(skill_events) == 1
    skill_card = skill_events[0]
    assert skill_card["event_id"].endswith(":skill-activation")
    assert skill_card["created_at"] == "2026-01-01T00:00:03+00:00"
    tool_event = next(event for event in projected if event["kind"] == "tool_call_started")
    assert projection["events"].index(skill_card) < projection["events"].index(tool_event)
|
||||
|
||||
|
||||
def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -> None:
|
||||
|
||||
@ -2,38 +2,9 @@ from __future__ import annotations
|
||||
|
||||
from beaver.skills.assembler.task_assembler import SkillAssembler
|
||||
from beaver.skills.catalog.loader import SkillsLoader
|
||||
from beaver.skills.catalog.utils import extract_skill_team_template
|
||||
|
||||
|
||||
def test_extract_team_template_returns_none_when_block_is_absent() -> None:
|
||||
result = extract_skill_team_template("# Ordinary Skill")
|
||||
|
||||
assert result.template is None
|
||||
assert result.warnings == []
|
||||
|
||||
|
||||
def test_extract_team_template_parses_valid_json_block() -> None:
|
||||
result = extract_skill_team_template(
|
||||
"```beaver-team-template\n"
|
||||
'{"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]}\n'
|
||||
"```"
|
||||
)
|
||||
|
||||
assert result.template == {
|
||||
"version": 1,
|
||||
"nodes": [{"node_id": "collect", "task": "Collect"}],
|
||||
}
|
||||
assert result.warnings == []
|
||||
|
||||
|
||||
def test_invalid_template_is_warning_not_skill_load_failure() -> None:
|
||||
result = extract_skill_team_template("```beaver-team-template\nnot-json\n```")
|
||||
|
||||
assert result.template is None
|
||||
assert result.warnings == ["team template JSON is invalid"]
|
||||
|
||||
|
||||
def test_loader_and_assembler_propagate_team_template_to_skill_context(tmp_path) -> None:
|
||||
def test_beaver_team_template_block_is_not_runtime_metadata(tmp_path) -> None:
|
||||
skill_dir = tmp_path / "plugin-skills" / "financial-comparison"
|
||||
skill_dir.mkdir(parents=True)
|
||||
(skill_dir / "SKILL.md").write_text(
|
||||
@ -56,10 +27,7 @@ def test_loader_and_assembler_propagate_team_template_to_skill_context(tmp_path)
|
||||
context = SkillAssembler(loader)._activate_skill_contexts(["financial-comparison"])[0]
|
||||
|
||||
assert record is not None
|
||||
assert record.team_template == {
|
||||
"version": 1,
|
||||
"nodes": [{"node_id": "collect", "task": "Collect official sources"}],
|
||||
}
|
||||
assert record.team_template_warnings == []
|
||||
assert context.team_template == record.team_template
|
||||
assert context.team_template_warnings == []
|
||||
assert not hasattr(record, "team_template")
|
||||
assert not hasattr(record, "team_template_warnings")
|
||||
assert not hasattr(context, "team_template")
|
||||
assert not hasattr(context, "team_template_warnings")
|
||||
|
||||
@ -55,12 +55,11 @@ def test_evaluate_node_evidence_accepts_url_in_successful_tool_content() -> None
|
||||
assert evaluate_node_evidence(evidence, ["tool_result", "url"], "done") == []
|
||||
|
||||
|
||||
def test_evaluate_node_evidence_checks_output_and_unknown_requirements() -> None:
|
||||
def test_evaluate_node_evidence_checks_output_and_ignores_natural_language_requirements() -> None:
|
||||
evidence = _run_evidence()
|
||||
|
||||
assert evaluate_node_evidence(evidence, ["output", "unknown_type"], " ") == [
|
||||
assert evaluate_node_evidence(evidence, ["output", "至少3个价格信息来源"], " ") == [
|
||||
"missing required evidence: output",
|
||||
"unsupported evidence requirement: unknown_type",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -3,19 +3,15 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.tasks import SkillResolutionReport, TaskExecutionPlanner, TaskRecord
|
||||
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
|
||||
from beaver.tools.registry import ToolRegistry
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskRecord
|
||||
|
||||
|
||||
class PlannerProvider(LLMProvider):
|
||||
def __init__(self, response: str) -> None:
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.response = response
|
||||
self.calls: list[dict] = []
|
||||
self.calls = 0
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
@ -25,59 +21,18 @@ class PlannerProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
) -> LLMResponse:
|
||||
self.calls.append(
|
||||
{
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"model": model,
|
||||
"tools": tools,
|
||||
}
|
||||
self.calls += 1
|
||||
return LLMResponse(
|
||||
content='{"mode":"team"}',
|
||||
finish_reason="stop",
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
)
|
||||
return LLMResponse(content=self.response, finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return "stub-model"
|
||||
|
||||
|
||||
class HangingPlannerProvider(LLMProvider):
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict],
|
||||
tools: list[dict] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
) -> LLMResponse:
|
||||
await asyncio.sleep(10)
|
||||
return LLMResponse(content='{"mode":"team"}', finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return "stub-model"
|
||||
|
||||
|
||||
class SequencedPlannerProvider(PlannerProvider):
|
||||
def __init__(self, responses: list[str]) -> None:
|
||||
super().__init__(responses[0])
|
||||
self.responses = list(responses)
|
||||
|
||||
async def chat(self, *args, **kwargs) -> LLMResponse:
|
||||
self.response = self.responses.pop(0)
|
||||
return await super().chat(*args, **kwargs)
|
||||
|
||||
|
||||
class StubTool(BaseTool):
|
||||
def __init__(self, name: str) -> None:
|
||||
self._spec = ToolSpec(name=name, description=name, input_schema={"type": "object"})
|
||||
|
||||
@property
|
||||
def spec(self) -> ToolSpec:
|
||||
return self._spec
|
||||
|
||||
async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
|
||||
raise AssertionError("Planner tests do not execute tools")
|
||||
|
||||
|
||||
def _task() -> TaskRecord:
|
||||
return TaskRecord(
|
||||
task_id="task-1",
|
||||
@ -93,55 +48,15 @@ def _task() -> TaskRecord:
|
||||
)
|
||||
|
||||
|
||||
def _bundle(response: str) -> ProviderBundle:
|
||||
provider = PlannerProvider(response)
|
||||
def _bundle(provider: PlannerProvider) -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=provider,
|
||||
)
|
||||
|
||||
|
||||
def _bundle_with_provider(provider: LLMProvider) -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=provider,
|
||||
)
|
||||
|
||||
|
||||
def _registry() -> ToolRegistry:
|
||||
registry = ToolRegistry()
|
||||
registry.register_many([StubTool("web_search"), StubTool("web_fetch"), StubTool("terminal")])
|
||||
return registry
|
||||
|
||||
|
||||
def _hanging_bundle() -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=HangingPlannerProvider(),
|
||||
)
|
||||
|
||||
|
||||
def test_planner_selects_single_mode() -> None:
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner().plan(
|
||||
task=_task(),
|
||||
user_message="implement workflow",
|
||||
attempt_index=1,
|
||||
provider_bundle=_bundle('{"mode":"single","reason":"main agent is enough"}'),
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.mode == "single"
|
||||
assert plan.graph is None
|
||||
assert plan.reason == "main agent is enough"
|
||||
|
||||
|
||||
def test_planner_skips_llm_for_simple_task() -> None:
|
||||
provider = PlannerProvider('{"mode":"team","reason":"should not be used"}')
|
||||
bundle = ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=provider,
|
||||
)
|
||||
def test_planner_skips_provider_for_simple_task() -> None:
|
||||
provider = PlannerProvider()
|
||||
task = _task()
|
||||
task.description = "查询深圳天气"
|
||||
task.goal = "查询深圳天气"
|
||||
@ -151,409 +66,55 @@ def test_planner_skips_llm_for_simple_task() -> None:
|
||||
task=task,
|
||||
user_message="帮我查一下今天深圳天气",
|
||||
attempt_index=1,
|
||||
provider_bundle=bundle,
|
||||
provider_bundle=_bundle(provider),
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.mode == "single"
|
||||
assert plan.graph is None
|
||||
assert plan.reason == "planner_skipped_simple_task"
|
||||
assert provider.calls == []
|
||||
assert provider.calls == 0
|
||||
|
||||
|
||||
def test_planner_builds_team_graph() -> None:
|
||||
bundle = _bundle(
|
||||
"""
|
||||
{
|
||||
"mode": "team",
|
||||
"reason": "needs parallel review",
|
||||
"strategy": "dag",
|
||||
"nodes": [
|
||||
{"node_id": "research", "task": "research options"},
|
||||
{"node_id": "review", "task": "review result", "depends_on": ["research"]}
|
||||
],
|
||||
"final_synthesis_instruction": "merge the findings"
|
||||
}
|
||||
"""
|
||||
)
|
||||
provider = bundle.main_provider
|
||||
def test_planner_replaces_team_planning_with_workflow_tools_without_provider_call() -> None:
|
||||
provider = PlannerProvider()
|
||||
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner().plan(
|
||||
task=_task(),
|
||||
user_message="implement workflow",
|
||||
user_message="research and compare workflow options",
|
||||
attempt_index=1,
|
||||
provider_bundle=bundle,
|
||||
provider_bundle=_bundle(provider),
|
||||
skill_summaries=["docker-debug: Use docker logs before editing config."],
|
||||
tool_hints=["terminal", "search_files"],
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
assert plan.graph.strategy == "dag"
|
||||
assert [node.node_id for node in plan.graph.nodes] == ["research", "review"]
|
||||
assert plan.graph.nodes[1].depends_on == ["research"]
|
||||
assert plan.final_synthesis_instruction == "merge the findings"
|
||||
assert isinstance(provider, PlannerProvider)
|
||||
prompt = provider.calls[0]["messages"][1]["content"]
|
||||
assert "Activated skill summaries" in prompt
|
||||
assert "docker-debug: Use docker logs before editing config." in prompt
|
||||
assert "terminal" in prompt
|
||||
assert "search_files" in prompt
|
||||
assert not plan.is_team
|
||||
assert plan.mode == "single"
|
||||
assert plan.graph is None
|
||||
assert plan.reason == "planner_team_replaced_by_workflow_tools"
|
||||
assert plan.final_synthesis_instruction == ""
|
||||
assert provider.calls == 0
|
||||
|
||||
|
||||
def test_planner_timeout_falls_back_to_single() -> None:
|
||||
def test_planner_can_be_disabled_by_environment(monkeypatch) -> None:
|
||||
monkeypatch.setenv("BEAVER_AGENT_TEAM_ENABLED", "0")
|
||||
provider = PlannerProvider()
|
||||
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner().plan(
|
||||
task=_task(),
|
||||
user_message="implement workflow",
|
||||
user_message="research and compare workflow options",
|
||||
attempt_index=1,
|
||||
provider_bundle=_hanging_bundle(),
|
||||
timeout_seconds=0.01,
|
||||
provider_bundle=_bundle(provider),
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.mode == "single"
|
||||
assert plan.reason == "planner_failed"
|
||||
assert "TimeoutError" in (plan.fallback_error or "")
|
||||
assert plan.reason == "planner_disabled_by_environment"
|
||||
assert provider.calls == 0
|
||||
|
||||
|
||||
def test_planner_team_nodes_use_task_as_internal_skill_query() -> None:
|
||||
plan = TaskExecutionPlanner().from_json(
|
||||
"""
|
||||
{
|
||||
"mode": "team",
|
||||
"reason": "needs skill-guided review",
|
||||
"strategy": "sequence",
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": "api_review",
|
||||
"task": "review API compatibility"
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
node = plan.graph.nodes[0]
|
||||
assert node.agent.name == "api_review"
|
||||
assert node.agent.role == ""
|
||||
assert node.agent.metadata["skill_query"] == "review API compatibility"
|
||||
assert node.agent.metadata["required_capabilities"] == []
|
||||
|
||||
|
||||
def test_planner_accepts_use_skill_and_skill_query() -> None:
|
||||
plan = TaskExecutionPlanner().from_json(
|
||||
"""
|
||||
{
|
||||
"mode": "team",
|
||||
"strategy": "sequence",
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": "collect",
|
||||
"task": "Collect official sources",
|
||||
"use_skill": "official-source-research",
|
||||
"skill_query": "official source verification"
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
node = plan.graph.nodes[0]
|
||||
assert node.agent.metadata["use_skill"] == "official-source-research"
|
||||
assert node.agent.metadata["skill_query"] == "official source verification"
|
||||
assert node.inherited_pinned_skills == []
|
||||
assert node.allowed_tool_names is None
|
||||
assert plan.planner_adaptation["node_skill_bindings"] == [
|
||||
{
|
||||
"node_id": "collect",
|
||||
"use_skill": "official-source-research",
|
||||
"skill_query": "official source verification",
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def test_planner_defaults_skill_query_to_node_task_when_absent() -> None:
|
||||
plan = TaskExecutionPlanner().from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"extract","task":"Extract financial metrics","use_skill":"financial-extraction"}]}'
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
assert plan.graph.nodes[0].agent.metadata["skill_query"] == "Extract financial metrics"
|
||||
|
||||
|
||||
def test_planner_adaptation_records_unresolved_use_skill_fallback() -> None:
|
||||
planner = TaskExecutionPlanner()
|
||||
plan = planner.from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"extract","task":"Extract metrics","use_skill":"missing-skill",'
|
||||
'"skill_query":"financial extraction"}]}'
|
||||
)
|
||||
report = SkillResolutionReport(
|
||||
node_id="extract",
|
||||
skill_query="financial extraction",
|
||||
requested_skill_name="missing-skill",
|
||||
exact_binding_used=False,
|
||||
warnings=["use_skill unresolved: missing-skill"],
|
||||
reason="matched published skill",
|
||||
)
|
||||
|
||||
planner._merge_skill_resolution_adaptation(plan, [report])
|
||||
|
||||
assert plan.planner_adaptation["warnings"] == ["use_skill unresolved: missing-skill"]
|
||||
assert plan.planner_adaptation["node_skill_bindings"][0]["fallback_reason"] == (
|
||||
"use_skill unresolved; matched published skill"
|
||||
)
|
||||
|
||||
|
||||
def test_planner_invalid_outputs_fallback_to_single() -> None:
|
||||
planner = TaskExecutionPlanner()
|
||||
invalid_json = planner.from_json("not json")
|
||||
unknown_strategy = planner.from_json(
|
||||
'{"mode":"team","strategy":"moa","nodes":[{"node_id":"a","task":"a","agent":{"name":"a"}}]}'
|
||||
)
|
||||
too_many_nodes = planner.from_json(
|
||||
'{"mode":"team","strategy":"parallel","nodes":['
|
||||
+ ",".join(
|
||||
'{"node_id":"n%s","task":"work","agent":{"name":"n%s"}}' % (index, index)
|
||||
for index in range(7)
|
||||
)
|
||||
+ "]}"
|
||||
)
|
||||
cyclic = planner.from_json(
|
||||
"""
|
||||
{
|
||||
"mode": "team",
|
||||
"strategy": "dag",
|
||||
"nodes": [
|
||||
{"node_id": "a", "task": "a", "agent": {"name": "a"}, "depends_on": ["b"]},
|
||||
{"node_id": "b", "task": "b", "agent": {"name": "b"}, "depends_on": ["a"]}
|
||||
]
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
assert invalid_json.mode == "single"
|
||||
assert unknown_strategy.mode == "single"
|
||||
assert too_many_nodes.mode == "single"
|
||||
assert cyclic.mode == "single"
|
||||
|
||||
|
||||
def test_template_plan_builds_generic_worker_and_preserves_v1_contract_fields() -> None:
|
||||
plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
|
||||
"""
|
||||
{
|
||||
"mode": "team",
|
||||
"strategy": "dag",
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": "collect",
|
||||
"task": "Collect official sources",
|
||||
"requested_tools": ["web_search"],
|
||||
"evidence_contract": {"entities": ["MGM", "Galaxy"]},
|
||||
"block_downstream_on_partial": true
|
||||
}
|
||||
],
|
||||
"adaptation": {"template_used": true}
|
||||
}
|
||||
"""
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
node = plan.graph.nodes[0]
|
||||
assert node.agent.name == "collect"
|
||||
assert node.agent.role == ""
|
||||
assert node.agent.metadata["sub_agent_kind"] == "generic_skill_worker"
|
||||
assert node.allowed_tool_names == ["web_search"]
|
||||
assert node.evidence_contract == {"entities": ["MGM", "Galaxy"]}
|
||||
assert node.block_downstream_on_partial is True
|
||||
assert plan.planner_adaptation["template_used"] is True
|
||||
|
||||
|
||||
def test_unknown_tool_is_removed_and_warned() -> None:
|
||||
plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"collect","task":"Collect","requested_tools":["web_search","not_real"]}]}'
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
assert plan.graph.nodes[0].allowed_tool_names == ["web_search"]
|
||||
assert "unknown tool removed: not_real" in plan.planner_adaptation["warnings"]
|
||||
|
||||
|
||||
def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None:
|
||||
plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"collect","task":"Collect","requested_tools":["web_search","terminal"]}]}'
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert plan.graph is not None
|
||||
assert plan.graph.nodes[0].allowed_tool_names == ["web_search"]
|
||||
assert "requires_high_risk_review: terminal" in plan.planner_adaptation["warnings"]
|
||||
|
||||
|
||||
def test_planner_rejects_agent_and_role_node_fields() -> None:
|
||||
planner = TaskExecutionPlanner(tool_registry=_registry())
|
||||
|
||||
agent_plan = planner.from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"collect","task":"Collect","agent":{"name":"researcher"}}]}'
|
||||
)
|
||||
role_plan = planner.from_json(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"collect","task":"Collect","role":"researcher"}]}'
|
||||
)
|
||||
|
||||
assert agent_plan.mode == "single"
|
||||
assert "agent" in (agent_plan.fallback_error or "")
|
||||
assert role_plan.mode == "single"
|
||||
assert "role" in (role_plan.fallback_error or "")
|
||||
|
||||
|
||||
def test_planner_records_primary_template_selection_and_ignored_templates() -> None:
|
||||
primary = SkillContext(
|
||||
name="financial-comparison",
|
||||
version="v1",
|
||||
content="Compare official financial disclosures.",
|
||||
team_template={"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]},
|
||||
)
|
||||
secondary = SkillContext(
|
||||
name="chart-reporting",
|
||||
version="v2",
|
||||
content="Render chart-ready Markdown.",
|
||||
team_template={"version": 1, "nodes": [{"node_id": "report", "task": "Report"}]},
|
||||
)
|
||||
provider = PlannerProvider(
|
||||
'{"mode":"team","strategy":"sequence","nodes":['
|
||||
'{"node_id":"collect","task":"Collect official sources"}],'
|
||||
'"adaptation":{"template_used":true}}'
|
||||
)
|
||||
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner(tool_registry=_registry()).plan(
|
||||
task=_task(),
|
||||
user_message="compare financial workflow",
|
||||
attempt_index=1,
|
||||
provider_bundle=_bundle_with_provider(provider),
|
||||
activated_skills=[primary, secondary],
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.planner_adaptation == {
|
||||
"template_used": True,
|
||||
"selected_template": "financial-comparison",
|
||||
"selection_reason": "first activated skill with a valid team template",
|
||||
"ignored_templates": ["chart-reporting"],
|
||||
"warnings": [],
|
||||
}
|
||||
prompt = provider.calls[0]["messages"][1]["content"]
|
||||
assert '"skill_name": "financial-comparison"' in prompt
|
||||
assert "Compare official financial disclosures." in prompt
|
||||
assert "Render chart-ready Markdown." in prompt
|
||||
|
||||
|
||||
def test_malformed_planner_output_repairs_once_without_tools() -> None:
|
||||
provider = SequencedPlannerProvider(
|
||||
[
|
||||
"not json",
|
||||
'{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect"}]}',
|
||||
]
|
||||
)
|
||||
|
||||
plan = asyncio.run(
|
||||
TaskExecutionPlanner(tool_registry=_registry()).plan(
|
||||
task=_task(),
|
||||
user_message="implement workflow",
|
||||
attempt_index=1,
|
||||
provider_bundle=_bundle_with_provider(provider),
|
||||
)
|
||||
)
|
||||
|
||||
assert plan.is_team
|
||||
assert len(provider.calls) == 2
|
||||
assert provider.calls[1]["tools"] is None
|
||||
assert "Repair the invalid planner JSON" in provider.calls[1]["messages"][1]["content"]
|
||||
|
||||
|
||||
def test_failed_planner_repair_falls_back_to_single() -> None:
    """Two unparsable planner replies yield the single-mode fallback plan."""
    provider = SequencedPlannerProvider(["not json", "still not json"])
    planner = TaskExecutionPlanner(tool_registry=_registry())

    pending_plan = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
    )
    plan = asyncio.run(pending_plan)

    assert plan.mode == "single"
    assert plan.reason == "planner_fallback_single"
    # The original request plus a single repair attempt, nothing more.
    assert len(provider.calls) == 2
|
||||
|
||||
|
||||
def test_finance_template_adapts_to_task_oriented_read_only_graph() -> None:
    """A four-node finance DAG parses into task-named nodes with the requested tools."""
    planner_json = """
    {
      "mode": "team",
      "strategy": "dag",
      "nodes": [
        {
          "node_id": "collect_official_sources",
          "task": "Collect MGM and Galaxy official financial disclosures",
          "requested_tools": ["web_search", "web_fetch"],
          "required_evidence": ["tool_result", "url"]
        },
        {
          "node_id": "extract_financial_metrics",
          "task": "Extract comparable financial metrics from collected sources",
          "depends_on": ["collect_official_sources"],
          "requested_tools": ["web_fetch"],
          "required_evidence": ["output"]
        },
        {
          "node_id": "validate_metrics",
          "task": "Validate metric units, periods, and source consistency",
          "depends_on": ["extract_financial_metrics"],
          "required_evidence": ["output"]
        },
        {
          "node_id": "generate_chart_report",
          "task": "Generate a Markdown comparison table and chart-ready data without claiming an image or file artifact",
          "depends_on": ["validate_metrics"],
          "requested_tools": [],
          "required_evidence": ["output"]
        }
      ]
    }
    """
    plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(planner_json)

    assert plan.is_team
    assert plan.graph is not None

    nodes = plan.graph.nodes
    node_ids = [node.node_id for node in nodes]
    assert node_ids == [
        "collect_official_sources",
        "extract_financial_metrics",
        "validate_metrics",
        "generate_chart_report",
    ]
    # Nodes are identified by task, not by a persona-style role name.
    assert all(node.agent.role == "" for node in nodes)
    assert not {"researcher", "writer", "reviewer", "analyst"}.intersection(node_ids)

    first_node, last_node = nodes[0], nodes[-1]
    assert first_node.allowed_tool_names == ["web_search", "web_fetch"]
    assert last_node.allowed_tool_names == []

    report_task = last_node.task.lower()
    assert "markdown" in report_task
    assert "without claiming an image or file artifact" in report_task
|
||||
def test_planner_no_longer_exposes_json_to_team_graph_parser() -> None:
    """A default-constructed planner must not offer a ``from_json`` attribute."""
    planner = TaskExecutionPlanner()
    assert not hasattr(planner, "from_json")
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode, NodeRunResult, TeamRunResult
|
||||
from beaver.engine import AgentRunResult
|
||||
from beaver.tasks import TaskExecutionPlan, TaskRecord
|
||||
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
|
||||
|
||||
|
||||
def _plan(*, optional_second: bool = False) -> TaskExecutionPlan:
    """Build a team plan with a "collect" node followed by a "report" node.

    When ``optional_second`` is true, the "report" node is created with
    ``required_for_completion=False``; otherwise it is required.
    """
    collect_node = ExecutionNode("collect", "Collect", AgentDescriptor(name="collect"))
    report_node = ExecutionNode(
        "report",
        "Report",
        AgentDescriptor(name="report"),
        required_for_completion=not optional_second,
    )
    graph = ExecutionGraph(strategy="sequence", nodes=[collect_node, report_node])
    return TaskExecutionPlan(mode="team", reason="test team", graph=graph)
|
||||
|
||||
|
||||
def _team_result(*results: NodeRunResult) -> TeamRunResult:
    """Wrap node results in a ``TeamRunResult`` that succeeds only if every node did."""
    node_results = list(results)
    every_node_succeeded = all(item.success for item in node_results)
    return TeamRunResult(
        success=every_node_succeeded,
        summary="team summary",
        node_results=node_results,
    )
|
||||
|
||||
|
||||
def _result(node_id: str, status: str, *, gaps: list[str] | None = None) -> NodeRunResult:
    """Build a ``NodeRunResult`` for ``node_id`` whose completion status is ``status``.

    Only the status "succeeded" produces a successful result without an error;
    only "blocked" changes the finish reason away from "stop".
    """
    succeeded = status == "succeeded"
    if status == "blocked":
        finish_reason = "blocked"
    else:
        finish_reason = "stop"
    error_text = None if succeeded else f"{status} node"
    return NodeRunResult(
        node_id=node_id,
        success=succeeded,
        output_text=f"{node_id} output",
        finish_reason=finish_reason,
        error=error_text,
        completion_status=status,
        evidence_gaps=list(gaps) if gaps else [],
    )
|
||||
|
||||
|
||||
def test_required_partial_node_marks_synthesis_incomplete() -> None:
    """A required node that ended "partial" makes the synthesis outcome incomplete."""
    url_gap = "missing required evidence: url"
    plan = _plan()
    team_result = _team_result(
        _result("collect", "partial", gaps=[url_gap]),
        _result("report", "succeeded"),
    )

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "incomplete"
    assert metadata["incomplete_node_ids"] == ["collect"]
    assert metadata["evidence_gaps"] == {"collect": [url_gap]}
    # The gap must also be surfaced in the synthesis context text.
    assert "Task outcome: incomplete" in context
    assert url_gap in context
    assert prefix.startswith("任务未完成:")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("status", ["failed", "blocked"])
def test_required_failed_or_blocked_node_marks_synthesis_incomplete(status: str) -> None:
    """A required node ending in ``status`` marks the outcome incomplete with a prefix."""
    plan = _plan()
    team_result = _team_result(_result("collect", status), _result("report", "succeeded"))

    _context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "incomplete"
    assert metadata["incomplete_node_ids"] == ["collect"]
    assert metadata["node_statuses"]["collect"] == status
    # Any non-empty prefix is accepted here.
    assert prefix
|
||||
|
||||
|
||||
def test_optional_failed_node_does_not_force_incomplete() -> None:
    """A failed node that is not required for completion leaves the outcome complete."""
    plan = _plan(optional_second=True)
    team_result = _team_result(
        _result("collect", "succeeded"),
        _result("report", "failed"),
    )

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "complete"
    assert metadata["incomplete_node_ids"] == []
    assert "Task outcome: complete" in context
    assert prefix == ""
|
||||
|
||||
|
||||
def test_all_required_nodes_succeeded_is_complete() -> None:
    """When both required nodes succeed, the outcome is complete and unprefixed."""
    plan = _plan()
    team_result = _team_result(
        _result("collect", "succeeded"),
        _result("report", "succeeded"),
    )

    _context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(plan, team_result)

    assert metadata["task_outcome"] == "complete"
    assert prefix == ""
|
||||
|
||||
|
||||
def test_single_plan_outcome_does_not_add_prefix() -> None:
    """A single-mode plan with no team result reports outcome "single" and no prefix."""
    single_plan = TaskExecutionPlan.single("single")

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(single_plan, None)

    assert metadata["task_outcome"] == "single"
    assert "Task outcome: single" in context
    assert prefix == ""
|
||||
|
||||
|
||||
class FakeTaskService:
    """Task-service stub whose run bookkeeping does nothing.

    ``append_run`` hands back ``self.task``. This class never assigns that
    attribute itself, so callers must set it before calling ``append_run``.
    """

    def start_run(self, task_id: str, **_: Any) -> None:
        """Accept a run start and ignore every argument."""

    def append_run(self, task_id: str, run_id: str, **_: Any) -> TaskRecord:
        """Return the object stored on ``self.task``, ignoring every argument."""
        return self.task
|
||||
|
||||
|
||||
class FakeSessionManager:
    """Session-manager stub that records appended messages in ``self.events``."""

    def __init__(self) -> None:
        # One dict per append_message call, in call order.
        self.events: list[dict[str, Any]] = []

    def append_message(self, session_id: str, **kwargs: Any) -> None:
        """Record the call as a dict: ``session_id`` first, then the keyword arguments."""
        record: dict[str, Any] = {"session_id": session_id}
        record.update(kwargs)
        self.events.append(record)

    def update_latest_assistant_event_payload(self, *args: Any, **kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def get_run_event_records(self, session_id: str, run_id: str) -> list[Any]:
        """Always report that no event records exist."""
        return []
|
||||
|
||||
|
||||
class FixedPlanner:
    """Planner stub that always answers with the plan given at construction."""

    def __init__(self, plan: TaskExecutionPlan) -> None:
        # Stored under a different name because ``plan`` is the coroutine method below.
        self.fixed_plan = plan

    async def plan(self, **_: Any) -> TaskExecutionPlan:
        """Ignore all keyword arguments and return the stored plan."""
        stored_plan = self.fixed_plan
        return stored_plan
|
||||
|
||||
|
||||
def _task() -> TaskRecord:
    """Return the fixed open ``TaskRecord`` ("task-1" in "session-1") used by these tests."""
    subject = "finance comparison"
    timestamp = "now"
    return TaskRecord(
        task_id="task-1",
        session_id="session-1",
        description=subject,
        goal=subject,
        constraints=[],
        priority=0,
        status="open",
        creator="test",
        created_at=timestamp,
        updated_at=timestamp,
    )
|
||||
|
||||
|
||||
def test_incomplete_team_still_runs_tool_free_synthesis_and_prefixes_output() -> None:
    """An incomplete team run still gets one tool-free synthesis pass with a prefixed reply."""
    url_gap = "missing required evidence: url"
    plan = _plan()
    team_result = _team_result(
        _result("collect", "partial", gaps=[url_gap]),
        _result("report", "succeeded"),
    )
    task = _task()

    # The fake service returns whatever is stored on its ``task`` attribute.
    task_service = FakeTaskService()
    task_service.task = task
    session_manager = FakeSessionManager()
    orchestrator = TaskAttemptOrchestrator(
        loaded=SimpleNamespace(
            task_service=task_service,
            task_execution_planner=FixedPlanner(plan),
            session_manager=session_manager,
            run_memory_store=None,
        ),
        create_loop=lambda: None,
        make_provider_bundle_for_task=lambda *_: None,
    )

    async def fake_run_team(*args: Any, **kwargs: Any) -> tuple[TeamRunResult, None]:
        return team_result, None

    runner_calls: list[dict[str, Any]] = []

    async def runner(message: str, **kwargs: Any) -> AgentRunResult:
        # Keep the keyword arguments so the synthesis call can be inspected below.
        runner_calls.append(kwargs)
        return AgentRunResult(
            session_id="session-1",
            run_id="main-run",
            output_text="Available financial comparison.",
            finish_reason="stop",
            tool_iterations=0,
        )

    # Swap the real team execution for the canned result.
    orchestrator._run_team_for_task = fake_run_team  # type: ignore[method-assign]
    run_kwargs = {
        "session_id": "session-1",
        "provider_bundle": SimpleNamespace(),
        "include_skill_assembly": False,
    }
    result = asyncio.run(
        orchestrator.run(
            message="compare finance",
            runner=runner,
            kwargs=run_kwargs,
            task=task,
        )
    )

    assert len(runner_calls) == 1
    synthesis_call = runner_calls[0]
    assert synthesis_call["include_tools"] is False
    assert synthesis_call["max_tool_iterations"] == 0
    assert "Task outcome: incomplete" in synthesis_call["execution_context"]
    assert result.output_text.startswith("任务未完成:")

    synthesis_events = [
        event
        for event in session_manager.events
        if event.get("event_type") == "task_synthesis_completed"
    ]
    synthesis_payload = synthesis_events[0]["event_payload"]
    assert synthesis_payload["task_outcome"] == "incomplete"
    assert synthesis_payload["incomplete_node_ids"] == ["collect"]
    assert synthesis_payload["node_statuses"] == {
        "collect": "partial",
        "report": "succeeded",
    }
    assert synthesis_payload["evidence_gaps"] == {"collect": [url_gap]}
|
||||
|
||||
|
||||
def test_incomplete_notice_is_not_prefixed_twice() -> None:
    """Text that already opens with the incomplete notice is returned unchanged."""
    already_prefixed = "任务未完成:缺少官方来源。"
    candidate_prefix = "任务未完成:部分步骤缺少证据。\n\n"

    outcome = TaskAttemptOrchestrator._apply_incomplete_prefix(already_prefixed, candidate_prefix)

    assert outcome == already_prefixed
|
||||
214
app-instance/backend/tests/unit/test_team_workflow_graph.py
Normal file
214
app-instance/backend/tests/unit/test_team_workflow_graph.py
Normal file
@@ -0,0 +1,214 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.team_workflows.agent_rearrange import build_graph as build_rearrange_graph
|
||||
from beaver.team_workflows.concurrent import build_graph as build_concurrent_graph
|
||||
from beaver.team_workflows.graph import build_graph as build_explicit_graph
|
||||
from beaver.team_workflows.mixture_of_agents import build_graph as build_moa_graph
|
||||
from beaver.team_workflows.sequential import build_graph as build_sequential_graph
|
||||
|
||||
|
||||
def _deps(graph) -> dict[str, list[str]]:
|
||||
return {node.node_id: list(node.depends_on) for node in graph.nodes}
|
||||
|
||||
|
||||
def test_sequential_workflow_builds_chain_and_preserves_agent_fields() -> None:
    """Three agents become a three-node chain and the first node keeps its agent fields."""
    collector_spec = {
        "name": "source_collector",
        "instruction": "Collect official sources",
        "skill_query": "official filings",
        "allowed_tool_names": ["web_search", "web_fetch"],
        "required_evidence": ["url"],
        "validation_rules": ["Prefer official sources."],
        "block_downstream_on_partial": True,
    }
    extractor_spec = {"name": "metric_extractor", "instruction": "Extract metrics"}
    reporter_spec = {"name": "reporter", "instruction": "Write report"}

    graph = build_sequential_graph(
        task="finance report",
        agents=[collector_spec, extractor_spec, reporter_spec],
    )

    assert graph.strategy == "sequence"
    node_ids = [node.node_id for node in graph.nodes]
    assert node_ids == ["source_collector", "metric_extractor", "reporter"]
    # Each node depends only on its immediate predecessor.
    assert _deps(graph) == {
        "source_collector": [],
        "metric_extractor": ["source_collector"],
        "reporter": ["metric_extractor"],
    }

    first = graph.nodes[0]
    assert first.task == "Collect official sources"
    assert first.agent.role == ""

    metadata = first.agent.metadata
    assert metadata["sub_agent_kind"] == "generic_skill_worker"
    assert metadata["workflow_tool"] == "SequentialWorkflow"
    assert metadata["workflow_agent_name"] == "source_collector"
    assert metadata["skill_query"] == "official filings"

    assert first.allowed_tool_names == ["web_search", "web_fetch"]
    assert first.required_evidence == ["url"]
    assert first.validation_rules == ["Prefer official sources."]
    assert first.block_downstream_on_partial is True
|
||||
|
||||
|
||||
def test_concurrent_workflow_builds_independent_nodes() -> None:
    """Concurrent agents become parallel nodes with no dependencies between them."""
    agent_pairs = (
        ("official_sources", "Check official sources"),
        ("media_sources", "Check media sources"),
        ("data_sources", "Check data sources"),
    )
    agents = [{"name": name, "instruction": instruction} for name, instruction in agent_pairs]

    graph = build_concurrent_graph(task="research topic", agents=agents)

    assert graph.strategy == "parallel"
    assert _deps(graph) == {
        "official_sources": [],
        "media_sources": [],
        "data_sources": [],
    }
|
||||
|
||||
|
||||
def test_mixture_of_agents_builds_experts_to_aggregator() -> None:
    """Expert nodes are independent and the aggregator node depends on all of them."""
    expert_pairs = (
        ("tactics", "Analyze tactics"),
        ("players", "Analyze players"),
        ("media", "Analyze media"),
    )
    experts = [{"name": name, "instruction": instruction} for name, instruction in expert_pairs]
    aggregator = {"name": "synthesizer", "instruction": "Synthesize report"}

    graph = build_moa_graph(task="analyze match", agents=experts, aggregator=aggregator)

    assert graph.strategy == "dag"
    assert _deps(graph) == {
        "tactics": [],
        "players": [],
        "media": [],
        "synthesizer": ["tactics", "players", "media"],
    }
    last_node = graph.nodes[-1]
    assert last_node.agent.metadata["workflow_tool"] == "MixtureOfAgents"
|
||||
|
||||
|
||||
def test_agent_rearrange_parses_flow_into_edges() -> None:
    """An arrow/comma flow string is turned into fan-out and fan-in dependencies."""
    agent_pairs = (
        ("collector", "Collect facts"),
        ("tactics", "Analyze tactics"),
        ("players", "Analyze players"),
        ("media", "Analyze media"),
        ("synthesizer", "Synthesize report"),
    )
    agents = [{"name": name, "instruction": instruction} for name, instruction in agent_pairs]

    graph = build_rearrange_graph(
        task="collect then analyze then synthesize",
        agents=agents,
        flow="collector -> tactics, players, media -> synthesizer",
    )

    assert graph.strategy == "dag"
    assert _deps(graph) == {
        "collector": [],
        "tactics": ["collector"],
        "players": ["collector"],
        "media": ["collector"],
        "synthesizer": ["tactics", "players", "media"],
    }
|
||||
|
||||
|
||||
def test_agent_rearrange_rejects_unknown_agent_in_flow() -> None:
    """A flow naming an agent that was not declared raises ``ValueError``."""
    declared_agents = [{"name": "collector", "instruction": "Collect"}]

    with pytest.raises(ValueError, match="unknown agent"):
        build_rearrange_graph(
            task="bad flow",
            agents=declared_agents,
            flow="collector -> missing",
        )
|
||||
|
||||
|
||||
def test_graph_workflow_requires_edges_and_output_agent() -> None:
    """An empty edge list and an undeclared output agent are each rejected."""
    lone_agent = [{"name": "collector", "instruction": "Collect"}]
    with pytest.raises(ValueError, match="edges"):
        build_explicit_graph(
            task="bad graph",
            agents=lone_agent,
            edges=[],
            output_agent="collector",
        )

    agent_pair = [
        {"name": "collector", "instruction": "Collect"},
        {"name": "reporter", "instruction": "Report"},
    ]
    with pytest.raises(ValueError, match="output_agent"):
        build_explicit_graph(
            task="bad graph",
            agents=agent_pair,
            edges=[["collector", "reporter"]],
            output_agent="missing",
        )
|
||||
|
||||
|
||||
def test_graph_workflow_builds_explicit_dag() -> None:
    """Explicit edges produce a DAG whose dependencies mirror those edges."""
    analysts = ["tactics", "players", "media"]
    agents = [
        {"name": "collector", "instruction": "Collect facts"},
        {"name": "tactics", "instruction": "Analyze tactics"},
        {"name": "players", "instruction": "Analyze players"},
        {"name": "media", "instruction": "Analyze media"},
        {"name": "synthesizer", "instruction": "Synthesize report"},
    ]
    # Fan out from the collector to every analyst, then fan in to the synthesizer.
    fan_out = [["collector", analyst] for analyst in analysts]
    fan_in = [[analyst, "synthesizer"] for analyst in analysts]

    graph = build_explicit_graph(
        task="match analysis",
        agents=agents,
        edges=fan_out + fan_in,
        output_agent="synthesizer",
    )

    assert graph.strategy == "dag"
    assert _deps(graph) == {
        "collector": [],
        "tactics": ["collector"],
        "players": ["collector"],
        "media": ["collector"],
        "synthesizer": ["tactics", "players", "media"],
    }
|
||||
|
||||
|
||||
def test_graph_workflow_rejects_unknown_cycle_and_disconnected_agents() -> None:
    """Unknown edge endpoints, cycles, and unconnected agents each raise ``ValueError``."""
    # An edge that points at an agent which was never declared.
    with pytest.raises(ValueError, match="unknown agent"):
        build_explicit_graph(
            task="bad graph",
            agents=[
                {"name": "collector", "instruction": "Collect"},
                {"name": "reporter", "instruction": "Report"},
            ],
            edges=[["collector", "missing"]],
            output_agent="reporter",
        )

    # Two agents that depend on each other.
    mutual_edges = [["a", "b"], ["b", "a"]]
    with pytest.raises(ValueError, match="cyclic"):
        build_explicit_graph(
            task="bad graph",
            agents=[
                {"name": "a", "instruction": "A"},
                {"name": "b", "instruction": "B"},
            ],
            edges=mutual_edges,
            output_agent="b",
        )

    # A declared agent that no edge touches.
    agents_with_orphan = [
        {"name": "collector", "instruction": "Collect"},
        {"name": "reporter", "instruction": "Report"},
        {"name": "orphan", "instruction": "Unused"},
    ]
    with pytest.raises(ValueError, match="disconnected"):
        build_explicit_graph(
            task="bad graph",
            agents=agents_with_orphan,
            edges=[["collector", "reporter"]],
            output_agent="reporter",
        )
|
||||
@@ -0,0 +1,182 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from beaver.coordinator import NodeRunResult, TeamRunResult
|
||||
from beaver.tools import ToolContext
|
||||
from beaver.tools.mcp.wrapper import MCPToolWrapper
|
||||
|
||||
|
||||
def _tool_def(name: str) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
name=name,
|
||||
description=name,
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
)
|
||||
|
||||
|
||||
def test_team_workflow_mcp_wrapper_bridges_to_current_team_runtime() -> None:
    """A ``team_workflow`` wrapper runs the injected team runner, not the MCP call."""
    remote_calls: list[tuple[str, dict[str, Any]]] = []
    captured: dict[str, Any] = {}

    async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
        # Record the call first so the assertion below can also detect it.
        remote_calls.append((name, arguments))
        raise AssertionError("team workflow bridge must not call MCP subprocess")

    async def runner(graph, **kwargs: Any) -> TeamRunResult:
        captured.update(graph=graph, kwargs=kwargs)
        node_results = [
            NodeRunResult("collect", True, "collected", run_id="run-collect"),
            NodeRunResult("report", True, "reported", run_id="run-report"),
        ]
        return TeamRunResult(
            success=True,
            summary="team done",
            node_results=node_results,
            run_ids=["run-collect", "run-report"],
            session_ids=["session:collect", "session:report"],
            task_id=kwargs["parent_task_id"],
        )

    wrapper = MCPToolWrapper(
        "local_team_workflow_mcp",
        _tool_def("SequentialWorkflow"),
        call_tool,
        category="team_workflow",
        kind="local",
    )
    services = {
        "task_id": "task-1",
        "run_id": "run-root",
        "agent_team_runner": runner,
    }
    context = ToolContext(
        session_id="session-1",
        services=services,
        metadata={"source": "websocket"},
    )
    arguments = {
        "task": "finance report",
        "agents": [
            {"name": "collect", "instruction": "Collect official sources"},
            {"name": "report", "instruction": "Write report"},
        ],
    }

    result = asyncio.run(wrapper.invoke(arguments, context))

    payload = json.loads(result.content)
    graph = captured["graph"]
    runner_kwargs = captured["kwargs"]

    assert remote_calls == []
    assert result.success is True
    assert result.tool_name == "mcp_local_team_workflow_mcp_SequentialWorkflow"

    assert payload["success"] is True
    assert payload["workflow"] == "SequentialWorkflow"
    assert payload["summary"] == "team done"
    assert payload["run_ids"] == ["run-collect", "run-report"]

    # Parent identifiers are taken from the tool context.
    assert runner_kwargs["parent_task_id"] == "task-1"
    assert runner_kwargs["parent_session_id"] == "session-1"
    assert runner_kwargs["parent_run_id"] == "run-root"

    assert graph.strategy == "sequence"
    dependencies = {node.node_id: list(node.depends_on) for node in graph.nodes}
    assert dependencies == {
        "collect": [],
        "report": ["collect"],
    }
|
||||
|
||||
|
||||
def test_ordinary_mcp_wrapper_still_calls_remote_tool() -> None:
    """A wrapper in a non-team category forwards the call to ``call_tool`` unchanged."""
    remote_calls: list[tuple[str, dict[str, Any]]] = []

    async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
        remote_calls.append((name, arguments))
        return SimpleNamespace(content=[], structuredContent={"ok": True})

    wrapper = MCPToolWrapper(
        "local_web_mcp",
        _tool_def("web_search"),
        call_tool,
        category="web",
        kind="local",
    )
    query_arguments = {"query": "beaver"}

    result = asyncio.run(wrapper.invoke(query_arguments, ToolContext()))

    assert result.success is True
    assert remote_calls == [("web_search", {"query": "beaver"})]
|
||||
|
||||
|
||||
def test_team_workflow_bridge_uses_team_service_without_injected_runner(monkeypatch) -> None:
    """Without an ``agent_team_runner`` service the graph runs through the patched ``TeamService``.

    The assertions expect that service to be built on a loop carrying the
    parent loop's profile and loader and the context's ``loaded`` object.
    """
    captured: dict[str, Any] = {}

    class FakeTeamService:
        def __init__(self, loop: Any) -> None:
            captured["loop"] = loop

        async def run_team(self, graph, **kwargs: Any) -> TeamRunResult:
            captured.update(graph=graph, kwargs=kwargs)
            return TeamRunResult(
                success=True,
                summary="service team done",
                node_results=[NodeRunResult("only", True, "ok", run_id="run-only")],
                run_ids=["run-only"],
                session_ids=["session:only"],
                task_id=kwargs["parent_task_id"],
            )

    class FakeAgentLoop:
        def __init__(self, *, profile: Any, loader: Any) -> None:
            self.profile = profile
            self.loader = loader
            self.loaded = None

    monkeypatch.setattr("beaver.engine.AgentLoop", FakeAgentLoop)
    monkeypatch.setattr("beaver.services.team_service.TeamService", FakeTeamService)

    wrapper = MCPToolWrapper(
        "local_team_workflow_mcp",
        _tool_def("ConcurrentWorkflow"),
        call_tool=lambda _name, _arguments: None,  # type: ignore[arg-type]
        category="team_workflow",
        kind="local",
    )
    parent_loop = SimpleNamespace(profile="profile", loader="loader")
    # No "agent_team_runner" entry here, unlike the injected-runner test.
    services = {
        "task_id": "task-1",
        "run_id": "run-root",
        "agent_loop": parent_loop,
        "loaded": SimpleNamespace(name="loaded"),
    }
    context = ToolContext(session_id="session-1", services=services)
    arguments = {
        "task": "parallel work",
        "agents": [{"name": "only", "instruction": "Do work"}],
    }

    result = asyncio.run(wrapper.invoke(arguments, context))

    payload = json.loads(result.content)

    assert result.success is True
    assert payload["summary"] == "service team done"

    service_loop = captured["loop"]
    assert service_loop.profile == "profile"
    assert service_loop.loader == "loader"
    assert service_loop.loaded.name == "loaded"

    service_kwargs = captured["kwargs"]
    assert service_kwargs["parent_task_id"] == "task-1"
    assert service_kwargs["parent_session_id"] == "session-1"
    assert service_kwargs["parent_run_id"] == "run-root"
    assert service_kwargs["allow_candidate_generation"] is False

    assert captured["graph"].strategy == "parallel"
|
||||
Reference in New Issue
Block a user