feat(coordinator): 添加团队节点默认最大工具迭代次数配置

添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数，并修改 LocalAgentRunner 中的逻辑，在 envelope 中未指定该值时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件，将 finish_reason 为 "max_tool_iterations_finalized" 的情况视为运行失败，并添加对原始工具调用输出的检测，避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能，为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100，同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态，确保 connect_all 只执行一次，提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码，包括解析、存储和处理逻辑，简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理，改进团队运行完成消息显示，并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码，将所有任务统一按单步执行处理，简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑，跳过自然语言证据需求的确定性验证，只执行机器可读的需求验证，避免因自然语言需求导致的节点失败。
2026-06-26 16:36:29 +08:00
parent 53b13e8eac
commit 520a21a027
360 changed files with 13271 additions and 1848 deletions
--- a/app-instance/backend/beaver/tasks/planner.py
+++ b/app-instance/backend/beaver/tasks/planner.py
@@ -1,39 +1,27 @@
-"""Internal Task execution planner for single-agent vs team execution."""
+"""Internal Task execution planner for single-agent task attempts.
+
+Team execution is now started explicitly through local Team Workflow MCP tools.
+This planner only records why the normal Task attempt should continue as a
+single root-agent run.
+"""

 from __future__ import annotations

-import asyncio
-import json
 import os
 from dataclasses import dataclass, field
 from typing import Any, Literal

-from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
+from beaver.coordinator.models import ExecutionGraph
 from beaver.engine.context import SkillContext
 from beaver.engine.providers import ProviderBundle
-from beaver.tools.registry import ToolRegistry

 from .models import TaskRecord
-from .skill_resolver import SkillResolutionReport, TaskSkillResolver
+from .skill_resolver import SkillResolutionReport


 TaskExecutionMode = Literal["single", "team"]


-# Temporary name-based denylist until high-risk tool approval is implemented.
-# Keep this policy centralized so planner behavior cannot drift by call site.
-HIGH_RISK_PLANNER_TOOL_NAMES = frozenset(
-    {
-        "delete_file",
-        "execute_command",
-        "external_send",
-        "send_email",
-        "terminal",
-        "write_file",
-    }
-)
-
-
 def _agent_team_enabled() -> bool:
    return os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"}

@@ -96,37 +84,7 @@ class TaskExecutionPlan:


 class TaskExecutionPlanner:
-    """Plan whether a Task attempt should run through a team first."""
-
-    _MAX_NODES = 6
-    _MAX_DEPTH = 4
-    _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"}
-    _ALLOWED_NODE_FIELDS = {
-        "node_id",
-        "task",
-        "use_skill",
-        "skill_query",
-        "depends_on",
-        "input_contract",
-        "output_contract",
-        "requested_tools",
-        "required_evidence",
-        "evidence_contract",
-        "validation_rules",
-        "required_for_completion",
-        "block_downstream_on_partial",
-        "max_tool_iterations",
-        "constraints",
-    }
-
-    def __init__(
-        self,
-        *,
-        task_skill_resolver: TaskSkillResolver | None = None,
-        tool_registry: ToolRegistry | None = None,
-    ) -> None:
-        self.task_skill_resolver = task_skill_resolver
-        self.tool_registry = tool_registry
+    """Return the current Task execution mode for the root AgentLoop."""

    async def plan(
        self,
@@ -144,122 +102,7 @@ class TaskExecutionPlanner:
            return TaskExecutionPlan.single("planner_disabled_by_environment")
        if not self._needs_team_planning(task=task, user_message=user_message):
            return TaskExecutionPlan.single("planner_skipped_simple_task")
-
-        provider = None
-        model = None
-        if provider_bundle is not None:
-            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
-            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
-            model = getattr(runtime, "model", None)
-        if provider is None:
-            return TaskExecutionPlan.single("planner_provider_unavailable")
-        selected_template, base_adaptation = self._select_team_template(activated_skills or [])
-        try:
-            response = await asyncio.wait_for(
-                provider.chat(
-                    messages=[
-                        {
-                            "role": "system",
-                            "content": (
-                                "You choose whether an internal Beaver Task attempt should run as a single "
-                                "main-agent pass or use a small sub-agent team first. Return only compact JSON."
-                            ),
-                        },
-                        {
-                            "role": "user",
-                            "content": self._prompt(
-                                task=task,
-                                user_message=user_message,
-                                attempt_index=attempt_index,
-                                skill_summaries=skill_summaries or [],
-                                tool_hints=tool_hints or [],
-                                activated_skills=activated_skills or [],
-                                selected_template=selected_template,
-                            ),
-                        },
-                    ],
-                    tools=None,
-                    model=model,
-                    max_tokens=4096,
-                    temperature=0.0,
-                ),
-                timeout=timeout_seconds,
-            )
-            try:
-                plan = self._from_json_or_raise(response.content or "")
-            except Exception as first_error:
-                repair_response = await asyncio.wait_for(
-                    provider.chat(
-                        messages=[
-                            {
-                                "role": "system",
-                                "content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.",
-                            },
-                            {
-                                "role": "user",
-                                "content": (
-                                    "Repair the invalid planner JSON using the task-only schema from the original "
-                                    f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}"
-                                ),
-                            },
-                        ],
-                        tools=None,
-                        model=model,
-                        max_tokens=4096,
-                        temperature=0.0,
-                    ),
-                    timeout=timeout_seconds,
-                )
-                try:
-                    plan = self._from_json_or_raise(repair_response.content or "")
-                except Exception as repair_error:
-                    return TaskExecutionPlan.single(
-                        "planner_fallback_single",
-                        fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}",
-                        planner_adaptation=base_adaptation,
-                    )
-            self._merge_adaptation(plan, base_adaptation)
-            return await self._resolve_plan(
-                plan,
-                task=task,
-                user_message=user_message,
-                attempt_index=attempt_index,
-                provider_bundle=provider_bundle,
-            )
-        except Exception as exc:
-            detail = str(exc)
-            error = f"{type(exc).__name__}: {detail}" if detail else type(exc).__name__
-            return TaskExecutionPlan.single("planner_failed", fallback_error=error)
-
-    async def _resolve_plan(
-        self,
-        plan: TaskExecutionPlan,
-        *,
-        task: TaskRecord,
-        user_message: str,
-        attempt_index: int,
-        provider_bundle: ProviderBundle | None,
-    ) -> TaskExecutionPlan:
-        if not plan.is_team or self.task_skill_resolver is None:
-            return plan
-        if provider_bundle is None:
-            return TaskExecutionPlan.single("planner_fallback_single", fallback_error="task_skill_resolver_provider_unavailable")
-        try:
-            assert plan.graph is not None
-            graph, reports = await self.task_skill_resolver.resolve_graph(
-                plan.graph,
-                task=task,
-                user_message=user_message,
-                attempt_index=attempt_index,
-                provider_bundle=provider_bundle,
-            )
-            graph.validate()
-            plan.graph = graph
-            plan.skill_resolution_report = reports
-            self._merge_skill_resolution_adaptation(plan, reports)
-            return plan
-        except Exception as exc:
-            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}")
+        return TaskExecutionPlan.single("planner_team_replaced_by_workflow_tools")

    @staticmethod
    def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool:
@@ -306,307 +149,3 @@ class TaskExecutionPlanner:
            "端到端",
        )
        return any(marker in text for marker in complex_markers)
-
-    def from_json(self, text: str) -> TaskExecutionPlan:
-        try:
-            return self._from_json_or_raise(text)
-        except Exception as exc:
-            return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc))
-
-    def _from_json_or_raise(self, text: str) -> TaskExecutionPlan:
-        payload = self._parse_json_object(text)
-        mode = str(payload.get("mode") or "single").strip().lower()
-        reason = str(payload.get("reason") or "")
-        adaptation = self._adaptation_from_payload(payload)
-        if mode != "team":
-            return TaskExecutionPlan.single(
-                reason or "planner_selected_single",
-                planner_adaptation=adaptation,
-            )
-
-        graph = self._graph_from_payload(payload, adaptation=adaptation)
-        graph.validate(max_depth=self._MAX_DEPTH)
-        return TaskExecutionPlan(
-            mode="team",
-            reason=reason or "planner_selected_team",
-            graph=graph,
-            final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""),
-            planner_adaptation=adaptation,
-        )
-
-    def _graph_from_payload(
-        self,
-        payload: dict[str, Any],
-        *,
-        adaptation: dict[str, Any],
-    ) -> ExecutionGraph:
-        strategy = str(payload.get("strategy") or "sequence").strip().lower()
-        if strategy not in self._SUPPORTED_STRATEGIES:
-            raise ValueError(f"Unsupported team strategy: {strategy}")
-        raw_nodes = payload.get("nodes")
-        if not isinstance(raw_nodes, list) or not raw_nodes:
-            raise ValueError("Team plan requires at least one node")
-        if len(raw_nodes) > self._MAX_NODES:
-            raise ValueError(f"Team plan exceeds max node count {self._MAX_NODES}")
-
-        nodes: list[ExecutionNode] = []
-        for index, item in enumerate(raw_nodes, start=1):
-            if not isinstance(item, dict):
-                raise ValueError("Each team node must be an object")
-            unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS)
-            if unsupported:
-                raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}")
-            node_id = str(item.get("node_id") or f"node_{index}").strip()
-            task = str(item.get("task") or "").strip()
-            if not node_id or not task:
-                raise ValueError("Each team node requires node_id and task")
-            allowed_tool_names = self._resolve_requested_tools(
-                item.get("requested_tools"),
-                warnings=adaptation["warnings"],
-            )
-            use_skill = _optional_str(item.get("use_skill"))
-            skill_query = _optional_str(item.get("skill_query")) or task
-            if use_skill is not None or "skill_query" in item:
-                adaptation.setdefault("node_skill_bindings", []).append(
-                    {
-                        "node_id": node_id,
-                        "use_skill": use_skill,
-                        "skill_query": skill_query,
-                    }
-                )
-            nodes.append(
-                ExecutionNode(
-                    node_id=node_id,
-                    task=task,
-                    agent=AgentDescriptor(
-                        name=node_id,
-                        role="",
-                        system_prompt="",
-                        metadata={
-                            "use_skill": use_skill,
-                            "skill_query": skill_query,
-                            "required_capabilities": [],
-                            "requested_tags": [],
-                            "sub_agent_kind": "generic_skill_worker",
-                        },
-                    ),
-                    depends_on=[str(dep) for dep in item.get("depends_on") or []],
-                    constraints=[str(value) for value in item.get("constraints") or []],
-                    input_contract=_dict_value(item.get("input_contract")),
-                    output_contract=_dict_value(item.get("output_contract")),
-                    allowed_tool_names=allowed_tool_names,
-                    required_evidence=_string_list(item.get("required_evidence")),
-                    evidence_contract=_dict_value(item.get("evidence_contract")),
-                    validation_rules=_string_list(item.get("validation_rules")),
-                    required_for_completion=bool(item.get("required_for_completion", True)),
-                    block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)),
-                    max_tool_iterations=_optional_int(item.get("max_tool_iterations")),
-                )
-            )
-        return ExecutionGraph(strategy=strategy, nodes=nodes)  # type: ignore[arg-type]
-
-    def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None:
-        if value is None:
-            return None
-        result: list[str] = []
-        for name in _string_list(value):
-            if name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES:
-                _append_unique(warnings, f"requires_high_risk_review: {name}")
-                continue
-            if self.tool_registry is None or self.tool_registry.get(name) is None:
-                _append_unique(warnings, f"unknown tool removed: {name}")
-                continue
-            result.append(name)
-        return result
-
-    @staticmethod
-    def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]:
-        raw = payload.get("adaptation")
-        adaptation = dict(raw) if isinstance(raw, dict) else {}
-        adaptation["warnings"] = _string_list(adaptation.get("warnings"))
-        return adaptation
-
-    @staticmethod
-    def _select_team_template(
-        activated_skills: list[SkillContext],
-    ) -> tuple[SkillContext | None, dict[str, Any]]:
-        candidates = [
-            skill
-            for skill in activated_skills
-            if isinstance(skill.team_template, dict) and isinstance(skill.team_template.get("nodes"), list)
-        ]
-        selected = candidates[0] if candidates else None
-        warnings: list[str] = []
-        for skill in activated_skills:
-            for warning in skill.team_template_warnings:
-                _append_unique(warnings, f"{skill.name}: {warning}")
-        return selected, {
-            "template_used": False,
-            "selected_template": selected.name if selected else None,
-            "selection_reason": (
-                "first activated skill with a valid team template"
-                if selected
-                else "no activated skill has a valid team template"
-            ),
-            "ignored_templates": [skill.name for skill in candidates[1:]],
-            "warnings": warnings,
-        }
-
-    @staticmethod
-    def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None:
-        payload = dict(plan.planner_adaptation)
-        warnings: list[str] = []
-        for warning in [*base.get("warnings", []), *payload.get("warnings", [])]:
-            _append_unique(warnings, str(warning))
-        merged = {
-            "template_used": bool(payload.get("template_used", False)),
-            "selected_template": base.get("selected_template"),
-            "selection_reason": base.get("selection_reason"),
-            "ignored_templates": list(base.get("ignored_templates", [])),
-            "warnings": warnings,
-        }
-        if isinstance(payload.get("node_skill_bindings"), list):
-            merged["node_skill_bindings"] = [dict(item) for item in payload["node_skill_bindings"] if isinstance(item, dict)]
-        plan.planner_adaptation = merged
-
-    @staticmethod
-    def _merge_skill_resolution_adaptation(
-        plan: TaskExecutionPlan,
-        reports: list[SkillResolutionReport],
-    ) -> None:
-        warnings = plan.planner_adaptation.setdefault("warnings", [])
-        bindings = plan.planner_adaptation.get("node_skill_bindings")
-        binding_by_node = {
-            str(item.get("node_id")): item
-            for item in bindings or []
-            if isinstance(item, dict)
-        }
-        for report in reports:
-            for warning in report.warnings:
-                _append_unique(warnings, warning)
-            binding = binding_by_node.get(report.node_id)
-            if binding is not None and report.requested_skill_name and not report.exact_binding_used:
-                binding["fallback_reason"] = f"use_skill unresolved; {report.reason}"
-
-    @staticmethod
-    def _prompt(
-        *,
-        task: TaskRecord,
-        user_message: str,
-        attempt_index: int,
-        skill_summaries: list[str] | None = None,
-        tool_hints: list[str] | None = None,
-        activated_skills: list[SkillContext] | None = None,
-        selected_template: SkillContext | None = None,
-    ) -> str:
-        history_note = ""
-        if task.feedback:
-            history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
-        skill_note = ""
-        if skill_summaries:
-            skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries)
-        guidance_note = ""
-        if activated_skills:
-            guidance_note = "\nActivated Skill guidance:\n" + "\n".join(
-                f"[{skill.name}]\n{skill.content}" for skill in activated_skills
-            )
-        template_note = ""
-        if selected_template is not None:
-            template_note = "\nPrimary Skill team template:\n" + json.dumps(
-                {
-                    "skill_name": selected_template.name,
-                    "skill_version": selected_template.version,
-                    "template": selected_template.team_template,
-                },
-                ensure_ascii=False,
-                indent=2,
-            )
-        tool_note = ""
-        if tool_hints:
-            tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints)
-        return (
-            "Decide execution mode for this internal Task attempt.\n"
-            "Use mode=team only when independent research, review, implementation slices, or staged checks "
-            "would materially improve the result. Otherwise use mode=single.\n\n"
-            "JSON schema:\n"
-            "{\n"
-            '  "mode": "single" | "team",\n'
-            '  "reason": "short reason",\n'
-            '  "strategy": "sequence" | "parallel" | "dag",\n'
-            '  "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", '
-            '"skill_query": "optional dynamic skill query", "depends_on": [], '
-            '"input_contract": {}, "output_contract": {}, "requested_tools": [], '
-            '"required_evidence": [], "evidence_contract": {}, "validation_rules": [], '
-            '"required_for_completion": true, "block_downstream_on_partial": false, '
-            '"max_tool_iterations": 3, "constraints": []}],\n'
-            '  "adaptation": {"template_used": true, "warnings": []},\n'
-            '  "final_synthesis_instruction": "how the main agent should synthesize team output"\n'
-            "}\n\n"
-            "Node definitions are task-only. Never output agent or role fields. Use at most one primary "
-            "Skill template; treat all other activated Skills as guidance.\n\n"
-            f"Task goal:\n{task.goal}\n\n"
-            f"Current user request:\n{user_message}\n\n"
-            f"Attempt index: {attempt_index}\n"
-            f"{skill_note}"
-            f"{guidance_note}"
-            f"{template_note}"
-            f"{tool_note}"
-            f"{history_note}"
-        )
-
-    @staticmethod
-    def _parse_json_object(text: str) -> dict[str, Any]:
-        cleaned = text.strip()
-        if cleaned.startswith("```"):
-            cleaned = cleaned.strip("`")
-            if cleaned.lower().startswith("json"):
-                cleaned = cleaned[4:].strip()
-        start = cleaned.find("{")
-        end = cleaned.rfind("}")
-        if start >= 0 and end >= start:
-            cleaned = cleaned[start : end + 1]
-        payload = json.loads(cleaned)
-        if not isinstance(payload, dict):
-            raise ValueError("planner response must be a JSON object")
-        return payload
-
-
-def _optional_str(value: Any) -> str | None:
-    if value in (None, ""):
-        return None
-    text = str(value).strip()
-    return text or None
-
-
-def _optional_int(value: Any) -> int | None:
-    if value in (None, ""):
-        return None
-    if isinstance(value, bool):
-        raise ValueError("max_tool_iterations must be an integer")
-    result = int(value)
-    if result < 0:
-        raise ValueError("max_tool_iterations must be non-negative")
-    return result
-
-
-def _dict_value(value: Any) -> dict[str, Any]:
-    return dict(value) if isinstance(value, dict) else {}
-
-
-def _append_unique(values: list[str], value: str) -> None:
-    if value and value not in values:
-        values.append(value)
-
-
-def _string_list(value: Any) -> list[str]:
-    if not isinstance(value, list):
-        if isinstance(value, str):
-            value = [item.strip() for item in value.split(",")]
-        else:
-            return []
-    result: list[str] = []
-    for item in value:
-        text = str(item).strip()
-        if text and text not in result:
-            result.append(text)
-    return result