feat(coordinator): 添加团队节点默认最大工具迭代次数配置

添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数，并修改 LocalAgentRunner 中的逻辑，使其在 envelope 中未指定该值时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件，将 finish_reason 为 "max_tool_iterations_finalized" 的情况视为运行失败，并添加对原始工具调用输出的检测，避免将其误判为成功完成。

feat(mcp): 添加团队工作流 MCP 工具类别支持

增加新的本地 MCP 工具类别 "team_workflow" 及其对应的工具创建功能，为团队工作流提供本地工具支持。

refactor(engine): 调整 AgentLoop 最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100，同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化 MCP 连接管理，避免重复连接

添加 mcp_connected 标志来跟踪 MCP 连接状态，确保 connect_all 只执行一次，提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码，包括解析、存储和处理逻辑，简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理，改进团队运行完成消息显示，并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码，将所有任务统一按单步执行处理，简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑，跳过自然语言证据需求的确定性验证，只执行机器可读的需求验证，避免因自然语言需求导致的节点失败。
2026-06-26 16:36:29 +08:00
parent 53b13e8eac
commit 520a21a027
360 changed files with 13271 additions and 1848 deletions
--- a/app-instance/backend/tests/unit/test_task_execution_planner.py
+++ b/app-instance/backend/tests/unit/test_task_execution_planner.py
@@ -3,19 +3,15 @@ from __future__ import annotations
 import asyncio
 from types import SimpleNamespace

-from beaver.engine.context import SkillContext
 from beaver.engine.providers.base import LLMProvider, LLMResponse
 from beaver.engine.providers.factory import ProviderBundle
-from beaver.tasks import SkillResolutionReport, TaskExecutionPlanner, TaskRecord
-from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
-from beaver.tools.registry import ToolRegistry
+from beaver.tasks import TaskExecutionPlanner, TaskRecord


 class PlannerProvider(LLMProvider):
-    def __init__(self, response: str) -> None:
+    def __init__(self) -> None:
        super().__init__()
-        self.response = response
-        self.calls: list[dict] = []
+        self.calls = 0

    async def chat(
        self,
@@ -25,59 +21,18 @@ class PlannerProvider(LLMProvider):
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
-        self.calls.append(
-            {
-                "messages": messages,
-                "max_tokens": max_tokens,
-                "temperature": temperature,
-                "model": model,
-                "tools": tools,
-            }
+        self.calls += 1
+        return LLMResponse(
+            content='{"mode":"team"}',
+            finish_reason="stop",
+            provider_name="stub",
+            model="stub-model",
        )
-        return LLMResponse(content=self.response, finish_reason="stop", provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        return "stub-model"


-class HangingPlannerProvider(LLMProvider):
-    async def chat(
-        self,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        model: str | None = None,
-        max_tokens: int = 4096,
-        temperature: float = 0.7,
-    ) -> LLMResponse:
-        await asyncio.sleep(10)
-        return LLMResponse(content='{"mode":"team"}', finish_reason="stop", provider_name="stub", model="stub-model")
-
-    def get_default_model(self) -> str:
-        return "stub-model"
-
-
-class SequencedPlannerProvider(PlannerProvider):
-    def __init__(self, responses: list[str]) -> None:
-        super().__init__(responses[0])
-        self.responses = list(responses)
-
-    async def chat(self, *args, **kwargs) -> LLMResponse:
-        self.response = self.responses.pop(0)
-        return await super().chat(*args, **kwargs)
-
-
-class StubTool(BaseTool):
-    def __init__(self, name: str) -> None:
-        self._spec = ToolSpec(name=name, description=name, input_schema={"type": "object"})
-
-    @property
-    def spec(self) -> ToolSpec:
-        return self._spec
-
-    async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
-        raise AssertionError("Planner tests do not execute tools")
-
-
 def _task() -> TaskRecord:
    return TaskRecord(
        task_id="task-1",
@@ -93,55 +48,15 @@ def _task() -> TaskRecord:
    )


-def _bundle(response: str) -> ProviderBundle:
-    provider = PlannerProvider(response)
+def _bundle(provider: PlannerProvider) -> ProviderBundle:
    return ProviderBundle(
        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
        main_provider=provider,
    )


-def _bundle_with_provider(provider: LLMProvider) -> ProviderBundle:
-    return ProviderBundle(
-        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        main_provider=provider,
-    )
-
-
-def _registry() -> ToolRegistry:
-    registry = ToolRegistry()
-    registry.register_many([StubTool("web_search"), StubTool("web_fetch"), StubTool("terminal")])
-    return registry
-
-
-def _hanging_bundle() -> ProviderBundle:
-    return ProviderBundle(
-        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        main_provider=HangingPlannerProvider(),
-    )
-
-
-def test_planner_selects_single_mode() -> None:
-    plan = asyncio.run(
-        TaskExecutionPlanner().plan(
-            task=_task(),
-            user_message="implement workflow",
-            attempt_index=1,
-            provider_bundle=_bundle('{"mode":"single","reason":"main agent is enough"}'),
-        )
-    )
-
-    assert plan.mode == "single"
-    assert plan.graph is None
-    assert plan.reason == "main agent is enough"
-
-
-def test_planner_skips_llm_for_simple_task() -> None:
-    provider = PlannerProvider('{"mode":"team","reason":"should not be used"}')
-    bundle = ProviderBundle(
-        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        main_provider=provider,
-    )
+def test_planner_skips_provider_for_simple_task() -> None:
+    provider = PlannerProvider()
    task = _task()
    task.description = "查询深圳天气"
    task.goal = "查询深圳天气"
@@ -151,409 +66,55 @@ def test_planner_skips_llm_for_simple_task() -> None:
            task=task,
            user_message="帮我查一下今天深圳天气",
            attempt_index=1,
-            provider_bundle=bundle,
+            provider_bundle=_bundle(provider),
        )
    )

    assert plan.mode == "single"
    assert plan.graph is None
    assert plan.reason == "planner_skipped_simple_task"
-    assert provider.calls == []
+    assert provider.calls == 0


-def test_planner_builds_team_graph() -> None:
-    bundle = _bundle(
-        """
-        {
-          "mode": "team",
-          "reason": "needs parallel review",
-          "strategy": "dag",
-          "nodes": [
-            {"node_id": "research", "task": "research options"},
-            {"node_id": "review", "task": "review result", "depends_on": ["research"]}
-          ],
-          "final_synthesis_instruction": "merge the findings"
-        }
-        """
-    )
-    provider = bundle.main_provider
+def test_planner_replaces_team_planning_with_workflow_tools_without_provider_call() -> None:
+    provider = PlannerProvider()
+
    plan = asyncio.run(
        TaskExecutionPlanner().plan(
            task=_task(),
-            user_message="implement workflow",
+            user_message="research and compare workflow options",
            attempt_index=1,
-            provider_bundle=bundle,
+            provider_bundle=_bundle(provider),
            skill_summaries=["docker-debug: Use docker logs before editing config."],
            tool_hints=["terminal", "search_files"],
        )
    )

-    assert plan.is_team
-    assert plan.graph is not None
-    assert plan.graph.strategy == "dag"
-    assert [node.node_id for node in plan.graph.nodes] == ["research", "review"]
-    assert plan.graph.nodes[1].depends_on == ["research"]
-    assert plan.final_synthesis_instruction == "merge the findings"
-    assert isinstance(provider, PlannerProvider)
-    prompt = provider.calls[0]["messages"][1]["content"]
-    assert "Activated skill summaries" in prompt
-    assert "docker-debug: Use docker logs before editing config." in prompt
-    assert "terminal" in prompt
-    assert "search_files" in prompt
+    assert not plan.is_team
+    assert plan.mode == "single"
+    assert plan.graph is None
+    assert plan.reason == "planner_team_replaced_by_workflow_tools"
+    assert plan.final_synthesis_instruction == ""
+    assert provider.calls == 0


-def test_planner_timeout_falls_back_to_single() -> None:
+def test_planner_can_be_disabled_by_environment(monkeypatch) -> None:
+    monkeypatch.setenv("BEAVER_AGENT_TEAM_ENABLED", "0")
+    provider = PlannerProvider()
+
    plan = asyncio.run(
        TaskExecutionPlanner().plan(
            task=_task(),
-            user_message="implement workflow",
+            user_message="research and compare workflow options",
            attempt_index=1,
-            provider_bundle=_hanging_bundle(),
-            timeout_seconds=0.01,
+            provider_bundle=_bundle(provider),
        )
    )

    assert plan.mode == "single"
-    assert plan.reason == "planner_failed"
-    assert "TimeoutError" in (plan.fallback_error or "")
+    assert plan.reason == "planner_disabled_by_environment"
+    assert provider.calls == 0


-def test_planner_team_nodes_use_task_as_internal_skill_query() -> None:
-    plan = TaskExecutionPlanner().from_json(
-        """
-        {
-          "mode": "team",
-          "reason": "needs skill-guided review",
-          "strategy": "sequence",
-          "nodes": [
-            {
-              "node_id": "api_review",
-              "task": "review API compatibility"
-            }
-          ]
-        }
-        """
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    node = plan.graph.nodes[0]
-    assert node.agent.name == "api_review"
-    assert node.agent.role == ""
-    assert node.agent.metadata["skill_query"] == "review API compatibility"
-    assert node.agent.metadata["required_capabilities"] == []
-
-
-def test_planner_accepts_use_skill_and_skill_query() -> None:
-    plan = TaskExecutionPlanner().from_json(
-        """
-        {
-          "mode": "team",
-          "strategy": "sequence",
-          "nodes": [
-            {
-              "node_id": "collect",
-              "task": "Collect official sources",
-              "use_skill": "official-source-research",
-              "skill_query": "official source verification"
-            }
-          ]
-        }
-        """
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    node = plan.graph.nodes[0]
-    assert node.agent.metadata["use_skill"] == "official-source-research"
-    assert node.agent.metadata["skill_query"] == "official source verification"
-    assert node.inherited_pinned_skills == []
-    assert node.allowed_tool_names is None
-    assert plan.planner_adaptation["node_skill_bindings"] == [
-        {
-            "node_id": "collect",
-            "use_skill": "official-source-research",
-            "skill_query": "official source verification",
-        }
-    ]
-
-
-def test_planner_defaults_skill_query_to_node_task_when_absent() -> None:
-    plan = TaskExecutionPlanner().from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"extract","task":"Extract financial metrics","use_skill":"financial-extraction"}]}'
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    assert plan.graph.nodes[0].agent.metadata["skill_query"] == "Extract financial metrics"
-
-
-def test_planner_adaptation_records_unresolved_use_skill_fallback() -> None:
-    planner = TaskExecutionPlanner()
-    plan = planner.from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"extract","task":"Extract metrics","use_skill":"missing-skill",'
-        '"skill_query":"financial extraction"}]}'
-    )
-    report = SkillResolutionReport(
-        node_id="extract",
-        skill_query="financial extraction",
-        requested_skill_name="missing-skill",
-        exact_binding_used=False,
-        warnings=["use_skill unresolved: missing-skill"],
-        reason="matched published skill",
-    )
-
-    planner._merge_skill_resolution_adaptation(plan, [report])
-
-    assert plan.planner_adaptation["warnings"] == ["use_skill unresolved: missing-skill"]
-    assert plan.planner_adaptation["node_skill_bindings"][0]["fallback_reason"] == (
-        "use_skill unresolved; matched published skill"
-    )
-
-
-def test_planner_invalid_outputs_fallback_to_single() -> None:
-    planner = TaskExecutionPlanner()
-    invalid_json = planner.from_json("not json")
-    unknown_strategy = planner.from_json(
-        '{"mode":"team","strategy":"moa","nodes":[{"node_id":"a","task":"a","agent":{"name":"a"}}]}'
-    )
-    too_many_nodes = planner.from_json(
-        '{"mode":"team","strategy":"parallel","nodes":['
-        + ",".join(
-            '{"node_id":"n%s","task":"work","agent":{"name":"n%s"}}' % (index, index)
-            for index in range(7)
-        )
-        + "]}"
-    )
-    cyclic = planner.from_json(
-        """
-        {
-          "mode": "team",
-          "strategy": "dag",
-          "nodes": [
-            {"node_id": "a", "task": "a", "agent": {"name": "a"}, "depends_on": ["b"]},
-            {"node_id": "b", "task": "b", "agent": {"name": "b"}, "depends_on": ["a"]}
-          ]
-        }
-        """
-    )
-
-    assert invalid_json.mode == "single"
-    assert unknown_strategy.mode == "single"
-    assert too_many_nodes.mode == "single"
-    assert cyclic.mode == "single"
-
-
-def test_template_plan_builds_generic_worker_and_preserves_v1_contract_fields() -> None:
-    plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
-        """
-        {
-          "mode": "team",
-          "strategy": "dag",
-          "nodes": [
-            {
-              "node_id": "collect",
-              "task": "Collect official sources",
-              "requested_tools": ["web_search"],
-              "evidence_contract": {"entities": ["MGM", "Galaxy"]},
-              "block_downstream_on_partial": true
-            }
-          ],
-          "adaptation": {"template_used": true}
-        }
-        """
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    node = plan.graph.nodes[0]
-    assert node.agent.name == "collect"
-    assert node.agent.role == ""
-    assert node.agent.metadata["sub_agent_kind"] == "generic_skill_worker"
-    assert node.allowed_tool_names == ["web_search"]
-    assert node.evidence_contract == {"entities": ["MGM", "Galaxy"]}
-    assert node.block_downstream_on_partial is True
-    assert plan.planner_adaptation["template_used"] is True
-
-
-def test_unknown_tool_is_removed_and_warned() -> None:
-    plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"collect","task":"Collect","requested_tools":["web_search","not_real"]}]}'
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    assert plan.graph.nodes[0].allowed_tool_names == ["web_search"]
-    assert "unknown tool removed: not_real" in plan.planner_adaptation["warnings"]
-
-
-def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None:
-    plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"collect","task":"Collect","requested_tools":["web_search","terminal"]}]}'
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    assert plan.graph.nodes[0].allowed_tool_names == ["web_search"]
-    assert "requires_high_risk_review: terminal" in plan.planner_adaptation["warnings"]
-
-
-def test_planner_rejects_agent_and_role_node_fields() -> None:
-    planner = TaskExecutionPlanner(tool_registry=_registry())
-
-    agent_plan = planner.from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"collect","task":"Collect","agent":{"name":"researcher"}}]}'
-    )
-    role_plan = planner.from_json(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"collect","task":"Collect","role":"researcher"}]}'
-    )
-
-    assert agent_plan.mode == "single"
-    assert "agent" in (agent_plan.fallback_error or "")
-    assert role_plan.mode == "single"
-    assert "role" in (role_plan.fallback_error or "")
-
-
-def test_planner_records_primary_template_selection_and_ignored_templates() -> None:
-    primary = SkillContext(
-        name="financial-comparison",
-        version="v1",
-        content="Compare official financial disclosures.",
-        team_template={"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]},
-    )
-    secondary = SkillContext(
-        name="chart-reporting",
-        version="v2",
-        content="Render chart-ready Markdown.",
-        team_template={"version": 1, "nodes": [{"node_id": "report", "task": "Report"}]},
-    )
-    provider = PlannerProvider(
-        '{"mode":"team","strategy":"sequence","nodes":['
-        '{"node_id":"collect","task":"Collect official sources"}],'
-        '"adaptation":{"template_used":true}}'
-    )
-
-    plan = asyncio.run(
-        TaskExecutionPlanner(tool_registry=_registry()).plan(
-            task=_task(),
-            user_message="compare financial workflow",
-            attempt_index=1,
-            provider_bundle=_bundle_with_provider(provider),
-            activated_skills=[primary, secondary],
-        )
-    )
-
-    assert plan.planner_adaptation == {
-        "template_used": True,
-        "selected_template": "financial-comparison",
-        "selection_reason": "first activated skill with a valid team template",
-        "ignored_templates": ["chart-reporting"],
-        "warnings": [],
-    }
-    prompt = provider.calls[0]["messages"][1]["content"]
-    assert '"skill_name": "financial-comparison"' in prompt
-    assert "Compare official financial disclosures." in prompt
-    assert "Render chart-ready Markdown." in prompt
-
-
-def test_malformed_planner_output_repairs_once_without_tools() -> None:
-    provider = SequencedPlannerProvider(
-        [
-            "not json",
-            '{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect"}]}',
-        ]
-    )
-
-    plan = asyncio.run(
-        TaskExecutionPlanner(tool_registry=_registry()).plan(
-            task=_task(),
-            user_message="implement workflow",
-            attempt_index=1,
-            provider_bundle=_bundle_with_provider(provider),
-        )
-    )
-
-    assert plan.is_team
-    assert len(provider.calls) == 2
-    assert provider.calls[1]["tools"] is None
-    assert "Repair the invalid planner JSON" in provider.calls[1]["messages"][1]["content"]
-
-
-def test_failed_planner_repair_falls_back_to_single() -> None:
-    provider = SequencedPlannerProvider(["not json", "still not json"])
-
-    plan = asyncio.run(
-        TaskExecutionPlanner(tool_registry=_registry()).plan(
-            task=_task(),
-            user_message="implement workflow",
-            attempt_index=1,
-            provider_bundle=_bundle_with_provider(provider),
-        )
-    )
-
-    assert plan.mode == "single"
-    assert plan.reason == "planner_fallback_single"
-    assert len(provider.calls) == 2
-
-
-def test_finance_template_adapts_to_task_oriented_read_only_graph() -> None:
-    plan = TaskExecutionPlanner(tool_registry=_registry()).from_json(
-        """
-        {
-          "mode": "team",
-          "strategy": "dag",
-          "nodes": [
-            {
-              "node_id": "collect_official_sources",
-              "task": "Collect MGM and Galaxy official financial disclosures",
-              "requested_tools": ["web_search", "web_fetch"],
-              "required_evidence": ["tool_result", "url"]
-            },
-            {
-              "node_id": "extract_financial_metrics",
-              "task": "Extract comparable financial metrics from collected sources",
-              "depends_on": ["collect_official_sources"],
-              "requested_tools": ["web_fetch"],
-              "required_evidence": ["output"]
-            },
-            {
-              "node_id": "validate_metrics",
-              "task": "Validate metric units, periods, and source consistency",
-              "depends_on": ["extract_financial_metrics"],
-              "required_evidence": ["output"]
-            },
-            {
-              "node_id": "generate_chart_report",
-              "task": "Generate a Markdown comparison table and chart-ready data without claiming an image or file artifact",
-              "depends_on": ["validate_metrics"],
-              "requested_tools": [],
-              "required_evidence": ["output"]
-            }
-          ]
-        }
-        """
-    )
-
-    assert plan.is_team
-    assert plan.graph is not None
-    assert [node.node_id for node in plan.graph.nodes] == [
-        "collect_official_sources",
-        "extract_financial_metrics",
-        "validate_metrics",
-        "generate_chart_report",
-    ]
-    assert all(node.agent.role == "" for node in plan.graph.nodes)
-    assert not {"researcher", "writer", "reviewer", "analyst"}.intersection(
-        node.node_id for node in plan.graph.nodes
-    )
-    assert plan.graph.nodes[0].allowed_tool_names == ["web_search", "web_fetch"]
-    assert plan.graph.nodes[-1].allowed_tool_names == []
-    report_task = plan.graph.nodes[-1].task.lower()
-    assert "markdown" in report_task
-    assert "without claiming an image or file artifact" in report_task
+def test_planner_no_longer_exposes_json_to_team_graph_parser() -> None:
+    assert not hasattr(TaskExecutionPlanner(), "from_json")