# beaver_project/app-instance/backend/tests/unit/test_task_execution_planner.py

from __future__ import annotations

import asyncio
from types import SimpleNamespace

from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.tasks import SkillResolutionReport, TaskExecutionPlanner, TaskRecord
from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec
from beaver.tools.registry import ToolRegistry


class PlannerProvider(LLMProvider):
    """Stub LLM provider that replays one canned reply and logs every chat call."""

    def __init__(self, response: str) -> None:
        super().__init__()
        # Text returned verbatim as the content of each chat() reply.
        self.response = response
        # One dict per chat() invocation, appended in call order.
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Record the call arguments, then answer with the canned response."""
        recorded_call = dict(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            model=model,
            tools=tools,
        )
        self.calls.append(recorded_call)
        reply = LLMResponse(
            content=self.response,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        return reply

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"


class HangingPlannerProvider(LLMProvider):
    """Stub provider whose chat() sleeps for ten seconds before replying."""

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Sleep for 10 seconds, then return a team-mode reply."""
        delay_seconds = 10
        await asyncio.sleep(delay_seconds)
        late_reply = LLMResponse(
            content='{"mode":"team"}',
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        return late_reply

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"


class SequencedPlannerProvider(PlannerProvider):
    """PlannerProvider variant that answers successive calls from a list of replies."""

    def __init__(self, responses: list[str]) -> None:
        first_reply = responses[0]
        super().__init__(first_reply)
        # Private copy so popping replies never mutates the caller's list.
        self.responses = [*responses]

    async def chat(self, *args, **kwargs) -> LLMResponse:
        """Consume the next queued reply and delegate recording to the parent."""
        next_reply = self.responses.pop(0)
        self.response = next_reply
        result = await super().chat(*args, **kwargs)
        return result


class StubTool(BaseTool):
    """Tool placeholder that exposes only a spec and fails if it is ever invoked."""

    def __init__(self, name: str) -> None:
        schema = {"type": "object"}
        # The tool name doubles as its description.
        self._spec = ToolSpec(name=name, description=name, input_schema=schema)

    @property
    def spec(self) -> ToolSpec:
        """Return the spec built at construction time."""
        return self._spec

    async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
        """Always raise: these tests only plan, they never run tools."""
        raise AssertionError("Planner tests do not execute tools")


def _task() -> TaskRecord:
    """Build the baseline open task record shared by the planner tests."""
    objective = "implement workflow"
    timestamp = "now"
    record = TaskRecord(
        task_id="task-1",
        session_id="session-1",
        description=objective,
        goal=objective,
        constraints=[],
        priority=0,
        status="open",
        creator="test",
        created_at=timestamp,
        updated_at=timestamp,
    )
    return record


def _bundle(response: str) -> ProviderBundle:
    """Return a provider bundle whose main provider always answers ``response``.

    Delegates to ``_bundle_with_provider`` so the stub runtime settings
    (model and provider name) are spelled out in a single helper.
    """
    return _bundle_with_provider(PlannerProvider(response))


def _bundle_with_provider(provider: LLMProvider) -> ProviderBundle:
    """Wrap ``provider`` in a ProviderBundle alongside a stub main runtime."""
    stub_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=stub_runtime, main_provider=provider)


def _registry() -> ToolRegistry:
    """Return a registry holding stub web_search, web_fetch, and terminal tools."""
    tool_names = ("web_search", "web_fetch", "terminal")
    stub_tools = [StubTool(tool_name) for tool_name in tool_names]
    tool_registry = ToolRegistry()
    tool_registry.register_many(stub_tools)
    return tool_registry


def _hanging_bundle() -> ProviderBundle:
    """Return a provider bundle backed by ``HangingPlannerProvider``.

    Delegates to ``_bundle_with_provider`` so the stub runtime settings
    (model and provider name) are spelled out in a single helper.
    """
    return _bundle_with_provider(HangingPlannerProvider())


def test_planner_selects_single_mode() -> None:
    """A single-mode planner reply yields a graph-less plan carrying its reason."""
    planner = TaskExecutionPlanner()
    bundle = _bundle('{"mode":"single","reason":"main agent is enough"}')
    planning = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=bundle,
    )
    plan = asyncio.run(planning)

    assert plan.mode == "single"
    assert plan.graph is None
    assert plan.reason == "main agent is enough"


def test_planner_skips_llm_for_simple_task() -> None:
    """The planner returns a single-mode plan without calling the provider.

    The provider is primed with a team-mode reply, so any LLM call would
    surface both in ``provider.calls`` and in the resulting plan mode.
    """
    provider = PlannerProvider('{"mode":"team","reason":"should not be used"}')
    task = _task()
    # Task text: "query Shenzhen weather".
    task.description = "查询深圳天气"
    task.goal = "查询深圳天气"

    plan = asyncio.run(
        TaskExecutionPlanner().plan(
            task=task,
            # User message: "help me check today's Shenzhen weather".
            user_message="帮我查一下今天深圳天气",
            attempt_index=1,
            # Shared helper keeps the stub runtime wiring in one place.
            provider_bundle=_bundle_with_provider(provider),
        )
    )

    assert plan.mode == "single"
    assert plan.graph is None
    assert plan.reason == "planner_skipped_simple_task"
    assert provider.calls == []


def test_planner_builds_team_graph() -> None:
    """A team reply becomes a DAG graph, and the prompt carries skill and tool hints."""
    planner_reply = """
        {
          "mode": "team",
          "reason": "needs parallel review",
          "strategy": "dag",
          "nodes": [
            {"node_id": "research", "task": "research options"},
            {"node_id": "review", "task": "review result", "depends_on": ["research"]}
          ],
          "final_synthesis_instruction": "merge the findings"
        }
        """
    bundle = _bundle(planner_reply)
    provider = bundle.main_provider
    skill_line = "docker-debug: Use docker logs before editing config."
    planning = TaskExecutionPlanner().plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=bundle,
        skill_summaries=[skill_line],
        tool_hints=["terminal", "search_files"],
    )
    plan = asyncio.run(planning)

    assert plan.is_team
    assert plan.graph is not None
    graph = plan.graph
    assert graph.strategy == "dag"
    node_ids = [node.node_id for node in graph.nodes]
    assert node_ids == ["research", "review"]
    assert graph.nodes[1].depends_on == ["research"]
    assert plan.final_synthesis_instruction == "merge the findings"
    # Confirms the provider is the recording stub before reading .calls.
    assert isinstance(provider, PlannerProvider)
    first_call = provider.calls[0]
    prompt = first_call["messages"][1]["content"]
    assert "Activated skill summaries" in prompt
    assert "docker-debug: Use docker logs before editing config." in prompt
    assert "terminal" in prompt
    assert "search_files" in prompt


def test_planner_timeout_falls_back_to_single() -> None:
    """With a hanging provider and a 0.01s timeout the plan falls back to single mode."""
    planner = TaskExecutionPlanner()
    planning = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_hanging_bundle(),
        timeout_seconds=0.01,
    )
    plan = asyncio.run(planning)

    assert plan.mode == "single"
    assert plan.reason == "planner_failed"
    # fallback_error may be None; coerce so the membership test stays valid.
    recorded_error = plan.fallback_error or ""
    assert "TimeoutError" in recorded_error


def test_planner_team_nodes_use_task_as_internal_skill_query() -> None:
    """A node without skill fields gets its task text as the skill query."""
    payload = """
        {
          "mode": "team",
          "reason": "needs skill-guided review",
          "strategy": "sequence",
          "nodes": [
            {
              "node_id": "api_review",
              "task": "review API compatibility"
            }
          ]
        }
        """
    plan = TaskExecutionPlanner().from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    agent = plan.graph.nodes[0].agent
    assert agent.name == "api_review"
    assert agent.role == ""
    metadata = agent.metadata
    assert metadata["skill_query"] == "review API compatibility"
    assert metadata["required_capabilities"] == []


def test_planner_accepts_use_skill_and_skill_query() -> None:
    """Explicit use_skill and skill_query reach node metadata and the adaptation record."""
    payload = """
        {
          "mode": "team",
          "strategy": "sequence",
          "nodes": [
            {
              "node_id": "collect",
              "task": "Collect official sources",
              "use_skill": "official-source-research",
              "skill_query": "official source verification"
            }
          ]
        }
        """
    plan = TaskExecutionPlanner().from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    node = plan.graph.nodes[0]
    metadata = node.agent.metadata
    assert metadata["use_skill"] == "official-source-research"
    assert metadata["skill_query"] == "official source verification"
    assert node.inherited_pinned_skills == []
    assert node.allowed_tool_names is None
    expected_bindings = [
        {
            "node_id": "collect",
            "use_skill": "official-source-research",
            "skill_query": "official source verification",
        }
    ]
    assert plan.planner_adaptation["node_skill_bindings"] == expected_bindings


def test_planner_defaults_skill_query_to_node_task_when_absent() -> None:
    """With use_skill but no skill_query, the node task is used as the skill query."""
    payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"extract","task":"Extract financial metrics","use_skill":"financial-extraction"}]}'
    )
    plan = TaskExecutionPlanner().from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    first_node = plan.graph.nodes[0]
    assert first_node.agent.metadata["skill_query"] == "Extract financial metrics"


def test_planner_adaptation_records_unresolved_use_skill_fallback() -> None:
    """Merging an unresolved-skill report adds its warning and a fallback reason."""
    planner = TaskExecutionPlanner()
    payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"extract","task":"Extract metrics","use_skill":"missing-skill",'
        '"skill_query":"financial extraction"}]}'
    )
    plan = planner.from_json(payload)
    unresolved_warning = "use_skill unresolved: missing-skill"
    report = SkillResolutionReport(
        node_id="extract",
        skill_query="financial extraction",
        requested_skill_name="missing-skill",
        exact_binding_used=False,
        warnings=[unresolved_warning],
        reason="matched published skill",
    )

    planner._merge_skill_resolution_adaptation(plan, [report])

    adaptation = plan.planner_adaptation
    assert adaptation["warnings"] == ["use_skill unresolved: missing-skill"]
    first_binding = adaptation["node_skill_bindings"][0]
    assert first_binding["fallback_reason"] == "use_skill unresolved; matched published skill"


def test_planner_invalid_outputs_fallback_to_single() -> None:
    """Malformed, unsupported, oversized, and cyclic planner outputs degrade to single mode.

    Node payloads deliberately carry only ``node_id``, ``task`` and
    ``depends_on``. ``test_planner_rejects_agent_and_role_node_fields`` shows
    that a node-level ``agent`` field is itself enough to force a single-mode
    fallback, so including one here would let these cases pass without the
    strategy, node-count, or cycle checks being exercised.
    """
    planner = TaskExecutionPlanner()
    invalid_json = planner.from_json("not json")
    unknown_strategy = planner.from_json(
        '{"mode":"team","strategy":"moa","nodes":[{"node_id":"a","task":"a"}]}'
    )
    # Seven otherwise-valid nodes; the test expects this count to be rejected.
    seven_nodes = ",".join(
        f'{{"node_id":"n{index}","task":"work"}}' for index in range(7)
    )
    too_many_nodes = planner.from_json(
        '{"mode":"team","strategy":"parallel","nodes":[' + seven_nodes + "]}"
    )
    # "a" and "b" depend on each other, forming a cycle.
    cyclic = planner.from_json(
        """
        {
          "mode": "team",
          "strategy": "dag",
          "nodes": [
            {"node_id": "a", "task": "a", "depends_on": ["b"]},
            {"node_id": "b", "task": "b", "depends_on": ["a"]}
          ]
        }
        """
    )

    assert invalid_json.mode == "single"
    assert unknown_strategy.mode == "single"
    assert too_many_nodes.mode == "single"
    assert cyclic.mode == "single"


def test_template_plan_builds_generic_worker_and_preserves_v1_contract_fields() -> None:
    """A template node becomes a generic skill worker and keeps its contract fields."""
    planner = TaskExecutionPlanner(tool_registry=_registry())
    payload = """
        {
          "mode": "team",
          "strategy": "dag",
          "nodes": [
            {
              "node_id": "collect",
              "task": "Collect official sources",
              "requested_tools": ["web_search"],
              "evidence_contract": {"entities": ["MGM", "Galaxy"]},
              "block_downstream_on_partial": true
            }
          ],
          "adaptation": {"template_used": true}
        }
        """
    plan = planner.from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    node = plan.graph.nodes[0]
    agent = node.agent
    assert agent.name == "collect"
    assert agent.role == ""
    assert agent.metadata["sub_agent_kind"] == "generic_skill_worker"
    assert node.allowed_tool_names == ["web_search"]
    expected_contract = {"entities": ["MGM", "Galaxy"]}
    assert node.evidence_contract == expected_contract
    assert node.block_downstream_on_partial is True
    assert plan.planner_adaptation["template_used"] is True


def test_unknown_tool_is_removed_and_warned() -> None:
    """A requested tool absent from the registry is dropped and reported as a warning."""
    planner = TaskExecutionPlanner(tool_registry=_registry())
    payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect","requested_tools":["web_search","not_real"]}]}'
    )
    plan = planner.from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    first_node = plan.graph.nodes[0]
    assert first_node.allowed_tool_names == ["web_search"]
    warnings = plan.planner_adaptation["warnings"]
    assert "unknown tool removed: not_real" in warnings


def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None:
    """Requesting ``terminal`` drops it with a review warning while the plan stays team."""
    planner = TaskExecutionPlanner(tool_registry=_registry())
    payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect","requested_tools":["web_search","terminal"]}]}'
    )
    plan = planner.from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    first_node = plan.graph.nodes[0]
    assert first_node.allowed_tool_names == ["web_search"]
    warnings = plan.planner_adaptation["warnings"]
    assert "requires_high_risk_review: terminal" in warnings


def test_planner_rejects_agent_and_role_node_fields() -> None:
    """Node-level ``agent`` or ``role`` fields force a single-mode fallback naming the field."""
    planner = TaskExecutionPlanner(tool_registry=_registry())
    agent_payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect","agent":{"name":"researcher"}}]}'
    )
    role_payload = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect","role":"researcher"}]}'
    )

    agent_plan = planner.from_json(agent_payload)
    role_plan = planner.from_json(role_payload)

    assert agent_plan.mode == "single"
    # fallback_error may be None; coerce so the membership test stays valid.
    agent_error = agent_plan.fallback_error or ""
    assert "agent" in agent_error
    assert role_plan.mode == "single"
    role_error = role_plan.fallback_error or ""
    assert "role" in role_error


def test_planner_records_primary_template_selection_and_ignored_templates() -> None:
    """The first activated skill's template is selected; the other is listed as ignored."""
    collect_template = {"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]}
    report_template = {"version": 1, "nodes": [{"node_id": "report", "task": "Report"}]}
    primary = SkillContext(
        name="financial-comparison",
        version="v1",
        content="Compare official financial disclosures.",
        team_template=collect_template,
    )
    secondary = SkillContext(
        name="chart-reporting",
        version="v2",
        content="Render chart-ready Markdown.",
        team_template=report_template,
    )
    planner_reply = (
        '{"mode":"team","strategy":"sequence","nodes":['
        '{"node_id":"collect","task":"Collect official sources"}],'
        '"adaptation":{"template_used":true}}'
    )
    provider = PlannerProvider(planner_reply)

    planner = TaskExecutionPlanner(tool_registry=_registry())
    planning = planner.plan(
        task=_task(),
        user_message="compare financial workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
        activated_skills=[primary, secondary],
    )
    plan = asyncio.run(planning)

    expected_adaptation = {
        "template_used": True,
        "selected_template": "financial-comparison",
        "selection_reason": "first activated skill with a valid team template",
        "ignored_templates": ["chart-reporting"],
        "warnings": [],
    }
    assert plan.planner_adaptation == expected_adaptation
    first_call = provider.calls[0]
    prompt = first_call["messages"][1]["content"]
    assert '"skill_name": "financial-comparison"' in prompt
    # Content from both activated skills is expected in the prompt.
    assert "Compare official financial disclosures." in prompt
    assert "Render chart-ready Markdown." in prompt


def test_malformed_planner_output_repairs_once_without_tools() -> None:
    """An invalid first reply triggers one tool-less repair call that produces a team plan."""
    broken_reply = "not json"
    repaired_reply = '{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect"}]}'
    provider = SequencedPlannerProvider([broken_reply, repaired_reply])

    planner = TaskExecutionPlanner(tool_registry=_registry())
    planning = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
    )
    plan = asyncio.run(planning)

    assert plan.is_team
    assert len(provider.calls) == 2
    repair_call = provider.calls[1]
    assert repair_call["tools"] is None
    assert "Repair the invalid planner JSON" in repair_call["messages"][1]["content"]


def test_failed_planner_repair_falls_back_to_single() -> None:
    """Two invalid replies in a row end in a single-mode fallback after two calls."""
    replies = ["not json", "still not json"]
    provider = SequencedPlannerProvider(replies)

    planner = TaskExecutionPlanner(tool_registry=_registry())
    planning = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_bundle_with_provider(provider),
    )
    plan = asyncio.run(planning)

    assert plan.mode == "single"
    assert plan.reason == "planner_fallback_single"
    call_count = len(provider.calls)
    assert call_count == 2


def test_finance_template_adapts_to_task_oriented_read_only_graph() -> None:
    """A four-step finance plan keeps task-named, role-free nodes with the requested tools."""
    planner = TaskExecutionPlanner(tool_registry=_registry())
    payload = """
        {
          "mode": "team",
          "strategy": "dag",
          "nodes": [
            {
              "node_id": "collect_official_sources",
              "task": "Collect MGM and Galaxy official financial disclosures",
              "requested_tools": ["web_search", "web_fetch"],
              "required_evidence": ["tool_result", "url"]
            },
            {
              "node_id": "extract_financial_metrics",
              "task": "Extract comparable financial metrics from collected sources",
              "depends_on": ["collect_official_sources"],
              "requested_tools": ["web_fetch"],
              "required_evidence": ["output"]
            },
            {
              "node_id": "validate_metrics",
              "task": "Validate metric units, periods, and source consistency",
              "depends_on": ["extract_financial_metrics"],
              "required_evidence": ["output"]
            },
            {
              "node_id": "generate_chart_report",
              "task": "Generate a Markdown comparison table and chart-ready data without claiming an image or file artifact",
              "depends_on": ["validate_metrics"],
              "requested_tools": [],
              "required_evidence": ["output"]
            }
          ]
        }
        """
    plan = planner.from_json(payload)

    assert plan.is_team
    assert plan.graph is not None
    nodes = plan.graph.nodes
    node_ids = [node.node_id for node in nodes]
    expected_ids = [
        "collect_official_sources",
        "extract_financial_metrics",
        "validate_metrics",
        "generate_chart_report",
    ]
    assert node_ids == expected_ids
    assert all(node.agent.role == "" for node in nodes)
    # No node may be named after a persona-style role.
    role_like_names = {"researcher", "writer", "reviewer", "analyst"}
    assert role_like_names.isdisjoint(node.node_id for node in nodes)
    first_node, last_node = nodes[0], nodes[-1]
    assert first_node.allowed_tool_names == ["web_search", "web_fetch"]
    assert last_node.allowed_tool_names == []
    report_task = last_node.task.lower()
    assert "markdown" in report_task
    assert "without claiming an image or file artifact" in report_task