# beaver_project/app-instance/backend/tests/unit/test_litellm_thinking_mode.py

from __future__ import annotations

import asyncio
import pytest
from types import SimpleNamespace

from beaver.engine.providers.litellm import LiteLLMProvider


def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the extra_body sent for a Qwen model when thinking is switched off.

    ``acompletion`` is replaced with a recorder; the test then asserts that the
    reply content is surfaced and that the forwarded ``extra_body`` contains
    both ``chat_template_kwargs.enable_thinking=False`` and
    ``thinking.type="disabled"``.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "可以"
        reasoning_content = ""
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Qwen3.6-35B"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "只回复可以"}]
    pending_reply = provider.chat(conversation, model=model_name, thinking_enabled=False)
    reply = asyncio.run(pending_reply)

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert reply.content == "可以"
    assert sent_kwargs["extra_body"] == expected_body


def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the disabled-thinking extra_body for a ``gpt-4.1-mini`` request.

    With ``thinking_enabled=False`` the recorded ``extra_body`` must contain
    both the ``chat_template_kwargs`` and the ``thinking`` disable switches.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "gpt-4.1-mini"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name, thinking_enabled=False)
    asyncio.run(pending_reply)

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert sent_kwargs["extra_body"] == expected_body


def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None:
    """Check that ``_sanitize_messages`` keeps ``reasoning_content`` on an assistant tool-call turn."""
    lookup_call = {
        "id": "call-1",
        "type": "function",
        "function": {"name": "lookup", "arguments": "{}"},
    }
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "reasoning_content": "must be passed back",
        "tool_calls": [lookup_call],
    }
    messages = [assistant_turn]

    sanitized = LiteLLMProvider._sanitize_messages(messages)
    first_message = sanitized[0]

    assert first_message["reasoning_content"] == "must be passed back"


def test_litellm_provider_merges_late_system_messages_to_front() -> None:
    """Check that a trailing system message is folded into the leading one.

    After ``_sanitize_messages`` the roles must be ``["system", "user"]`` and
    the system content must be both prompts joined by a blank line.
    """
    base_prompt = {"role": "system", "content": "base"}
    user_turn = {"role": "user", "content": "question"}
    late_prompt = {"role": "system", "content": "finalize without tools"}

    sanitized = LiteLLMProvider._sanitize_messages([base_prompt, user_turn, late_prompt])

    roles = [entry["role"] for entry in sanitized]
    assert roles == ["system", "user"]

    merged_system = sanitized[0]
    assert merged_system["content"] == "base\n\nfinalize without tools"


def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that ``thinking_enabled=True`` still produces the disabled extra_body.

    The request targets ``gpt-4.1-mini`` on the ``openai`` provider; the
    recorded ``extra_body`` is expected to carry both disable switches.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "gpt-4.1-mini"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name, thinking_enabled=True)
    asyncio.run(pending_reply)

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert sent_kwargs["extra_body"] == expected_body


def test_mistral_vllm_uses_reasoning_effort_instead_of_qwen_thinking_body(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the request built for a Mistral model on the ``vllm`` provider with thinking on.

    The recorded model must be prefixed with ``hosted_vllm/`` and the
    ``extra_body`` must be exactly ``{"reasoning_effort": "high"}``.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "mistralai/Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://localhost:8000/v1",
        default_model=model_name,
        provider_name="vllm",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name, thinking_enabled=True)
    asyncio.run(pending_reply)

    assert sent_kwargs["model"] == "hosted_vllm/mistralai/Mistral-Medium-3.5-128B"
    assert sent_kwargs["extra_body"] == {"reasoning_effort": "high"}


def test_mistral_vllm_omits_reasoning_body_when_thinking_mode_is_unspecified(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that no ``extra_body`` is forwarded when ``thinking_enabled`` is not passed.

    The request targets a Mistral model on the ``vllm`` provider.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "mistralai/Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://localhost:8000/v1",
        default_model=model_name,
        provider_name="vllm",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name)
    asyncio.run(pending_reply)

    assert "extra_body" not in sent_kwargs


def test_mistral_openai_compatible_private_vllm_uses_reasoning_effort(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the extra_body for a Mistral model reached through the ``openai`` provider.

    With ``thinking_enabled=False`` the recorded ``extra_body`` must be exactly
    ``{"reasoning_effort": "none"}``, without the ``chat_template_kwargs`` or
    ``thinking`` keys.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name, thinking_enabled=False)
    asyncio.run(pending_reply)

    assert sent_kwargs["extra_body"] == {"reasoning_effort": "none"}
    # Explicit guards against the other thinking switches leaking into the body.
    for unwanted_key in ("chat_template_kwargs", "thinking"):
        assert unwanted_key not in sent_kwargs["extra_body"]


def test_mistral_openai_compatible_private_vllm_omits_body_when_unspecified(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that no ``extra_body`` is forwarded when ``thinking_enabled`` is not passed.

    The request targets a Mistral model through the ``openai`` provider.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    pending_reply = provider.chat(conversation, model=model_name)
    asyncio.run(pending_reply)

    assert "extra_body" not in sent_kwargs


def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that dict-valued tool-call arguments are forwarded as a JSON string.

    The assistant turn carries ``arguments`` as a dict; the first message
    recorded by the fake ``acompletion`` must carry them as the expected JSON
    text instead.
    """
    sent_kwargs: dict = {}

    # Minimal stand-ins exposing only the attributes these tests set up.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**call_kwargs):
        # Remember everything the provider forwards so it can be inspected below.
        sent_kwargs.update(call_kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Qwen3.6-35B"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )

    # The arguments are deliberately a dict rather than a JSON string.
    cron_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    history = [
        {"role": "assistant", "content": None, "tool_calls": [cron_call]},
        {"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"},
    ]
    pending_reply = provider.chat(history, model=model_name, thinking_enabled=False)
    asyncio.run(pending_reply)

    forwarded_call = sent_kwargs["messages"][0]["tool_calls"][0]
    forwarded_arguments = forwarded_call["function"]["arguments"]
    assert forwarded_arguments == '{"action": "add", "mode": "notification"}'