# beaver_project/app-instance/backend/tests/unit/test_litellm_thinking_mode.py

from __future__ import annotations

import asyncio
import pytest
from types import SimpleNamespace

from beaver.engine.providers.litellm import LiteLLMProvider


def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the ``extra_body`` sent for a Qwen-named model with thinking turned off.

    The provider module's ``acompletion`` is swapped for a recorder that
    stores the keyword arguments it receives and returns a canned reply, so
    both the outgoing ``extra_body`` and the returned content can be inspected.
    """
    model_name = "Qwen3.6-35B"
    sent_kwargs: dict = {}

    class StubMessage:
        content = "可以"
        reasoning_content = ""
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**kwargs):
        # Remember what the provider passed so the assertions below can read it.
        sent_kwargs.update(kwargs)
        return StubResponse()

    # Replace the provider module's ``litellm`` and ``acompletion`` names with local stand-ins.
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)

    provider = LiteLLMProvider(
        provider_name="openai",
        default_model=model_name,
        api_base="https://oai.example.com/v1",
        api_key="sk-test",
    )
    conversation = [{"role": "user", "content": "只回复可以"}]
    reply = asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=False))

    expected_extra_body = {
        "thinking": {"type": "disabled"},
        "chat_template_kwargs": {"enable_thinking": False},
    }
    assert reply.content == "可以"
    assert sent_kwargs["extra_body"] == expected_extra_body


def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the disabled-thinking ``extra_body`` is sent for a ``gpt-4.1-mini`` request.

    Uses a recorder in place of the provider module's ``acompletion`` and
    asserts on the ``extra_body`` keyword it was called with.
    """
    model_name = "gpt-4.1-mini"
    sent_kwargs: dict = {}

    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**kwargs):
        # Remember what the provider passed so the assertion below can read it.
        sent_kwargs.update(kwargs)
        return StubResponse()

    # Replace the provider module's ``litellm`` and ``acompletion`` names with local stand-ins.
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)

    provider = LiteLLMProvider(
        provider_name="openai",
        default_model=model_name,
        api_base="https://oai.example.com/v1",
        api_key="sk-test",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=False))

    expected_extra_body = {
        "thinking": {"type": "disabled"},
        "chat_template_kwargs": {"enable_thinking": False},
    }
    assert sent_kwargs["extra_body"] == expected_extra_body


def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None:
    """Check ``_sanitize_messages`` keeps ``reasoning_content`` on an assistant tool-call message."""
    lookup_call = {
        "id": "call-1",
        "type": "function",
        "function": {"name": "lookup", "arguments": "{}"},
    }
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "reasoning_content": "must be passed back",
        "tool_calls": [lookup_call],
    }

    sanitized = LiteLLMProvider._sanitize_messages([assistant_turn])

    # Only the first (and only) message is inspected.
    first_message = sanitized[0]
    assert first_message["reasoning_content"] == "must be passed back"


def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``extra_body`` still carries the disabled flags when ``thinking_enabled=True`` is passed.

    Uses a recorder in place of the provider module's ``acompletion`` and
    asserts on the ``extra_body`` keyword it was called with.
    """
    model_name = "gpt-4.1-mini"
    sent_kwargs: dict = {}

    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**kwargs):
        # Remember what the provider passed so the assertion below can read it.
        sent_kwargs.update(kwargs)
        return StubResponse()

    # Replace the provider module's ``litellm`` and ``acompletion`` names with local stand-ins.
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)

    provider = LiteLLMProvider(
        provider_name="openai",
        default_model=model_name,
        api_base="https://oai.example.com/v1",
        api_key="sk-test",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    # The caller asks for thinking to be on; the expectation below is that it is sent as off.
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=True))

    expected_extra_body = {
        "thinking": {"type": "disabled"},
        "chat_template_kwargs": {"enable_thinking": False},
    }
    assert sent_kwargs["extra_body"] == expected_extra_body


def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check dict-valued tool-call ``arguments`` reach ``acompletion`` as a JSON string.

    The conversation holds an assistant tool call whose ``arguments`` is a
    dict, followed by the matching tool result. A recorder stands in for the
    provider module's ``acompletion`` so the forwarded ``messages`` can be read.
    """
    model_name = "Qwen3.6-35B"
    sent_kwargs: dict = {}

    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def record_acompletion(**kwargs):
        # Remember what the provider passed so the assertion below can read it.
        sent_kwargs.update(kwargs)
        return StubResponse()

    # Replace the provider module's ``litellm`` and ``acompletion`` names with local stand-ins.
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_acompletion)

    provider = LiteLLMProvider(
        provider_name="openai",
        default_model=model_name,
        api_base="https://oai.example.com/v1",
        api_key="sk-test",
    )

    cron_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            # Deliberately a dict rather than a JSON string.
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    assistant_turn = {"role": "assistant", "content": None, "tool_calls": [cron_call]}
    tool_turn = {"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"}

    asyncio.run(
        provider.chat(
            [assistant_turn, tool_turn],
            model=model_name,
            thinking_enabled=False,
        )
    )

    forwarded_call = sent_kwargs["messages"][0]["tool_calls"][0]
    assert forwarded_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'