Files
beaver_project/app-instance/backend/tests/unit/test_litellm_thinking_mode.py

396 lines
11 KiB
Python

from __future__ import annotations
import asyncio
import pytest
from types import SimpleNamespace
from beaver.engine.providers.litellm import LiteLLMProvider
def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}
class Message:
content = "可以"
reasoning_content = ""
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="sk-test",
api_base="https://oai.example.com/v1",
default_model="Qwen3.6-35B",
provider_name="openai",
)
response = asyncio.run(
provider.chat(
[{"role": "user", "content": "只回复可以"}],
model="Qwen3.6-35B",
thinking_enabled=False,
)
)
assert response.content == "可以"
assert captured["extra_body"] == {
"chat_template_kwargs": {"enable_thinking": False},
"thinking": {"type": "disabled"},
}
def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="sk-test",
api_base="https://oai.example.com/v1",
default_model="gpt-4.1-mini",
provider_name="openai",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="gpt-4.1-mini",
thinking_enabled=False,
)
)
assert captured["extra_body"] == {
"chat_template_kwargs": {"enable_thinking": False},
"thinking": {"type": "disabled"},
}
def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None:
messages = [
{
"role": "assistant",
"content": "",
"reasoning_content": "must be passed back",
"tool_calls": [
{
"id": "call-1",
"type": "function",
"function": {"name": "lookup", "arguments": "{}"},
}
],
}
]
assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back"
def test_litellm_provider_merges_late_system_messages_to_front() -> None:
messages = [
{"role": "system", "content": "base"},
{"role": "user", "content": "question"},
{"role": "system", "content": "finalize without tools"},
]
sanitized = LiteLLMProvider._sanitize_messages(messages)
assert [message["role"] for message in sanitized] == ["system", "user"]
assert sanitized[0]["content"] == "base\n\nfinalize without tools"
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="sk-test",
api_base="https://oai.example.com/v1",
default_model="gpt-4.1-mini",
provider_name="openai",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="gpt-4.1-mini",
thinking_enabled=True,
)
)
assert captured["extra_body"] == {
"chat_template_kwargs": {"enable_thinking": False},
"thinking": {"type": "disabled"},
}
def test_mistral_vllm_uses_reasoning_effort_instead_of_qwen_thinking_body(
monkeypatch: pytest.MonkeyPatch,
) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="EMPTY",
api_base="http://localhost:8000/v1",
default_model="mistralai/Mistral-Medium-3.5-128B",
provider_name="vllm",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="mistralai/Mistral-Medium-3.5-128B",
thinking_enabled=True,
)
)
assert captured["model"] == "hosted_vllm/mistralai/Mistral-Medium-3.5-128B"
assert captured["extra_body"] == {"reasoning_effort": "high"}
def test_mistral_vllm_omits_reasoning_body_when_thinking_mode_is_unspecified(
monkeypatch: pytest.MonkeyPatch,
) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="EMPTY",
api_base="http://localhost:8000/v1",
default_model="mistralai/Mistral-Medium-3.5-128B",
provider_name="vllm",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="mistralai/Mistral-Medium-3.5-128B",
)
)
assert "extra_body" not in captured
def test_mistral_openai_compatible_private_vllm_uses_reasoning_effort(
monkeypatch: pytest.MonkeyPatch,
) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="EMPTY",
api_base="http://172.19.207.103/v1",
default_model="Mistral-Medium-3.5-128B",
provider_name="openai",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="Mistral-Medium-3.5-128B",
thinking_enabled=False,
)
)
assert captured["extra_body"] == {"reasoning_effort": "none"}
assert "chat_template_kwargs" not in captured["extra_body"]
assert "thinking" not in captured["extra_body"]
def test_mistral_openai_compatible_private_vllm_omits_body_when_unspecified(
monkeypatch: pytest.MonkeyPatch,
) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="EMPTY",
api_base="http://172.19.207.103/v1",
default_model="Mistral-Medium-3.5-128B",
provider_name="openai",
)
asyncio.run(
provider.chat(
[{"role": "user", "content": "reply ok"}],
model="Mistral-Medium-3.5-128B",
)
)
assert "extra_body" not in captured
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}
class Message:
content = "ok"
reasoning_content = None
tool_calls = []
class Choice:
message = Message()
finish_reason = "stop"
class Response:
choices = [Choice()]
usage = None
async def fake_acompletion(**kwargs):
captured.update(kwargs)
return Response()
monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())
provider = LiteLLMProvider(
api_key="sk-test",
api_base="https://oai.example.com/v1",
default_model="Qwen3.6-35B",
provider_name="openai",
)
asyncio.run(
provider.chat(
[
{
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call-1",
"type": "function",
"function": {
"name": "cron",
"arguments": {"action": "add", "mode": "notification"},
},
}
],
},
{"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"},
],
model="Qwen3.6-35B",
thinking_enabled=False,
)
)
tool_call = captured["messages"][0]["tool_calls"][0]
assert tool_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'