fix(providers): avoid chat template body for vllm mistral

2026-06-09 13:19:09 +08:00
parent 9e2c02a333
commit dc4c6f313d
2 changed files with 109 additions and 3 deletions
--- a/app-instance/backend/tests/unit/test_litellm_thinking_mode.py
+++ b/app-instance/backend/tests/unit/test_litellm_thinking_mode.py
@@ -253,6 +253,91 @@ def test_mistral_vllm_omits_reasoning_body_when_thinking_mode_is_unspecified(
    assert "extra_body" not in captured


+def test_mistral_openai_compatible_private_vllm_uses_reasoning_effort(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:  # thinking_enabled=False must send extra_body == {"reasoning_effort": "none"}
+    captured: dict = {}  # filled with the kwargs the patched acompletion receives
+
+    class Message:  # stub reply message: text only, no reasoning, no tool calls
+        content = "ok"
+        reasoning_content = None
+        tool_calls = []
+
+    class Choice:  # stub choice wrapping the message above
+        message = Message()
+        finish_reason = "stop"
+
+    class Response:  # stub completion result: one choice, no usage data
+        choices = [Choice()]
+        usage = None
+
+    async def fake_acompletion(**kwargs):  # stand-in for acompletion; only records and returns
+        captured.update(kwargs)  # keep the request kwargs for the assertions below
+        return Response()
+
+    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
+    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())  # empty stand-in
+
+    provider = LiteLLMProvider(
+        api_key="EMPTY",
+        api_base="http://172.19.207.103/v1",  # presumably the private vLLM endpoint named in the test - confirm
+        default_model="Mistral-Medium-3.5-128B",
+        provider_name="openai",
+    )
+    asyncio.run(  # drive the async chat() from this synchronous test
+        provider.chat(
+            [{"role": "user", "content": "reply ok"}],
+            model="Mistral-Medium-3.5-128B",
+            thinking_enabled=False,  # thinking explicitly turned off
+        )
+    )
+
+    assert captured["extra_body"] == {"reasoning_effort": "none"}
+    assert "chat_template_kwargs" not in captured["extra_body"]  # implied by the equality above; names the key
+    assert "thinking" not in captured["extra_body"]  # likewise implied; kept as an explicit guard
+
+
+def test_mistral_openai_compatible_private_vllm_omits_body_when_unspecified(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:  # without thinking_enabled, no extra_body may reach acompletion
+    captured: dict = {}  # filled with the kwargs the patched acompletion receives
+
+    class Message:  # stub reply message: text only, no reasoning, no tool calls
+        content = "ok"
+        reasoning_content = None
+        tool_calls = []
+
+    class Choice:  # stub choice wrapping the message above
+        message = Message()
+        finish_reason = "stop"
+
+    class Response:  # stub completion result: one choice, no usage data
+        choices = [Choice()]
+        usage = None
+
+    async def fake_acompletion(**kwargs):  # stand-in for acompletion; only records and returns
+        captured.update(kwargs)  # keep the request kwargs for the assertion below
+        return Response()
+
+    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
+    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())  # empty stand-in
+
+    provider = LiteLLMProvider(
+        api_key="EMPTY",
+        api_base="http://172.19.207.103/v1",  # presumably the private vLLM endpoint named in the test - confirm
+        default_model="Mistral-Medium-3.5-128B",
+        provider_name="openai",
+    )
+    asyncio.run(  # drive the async chat() from this synchronous test
+        provider.chat(  # thinking_enabled is not passed in this call
+            [{"role": "user", "content": "reply ok"}],
+            model="Mistral-Medium-3.5-128B",
+        )
+    )
+
+    assert "extra_body" not in captured  # the key must be absent, not merely empty
+
+
 def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    captured: dict = {}