396 lines
11 KiB
Python
396 lines
11 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import pytest
|
|
from types import SimpleNamespace
|
|
|
|
from beaver.engine.providers.litellm import LiteLLMProvider
|
|
|
|
|
|
def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
    """Chat with ``thinking_enabled=False`` on a Qwen model and check ``extra_body``.

    The module-level ``acompletion`` is patched with a stub that records the
    keyword arguments it receives and returns a canned response object.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "可以"
        reasoning_content = ""
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertions can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider_config = {
        "api_key": "sk-test",
        "api_base": "https://oai.example.com/v1",
        "default_model": "Qwen3.6-35B",
        "provider_name": "openai",
    }
    provider = LiteLLMProvider(**provider_config)

    conversation = [{"role": "user", "content": "只回复可以"}]
    pending_chat = provider.chat(conversation, model="Qwen3.6-35B", thinking_enabled=False)
    reply = asyncio.run(pending_chat)

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert reply.content == "可以"
    assert sent_kwargs["extra_body"] == expected_body
|
|
|
|
|
|
def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the disabled-thinking ``extra_body`` is sent for a ``gpt-4.1-mini`` model name.

    The module-level ``acompletion`` is patched with a stub that records the
    keyword arguments it receives and returns a canned response object.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertion can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "gpt-4.1-mini"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=False))

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert sent_kwargs["extra_body"] == expected_body
|
|
|
|
|
|
def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None:
    """Check ``_sanitize_messages`` keeps ``reasoning_content`` on an assistant tool-call message."""
    lookup_call = {
        "id": "call-1",
        "type": "function",
        "function": {"name": "lookup", "arguments": "{}"},
    }
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "reasoning_content": "must be passed back",
        "tool_calls": [lookup_call],
    }

    sanitized = LiteLLMProvider._sanitize_messages([assistant_turn])

    assert sanitized[0]["reasoning_content"] == "must be passed back"
|
|
|
|
|
|
def test_litellm_provider_merges_late_system_messages_to_front() -> None:
    """Check a trailing system message is folded into the leading one by ``_sanitize_messages``.

    The expected result is a single system message followed by the user
    message, with both system contents joined by a blank line.
    """
    base_prompt = {"role": "system", "content": "base"}
    user_turn = {"role": "user", "content": "question"}
    late_prompt = {"role": "system", "content": "finalize without tools"}

    sanitized = LiteLLMProvider._sanitize_messages([base_prompt, user_turn, late_prompt])

    roles = [entry["role"] for entry in sanitized]
    assert roles == ["system", "user"]
    assert sanitized[0]["content"] == "base\n\nfinalize without tools"
|
|
|
|
|
|
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``thinking_enabled=True`` still yields the disabled-thinking ``extra_body``.

    The module-level ``acompletion`` is patched with a stub that records the
    keyword arguments it receives and returns a canned response object.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertion can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "gpt-4.1-mini"
    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model=model_name,
        provider_name="openai",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    # Thinking is explicitly requested here; the assertion expects it disabled anyway.
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=True))

    expected_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert sent_kwargs["extra_body"] == expected_body
|
|
|
|
|
|
def test_mistral_vllm_uses_reasoning_effort_instead_of_qwen_thinking_body(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check a Mistral model on the ``vllm`` provider sends ``reasoning_effort`` when thinking is on.

    Also checks the model name forwarded to the completion call carries the
    ``hosted_vllm/`` prefix. ``acompletion`` is patched with a recording stub.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertions can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "mistralai/Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://localhost:8000/v1",
        default_model=model_name,
        provider_name="vllm",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=True))

    assert sent_kwargs["model"] == "hosted_vllm/mistralai/Mistral-Medium-3.5-128B"
    assert sent_kwargs["extra_body"] == {"reasoning_effort": "high"}
|
|
|
|
|
|
def test_mistral_vllm_omits_reasoning_body_when_thinking_mode_is_unspecified(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check no ``extra_body`` is sent for Mistral on ``vllm`` when ``thinking_enabled`` is not passed.

    ``acompletion`` is patched with a stub that records the keyword arguments
    it receives and returns a canned response object.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertion can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "mistralai/Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://localhost:8000/v1",
        default_model=model_name,
        provider_name="vllm",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    # thinking_enabled is deliberately left out of this call.
    asyncio.run(provider.chat(conversation, model=model_name))

    assert "extra_body" not in sent_kwargs
|
|
|
|
|
|
def test_mistral_openai_compatible_private_vllm_uses_reasoning_effort(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check a Mistral model behind the ``openai`` provider sends ``reasoning_effort`` only.

    With ``thinking_enabled=False`` the forwarded ``extra_body`` is expected to
    be ``{"reasoning_effort": "none"}``, with no ``chat_template_kwargs`` or
    ``thinking`` keys. ``acompletion`` is patched with a recording stub.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertions can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(provider.chat(conversation, model=model_name, thinking_enabled=False))

    assert sent_kwargs["extra_body"] == {"reasoning_effort": "none"}
    for unwanted_key in ("chat_template_kwargs", "thinking"):
        assert unwanted_key not in sent_kwargs["extra_body"]
|
|
|
|
|
|
def test_mistral_openai_compatible_private_vllm_omits_body_when_unspecified(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check no ``extra_body`` is sent for Mistral behind ``openai`` when ``thinking_enabled`` is not passed.

    ``acompletion`` is patched with a stub that records the keyword arguments
    it receives and returns a canned response object.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertion can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    model_name = "Mistral-Medium-3.5-128B"
    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )

    conversation = [{"role": "user", "content": "reply ok"}]
    # thinking_enabled is deliberately left out of this call.
    asyncio.run(provider.chat(conversation, model=model_name))

    assert "extra_body" not in sent_kwargs
|
|
|
|
|
|
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check dict-valued tool-call ``arguments`` reach the completion call as a JSON string.

    The history contains an assistant tool call whose ``arguments`` is a dict,
    followed by the matching tool result. ``acompletion`` is patched with a
    stub that records the keyword arguments it receives.
    """
    sent_kwargs: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**kwargs):
        # Keep whatever the provider forwarded so the assertion can inspect it.
        sent_kwargs.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="Qwen3.6-35B",
        provider_name="openai",
    )

    # The arguments are passed as a dict on purpose, not as a JSON string.
    cron_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    history = [
        {"role": "assistant", "content": None, "tool_calls": [cron_call]},
        {"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"},
    ]
    asyncio.run(provider.chat(history, model="Qwen3.6-35B", thinking_enabled=False))

    forwarded_call = sent_kwargs["messages"][0]["tool_calls"][0]
    assert forwarded_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
|