diff --git a/app-instance/backend/beaver/engine/providers/litellm.py b/app-instance/backend/beaver/engine/providers/litellm.py index fab3ded..7fe2e7f 100644 --- a/app-instance/backend/beaver/engine/providers/litellm.py +++ b/app-instance/backend/beaver/engine/providers/litellm.py @@ -3,9 +3,11 @@ from __future__ import annotations from contextlib import contextmanager +from ipaddress import ip_address import json import os from typing import Any +from urllib.parse import urlsplit from .base import LLMProvider, LLMResponse, ToolCallRequest from .registry import find_by_model, find_by_name, find_gateway @@ -26,6 +28,23 @@ except ModuleNotFoundError: # pragma: no cover _ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content"}) +def _looks_like_local_vllm_api_base(api_base: str | None) -> bool: + if not api_base: + return False + lowered = api_base.lower() + if "vllm" in lowered or "localhost" in lowered: + return True + + host = urlsplit(lowered).hostname or "" + if host in {"127.0.0.1", "::1", "0.0.0.0"}: + return True + try: + parsed_host = ip_address(host) + except ValueError: + return False + return parsed_host.is_private or parsed_host.is_loopback + + class LiteLLMProvider(LLMProvider): """通过 LiteLLM 统一访问大多数 provider。""" @@ -200,10 +219,12 @@ class LiteLLMProvider(LLMProvider): kwargs["extra_body"] = extra_body def _uses_mistral_reasoning_parser(self, original_model: str, resolved_model: str) -> bool: - if self.provider_name != "vllm": - return False model_names = f"{original_model} {resolved_model}".lower() - return "mistral" in model_names + if "mistral" not in model_names: + return False + if self.provider_name == "vllm": + return True + return self.provider_name in {"openai", "custom"} and _looks_like_local_vllm_api_base(self.api_base) async def chat( self, diff --git a/app-instance/backend/tests/unit/test_litellm_thinking_mode.py b/app-instance/backend/tests/unit/test_litellm_thinking_mode.py index 
def test_mistral_openai_compatible_private_vllm_uses_reasoning_effort(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With thinking disabled, a Mistral model on a private-IP "openai" base sends only ``reasoning_effort``."""
    recorded_kwargs: dict = {}
    model_name = "Mistral-Medium-3.5-128B"

    # Minimal stand-ins for the completion response object returned to the provider.
    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**call_kwargs):
        # Keep the keyword arguments the provider passed so they can be inspected below.
        recorded_kwargs.update(call_kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )
    pending_chat = provider.chat(
        [{"role": "user", "content": "reply ok"}],
        model=model_name,
        thinking_enabled=False,
    )
    asyncio.run(pending_chat)

    extra_body = recorded_kwargs["extra_body"]
    assert extra_body == {"reasoning_effort": "none"}
    assert "chat_template_kwargs" not in extra_body
    assert "thinking" not in extra_body


def test_mistral_openai_compatible_private_vllm_omits_body_when_unspecified(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When ``thinking_enabled`` is not passed, the same setup sends no ``extra_body`` at all."""
    recorded_kwargs: dict = {}
    model_name = "Mistral-Medium-3.5-128B"

    # Minimal stand-ins for the completion response object returned to the provider.
    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def record_and_reply(**call_kwargs):
        # Keep the keyword arguments the provider passed so they can be inspected below.
        recorded_kwargs.update(call_kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", record_and_reply)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider = LiteLLMProvider(
        api_key="EMPTY",
        api_base="http://172.19.207.103/v1",
        default_model=model_name,
        provider_name="openai",
    )
    pending_chat = provider.chat(
        [{"role": "user", "content": "reply ok"}],
        model=model_name,
    )
    asyncio.run(pending_chat)

    assert "extra_body" not in recorded_kwargs