feat(litellm): 添加 reasoning_content 支持并强制禁用思考模式
- 在 LiteLLMProvider 中添加 "reasoning_content" 到允许的消息键集合中
- 修改 _apply_thinking_mode 方法以强制禁用思考模式，不再基于模型名称判断
- 总是设置 enable_thinking 为 False 并添加 thinking.type: disabled 配置
- 更新相关测试用例验证新的思考模式行为

fix(web): 修复非运行状态下的直接处理逻辑
- 创建 _run_web_direct 辅助函数来处理代理服务的直接提交/处理逻辑
- 当代理服务未运行时使用 process_direct 而不是 submit_direct
- 更新 REST 和 WebSocket 接口以使用新的处理逻辑
- 添加相应的单元测试验证非运行状态下使用直接处理

test(config): 添加代理配置重载功能的测试
- 添加 test_reload_agent_config_updates_booted_loop_config 测试函数
- 验证配置文件更新后代理循环能够正确加载新配置
- 测试模型、API 基础地址和 API 密钥的更新

chore(frontend): 默认禁用前端思考模式偏好
- 将前端思考模式存储的默认值从 true 改为 false
- 确保窗口未定义时返回 false 而不是 true
- 更新本地存储缺失时的默认行为为禁用思考模式
This commit is contained in:
@ -23,7 +23,7 @@ except ModuleNotFoundError: # pragma: no cover
|
|||||||
litellm = None # type: ignore[assignment]
|
litellm = None # type: ignore[assignment]
|
||||||
acompletion = None # type: ignore[assignment]
|
acompletion = None # type: ignore[assignment]
|
||||||
|
|
||||||
_ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name"})
|
_ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content"})
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMProvider(LLMProvider):
|
class LiteLLMProvider(LLMProvider):
|
||||||
@ -175,15 +175,11 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
kwargs["provider"] = provider_payload
|
kwargs["provider"] = provider_payload
|
||||||
|
|
||||||
def _apply_thinking_mode(self, original_model: str, resolved_model: str, kwargs: dict[str, Any], enabled: bool | None) -> None:
|
def _apply_thinking_mode(self, original_model: str, resolved_model: str, kwargs: dict[str, Any], enabled: bool | None) -> None:
|
||||||
if enabled is None:
|
|
||||||
return
|
|
||||||
model_key = f"{original_model} {resolved_model}".lower()
|
|
||||||
if "qwen" not in model_key:
|
|
||||||
return
|
|
||||||
extra_body = dict(kwargs.get("extra_body") or {})
|
extra_body = dict(kwargs.get("extra_body") or {})
|
||||||
chat_template_kwargs = dict(extra_body.get("chat_template_kwargs") or {})
|
chat_template_kwargs = dict(extra_body.get("chat_template_kwargs") or {})
|
||||||
chat_template_kwargs["enable_thinking"] = bool(enabled)
|
chat_template_kwargs["enable_thinking"] = False
|
||||||
extra_body["chat_template_kwargs"] = chat_template_kwargs
|
extra_body["chat_template_kwargs"] = chat_template_kwargs
|
||||||
|
extra_body["thinking"] = {"type": "disabled"}
|
||||||
kwargs["extra_body"] = extra_body
|
kwargs["extra_body"] = extra_body
|
||||||
|
|
||||||
async def chat(
|
async def chat(
|
||||||
|
|||||||
@ -1745,7 +1745,7 @@ def create_app(
|
|||||||
}
|
}
|
||||||
if payload.thinking_enabled is not None:
|
if payload.thinking_enabled is not None:
|
||||||
direct_kwargs["thinking_enabled"] = payload.thinking_enabled
|
direct_kwargs["thinking_enabled"] = payload.thinking_enabled
|
||||||
result = await agent_service.submit_direct(message, **direct_kwargs)
|
result = await _run_web_direct(agent_service, message, **direct_kwargs)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||||
except RuntimeError as exc:
|
except RuntimeError as exc:
|
||||||
@ -1855,7 +1855,7 @@ def create_app(
|
|||||||
websocket_thinking_enabled = _bool_or_none(payload.get("thinking_enabled"))
|
websocket_thinking_enabled = _bool_or_none(payload.get("thinking_enabled"))
|
||||||
if websocket_thinking_enabled is not None:
|
if websocket_thinking_enabled is not None:
|
||||||
direct_kwargs["thinking_enabled"] = websocket_thinking_enabled
|
direct_kwargs["thinking_enabled"] = websocket_thinking_enabled
|
||||||
result = await agent_service.submit_direct(content, **direct_kwargs)
|
result = await _run_web_direct(agent_service, content, **direct_kwargs)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
@ -1940,6 +1940,12 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_web_direct(agent_service: AgentService, message: str, **kwargs: Any) -> Any:
|
||||||
|
if agent_service.is_running:
|
||||||
|
return await agent_service.submit_direct(message, **kwargs)
|
||||||
|
return await agent_service.process_direct(message, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _create_skill_upload_draft(loaded: Any, filename: str, content: bytes) -> dict[str, Any]:
|
def _create_skill_upload_draft(loaded: Any, filename: str, content: bytes) -> dict[str, Any]:
|
||||||
try:
|
try:
|
||||||
archive = zipfile.ZipFile(io.BytesIO(content), "r")
|
archive = zipfile.ZipFile(io.BytesIO(content), "r")
|
||||||
|
|||||||
@ -4,6 +4,8 @@ from beaver.engine import AgentLoop, EngineLoader
|
|||||||
from beaver.engine.providers import make_provider_bundle
|
from beaver.engine.providers import make_provider_bundle
|
||||||
from beaver.engine.providers.litellm import LiteLLMProvider
|
from beaver.engine.providers.litellm import LiteLLMProvider
|
||||||
from beaver.foundation.config import load_config
|
from beaver.foundation.config import load_config
|
||||||
|
from beaver.interfaces.web.app import _reload_agent_config
|
||||||
|
from beaver.services.agent_service import AgentService
|
||||||
|
|
||||||
|
|
||||||
def test_load_config_reads_current_instance_shape(tmp_path) -> None:
|
def test_load_config_reads_current_instance_shape(tmp_path) -> None:
|
||||||
@ -124,6 +126,41 @@ def test_agent_loop_config_drives_provider_bundle(tmp_path) -> None:
|
|||||||
loop.close()
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_reload_agent_config_updates_booted_loop_config(tmp_path) -> None:
    """Check that ``_reload_agent_config`` makes new loops see the rewritten config.

    The config file is written once with the "old" model and API base, a loop
    is booted from it, then the file is rewritten with "new" values and the
    service is reloaded. A loop created afterwards must resolve its provider
    target from the new values.
    """
    workspace = tmp_path / "workspace"
    config_path = tmp_path / "config.json"

    def write_config(model: str, api_base: str) -> None:
        # Both revisions share the same shape; only model and apiBase differ.
        payload = {
            "agents": {"defaults": {"workspace": str(workspace), "model": model}},
            "providers": {"openai": {"apiKey": "sk-test", "apiBase": api_base}},
        }
        config_path.write_text(json.dumps(payload), encoding="utf-8")

    write_config("old-model", "https://old.example.com/v1")
    service = AgentService(config_path=config_path)
    # Close the service even when an assertion fails so a failing run does not
    # leave it open for the tests that follow.
    try:
        loaded = service.create_loop().boot()
        assert loaded.config.default_model == "old-model"

        write_config("new-model", "https://new.example.com/v1")

        _reload_agent_config(service, config_path)

        target = service.create_loop().boot().config.resolve_provider_target()
        assert target["model"] == "new-model"
        assert target["api_base"] == "https://new.example.com/v1"
        assert target["api_key"] == "sk-test"
    finally:
        service.close()
|
||||||
|
|
||||||
|
|
||||||
def test_openai_compatible_qwen_config_keeps_openai_provider() -> None:
|
def test_openai_compatible_qwen_config_keeps_openai_provider() -> None:
|
||||||
bundle = make_provider_bundle(
|
bundle = make_provider_bundle(
|
||||||
model="qwen-plus",
|
model="qwen-plus",
|
||||||
|
|||||||
@ -45,10 +45,13 @@ def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert response.content == "可以"
|
assert response.content == "可以"
|
||||||
assert captured["extra_body"] == {"chat_template_kwargs": {"enable_thinking": False}}
|
assert captured["extra_body"] == {
|
||||||
|
"chat_template_kwargs": {"enable_thinking": False},
|
||||||
|
"thinking": {"type": "disabled"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
captured: dict = {}
|
captured: dict = {}
|
||||||
|
|
||||||
class Message:
|
class Message:
|
||||||
@ -85,7 +88,72 @@ def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) ->
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
assert "extra_body" not in captured
|
assert captured["extra_body"] == {
|
||||||
|
"chat_template_kwargs": {"enable_thinking": False},
|
||||||
|
"thinking": {"type": "disabled"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None:
    """``_sanitize_messages`` must keep ``reasoning_content`` on an assistant tool-call message."""
    tool_call = {
        "id": "call-1",
        "type": "function",
        "function": {"name": "lookup", "arguments": "{}"},
    }
    assistant_message = {
        "role": "assistant",
        "content": "",
        "reasoning_content": "must be passed back",
        "tool_calls": [tool_call],
    }

    sanitized = LiteLLMProvider._sanitize_messages([assistant_message])

    assert sanitized[0]["reasoning_content"] == "must be passed back"
|
||||||
|
|
||||||
|
|
||||||
|
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """Passing ``thinking_enabled=True`` must still send the disabled-thinking ``extra_body``."""
    captured: dict = {}

    # Minimal stand-ins for the completion response objects the provider reads.
    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def fake_acompletion(**kwargs):
        # Record exactly what the provider would have sent to litellm.
        captured.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="gpt-4.1-mini",
        provider_name="openai",
    )
    user_messages = [{"role": "user", "content": "reply ok"}]
    pending_chat = provider.chat(
        user_messages,
        model="gpt-4.1-mini",
        thinking_enabled=True,
    )
    asyncio.run(pending_chat)

    expected_extra_body = {
        "chat_template_kwargs": {"enable_thinking": False},
        "thinking": {"type": "disabled"},
    }
    assert captured["extra_body"] == expected_extra_body
|
||||||
|
|
||||||
|
|
||||||
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
|||||||
@ -30,6 +30,15 @@ class StubAgentService(AgentService):
|
|||||||
self.fail = fail
|
self.fail = fail
|
||||||
self.calls: list[dict[str, Any]] = []
|
self.calls: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
async def process_direct(self, message: str, **kwargs: Any) -> StubRunResult:  # type: ignore[override]
    """Record the call, then echo the message back.

    Appends ``{"message": message, **kwargs}`` to ``self.calls``. Raises
    ``RuntimeError("boom")`` when ``self.fail`` is truthy; otherwise returns
    a ``StubRunResult`` whose session id falls back to ``"web:default"``.
    """
    recorded_call: dict[str, Any] = {"message": message}
    recorded_call.update(kwargs)
    self.calls.append(recorded_call)

    if self.fail:
        raise RuntimeError("boom")

    session_id = kwargs.get("session_id") or "web:default"
    return StubRunResult(session_id=session_id, output_text=f"echo:{message}")
|
||||||
|
|
||||||
async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult: # type: ignore[override]
|
async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult: # type: ignore[override]
|
||||||
self.calls.append({"message": message, **kwargs})
|
self.calls.append({"message": message, **kwargs})
|
||||||
if self.fail:
|
if self.fail:
|
||||||
@ -40,6 +49,11 @@ class StubAgentService(AgentService):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DirectModeOnlyAgentService(StubAgentService):
    """Stub service whose ``submit_direct`` always fails.

    Used by tests that expect the web layer to avoid ``submit_direct``
    when the service is not running.
    """

    async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult:  # type: ignore[override]
        """Always raise: calling this means the wrong path was taken."""
        reason = "submit_direct should not be used when service is not running"
        raise RuntimeError(reason)
|
||||||
|
|
||||||
|
|
||||||
def test_websocket_ping_pong() -> None:
|
def test_websocket_ping_pong() -> None:
|
||||||
app = create_app(service=StubAgentService(), manage_service_lifecycle=False)
|
app = create_app(service=StubAgentService(), manage_service_lifecycle=False)
|
||||||
|
|
||||||
@ -101,6 +115,64 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_websocket_message_uses_direct_processing_when_loop_is_not_running() -> None:
    """A WebSocket message is served via ``process_direct`` when the service is not running."""
    service = DirectModeOnlyAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "message", "content": "hello"})
        # The status frame arrives first, followed by the actual reply.
        status_frame = websocket.receive_json()
        assert status_frame == {"type": "status", "status": "thinking"}
        message = websocket.receive_json()

    expected_call = {
        "message": "hello",
        "session_id": "web:alpha",
        "source": "websocket",
        "user_id": None,
        "title": None,
        "execution_context": None,
        "model": None,
        "provider_name": None,
        "embedding_model": None,
        "max_tool_iterations": None,
    }
    assert service.calls == [expected_call]
    assert message["type"] == "message"
    assert message["content"] == "echo:hello"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rest_chat_uses_direct_processing_when_loop_is_not_running() -> None:
    """``POST /api/chat`` is served via ``process_direct`` when the service is not running."""
    service = DirectModeOnlyAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)
    request_body = {"session_id": "web:alpha", "message": "hello"}

    with TestClient(app) as client:
        response = client.post("/api/chat", json=request_body)

    expected_call = {
        "message": "hello",
        "session_id": "web:alpha",
        "source": "web",
        "user_id": None,
        "title": None,
        "execution_context": None,
        "model": None,
        "provider_name": None,
        "embedding_model": None,
        "temperature": None,
        "max_tokens": None,
        "max_tool_iterations": None,
        "fallback_target": None,
        "auxiliary_target": None,
        "embedding_target": None,
    }
    assert response.status_code == 200
    assert service.calls == [expected_call]
    assert response.json()["output_text"] == "echo:hello"
|
||||||
|
|
||||||
|
|
||||||
def test_websocket_empty_content_returns_error_without_runtime_call() -> None:
|
def test_websocket_empty_content_returns_error_without_runtime_call() -> None:
|
||||||
service = StubAgentService()
|
service = StubAgentService()
|
||||||
app = create_app(service=service, manage_service_lifecycle=False)
|
app = create_app(service=service, manage_service_lifecycle=False)
|
||||||
|
|||||||
@ -41,10 +41,10 @@ const THINKING_MODE_STORAGE_KEY = 'beaver_chat_thinking_enabled';
|
|||||||
|
|
||||||
function loadThinkingModePreference(): boolean {
|
function loadThinkingModePreference(): boolean {
|
||||||
if (typeof window === 'undefined') {
|
if (typeof window === 'undefined') {
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
const stored = window.localStorage.getItem(THINKING_MODE_STORAGE_KEY);
|
const stored = window.localStorage.getItem(THINKING_MODE_STORAGE_KEY);
|
||||||
return stored == null ? true : stored !== 'false';
|
return stored == null ? false : stored !== 'false';
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function ChatPage() {
|
export default function ChatPage() {
|
||||||
|
|||||||
Reference in New Issue
Block a user