From 030bce8a605538ae4e6f1f21c0b6bf8701448513 Mon Sep 17 00:00:00 2001 From: steven_li Date: Fri, 22 May 2026 17:43:21 +0800 Subject: [PATCH] =?UTF-8?q?feat(litellm):=20=E6=B7=BB=E5=8A=A0=20reasoning?= =?UTF-8?q?=5Fcontent=20=E6=94=AF=E6=8C=81=E5=B9=B6=E5=BC=BA=E5=88=B6?= =?UTF-8?q?=E7=A6=81=E7=94=A8=E6=80=9D=E8=80=83=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 LiteLLMProvider 中添加 "reasoning_content" 到允许的消息键集合中 - 修改 _apply_thinking_mode 方法以强制禁用思考模式,不再基于模型名称判断 - 总是设置 enable_thinking 为 False 并添加 thinking.type: disabled 配置 - 更新相关测试用例验证新的思考模式行为 fix(web): 修复非运行状态下的直接处理逻辑 - 创建 _run_web_direct 辅助函数来处理代理服务的直接提交/处理逻辑 - 当代理服务未运行时使用 process_direct 而不是 submit_direct - 更新 REST 和 WebSocket 接口以使用新的处理逻辑 - 添加相应的单元测试验证非运行状态下使用直接处理 test(config): 添加代理配置重载功能的测试 - 添加 test_reload_agent_config_updates_booted_loop_config 测试函数 - 验证配置文件更新后代理循环能够正确加载新配置 - 测试模型、API 基础地址和 API 密钥的更新 chore(frontend): 默认禁用前端思考模式偏好 - 将前端思考模式存储的默认值从 true 改为 false - 确保窗口未定义时返回 false 而不是 true - 更新本地存储缺失时的默认行为为禁用思考模式 --- .../beaver/engine/providers/litellm.py | 10 +-- .../backend/beaver/interfaces/web/app.py | 10 ++- .../backend/tests/unit/test_config_loader.py | 37 ++++++++++ .../tests/unit/test_litellm_thinking_mode.py | 74 ++++++++++++++++++- .../backend/tests/unit/test_websocket_chat.py | 72 ++++++++++++++++++ app-instance/frontend/app/(app)/page.tsx | 4 +- 6 files changed, 193 insertions(+), 14 deletions(-) diff --git a/app-instance/backend/beaver/engine/providers/litellm.py b/app-instance/backend/beaver/engine/providers/litellm.py index 8b191f0..53532f1 100644 --- a/app-instance/backend/beaver/engine/providers/litellm.py +++ b/app-instance/backend/beaver/engine/providers/litellm.py @@ -23,7 +23,7 @@ except ModuleNotFoundError: # pragma: no cover litellm = None # type: ignore[assignment] acompletion = None # type: ignore[assignment] -_ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name"}) +_ALLOWED_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_content"}) class LiteLLMProvider(LLMProvider): @@ -175,15 +175,11 @@ class LiteLLMProvider(LLMProvider): kwargs["provider"] = provider_payload def _apply_thinking_mode(self, original_model: str, resolved_model: str, kwargs: dict[str, Any], enabled: bool | None) -> None: - if enabled is None: - return - model_key = f"{original_model} {resolved_model}".lower() - if "qwen" not in model_key: - return extra_body = dict(kwargs.get("extra_body") or {}) chat_template_kwargs = dict(extra_body.get("chat_template_kwargs") or {}) - chat_template_kwargs["enable_thinking"] = bool(enabled) + chat_template_kwargs["enable_thinking"] = False extra_body["chat_template_kwargs"] = chat_template_kwargs + extra_body["thinking"] = {"type": "disabled"} kwargs["extra_body"] = extra_body async def chat( diff --git a/app-instance/backend/beaver/interfaces/web/app.py b/app-instance/backend/beaver/interfaces/web/app.py index cc06fbc..30ac62c 100644 --- a/app-instance/backend/beaver/interfaces/web/app.py +++ b/app-instance/backend/beaver/interfaces/web/app.py @@ -1745,7 +1745,7 @@ def create_app( } if payload.thinking_enabled is not None: direct_kwargs["thinking_enabled"] = payload.thinking_enabled - result = await agent_service.submit_direct(message, **direct_kwargs) + result = await _run_web_direct(agent_service, message, **direct_kwargs) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc except RuntimeError as exc: @@ -1855,7 +1855,7 @@ def create_app( websocket_thinking_enabled = _bool_or_none(payload.get("thinking_enabled")) if websocket_thinking_enabled is not None: direct_kwargs["thinking_enabled"] = websocket_thinking_enabled - result = await agent_service.submit_direct(content, **direct_kwargs) + result = await _run_web_direct(agent_service, content, **direct_kwargs) except Exception as exc: await websocket.send_json( { @@ -1940,6 +1940,12 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An } +async def _run_web_direct(agent_service: AgentService, message: str, **kwargs: Any) -> Any: + if agent_service.is_running: + return await agent_service.submit_direct(message, **kwargs) + return await agent_service.process_direct(message, **kwargs) + + def _create_skill_upload_draft(loaded: Any, filename: str, content: bytes) -> dict[str, Any]: try: archive = zipfile.ZipFile(io.BytesIO(content), "r") diff --git a/app-instance/backend/tests/unit/test_config_loader.py b/app-instance/backend/tests/unit/test_config_loader.py index 622660b..43ddb76 100644 --- a/app-instance/backend/tests/unit/test_config_loader.py +++ b/app-instance/backend/tests/unit/test_config_loader.py @@ -4,6 +4,8 @@ from beaver.engine import AgentLoop, EngineLoader from beaver.engine.providers import make_provider_bundle from beaver.engine.providers.litellm import LiteLLMProvider from beaver.foundation.config import load_config +from beaver.interfaces.web.app import _reload_agent_config +from beaver.services.agent_service import AgentService def test_load_config_reads_current_instance_shape(tmp_path) -> None: @@ -124,6 +126,41 @@ def test_agent_loop_config_drives_provider_bundle(tmp_path) -> None: loop.close() +def test_reload_agent_config_updates_booted_loop_config(tmp_path) -> None: + workspace = tmp_path / "workspace" + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": {"defaults": {"workspace": str(workspace), "model": "old-model"}}, + "providers": {"openai": {"apiKey": "sk-test", "apiBase": "https://old.example.com/v1"}}, + } + ), + encoding="utf-8", + ) + service = AgentService(config_path=config_path) + loaded = service.create_loop().boot() + assert loaded.config.default_model == "old-model" + + config_path.write_text( + json.dumps( + { + "agents": {"defaults": {"workspace": str(workspace), "model": "new-model"}}, + "providers": {"openai": {"apiKey": "sk-test", "apiBase": "https://new.example.com/v1"}}, + } + ), + encoding="utf-8", + ) + + _reload_agent_config(service, config_path) + + target = service.create_loop().boot().config.resolve_provider_target() + assert target["model"] == "new-model" + assert target["api_base"] == "https://new.example.com/v1" + assert target["api_key"] == "sk-test" + service.close() + + def test_openai_compatible_qwen_config_keeps_openai_provider() -> None: bundle = make_provider_bundle( model="qwen-plus", diff --git a/app-instance/backend/tests/unit/test_litellm_thinking_mode.py b/app-instance/backend/tests/unit/test_litellm_thinking_mode.py index fad8956..97977ea 100644 --- a/app-instance/backend/tests/unit/test_litellm_thinking_mode.py +++ b/app-instance/backend/tests/unit/test_litellm_thinking_mode.py @@ -45,10 +45,13 @@ def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest. ) assert response.content == "可以" - assert captured["extra_body"] == {"chat_template_kwargs": {"enable_thinking": False}} + assert captured["extra_body"] == { + "chat_template_kwargs": {"enable_thinking": False}, + "thinking": {"type": "disabled"}, + } -def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) -> None: +def test_thinking_mode_disabled_is_sent_without_model_name_matching(monkeypatch: pytest.MonkeyPatch) -> None: captured: dict = {} class Message: @@ -85,7 +88,72 @@ def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) -> ) ) - assert "extra_body" not in captured + assert captured["extra_body"] == { + "chat_template_kwargs": {"enable_thinking": False}, + "thinking": {"type": "disabled"}, + } + + +def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> None: + messages = [ + { + "role": "assistant", + "content": "", + "reasoning_content": "must be passed back", + "tool_calls": [ + { + "id": "call-1", + "type": "function", + "function": {"name": "lookup", "arguments": "{}"}, + } + ], + } + ] + + assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back" + + +def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict = {} + + class Message: + content = "ok" + reasoning_content = None + tool_calls = [] + + class Choice: + message = Message() + finish_reason = "stop" + + class Response: + choices = [Choice()] + usage = None + + async def fake_acompletion(**kwargs): + captured.update(kwargs) + return Response() + + monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion) + monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace()) + + provider = LiteLLMProvider( + api_key="sk-test", + api_base="https://oai.example.com/v1", + default_model="gpt-4.1-mini", + provider_name="openai", + ) + asyncio.run( + provider.chat( + [{"role": "user", "content": "reply ok"}], + model="gpt-4.1-mini", + thinking_enabled=True, + ) + ) + + assert captured["extra_body"] == { + "chat_template_kwargs": {"enable_thinking": False}, + "thinking": {"type": "disabled"}, + } def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/app-instance/backend/tests/unit/test_websocket_chat.py b/app-instance/backend/tests/unit/test_websocket_chat.py index 718b4d3..4dabf5e 100644 --- a/app-instance/backend/tests/unit/test_websocket_chat.py +++ b/app-instance/backend/tests/unit/test_websocket_chat.py @@ -30,6 +30,15 @@ class StubAgentService(AgentService): self.fail = fail self.calls: list[dict[str, Any]] = [] + async def process_direct(self, message: str, **kwargs: Any) -> StubRunResult: # type: ignore[override] + self.calls.append({"message": message, **kwargs}) + if self.fail: + raise RuntimeError("boom") + return StubRunResult( + session_id=kwargs.get("session_id") or "web:default", + output_text=f"echo:{message}", + ) + async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult: # type: ignore[override] self.calls.append({"message": message, **kwargs}) if self.fail: @@ -40,6 +49,11 @@ class StubAgentService(AgentService): ) +class DirectModeOnlyAgentService(StubAgentService): + async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult: # type: ignore[override] + raise RuntimeError("submit_direct should not be used when service is not running") + + def test_websocket_ping_pong() -> None: app = create_app(service=StubAgentService(), manage_service_lifecycle=False) @@ -101,6 +115,64 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None: } +def test_websocket_message_uses_direct_processing_when_loop_is_not_running() -> None: + service = DirectModeOnlyAgentService() + app = create_app(service=service, manage_service_lifecycle=False) + + with TestClient(app) as client: + with client.websocket_connect("/ws/web:alpha") as websocket: + websocket.send_json({"type": "message", "content": "hello"}) + assert websocket.receive_json() == {"type": "status", "status": "thinking"} + message = websocket.receive_json() + + assert service.calls == [ + { + "message": "hello", + "session_id": "web:alpha", + "source": "websocket", + "user_id": None, + "title": None, + "execution_context": None, + "model": None, + "provider_name": None, + "embedding_model": None, + "max_tool_iterations": None, + } + ] + assert message["type"] == "message" + assert message["content"] == "echo:hello" + + +def test_rest_chat_uses_direct_processing_when_loop_is_not_running() -> None: + service = DirectModeOnlyAgentService() + app = create_app(service=service, manage_service_lifecycle=False) + + with TestClient(app) as client: + response = client.post("/api/chat", json={"session_id": "web:alpha", "message": "hello"}) + + assert response.status_code == 200 + assert service.calls == [ + { + "message": "hello", + "session_id": "web:alpha", + "source": "web", + "user_id": None, + "title": None, + "execution_context": None, + "model": None, + "provider_name": None, + "embedding_model": None, + "temperature": None, + "max_tokens": None, + "max_tool_iterations": None, + "fallback_target": None, + "auxiliary_target": None, + "embedding_target": None, + } + ] + assert response.json()["output_text"] == "echo:hello" + + def test_websocket_empty_content_returns_error_without_runtime_call() -> None: service = StubAgentService() app = create_app(service=service, manage_service_lifecycle=False) diff --git a/app-instance/frontend/app/(app)/page.tsx b/app-instance/frontend/app/(app)/page.tsx index a2b1994..de05fc8 100644 --- a/app-instance/frontend/app/(app)/page.tsx +++ b/app-instance/frontend/app/(app)/page.tsx @@ -41,10 +41,10 @@ const THINKING_MODE_STORAGE_KEY = 'beaver_chat_thinking_enabled'; function loadThinkingModePreference(): boolean { if (typeof window === 'undefined') { - return true; + return false; } const stored = window.localStorage.getItem(THINKING_MODE_STORAGE_KEY); - return stored == null ? true : stored !== 'false'; + return stored == null ? false : stored !== 'false'; } export default function ChatPage() {