diff --git a/config.example.toml b/config.example.toml index 1a3d69d..44a8318 100644 --- a/config.example.toml +++ b/config.example.toml @@ -24,6 +24,17 @@ model = "gpt-4o-mini" api_key = "sk-..." base_url = "https://api.openai.com/v1" +timeout_seconds = 180.0 + +# ── Multimodal LLM ─────────────────────────────────── +# Independent vision/audio-capable chat-completions endpoint for parsing. +[multimodal] +model = "google/gemini-3-flash-preview" +api_key = "sk-..." +base_url = "https://openrouter.ai/api/v1" +timeout_seconds = 180.0 +resize_images_for_vlm = true +max_concurrency = 4 # ── Embedding ───────────────────────────────────────── [embedding] diff --git a/src/everos/component/llm/client.py b/src/everos/component/llm/client.py index 846dcf1..e00ac96 100644 --- a/src/everos/component/llm/client.py +++ b/src/everos/component/llm/client.py @@ -9,9 +9,16 @@ provider) instead of silently failing per-request downstream. from __future__ import annotations +import base64 +import binascii +from io import BytesIO +from typing import Any + from everalgo.llm import build_client from everalgo.llm.config import LLMConfig from everalgo.llm.protocols import LLMClient +from everalgo.llm.types import ChatMessage, ChatResponse, ImageUrlPart, TextPart +from pydantic import BaseModel from everos.config import load_settings from everos.core.observability.logging import get_logger @@ -25,6 +32,212 @@ class LLMNotConfiguredError(RuntimeError): _llm_client: LLMClient | None = None _multimodal_client: LLMClient | None = None +_VLM_IMAGE_MIN_SIDE = 64 +_NO_THINKING_EXTRA_BODY_KEY = "chat_template_kwargs" +_NO_THINKING_PARAM = {"enable_thinking": False} + +_IMAGE_VISUAL_MEMORY_PROMPT = """Describe this image for visual memory retrieval. + +Output final Markdown directly; do not include reasoning. + +Focus on: +1. Key visible objects and their names, brands, colors, labels, quantities. +2.
Spatial relationships and relative positions: left/right/above/below/center, + foreground/background, nearby objects, and supporting surfaces. +3. Location-query facts, e.g. "the milk carton is center-left, to the right of + X and to the left of Y". +4. Important visible text, but extract only useful labels/interface text; do + not exhaustively OCR every key or menu item if that would crowd out object + locations. + +Do NOT describe the parser, assistant, or ChatGPT as processing the image. +If "ChatGPT" is visible, list it only as visible interface text. +""" + + +class _NoThinkingRequestDefaultsClient: + """Inject default no-thinking request params for OpenAI-compatible servers.""" + + def __init__(self, inner: LLMClient) -> None: + self._inner = inner + + async def chat( + self, + messages: list[ChatMessage], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + response_format: type[BaseModel] | None = None, + **extra: Any, + ) -> ChatResponse: + return await self._inner.chat( + messages, + model=model, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + **_with_no_thinking_defaults(extra), + ) + + +class _MultimodalImageDetailCompatClient: + """Patch image parts for strict OpenAI-compatible gateways. + + everalgo-core 0.2.0 serialises ``image_url.detail`` as ``None`` when the + field is unset. Some gateways reject that literal null and require one of + OpenAI's enum values. EverOS only uses this wrapper for multimodal parsing. 
+ """ + + def __init__(self, inner: LLMClient, *, resize_images_for_vlm: bool) -> None: + self._inner = inner + self._resize_images_for_vlm = resize_images_for_vlm + + async def chat( + self, + messages: list[ChatMessage], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + response_format: type[BaseModel] | None = None, + **extra: Any, + ) -> ChatResponse: + return await self._inner.chat( + [ + _with_multimodal_image_defaults( + m, + resize_images_for_vlm=self._resize_images_for_vlm, + ) + for m in messages + ], + model=model, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + **_with_no_thinking_defaults(extra), + ) + + +def _with_no_thinking_defaults(extra: dict[str, Any]) -> dict[str, Any]: + """Return request kwargs with no-thinking enabled unless caller overrides.""" + patched = dict(extra) + extra_body = dict(patched.get("extra_body") or {}) + chat_template_kwargs = dict(extra_body.get(_NO_THINKING_EXTRA_BODY_KEY) or {}) + chat_template_kwargs.setdefault( + "enable_thinking", _NO_THINKING_PARAM["enable_thinking"] + ) + extra_body[_NO_THINKING_EXTRA_BODY_KEY] = chat_template_kwargs + patched["extra_body"] = extra_body + return patched + + +def _with_multimodal_image_defaults( + message: ChatMessage, *, resize_images_for_vlm: bool = True +) -> ChatMessage: + """Return a copy with stricter-gateway + visual-memory image defaults.""" + content = message.content + if not isinstance(content, list): + return message + + has_image = any(_is_image_part(part) for part in content) + instructions_added = False + changed = False + patched_parts: list[object] = [] + for part in content: + patched = part + if isinstance(part, ImageUrlPart): + image_url_updates: dict[str, object] = {} + if part.image_url.detail is None: + image_url_updates["detail"] = "auto" + if resize_images_for_vlm: + resized_url = _resize_image_data_url(part.image_url.url) + if resized_url != part.image_url.url: + 
image_url_updates["url"] = resized_url + if image_url_updates: + image_url = part.image_url.model_copy(update=image_url_updates) + patched = part.model_copy(update={"image_url": image_url}) + changed = True + if ( + has_image + and not instructions_added + and isinstance(patched, TextPart) + and patched.text != _IMAGE_VISUAL_MEMORY_PROMPT + ): + patched = patched.model_copy( + update={"text": _IMAGE_VISUAL_MEMORY_PROMPT} + ) + instructions_added = True + changed = True + patched_parts.append(patched) + + if not changed: + return message + return message.model_copy(update={"content": patched_parts}) + + +def _is_image_part(part: object) -> bool: + return ( + isinstance(part, ImageUrlPart) + and part.image_url.url.startswith("data:image/") + ) + + +def _resize_image_data_url(url: str) -> str: + """Resize base64 data-url images to a ``_VLM_IMAGE_MIN_SIDE`` px shorter side.""" + if not url.startswith("data:image/"): + return url + try: + header, encoded = url.split(",", 1) + except ValueError: + return url + if ";base64" not in header.lower(): + return url + + mime_type = header[5:].split(";", 1)[0].lower() + image_format = { + "image/jpeg": "JPEG", + "image/jpg": "JPEG", + "image/png": "PNG", + "image/webp": "WEBP", + }.get(mime_type) + if image_format is None: + return url + + try: + from PIL import Image, ImageOps + + raw = base64.b64decode(encoded, validate=True) + with Image.open(BytesIO(raw)) as image: + image = ImageOps.exif_transpose(image) + target_size = _image_size_with_min_side( + image.size, _VLM_IMAGE_MIN_SIDE + ) + resized = image.resize(target_size, Image.Resampling.LANCZOS) + if image_format == "JPEG" and resized.mode not in ("RGB", "L"): + resized = resized.convert("RGB") + buffer = BytesIO() + save_kwargs: dict[str, object] = {"format": image_format} + if image_format == "JPEG": + save_kwargs["quality"] = 85 + resized.save(buffer, **save_kwargs) + except (ImportError, ValueError, OSError, binascii.Error): + return url + + resized_encoded =
base64.b64encode(buffer.getvalue()).decode("ascii") + return f"{header},{resized_encoded}" + + +def _image_size_with_min_side( + size: tuple[int, int], + min_side: int, +) -> tuple[int, int]: + width, height = size + shortest = min(width, height) + if shortest <= 0: + return (max(1, width), max(1, height)) + scale = min_side / shortest + return (max(1, round(width * scale)), max(1, round(height * scale))) def get_llm_client() -> LLMClient: @@ -46,11 +259,14 @@ def get_llm_client() -> LLMClient: raise LLMNotConfiguredError( "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL" ) - _llm_client = build_client( - LLMConfig( - model=llm_cfg.model, - api_key=api_key, - base_url=llm_cfg.base_url, + _llm_client = _NoThinkingRequestDefaultsClient( + build_client( + LLMConfig( + model=llm_cfg.model, + api_key=api_key, + base_url=llm_cfg.base_url, + timeout=llm_cfg.timeout_seconds, + ) ) ) logger.info("llm_client_built", model=llm_cfg.model) @@ -78,12 +294,16 @@ def get_multimodal_llm_client() -> LLMClient: "Multimodal LLM is required for parsing; set " "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL" ) - _multimodal_client = build_client( - LLMConfig( - model=cfg.model, - api_key=api_key, - base_url=cfg.base_url, - ) + _multimodal_client = _MultimodalImageDetailCompatClient( + build_client( + LLMConfig( + model=cfg.model, + api_key=api_key, + base_url=cfg.base_url, + timeout=cfg.timeout_seconds, + ) + ), + resize_images_for_vlm=cfg.resize_images_for_vlm, ) logger.info("multimodal_llm_client_built", model=cfg.model) return _multimodal_client diff --git a/src/everos/component/llm/factory.py b/src/everos/component/llm/factory.py index d0db74f..3c9a944 100644 --- a/src/everos/component/llm/factory.py +++ b/src/everos/component/llm/factory.py @@ -42,4 +42,5 @@ def build_llm_provider(settings: LLMSettings) -> LLMClient: model=settings.model, api_key=settings.api_key.get_secret_value(), base_url=settings.base_url, + timeout=settings.timeout_seconds, ) diff 
--git a/src/everos/config/default.toml b/src/everos/config/default.toml index 1bea9a5..3f96b06 100644 --- a/src/everos/config/default.toml +++ b/src/everos/config/default.toml @@ -56,17 +56,21 @@ cache_size_kb = 2048 [llm] # Provider-agnostic OpenAI-protocol client config. Override via env: -# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL +# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL, EVEROS_LLM__TIMEOUT_SECONDS # Or via a ``.env`` file next to the project root (auto-loaded). model = "gpt-4o-mini" +timeout_seconds = 180.0 # api_key = "" # base_url = "" [multimodal] # Independent LLM for multimodal parsing (everalgo-parser); must accept # image / pdf / audio image_url parts. Override via env: -# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL +# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL, +# EVEROS_MULTIMODAL__TIMEOUT_SECONDS, EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM model = "google/gemini-3-flash-preview" +timeout_seconds = 180.0 +resize_images_for_vlm = true max_concurrency = 4 # api_key = "" # base_url = "" diff --git a/src/everos/config/settings.py b/src/everos/config/settings.py index 98337f4..c9b672b 100644 --- a/src/everos/config/settings.py +++ b/src/everos/config/settings.py @@ -121,11 +121,13 @@ class LLMSettings(BaseModel): EVEROS_LLM__MODEL EVEROS_LLM__API_KEY EVEROS_LLM__BASE_URL + EVEROS_LLM__TIMEOUT_SECONDS """ model: str = "gpt-4o-mini" api_key: SecretStr | None = None base_url: str | None = None + timeout_seconds: float = Field(default=180.0, gt=0) class MultimodalSettings(BaseModel): @@ -140,6 +142,8 @@ class MultimodalSettings(BaseModel): EVEROS_MULTIMODAL__MODEL EVEROS_MULTIMODAL__API_KEY EVEROS_MULTIMODAL__BASE_URL + EVEROS_MULTIMODAL__TIMEOUT_SECONDS + EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM EVEROS_MULTIMODAL__MAX_CONCURRENCY EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES @@ -148,6 +152,8 @@ class 
MultimodalSettings(BaseModel): model: str = "google/gemini-3-flash-preview" api_key: SecretStr | None = None base_url: str | None = None + timeout_seconds: float = Field(default=180.0, gt=0) + resize_images_for_vlm: bool = True max_concurrency: int = 4 # ``file://`` content-item support (read locally by EverOS, not everalgo). diff --git a/src/everos/memory/cascade/watcher.py b/src/everos/memory/cascade/watcher.py index 49f2a9b..a001247 100644 --- a/src/everos/memory/cascade/watcher.py +++ b/src/everos/memory/cascade/watcher.py @@ -47,6 +47,7 @@ class CascadeWatcher: self._observer = Observer() self._handler = _Handler(memory_root, loop) self._started = False + self._observer_started = False def start(self) -> None: if self._started: @@ -54,18 +55,31 @@ class CascadeWatcher: # The memory root is created lazily by other layers; watchdog # rejects non-existent paths so we ensure it exists here. self._memory_root.ensure() - self._observer.schedule( - self._handler, str(self._memory_root.root), recursive=True - ) - self._observer.start() + watch_roots = _watch_roots(self._memory_root.root) + for root in watch_roots: + self._observer.schedule(self._handler, str(root), recursive=True) + if watch_roots: + self._observer.start() + self._observer_started = True + else: + logger.warning( + "cascade_watcher_no_user_visible_roots", + root=str(self._memory_root.root), + ) self._started = True - logger.info("cascade_watcher_started", root=str(self._memory_root.root)) + logger.info( + "cascade_watcher_started", + root=str(self._memory_root.root), + watched_roots=[str(root) for root in watch_roots], + ) def stop(self) -> None: if not self._started: return - self._observer.stop() - self._observer.join(timeout=5) + if self._observer_started: + self._observer.stop() + self._observer.join(timeout=5) + self._observer_started = False self._started = False logger.info("cascade_watcher_stopped") @@ -163,6 +177,22 @@ def _relative_to_root(root: Path, raw: str) -> str | None: return 
rel.as_posix() +def _watch_roots(root: Path) -> list[Path]: + """Return user-visible top-level dirs to watch, excluding system dot dirs.""" + try: + children = list(root.iterdir()) + except OSError: + return [] + return sorted( + ( + child + for child in children + if child.is_dir() and not child.name.startswith(".") + ), + key=lambda p: p.name, + ) + + def _safe_mtime(raw: str) -> float: """Return mtime in seconds, falling back to 0.0 on stat failure.""" try: diff --git a/src/everos/memory/extract/ingest/multimodal.py b/src/everos/memory/extract/ingest/multimodal.py index 45fc8ab..30ca0bb 100644 --- a/src/everos/memory/extract/ingest/multimodal.py +++ b/src/everos/memory/extract/ingest/multimodal.py @@ -17,6 +17,11 @@ from everos.core.observability.logging import get_logger logger = get_logger(__name__) +_IMAGE_VISUAL_FACTS_NOTE = ( + "Context: image visual facts extracted from an uploaded image; " + "treat these as image content, not assistant actions." +) + def coerce_items( content: str | list[dict[str, Any]] | list[Any], @@ -83,6 +88,8 @@ def _render_item(item: dict[str, Any]) -> str | None: kind = str(item.get("type") or "file").upper() name = item.get("name") or "" tag = f"[{kind}: {name}]" if name else f"[{kind}]" + if kind == "IMAGE": + return f"{tag}\n{_IMAGE_VISUAL_FACTS_NOTE}\n{parsed}" return f"{tag}\n{parsed}" diff --git a/src/everos/templates/env.template b/src/everos/templates/env.template index b287b18..a3878f2 100755 --- a/src/everos/templates/env.template +++ b/src/everos/templates/env.template @@ -33,6 +33,8 @@ EVEROS_LLM__MODEL=openai/gpt-4.1-mini EVEROS_LLM__API_KEY= EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1 +# Per-request chat-completions timeout in seconds (default 180): +# EVEROS_LLM__TIMEOUT_SECONDS=180 # ─── Multimodal LLM (independent from [llm]; vision/audio capable) ──── @@ -43,6 +45,11 @@ EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1 EVEROS_MULTIMODAL__MODEL=google/gemini-3-flash-preview EVEROS_MULTIMODAL__API_KEY= 
EVEROS_MULTIMODAL__BASE_URL=https://openrouter.ai/api/v1 +# Per-request multimodal chat-completions timeout in seconds (default 180): +# EVEROS_MULTIMODAL__TIMEOUT_SECONDS=180 +# Resize inline base64 images to a fixed shorter-side length before sending +# them to the VLM (default true): +# EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM=true # Concurrency cap for parallel multimodal calls (default 4): # EVEROS_MULTIMODAL__MAX_CONCURRENCY=4 # diff --git a/tests/unit/test_component/test_llm/test_client.py b/tests/unit/test_component/test_llm/test_client.py index dd9eff2..397591c 100644 --- a/tests/unit/test_component/test_llm/test_client.py +++ b/tests/unit/test_component/test_llm/test_client.py @@ -2,20 +2,30 @@ from __future__ import annotations +import base64 import importlib +from io import BytesIO import pytest +from everalgo.llm.types import ( + ChatMessage, + ChatResponse, + ImageUrlInner, + ImageUrlPart, + TextPart, +) from pydantic import SecretStr from everos.component.llm import LLMNotConfiguredError from everos.config import Settings -from everos.config.settings import LLMSettings +from everos.config.settings import LLMSettings, MultimodalSettings _client_mod = importlib.import_module("everos.component.llm.client") def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(_client_mod, "_llm_client", None, raising=False) + monkeypatch.setattr(_client_mod, "_multimodal_client", None, raising=False) def _patch_settings( @@ -23,6 +33,7 @@ def _patch_settings( *, api_key: str | None, base_url: str | None, + timeout_seconds: float | None = None, ) -> None: """Stub the ``load_settings`` reference bound inside the client module.""" cfg = Settings( @@ -30,11 +41,86 @@ def _patch_settings( model="gpt-4o-mini", api_key=SecretStr(api_key) if api_key is not None else None, base_url=base_url, + **( + {"timeout_seconds": timeout_seconds} + if timeout_seconds is not None + else {} + ), ) ) monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg) +def
_patch_multimodal_settings( + monkeypatch: pytest.MonkeyPatch, + *, + api_key: str | None, + base_url: str | None, + timeout_seconds: float | None = None, + resize_images_for_vlm: bool | None = None, +) -> None: + cfg = Settings( + multimodal=MultimodalSettings( + model="vision-model", + api_key=SecretStr(api_key) if api_key is not None else None, + base_url=base_url, + **( + {"timeout_seconds": timeout_seconds} + if timeout_seconds is not None + else {} + ), + **( + {"resize_images_for_vlm": resize_images_for_vlm} + if resize_images_for_vlm is not None + else {} + ), + ) + ) + monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg) + + +class _CapturingLLM: + def __init__(self) -> None: + self.messages: list[ChatMessage] | None = None + self.kwargs: dict[str, object] | None = None + + async def chat( + self, + messages: list[ChatMessage], + **kwargs: object, + ) -> ChatResponse: + self.messages = messages + self.kwargs = kwargs + return ChatResponse(content="ok", model="fake") + + +def _assert_no_thinking_param(kwargs: dict[str, object] | None) -> None: + assert kwargs is not None + extra_body = kwargs.get("extra_body") + assert isinstance(extra_body, dict) + chat_template_kwargs = extra_body.get("chat_template_kwargs") + assert isinstance(chat_template_kwargs, dict) + assert chat_template_kwargs["enable_thinking"] is False + + +def _png_data_url(size: tuple[int, int]) -> str: + from PIL import Image + + image = Image.new("RGB", size, color=(255, 0, 0)) + buffer = BytesIO() + image.save(buffer, format="PNG") + encoded = base64.b64encode(buffer.getvalue()).decode("ascii") + return f"data:image/png;base64,{encoded}" + + +def _data_url_image_size(data_url: str) -> tuple[int, int]: + from PIL import Image + + _, encoded = data_url.split(",", 1) + with Image.open(BytesIO(base64.b64decode(encoded))) as image: + return image.size + + def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None: _reset_singleton(monkeypatch) 
_patch_settings(monkeypatch, api_key=None, base_url="https://example.test") @@ -60,5 +146,295 @@ def test_returns_singleton_when_configured(monkeypatch: pytest.MonkeyPatch) -> N first = _client_mod.get_llm_client() second = _client_mod.get_llm_client() - assert first is sentinel assert first is second + assert first._inner is sentinel + + +@pytest.mark.asyncio +async def test_llm_client_defaults_to_no_thinking_param( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_settings(monkeypatch, api_key="sk-test", base_url="https://example.test") + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + + client = _client_mod.get_llm_client() + await client.chat([ChatMessage(role="user", content="hello")]) + + _assert_no_thinking_param(captured.kwargs) + + +def test_llm_client_passes_configured_timeout( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + timeout_seconds=180.0, + ) + captured_configs = [] + sentinel = object() + + def capture_build_client(cfg): + captured_configs.append(cfg) + return sentinel + + monkeypatch.setattr(_client_mod, "build_client", capture_build_client) + + client = _client_mod.get_llm_client() + assert client._inner is sentinel + assert captured_configs[0].timeout == 180.0 + + +def test_multimodal_client_passes_configured_timeout( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + timeout_seconds=240.0, + ) + captured_configs = [] + sentinel = _CapturingLLM() + + def capture_build_client(cfg): + captured_configs.append(cfg) + return sentinel + + monkeypatch.setattr(_client_mod, "build_client", capture_build_client) + + _client_mod.get_multimodal_llm_client() + assert captured_configs[0].timeout == 240.0 + + 
+@pytest.mark.asyncio +async def test_multimodal_client_sets_default_image_detail( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + ) + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="describe"), + ImageUrlPart( + image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd") + ), + ], + ) + + await client.chat([original], max_tokens=10) + + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_image = sent_content[1] + assert isinstance(sent_image, ImageUrlPart) + assert sent_image.image_url.detail == "auto" + + original_content = original.content + assert isinstance(original_content, list) + original_image = original_content[1] + assert isinstance(original_image, ImageUrlPart) + assert original_image.image_url.detail is None + + +@pytest.mark.asyncio +async def test_multimodal_client_adds_visual_memory_instructions( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + ) + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="Read this image and return its content."), + ImageUrlPart( + image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd") + ), + ], + ) + + await client.chat([original], max_tokens=10) + + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_text = sent_content[0] + assert isinstance(sent_text, TextPart) 
+ sent_text_lower = sent_text.text.lower() + assert "spatial relationships" in sent_text_lower + assert "relative positions" in sent_text_lower + assert "Do NOT describe the parser, assistant, or ChatGPT" in sent_text.text + + original_content = original.content + assert isinstance(original_content, list) + original_text = original_content[0] + assert isinstance(original_text, TextPart) + assert "spatial relationships" not in original_text.text + + +@pytest.mark.asyncio +async def test_multimodal_client_defaults_to_no_thinking_param( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + ) + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="Read this image and return its content."), + ImageUrlPart( + image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd") + ), + ], + ) + + await client.chat( + [original], + max_tokens=10, + extra_body={"provider": {"only": ["test"]}}, + ) + + _assert_no_thinking_param(captured.kwargs) + assert captured.kwargs is not None + extra_body = captured.kwargs["extra_body"] + assert isinstance(extra_body, dict) + assert extra_body["provider"] == {"only": ["test"]} + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_text = sent_content[0] + assert isinstance(sent_text, TextPart) + assert "/no_think" not in sent_text.text + + +@pytest.mark.asyncio +async def test_multimodal_client_resizes_landscape_image_to_64_min_side_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + ) + captured = _CapturingLLM() + 
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + image_url = _png_data_url((640, 480)) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="describe"), + ImageUrlPart(image_url=ImageUrlInner(url=image_url)), + ], + ) + + await client.chat([original], max_tokens=10) + + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_image = sent_content[1] + assert isinstance(sent_image, ImageUrlPart) + assert _data_url_image_size(sent_image.image_url.url) == (85, 64) + assert _data_url_image_size(image_url) == (640, 480) + + +@pytest.mark.asyncio +async def test_multimodal_client_resizes_portrait_image_to_64_min_side_by_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + ) + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + image_url = _png_data_url((480, 640)) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="describe"), + ImageUrlPart(image_url=ImageUrlInner(url=image_url)), + ], + ) + + await client.chat([original], max_tokens=10) + + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_image = sent_content[1] + assert isinstance(sent_image, ImageUrlPart) + assert _data_url_image_size(sent_image.image_url.url) == (64, 85) + assert _data_url_image_size(image_url) == (480, 640) + + +@pytest.mark.asyncio +async def test_multimodal_client_keeps_image_when_resize_disabled( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_singleton(monkeypatch) + _patch_multimodal_settings( + monkeypatch, + api_key="sk-test", + base_url="https://example.test", + 
resize_images_for_vlm=False, + ) + captured = _CapturingLLM() + monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured) + image_url = _png_data_url((640, 480)) + + client = _client_mod.get_multimodal_llm_client() + original = ChatMessage( + role="user", + content=[ + TextPart(text="describe"), + ImageUrlPart(image_url=ImageUrlInner(url=image_url)), + ], + ) + + await client.chat([original], max_tokens=10) + + assert captured.messages is not None + sent_content = captured.messages[0].content + assert isinstance(sent_content, list) + sent_image = sent_content[1] + assert isinstance(sent_image, ImageUrlPart) + assert sent_image.image_url.url == image_url diff --git a/tests/unit/test_component/test_llm/test_factory.py b/tests/unit/test_component/test_llm/test_factory.py index 4329fce..c543dbf 100644 --- a/tests/unit/test_component/test_llm/test_factory.py +++ b/tests/unit/test_component/test_llm/test_factory.py @@ -6,6 +6,7 @@ import pytest from pydantic import SecretStr from everos.component.llm import build_llm_provider +from everos.component.llm import factory as factory_mod from everos.component.llm.openai_provider import OpenAIProvider from everos.config.settings import LLMSettings @@ -26,3 +27,23 @@ def test_builds_openai_provider() -> None: s = LLMSettings(model="m", api_key=SecretStr("k"), base_url="https://x") p = build_llm_provider(s) assert isinstance(p, OpenAIProvider) + + +def test_passes_configured_timeout(monkeypatch: pytest.MonkeyPatch) -> None: + captured_kwargs = {} + sentinel = object() + + def capture_provider(**kwargs): + captured_kwargs.update(kwargs) + return sentinel + + monkeypatch.setattr(factory_mod, "OpenAIProvider", capture_provider) + s = LLMSettings( + model="m", + api_key=SecretStr("k"), + base_url="https://x", + timeout_seconds=240.0, + ) + + assert build_llm_provider(s) is sentinel + assert captured_kwargs["timeout"] == 240.0 diff --git a/tests/unit/test_config/test_settings.py b/tests/unit/test_config/test_settings.py 
index a8fc5ca..373f967 100644 --- a/tests/unit/test_config/test_settings.py +++ b/tests/unit/test_config/test_settings.py @@ -105,6 +105,9 @@ def test_embedding_rerank_defaults() -> None: assert s.embedding.model is None assert s.embedding.api_key is None assert s.embedding.base_url is None + assert s.llm.timeout_seconds == 180.0 + assert s.multimodal.timeout_seconds == 180.0 + assert s.multimodal.resize_images_for_vlm is True # Runtime knobs come from default.toml. assert s.embedding.timeout_seconds == 30.0 assert s.embedding.max_retries == 3 @@ -126,6 +129,16 @@ def test_embedding_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None: assert s.embedding.batch_size == 32 +def test_llm_timeout_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("EVEROS_LLM__TIMEOUT_SECONDS", "240") + monkeypatch.setenv("EVEROS_MULTIMODAL__TIMEOUT_SECONDS", "300") + monkeypatch.setenv("EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM", "false") + s = Settings() + assert s.llm.timeout_seconds == 240.0 + assert s.multimodal.timeout_seconds == 300.0 + assert s.multimodal.resize_images_for_vlm is False + + def test_rerank_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("EVEROS_RERANK__MODEL", "BAAI/bge-reranker-v2-m3") monkeypatch.setenv("EVEROS_RERANK__MAX_CONCURRENT", "8") diff --git a/tests/unit/test_memory/test_cascade/test_watcher_helpers.py b/tests/unit/test_memory/test_cascade/test_watcher_helpers.py index 772e247..06e4cc8 100644 --- a/tests/unit/test_memory/test_cascade/test_watcher_helpers.py +++ b/tests/unit/test_memory/test_cascade/test_watcher_helpers.py @@ -9,7 +9,11 @@ from __future__ import annotations from pathlib import Path -from everos.memory.cascade.watcher import _relative_to_root, _safe_mtime +from everos.memory.cascade.watcher import ( + _relative_to_root, + _safe_mtime, + _watch_roots, +) def test_relative_to_root_within(tmp_path: Path) -> None: @@ -34,3 +38,14 @@ def 
test_safe_mtime_existing_path_returns_positive(tmp_path: Path) -> None: f = tmp_path / "f.md" f.write_text("ok") assert _safe_mtime(str(f)) > 0 + + +def test_watch_roots_excludes_system_dot_dirs(tmp_path: Path) -> None: + (tmp_path / ".index" / "lancedb" / "episode").mkdir(parents=True) + (tmp_path / ".tmp").mkdir() + (tmp_path / "default_app" / "default_project" / "users").mkdir(parents=True) + (tmp_path / "default_app" / "default_project" / "agents").mkdir() + + roots = _watch_roots(tmp_path) + + assert roots == [tmp_path / "default_app"] diff --git a/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py b/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py index 827e243..ccdd751 100644 --- a/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py +++ b/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py @@ -21,7 +21,10 @@ def test_derive_text_renders_parsed_nontext_as_tag() -> None: ] text, non_text = derive_text(items) - assert "[IMAGE: p.png]\nOCR TEXT" in text + assert "[IMAGE: p.png]" in text + assert "image visual facts" in text + assert "not assistant actions" in text + assert text.index("image visual facts") < text.index("OCR TEXT") assert text.startswith("before") assert text.endswith("after") assert non_text == 0