Save local modifications for syncing

2026-06-10 10:05:52 +08:00
parent 9fc6ad20d2
commit 0910affc78
13 changed files with 738 additions and 24 deletions
--- a/config.example.toml
+++ b/config.example.toml
@ -24,6 +24,17 @@
 model    = "gpt-4o-mini"
 api_key  = "sk-..."
 base_url = "https://api.openai.com/v1"
 timeout_seconds = 180.0
 # ── Multimodal LLM ───────────────────────────────────
 # Independent vision/audio-capable chat-completions endpoint for parsing.
 [multimodal]
 model    = "google/gemini-3-flash-preview"
 api_key  = "sk-..."
 base_url = "https://openrouter.ai/api/v1"
 timeout_seconds = 180.0
 resize_images_for_vlm = true
 max_concurrency = 4
 # ── Embedding ─────────────────────────────────────────
 [embedding]
--- a/src/everos/component/llm/client.py
+++ b/src/everos/component/llm/client.py
@ -9,9 +9,16 @@ provider) instead of silently failing per-request downstream.
 from __future__ import annotations
 import base64
 import binascii
 from io import BytesIO
 from typing import Any
 from everalgo.llm import build_client
 from everalgo.llm.config import LLMConfig
 from everalgo.llm.protocols import LLMClient
 from everalgo.llm.types import ChatMessage, ChatResponse, ImageUrlPart, TextPart
 from pydantic import BaseModel
 from everos.config import load_settings
 from everos.core.observability.logging import get_logger
@ -25,6 +32,212 @@ class LLMNotConfiguredError(RuntimeError):
 _llm_client: LLMClient | None = None
 _multimodal_client: LLMClient | None = None
 _VLM_IMAGE_MIN_SIDE = 1024
 _NO_THINKING_EXTRA_BODY_KEY = "chat_template_kwargs"
 _NO_THINKING_PARAM = {"enable_thinking": False}
 # Instruction text that ``_with_multimodal_image_defaults`` puts in place of a
 # text part when a message carries an inline ``data:image/`` part.
 _IMAGE_VISUAL_MEMORY_PROMPT = """Describe this image for visual memory retrieval.
 Output final Markdown directly; do not include reasoning.
 Focus on:
 1. Key visible objects and their names, brands, colors, labels, quantities.
 2. Spatial relationships and relative positions: left/right/above/below/center,
   foreground/background, nearby objects, and supporting surfaces.
 3. Location-query facts, e.g. "the milk carton is center-left, to the right of
   X and to the left of Y".
 4. Important visible text, but extract only useful labels/interface text; do
   not exhaustively OCR every key or menu item if that would crowd out object
   locations.
 Do NOT describe the parser, assistant, or ChatGPT as processing the image.
 If "ChatGPT" is visible, list it only as visible interface text.
 """
 class _NoThinkingRequestDefaultsClient:
    """Inject default no-thinking request params for OpenAI-compatible servers."""
    def __init__(self, inner: LLMClient) -> None:
        self._inner = inner
    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: type[BaseModel] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        return await self._inner.chat(
            messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
            **_with_no_thinking_defaults(extra),
        )
 class _MultimodalImageDetailCompatClient:
    """Patch image parts for strict OpenAI-compatible gateways.
    everalgo-core 0.2.0 serialises ``image_url.detail`` as ``None`` when the
    field is unset. Some gateways reject that literal null and require one of
    OpenAI's enum values. EverOS only uses this wrapper for multimodal parsing.
    """
    def __init__(self, inner: LLMClient, *, resize_images_for_vlm: bool) -> None:
        self._inner = inner
        self._resize_images_for_vlm = resize_images_for_vlm
    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: type[BaseModel] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        return await self._inner.chat(
            [
                _with_multimodal_image_defaults(
                    m,
                    resize_images_for_vlm=self._resize_images_for_vlm,
                )
                for m in messages
            ],
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
            **_with_no_thinking_defaults(extra),
        )
 def _with_no_thinking_defaults(extra: dict[str, Any]) -> dict[str, Any]:
    """Build request kwargs whose ``extra_body`` disables thinking by default.

    The input mapping and any nested dicts are copied, never mutated. An
    ``enable_thinking`` value already supplied by the caller is kept; other
    ``extra_body`` entries are carried over unchanged.
    """
    body = dict(extra.get("extra_body") or {})
    template_kwargs = dict(body.get(_NO_THINKING_EXTRA_BODY_KEY) or {})
    if "enable_thinking" not in template_kwargs:
        template_kwargs["enable_thinking"] = _NO_THINKING_PARAM["enable_thinking"]
    body[_NO_THINKING_EXTRA_BODY_KEY] = template_kwargs
    return {**extra, "extra_body": body}
 def _with_multimodal_image_defaults(
    message: ChatMessage, *, resize_images_for_vlm: bool = True
 ) -> ChatMessage:
    """Return a copy with stricter-gateway + visual-memory image defaults."""
    content = message.content
    if not isinstance(content, list):
        return message
    has_image = any(_is_image_part(part) for part in content)
    instructions_added = False
    changed = False
    patched_parts: list[object] = []
    for part in content:
        patched = part
        if isinstance(part, ImageUrlPart):
            image_url_updates: dict[str, object] = {}
            if part.image_url.detail is None:
                image_url_updates["detail"] = "auto"
            if resize_images_for_vlm:
                resized_url = _resize_image_data_url(part.image_url.url)
                if resized_url != part.image_url.url:
                    image_url_updates["url"] = resized_url
            if image_url_updates:
                image_url = part.image_url.model_copy(update=image_url_updates)
                patched = part.model_copy(update={"image_url": image_url})
                changed = True
        if (
            has_image
            and not instructions_added
            and isinstance(patched, TextPart)
            and patched.text != _IMAGE_VISUAL_MEMORY_PROMPT
        ):
            patched = patched.model_copy(
                update={"text": _IMAGE_VISUAL_MEMORY_PROMPT}
            )
            instructions_added = True
            changed = True
        patched_parts.append(patched)
    if not changed:
        return message
    return message.model_copy(update={"content": patched_parts})
 def _is_image_part(part: object) -> bool:
    """Tell whether ``part`` is an ``ImageUrlPart`` with a ``data:image/`` URL."""
    if not isinstance(part, ImageUrlPart):
        return False
    return part.image_url.url.startswith("data:image/")
 def _resize_image_data_url(url: str) -> str:
    """Resize base64 data-url images so the shorter side is 64 pixels."""
    if not url.startswith("data:image/"):
        return url
    try:
        header, encoded = url.split(",", 1)
    except ValueError:
        return url
    if ";base64" not in header.lower():
        return url
    mime_type = header[5:].split(";", 1)[0].lower()
    image_format = {
        "image/jpeg": "JPEG",
        "image/jpg": "JPEG",
        "image/png": "PNG",
        "image/webp": "WEBP",
    }.get(mime_type)
    if image_format is None:
        return url
    try:
        from PIL import Image, ImageOps
        raw = base64.b64decode(encoded, validate=True)
        with Image.open(BytesIO(raw)) as image:
            image = ImageOps.exif_transpose(image)
            target_size = _image_size_with_min_side(
                image.size, _VLM_IMAGE_MIN_SIDE
            )
            resized = image.resize(target_size, Image.Resampling.LANCZOS)
            if image_format == "JPEG" and resized.mode not in ("RGB", "L"):
                resized = resized.convert("RGB")
            buffer = BytesIO()
            save_kwargs: dict[str, object] = {"format": image_format}
            if image_format == "JPEG":
                save_kwargs["quality"] = 85
            resized.save(buffer, **save_kwargs)
    except (ImportError, ValueError, OSError, binascii.Error):
        return url
    resized_encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"{header},{resized_encoded}"
 def _image_size_with_min_side(
    size: tuple[int, int],
    min_side: int,
 ) -> tuple[int, int]:
    width, height = size
    shortest = min(width, height)
    if shortest <= 0:
        return (max(1, width), max(1, height))
    scale = min_side / shortest
    return (max(1, round(width * scale)), max(1, round(height * scale)))
 def get_llm_client() -> LLMClient:
@ -46,11 +259,14 @@ def get_llm_client() -> LLMClient:
        raise LLMNotConfiguredError(
            "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
        )
-    _llm_client = build_client(
+    _llm_client = _NoThinkingRequestDefaultsClient(
-        LLMConfig(
+        build_client(
-            model=llm_cfg.model,
+            LLMConfig(
-            api_key=api_key,
+                model=llm_cfg.model,
-            base_url=llm_cfg.base_url,
+                api_key=api_key,
                base_url=llm_cfg.base_url,
                timeout=llm_cfg.timeout_seconds,
            )
        )
    )
    logger.info("llm_client_built", model=llm_cfg.model)
@ -78,12 +294,16 @@ def get_multimodal_llm_client() -> LLMClient:
            "Multimodal LLM is required for parsing; set "
            "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
        )
-    _multimodal_client = build_client(
+    _multimodal_client = _MultimodalImageDetailCompatClient(
-        LLMConfig(
+        build_client(
-            model=cfg.model,
+            LLMConfig(
-            api_key=api_key,
+                model=cfg.model,
-            base_url=cfg.base_url,
+                api_key=api_key,
-        )
+                base_url=cfg.base_url,
                timeout=cfg.timeout_seconds,
            )
        ),
        resize_images_for_vlm=cfg.resize_images_for_vlm,
    )
    logger.info("multimodal_llm_client_built", model=cfg.model)
    return _multimodal_client
--- a/src/everos/component/llm/factory.py
+++ b/src/everos/component/llm/factory.py
@ -42,4 +42,5 @@ def build_llm_provider(settings: LLMSettings) -> LLMClient:
        model=settings.model,
        api_key=settings.api_key.get_secret_value(),
        base_url=settings.base_url,
        timeout=settings.timeout_seconds,
    )
--- a/src/everos/config/default.toml
+++ b/src/everos/config/default.toml
@ -56,17 +56,21 @@ cache_size_kb = 2048
 [llm]
 # Provider-agnostic OpenAI-protocol client config. Override via env:
-#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
+#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL, EVEROS_LLM__TIMEOUT_SECONDS
 # Or via a ``.env`` file next to the project root (auto-loaded).
 model = "gpt-4o-mini"
 timeout_seconds = 180.0
 # api_key = ""
 # base_url = ""
 [multimodal]
 # Independent LLM for multimodal parsing (everalgo-parser); must accept
 # image / pdf / audio image_url parts. Override via env:
-#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
+#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL,
 #   EVEROS_MULTIMODAL__TIMEOUT_SECONDS, EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
 model = "google/gemini-3-flash-preview"
 timeout_seconds = 180.0
 resize_images_for_vlm = true
 max_concurrency = 4
 # api_key = ""
 # base_url = ""
--- a/src/everos/config/settings.py
+++ b/src/everos/config/settings.py
@ -121,11 +121,13 @@ class LLMSettings(BaseModel):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
        EVEROS_LLM__TIMEOUT_SECONDS
    """
    model: str = "gpt-4o-mini"
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(default=180.0, gt=0)
 class MultimodalSettings(BaseModel):
@ -140,6 +142,8 @@ class MultimodalSettings(BaseModel):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__TIMEOUT_SECONDS
        EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
@ -148,6 +152,8 @@ class MultimodalSettings(BaseModel):
    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(default=180.0, gt=0)
    resize_images_for_vlm: bool = True
    max_concurrency: int = 4
    # ``file://`` content-item support (read locally by EverOS, not everalgo).
--- a/src/everos/memory/cascade/watcher.py
+++ b/src/everos/memory/cascade/watcher.py
@ -47,6 +47,7 @@ class CascadeWatcher:
        self._observer = Observer()
        self._handler = _Handler(memory_root, loop)
        self._started = False
        self._observer_started = False
    def start(self) -> None:
        if self._started:
@ -54,18 +55,31 @@ class CascadeWatcher:
        # The memory root is created lazily by other layers; watchdog
        # rejects non-existent paths so we ensure it exists here.
        self._memory_root.ensure()
-        self._observer.schedule(
+        watch_roots = _watch_roots(self._memory_root.root)
-            self._handler, str(self._memory_root.root), recursive=True
+        for root in watch_roots:
-        )
+            self._observer.schedule(self._handler, str(root), recursive=True)
-        self._observer.start()
+        if watch_roots:
            self._observer.start()
            self._observer_started = True
        else:
            logger.warning(
                "cascade_watcher_no_user_visible_roots",
                root=str(self._memory_root.root),
            )
        self._started = True
-        logger.info("cascade_watcher_started", root=str(self._memory_root.root))
+        logger.info(
            "cascade_watcher_started",
            root=str(self._memory_root.root),
            watched_roots=[str(root) for root in watch_roots],
        )
    def stop(self) -> None:
        if not self._started:
            return
-        self._observer.stop()
+        if self._observer_started:
-        self._observer.join(timeout=5)
+            self._observer.stop()
            self._observer.join(timeout=5)
            self._observer_started = False
        self._started = False
        logger.info("cascade_watcher_stopped")
@ -163,6 +177,22 @@ def _relative_to_root(root: Path, raw: str) -> str | None:
    return rel.as_posix()
 def _watch_roots(root: Path) -> list[Path]:
    """Return user-visible top-level dirs to watch, excluding system dot dirs."""
    try:
        children = list(root.iterdir())
    except OSError:
        return []
    return sorted(
        (
            child
            for child in children
            if child.is_dir() and not child.name.startswith(".")
        ),
        key=lambda p: p.name,
    )
 def _safe_mtime(raw: str) -> float:
    """Return mtime in seconds, falling back to 0.0 on stat failure."""
    try:
--- a/src/everos/memory/extract/ingest/multimodal.py
+++ b/src/everos/memory/extract/ingest/multimodal.py
@ -17,6 +17,11 @@ from everos.core.observability.logging import get_logger
 logger = get_logger(__name__)
 # Note that ``_render_item`` inserts between an IMAGE item's tag line and its
 # parsed text.
 _IMAGE_VISUAL_FACTS_NOTE = (
    "Context: image visual facts extracted from an uploaded image; "
    "treat these as image content, not assistant actions."
 )
 def coerce_items(
    content: str | list[dict[str, Any]] | list[Any],
@ -83,6 +88,8 @@ def _render_item(item: dict[str, Any]) -> str | None:
    kind = str(item.get("type") or "file").upper()
    name = item.get("name") or ""
    tag = f"[{kind}: {name}]" if name else f"[{kind}]"
    if kind == "IMAGE":
        return f"{tag}\n{_IMAGE_VISUAL_FACTS_NOTE}\n{parsed}"
    return f"{tag}\n{parsed}"
--- a/src/everos/templates/env.template
+++ b/src/everos/templates/env.template
@ -33,6 +33,8 @@
 EVEROS_LLM__MODEL=openai/gpt-4.1-mini
 EVEROS_LLM__API_KEY=
 EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
 # Per-request chat-completions timeout in seconds (default 180):
 # EVEROS_LLM__TIMEOUT_SECONDS=180
 # ─── Multimodal LLM (independent from [llm]; vision/audio capable) ────
@ -43,6 +45,11 @@ EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
 EVEROS_MULTIMODAL__MODEL=google/gemini-3-flash-preview
 EVEROS_MULTIMODAL__API_KEY=
 EVEROS_MULTIMODAL__BASE_URL=https://openrouter.ai/api/v1
 # Per-request multimodal chat-completions timeout in seconds (default 180):
 # EVEROS_MULTIMODAL__TIMEOUT_SECONDS=180
 # Resize inline base64 images to a fixed shorter-side length (aspect ratio
 # preserved) before sending them to the VLM (default true):
 # EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM=true
 # Concurrency cap for parallel multimodal calls (default 4):
 # EVEROS_MULTIMODAL__MAX_CONCURRENCY=4
 #
--- a/tests/unit/test_component/test_llm/test_client.py
+++ b/tests/unit/test_component/test_llm/test_client.py
@ -2,20 +2,30 @@
 from __future__ import annotations
 import base64
 import importlib
 from io import BytesIO
 import pytest
 from everalgo.llm.types import (
    ChatMessage,
    ChatResponse,
    ImageUrlInner,
    ImageUrlPart,
    TextPart,
 )
 from pydantic import SecretStr
 from everos.component.llm import LLMNotConfiguredError
 from everos.config import Settings
-from everos.config.settings import LLMSettings
+from everos.config.settings import LLMSettings, MultimodalSettings
 _client_mod = importlib.import_module("everos.component.llm.client")
 def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None:
    """Reset both cached client singletons on the client module to ``None``."""
    for attr_name in ("_llm_client", "_multimodal_client"):
        monkeypatch.setattr(_client_mod, attr_name, None, raising=False)
 def _patch_settings(
@ -23,6 +33,7 @@ def _patch_settings(
    *,
    api_key: str | None,
    base_url: str | None,
    timeout_seconds: float | None = None,
 ) -> None:
    """Stub the ``load_settings`` reference bound inside the client module."""
    cfg = Settings(
@ -30,11 +41,86 @@ def _patch_settings(
            model="gpt-4o-mini",
            api_key=SecretStr(api_key) if api_key is not None else None,
            base_url=base_url,
            **(
                {"timeout_seconds": timeout_seconds}
                if timeout_seconds is not None
                else {}
            ),
        )
    )
    monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
 def _patch_multimodal_settings(
    monkeypatch: pytest.MonkeyPatch,
    *,
    api_key: str | None,
    base_url: str | None,
    timeout_seconds: float | None = None,
    resize_images_for_vlm: bool | None = None,
 ) -> None:
    """Stub ``load_settings`` in the client module with multimodal settings.

    ``timeout_seconds`` and ``resize_images_for_vlm`` are passed to
    ``MultimodalSettings`` only when given, so its defaults apply otherwise.
    """
    optional_fields: dict[str, object] = {}
    if timeout_seconds is not None:
        optional_fields["timeout_seconds"] = timeout_seconds
    if resize_images_for_vlm is not None:
        optional_fields["resize_images_for_vlm"] = resize_images_for_vlm
    secret_key = None if api_key is None else SecretStr(api_key)
    multimodal = MultimodalSettings(
        model="vision-model",
        api_key=secret_key,
        base_url=base_url,
        **optional_fields,
    )
    cfg = Settings(multimodal=multimodal)
    monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
 class _CapturingLLM:
    def __init__(self) -> None:
        self.messages: list[ChatMessage] | None = None
        self.kwargs: dict[str, object] | None = None
    async def chat(
        self,
        messages: list[ChatMessage],
        **kwargs: object,
    ) -> ChatResponse:
        self.messages = messages
        self.kwargs = kwargs
        return ChatResponse(content="ok", model="fake")
 def _assert_no_thinking_param(kwargs: dict[str, object] | None) -> None:
    assert kwargs is not None
    extra_body = kwargs.get("extra_body")
    assert isinstance(extra_body, dict)
    chat_template_kwargs = extra_body.get("chat_template_kwargs")
    assert isinstance(chat_template_kwargs, dict)
    assert chat_template_kwargs["enable_thinking"] is False
 def _png_data_url(size: tuple[int, int]) -> str:
    """Build a base64 PNG data URL for a solid red RGB image of ``size``."""
    from PIL import Image
    png_bytes = BytesIO()
    Image.new("RGB", size, color=(255, 0, 0)).save(png_bytes, format="PNG")
    payload = base64.b64encode(png_bytes.getvalue()).decode("ascii")
    return f"data:image/png;base64,{payload}"
 def _data_url_image_size(data_url: str) -> tuple[int, int]:
    """Decode a base64 image data URL and return the image's ``size``."""
    from PIL import Image
    _header, payload = data_url.split(",", 1)
    raw = base64.b64decode(payload)
    with Image.open(BytesIO(raw)) as decoded:
        return decoded.size
 def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None:
    _reset_singleton(monkeypatch)
    _patch_settings(monkeypatch, api_key=None, base_url="https://example.test")
@ -60,5 +146,295 @@ def test_returns_singleton_when_configured(monkeypatch: pytest.MonkeyPatch) -> N
    first = _client_mod.get_llm_client()
    second = _client_mod.get_llm_client()
    assert first is sentinel
    assert first is second
    assert first._inner is sentinel
@pytest.mark.asyncio
 async def test_llm_client_defaults_to_no_thinking_param(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """The text LLM client sends ``enable_thinking=False`` without caller input."""
    _reset_singleton(monkeypatch)
    _patch_settings(monkeypatch, api_key="sk-test", base_url="https://example.test")
    captured = _CapturingLLM()
    # Replace the real client factory so the wrapper talks to the fake.
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    client = _client_mod.get_llm_client()
    await client.chat([ChatMessage(role="user", content="hello")])
    _assert_no_thinking_param(captured.kwargs)
 def test_llm_client_passes_configured_timeout(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """``get_llm_client`` hands the configured timeout to ``build_client``."""
    _reset_singleton(monkeypatch)
    _patch_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
        timeout_seconds=180.0,
    )
    captured_configs = []
    sentinel = object()
    def capture_build_client(cfg):
        # Record the config object passed in by the code under test.
        captured_configs.append(cfg)
        return sentinel
    monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
    client = _client_mod.get_llm_client()
    # The returned object wraps whatever ``build_client`` produced.
    assert client._inner is sentinel
    assert captured_configs[0].timeout == 180.0
 def test_multimodal_client_passes_configured_timeout(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """``get_multimodal_llm_client`` hands the configured timeout to ``build_client``."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
        timeout_seconds=240.0,
    )
    captured_configs = []
    sentinel = _CapturingLLM()
    def capture_build_client(cfg):
        # Record the config object passed in by the code under test.
        captured_configs.append(cfg)
        return sentinel
    monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
    _client_mod.get_multimodal_llm_client()
    assert captured_configs[0].timeout == 240.0
@pytest.mark.asyncio
 async def test_multimodal_client_sets_default_image_detail(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """An image part with unset ``detail`` is sent as ``"auto"``; the input is untouched."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(
                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
            ),
        ],
    )
    await client.chat([original], max_tokens=10)
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert sent_image.image_url.detail == "auto"
    # The caller's message must not have been mutated.
    original_content = original.content
    assert isinstance(original_content, list)
    original_image = original_content[1]
    assert isinstance(original_image, ImageUrlPart)
    assert original_image.image_url.detail is None
@pytest.mark.asyncio
 async def test_multimodal_client_adds_visual_memory_instructions(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """The sent text part carries the visual-memory prompt; the input text is untouched."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="Read this image and return its content."),
            ImageUrlPart(
                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
            ),
        ],
    )
    await client.chat([original], max_tokens=10)
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_text = sent_content[0]
    assert isinstance(sent_text, TextPart)
    sent_text_lower = sent_text.text.lower()
    assert "spatial relationships" in sent_text_lower
    assert "relative positions" in sent_text_lower
    assert "Do NOT describe the parser, assistant, or ChatGPT" in sent_text.text
    # The caller's message must not have been mutated.
    original_content = original.content
    assert isinstance(original_content, list)
    original_text = original_content[0]
    assert isinstance(original_text, TextPart)
    assert "spatial relationships" not in original_text.text
@pytest.mark.asyncio
 async def test_multimodal_client_defaults_to_no_thinking_param(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """No-thinking defaults are merged into a caller-supplied ``extra_body``."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="Read this image and return its content."),
            ImageUrlPart(
                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
            ),
        ],
    )
    await client.chat(
        [original],
        max_tokens=10,
        extra_body={"provider": {"only": ["test"]}},
    )
    _assert_no_thinking_param(captured.kwargs)
    assert captured.kwargs is not None
    extra_body = captured.kwargs["extra_body"]
    assert isinstance(extra_body, dict)
    # Caller-provided ``extra_body`` entries survive the merge.
    assert extra_body["provider"] == {"only": ["test"]}
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_text = sent_content[0]
    assert isinstance(sent_text, TextPart)
    # Thinking is disabled via request params, not via a prompt marker.
    assert "/no_think" not in sent_text.text
@pytest.mark.asyncio
 async def test_multimodal_client_resizes_landscape_image_to_64_min_side_by_default(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """With default settings a 640x480 PNG is sent as 85x64; the source URL is unchanged."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    image_url = _png_data_url((640, 480))
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
        ],
    )
    await client.chat([original], max_tokens=10)
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert _data_url_image_size(sent_image.image_url.url) == (85, 64)
    assert _data_url_image_size(image_url) == (640, 480)
@pytest.mark.asyncio
 async def test_multimodal_client_resizes_portrait_image_to_64_min_side_by_default(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """With default settings a 480x640 PNG is sent as 64x85; the source URL is unchanged."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    image_url = _png_data_url((480, 640))
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
        ],
    )
    await client.chat([original], max_tokens=10)
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert _data_url_image_size(sent_image.image_url.url) == (64, 85)
    assert _data_url_image_size(image_url) == (480, 640)
@pytest.mark.asyncio
 async def test_multimodal_client_keeps_image_when_resize_disabled(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
    """With ``resize_images_for_vlm=False`` the image URL is forwarded unchanged."""
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
        resize_images_for_vlm=False,
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    image_url = _png_data_url((640, 480))
    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
        ],
    )
    await client.chat([original], max_tokens=10)
    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert sent_image.image_url.url == image_url
--- a/tests/unit/test_component/test_llm/test_factory.py
+++ b/tests/unit/test_component/test_llm/test_factory.py
@ -6,6 +6,7 @@ import pytest
 from pydantic import SecretStr
 from everos.component.llm import build_llm_provider
 from everos.component.llm import factory as factory_mod
 from everos.component.llm.openai_provider import OpenAIProvider
 from everos.config.settings import LLMSettings
@ -26,3 +27,23 @@ def test_builds_openai_provider() -> None:
    s = LLMSettings(model="m", api_key=SecretStr("k"), base_url="https://x")
    p = build_llm_provider(s)
    assert isinstance(p, OpenAIProvider)
 def test_passes_configured_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
    """``build_llm_provider`` passes ``timeout_seconds`` on as ``timeout``."""
    captured_kwargs = {}
    sentinel = object()
    def capture_provider(**kwargs):
        # Stand-in for ``OpenAIProvider`` that records its keyword arguments.
        captured_kwargs.update(kwargs)
        return sentinel
    monkeypatch.setattr(factory_mod, "OpenAIProvider", capture_provider)
    s = LLMSettings(
        model="m",
        api_key=SecretStr("k"),
        base_url="https://x",
        timeout_seconds=240.0,
    )
    assert build_llm_provider(s) is sentinel
    assert captured_kwargs["timeout"] == 240.0
--- a/tests/unit/test_config/test_settings.py
+++ b/tests/unit/test_config/test_settings.py
@ -105,6 +105,9 @@ def test_embedding_rerank_defaults() -> None:
    assert s.embedding.model is None
    assert s.embedding.api_key is None
    assert s.embedding.base_url is None
    assert s.llm.timeout_seconds == 180.0
    assert s.multimodal.timeout_seconds == 180.0
    assert s.multimodal.resize_images_for_vlm is True
    # Runtime knobs come from default.toml.
    assert s.embedding.timeout_seconds == 30.0
    assert s.embedding.max_retries == 3
@ -126,6 +129,16 @@ def test_embedding_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    assert s.embedding.batch_size == 32
 def test_llm_timeout_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    """Env vars override the LLM/multimodal timeouts and the resize flag."""
    monkeypatch.setenv("EVEROS_LLM__TIMEOUT_SECONDS", "240")
    monkeypatch.setenv("EVEROS_MULTIMODAL__TIMEOUT_SECONDS", "300")
    monkeypatch.setenv("EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM", "false")
    s = Settings()
    assert s.llm.timeout_seconds == 240.0
    assert s.multimodal.timeout_seconds == 300.0
    assert s.multimodal.resize_images_for_vlm is False
 def test_rerank_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("EVEROS_RERANK__MODEL", "BAAI/bge-reranker-v2-m3")
    monkeypatch.setenv("EVEROS_RERANK__MAX_CONCURRENT", "8")
--- a/tests/unit/test_memory/test_cascade/test_watcher_helpers.py
+++ b/tests/unit/test_memory/test_cascade/test_watcher_helpers.py
@ -9,7 +9,11 @@ from __future__ import annotations
 from pathlib import Path
-from everos.memory.cascade.watcher import _relative_to_root, _safe_mtime
+from everos.memory.cascade.watcher import (
    _relative_to_root,
    _safe_mtime,
    _watch_roots,
 )
 def test_relative_to_root_within(tmp_path: Path) -> None:
@ -34,3 +38,14 @@ def test_safe_mtime_existing_path_returns_positive(tmp_path: Path) -> None:
    f = tmp_path / "f.md"
    f.write_text("ok")
    assert _safe_mtime(str(f)) > 0
 def test_watch_roots_excludes_system_dot_dirs(tmp_path: Path) -> None:
    """Only the non-dot top-level directory is returned; dot dirs are left out."""
    (tmp_path / ".index" / "lancedb" / "episode").mkdir(parents=True)
    (tmp_path / ".tmp").mkdir()
    (tmp_path / "default_app" / "default_project" / "users").mkdir(parents=True)
    (tmp_path / "default_app" / "default_project" / "agents").mkdir()
    roots = _watch_roots(tmp_path)
    assert roots == [tmp_path / "default_app"]
--- a/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py
+++ b/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py
@ -21,7 +21,10 @@ def test_derive_text_renders_parsed_nontext_as_tag() -> None:
    ]
    text, non_text = derive_text(items)
-    assert "[IMAGE: p.png]\nOCR TEXT" in text
+    assert "[IMAGE: p.png]" in text
    assert "image visual facts" in text
    assert "not assistant actions" in text
    assert text.index("image visual facts") < text.index("OCR TEXT")
    assert text.startswith("before")
    assert text.endswith("after")
    assert non_text == 0