diff --git a/config.example.toml b/config.example.toml
index 1a3d69d..44a8318 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -24,6 +24,17 @@
 model    = "gpt-4o-mini"
 api_key  = "sk-..."
 base_url = "https://api.openai.com/v1"
+timeout_seconds = 180.0
+
+# ── Multimodal LLM ───────────────────────────────────
+# Independent vision/audio-capable chat-completions endpoint for parsing.
+[multimodal]
+model    = "google/gemini-3-flash-preview"
+api_key  = "sk-..."
+base_url = "https://openrouter.ai/api/v1"
+timeout_seconds = 180.0
+resize_images_for_vlm = true
+max_concurrency = 4
 
 # ── Embedding ─────────────────────────────────────────
 [embedding]
diff --git a/src/everos/component/llm/client.py b/src/everos/component/llm/client.py
index 846dcf1..e00ac96 100644
--- a/src/everos/component/llm/client.py
+++ b/src/everos/component/llm/client.py
@@ -9,9 +9,16 @@ provider) instead of silently failing per-request downstream.
 
 from __future__ import annotations
 
+import base64
+import binascii
+from io import BytesIO
+from typing import Any
+
 from everalgo.llm import build_client
 from everalgo.llm.config import LLMConfig
 from everalgo.llm.protocols import LLMClient
+from everalgo.llm.types import ChatMessage, ChatResponse, ImageUrlPart, TextPart
+from pydantic import BaseModel
 
 from everos.config import load_settings
 from everos.core.observability.logging import get_logger
@@ -25,6 +32,212 @@ class LLMNotConfiguredError(RuntimeError):
 
 _llm_client: LLMClient | None = None
 _multimodal_client: LLMClient | None = None
+_VLM_IMAGE_MIN_SIDE = 64  # px; shorter-side target used by _resize_image_data_url
+_NO_THINKING_EXTRA_BODY_KEY = "chat_template_kwargs"
+_NO_THINKING_PARAM = {"enable_thinking": False}
+
+_IMAGE_VISUAL_MEMORY_PROMPT = """Describe this image for visual memory retrieval.
+
+Output final Markdown directly; do not include reasoning.
+
+Focus on:
+1. Key visible objects and their names, brands, colors, labels, quantities.
+2. Spatial relationships and relative positions: left/right/above/below/center,
+   foreground/background, nearby objects, and supporting surfaces.
+3. Location-query facts, e.g. "the milk carton is center-left, to the right of
+   X and to the left of Y".
+4. Important visible text, but extract only useful labels/interface text; do
+   not exhaustively OCR every key or menu item if that would crowd out object
+   locations.
+
+Do NOT describe the parser, assistant, or ChatGPT as processing the image.
+If "ChatGPT" is visible, list it only as visible interface text.
+"""
+
+
+class _NoThinkingRequestDefaultsClient:  # forwards chat() only
+    """Inject default no-thinking request params for OpenAI-compatible servers."""
+
+    def __init__(self, inner: LLMClient) -> None:
+        self._inner = inner  # wrapped client that performs the actual request
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        *,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        response_format: type[BaseModel] | None = None,
+        **extra: Any,
+    ) -> ChatResponse:
+        return await self._inner.chat(
+            messages,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format=response_format,
+            **_with_no_thinking_defaults(extra),  # adds extra_body defaults
+        )
+
+
+class _MultimodalImageDetailCompatClient:  # used only for multimodal parsing
+    """Normalise multimodal requests before delegating to the wrapped client.
+
+    Fills a missing ``image_url.detail`` with ``"auto"`` (some gateways reject
+    the null that everalgo-core 0.2.0 serialises), optionally resizes inline
+    images, swaps in the visual-memory prompt, and adds no-thinking defaults.
+    """
+
+    def __init__(self, inner: LLMClient, *, resize_images_for_vlm: bool) -> None:
+        self._inner = inner
+        self._resize_images_for_vlm = resize_images_for_vlm
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        *,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        response_format: type[BaseModel] | None = None,
+        **extra: Any,
+    ) -> ChatResponse:
+        return await self._inner.chat(
+            [
+                _with_multimodal_image_defaults(
+                    m,
+                    resize_images_for_vlm=self._resize_images_for_vlm,
+                )
+                for m in messages
+            ],
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format=response_format,
+            **_with_no_thinking_defaults(extra),
+        )
+
+
+def _with_no_thinking_defaults(extra: dict[str, Any]) -> dict[str, Any]:
+    """Return request kwargs with no-thinking enabled unless caller overrides."""
+    patched = dict(extra)  # copy each level so the caller's dicts are not mutated
+    extra_body = dict(patched.get("extra_body") or {})
+    chat_template_kwargs = dict(extra_body.get(_NO_THINKING_EXTRA_BODY_KEY) or {})
+    chat_template_kwargs.setdefault(  # an explicit caller value is kept
+        "enable_thinking", _NO_THINKING_PARAM["enable_thinking"]
+    )
+    extra_body[_NO_THINKING_EXTRA_BODY_KEY] = chat_template_kwargs
+    patched["extra_body"] = extra_body  # NOTE(review): verify all backends accept it
+    return patched
+
+
+def _with_multimodal_image_defaults(
+    message: ChatMessage, *, resize_images_for_vlm: bool = True
+) -> ChatMessage:
+    """Return ``message`` with strict-gateway and visual-memory defaults applied.
+
+    Unchanged input is returned as is; otherwise a patched copy (no mutation).
+    """
+    content = message.content
+    if not isinstance(content, list):
+        return message
+
+    has_image = any(_is_image_part(part) for part in content)  # inline images only
+    instructions_added = False
+    changed = False
+    patched_parts: list[object] = []
+    for part in content:
+        patched = part
+        if isinstance(part, ImageUrlPart):
+            image_url_updates: dict[str, object] = {}
+            if part.image_url.detail is None:
+                image_url_updates["detail"] = "auto"
+            if resize_images_for_vlm:
+                resized_url = _resize_image_data_url(part.image_url.url)
+                if resized_url != part.image_url.url:
+                    image_url_updates["url"] = resized_url
+            if image_url_updates:
+                image_url = part.image_url.model_copy(update=image_url_updates)
+                patched = part.model_copy(update={"image_url": image_url})
+                changed = True
+        if has_image and not instructions_added and isinstance(patched, TextPart):
+            # First text part is final once seen, even if it already is the prompt.
+            instructions_added = True
+            if patched.text != _IMAGE_VISUAL_MEMORY_PROMPT:
+                patched = patched.model_copy(
+                    update={"text": _IMAGE_VISUAL_MEMORY_PROMPT}
+                )
+                changed = True
+        patched_parts.append(patched)
+
+    if not changed:
+        return message
+    return message.model_copy(update={"content": patched_parts})
+
+
+def _is_image_part(part: object) -> bool:  # True only for inline data:image/ URLs
+    return (
+        isinstance(part, ImageUrlPart)
+        and part.image_url.url.startswith("data:image/")
+    )
+
+
+def _resize_image_data_url(url: str) -> str:
+    """Resize base64 data-url images so the shorter side is _VLM_IMAGE_MIN_SIDE px."""
+    if not url.startswith("data:image/"):
+        return url
+    try:
+        header, encoded = url.split(",", 1)
+    except ValueError:
+        return url
+    if ";base64" not in header.lower():
+        return url
+
+    mime_type = header[5:].split(";", 1)[0].lower()  # text after "data:"
+    image_format = {
+        "image/jpeg": "JPEG",
+        "image/jpg": "JPEG",
+        "image/png": "PNG",
+        "image/webp": "WEBP",
+    }.get(mime_type)
+    if image_format is None:
+        return url
+
+    try:
+        from PIL import Image, ImageOps
+
+        raw = base64.b64decode(encoded, validate=True)
+        with Image.open(BytesIO(raw)) as image:
+            image = ImageOps.exif_transpose(image)
+            target_size = _image_size_with_min_side(
+                image.size, _VLM_IMAGE_MIN_SIDE
+            )
+            resized = image.resize(target_size, Image.Resampling.LANCZOS)
+            if image_format == "JPEG" and resized.mode not in ("RGB", "L"):
+                resized = resized.convert("RGB")
+            buffer = BytesIO()
+            save_kwargs: dict[str, object] = {"format": image_format}
+            if image_format == "JPEG":
+                save_kwargs["quality"] = 85
+            resized.save(buffer, **save_kwargs)
+    except (ImportError, ValueError, OSError, binascii.Error):
+        return url  # listed failures: send the image unchanged
+
+    resized_encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+    return f"{header},{resized_encoded}"  # the original data-url header is reused
+
+
+def _image_size_with_min_side(
+    size: tuple[int, int],
+    min_side: int,
+) -> tuple[int, int]:  # (width, height) scaled so the shorter side is min_side
+    width, height = size
+    shortest = min(width, height)
+    if shortest <= 0:  # degenerate size: skip scaling, avoid dividing by zero
+        return (max(1, width), max(1, height))
+    scale = min_side / shortest  # may exceed 1, i.e. small images are upscaled
+    return (max(1, round(width * scale)), max(1, round(height * scale)))
 
 
 def get_llm_client() -> LLMClient:
@@ -46,11 +259,14 @@ def get_llm_client() -> LLMClient:
         raise LLMNotConfiguredError(
             "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
         )
-    _llm_client = build_client(
-        LLMConfig(
-            model=llm_cfg.model,
-            api_key=api_key,
-            base_url=llm_cfg.base_url,
+    _llm_client = _NoThinkingRequestDefaultsClient(
+        build_client(
+            LLMConfig(
+                model=llm_cfg.model,
+                api_key=api_key,
+                base_url=llm_cfg.base_url,
+                timeout=llm_cfg.timeout_seconds,
+            )
         )
     )
     logger.info("llm_client_built", model=llm_cfg.model)
@@ -78,12 +294,16 @@ def get_multimodal_llm_client() -> LLMClient:
             "Multimodal LLM is required for parsing; set "
             "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
         )
-    _multimodal_client = build_client(
-        LLMConfig(
-            model=cfg.model,
-            api_key=api_key,
-            base_url=cfg.base_url,
-        )
+    _multimodal_client = _MultimodalImageDetailCompatClient(
+        build_client(
+            LLMConfig(
+                model=cfg.model,
+                api_key=api_key,
+                base_url=cfg.base_url,
+                timeout=cfg.timeout_seconds,
+            )
+        ),
+        resize_images_for_vlm=cfg.resize_images_for_vlm,
     )
     logger.info("multimodal_llm_client_built", model=cfg.model)
     return _multimodal_client
diff --git a/src/everos/component/llm/factory.py b/src/everos/component/llm/factory.py
index d0db74f..3c9a944 100644
--- a/src/everos/component/llm/factory.py
+++ b/src/everos/component/llm/factory.py
@@ -42,4 +42,5 @@ def build_llm_provider(settings: LLMSettings) -> LLMClient:
         model=settings.model,
         api_key=settings.api_key.get_secret_value(),
         base_url=settings.base_url,
+        timeout=settings.timeout_seconds,
     )
diff --git a/src/everos/config/default.toml b/src/everos/config/default.toml
index 1bea9a5..3f96b06 100644
--- a/src/everos/config/default.toml
+++ b/src/everos/config/default.toml
@@ -56,17 +56,21 @@ cache_size_kb = 2048
 
 [llm]
 # Provider-agnostic OpenAI-protocol client config. Override via env:
-#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
+#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL, EVEROS_LLM__TIMEOUT_SECONDS
 # Or via a ``.env`` file next to the project root (auto-loaded).
 model = "gpt-4o-mini"
+timeout_seconds = 180.0
 # api_key = ""
 # base_url = ""
 
 [multimodal]
 # Independent LLM for multimodal parsing (everalgo-parser); must accept
 # image / pdf / audio image_url parts. Override via env:
-#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
+#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL,
+#   EVEROS_MULTIMODAL__TIMEOUT_SECONDS, EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
 model = "google/gemini-3-flash-preview"
+timeout_seconds = 180.0
+resize_images_for_vlm = true
 max_concurrency = 4
 # api_key = ""
 # base_url = ""
diff --git a/src/everos/config/settings.py b/src/everos/config/settings.py
index 98337f4..c9b672b 100644
--- a/src/everos/config/settings.py
+++ b/src/everos/config/settings.py
@@ -121,11 +121,13 @@ class LLMSettings(BaseModel):
         EVEROS_LLM__MODEL
         EVEROS_LLM__API_KEY
         EVEROS_LLM__BASE_URL
+        EVEROS_LLM__TIMEOUT_SECONDS
     """
 
     model: str = "gpt-4o-mini"
     api_key: SecretStr | None = None
     base_url: str | None = None
+    timeout_seconds: float = Field(default=180.0, gt=0)
 
 
 class MultimodalSettings(BaseModel):
@@ -140,6 +142,8 @@ class MultimodalSettings(BaseModel):
         EVEROS_MULTIMODAL__MODEL
         EVEROS_MULTIMODAL__API_KEY
         EVEROS_MULTIMODAL__BASE_URL
+        EVEROS_MULTIMODAL__TIMEOUT_SECONDS
+        EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
         EVEROS_MULTIMODAL__MAX_CONCURRENCY
         EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
         EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
@@ -148,6 +152,8 @@ class MultimodalSettings(BaseModel):
     model: str = "google/gemini-3-flash-preview"
     api_key: SecretStr | None = None
     base_url: str | None = None
+    timeout_seconds: float = Field(default=180.0, gt=0)
+    resize_images_for_vlm: bool = True
     max_concurrency: int = 4
 
     # ``file://`` content-item support (read locally by EverOS, not everalgo).
diff --git a/src/everos/memory/cascade/watcher.py b/src/everos/memory/cascade/watcher.py
index 49f2a9b..a001247 100644
--- a/src/everos/memory/cascade/watcher.py
+++ b/src/everos/memory/cascade/watcher.py
@@ -47,6 +47,7 @@ class CascadeWatcher:
         self._observer = Observer()
         self._handler = _Handler(memory_root, loop)
         self._started = False
+        self._observer_started = False
 
     def start(self) -> None:
         if self._started:
@@ -54,18 +55,31 @@ class CascadeWatcher:
         # The memory root is created lazily by other layers; watchdog
         # rejects non-existent paths so we ensure it exists here.
         self._memory_root.ensure()
-        self._observer.schedule(
-            self._handler, str(self._memory_root.root), recursive=True
-        )
-        self._observer.start()
+        watch_roots = _watch_roots(self._memory_root.root)
+        for root in watch_roots:
+            self._observer.schedule(self._handler, str(root), recursive=True)
+        if watch_roots:
+            self._observer.start()
+            self._observer_started = True
+        else:
+            logger.warning(
+                "cascade_watcher_no_user_visible_roots",
+                root=str(self._memory_root.root),
+            )
         self._started = True
-        logger.info("cascade_watcher_started", root=str(self._memory_root.root))
+        logger.info(
+            "cascade_watcher_started",
+            root=str(self._memory_root.root),
+            watched_roots=[str(root) for root in watch_roots],
+        )
 
     def stop(self) -> None:
         if not self._started:
             return
-        self._observer.stop()
-        self._observer.join(timeout=5)
+        if self._observer_started:
+            self._observer.stop()
+            self._observer.join(timeout=5)
+            self._observer_started = False
         self._started = False
         logger.info("cascade_watcher_stopped")
 
@@ -163,6 +177,22 @@ def _relative_to_root(root: Path, raw: str) -> str | None:
     return rel.as_posix()
 
 
+def _watch_roots(root: Path) -> list[Path]:
+    """Return user-visible top-level dirs to watch, excluding system dot dirs."""
+    try:
+        children = list(root.iterdir())  # snapshot taken at call time
+    except OSError:
+        return []  # unreadable root: nothing to watch
+    return sorted(
+        (
+            child
+            for child in children
+            if child.is_dir() and not child.name.startswith(".")
+        ),
+        key=lambda p: p.name,  # name order keeps the result deterministic
+    )
+
+
 def _safe_mtime(raw: str) -> float:
     """Return mtime in seconds, falling back to 0.0 on stat failure."""
     try:
diff --git a/src/everos/memory/extract/ingest/multimodal.py b/src/everos/memory/extract/ingest/multimodal.py
index 45fc8ab..30ca0bb 100644
--- a/src/everos/memory/extract/ingest/multimodal.py
+++ b/src/everos/memory/extract/ingest/multimodal.py
@@ -17,6 +17,11 @@ from everos.core.observability.logging import get_logger
 
 logger = get_logger(__name__)
 
+_IMAGE_VISUAL_FACTS_NOTE = (
+    "Context: image visual facts extracted from an uploaded image; "
+    "treat these as image content, not assistant actions."
+)
+
 
 def coerce_items(
     content: str | list[dict[str, Any]] | list[Any],
@@ -83,6 +88,8 @@ def _render_item(item: dict[str, Any]) -> str | None:
     kind = str(item.get("type") or "file").upper()
     name = item.get("name") or ""
     tag = f"[{kind}: {name}]" if name else f"[{kind}]"
+    if kind == "IMAGE":
+        return f"{tag}\n{_IMAGE_VISUAL_FACTS_NOTE}\n{parsed}"
     return f"{tag}\n{parsed}"
 
 
diff --git a/src/everos/templates/env.template b/src/everos/templates/env.template
index b287b18..a3878f2 100755
--- a/src/everos/templates/env.template
+++ b/src/everos/templates/env.template
@@ -33,6 +33,8 @@
 EVEROS_LLM__MODEL=openai/gpt-4.1-mini
 EVEROS_LLM__API_KEY=
 EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
+# Per-request chat-completions timeout in seconds (default 180):
+# EVEROS_LLM__TIMEOUT_SECONDS=180
 
 
 # ─── Multimodal LLM (independent from [llm]; vision/audio capable) ────
@@ -43,6 +45,11 @@ EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
 EVEROS_MULTIMODAL__MODEL=google/gemini-3-flash-preview
 EVEROS_MULTIMODAL__API_KEY=
 EVEROS_MULTIMODAL__BASE_URL=https://openrouter.ai/api/v1
+# Per-request multimodal chat-completions timeout in seconds (default 180):
+# EVEROS_MULTIMODAL__TIMEOUT_SECONDS=180
+# Resize inline (base64 data-URL) images to a fixed shorter-side length, keeping
+# the aspect ratio, before sending them to the VLM (default true):
+# EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM=true
 # Concurrency cap for parallel multimodal calls (default 4):
 # EVEROS_MULTIMODAL__MAX_CONCURRENCY=4
 #
diff --git a/tests/unit/test_component/test_llm/test_client.py b/tests/unit/test_component/test_llm/test_client.py
index dd9eff2..397591c 100644
--- a/tests/unit/test_component/test_llm/test_client.py
+++ b/tests/unit/test_component/test_llm/test_client.py
@@ -2,20 +2,30 @@
 
 from __future__ import annotations
 
+import base64
 import importlib
+from io import BytesIO
 
 import pytest
+from everalgo.llm.types import (
+    ChatMessage,
+    ChatResponse,
+    ImageUrlInner,
+    ImageUrlPart,
+    TextPart,
+)
 from pydantic import SecretStr
 
 from everos.component.llm import LLMNotConfiguredError
 from everos.config import Settings
-from everos.config.settings import LLMSettings
+from everos.config.settings import LLMSettings, MultimodalSettings
 
 _client_mod = importlib.import_module("everos.component.llm.client")
 
 
 def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setattr(_client_mod, "_llm_client", None, raising=False)
+    monkeypatch.setattr(_client_mod, "_multimodal_client", None, raising=False)
 
 
 def _patch_settings(
@@ -23,6 +33,7 @@ def _patch_settings(
     *,
     api_key: str | None,
     base_url: str | None,
+    timeout_seconds: float | None = None,
 ) -> None:
     """Stub the ``load_settings`` reference bound inside the client module."""
     cfg = Settings(
@@ -30,11 +41,86 @@ def _patch_settings(
             model="gpt-4o-mini",
             api_key=SecretStr(api_key) if api_key is not None else None,
             base_url=base_url,
+            **(
+                {"timeout_seconds": timeout_seconds}
+                if timeout_seconds is not None
+                else {}
+            ),
         )
     )
     monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
 
 
+def _patch_multimodal_settings(
+    monkeypatch: pytest.MonkeyPatch,
+    *,
+    api_key: str | None,
+    base_url: str | None,
+    timeout_seconds: float | None = None,
+    resize_images_for_vlm: bool | None = None,
+) -> None:
+    cfg = Settings(
+        multimodal=MultimodalSettings(
+            model="vision-model",
+            api_key=SecretStr(api_key) if api_key is not None else None,
+            base_url=base_url,
+            **(
+                {"timeout_seconds": timeout_seconds}
+                if timeout_seconds is not None
+                else {}
+            ),
+            **(
+                {"resize_images_for_vlm": resize_images_for_vlm}
+                if resize_images_for_vlm is not None
+                else {}
+            ),
+        )
+    )
+    monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
+
+
+class _CapturingLLM:
+    def __init__(self) -> None:
+        self.messages: list[ChatMessage] | None = None
+        self.kwargs: dict[str, object] | None = None
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        **kwargs: object,
+    ) -> ChatResponse:
+        self.messages = messages
+        self.kwargs = kwargs
+        return ChatResponse(content="ok", model="fake")
+
+
+def _assert_no_thinking_param(kwargs: dict[str, object] | None) -> None:
+    assert kwargs is not None
+    extra_body = kwargs.get("extra_body")
+    assert isinstance(extra_body, dict)
+    chat_template_kwargs = extra_body.get("chat_template_kwargs")
+    assert isinstance(chat_template_kwargs, dict)
+    assert chat_template_kwargs["enable_thinking"] is False
+
+
+def _png_data_url(size: tuple[int, int]) -> str:
+    from PIL import Image
+
+    image = Image.new("RGB", size, color=(255, 0, 0))
+    buffer = BytesIO()
+    image.save(buffer, format="PNG")
+    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+    return f"data:image/png;base64,{encoded}"
+
+
+def _data_url_image_size(data_url: str) -> tuple[int, int]:
+    from PIL import Image
+
+    _, encoded = data_url.split(",", 1)
+    with Image.open(BytesIO(base64.b64decode(encoded))) as image:
+        return image.size
+
+
 def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None:
     _reset_singleton(monkeypatch)
     _patch_settings(monkeypatch, api_key=None, base_url="https://example.test")
@@ -60,5 +146,295 @@ def test_returns_singleton_when_configured(monkeypatch: pytest.MonkeyPatch) -> N
     first = _client_mod.get_llm_client()
     second = _client_mod.get_llm_client()
 
-    assert first is sentinel
     assert first is second
+    assert first._inner is sentinel
+
+
+@pytest.mark.asyncio
+async def test_llm_client_defaults_to_no_thinking_param(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_settings(monkeypatch, api_key="sk-test", base_url="https://example.test")
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+
+    client = _client_mod.get_llm_client()
+    await client.chat([ChatMessage(role="user", content="hello")])
+
+    _assert_no_thinking_param(captured.kwargs)
+
+
+def test_llm_client_passes_configured_timeout(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+        timeout_seconds=180.0,
+    )
+    captured_configs = []
+    sentinel = object()
+
+    def capture_build_client(cfg):
+        captured_configs.append(cfg)
+        return sentinel
+
+    monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
+
+    client = _client_mod.get_llm_client()
+    assert client._inner is sentinel
+    assert captured_configs[0].timeout == 180.0
+
+
+def test_multimodal_client_passes_configured_timeout(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+        timeout_seconds=240.0,
+    )
+    captured_configs = []
+    sentinel = _CapturingLLM()
+
+    def capture_build_client(cfg):
+        captured_configs.append(cfg)
+        return sentinel
+
+    monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
+
+    _client_mod.get_multimodal_llm_client()
+    assert captured_configs[0].timeout == 240.0
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_sets_default_image_detail(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="describe"),
+            ImageUrlPart(
+                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
+            ),
+        ],
+    )
+
+    await client.chat([original], max_tokens=10)
+
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_image = sent_content[1]
+    assert isinstance(sent_image, ImageUrlPart)
+    assert sent_image.image_url.detail == "auto"
+
+    original_content = original.content
+    assert isinstance(original_content, list)
+    original_image = original_content[1]
+    assert isinstance(original_image, ImageUrlPart)
+    assert original_image.image_url.detail is None
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_adds_visual_memory_instructions(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="Read this image and return its content."),
+            ImageUrlPart(
+                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
+            ),
+        ],
+    )
+
+    await client.chat([original], max_tokens=10)
+
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_text = sent_content[0]
+    assert isinstance(sent_text, TextPart)
+    sent_text_lower = sent_text.text.lower()
+    assert "spatial relationships" in sent_text_lower
+    assert "relative positions" in sent_text_lower
+    assert "Do NOT describe the parser, assistant, or ChatGPT" in sent_text.text
+
+    original_content = original.content
+    assert isinstance(original_content, list)
+    original_text = original_content[0]
+    assert isinstance(original_text, TextPart)
+    assert "spatial relationships" not in original_text.text
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_defaults_to_no_thinking_param(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="Read this image and return its content."),
+            ImageUrlPart(
+                image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
+            ),
+        ],
+    )
+
+    await client.chat(
+        [original],
+        max_tokens=10,
+        extra_body={"provider": {"only": ["test"]}},
+    )
+
+    _assert_no_thinking_param(captured.kwargs)
+    assert captured.kwargs is not None
+    extra_body = captured.kwargs["extra_body"]
+    assert isinstance(extra_body, dict)
+    assert extra_body["provider"] == {"only": ["test"]}
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_text = sent_content[0]
+    assert isinstance(sent_text, TextPart)
+    assert "/no_think" not in sent_text.text
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_resizes_landscape_image_to_64_min_side_by_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+    image_url = _png_data_url((640, 480))
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="describe"),
+            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
+        ],
+    )
+
+    await client.chat([original], max_tokens=10)
+
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_image = sent_content[1]
+    assert isinstance(sent_image, ImageUrlPart)
+    assert _data_url_image_size(sent_image.image_url.url) == (85, 64)
+    assert _data_url_image_size(image_url) == (640, 480)
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_resizes_portrait_image_to_64_min_side_by_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+    image_url = _png_data_url((480, 640))
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="describe"),
+            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
+        ],
+    )
+
+    await client.chat([original], max_tokens=10)
+
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_image = sent_content[1]
+    assert isinstance(sent_image, ImageUrlPart)
+    assert _data_url_image_size(sent_image.image_url.url) == (64, 85)
+    assert _data_url_image_size(image_url) == (480, 640)
+
+
+@pytest.mark.asyncio
+async def test_multimodal_client_keeps_image_when_resize_disabled(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _reset_singleton(monkeypatch)
+    _patch_multimodal_settings(
+        monkeypatch,
+        api_key="sk-test",
+        base_url="https://example.test",
+        resize_images_for_vlm=False,
+    )
+    captured = _CapturingLLM()
+    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
+    image_url = _png_data_url((640, 480))
+
+    client = _client_mod.get_multimodal_llm_client()
+    original = ChatMessage(
+        role="user",
+        content=[
+            TextPart(text="describe"),
+            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
+        ],
+    )
+
+    await client.chat([original], max_tokens=10)
+
+    assert captured.messages is not None
+    sent_content = captured.messages[0].content
+    assert isinstance(sent_content, list)
+    sent_image = sent_content[1]
+    assert isinstance(sent_image, ImageUrlPart)
+    assert sent_image.image_url.url == image_url
diff --git a/tests/unit/test_component/test_llm/test_factory.py b/tests/unit/test_component/test_llm/test_factory.py
index 4329fce..c543dbf 100644
--- a/tests/unit/test_component/test_llm/test_factory.py
+++ b/tests/unit/test_component/test_llm/test_factory.py
@@ -6,6 +6,7 @@ import pytest
 from pydantic import SecretStr
 
 from everos.component.llm import build_llm_provider
+from everos.component.llm import factory as factory_mod
 from everos.component.llm.openai_provider import OpenAIProvider
 from everos.config.settings import LLMSettings
 
@@ -26,3 +27,23 @@ def test_builds_openai_provider() -> None:
     s = LLMSettings(model="m", api_key=SecretStr("k"), base_url="https://x")
     p = build_llm_provider(s)
     assert isinstance(p, OpenAIProvider)
+
+
+def test_passes_configured_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
+    captured_kwargs = {}  # filled with the kwargs the factory passes on
+    sentinel = object()  # unique object for the identity check below
+
+    def capture_provider(**kwargs):  # replaces OpenAIProvider in the factory
+        captured_kwargs.update(kwargs)
+        return sentinel
+
+    monkeypatch.setattr(factory_mod, "OpenAIProvider", capture_provider)
+    s = LLMSettings(
+        model="m",
+        api_key=SecretStr("k"),
+        base_url="https://x",
+        timeout_seconds=240.0,  # expected to arrive as the `timeout` kwarg
+    )
+
+    assert build_llm_provider(s) is sentinel  # the patched callable was used
+    assert captured_kwargs["timeout"] == 240.0  # settings value forwarded
diff --git a/tests/unit/test_config/test_settings.py b/tests/unit/test_config/test_settings.py
index a8fc5ca..373f967 100644
--- a/tests/unit/test_config/test_settings.py
+++ b/tests/unit/test_config/test_settings.py
@@ -105,6 +105,9 @@ def test_embedding_rerank_defaults() -> None:
     assert s.embedding.model is None
     assert s.embedding.api_key is None
     assert s.embedding.base_url is None
+    assert s.llm.timeout_seconds == 180.0
+    assert s.multimodal.timeout_seconds == 180.0
+    assert s.multimodal.resize_images_for_vlm is True
     # Runtime knobs come from default.toml.
     assert s.embedding.timeout_seconds == 30.0
     assert s.embedding.max_retries == 3
@@ -126,6 +129,16 @@ def test_embedding_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
     assert s.embedding.batch_size == 32
 
 
+def test_llm_timeout_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("EVEROS_LLM__TIMEOUT_SECONDS", "240")
+    monkeypatch.setenv("EVEROS_MULTIMODAL__TIMEOUT_SECONDS", "300")
+    monkeypatch.setenv("EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM", "false")
+    s = Settings()  # built only after the env vars above are set
+    assert s.llm.timeout_seconds == 240.0  # "240" was parsed to a number
+    assert s.multimodal.timeout_seconds == 300.0  # "300" parsed likewise
+    assert s.multimodal.resize_images_for_vlm is False  # "false" -> bool
+
+
 def test_rerank_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("EVEROS_RERANK__MODEL", "BAAI/bge-reranker-v2-m3")
     monkeypatch.setenv("EVEROS_RERANK__MAX_CONCURRENT", "8")
diff --git a/tests/unit/test_memory/test_cascade/test_watcher_helpers.py b/tests/unit/test_memory/test_cascade/test_watcher_helpers.py
index 772e247..06e4cc8 100644
--- a/tests/unit/test_memory/test_cascade/test_watcher_helpers.py
+++ b/tests/unit/test_memory/test_cascade/test_watcher_helpers.py
@@ -9,7 +9,11 @@ from __future__ import annotations
 
 from pathlib import Path
 
-from everos.memory.cascade.watcher import _relative_to_root, _safe_mtime
+from everos.memory.cascade.watcher import (
+    _relative_to_root,
+    _safe_mtime,
+    _watch_roots,
+)
 
 
 def test_relative_to_root_within(tmp_path: Path) -> None:
@@ -34,3 +38,14 @@ def test_safe_mtime_existing_path_returns_positive(tmp_path: Path) -> None:
     f = tmp_path / "f.md"
     f.write_text("ok")
     assert _safe_mtime(str(f)) > 0
+
+
+def test_watch_roots_excludes_system_dot_dirs(tmp_path: Path) -> None:
+    (tmp_path / ".index" / "lancedb" / "episode").mkdir(parents=True)
+    (tmp_path / ".tmp").mkdir()  # second dot-prefixed dir under the root
+    (tmp_path / "default_app" / "default_project" / "users").mkdir(parents=True)
+    (tmp_path / "default_app" / "default_project" / "agents").mkdir()
+
+    roots = _watch_roots(tmp_path)
+
+    assert roots == [tmp_path / "default_app"]  # only the non-dot top-level dir
diff --git a/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py b/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py
index 827e243..ccdd751 100644
--- a/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py
+++ b/tests/unit/test_memory/test_extract/test_ingest/test_multimodal.py
@@ -21,7 +21,10 @@ def test_derive_text_renders_parsed_nontext_as_tag() -> None:
     ]
     text, non_text = derive_text(items)
 
-    assert "[IMAGE: p.png]\nOCR TEXT" in text
+    assert "[IMAGE: p.png]" in text
+    assert "image visual facts" in text
+    assert "not assistant actions" in text
+    assert text.index("image visual facts") < text.index("OCR TEXT")
     assert text.startswith("before")
     assert text.endswith("after")
     assert non_text == 0