Save local modifications for syncing
Some checks failed
CI / lint (push) Has been cancelled
CI / unit tests (push) Has been cancelled
CI / integration tests (push) Has been cancelled
CI / package build (push) Has been cancelled
Commit lint / pull request title (push) Has been cancelled
Commit lint / commit messages (push) Has been cancelled

This commit is contained in:
2026-06-10 10:05:52 +08:00
parent 9fc6ad20d2
commit 0910affc78
13 changed files with 738 additions and 24 deletions

View File

@ -24,6 +24,17 @@
model = "gpt-4o-mini" model = "gpt-4o-mini"
api_key = "sk-..." api_key = "sk-..."
base_url = "https://api.openai.com/v1" base_url = "https://api.openai.com/v1"
timeout_seconds = 180.0
# ── Multimodal LLM ───────────────────────────────────
# Independent vision/audio-capable chat-completions endpoint for parsing.
[multimodal]
model = "google/gemini-3-flash-preview"
api_key = "sk-..."
base_url = "https://openrouter.ai/api/v1"
timeout_seconds = 180.0
resize_images_for_vlm = true
max_concurrency = 4
# ── Embedding ───────────────────────────────────────── # ── Embedding ─────────────────────────────────────────
[embedding] [embedding]

View File

@ -9,9 +9,16 @@ provider) instead of silently failing per-request downstream.
from __future__ import annotations from __future__ import annotations
import base64
import binascii
from io import BytesIO
from typing import Any
from everalgo.llm import build_client from everalgo.llm import build_client
from everalgo.llm.config import LLMConfig from everalgo.llm.config import LLMConfig
from everalgo.llm.protocols import LLMClient from everalgo.llm.protocols import LLMClient
from everalgo.llm.types import ChatMessage, ChatResponse, ImageUrlPart, TextPart
from pydantic import BaseModel
from everos.config import load_settings from everos.config import load_settings
from everos.core.observability.logging import get_logger from everos.core.observability.logging import get_logger
@ -25,6 +32,212 @@ class LLMNotConfiguredError(RuntimeError):
_llm_client: LLMClient | None = None _llm_client: LLMClient | None = None
_multimodal_client: LLMClient | None = None _multimodal_client: LLMClient | None = None
_VLM_IMAGE_MIN_SIDE = 1024
_NO_THINKING_EXTRA_BODY_KEY = "chat_template_kwargs"
_NO_THINKING_PARAM = {"enable_thinking": False}
_IMAGE_VISUAL_MEMORY_PROMPT = """Describe this image for visual memory retrieval.
Output final Markdown directly; do not include reasoning.
Focus on:
1. Key visible objects and their names, brands, colors, labels, quantities.
2. Spatial relationships and relative positions: left/right/above/below/center,
foreground/background, nearby objects, and supporting surfaces.
3. Location-query facts, e.g. "the milk carton is center-left, to the right of
X and to the left of Y".
4. Important visible text, but extract only useful labels/interface text; do
not exhaustively OCR every key or menu item if that would crowd out object
locations.
Do NOT describe the parser, assistant, or ChatGPT as processing the image.
If "ChatGPT" is visible, list it only as visible interface text.
"""
class _NoThinkingRequestDefaultsClient:
    """Client wrapper that switches "thinking" off unless the caller opts in.

    Every ``chat`` call is delegated to the wrapped client; the only change is
    that the extra keyword arguments are first routed through
    ``_with_no_thinking_defaults``.
    """

    def __init__(self, inner: LLMClient) -> None:
        # The wrapped client that performs the real request.
        self._inner = inner

    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: type[BaseModel] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        """Delegate to the inner client with no-thinking defaults merged in."""
        request_extra = _with_no_thinking_defaults(extra)
        response = await self._inner.chat(
            messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
            **request_extra,
        )
        return response
class _MultimodalImageDetailCompatClient:
    """Client wrapper that normalises image parts before multimodal requests.

    everalgo-core 0.2.0 serialises ``image_url.detail`` as ``None`` when the
    field is unset. Some gateways reject that literal null and require one of
    OpenAI's enum values. EverOS only uses this wrapper for multimodal parsing.

    Each outgoing message is passed through ``_with_multimodal_image_defaults``
    and the extra keyword arguments through ``_with_no_thinking_defaults``.
    """

    def __init__(self, inner: LLMClient, *, resize_images_for_vlm: bool) -> None:
        # The wrapped client that performs the real request.
        self._inner = inner
        # Forwarded verbatim to ``_with_multimodal_image_defaults``.
        self._resize_images_for_vlm = resize_images_for_vlm

    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: type[BaseModel] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        """Patch every message, then delegate to the inner client."""
        patched_messages = []
        for original in messages:
            patched_messages.append(
                _with_multimodal_image_defaults(
                    original,
                    resize_images_for_vlm=self._resize_images_for_vlm,
                )
            )
        request_extra = _with_no_thinking_defaults(extra)
        response = await self._inner.chat(
            patched_messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            response_format=response_format,
            **request_extra,
        )
        return response
def _with_no_thinking_defaults(extra: dict[str, Any]) -> dict[str, Any]:
    """Build request kwargs that disable thinking unless the caller set it.

    ``extra`` and its nested ``extra_body`` / chat-template dicts are copied,
    never mutated. An ``enable_thinking`` value already supplied by the caller
    is kept as is.
    """
    body = dict(extra.get("extra_body") or {})
    template_kwargs = dict(body.get(_NO_THINKING_EXTRA_BODY_KEY) or {})
    if "enable_thinking" not in template_kwargs:
        template_kwargs["enable_thinking"] = _NO_THINKING_PARAM["enable_thinking"]
    body[_NO_THINKING_EXTRA_BODY_KEY] = template_kwargs
    return {**extra, "extra_body": body}
def _with_multimodal_image_defaults(
message: ChatMessage, *, resize_images_for_vlm: bool = True
) -> ChatMessage:
"""Return a copy with stricter-gateway + visual-memory image defaults."""
content = message.content
if not isinstance(content, list):
return message
has_image = any(_is_image_part(part) for part in content)
instructions_added = False
changed = False
patched_parts: list[object] = []
for part in content:
patched = part
if isinstance(part, ImageUrlPart):
image_url_updates: dict[str, object] = {}
if part.image_url.detail is None:
image_url_updates["detail"] = "auto"
if resize_images_for_vlm:
resized_url = _resize_image_data_url(part.image_url.url)
if resized_url != part.image_url.url:
image_url_updates["url"] = resized_url
if image_url_updates:
image_url = part.image_url.model_copy(update=image_url_updates)
patched = part.model_copy(update={"image_url": image_url})
changed = True
if (
has_image
and not instructions_added
and isinstance(patched, TextPart)
and patched.text != _IMAGE_VISUAL_MEMORY_PROMPT
):
patched = patched.model_copy(
update={"text": _IMAGE_VISUAL_MEMORY_PROMPT}
)
instructions_added = True
changed = True
patched_parts.append(patched)
if not changed:
return message
return message.model_copy(update={"content": patched_parts})
def _is_image_part(part: object) -> bool:
    """Tell whether *part* is an ``ImageUrlPart`` with an inline ``data:image/`` URL."""
    if not isinstance(part, ImageUrlPart):
        return False
    return part.image_url.url.startswith("data:image/")
def _resize_image_data_url(url: str) -> str:
"""Resize base64 data-url images so the shorter side is 64 pixels."""
if not url.startswith("data:image/"):
return url
try:
header, encoded = url.split(",", 1)
except ValueError:
return url
if ";base64" not in header.lower():
return url
mime_type = header[5:].split(";", 1)[0].lower()
image_format = {
"image/jpeg": "JPEG",
"image/jpg": "JPEG",
"image/png": "PNG",
"image/webp": "WEBP",
}.get(mime_type)
if image_format is None:
return url
try:
from PIL import Image, ImageOps
raw = base64.b64decode(encoded, validate=True)
with Image.open(BytesIO(raw)) as image:
image = ImageOps.exif_transpose(image)
target_size = _image_size_with_min_side(
image.size, _VLM_IMAGE_MIN_SIDE
)
resized = image.resize(target_size, Image.Resampling.LANCZOS)
if image_format == "JPEG" and resized.mode not in ("RGB", "L"):
resized = resized.convert("RGB")
buffer = BytesIO()
save_kwargs: dict[str, object] = {"format": image_format}
if image_format == "JPEG":
save_kwargs["quality"] = 85
resized.save(buffer, **save_kwargs)
except (ImportError, ValueError, OSError, binascii.Error):
return url
resized_encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
return f"{header},{resized_encoded}"
def _image_size_with_min_side(
size: tuple[int, int],
min_side: int,
) -> tuple[int, int]:
width, height = size
shortest = min(width, height)
if shortest <= 0:
return (max(1, width), max(1, height))
scale = min_side / shortest
return (max(1, round(width * scale)), max(1, round(height * scale)))
def get_llm_client() -> LLMClient: def get_llm_client() -> LLMClient:
@ -46,11 +259,14 @@ def get_llm_client() -> LLMClient:
raise LLMNotConfiguredError( raise LLMNotConfiguredError(
"LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL" "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
) )
_llm_client = build_client( _llm_client = _NoThinkingRequestDefaultsClient(
LLMConfig( build_client(
model=llm_cfg.model, LLMConfig(
api_key=api_key, model=llm_cfg.model,
base_url=llm_cfg.base_url, api_key=api_key,
base_url=llm_cfg.base_url,
timeout=llm_cfg.timeout_seconds,
)
) )
) )
logger.info("llm_client_built", model=llm_cfg.model) logger.info("llm_client_built", model=llm_cfg.model)
@ -78,12 +294,16 @@ def get_multimodal_llm_client() -> LLMClient:
"Multimodal LLM is required for parsing; set " "Multimodal LLM is required for parsing; set "
"EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL" "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
) )
_multimodal_client = build_client( _multimodal_client = _MultimodalImageDetailCompatClient(
LLMConfig( build_client(
model=cfg.model, LLMConfig(
api_key=api_key, model=cfg.model,
base_url=cfg.base_url, api_key=api_key,
) base_url=cfg.base_url,
timeout=cfg.timeout_seconds,
)
),
resize_images_for_vlm=cfg.resize_images_for_vlm,
) )
logger.info("multimodal_llm_client_built", model=cfg.model) logger.info("multimodal_llm_client_built", model=cfg.model)
return _multimodal_client return _multimodal_client

View File

@ -42,4 +42,5 @@ def build_llm_provider(settings: LLMSettings) -> LLMClient:
model=settings.model, model=settings.model,
api_key=settings.api_key.get_secret_value(), api_key=settings.api_key.get_secret_value(),
base_url=settings.base_url, base_url=settings.base_url,
timeout=settings.timeout_seconds,
) )

View File

@ -56,17 +56,21 @@ cache_size_kb = 2048
[llm] [llm]
# Provider-agnostic OpenAI-protocol client config. Override via env: # Provider-agnostic OpenAI-protocol client config. Override via env:
# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL # EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL, EVEROS_LLM__TIMEOUT_SECONDS
# Or via a ``.env`` file next to the project root (auto-loaded). # Or via a ``.env`` file next to the project root (auto-loaded).
model = "gpt-4o-mini" model = "gpt-4o-mini"
timeout_seconds = 180.0
# api_key = "" # api_key = ""
# base_url = "" # base_url = ""
[multimodal] [multimodal]
# Independent LLM for multimodal parsing (everalgo-parser); must accept # Independent LLM for multimodal parsing (everalgo-parser); must accept
# image / pdf / audio image_url parts. Override via env: # image / pdf / audio image_url parts. Override via env:
# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL # EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL,
# EVEROS_MULTIMODAL__TIMEOUT_SECONDS, EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
model = "google/gemini-3-flash-preview" model = "google/gemini-3-flash-preview"
timeout_seconds = 180.0
resize_images_for_vlm = true
max_concurrency = 4 max_concurrency = 4
# api_key = "" # api_key = ""
# base_url = "" # base_url = ""

View File

@ -121,11 +121,13 @@ class LLMSettings(BaseModel):
EVEROS_LLM__MODEL EVEROS_LLM__MODEL
EVEROS_LLM__API_KEY EVEROS_LLM__API_KEY
EVEROS_LLM__BASE_URL EVEROS_LLM__BASE_URL
EVEROS_LLM__TIMEOUT_SECONDS
""" """
model: str = "gpt-4o-mini" model: str = "gpt-4o-mini"
api_key: SecretStr | None = None api_key: SecretStr | None = None
base_url: str | None = None base_url: str | None = None
timeout_seconds: float = Field(default=180.0, gt=0)
class MultimodalSettings(BaseModel): class MultimodalSettings(BaseModel):
@ -140,6 +142,8 @@ class MultimodalSettings(BaseModel):
EVEROS_MULTIMODAL__MODEL EVEROS_MULTIMODAL__MODEL
EVEROS_MULTIMODAL__API_KEY EVEROS_MULTIMODAL__API_KEY
EVEROS_MULTIMODAL__BASE_URL EVEROS_MULTIMODAL__BASE_URL
EVEROS_MULTIMODAL__TIMEOUT_SECONDS
EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM
EVEROS_MULTIMODAL__MAX_CONCURRENCY EVEROS_MULTIMODAL__MAX_CONCURRENCY
EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
@ -148,6 +152,8 @@ class MultimodalSettings(BaseModel):
model: str = "google/gemini-3-flash-preview" model: str = "google/gemini-3-flash-preview"
api_key: SecretStr | None = None api_key: SecretStr | None = None
base_url: str | None = None base_url: str | None = None
timeout_seconds: float = Field(default=180.0, gt=0)
resize_images_for_vlm: bool = True
max_concurrency: int = 4 max_concurrency: int = 4
# ``file://`` content-item support (read locally by EverOS, not everalgo). # ``file://`` content-item support (read locally by EverOS, not everalgo).

View File

@ -47,6 +47,7 @@ class CascadeWatcher:
self._observer = Observer() self._observer = Observer()
self._handler = _Handler(memory_root, loop) self._handler = _Handler(memory_root, loop)
self._started = False self._started = False
self._observer_started = False
def start(self) -> None: def start(self) -> None:
if self._started: if self._started:
@ -54,18 +55,31 @@ class CascadeWatcher:
# The memory root is created lazily by other layers; watchdog # The memory root is created lazily by other layers; watchdog
# rejects non-existent paths so we ensure it exists here. # rejects non-existent paths so we ensure it exists here.
self._memory_root.ensure() self._memory_root.ensure()
self._observer.schedule( watch_roots = _watch_roots(self._memory_root.root)
self._handler, str(self._memory_root.root), recursive=True for root in watch_roots:
) self._observer.schedule(self._handler, str(root), recursive=True)
self._observer.start() if watch_roots:
self._observer.start()
self._observer_started = True
else:
logger.warning(
"cascade_watcher_no_user_visible_roots",
root=str(self._memory_root.root),
)
self._started = True self._started = True
logger.info("cascade_watcher_started", root=str(self._memory_root.root)) logger.info(
"cascade_watcher_started",
root=str(self._memory_root.root),
watched_roots=[str(root) for root in watch_roots],
)
def stop(self) -> None: def stop(self) -> None:
if not self._started: if not self._started:
return return
self._observer.stop() if self._observer_started:
self._observer.join(timeout=5) self._observer.stop()
self._observer.join(timeout=5)
self._observer_started = False
self._started = False self._started = False
logger.info("cascade_watcher_stopped") logger.info("cascade_watcher_stopped")
@ -163,6 +177,22 @@ def _relative_to_root(root: Path, raw: str) -> str | None:
return rel.as_posix() return rel.as_posix()
def _watch_roots(root: Path) -> list[Path]:
"""Return user-visible top-level dirs to watch, excluding system dot dirs."""
try:
children = list(root.iterdir())
except OSError:
return []
return sorted(
(
child
for child in children
if child.is_dir() and not child.name.startswith(".")
),
key=lambda p: p.name,
)
def _safe_mtime(raw: str) -> float: def _safe_mtime(raw: str) -> float:
"""Return mtime in seconds, falling back to 0.0 on stat failure.""" """Return mtime in seconds, falling back to 0.0 on stat failure."""
try: try:

View File

@ -17,6 +17,11 @@ from everos.core.observability.logging import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)
_IMAGE_VISUAL_FACTS_NOTE = (
"Context: image visual facts extracted from an uploaded image; "
"treat these as image content, not assistant actions."
)
def coerce_items( def coerce_items(
content: str | list[dict[str, Any]] | list[Any], content: str | list[dict[str, Any]] | list[Any],
@ -83,6 +88,8 @@ def _render_item(item: dict[str, Any]) -> str | None:
kind = str(item.get("type") or "file").upper() kind = str(item.get("type") or "file").upper()
name = item.get("name") or "" name = item.get("name") or ""
tag = f"[{kind}: {name}]" if name else f"[{kind}]" tag = f"[{kind}: {name}]" if name else f"[{kind}]"
if kind == "IMAGE":
return f"{tag}\n{_IMAGE_VISUAL_FACTS_NOTE}\n{parsed}"
return f"{tag}\n{parsed}" return f"{tag}\n{parsed}"

View File

@ -33,6 +33,8 @@
EVEROS_LLM__MODEL=openai/gpt-4.1-mini EVEROS_LLM__MODEL=openai/gpt-4.1-mini
EVEROS_LLM__API_KEY= EVEROS_LLM__API_KEY=
EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1 EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
# Per-request chat-completions timeout in seconds (default 180):
# EVEROS_LLM__TIMEOUT_SECONDS=180
# ─── Multimodal LLM (independent from [llm]; vision/audio capable) ──── # ─── Multimodal LLM (independent from [llm]; vision/audio capable) ────
@ -43,6 +45,11 @@ EVEROS_LLM__BASE_URL=https://openrouter.ai/api/v1
EVEROS_MULTIMODAL__MODEL=google/gemini-3-flash-preview EVEROS_MULTIMODAL__MODEL=google/gemini-3-flash-preview
EVEROS_MULTIMODAL__API_KEY= EVEROS_MULTIMODAL__API_KEY=
EVEROS_MULTIMODAL__BASE_URL=https://openrouter.ai/api/v1 EVEROS_MULTIMODAL__BASE_URL=https://openrouter.ai/api/v1
# Per-request multimodal chat-completions timeout in seconds (default 180):
# EVEROS_MULTIMODAL__TIMEOUT_SECONDS=180
# Rescale inline base64 data-URL images (JPEG/PNG/WebP) to a fixed shorter-side
# length, preserving aspect ratio, before sending them to the VLM (default true):
# EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM=true
# Concurrency cap for parallel multimodal calls (default 4): # Concurrency cap for parallel multimodal calls (default 4):
# EVEROS_MULTIMODAL__MAX_CONCURRENCY=4 # EVEROS_MULTIMODAL__MAX_CONCURRENCY=4
# #

View File

@ -2,20 +2,30 @@
from __future__ import annotations from __future__ import annotations
import base64
import importlib import importlib
from io import BytesIO
import pytest import pytest
from everalgo.llm.types import (
ChatMessage,
ChatResponse,
ImageUrlInner,
ImageUrlPart,
TextPart,
)
from pydantic import SecretStr from pydantic import SecretStr
from everos.component.llm import LLMNotConfiguredError from everos.component.llm import LLMNotConfiguredError
from everos.config import Settings from everos.config import Settings
from everos.config.settings import LLMSettings from everos.config.settings import LLMSettings, MultimodalSettings
_client_mod = importlib.import_module("everos.component.llm.client") _client_mod = importlib.import_module("everos.component.llm.client")
def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None: def _reset_singleton(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(_client_mod, "_llm_client", None, raising=False) monkeypatch.setattr(_client_mod, "_llm_client", None, raising=False)
monkeypatch.setattr(_client_mod, "_multimodal_client", None, raising=False)
def _patch_settings( def _patch_settings(
@ -23,6 +33,7 @@ def _patch_settings(
*, *,
api_key: str | None, api_key: str | None,
base_url: str | None, base_url: str | None,
timeout_seconds: float | None = None,
) -> None: ) -> None:
"""Stub the ``load_settings`` reference bound inside the client module.""" """Stub the ``load_settings`` reference bound inside the client module."""
cfg = Settings( cfg = Settings(
@ -30,11 +41,86 @@ def _patch_settings(
model="gpt-4o-mini", model="gpt-4o-mini",
api_key=SecretStr(api_key) if api_key is not None else None, api_key=SecretStr(api_key) if api_key is not None else None,
base_url=base_url, base_url=base_url,
**(
{"timeout_seconds": timeout_seconds}
if timeout_seconds is not None
else {}
),
) )
) )
monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg) monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
def _patch_multimodal_settings(
monkeypatch: pytest.MonkeyPatch,
*,
api_key: str | None,
base_url: str | None,
timeout_seconds: float | None = None,
resize_images_for_vlm: bool | None = None,
) -> None:
cfg = Settings(
multimodal=MultimodalSettings(
model="vision-model",
api_key=SecretStr(api_key) if api_key is not None else None,
base_url=base_url,
**(
{"timeout_seconds": timeout_seconds}
if timeout_seconds is not None
else {}
),
**(
{"resize_images_for_vlm": resize_images_for_vlm}
if resize_images_for_vlm is not None
else {}
),
)
)
monkeypatch.setattr(_client_mod, "load_settings", lambda: cfg)
class _CapturingLLM:
def __init__(self) -> None:
self.messages: list[ChatMessage] | None = None
self.kwargs: dict[str, object] | None = None
async def chat(
self,
messages: list[ChatMessage],
**kwargs: object,
) -> ChatResponse:
self.messages = messages
self.kwargs = kwargs
return ChatResponse(content="ok", model="fake")
def _assert_no_thinking_param(kwargs: dict[str, object] | None) -> None:
assert kwargs is not None
extra_body = kwargs.get("extra_body")
assert isinstance(extra_body, dict)
chat_template_kwargs = extra_body.get("chat_template_kwargs")
assert isinstance(chat_template_kwargs, dict)
assert chat_template_kwargs["enable_thinking"] is False
def _png_data_url(size: tuple[int, int]) -> str:
from PIL import Image
image = Image.new("RGB", size, color=(255, 0, 0))
buffer = BytesIO()
image.save(buffer, format="PNG")
encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
return f"data:image/png;base64,{encoded}"
def _data_url_image_size(data_url: str) -> tuple[int, int]:
from PIL import Image
_, encoded = data_url.split(",", 1)
with Image.open(BytesIO(base64.b64decode(encoded))) as image:
return image.size
def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None: def test_raises_when_api_key_missing(monkeypatch: pytest.MonkeyPatch) -> None:
_reset_singleton(monkeypatch) _reset_singleton(monkeypatch)
_patch_settings(monkeypatch, api_key=None, base_url="https://example.test") _patch_settings(monkeypatch, api_key=None, base_url="https://example.test")
@ -60,5 +146,295 @@ def test_returns_singleton_when_configured(monkeypatch: pytest.MonkeyPatch) -> N
first = _client_mod.get_llm_client() first = _client_mod.get_llm_client()
second = _client_mod.get_llm_client() second = _client_mod.get_llm_client()
assert first is sentinel
assert first is second assert first is second
assert first._inner is sentinel
@pytest.mark.asyncio
async def test_llm_client_defaults_to_no_thinking_param(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_settings(monkeypatch, api_key="sk-test", base_url="https://example.test")
captured = _CapturingLLM()
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
client = _client_mod.get_llm_client()
await client.chat([ChatMessage(role="user", content="hello")])
_assert_no_thinking_param(captured.kwargs)
def test_llm_client_passes_configured_timeout(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
timeout_seconds=180.0,
)
captured_configs = []
sentinel = object()
def capture_build_client(cfg):
captured_configs.append(cfg)
return sentinel
monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
client = _client_mod.get_llm_client()
assert client._inner is sentinel
assert captured_configs[0].timeout == 180.0
def test_multimodal_client_passes_configured_timeout(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_multimodal_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
timeout_seconds=240.0,
)
captured_configs = []
sentinel = _CapturingLLM()
def capture_build_client(cfg):
captured_configs.append(cfg)
return sentinel
monkeypatch.setattr(_client_mod, "build_client", capture_build_client)
_client_mod.get_multimodal_llm_client()
assert captured_configs[0].timeout == 240.0
@pytest.mark.asyncio
async def test_multimodal_client_sets_default_image_detail(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_multimodal_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
)
captured = _CapturingLLM()
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
client = _client_mod.get_multimodal_llm_client()
original = ChatMessage(
role="user",
content=[
TextPart(text="describe"),
ImageUrlPart(
image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
),
],
)
await client.chat([original], max_tokens=10)
assert captured.messages is not None
sent_content = captured.messages[0].content
assert isinstance(sent_content, list)
sent_image = sent_content[1]
assert isinstance(sent_image, ImageUrlPart)
assert sent_image.image_url.detail == "auto"
original_content = original.content
assert isinstance(original_content, list)
original_image = original_content[1]
assert isinstance(original_image, ImageUrlPart)
assert original_image.image_url.detail is None
@pytest.mark.asyncio
async def test_multimodal_client_adds_visual_memory_instructions(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_multimodal_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
)
captured = _CapturingLLM()
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
client = _client_mod.get_multimodal_llm_client()
original = ChatMessage(
role="user",
content=[
TextPart(text="Read this image and return its content."),
ImageUrlPart(
image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
),
],
)
await client.chat([original], max_tokens=10)
assert captured.messages is not None
sent_content = captured.messages[0].content
assert isinstance(sent_content, list)
sent_text = sent_content[0]
assert isinstance(sent_text, TextPart)
sent_text_lower = sent_text.text.lower()
assert "spatial relationships" in sent_text_lower
assert "relative positions" in sent_text_lower
assert "Do NOT describe the parser, assistant, or ChatGPT" in sent_text.text
original_content = original.content
assert isinstance(original_content, list)
original_text = original_content[0]
assert isinstance(original_text, TextPart)
assert "spatial relationships" not in original_text.text
@pytest.mark.asyncio
async def test_multimodal_client_defaults_to_no_thinking_param(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_multimodal_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
)
captured = _CapturingLLM()
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
client = _client_mod.get_multimodal_llm_client()
original = ChatMessage(
role="user",
content=[
TextPart(text="Read this image and return its content."),
ImageUrlPart(
image_url=ImageUrlInner(url="data:image/jpeg;base64,abcd")
),
],
)
await client.chat(
[original],
max_tokens=10,
extra_body={"provider": {"only": ["test"]}},
)
_assert_no_thinking_param(captured.kwargs)
assert captured.kwargs is not None
extra_body = captured.kwargs["extra_body"]
assert isinstance(extra_body, dict)
assert extra_body["provider"] == {"only": ["test"]}
assert captured.messages is not None
sent_content = captured.messages[0].content
assert isinstance(sent_content, list)
sent_text = sent_content[0]
assert isinstance(sent_text, TextPart)
assert "/no_think" not in sent_text.text
@pytest.mark.asyncio
async def test_multimodal_client_resizes_landscape_image_to_64_min_side_by_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A landscape image is scaled so its height hits the min-side target.

    The target is pinned to 64 px inside the test so the expected size does
    not depend on the production value of ``_VLM_IMAGE_MIN_SIDE``.
    """
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    # Read at call time by ``_resize_image_data_url``, so patching the module
    # global is enough.
    monkeypatch.setattr(_client_mod, "_VLM_IMAGE_MIN_SIDE", 64)
    image_url = _png_data_url((640, 480))

    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
        ],
    )
    await client.chat([original], max_tokens=10)

    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert _data_url_image_size(sent_image.image_url.url) == (85, 64)
    # The caller's original image must be left untouched.
    assert _data_url_image_size(image_url) == (640, 480)
@pytest.mark.asyncio
async def test_multimodal_client_resizes_portrait_image_to_64_min_side_by_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A portrait image is scaled so its width hits the min-side target.

    The target is pinned to 64 px inside the test so the expected size does
    not depend on the production value of ``_VLM_IMAGE_MIN_SIDE``.
    """
    _reset_singleton(monkeypatch)
    _patch_multimodal_settings(
        monkeypatch,
        api_key="sk-test",
        base_url="https://example.test",
    )
    captured = _CapturingLLM()
    monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
    # Read at call time by ``_resize_image_data_url``, so patching the module
    # global is enough.
    monkeypatch.setattr(_client_mod, "_VLM_IMAGE_MIN_SIDE", 64)
    image_url = _png_data_url((480, 640))

    client = _client_mod.get_multimodal_llm_client()
    original = ChatMessage(
        role="user",
        content=[
            TextPart(text="describe"),
            ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
        ],
    )
    await client.chat([original], max_tokens=10)

    assert captured.messages is not None
    sent_content = captured.messages[0].content
    assert isinstance(sent_content, list)
    sent_image = sent_content[1]
    assert isinstance(sent_image, ImageUrlPart)
    assert _data_url_image_size(sent_image.image_url.url) == (64, 85)
    # The caller's original image must be left untouched.
    assert _data_url_image_size(image_url) == (480, 640)
@pytest.mark.asyncio
async def test_multimodal_client_keeps_image_when_resize_disabled(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_reset_singleton(monkeypatch)
_patch_multimodal_settings(
monkeypatch,
api_key="sk-test",
base_url="https://example.test",
resize_images_for_vlm=False,
)
captured = _CapturingLLM()
monkeypatch.setattr(_client_mod, "build_client", lambda cfg: captured)
image_url = _png_data_url((640, 480))
client = _client_mod.get_multimodal_llm_client()
original = ChatMessage(
role="user",
content=[
TextPart(text="describe"),
ImageUrlPart(image_url=ImageUrlInner(url=image_url)),
],
)
await client.chat([original], max_tokens=10)
assert captured.messages is not None
sent_content = captured.messages[0].content
assert isinstance(sent_content, list)
sent_image = sent_content[1]
assert isinstance(sent_image, ImageUrlPart)
assert sent_image.image_url.url == image_url

View File

@ -6,6 +6,7 @@ import pytest
from pydantic import SecretStr from pydantic import SecretStr
from everos.component.llm import build_llm_provider from everos.component.llm import build_llm_provider
from everos.component.llm import factory as factory_mod
from everos.component.llm.openai_provider import OpenAIProvider from everos.component.llm.openai_provider import OpenAIProvider
from everos.config.settings import LLMSettings from everos.config.settings import LLMSettings
@ -26,3 +27,23 @@ def test_builds_openai_provider() -> None:
s = LLMSettings(model="m", api_key=SecretStr("k"), base_url="https://x") s = LLMSettings(model="m", api_key=SecretStr("k"), base_url="https://x")
p = build_llm_provider(s) p = build_llm_provider(s)
assert isinstance(p, OpenAIProvider) assert isinstance(p, OpenAIProvider)
def test_passes_configured_timeout(monkeypatch: pytest.MonkeyPatch) -> None:
captured_kwargs = {}
sentinel = object()
def capture_provider(**kwargs):
captured_kwargs.update(kwargs)
return sentinel
monkeypatch.setattr(factory_mod, "OpenAIProvider", capture_provider)
s = LLMSettings(
model="m",
api_key=SecretStr("k"),
base_url="https://x",
timeout_seconds=240.0,
)
assert build_llm_provider(s) is sentinel
assert captured_kwargs["timeout"] == 240.0

View File

@ -105,6 +105,9 @@ def test_embedding_rerank_defaults() -> None:
assert s.embedding.model is None assert s.embedding.model is None
assert s.embedding.api_key is None assert s.embedding.api_key is None
assert s.embedding.base_url is None assert s.embedding.base_url is None
assert s.llm.timeout_seconds == 180.0
assert s.multimodal.timeout_seconds == 180.0
assert s.multimodal.resize_images_for_vlm is True
# Runtime knobs come from default.toml. # Runtime knobs come from default.toml.
assert s.embedding.timeout_seconds == 30.0 assert s.embedding.timeout_seconds == 30.0
assert s.embedding.max_retries == 3 assert s.embedding.max_retries == 3
@ -126,6 +129,16 @@ def test_embedding_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
assert s.embedding.batch_size == 32 assert s.embedding.batch_size == 32
def test_llm_timeout_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("EVEROS_LLM__TIMEOUT_SECONDS", "240")
monkeypatch.setenv("EVEROS_MULTIMODAL__TIMEOUT_SECONDS", "300")
monkeypatch.setenv("EVEROS_MULTIMODAL__RESIZE_IMAGES_FOR_VLM", "false")
s = Settings()
assert s.llm.timeout_seconds == 240.0
assert s.multimodal.timeout_seconds == 300.0
assert s.multimodal.resize_images_for_vlm is False
def test_rerank_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None: def test_rerank_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("EVEROS_RERANK__MODEL", "BAAI/bge-reranker-v2-m3") monkeypatch.setenv("EVEROS_RERANK__MODEL", "BAAI/bge-reranker-v2-m3")
monkeypatch.setenv("EVEROS_RERANK__MAX_CONCURRENT", "8") monkeypatch.setenv("EVEROS_RERANK__MAX_CONCURRENT", "8")

View File

@ -9,7 +9,11 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
from everos.memory.cascade.watcher import _relative_to_root, _safe_mtime from everos.memory.cascade.watcher import (
_relative_to_root,
_safe_mtime,
_watch_roots,
)
def test_relative_to_root_within(tmp_path: Path) -> None: def test_relative_to_root_within(tmp_path: Path) -> None:
@ -34,3 +38,14 @@ def test_safe_mtime_existing_path_returns_positive(tmp_path: Path) -> None:
f = tmp_path / "f.md" f = tmp_path / "f.md"
f.write_text("ok") f.write_text("ok")
assert _safe_mtime(str(f)) > 0 assert _safe_mtime(str(f)) > 0
def test_watch_roots_excludes_system_dot_dirs(tmp_path: Path) -> None:
(tmp_path / ".index" / "lancedb" / "episode").mkdir(parents=True)
(tmp_path / ".tmp").mkdir()
(tmp_path / "default_app" / "default_project" / "users").mkdir(parents=True)
(tmp_path / "default_app" / "default_project" / "agents").mkdir()
roots = _watch_roots(tmp_path)
assert roots == [tmp_path / "default_app"]

View File

@ -21,7 +21,10 @@ def test_derive_text_renders_parsed_nontext_as_tag() -> None:
] ]
text, non_text = derive_text(items) text, non_text = derive_text(items)
assert "[IMAGE: p.png]\nOCR TEXT" in text assert "[IMAGE: p.png]" in text
assert "image visual facts" in text
assert "not assistant actions" in text
assert text.index("image visual facts") < text.index("OCR TEXT")
assert text.startswith("before") assert text.startswith("before")
assert text.endswith("after") assert text.endswith("after")
assert non_text == 0 assert non_text == 0