chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/config/__init__.py
+++ b/src/everos/config/__init__.py
@ -0,0 +1,37 @@
+"""Configuration data and Settings schema.
+
+Public API:
+    from everos.config import (
+        Settings, MemorySettings, SqliteSettings, LanceDBSettings,
+        LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
+        BoundaryDetectionSettings,
+        load_settings,
+    )
+
+Distinct from ``everos.component.config`` (which is a *capability* —
+loader / merger / env reader).
+"""
+
+from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
+from .settings import EmbeddingSettings as EmbeddingSettings
+from .settings import LanceDBSettings as LanceDBSettings
+from .settings import LLMSettings as LLMSettings
+from .settings import MemorySettings as MemorySettings
+from .settings import MultimodalSettings as MultimodalSettings
+from .settings import RerankSettings as RerankSettings
+from .settings import Settings as Settings
+from .settings import SqliteSettings as SqliteSettings
+from .settings import load_settings as load_settings
+
+__all__ = [
+    "BoundaryDetectionSettings",
+    "EmbeddingSettings",
+    "LLMSettings",
+    "LanceDBSettings",
+    "MemorySettings",
+    "MultimodalSettings",
+    "RerankSettings",
+    "Settings",
+    "SqliteSettings",
+    "load_settings",
+]
--- a/src/everos/config/default.toml
+++ b/src/everos/config/default.toml
@ -0,0 +1,137 @@
+# everos default configuration.
+#
+# Lookup order (later overrides earlier):
+#   1. This file (shipped defaults; lowest priority)
+#   2. ~/.everos/config.toml — user-level overrides (optional;
+#      path is overridable via EVEROS_CONFIG_FILE)
+#   3. .env file in the working directory
+#   4. Environment variables — EVEROS_<SECTION>__<KEY>
+#         e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
+#   5. Programmatic init args (highest priority)
+#
+# TOML has no `null`: a key that is omitted (or left commented out) here means
+# "use the Pydantic default declared in code".
+
+[memory]
+# memory-root is the single directory holding all persisted memory.
+# `~` is expanded; the path is resolved when MemoryRoot is constructed.
+root = "~/.everos"
+# Effective timezone for date buckets and timestamps. Drives
+# component.utils.datetime; this is the SOLE source — OS `TZ` is not
+# read. Override via `EVEROS_MEMORY__TIMEZONE` env var if needed.
+timezone = "UTC"
+
+[api]
+# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
+# keeps the API on loopback only — EverOS ships no built-in auth (see
+# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
+# have placed your own gateway / auth layer in front of the server.
+# Override via EVEROS_API__HOST and EVEROS_API__PORT.
+host = "127.0.0.1"
+port = 8000
+
+[sqlite]
+# PRAGMA journal_mode  — WAL is the recommended high-concurrency mode.
+journal_mode = "WAL"
+# PRAGMA synchronous  — NORMAL is safe under WAL and ~2x faster than FULL.
+synchronous = "NORMAL"
+# PRAGMA foreign_keys — must be explicitly enabled per connection.
+foreign_keys = true
+# PRAGMA temp_store   — MEMORY keeps query intermediates in RAM (no IO impact
+# on durability — only affects sort/group/temp-table calculation buffers).
+temp_store = "MEMORY"
+# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
+busy_timeout_ms = 5000
+# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
+journal_size_limit_bytes = 67108864
+# PRAGMA cache_size  — KB of page cache (per connection).
+cache_size_kb = 2048
+
+[lancedb]
+# Read consistency interval in seconds.
+#   omitted / null -> no consistency check (fastest reads)
+#   0              -> strict (every read checks updates)
+#   >0             -> eventual (interval seconds between checks)
+# Uncomment to override:
+# read_consistency_seconds = 5.0
+
+[llm]
+# Provider-agnostic OpenAI-protocol client config. Override via env:
+#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
+# Or via a ``.env`` file in the working directory (auto-loaded).
+model = "gpt-4o-mini"
+# api_key = ""
+# base_url = ""
+
+[multimodal]
+# Independent LLM for multimodal parsing (everalgo-parser); the model must
+# accept multimodal ``image_url`` parts (image / pdf / audio). Override via env:
+#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
+model = "google/gemini-3-flash-preview"
+max_concurrency = 4
+# api_key = ""
+# base_url = ""
+# file:// content-item support (read locally by EverOS, not everalgo).
+# file_uri_allow_dirs: empty = allow any readable file (local-first default);
+# list base dirs to confine reads when the API is exposed.
+# file_uri_allow_dirs = ["/srv/uploads"]
+# file_uri_max_bytes = 52428800   # 50 MiB cap per file:// asset
+
+[embedding]
+# OpenAI-compatible embedding endpoint. Override via env:
+#   EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
+# model / api_key / base_url have no shipped defaults — must be set
+# (env or user toml) before the embedding capability is used.
+# model     = "Qwen/Qwen3-Embedding-4B"
+# api_key   = ""
+# base_url  = "https://api.example.com/v1"
+timeout_seconds = 30.0
+max_retries     = 3
+batch_size      = 10
+max_concurrent  = 5
+
+[rerank]
+# Rerank provider. Override via env:
+#   EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
+#   EVEROS_RERANK__BASE_URL
+# `provider` picks the request-shape:
+#   - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
+#   - "vllm"      -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
+provider = "deepinfra"
+# model     = "Qwen/Qwen3-Reranker-4B"
+# api_key   = ""
+# base_url  = "https://api.deepinfra.com/v1/inference"
+timeout_seconds = 30.0
+max_retries     = 3
+batch_size      = 10
+max_concurrent  = 5
+
+[boundary_detection]
+# Passed through to ``everalgo.BoundaryDetector.adetect``.
+hard_token_limit = 65536
+hard_msg_limit = 500
+
+[search]
+# Vector retrieval strategy when SearchMethod.VECTOR is selected.
+#   "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20),
+#       max-pool the per-fact cosine by parent memcell, then reverse-resolve
+#       to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
+#       episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
+#   "episode": single-vector ANN over episode.vector (legacy path).
+# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
+vector_strategy = "maxsim_atomic"
+
+[memorize]
+# Conversation mode. Selects the boundary detector and which pipelines run:
+#   "chat"  -> BoundaryDetector       + user_memory only
+#   "agent" -> AgentBoundaryDetector  + user_memory + agent_memory
+# A single service process serves one mode at a time; switching mode
+# requires a restart. Override via EVEROS_MEMORIZE__MODE.
+mode = "agent"
+
+# Maximum wall-clock for one memorize() invocation while holding the
+# per-session lock. On timeout the outer asyncio.timeout cancels the call
+# and the lock auto-releases so subsequent concurrent /add on the same
+# session aren't deadlocked. Covers boundary LLM + memcell writes +
+# synchronous portion of pipeline dispatch.
+# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
+session_lock_timeout_seconds = 360.0
--- a/src/everos/config/default_ome.toml
+++ b/src/everos/config/default_ome.toml
@ -0,0 +1,64 @@
+# everos OME (Offline Memory Engine) — per-strategy overrides.
+#
+# This file is materialised at ``<memory-root>/ome.toml`` by
+# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
+# individual strategies or tweak their gate / retry / cron without
+# restarting the server; the engine watches this file and hot-reloads
+# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
+# your edits — the file is only materialised when absent.
+#
+# Overrides are partial: only the keys you set replace the in-code
+# defaults; omitted keys keep each strategy's coded value. Unknown
+# keys (typos) raise StartupValidationError, so you cannot silently
+# misconfigure a strategy.
+#
+# Per-strategy schema (StrategyOverride):
+#   enabled                = bool           # disable a strategy entirely
+#   max_retries            = int >= 0       # re-fire on failure
+#   cron                   = str            # replace the @cron(...) trigger
+#   idle_seconds           = int > 0        # replace @idle(...) idle window
+#   scan_interval_seconds  = int > 0        # paired with idle_seconds; must be <= idle/2
+#   [strategies.<name>.gate]                # only valid on @counter-gated strategies
+#   threshold              = int > 0        # counter trigger threshold
+#   cooldown_seconds       = int >= 0       # min seconds between fires
+#   event_field            = str            # dispatch field for counter increment
+
+# ── User-memory pipeline ────────────────────────────────────────────────
+
+# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
+# [strategies.extract_atomic_facts]
+# enabled = true
+
+# Foresight extraction (runs per memcell). Heavy LLM call — common to
+# disable in evaluation / benchmark runs.
+# [strategies.extract_foresight]
+# enabled = false
+
+# Profile clustering trigger (counter-gated; fires once N user memcells
+# accumulate). Lower the threshold to cluster more aggressively.
+# [strategies.trigger_profile_clustering]
+# enabled = true
+# [strategies.trigger_profile_clustering.gate]
+# threshold = 5
+
+# User-profile extraction (runs after clustering trigger fires). Common
+# to disable in evaluation runs where ground-truth profiles aren't measured.
+# [strategies.extract_user_profile]
+# enabled = false
+
+# ── Agent-memory pipeline ───────────────────────────────────────────────
+
+# Agent case extraction (runs per agent memcell). One per tool call cycle.
+# [strategies.extract_agent_case]
+# enabled = true
+
+# Skill clustering trigger (counter-gated; fires once N agent cases
+# accumulate per agent).
+# [strategies.trigger_skill_clustering]
+# enabled = true
+# [strategies.trigger_skill_clustering.gate]
+# threshold = 5
+
+# Agent skill extraction (runs after skill clustering trigger fires).
+# [strategies.extract_agent_skill]
+# enabled = true
--- a/src/everos/config/prompt_slots/.gitkeep
+++ b/src/everos/config/prompt_slots/.gitkeep
--- a/src/everos/config/prompt_slots/__init__.py
+++ b/src/everos/config/prompt_slots/__init__.py
--- a/src/everos/config/prompt_slots/boundary_detection.yaml
+++ b/src/everos/config/prompt_slots/boundary_detection.yaml
@ -0,0 +1,20 @@
+# Custom prompt slot for BoundaryDetector.adetect.
+#
+# Default behaviour
+#   Leave this slot disabled (``enabled: false``). The pipeline will pass
+#   ``prompt=None`` through to algo, which falls back to the everalgo
+#   bundled default prompt — see:
+#     packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
+#     (path relative to the root of your everalgo checkout)
+#     (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
+#
+# To customise
+#   1. Read the algo default at the path above; note the required
+#      placeholders ``{messages}`` and ``{token_count}``.
+#   2. Replace the ``template`` body below with your prompt.
+#   3. Flip ``enabled`` to ``true``.
+#
+# When ``enabled: false`` or ``template`` is empty, the pipeline sends
+# ``prompt=None`` and the algo default is used (zero override cost).
+
+enabled: false
+template: ""
--- a/src/everos/config/prompt_slots/episode_extract.yaml
+++ b/src/everos/config/prompt_slots/episode_extract.yaml
@ -0,0 +1,23 @@
+# Custom prompt slot for EpisodeExtractor.aextract.
+#
+# Default behaviour
+#   Leave this slot disabled (``enabled: false``). The pipeline will pass
+#   ``prompt=None`` through to algo, which falls back to the everalgo
+#   bundled default prompt — see:
+#     everalgo/user_memory/prompts/en/episode.py
+#     (the pipeline calls ``aextract`` with ``sender_id=None``, so the
+#      whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
+#      per-user ``USER_EPISODE_GENERATION_PROMPT``)
+#
+# To customise
+#   1. Read the algo default at the path above; note the required
+#      placeholders ``{conversation_start_time}``, ``{conversation}`` and
+#      ``{custom_instructions}``.
+#   2. Replace the ``template`` body below with your prompt.
+#   3. Flip ``enabled`` to ``true``.
+#
+# When ``enabled: false`` or ``template`` is empty, the pipeline sends
+# ``prompt=None`` and the algo default is used (zero override cost).
+
+enabled: false
+template: ""
--- a/src/everos/config/settings.py
+++ b/src/everos/config/settings.py
@ -0,0 +1,403 @@
+"""Application settings.
+
+Loaded by :func:`load_settings`. Source priority (later wins):
+
+    1. ``config/default.toml`` (shipped values; lowest priority)
+    2. ``~/.everos/config.toml`` (user-level overrides; optional)
+    3. ``.env`` file in the working directory (secrets / machine-specific)
+    4. ``EVEROS_<SECTION>__<KEY>`` environment variables
+    5. Init args passed programmatically (highest priority)
+
+The user-level toml path defaults to ``~/.everos/config.toml``. Override
+with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
+optional — if it does not exist, the source is silently skipped.
+
+The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
+maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
+
+``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
+:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
+call. Tests that mutate environment variables must call
+``load_settings.cache_clear()`` after the mutation to invalidate.
+"""
+
+from __future__ import annotations
+
+import os
+from functools import cache
+from pathlib import Path
+from typing import Literal
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+from pydantic import BaseModel, Field, SecretStr, field_validator
+from pydantic_settings import (
+    BaseSettings,
+    PydanticBaseSettingsSource,
+    SettingsConfigDict,
+    TomlConfigSettingsSource,
+)
+
+_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
+_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
+_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
+
+
+def _resolve_user_toml_path() -> Path:
+    """Resolve the user-level ``config.toml`` path.
+
+    Defaults to ``~/.everos/config.toml``; override with the
+    ``EVEROS_CONFIG_FILE`` environment variable.
+    """
+    override = os.environ.get(_USER_TOML_ENV_VAR)
+    return Path(override).expanduser() if override else _DEFAULT_USER_TOML_PATH
+
+
+class MemorySettings(BaseModel):
+    """memory-root configuration."""
+
+    root: Path = Path("~/.everos")
+    timezone: str = "UTC"
+    """Effective timezone for date buckets and timestamps.
+
+    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
+    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
+    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
+    fast (no silent fallback). This is the **sole** source of truth for
+    the project's effective timezone — the OS ``TZ`` env var is *not*
+    consulted, keeping the configuration deterministic.
+    """
+
+    @field_validator("timezone")
+    @classmethod
+    def _validate_timezone(cls, v: str) -> str:
+        try:
+            ZoneInfo(v)
+        except (ZoneInfoNotFoundError, ValueError) as exc:
+            raise ValueError(f"invalid timezone: {v!r}") from exc
+        return v
+
+
+class ApiSettings(BaseModel):
+    """HTTP API server bind configuration.
+
+    Default ``host = "127.0.0.1"`` keeps the server on loopback only,
+    matching the threat model in ``SECURITY.md``: EverOS ships **no
+    built-in authentication**, so binding to a routable interface
+    (``0.0.0.0`` etc.) without your own gateway / auth layer in front
+    is unsupported.
+
+    Env binding:
+        EVEROS_API__HOST
+        EVEROS_API__PORT
+    """
+
+    host: str = "127.0.0.1"
+    port: int = Field(default=8000, ge=1, le=65535)
+
+
+class SqliteSettings(BaseModel):
+    """SQLite tunables applied as PRAGMAs on every new connection."""
+
+    journal_mode: Literal["WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"] = (
+        "WAL"
+    )
+    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
+    foreign_keys: bool = True
+    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
+    busy_timeout_ms: int = Field(default=5000, ge=0)
+    journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)
+    cache_size_kb: int = Field(default=2048, ge=0)
+
+
+class LLMSettings(BaseModel):
+    """LLM client configuration.
+
+    Read by the service layer when lazily constructing the LLM client
+    handed to algo extractors. Provider-agnostic field names — the
+    project follows the OpenAI API protocol so any OpenAI-compatible
+    endpoint plugs in via ``base_url``.
+
+    Env binding (via parent ``Settings``):
+        EVEROS_LLM__MODEL
+        EVEROS_LLM__API_KEY
+        EVEROS_LLM__BASE_URL
+    """
+
+    model: str = "gpt-4o-mini"
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+
+
+class MultimodalSettings(BaseModel):
+    """Multimodal parsing LLM config (everalgo-parser).
+
+    Flat section mirroring ``[llm]``. The model must accept multimodal
+    ``image_url`` parts (image / pdf / audio); it is kept independent from
+    the main ``[llm]`` so parsing can target a vision/audio-capable
+    endpoint without affecting boundary / extraction.
+
+    Env binding (via parent ``Settings``):
+        EVEROS_MULTIMODAL__MODEL
+        EVEROS_MULTIMODAL__API_KEY
+        EVEROS_MULTIMODAL__BASE_URL
+        EVEROS_MULTIMODAL__MAX_CONCURRENCY
+        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
+        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
+    """
+
+    model: str = "google/gemini-3-flash-preview"
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    max_concurrency: int = 4
+
+    # ``file://`` content-item support (read locally by EverOS, not everalgo).
+    file_uri_allow_dirs: list[str] = []
+    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
+    file (local-first default); set to confine reads when the API is exposed."""
+    file_uri_max_bytes: int = 50 * 1024 * 1024
+    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
+
+
+class EmbeddingSettings(BaseModel):
+    """Embedding client configuration.
+
+    OpenAI-compatible embedding endpoint. ``model`` / ``api_key`` /
+    ``base_url`` are required at runtime when the embedding capability
+    is enabled; the runtime knobs (``timeout`` etc.) have sensible
+    defaults.
+
+    Env binding:
+        EVEROS_EMBEDDING__MODEL
+        EVEROS_EMBEDDING__API_KEY
+        EVEROS_EMBEDDING__BASE_URL
+        EVEROS_EMBEDDING__TIMEOUT_SECONDS
+        EVEROS_EMBEDDING__MAX_RETRIES
+        EVEROS_EMBEDDING__BATCH_SIZE
+        EVEROS_EMBEDDING__MAX_CONCURRENT
+    """
+
+    model: str | None = None
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    timeout_seconds: float = Field(default=30.0, gt=0)
+    max_retries: int = Field(default=3, ge=0)
+    batch_size: int = Field(default=10, ge=1)
+    max_concurrent: int = Field(default=5, ge=1)
+
+
+class RerankSettings(BaseModel):
+    """Rerank client configuration.
+
+    Unlike LLM / embedding (single OpenAI-compatible shape), rerank API
+    schemas differ between providers — DeepInfra uses ``POST {base_url}/
+    {model}`` with a custom body, vLLM uses ``POST {base_url}/rerank``
+    with ``{model, query, documents}``. ``provider`` picks which client
+    implementation the factory builds.
+
+    Env binding:
+        EVEROS_RERANK__PROVIDER
+        EVEROS_RERANK__MODEL
+        EVEROS_RERANK__API_KEY
+        EVEROS_RERANK__BASE_URL
+        EVEROS_RERANK__TIMEOUT_SECONDS
+        EVEROS_RERANK__MAX_RETRIES
+        EVEROS_RERANK__BATCH_SIZE
+        EVEROS_RERANK__MAX_CONCURRENT
+    """
+
+    provider: Literal["deepinfra", "vllm"] = "deepinfra"
+    model: str | None = None
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    timeout_seconds: float = Field(default=30.0, gt=0)
+    max_retries: int = Field(default=3, ge=0)
+    batch_size: int = Field(default=10, ge=1)
+    max_concurrent: int = Field(default=5, ge=1)
+
+
+class BoundaryDetectionSettings(BaseModel):
+    """Hard limits passed through to ``everalgo`` BoundaryDetector."""
+
+    hard_token_limit: int = Field(default=65536, ge=1)
+    hard_msg_limit: int = Field(default=500, ge=1)
+
+
+class MemorizeSettings(BaseModel):
+    """Memorize use-case configuration.
+
+    ``mode`` selects which boundary detector runs and which pipelines are
+    dispatched. A service process serves one mode at a time; toggling
+    requires a restart.
+
+        - ``"chat"``  -> ``everalgo.user_memory.BoundaryDetector`` and only the
+          user-memory pipeline runs.
+        - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
+          both user-memory + agent-memory pipelines run.
+
+    ``session_lock_timeout_seconds`` caps how long one ``memorize()``
+    invocation can hold the per-session lock. Covers boundary LLM call +
+    memcell DB writes + (synchronous portion of) pipeline dispatch. Stops
+    a stuck LLM from deadlocking subsequent concurrent calls on the same
+    session_id: on timeout the outer ``asyncio.timeout`` cancels the task
+    and the lock auto-releases.
+
+    Env binding:
+        EVEROS_MEMORIZE__MODE
+        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
+    """
+
+    mode: Literal["chat", "agent"] = "agent"
+    session_lock_timeout_seconds: float = Field(default=360.0, gt=0)
+
+
+class SearchSettings(BaseModel):
+    """Search-pipeline policy knobs.
+
+    ``vector_strategy`` selects the read path taken by
+    ``SearchMethod.VECTOR``:
+
+    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
+      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
+      cosine by parent memcell, then reverse-resolve the top memcells back
+      to episode rows. MaxSim over atomic facts; trades one extra LanceDB
+      scan for finer-grained semantic match on long episodes.
+    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
+      per episode = the embedded Content section). The legacy path; kept
+      so deployments can opt out via env.
+
+    Env binding:
+        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
+    """
+
+    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
+
+
+class LanceDBSettings(BaseModel):
+    """LanceDB tunables.
+
+    ``read_consistency_seconds``:
+      ``None`` (omitted) → no consistency check (highest performance).
+      ``0``              → strict consistency (every read).
+      ``>0``             → eventual (interval between checks).
+
+    ``index_cache_size_bytes``:
+      Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
+      in lance crate). Each cached entry is one opened FTS / vector /
+      scalar index reader and **holds the file descriptors of its on-disk
+      ``_indices/<uuid>/...`` files**.
+
+      LanceDB's own default is ``None`` (unbounded), which on a long-
+      running daemon means every new index UUID created by an
+      ``optimize()`` call adds a fresh reader to the cache, and its
+      FDs are never released — they leak monotonically until
+      ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
+      take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
+      of 256 / Linux's 1024.
+
+      Setting a byte cap turns the cache into a real LRU: when it
+      exceeds the cap, the oldest readers are dropped, Rust ``Drop``
+      runs ``close(fd)``, and the FD pressure resolves itself.
+
+      Cap → steady-state FD upper bound (measured under 30 add+optimize
+      cycles with the real ``Episode`` schema and 100-query stress):
+
+      ===========  =================  ===================
+      cap          FD upper bound     query latency (100q)
+      ===========  =================  ===================
+      ``2 MB``     ~45                ~5 ms
+      ``4 MB``     ~52                ~3 ms
+      ``8 MB``     ~140               ~2.4 ms
+      ``16 MB``    ~290               ~2.3 ms   ← default
+      ``32 MB``    ~630               ~1.4 ms
+      ``unbound``  >960 (leaks)       ~1.3 ms
+      ===========  =================  ===================
+
+      EverOS's measured steady-state working set after a 12 h
+      ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
+      (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
+      ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
+      yet-evicted readers, while the FD ceiling (~290) stays well below
+      common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
+      Linux default 1024 is fine out of the box).
+
+      Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
+      working set is much larger (heavier table count or much wider
+      indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
+      boxes).
+
+      Note: the *metadata* cache (``metadata_cache_size_bytes``) is
+      **not** exposed — experiment showed it caches in-memory parsed
+      manifests / fragment stats with zero impact on FD count; leaving
+      it unbounded (lancedb default) is fine.
+    """
+
+    read_consistency_seconds: float | None = None
+    index_cache_size_bytes: int = 16 * 1024 * 1024
+
+
+class Settings(BaseSettings):
+    """Top-level application settings."""
+
+    memory: MemorySettings = MemorySettings()
+    api: ApiSettings = ApiSettings()
+    sqlite: SqliteSettings = SqliteSettings()
+    lancedb: LanceDBSettings = LanceDBSettings()
+    llm: LLMSettings = LLMSettings()
+    embedding: EmbeddingSettings = EmbeddingSettings()
+    rerank: RerankSettings = RerankSettings()
+    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
+    memorize: MemorizeSettings = MemorizeSettings()
+    search: SearchSettings = SearchSettings()
+    multimodal: MultimodalSettings = MultimodalSettings()
+
+    model_config = SettingsConfigDict(
+        env_prefix="EVEROS_",
+        env_nested_delimiter="__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        toml_file=_DEFAULT_TOML_PATH,
+        extra="ignore",
+    )
+
+    @classmethod
+    def settings_customise_sources(
+        cls,
+        settings_cls: type[BaseSettings],
+        init_settings: PydanticBaseSettingsSource,
+        env_settings: PydanticBaseSettingsSource,
+        dotenv_settings: PydanticBaseSettingsSource,
+        file_secret_settings: PydanticBaseSettingsSource,
+    ) -> tuple[PydanticBaseSettingsSource, ...]:
+        """Layer TOML sources between env / dotenv and the secret store.
+
+        Order (earlier wins in pydantic-settings):
+            init_args > env > .env > user_toml > default_toml > secrets
+
+        The user-level toml (default ``~/.everos/config.toml``) is only
+        registered when the file exists, so the source list stays tight.
+        """
+        sources: list[PydanticBaseSettingsSource] = [
+            init_settings,
+            env_settings,
+            dotenv_settings,
+        ]
+        user_toml_path = _resolve_user_toml_path()
+        if user_toml_path.is_file():
+            sources.append(
+                TomlConfigSettingsSource(settings_cls, toml_file=user_toml_path)
+            )
+        sources.append(TomlConfigSettingsSource(settings_cls))
+        sources.append(file_secret_settings)
+        return tuple(sources)
+
+
+@cache
+def load_settings() -> Settings:
+    """Load settings from default.toml + environment variables (cached).
+
+    Cached at the module level — every caller sees the same instance until
+    something explicitly clears the cache (``load_settings.cache_clear()``).
+    Tests that monkeypatch environment variables must call
+    ``cache_clear`` after each mutation to pick the new env up.
+    """
+    return Settings()