EverOS/src/everos/config/settings.py

"""Application settings.

Loaded by :func:`load_settings`. Source priority (later wins):

    1. ``config/default.toml`` (shipped values; lowest priority)
    2. ``~/.everos/config.toml`` (user-level overrides; optional)
    3. ``.env`` file in the working directory (secrets / machine-specific)
    4. ``EVEROS_<SECTION>__<KEY>`` environment variables
    5. Init args passed programmatically (highest priority)

The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.

The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.

``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""

from __future__ import annotations

import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
    TomlConfigSettingsSource,
)

_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()


def _resolve_user_toml_path() -> Path:
    """Resolve the user-level ``config.toml`` path.

    Defaults to ``~/.everos/config.toml``; override with the
    ``EVEROS_CONFIG_FILE`` environment variable.
    """
    override = os.environ.get(_USER_TOML_ENV_VAR)
    return Path(override).expanduser() if override else _DEFAULT_USER_TOML_PATH


class MemorySettings(BaseModel):
    """memory-root configuration."""

    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError) as exc:
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v


class ApiSettings(BaseModel):
    """HTTP API server bind configuration.

    Default ``host = "127.0.0.1"`` keeps the server on loopback only,
    matching the threat model in ``SECURITY.md``: EverOS ships **no
    built-in authentication**, so binding to a routable interface
    (``0.0.0.0`` etc.) without your own gateway / auth layer in front
    is unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    port: int = Field(default=8000, ge=1, le=65535)


class SqliteSettings(BaseModel):
    """SQLite tunables applied as PRAGMAs on every new connection."""

    journal_mode: Literal["WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"] = (
        "WAL"
    )
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    busy_timeout_ms: int = Field(default=5000, ge=0)
    journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)
    cache_size_kb: int = Field(default=2048, ge=0)


class LLMSettings(BaseModel):
    """LLM client configuration.

    Read by the service layer when lazily constructing the LLM client
    handed to algo extractors. Provider-agnostic field names — the
    project follows the OpenAI API protocol so any OpenAI-compatible
    endpoint plugs in via ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    api_key: SecretStr | None = None
    base_url: str | None = None


class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    max_concurrency: int = 4

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = []
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = 50 * 1024 * 1024
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""


class EmbeddingSettings(BaseModel):
    """Embedding client configuration.

    OpenAI-compatible embedding endpoint. ``model`` / ``api_key`` /
    ``base_url`` are required at runtime when the embedding capability
    is enabled; the runtime knobs (``timeout`` etc.) have sensible
    defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)


class RerankSettings(BaseModel):
    """Rerank client configuration.

    Unlike LLM / embedding (single OpenAI-compatible shape), rerank API
    schemas differ between providers — DeepInfra uses ``POST {base_url}/
    {model}`` with a custom body, vLLM uses ``POST {base_url}/rerank``
    with ``{model, query, documents}``. ``provider`` picks which client
    implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm"] = "deepinfra"
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)


class BoundaryDetectionSettings(BaseModel):
    """Hard limits passed through to ``everalgo`` BoundaryDetector."""

    hard_token_limit: int = Field(default=65536, ge=1)
    hard_msg_limit: int = Field(default=500, ge=1)


class MemorizeSettings(BaseModel):
    """Memorize use-case configuration.

    ``mode`` selects which boundary detector runs and which pipelines are
    dispatched. A service process serves one mode at a time; toggling
    requires a restart.

        - ``"chat"``  -> ``everalgo.user_memory.BoundaryDetector`` and only the
          user-memory pipeline runs.
        - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
          both user-memory + agent-memory pipelines run.

    ``session_lock_timeout_seconds`` caps how long one ``memorize()``
    invocation can hold the per-session lock. Covers boundary LLM call +
    memcell DB writes + (synchronous portion of) pipeline dispatch. Stops
    a stuck LLM from deadlocking subsequent concurrent calls on the same
    session_id: on timeout the outer ``asyncio.timeout`` cancels the task
    and the lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    session_lock_timeout_seconds: float = Field(default=360.0, gt=0)


class SearchSettings(BaseModel):
    """Search-pipeline policy knobs.

    ``vector_strategy`` selects the read path taken by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. MaxSim over atomic facts; trades one extra LanceDB
      scan for finer-grained semantic match on long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). The legacy path; kept
      so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"


class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    ``read_consistency_seconds``:
      ``None`` (omitted) → no consistency check (highest performance).
      ``0``              → strict consistency (every read).
      ``>0``             → eventual (interval between checks).

    ``index_cache_size_bytes``:
      Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
      in lance crate). Each cached entry is one opened FTS / vector /
      scalar index reader and **holds the file descriptors of its on-disk
      ``_indices/<uuid>/...`` files**.

      LanceDB's own default is ``None`` (unbounded), which on a long-
      running daemon means every new index UUID created by an
      ``optimize()`` call adds a fresh reader to the cache, and its
      FDs are never released — they leak monotonically until
      ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
      take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
      of 256 / Linux's 1024.

      Setting a byte cap turns the cache into a real LRU: when it
      exceeds the cap, the oldest readers are dropped, Rust ``Drop``
      runs ``close(fd)``, and the FD pressure resolves itself.

      Cap → steady-state FD upper bound (measured under 30 add+optimize
      cycles with the real ``Episode`` schema and 100-query stress):

      ===========  =================  ===================
      cap          FD upper bound     query latency (100q)
      ===========  =================  ===================
      ``2 MB``     ~45                ~5 ms
      ``4 MB``     ~52                ~3 ms
      ``8 MB``     ~140               ~2.4 ms
      ``16 MB``    ~290               ~2.3 ms   ← default
      ``32 MB``    ~630               ~1.4 ms
      ``unbound``  >960 (leaks)       ~1.3 ms
      ===========  =================  ===================

      EverOS's measured steady-state working set after a 12 h
      ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
      (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
      ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
      yet-evicted readers, while the FD ceiling (~290) stays well below
      common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
      Linux default 1024 is fine out of the box).

      Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
      working set is much larger (heavier table count or much wider
      indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
      boxes).

      Note: the *metadata* cache (``metadata_cache_size_bytes``) is
      **not** exposed — experiment showed it caches in-memory parsed
      manifests / fragment stats with zero impact on FD count; leaving
      it unbounded (lancedb default) is fine.
    """

    read_consistency_seconds: float | None = None
    index_cache_size_bytes: int = 16 * 1024 * 1024


class Settings(BaseSettings):
    """Top-level application settings."""

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Layer TOML sources between env / dotenv and the secret store.

        Order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is only
        registered when the file exists, so the source list stays tight.
        """
        sources: list[PydanticBaseSettingsSource] = [
            init_settings,
            env_settings,
            dotenv_settings,
        ]
        user_toml_path = _resolve_user_toml_path()
        if user_toml_path.is_file():
            sources.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml_path)
            )
        sources.append(TomlConfigSettingsSource(settings_cls))
        sources.append(file_secret_settings)
        return tuple(sources)


@cache
def load_settings() -> Settings:
    """Load settings from default.toml + environment variables (cached).

    Cached at the module level — every caller sees the same instance until
    something explicitly clears the cache (``load_settings.cache_clear()``).
    Tests that monkeypatch environment variables must call
    ``cache_clear`` after each mutation to pick the new env up.
    """
    return Settings()