chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,37 @@
"""Configuration data and Settings schema.
Public API:
from everos.config import (
Settings, MemorySettings, SqliteSettings, LanceDBSettings,
LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
BoundaryDetectionSettings,
load_settings,
)
Distinct from ``everos.component.config`` (which is a *capability* —
loader / merger / env reader).
"""
from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
from .settings import EmbeddingSettings as EmbeddingSettings
from .settings import LanceDBSettings as LanceDBSettings
from .settings import LLMSettings as LLMSettings
from .settings import MemorySettings as MemorySettings
from .settings import MultimodalSettings as MultimodalSettings
from .settings import RerankSettings as RerankSettings
from .settings import Settings as Settings
from .settings import SqliteSettings as SqliteSettings
from .settings import load_settings as load_settings
# Explicit public API of ``everos.config``. Every name listed here is
# re-exported from ``.settings`` by the imports above.
__all__ = [
"BoundaryDetectionSettings",
"EmbeddingSettings",
"LLMSettings",
"LanceDBSettings",
"MemorySettings",
"MultimodalSettings",
"RerankSettings",
"Settings",
"SqliteSettings",
"load_settings",
]

View File

@ -0,0 +1,137 @@
# everos default configuration.
#
# Lookup order (later overrides earlier):
# 1. This file (shipped defaults; lowest priority)
# 2. ~/.everos/config.toml — user-level overrides (optional;
# path is overridable via EVEROS_CONFIG_FILE)
# 3. .env file in the working directory
# 4. Environment variables — EVEROS_<SECTION>__<KEY>
# e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
# 5. Programmatic init args (highest priority)
#
# An omitted key (TOML has no `null`) means "use the Pydantic default declared in code".
[memory]
# memory-root is the single directory holding all persisted memory.
# `~` is expanded; the path is resolved when MemoryRoot is constructed.
root = "~/.everos"
# Effective timezone for date buckets and timestamps. Drives
# component.utils.datetime; this is the SOLE source — OS `TZ` is not
# read. Override via `EVEROS_MEMORY__TIMEZONE` env var if needed.
timezone = "UTC"
[api]
# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
# keeps the API on loopback only — EverOS ships no built-in auth (see
# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
# have placed your own gateway / auth layer in front of the server.
# Override via EVEROS_API__HOST and EVEROS_API__PORT.
host = "127.0.0.1"
port = 8000
[sqlite]
# PRAGMA journal_mode — WAL is the recommended high-concurrency mode.
journal_mode = "WAL"
# PRAGMA synchronous — NORMAL is safe under WAL and ~2x faster than FULL.
synchronous = "NORMAL"
# PRAGMA foreign_keys — must be explicitly enabled per connection.
foreign_keys = true
# PRAGMA temp_store — MEMORY keeps query intermediates in RAM (no IO impact
# on durability — only affects sort/group/temp-table calculation buffers).
temp_store = "MEMORY"
# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
busy_timeout_ms = 5000
# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
journal_size_limit_bytes = 67108864
# PRAGMA cache_size — KB of page cache (per connection).
cache_size_kb = 2048
[lancedb]
# Read consistency interval in seconds.
# omitted / null -> no consistency check (fastest reads)
# 0 -> strict (every read checks updates)
# >0 -> eventual (interval seconds between checks)
# Uncomment to override:
# read_consistency_seconds = 5.0
[llm]
# Provider-agnostic OpenAI-protocol client config. Override via env:
# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
# Or via a ``.env`` file in the working directory (auto-loaded).
model = "gpt-4o-mini"
# api_key = ""
# base_url = ""
[multimodal]
# Independent LLM for multimodal parsing (everalgo-parser); must accept
# image / pdf / audio image_url parts. Override via env:
# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
model = "google/gemini-3-flash-preview"
max_concurrency = 4
# api_key = ""
# base_url = ""
# file:// content-item support (read locally by EverOS, not everalgo).
# file_uri_allow_dirs: empty = allow any readable file (local-first default);
# list base dirs to confine reads when the API is exposed.
# file_uri_allow_dirs = ["/srv/uploads"]
# file_uri_max_bytes = 52428800 # 50 MiB cap per file:// asset
[embedding]
# OpenAI-compatible embedding endpoint. Override via env:
# EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
# model / api_key / base_url have no shipped defaults — must be set
# (env or user toml) before the embedding capability is used.
# model = "Qwen/Qwen3-Embedding-4B"
# api_key = ""
# base_url = "https://api.example.com/v1"
timeout_seconds = 30.0
max_retries = 3
batch_size = 10
max_concurrent = 5
[rerank]
# Rerank provider. Override via env:
# EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
# EVEROS_RERANK__BASE_URL
# `provider` picks the request-shape:
# - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
# - "vllm" -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
provider = "deepinfra"
# model = "Qwen/Qwen3-Reranker-4B"
# api_key = ""
# base_url = "https://api.deepinfra.com/v1/inference"
timeout_seconds = 30.0
max_retries = 3
batch_size = 10
max_concurrent = 5
[boundary_detection]
# Passed through to ``everalgo.BoundaryDetector.adetect``.
hard_token_limit = 65536
hard_msg_limit = 500
[search]
# Vector retrieval strategy when SearchMethod.VECTOR is selected.
# "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20),
# max-pool the per-fact cosine by parent memcell, then reverse-resolve
# to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
# episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
# "episode": single-vector ANN over episode.vector (legacy path).
# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
vector_strategy = "maxsim_atomic"
[memorize]
# Conversation mode. Selects the boundary detector and which pipelines run:
# "chat" -> BoundaryDetector + user_memory only
# "agent" -> AgentBoundaryDetector + user_memory + agent_memory
# A single service process serves one mode at a time; switching mode
# requires a restart. Override via EVEROS_MEMORIZE__MODE.
mode = "agent"
# Maximum wall-clock for one memorize() invocation while holding the
# per-session lock. On timeout the outer asyncio.timeout cancels the call
# and the lock auto-releases so subsequent concurrent /add calls on the same
# session aren't deadlocked. Covers boundary LLM + memcell writes +
# synchronous portion of pipeline dispatch.
# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
session_lock_timeout_seconds = 360.0

View File

@ -0,0 +1,64 @@
# everos OME (Offline Memory Engine) — per-strategy overrides.
#
# This file is materialised at ``<memory-root>/ome.toml`` by
# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
# individual strategies or tweak their gate / retry / cron without
# restarting the server; the engine watches this file and hot-reloads
# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
# your edits — the file is only materialised when absent.
#
# Overrides are partial: only the keys you set replace the in-code
# defaults; omitted keys keep each strategy's coded value. Unknown
# keys (typos) raise StartupValidationError, so you cannot silently
# misconfigure a strategy.
#
# Per-strategy schema (StrategyOverride):
# enabled = bool # disable a strategy entirely
# max_retries = int >= 0 # re-fire on failure
# cron = str # replace the @cron(...) trigger
# idle_seconds = int > 0 # replace @idle(...) idle window
# scan_interval_seconds = int > 0 # paired with idle_seconds; must be <= idle/2
# [strategies.<name>.gate] # only valid on @counter-gated strategies
# threshold = int > 0 # counter trigger threshold
# cooldown_seconds = int >= 0 # min seconds between fires
# event_field = str # dispatch field for counter increment
# ── User-memory pipeline ────────────────────────────────────────────────
# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
# [strategies.extract_atomic_facts]
# enabled = true
# Foresight extraction (runs per memcell). Heavy LLM call — common to
# disable in evaluation / benchmark runs.
# [strategies.extract_foresight]
# enabled = false
# Profile clustering trigger (counter-gated; fires once N user memcells
# accumulate). Lower the threshold to cluster more aggressively.
# [strategies.trigger_profile_clustering]
# enabled = true
# [strategies.trigger_profile_clustering.gate]
# threshold = 5
# User-profile extraction (runs after clustering trigger fires). Common
# to disable in evaluation runs where ground-truth profiles aren't measured.
# [strategies.extract_user_profile]
# enabled = false
# ── Agent-memory pipeline ───────────────────────────────────────────────
# Agent case extraction (runs per agent memcell). One per tool call cycle.
# [strategies.extract_agent_case]
# enabled = true
# Skill clustering trigger (counter-gated; fires once N agent cases
# accumulate per agent).
# [strategies.trigger_skill_clustering]
# enabled = true
# [strategies.trigger_skill_clustering.gate]
# threshold = 5
# Agent skill extraction (runs after skill clustering trigger fires).
# [strategies.extract_agent_skill]
# enabled = true

View File

View File

@ -0,0 +1,20 @@
# Custom prompt slot for BoundaryDetector.adetect.
#
# Default behaviour
# Leave this slot disabled (``enabled: false``). The pipeline will pass
# ``prompt=None`` through to algo, which falls back to the everalgo
# bundled default prompt — see:
# ~/everalgo/packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
# (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
#
# To customise
# 1. Read the algo default at the path above; note the required
# placeholders ``{messages}`` and ``{token_count}``.
# 2. Replace the ``template`` body below with your prompt.
# 3. Flip ``enabled`` to ``true``.
#
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
# ``prompt=None`` and the algo default is used (zero override cost).
enabled: false
template: ""

View File

@ -0,0 +1,23 @@
# Custom prompt slot for EpisodeExtractor.aextract.
#
# Default behaviour
# Leave this slot disabled (``enabled: false``). The pipeline will pass
# ``prompt=None`` through to algo, which falls back to the everalgo
# bundled default prompt — see:
# everalgo/user_memory/prompts/en/episode.py
# (the pipeline calls ``aextract`` with ``sender_id=None``, so the
# whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
# per-user ``USER_EPISODE_GENERATION_PROMPT``)
#
# To customise
# 1. Read the algo default at the path above; note the required
# placeholders ``{conversation_start_time}``, ``{conversation}`` and
# ``{custom_instructions}``.
# 2. Replace the ``template`` body below with your prompt.
# 3. Flip ``enabled`` to ``true``.
#
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
# ``prompt=None`` and the algo default is used (zero override cost).
enabled: false
template: ""

View File

@ -0,0 +1,403 @@
"""Application settings.
Loaded by :func:`load_settings`. Source priority (later wins):
1. ``config/default.toml`` (shipped values; lowest priority)
2. ``~/.everos/config.toml`` (user-level overrides; optional)
3. ``.env`` file in the working directory (secrets / machine-specific)
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
5. Init args passed programmatically (highest priority)
The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""
from __future__ import annotations
import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
BaseSettings,
PydanticBaseSettingsSource,
SettingsConfigDict,
TomlConfigSettingsSource,
)
# Shipped defaults live next to this module.
_DEFAULT_TOML_PATH = Path(__file__).with_name("default.toml")
# Environment variable that relocates the user-level config file.
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
# Fallback user-level config location (``~`` expanded once, at import time).
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()


def _resolve_user_toml_path() -> Path:
    """Return the location of the user-level ``config.toml``.

    When the ``EVEROS_CONFIG_FILE`` environment variable holds a non-empty
    value, that path is used (with ``~`` expanded). Otherwise the default
    ``~/.everos/config.toml`` is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset or empty: fall back to the default location.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
class MemorySettings(BaseModel):
    """memory-root configuration.

    ``root`` is the directory holding all persisted memory (default
    ``~/.everos``); ``timezone`` is the project's effective timezone.
    """

    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Returns the name unchanged when it loads.

        Raises:
            ValueError: if the name is malformed, unknown, or cannot be read.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError covers keys that resolve to a directory or an
            # unreadable file (e.g. "America"): some Python versions surface
            # those as IsADirectoryError / PermissionError instead of
            # ZoneInfoNotFoundError, and they must still be reported as a
            # validation failure rather than escape as a raw OS error.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
class ApiSettings(BaseModel):
    """Bind address and port for the HTTP API server.

    The default host ``127.0.0.1`` restricts the server to loopback. This
    follows the threat model in ``SECURITY.md``: EverOS ships **no built-in
    authentication**, so exposing it on a routable interface (``0.0.0.0``
    etc.) without your own gateway / auth layer in front is unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    # Restricted to the valid TCP port range.
    port: int = Field(8000, ge=1, le=65535)
class SqliteSettings(BaseModel):
    """SQLite tunables applied as PRAGMAs on every new connection."""

    # One field per PRAGMA; the ``Literal`` types restrict each to its
    # accepted keyword set.
    journal_mode: Literal[
        "WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    # Numeric knobs must be non-negative.
    busy_timeout_ms: int = Field(5000, ge=0)
    journal_size_limit_bytes: int = Field(64 * 1024 * 1024, ge=0)  # 64 MiB
    cache_size_kb: int = Field(2048, ge=0)
class LLMSettings(BaseModel):
    """Configuration of the LLM client.

    The service layer reads this when it lazily builds the LLM client that
    is handed to the algo extractors. Field names are provider-agnostic:
    the project speaks the OpenAI API protocol, so any OpenAI-compatible
    endpoint can be plugged in through ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    # Optional credentials / endpoint; no shipped default.
    api_key: SecretStr | None = None
    base_url: str | None = None
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, consistent with the other
    settings sections, so a non-positive concurrency or a negative size cap
    fails fast instead of surfacing later at runtime.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # At least one concurrent request must be allowed (mirrors the
    # ``max_concurrent`` constraint on the embedding / rerank sections).
    max_concurrency: int = Field(default=4, ge=1)
    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
class EmbeddingSettings(BaseModel):
    """Configuration of the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model`` /
    ``api_key`` / ``base_url`` ship without defaults and are required at
    runtime once the embedding capability is enabled. The runtime knobs
    (timeout, retries, batching, concurrency) come with sensible defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity — unset until supplied via env or user toml.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
class RerankSettings(BaseModel):
    """Configuration of the rerank client.

    LLM and embedding share one OpenAI-compatible request shape, but rerank
    API schemas vary by provider: DeepInfra uses ``POST {base_url}/{model}``
    with a custom body, while vLLM uses ``POST {base_url}/rerank`` with
    ``{model, query, documents}``. ``provider`` selects which client
    implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm"] = "deepinfra"

    # Endpoint identity — unset until supplied via env or user toml.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
class BoundaryDetectionSettings(BaseModel):
    """Hard limits forwarded to the ``everalgo`` BoundaryDetector.

    Both limits must be at least 1.
    """

    hard_token_limit: int = Field(65536, ge=1)
    hard_msg_limit: int = Field(500, ge=1)
class MemorizeSettings(BaseModel):
    """Configuration of the memorize use case.

    ``mode`` decides which boundary detector runs and which pipelines get
    dispatched. One service process serves a single mode at a time, so
    changing it requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector``; only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector``; both
      the user-memory and agent-memory pipelines run.

    ``session_lock_timeout_seconds`` bounds how long a single
    ``memorize()`` invocation may hold the per-session lock. The budget
    covers the boundary LLM call, the memcell DB writes and the synchronous
    portion of pipeline dispatch. It keeps a stuck LLM from deadlocking
    later concurrent calls on the same session_id: on timeout the outer
    ``asyncio.timeout`` cancels the task and the lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    session_lock_timeout_seconds: float = Field(360.0, gt=0)
class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` picks the read path used by ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. MaxSim over atomic facts; costs one extra LanceDB
      scan in exchange for a finer-grained semantic match on long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). This is the legacy path,
      retained so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    Both fields are validated as non-negative at load time, so a
    misconfigured value fails fast instead of reaching LanceDB.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        =========== ================= ====================
        cap         FD upper bound    query latency (100q)
        =========== ================= ====================
        ``2 MB``    ~45               ~5 ms
        ``4 MB``    ~52               ~3 ms
        ``8 MB``    ~140              ~2.4 ms
        ``16 MB``   ~290              ~2.3 ms  ← default
        ``32 MB``   ~630              ~1.4 ms
        ``unbound`` >960 (leaks)      ~1.3 ms
        =========== ================= ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

    Note: the *metadata* cache (``metadata_cache_size_bytes``) is
    **not** exposed — experiment showed it caches in-memory parsed
    manifests / fragment stats with zero impact on FD count; leaving
    it unbounded (lancedb default) is fine.
    """

    # The documented domain is None / 0 / >0; a negative interval has no
    # meaning, so it is rejected up front.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # A byte cap cannot be negative.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
class Settings(BaseSettings):
    """Root of the application settings tree.

    Each attribute corresponds to one TOML section (and one
    ``EVEROS_<SECTION>__*`` env namespace).
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML sources between env / dotenv and the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is only
        included when the file actually exists, keeping the source list
        tight.
        """
        toml_layers: list[PydanticBaseSettingsSource] = []

        # Optional user-level overrides rank above the shipped defaults.
        user_toml = _resolve_user_toml_path()
        if user_toml.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml)
            )

        # Shipped defaults: path comes from ``model_config["toml_file"]``.
        toml_layers.append(TomlConfigSettingsSource(settings_cls))

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )
@cache
def load_settings() -> Settings:
    """Build the application :class:`Settings` once and memoise it.

    Constructing ``Settings()`` gathers values from every configured
    source: ``default.toml``, the optional user-level toml, the ``.env``
    file and ``EVEROS_*`` environment variables.

    The result is cached at module level, so every caller receives the same
    instance until the cache is cleared explicitly with
    ``load_settings.cache_clear()``. Tests that monkeypatch environment
    variables must call ``cache_clear`` after each mutation so the new
    environment is picked up.
    """
    settings = Settings()
    return settings