md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:

- Coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
404 lines
15 KiB
Python
404 lines
15 KiB
Python
"""Application settings.
|
||
|
||
Loaded by :func:`load_settings`. Source priority (later wins):
|
||
|
||
1. ``config/default.toml`` (shipped values; lowest priority)
|
||
2. ``~/.everos/config.toml`` (user-level overrides; optional)
|
||
3. ``.env`` file in the working directory (secrets / machine-specific)
|
||
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
|
||
5. Init args passed programmatically (highest priority)
|
||
|
||
The user-level toml path defaults to ``~/.everos/config.toml``. Override
|
||
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
|
||
optional — if it does not exist, the source is silently skipped.
|
||
|
||
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
|
||
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
|
||
|
||
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
|
||
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
|
||
call. Tests that mutate environment variables must call
|
||
``load_settings.cache_clear()`` after the mutation to invalidate.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
from functools import cache
|
||
from pathlib import Path
|
||
from typing import Literal
|
||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||
|
||
from pydantic import BaseModel, Field, SecretStr, field_validator
|
||
from pydantic_settings import (
|
||
BaseSettings,
|
||
PydanticBaseSettingsSource,
|
||
SettingsConfigDict,
|
||
TomlConfigSettingsSource,
|
||
)
|
||
|
||
# Environment variable that can point at an alternative user-level config file.
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"

# Shipped defaults: ``default.toml`` sitting next to this module.
_DEFAULT_TOML_PATH = Path(__file__).with_name("default.toml")

# User-level override file used when the environment variable is not set.
# ``~`` is expanded once, when the module is imported.
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
|
||
|
||
|
||
def _resolve_user_toml_path() -> Path:
    """Return the location of the user-level ``config.toml``.

    The ``EVEROS_CONFIG_FILE`` environment variable wins when it is set to
    a non-empty value (``~`` in it is expanded); otherwise the default
    ``~/.everos/config.toml`` location is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset and empty are treated alike: fall back to the default path.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
|
||
|
||
|
||
class MemorySettings(BaseModel):
    """Memory-root configuration (``[memory]`` section)."""

    # Stored exactly as configured; this class does not expand ``~``.
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Args:
            v: Candidate IANA timezone key (e.g. ``"Europe/Berlin"``).

        Returns:
            ``v`` unchanged when the zone can be constructed.

        Raises:
            ValueError: If the key is unknown, malformed, or cannot be read.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # ZoneInfoNotFoundError is a KeyError subclass, and some Python
            # versions surface keys that resolve to a directory (e.g.
            # "America") as a raw OSError. Normalise every failure to
            # ValueError so it is reported as an ordinary validation error.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
|
||
|
||
|
||
class ApiSettings(BaseModel):
    """Bind address of the HTTP API server.

    The server listens on loopback (``host = "127.0.0.1"``) unless told
    otherwise. That default follows the threat model in ``SECURITY.md``:
    EverOS ships **no built-in authentication**, so exposing it on a
    routable interface (``0.0.0.0`` etc.) without your own gateway / auth
    layer in front is unsupported.

    ``port`` must be a valid TCP port (1-65535).

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    port: int = Field(8000, ge=1, le=65535)
|
||
|
||
|
||
class SqliteSettings(BaseModel):
    """SQLite tunables, applied as PRAGMAs on every new connection.

    Enumerated options are restricted to the listed literals; the numeric
    knobs must be non-negative.
    """

    journal_mode: Literal[
        "WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    busy_timeout_ms: int = Field(5000, ge=0)
    # 64 MiB
    journal_size_limit_bytes: int = Field(64 * 1024**2, ge=0)
    cache_size_kb: int = Field(2048, ge=0)
|
||
|
||
|
||
class LLMSettings(BaseModel):
    """Configuration of the LLM client.

    The service layer reads this section when it lazily builds the LLM
    client that is handed to the algo extractors. Field names are
    provider-agnostic: the project speaks the OpenAI API protocol, so any
    OpenAI-compatible endpoint can be plugged in through ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    # Held as a SecretStr so the raw key is masked in repr / logs.
    api_key: SecretStr | None = None
    base_url: str | None = None
|
||
|
||
|
||
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, in line with the other
    settings sections: ``max_concurrency`` must be at least 1 and
    ``file_uri_max_bytes`` must not be negative.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # A concurrency limit below 1 could never admit a request.
    max_concurrency: int = Field(default=4, ge=1)

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
|
||
|
||
|
||
class EmbeddingSettings(BaseModel):
    """Configuration of the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model``,
    ``api_key`` and ``base_url`` default to ``None`` here but are required
    at runtime once the embedding capability is enabled. The remaining
    runtime knobs (``timeout_seconds`` etc.) ship with sensible defaults
    and are range-checked.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
|
||
|
||
|
||
class RerankSettings(BaseModel):
    """Configuration of the rerank client.

    LLM and embedding endpoints share one OpenAI-compatible shape; rerank
    APIs do not. DeepInfra expects ``POST {base_url}/{model}`` with a
    custom body, whereas vLLM expects ``POST {base_url}/rerank`` with
    ``{model, query, documents}``. ``provider`` therefore selects which
    client implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    # Which provider-specific request schema to use.
    provider: Literal["deepinfra", "vllm"] = "deepinfra"

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
|
||
|
||
|
||
class BoundaryDetectionSettings(BaseModel):
    """Hard limits forwarded to the ``everalgo`` BoundaryDetector.

    Both limits must be at least 1.
    """

    hard_token_limit: int = Field(65_536, ge=1)
    hard_msg_limit: int = Field(500, ge=1)
|
||
|
||
|
||
class MemorizeSettings(BaseModel):
    """Configuration of the memorize use case.

    ``mode`` decides which boundary detector runs and which pipelines get
    dispatched. One service process serves a single mode; switching modes
    requires a restart.

    - ``"chat"``  -> ``everalgo.user_memory.BoundaryDetector``; only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector``; both
      the user-memory and agent-memory pipelines run.

    ``session_lock_timeout_seconds`` bounds how long a single
    ``memorize()`` invocation may hold the per-session lock. The window
    covers the boundary LLM call, the memcell DB writes and the
    synchronous portion of pipeline dispatch. It keeps a stuck LLM from
    deadlocking later concurrent calls on the same session_id: when the
    timeout fires, the outer ``asyncio.timeout`` cancels the task and the
    lock is released automatically.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    session_lock_timeout_seconds: float = Field(360.0, gt=0)
|
||
|
||
|
||
class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` picks the read path used by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — run ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. This is MaxSim over atomic facts: it costs one
      extra LanceDB scan in exchange for a finer-grained semantic match on
      long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one
      vector per episode = the embedded Content section). This is the
      legacy path, kept so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
|
||
|
||
|
||
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).
        Negative values have no defined meaning and are rejected at load
        time.

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**. Must not be negative.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        ===========  ================  ====================
        cap          FD upper bound    query latency (100q)
        ===========  ================  ====================
        ``2 MB``     ~45               ~5 ms
        ``4 MB``     ~52               ~3 ms
        ``8 MB``     ~140              ~2.4 ms
        ``16 MB``    ~290              ~2.3 ms ← default
        ``32 MB``    ~630              ~1.4 ms
        ``unbound``  >960 (leaks)      ~1.3 ms
        ===========  ================  ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers. The resulting FD ceiling (~290) sits well
        below the Linux default ``ulimit -n`` of 1024, so Linux is fine
        out of the box. It is above the macOS default of 256, so on macOS
        run ``ulimit -n 1024`` first.

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

        Note: the *metadata* cache (``metadata_cache_size_bytes``) is
        **not** exposed — experiment showed it caches in-memory parsed
        manifests / fragment stats with zero impact on FD count; leaving
        it unbounded (lancedb default) is fine.
    """

    # ``ge=0`` is only enforced when a number is supplied; ``None`` stays valid.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # 16 MiB default; a negative byte cap is meaningless, so reject it early.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
|
||
|
||
|
||
class Settings(BaseSettings):
    """Root of the application settings tree.

    Each attribute is one configuration section. Values are read from the
    ``EVEROS_``-prefixed environment (nested keys separated by ``__``),
    the ``.env`` file, and the TOML files registered in
    :meth:`settings_customise_sources`. Unknown keys are ignored.
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML sources between env / dotenv and the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is only
        added when the file actually exists, which keeps the source list
        tight.

        Args:
            settings_cls: Settings class the TOML sources are built for.
            init_settings: Source for programmatic init arguments.
            env_settings: Source for environment variables.
            dotenv_settings: Source for the ``.env`` file.
            file_secret_settings: Source for the secret store.

        Returns:
            The sources, ordered from highest to lowest priority.
        """
        toml_layers: list[PydanticBaseSettingsSource] = []

        # Optional user-level overrides sit above the shipped defaults.
        user_toml = _resolve_user_toml_path()
        if user_toml.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml)
            )

        # No explicit path: falls back to ``model_config["toml_file"]``.
        toml_layers.append(TomlConfigSettingsSource(settings_cls))

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )
|
||
|
||
|
||
@cache
def load_settings() -> Settings:
    """Build the application :class:`Settings` once and reuse the result.

    The returned instance is memoised at module level, so every caller
    shares the same object until the cache is explicitly dropped with
    ``load_settings.cache_clear()``. Tests that monkeypatch environment
    variables must call ``cache_clear`` after each mutation, otherwise the
    previously loaded values keep being served.
    """
    settings = Settings()
    return settings
|