Files
EverOS/src/everos/config/settings.py
Elliot Chen 518b8eca85 chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-06 07:33:17 +08:00

404 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Application settings.
Loaded by :func:`load_settings`. Source priority (later wins):
1. ``config/default.toml`` (shipped values; lowest priority)
2. ``~/.everos/config.toml`` (user-level overrides; optional)
3. ``.env`` file in the working directory (secrets / machine-specific)
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
5. Init args passed programmatically (highest priority)
The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""
from __future__ import annotations
import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
BaseSettings,
PydanticBaseSettingsSource,
SettingsConfigDict,
TomlConfigSettingsSource,
)
# Shipped defaults: ``default.toml`` sitting next to this module.
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
# Name of the environment variable that overrides the user-level TOML path.
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
# Fallback user-level TOML path; ``~`` is expanded once, at import time.
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
def _resolve_user_toml_path() -> Path:
    """Return the path of the user-level ``config.toml``.

    The ``EVEROS_CONFIG_FILE`` environment variable wins when it is set to
    a non-empty value (``~`` is expanded); otherwise the default
    ``~/.everos/config.toml`` is returned. The path is not checked for
    existence here.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset *or* empty: fall back to the default location.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
class MemorySettings(BaseModel):
    """Memory-root configuration.

    Env binding (via parent ``Settings``):
        EVEROS_MEMORY__ROOT
        EVEROS_MEMORY__TIMEZONE
    """

    # Stored with a literal ``~``; this model does not expand it.
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone -- the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Args:
            v: Candidate IANA timezone key (e.g. ``"UTC"``).

        Returns:
            ``v`` unchanged when the zone loads successfully.

        Raises:
            ValueError: If the zone cannot be loaded; the original error is
                chained as ``__cause__``.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # ZoneInfoNotFoundError -> unknown key; ValueError -> malformed
            # key. OSError covers IsADirectoryError / PermissionError, which
            # some CPython releases let escape for keys that resolve to a
            # directory (e.g. "America"); without it the raw OSError would
            # bypass the uniform "invalid timezone" message.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
class ApiSettings(BaseModel):
    """Bind address for the HTTP API server.

    The default ``host = "127.0.0.1"`` keeps the server on loopback only.
    This matches the threat model in ``SECURITY.md``: EverOS ships **no
    built-in authentication**, so exposing it on a routable interface
    (``0.0.0.0`` etc.) is unsupported unless your own gateway / auth layer
    sits in front.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    # Constrained to the valid TCP port range.
    port: int = Field(8000, ge=1, le=65535)
class SqliteSettings(BaseModel):
    """SQLite PRAGMA values, applied on every new connection."""

    journal_mode: Literal[
        "WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    # All numeric tunables are validated as non-negative.
    busy_timeout_ms: int = Field(5000, ge=0)
    journal_size_limit_bytes: int = Field(64 * 1024**2, ge=0)  # 64 MiB
    cache_size_kb: int = Field(2048, ge=0)
class LLMSettings(BaseModel):
    """Configuration for the LLM client.

    The service layer reads this when it lazily builds the LLM client that
    is handed to the algo extractors. Field names are provider-agnostic:
    the project speaks the OpenAI API protocol, so any OpenAI-compatible
    endpoint can be plugged in through ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    # ``SecretStr`` keeps the key masked in reprs.
    api_key: SecretStr | None = None
    base_url: str | None = None
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    max_concurrency: int = Field(default=4, ge=1)
    """Concurrency cap for multimodal parsing. Must be >= 1, consistent with
    ``max_concurrent`` on the embedding / rerank sections, so a zero or
    negative value fails at load time instead of misbehaving later."""
    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected.
    Must be non-negative."""
class EmbeddingSettings(BaseModel):
    """Configuration for the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model``,
    ``api_key`` and ``base_url`` default to ``None`` here but are required
    at runtime once the embedding capability is enabled; the runtime knobs
    (``timeout_seconds`` and the rest) ship with sensible defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, each range-checked at load time.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
class RerankSettings(BaseModel):
    """Configuration for the rerank client.

    LLM and embedding share a single OpenAI-compatible shape, but rerank
    API schemas vary by provider: DeepInfra uses
    ``POST {base_url}/{model}`` with a custom body, while vLLM uses
    ``POST {base_url}/rerank`` with ``{model, query, documents}``.
    ``provider`` selects which client implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm"] = "deepinfra"

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, each range-checked at load time.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
class BoundaryDetectionSettings(BaseModel):
    """Hard limits forwarded to the ``everalgo`` BoundaryDetector.

    Both limits must be at least 1.
    """

    hard_token_limit: int = Field(65536, ge=1)
    hard_msg_limit: int = Field(500, ge=1)
class MemorizeSettings(BaseModel):
    """Configuration for the memorize use case.

    ``mode`` decides which boundary detector runs and which pipelines are
    dispatched. One service process serves a single mode; switching modes
    requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector``; only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector``; both
      the user-memory and agent-memory pipelines run.

    ``session_lock_timeout_seconds`` bounds how long a single
    ``memorize()`` invocation may hold the per-session lock. The window
    covers the boundary LLM call, the memcell DB writes and the synchronous
    portion of pipeline dispatch. It prevents a stuck LLM from deadlocking
    later concurrent calls on the same session_id: on timeout the outer
    ``asyncio.timeout`` cancels the task and the lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    session_lock_timeout_seconds: float = Field(360.0, gt=0)
class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` picks the read path used by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default): ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. This is MaxSim over atomic facts; it trades one
      extra LanceDB scan for finer-grained semantic match on long episodes.
    - ``"episode"``: single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). The legacy path, kept
      so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    ``read_consistency_seconds``:
        ``None`` (omitted) -> no consistency check (highest performance).
        ``0`` -> strict consistency (every read).
        ``>0`` -> eventual (interval between checks).
        Negative values are rejected at load time.

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**. Must be non-negative.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released -- they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap -> steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        =========== ================= =====================
        cap         FD upper bound    query latency (100q)
        =========== ================= =====================
        ``2 MB``    ~45               ~5 ms
        ``4 MB``    ~52               ~3 ms
        ``8 MB``    ~140              ~2.4 ms
        ``16 MB``   ~290              ~2.3 ms (default)
        ``32 MB``   ~630              ~1.4 ms
        ``unbound`` >960 (leaks)      ~1.3 ms
        =========== ================= =====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables x ~7 BM25 columns x ~10 part_N entries each), so
        ``16 MB`` gives ~3x headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

    Note: the *metadata* cache (``metadata_cache_size_bytes``) is
    **not** exposed -- experiment showed it caches in-memory parsed
    manifests / fragment stats with zero impact on FD count; leaving
    it unbounded (lancedb default) is fine.
    """

    # ``ge=0`` applies only when a number is given; ``None`` stays valid.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # A negative byte cap is meaningless, so fail fast at load time like the
    # other numeric tunables in this module.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
class Settings(BaseSettings):
    """Root of the application settings tree.

    Each attribute corresponds to one TOML section and to one
    ``EVEROS_<SECTION>__<KEY>`` family of environment variables.
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML layers between env / dotenv and the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is added
        only when the file exists, so the source list stays tight. The
        default toml source takes its path from ``model_config``.
        """
        user_toml = _resolve_user_toml_path()
        # Zero-or-one element tuple so the optional layer can be splatted in.
        user_layer: tuple[PydanticBaseSettingsSource, ...] = (
            (TomlConfigSettingsSource(settings_cls, toml_file=user_toml),)
            if user_toml.is_file()
            else ()
        )
        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *user_layer,
            TomlConfigSettingsSource(settings_cls),
            file_secret_settings,
        )
@cache
def load_settings() -> Settings:
    """Build the application settings once and reuse the instance.

    ``Settings()`` resolves every configured layer (shipped
    ``default.toml``, optional user-level toml, ``.env`` and ``EVEROS_*``
    environment variables). The result is memoised by ``functools.cache``,
    so all callers share one instance until ``load_settings.cache_clear()``
    is called. Tests that monkeypatch environment variables must call
    ``cache_clear`` after each mutation to pick the new env up.
    """
    settings = Settings()
    return settings