chore: initialize EverOS 1.0.0
A Markdown-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.
Engineering surface:
- Coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces the datetime two-zone discipline and checks for OpenAPI drift.
This commit is contained in:
403
src/everos/config/settings.py
Normal file
403
src/everos/config/settings.py
Normal file
@ -0,0 +1,403 @@
|
||||
"""Application settings.
|
||||
|
||||
Loaded by :func:`load_settings`. Source priority (later wins):
|
||||
|
||||
1. ``config/default.toml`` (shipped values; lowest priority)
|
||||
2. ``~/.everos/config.toml`` (user-level overrides; optional)
|
||||
3. ``.env`` file in the working directory (secrets / machine-specific)
|
||||
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
|
||||
5. Init args passed programmatically (highest priority)
|
||||
|
||||
The user-level toml path defaults to ``~/.everos/config.toml``. Override
|
||||
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
|
||||
optional — if it does not exist, the source is silently skipped.
|
||||
|
||||
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
|
||||
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
|
||||
|
||||
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
|
||||
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
|
||||
call. Tests that mutate environment variables must call
|
||||
``load_settings.cache_clear()`` after the mutation to invalidate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from functools import cache
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr, field_validator
|
||||
from pydantic_settings import (
|
||||
BaseSettings,
|
||||
PydanticBaseSettingsSource,
|
||||
SettingsConfigDict,
|
||||
TomlConfigSettingsSource,
|
||||
)
|
||||
|
||||
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
|
||||
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
|
||||
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
|
||||
|
||||
|
||||
def _resolve_user_toml_path() -> Path:
    """Return the location of the user-level ``config.toml``.

    When the ``EVEROS_CONFIG_FILE`` environment variable holds a non-empty
    value, that value is used as the path (with a leading ``~`` expanded).
    Otherwise the default ``~/.everos/config.toml`` is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset and empty are treated alike: fall back to the default path.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
|
||||
|
||||
|
||||
class MemorySettings(BaseModel):
    """memory-root configuration: the ``root`` path and effective ``timezone``."""

    # NOTE(review): the default keeps a literal ``~``; nothing in this class
    # expands it — presumably the consumer calls ``expanduser()``. Confirm.
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Args:
            v: Candidate IANA timezone key (e.g. ``"UTC"``).

        Returns:
            ``v`` unchanged when the key resolves to a timezone.

        Raises:
            ValueError: If the key is unknown, malformed, or cannot be read.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError is included because, on some Python versions and
            # platforms, a key that names a tzdata *directory* (e.g.
            # "America") surfaces as IsADirectoryError / PermissionError
            # rather than ZoneInfoNotFoundError. Left uncaught, it would
            # escape as a raw exception instead of a validation error.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
|
||||
|
||||
|
||||
class ApiSettings(BaseModel):
    """Bind address for the HTTP API server.

    The server listens on loopback only by default (``host =
    "127.0.0.1"``). This matches the threat model in ``SECURITY.md``:
    EverOS ships **no built-in authentication**, so exposing it on a
    routable interface (``0.0.0.0`` etc.) without your own gateway / auth
    layer in front is unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    # Loopback-only unless explicitly overridden.
    host: str = "127.0.0.1"
    # Must fall within 1..65535.
    port: int = Field(default=8000, ge=1, le=65535)
|
||||
|
||||
|
||||
class SqliteSettings(BaseModel):
    """SQLite tunables applied as PRAGMAs on every new connection."""

    # Enumerated options: only the listed spellings are accepted.
    journal_mode: Literal[
        "WAL",
        "DELETE",
        "MEMORY",
        "OFF",
        "TRUNCATE",
        "PERSIST",
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"

    # Numeric knobs: all must be non-negative.
    busy_timeout_ms: int = Field(default=5000, ge=0)
    journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)  # 64 MiB
    cache_size_kb: int = Field(default=2048, ge=0)
|
||||
|
||||
|
||||
class LLMSettings(BaseModel):
    """Configuration for the LLM client.

    The service layer reads this section when it lazily constructs the
    LLM client handed to algo extractors. Field names are
    provider-agnostic: the project follows the OpenAI API protocol, so
    any OpenAI-compatible endpoint can be plugged in through
    ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    # Held as a SecretStr; optional.
    api_key: SecretStr | None = None
    # Optional endpoint override for OpenAI-compatible providers.
    base_url: str | None = None
|
||||
|
||||
|
||||
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, in line with the other
    settings sections: ``max_concurrency`` must be at least 1 and
    ``file_uri_max_bytes`` must be non-negative.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # Zero or negative concurrency is meaningless, so reject it up front.
    max_concurrency: int = Field(default=4, ge=1)

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    # default_factory avoids a shared mutable list literal as the default.
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
|
||||
|
||||
|
||||
class EmbeddingSettings(BaseModel):
    """Configuration for the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model`` /
    ``api_key`` / ``base_url`` default to ``None`` here but are required
    at runtime when the embedding capability is enabled; the runtime
    knobs (``timeout`` etc.) have sensible defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, each range-checked at load time.
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)
|
||||
|
||||
|
||||
class RerankSettings(BaseModel):
    """Configuration for the rerank client.

    LLM and embedding share a single OpenAI-compatible shape, but rerank
    API schemas differ between providers: DeepInfra uses
    ``POST {base_url}/{model}`` with a custom body, whereas vLLM uses
    ``POST {base_url}/rerank`` with ``{model, query, documents}``.
    ``provider`` picks which client implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    # Which provider-specific request schema to speak.
    provider: Literal["deepinfra", "vllm"] = "deepinfra"

    # Endpoint identity.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, each range-checked at load time.
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)
|
||||
|
||||
|
||||
class BoundaryDetectionSettings(BaseModel):
    """Hard limits passed through to ``everalgo`` BoundaryDetector.

    Both limits must be at least 1.
    """

    hard_token_limit: int = Field(default=65536, ge=1)
    hard_msg_limit: int = Field(default=500, ge=1)
|
||||
|
||||
|
||||
class MemorizeSettings(BaseModel):
    """Configuration for the memorize use case.

    ``mode`` decides which boundary detector runs and which pipelines are
    dispatched. A service process serves exactly one mode at a time;
    switching modes requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector`` and only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
      both user-memory + agent-memory pipelines run.

    ``session_lock_timeout_seconds`` is the upper bound on how long a
    single ``memorize()`` invocation may hold the per-session lock. The
    window covers the boundary LLM call + memcell DB writes + (the
    synchronous portion of) pipeline dispatch. It prevents a stuck LLM
    from deadlocking subsequent concurrent calls on the same session_id:
    on timeout the outer ``asyncio.timeout`` cancels the task and the
    lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    # Must be strictly positive.
    session_lock_timeout_seconds: float = Field(default=360.0, gt=0)
|
||||
|
||||
|
||||
class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` chooses the read path used by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — run ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells
      back to episode rows. This is MaxSim over atomic facts; it trades
      one extra LanceDB scan for finer-grained semantic match on long
      episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one
      vector per episode = the embedded Content section). The legacy
      path; kept so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
|
||||
|
||||
|
||||
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    Both fields are range-checked at load time: negative values are
    rejected instead of being passed through to LanceDB.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        ===========  =================  ====================
        cap          FD upper bound     query latency (100q)
        ===========  =================  ====================
        ``2 MB``     ~45                ~5 ms
        ``4 MB``     ~52                ~3 ms
        ``8 MB``     ~140               ~2.4 ms
        ``16 MB``    ~290               ~2.3 ms  ← default
        ``32 MB``    ~630               ~1.4 ms
        ``unbound``  >960 (leaks)       ~1.3 ms
        ===========  =================  ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

        Note: the *metadata* cache (``metadata_cache_size_bytes``) is
        **not** exposed — experiment showed it caches in-memory parsed
        manifests / fragment stats with zero impact on FD count; leaving
        it unbounded (lancedb default) is fine.
    """

    # Only None, 0 and positive intervals have a documented meaning, so a
    # negative interval is rejected at load time.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # A byte cap cannot be negative; fail fast rather than at connect time.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Top-level application settings.

    Each attribute is one configuration section; the attribute name is
    the TOML table name and the ``EVEROS_<SECTION>__`` env prefix.
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML sources between env / dotenv and the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is added
        only when the file exists, which keeps the source list tight.
        """
        toml_layers: list[PydanticBaseSettingsSource] = []

        user_toml = _resolve_user_toml_path()
        if user_toml.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml)
            )
        # With no explicit path, the source reads ``toml_file`` from
        # ``model_config`` — i.e. the shipped default.toml.
        toml_layers.append(TomlConfigSettingsSource(settings_cls))

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )
|
||||
|
||||
|
||||
@cache
def load_settings() -> Settings:
    """Build the application :class:`Settings` once and reuse it (cached).

    Values come from the sources that :class:`Settings` layers together:
    the shipped ``default.toml``, the optional user-level toml, the
    ``.env`` file and ``EVEROS_*`` environment variables.

    The result is memoised at module level, so every caller receives the
    same instance until the cache is explicitly cleared with
    ``load_settings.cache_clear()``. Tests that monkeypatch environment
    variables must call ``cache_clear`` after each mutation so the new
    environment is picked up.
    """
    settings = Settings()
    return settings
|
||||
Reference in New Issue
Block a user