chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,403 @@
"""Application settings.
Loaded by :func:`load_settings`. Source priority (later wins):
1. ``config/default.toml`` (shipped values; lowest priority)
2. ``~/.everos/config.toml`` (user-level overrides; optional)
3. ``.env`` file in the working directory (secrets / machine-specific)
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
5. Init args passed programmatically (highest priority)
The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""
from __future__ import annotations
import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
BaseSettings,
PydanticBaseSettingsSource,
SettingsConfigDict,
TomlConfigSettingsSource,
)
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
def _resolve_user_toml_path() -> Path:
"""Resolve the user-level ``config.toml`` path.
Defaults to ``~/.everos/config.toml``; override with the
``EVEROS_CONFIG_FILE`` environment variable.
"""
override = os.environ.get(_USER_TOML_ENV_VAR)
return Path(override).expanduser() if override else _DEFAULT_USER_TOML_PATH
class MemorySettings(BaseModel):
"""memory-root configuration."""
root: Path = Path("~/.everos")
timezone: str = "UTC"
"""Effective timezone for date buckets and timestamps.
Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
:class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
fast (no silent fallback). This is the **sole** source of truth for
the project's effective timezone — the OS ``TZ`` env var is *not*
consulted, keeping the configuration deterministic.
"""
@field_validator("timezone")
@classmethod
def _validate_timezone(cls, v: str) -> str:
try:
ZoneInfo(v)
except (ZoneInfoNotFoundError, ValueError) as exc:
raise ValueError(f"invalid timezone: {v!r}") from exc
return v
class ApiSettings(BaseModel):
"""HTTP API server bind configuration.
Default ``host = "127.0.0.1"`` keeps the server on loopback only,
matching the threat model in ``SECURITY.md``: EverOS ships **no
built-in authentication**, so binding to a routable interface
(``0.0.0.0`` etc.) without your own gateway / auth layer in front
is unsupported.
Env binding:
EVEROS_API__HOST
EVEROS_API__PORT
"""
host: str = "127.0.0.1"
port: int = Field(default=8000, ge=1, le=65535)
class SqliteSettings(BaseModel):
"""SQLite tunables applied as PRAGMAs on every new connection."""
journal_mode: Literal["WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"] = (
"WAL"
)
synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
foreign_keys: bool = True
temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
busy_timeout_ms: int = Field(default=5000, ge=0)
journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)
cache_size_kb: int = Field(default=2048, ge=0)
class LLMSettings(BaseModel):
"""LLM client configuration.
Read by the service layer when lazily constructing the LLM client
handed to algo extractors. Provider-agnostic field names — the
project follows the OpenAI API protocol so any OpenAI-compatible
endpoint plugs in via ``base_url``.
Env binding (via parent ``Settings``):
EVEROS_LLM__MODEL
EVEROS_LLM__API_KEY
EVEROS_LLM__BASE_URL
"""
model: str = "gpt-4o-mini"
api_key: SecretStr | None = None
base_url: str | None = None
class MultimodalSettings(BaseModel):
"""Multimodal parsing LLM config (everalgo-parser).
Flat section mirroring ``[llm]``. The model must accept multimodal
``image_url`` parts (image / pdf / audio); it is kept independent from
the main ``[llm]`` so parsing can target a vision/audio-capable
endpoint without affecting boundary / extraction.
Env binding (via parent ``Settings``):
EVEROS_MULTIMODAL__MODEL
EVEROS_MULTIMODAL__API_KEY
EVEROS_MULTIMODAL__BASE_URL
EVEROS_MULTIMODAL__MAX_CONCURRENCY
EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
"""
model: str = "google/gemini-3-flash-preview"
api_key: SecretStr | None = None
base_url: str | None = None
max_concurrency: int = 4
# ``file://`` content-item support (read locally by EverOS, not everalgo).
file_uri_allow_dirs: list[str] = []
"""Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
file (local-first default); set to confine reads when the API is exposed."""
file_uri_max_bytes: int = 50 * 1024 * 1024
"""Max size (bytes) of a ``file://`` asset; larger files are rejected."""
class EmbeddingSettings(BaseModel):
"""Embedding client configuration.
OpenAI-compatible embedding endpoint. ``model`` / ``api_key`` /
``base_url`` are required at runtime when the embedding capability
is enabled; the runtime knobs (``timeout`` etc.) have sensible
defaults.
Env binding:
EVEROS_EMBEDDING__MODEL
EVEROS_EMBEDDING__API_KEY
EVEROS_EMBEDDING__BASE_URL
EVEROS_EMBEDDING__TIMEOUT_SECONDS
EVEROS_EMBEDDING__MAX_RETRIES
EVEROS_EMBEDDING__BATCH_SIZE
EVEROS_EMBEDDING__MAX_CONCURRENT
"""
model: str | None = None
api_key: SecretStr | None = None
base_url: str | None = None
timeout_seconds: float = Field(default=30.0, gt=0)
max_retries: int = Field(default=3, ge=0)
batch_size: int = Field(default=10, ge=1)
max_concurrent: int = Field(default=5, ge=1)
class RerankSettings(BaseModel):
"""Rerank client configuration.
Unlike LLM / embedding (single OpenAI-compatible shape), rerank API
schemas differ between providers — DeepInfra uses ``POST {base_url}/
{model}`` with a custom body, vLLM uses ``POST {base_url}/rerank``
with ``{model, query, documents}``. ``provider`` picks which client
implementation the factory builds.
Env binding:
EVEROS_RERANK__PROVIDER
EVEROS_RERANK__MODEL
EVEROS_RERANK__API_KEY
EVEROS_RERANK__BASE_URL
EVEROS_RERANK__TIMEOUT_SECONDS
EVEROS_RERANK__MAX_RETRIES
EVEROS_RERANK__BATCH_SIZE
EVEROS_RERANK__MAX_CONCURRENT
"""
provider: Literal["deepinfra", "vllm"] = "deepinfra"
model: str | None = None
api_key: SecretStr | None = None
base_url: str | None = None
timeout_seconds: float = Field(default=30.0, gt=0)
max_retries: int = Field(default=3, ge=0)
batch_size: int = Field(default=10, ge=1)
max_concurrent: int = Field(default=5, ge=1)
class BoundaryDetectionSettings(BaseModel):
"""Hard limits passed through to ``everalgo`` BoundaryDetector."""
hard_token_limit: int = Field(default=65536, ge=1)
hard_msg_limit: int = Field(default=500, ge=1)
class MemorizeSettings(BaseModel):
"""Memorize use-case configuration.
``mode`` selects which boundary detector runs and which pipelines are
dispatched. A service process serves one mode at a time; toggling
requires a restart.
- ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector`` and only the
user-memory pipeline runs.
- ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
both user-memory + agent-memory pipelines run.
``session_lock_timeout_seconds`` caps how long one ``memorize()``
invocation can hold the per-session lock. Covers boundary LLM call +
memcell DB writes + (synchronous portion of) pipeline dispatch. Stops
a stuck LLM from deadlocking subsequent concurrent calls on the same
session_id: on timeout the outer ``asyncio.timeout`` cancels the task
and the lock auto-releases.
Env binding:
EVEROS_MEMORIZE__MODE
EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
"""
mode: Literal["chat", "agent"] = "agent"
session_lock_timeout_seconds: float = Field(default=360.0, gt=0)
class SearchSettings(BaseModel):
"""Search-pipeline policy knobs.
``vector_strategy`` selects the read path taken by
``SearchMethod.VECTOR``:
- ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
(recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
cosine by parent memcell, then reverse-resolve the top memcells back
to episode rows. MaxSim over atomic facts; trades one extra LanceDB
scan for finer-grained semantic match on long episodes.
- ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
per episode = the embedded Content section). The legacy path; kept
so deployments can opt out via env.
Env binding:
EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
"""
vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
class LanceDBSettings(BaseModel):
"""LanceDB tunables.
``read_consistency_seconds``:
``None`` (omitted) → no consistency check (highest performance).
``0`` → strict consistency (every read).
``>0`` → eventual (interval between checks).
``index_cache_size_bytes``:
Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
in lance crate). Each cached entry is one opened FTS / vector /
scalar index reader and **holds the file descriptors of its on-disk
``_indices/<uuid>/...`` files**.
LanceDB's own default is ``None`` (unbounded), which on a long-
running daemon means every new index UUID created by an
``optimize()`` call adds a fresh reader to the cache, and its
FDs are never released — they leak monotonically until
``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
of 256 / Linux's 1024.
Setting a byte cap turns the cache into a real LRU: when it
exceeds the cap, the oldest readers are dropped, Rust ``Drop``
runs ``close(fd)``, and the FD pressure resolves itself.
Cap → steady-state FD upper bound (measured under 30 add+optimize
cycles with the real ``Episode`` schema and 100-query stress):
=========== ================= ===================
cap FD upper bound query latency (100q)
=========== ================= ===================
``2 MB`` ~45 ~5 ms
``4 MB`` ~52 ~3 ms
``8 MB`` ~140 ~2.4 ms
``16 MB`` ~290 ~2.3 ms ← default
``32 MB`` ~630 ~1.4 ms
``unbound`` >960 (leaks) ~1.3 ms
=========== ================= ===================
EverOS's measured steady-state working set after a 12 h
``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
(5 tables × ~7 BM25 columns × ~10 part_N entries each), so
``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
yet-evicted readers, while the FD ceiling (~290) stays well below
common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
Linux default 1024 is fine out of the box).
Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
working set is much larger (heavier table count or much wider
indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
boxes).
Note: the *metadata* cache (``metadata_cache_size_bytes``) is
**not** exposed — experiment showed it caches in-memory parsed
manifests / fragment stats with zero impact on FD count; leaving
it unbounded (lancedb default) is fine.
"""
read_consistency_seconds: float | None = None
index_cache_size_bytes: int = 16 * 1024 * 1024
class Settings(BaseSettings):
"""Top-level application settings."""
memory: MemorySettings = MemorySettings()
api: ApiSettings = ApiSettings()
sqlite: SqliteSettings = SqliteSettings()
lancedb: LanceDBSettings = LanceDBSettings()
llm: LLMSettings = LLMSettings()
embedding: EmbeddingSettings = EmbeddingSettings()
rerank: RerankSettings = RerankSettings()
boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
memorize: MemorizeSettings = MemorizeSettings()
search: SearchSettings = SearchSettings()
multimodal: MultimodalSettings = MultimodalSettings()
model_config = SettingsConfigDict(
env_prefix="EVEROS_",
env_nested_delimiter="__",
env_file=".env",
env_file_encoding="utf-8",
toml_file=_DEFAULT_TOML_PATH,
extra="ignore",
)
@classmethod
def settings_customise_sources(
cls,
settings_cls: type[BaseSettings],
init_settings: PydanticBaseSettingsSource,
env_settings: PydanticBaseSettingsSource,
dotenv_settings: PydanticBaseSettingsSource,
file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
"""Layer TOML sources between env / dotenv and the secret store.
Order (earlier wins in pydantic-settings):
init_args > env > .env > user_toml > default_toml > secrets
The user-level toml (default ``~/.everos/config.toml``) is only
registered when the file exists, so the source list stays tight.
"""
sources: list[PydanticBaseSettingsSource] = [
init_settings,
env_settings,
dotenv_settings,
]
user_toml_path = _resolve_user_toml_path()
if user_toml_path.is_file():
sources.append(
TomlConfigSettingsSource(settings_cls, toml_file=user_toml_path)
)
sources.append(TomlConfigSettingsSource(settings_cls))
sources.append(file_secret_settings)
return tuple(sources)
@cache
def load_settings() -> Settings:
"""Load settings from default.toml + environment variables (cached).
Cached at the module level — every caller sees the same instance until
something explicitly clears the cache (``load_settings.cache_clear()``).
Tests that monkeypatch environment variables must call
``cache_clear`` after each mutation to pick the new env up.
"""
return Settings()