chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
37
src/everos/config/__init__.py
Normal file
37
src/everos/config/__init__.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""Configuration data and Settings schema.
|
||||
|
||||
Public API:
|
||||
from everos.config import (
|
||||
Settings, MemorySettings, SqliteSettings, LanceDBSettings,
|
||||
LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
|
||||
BoundaryDetectionSettings,
|
||||
load_settings,
|
||||
)
|
||||
|
||||
Distinct from ``everos.component.config`` (which is a *capability* —
|
||||
loader / merger / env reader).
|
||||
"""
|
||||
|
||||
from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
|
||||
from .settings import EmbeddingSettings as EmbeddingSettings
|
||||
from .settings import LanceDBSettings as LanceDBSettings
|
||||
from .settings import LLMSettings as LLMSettings
|
||||
from .settings import MemorySettings as MemorySettings
|
||||
from .settings import MultimodalSettings as MultimodalSettings
|
||||
from .settings import RerankSettings as RerankSettings
|
||||
from .settings import Settings as Settings
|
||||
from .settings import SqliteSettings as SqliteSettings
|
||||
from .settings import load_settings as load_settings
|
||||
|
||||
__all__ = [
|
||||
"BoundaryDetectionSettings",
|
||||
"EmbeddingSettings",
|
||||
"LLMSettings",
|
||||
"LanceDBSettings",
|
||||
"MemorySettings",
|
||||
"MultimodalSettings",
|
||||
"RerankSettings",
|
||||
"Settings",
|
||||
"SqliteSettings",
|
||||
"load_settings",
|
||||
]
|
||||
137
src/everos/config/default.toml
Normal file
137
src/everos/config/default.toml
Normal file
@ -0,0 +1,137 @@
|
||||
# everos default configuration.
|
||||
#
|
||||
# Lookup order (later overrides earlier):
|
||||
# 1. This file (shipped defaults; lowest priority)
|
||||
# 2. ~/.everos/config.toml — user-level overrides (optional;
|
||||
# path is overridable via EVEROS_CONFIG_FILE)
|
||||
# 3. .env file in the working directory
|
||||
# 4. Environment variables — EVEROS_<SECTION>__<KEY>
|
||||
# e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
|
||||
# 5. Programmatic init args (highest priority)
|
||||
#
|
||||
# TOML has no `null`: omitting a key means "use the Pydantic default declared in code".
|
||||
|
||||
[memory]
|
||||
# memory-root is the single directory holding all persisted memory.
|
||||
# `~` is expanded; the path is resolved when MemoryRoot is constructed.
|
||||
root = "~/.everos"
|
||||
# Effective timezone for date buckets and timestamps. Drives
|
||||
# component.utils.datetime; this is the SOLE source — OS `TZ` is not
|
||||
# read. Override via `EVEROS_MEMORY__TIMEZONE` env var if needed.
|
||||
timezone = "UTC"
|
||||
|
||||
[api]
|
||||
# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
|
||||
# keeps the API on loopback only — EverOS ships no built-in auth (see
|
||||
# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
|
||||
# have placed your own gateway / auth layer in front of the server.
|
||||
# Override via EVEROS_API__HOST and EVEROS_API__PORT.
|
||||
host = "127.0.0.1"
|
||||
port = 8000
|
||||
|
||||
[sqlite]
|
||||
# PRAGMA journal_mode — WAL is the recommended high-concurrency mode.
|
||||
journal_mode = "WAL"
|
||||
# PRAGMA synchronous — NORMAL is safe under WAL and ~2x faster than FULL.
|
||||
synchronous = "NORMAL"
|
||||
# PRAGMA foreign_keys — must be explicitly enabled per connection.
|
||||
foreign_keys = true
|
||||
# PRAGMA temp_store — MEMORY keeps query intermediates in RAM (no IO impact
|
||||
# on durability — only affects sort/group/temp-table calculation buffers).
|
||||
temp_store = "MEMORY"
|
||||
# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
|
||||
busy_timeout_ms = 5000
|
||||
# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
|
||||
journal_size_limit_bytes = 67108864
|
||||
# PRAGMA cache_size — KB of page cache (per connection).
|
||||
cache_size_kb = 2048
|
||||
|
||||
[lancedb]
|
||||
# Read consistency interval in seconds.
|
||||
# omitted / null -> no consistency check (fastest reads)
|
||||
# 0 -> strict (every read checks updates)
|
||||
# >0 -> eventual (interval seconds between checks)
|
||||
# Uncomment to override:
|
||||
# read_consistency_seconds = 5.0
|
||||
|
||||
[llm]
|
||||
# Provider-agnostic OpenAI-protocol client config. Override via env:
|
||||
# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
|
||||
# Or via a ``.env`` file in the working directory (auto-loaded).
|
||||
model = "gpt-4o-mini"
|
||||
# api_key = ""
|
||||
# base_url = ""
|
||||
|
||||
[multimodal]
|
||||
# Independent LLM for multimodal parsing (everalgo-parser); must accept
|
||||
# image / pdf / audio image_url parts. Override via env:
|
||||
# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
|
||||
model = "google/gemini-3-flash-preview"
|
||||
max_concurrency = 4
|
||||
# api_key = ""
|
||||
# base_url = ""
|
||||
# file:// content-item support (read locally by EverOS, not everalgo).
|
||||
# file_uri_allow_dirs: empty = allow any readable file (local-first default);
|
||||
# list base dirs to confine reads when the API is exposed.
|
||||
# file_uri_allow_dirs = ["/srv/uploads"]
|
||||
# file_uri_max_bytes = 52428800 # 50 MiB cap per file:// asset
|
||||
|
||||
[embedding]
|
||||
# OpenAI-compatible embedding endpoint. Override via env:
|
||||
# EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
|
||||
# model / api_key / base_url have no shipped defaults — must be set
|
||||
# (env or user toml) before the embedding capability is used.
|
||||
# model = "Qwen/Qwen3-Embedding-4B"
|
||||
# api_key = ""
|
||||
# base_url = "https://api.example.com/v1"
|
||||
timeout_seconds = 30.0
|
||||
max_retries = 3
|
||||
batch_size = 10
|
||||
max_concurrent = 5
|
||||
|
||||
[rerank]
|
||||
# Rerank provider. Override via env:
|
||||
# EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
|
||||
# EVEROS_RERANK__BASE_URL
|
||||
# `provider` picks the request-shape:
|
||||
# - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
|
||||
# - "vllm" -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
|
||||
provider = "deepinfra"
|
||||
# model = "Qwen/Qwen3-Reranker-4B"
|
||||
# api_key = ""
|
||||
# base_url = "https://api.deepinfra.com/v1/inference"
|
||||
timeout_seconds = 30.0
|
||||
max_retries = 3
|
||||
batch_size = 10
|
||||
max_concurrent = 5
|
||||
|
||||
[boundary_detection]
|
||||
# Passed through to ``everalgo.BoundaryDetector.adetect``.
|
||||
hard_token_limit = 65536
|
||||
hard_msg_limit = 500
|
||||
|
||||
[search]
|
||||
# Vector retrieval strategy when SearchMethod.VECTOR is selected.
|
||||
# "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20, capped at 2000),
|
||||
# max-pool the per-fact cosine by parent memcell, then reverse-resolve
|
||||
# to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
|
||||
# episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
|
||||
# "episode": single-vector ANN over episode.vector (legacy path).
|
||||
# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
|
||||
vector_strategy = "maxsim_atomic"
|
||||
|
||||
[memorize]
|
||||
# Conversation mode. Selects the boundary detector and which pipelines run:
|
||||
# "chat" -> BoundaryDetector + user_memory only
|
||||
# "agent" -> AgentBoundaryDetector + user_memory + agent_memory
|
||||
# A single service process serves one mode at a time; switching mode
|
||||
# requires a restart. Override via EVEROS_MEMORIZE__MODE.
|
||||
mode = "agent"
|
||||
|
||||
# Maximum wall-clock for one memorize() invocation while holding the
|
||||
# per-session lock. On timeout the outer asyncio.timeout cancels the call
|
||||
# and the lock auto-releases so subsequent concurrent /add on the same
|
||||
# session aren't deadlocked. Covers boundary LLM + memcell writes +
|
||||
# synchronous portion of pipeline dispatch.
|
||||
# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
|
||||
session_lock_timeout_seconds = 360.0
|
||||
64
src/everos/config/default_ome.toml
Normal file
64
src/everos/config/default_ome.toml
Normal file
@ -0,0 +1,64 @@
|
||||
# everos OME (Offline Memory Engine) — per-strategy overrides.
|
||||
#
|
||||
# This file is materialised at ``<memory-root>/ome.toml`` by
|
||||
# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
|
||||
# individual strategies or tweak their gate / retry / cron without
|
||||
# restarting the server; the engine watches this file and hot-reloads
|
||||
# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
|
||||
# your edits — the file is only materialised when absent.
|
||||
#
|
||||
# Overrides are partial: only the keys you set replace the in-code
|
||||
# defaults; omitted keys keep each strategy's coded value. Unknown
|
||||
# keys (typos) raise StartupValidationError, so you cannot silently
|
||||
# misconfigure a strategy.
|
||||
#
|
||||
# Per-strategy schema (StrategyOverride):
|
||||
# enabled = bool # disable a strategy entirely
|
||||
# max_retries = int >= 0 # re-fire on failure
|
||||
# cron = str # replace the @cron(...) trigger
|
||||
# idle_seconds = int > 0 # replace @idle(...) idle window
|
||||
# scan_interval_seconds = int > 0 # paired with idle_seconds; must be <= idle/2
|
||||
# [strategies.<name>.gate] # only valid on @counter-gated strategies
|
||||
# threshold = int > 0 # counter trigger threshold
|
||||
# cooldown_seconds = int >= 0 # min seconds between fires
|
||||
# event_field = str # dispatch field for counter increment
|
||||
|
||||
# ── User-memory pipeline ────────────────────────────────────────────────
|
||||
|
||||
# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
|
||||
# [strategies.extract_atomic_facts]
|
||||
# enabled = true
|
||||
|
||||
# Foresight extraction (runs per memcell). Heavy LLM call — common to
|
||||
# disable in evaluation / benchmark runs.
|
||||
# [strategies.extract_foresight]
|
||||
# enabled = false
|
||||
|
||||
# Profile clustering trigger (counter-gated; fires once N user memcells
|
||||
# accumulate). Lower the threshold to cluster more aggressively.
|
||||
# [strategies.trigger_profile_clustering]
|
||||
# enabled = true
|
||||
# [strategies.trigger_profile_clustering.gate]
|
||||
# threshold = 5
|
||||
|
||||
# User-profile extraction (runs after clustering trigger fires). Common
|
||||
# to disable in evaluation runs where ground-truth profiles aren't measured.
|
||||
# [strategies.extract_user_profile]
|
||||
# enabled = false
|
||||
|
||||
# ── Agent-memory pipeline ───────────────────────────────────────────────
|
||||
|
||||
# Agent case extraction (runs per agent memcell). One per tool call cycle.
|
||||
# [strategies.extract_agent_case]
|
||||
# enabled = true
|
||||
|
||||
# Skill clustering trigger (counter-gated; fires once N agent cases
|
||||
# accumulate per agent).
|
||||
# [strategies.trigger_skill_clustering]
|
||||
# enabled = true
|
||||
# [strategies.trigger_skill_clustering.gate]
|
||||
# threshold = 5
|
||||
|
||||
# Agent skill extraction (runs after skill clustering trigger fires).
|
||||
# [strategies.extract_agent_skill]
|
||||
# enabled = true
|
||||
0
src/everos/config/prompt_slots/.gitkeep
Normal file
0
src/everos/config/prompt_slots/.gitkeep
Normal file
0
src/everos/config/prompt_slots/__init__.py
Normal file
0
src/everos/config/prompt_slots/__init__.py
Normal file
20
src/everos/config/prompt_slots/boundary_detection.yaml
Normal file
20
src/everos/config/prompt_slots/boundary_detection.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
# Custom prompt slot for BoundaryDetector.adetect.
|
||||
#
|
||||
# Default behaviour
|
||||
# Leave this slot disabled (``enabled: false``). The pipeline will pass
|
||||
# ``prompt=None`` through to algo, which falls back to the everalgo
|
||||
# bundled default prompt — see:
|
||||
# ~/everalgo/packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
|
||||
# (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
|
||||
#
|
||||
# To customise
|
||||
# 1. Read the algo default at the path above; note the required
|
||||
# placeholders ``{messages}`` and ``{token_count}``.
|
||||
# 2. Replace the ``template`` body below with your prompt.
|
||||
# 3. Flip ``enabled`` to ``true``.
|
||||
#
|
||||
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
|
||||
# ``prompt=None`` and the algo default is used (zero override cost).
|
||||
|
||||
enabled: false
|
||||
template: ""
|
||||
23
src/everos/config/prompt_slots/episode_extract.yaml
Normal file
23
src/everos/config/prompt_slots/episode_extract.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
# Custom prompt slot for EpisodeExtractor.aextract.
|
||||
#
|
||||
# Default behaviour
|
||||
# Leave this slot disabled (``enabled: false``). The pipeline will pass
|
||||
# ``prompt=None`` through to algo, which falls back to the everalgo
|
||||
# bundled default prompt — see:
|
||||
# everalgo/user_memory/prompts/en/episode.py
|
||||
# (the pipeline calls ``aextract`` with ``sender_id=None``, so the
|
||||
# whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
|
||||
# per-user ``USER_EPISODE_GENERATION_PROMPT``)
|
||||
#
|
||||
# To customise
|
||||
# 1. Read the algo default at the path above; note the required
|
||||
# placeholders ``{conversation_start_time}``, ``{conversation}`` and
|
||||
# ``{custom_instructions}``.
|
||||
# 2. Replace the ``template`` body below with your prompt.
|
||||
# 3. Flip ``enabled`` to ``true``.
|
||||
#
|
||||
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
|
||||
# ``prompt=None`` and the algo default is used (zero override cost).
|
||||
|
||||
enabled: false
|
||||
template: ""
|
||||
403
src/everos/config/settings.py
Normal file
403
src/everos/config/settings.py
Normal file
@ -0,0 +1,403 @@
|
||||
"""Application settings.
|
||||
|
||||
Loaded by :func:`load_settings`. Source priority (later wins):
|
||||
|
||||
1. ``config/default.toml`` (shipped values; lowest priority)
|
||||
2. ``~/.everos/config.toml`` (user-level overrides; optional)
|
||||
3. ``.env`` file in the working directory (secrets / machine-specific)
|
||||
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
|
||||
5. Init args passed programmatically (highest priority)
|
||||
|
||||
The user-level toml path defaults to ``~/.everos/config.toml``. Override
|
||||
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
|
||||
optional — if it does not exist, the source is silently skipped.
|
||||
|
||||
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
|
||||
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
|
||||
|
||||
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
|
||||
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
|
||||
call. Tests that mutate environment variables must call
|
||||
``load_settings.cache_clear()`` after the mutation to invalidate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from functools import cache
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr, field_validator
|
||||
from pydantic_settings import (
|
||||
BaseSettings,
|
||||
PydanticBaseSettingsSource,
|
||||
SettingsConfigDict,
|
||||
TomlConfigSettingsSource,
|
||||
)
|
||||
|
||||
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
|
||||
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
|
||||
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
|
||||
|
||||
|
||||
def _resolve_user_toml_path() -> Path:
    """Return the location of the user-level ``config.toml``.

    A non-empty ``EVEROS_CONFIG_FILE`` environment variable takes
    precedence (with ``~`` expanded); otherwise the default
    ``~/.everos/config.toml`` is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset or empty string: fall back to the shipped default location.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
|
||||
|
||||
|
||||
class MemorySettings(BaseModel):
    """memory-root configuration."""

    # Stored as written (``~`` is not expanded here).
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Returns:
            ``v`` unchanged when it is a loadable timezone key.

        Raises:
            ValueError: if ``ZoneInfo(v)`` fails for any reason; the
                original exception is chained as the cause.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError is included so filesystem-level failures while loading
            # the key are reported as a validation error rather than escaping
            # as a raw exception.
            # NOTE(review): some CPython releases are reported to raise
            # IsADirectoryError for directory-like keys such as "America" —
            # confirm against the supported interpreter versions.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
|
||||
|
||||
|
||||
class ApiSettings(BaseModel):
    """Bind address for the HTTP API server.

    The shipped ``host`` is the loopback address ``127.0.0.1``. Per the
    threat model in ``SECURITY.md``, EverOS ships **no built-in
    authentication**, so binding to a routable interface (``0.0.0.0``
    etc.) is unsupported unless your own gateway / auth layer sits in
    front of the server.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    # Constrained to the valid port range 1..65535.
    port: int = Field(ge=1, le=65535, default=8000)
|
||||
|
||||
|
||||
class SqliteSettings(BaseModel):
    """SQLite PRAGMA tunables, applied on every new connection."""

    journal_mode: Literal[
        "WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    # Milliseconds; must be non-negative.
    busy_timeout_ms: int = Field(ge=0, default=5000)
    # Bytes; the default is 64 MiB.
    journal_size_limit_bytes: int = Field(ge=0, default=64 * 1024 * 1024)
    # Kilobytes; must be non-negative.
    cache_size_kb: int = Field(ge=0, default=2048)
|
||||
|
||||
|
||||
class LLMSettings(BaseModel):
    """Configuration for the LLM client.

    The service layer reads this section when it lazily builds the LLM
    client handed to algo extractors. Field names are provider-agnostic:
    the project speaks the OpenAI API protocol, so any OpenAI-compatible
    endpoint can be plugged in through ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    # Optional; no key / endpoint is shipped by default.
    api_key: SecretStr | None = None
    base_url: str | None = None
|
||||
|
||||
|
||||
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, matching the bounds used
    by the embedding / rerank sections, so a nonsensical value fails fast
    instead of surfacing later at the point of use.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # At least one concurrent call must be allowed.
    max_concurrency: int = Field(default=4, ge=1)

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected.
    Must be non-negative."""
|
||||
|
||||
|
||||
class EmbeddingSettings(BaseModel):
    """Configuration for the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model``,
    ``api_key`` and ``base_url`` default to ``None`` and are required at
    runtime when the embedding capability is enabled; the runtime knobs
    (``timeout_seconds`` etc.) come with defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity: nothing is shipped by default.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(gt=0, default=30.0)
    max_retries: int = Field(ge=0, default=3)
    batch_size: int = Field(ge=1, default=10)
    max_concurrent: int = Field(ge=1, default=5)
|
||||
|
||||
|
||||
class RerankSettings(BaseModel):
    """Configuration for the rerank client.

    LLM and embedding share a single OpenAI-compatible shape, but rerank
    API schemas differ between providers:

    - DeepInfra uses ``POST {base_url}/{model}`` with a custom body.
    - vLLM uses ``POST {base_url}/rerank`` with ``{model, query, documents}``.

    ``provider`` picks which client implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm"] = "deepinfra"

    # Endpoint identity: nothing is shipped by default.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None

    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(gt=0, default=30.0)
    max_retries: int = Field(ge=0, default=3)
    batch_size: int = Field(ge=1, default=10)
    max_concurrent: int = Field(ge=1, default=5)
|
||||
|
||||
|
||||
class BoundaryDetectionSettings(BaseModel):
    """Hard limits forwarded to the ``everalgo`` BoundaryDetector.

    Both limits must be at least 1.
    """

    hard_token_limit: int = Field(ge=1, default=65536)
    hard_msg_limit: int = Field(ge=1, default=500)
|
||||
|
||||
|
||||
class MemorizeSettings(BaseModel):
    """Configuration for the memorize use case.

    ``mode`` chooses the boundary detector and the set of pipelines that
    are dispatched. One service process serves a single mode at a time, so
    changing it requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector``; only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector``; both
      the user-memory and agent-memory pipelines run.

    ``session_lock_timeout_seconds`` bounds how long a single
    ``memorize()`` invocation may hold the per-session lock. The window
    covers the boundary LLM call, the memcell DB writes, and the
    synchronous portion of pipeline dispatch. It prevents a stuck LLM from
    deadlocking later concurrent calls on the same session_id: on timeout
    the outer ``asyncio.timeout`` cancels the task and the lock
    auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    # Seconds; must be strictly positive.
    session_lock_timeout_seconds: float = Field(gt=0, default=360.0)
|
||||
|
||||
|
||||
class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` decides which read path ``SearchMethod.VECTOR``
    takes:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      with a recall pool of ``top_k * 20`` (capped at 2000); the per-fact
      cosine is max-pooled by parent memcell and the top memcells are
      reverse-resolved back to episode rows. This is MaxSim over atomic
      facts: it costs one extra LanceDB scan in exchange for a
      finer-grained semantic match on long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one
      vector per episode = the embedded Content section). This is the
      legacy path, kept so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
|
||||
|
||||
|
||||
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).
        Negative values are rejected at load time.

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**. Negative values are rejected at
        load time.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        ===========  =================  ====================
        cap          FD upper bound     query latency (100q)
        ===========  =================  ====================
        ``2 MB``     ~45                ~5 ms
        ``4 MB``     ~52                ~3 ms
        ``8 MB``     ~140               ~2.4 ms
        ``16 MB``    ~290               ~2.3 ms  ← default
        ``32 MB``    ~630               ~1.4 ms
        ``unbound``  >960 (leaks)       ~1.3 ms
        ===========  =================  ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

        Note: the *metadata* cache (``metadata_cache_size_bytes``) is
        **not** exposed — experiment showed it caches in-memory parsed
        manifests / fragment stats with zero impact on FD count; leaving
        it unbounded (lancedb default) is fine.
    """

    # None / 0 / >0 are the documented regimes; a negative interval has no
    # meaning, so it is rejected instead of being passed through.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # Byte cap for the index cache; 16 MiB default (see the table above).
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Root of the settings tree: one attribute per configuration section."""

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    # Env vars look like EVEROS_<SECTION>__<KEY>; the shipped default.toml is
    # the TOML file used when a TOML source is built without an explicit path.
    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML sources between env / dotenv and the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is only
        added when the file exists, which keeps the source list tight.
        """
        toml_layers: list[PydanticBaseSettingsSource] = []

        # Optional user-level overrides rank above the shipped defaults.
        user_toml_path = _resolve_user_toml_path()
        if user_toml_path.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml_path)
            )

        # Shipped defaults: the path comes from ``model_config["toml_file"]``.
        toml_layers.append(TomlConfigSettingsSource(settings_cls))

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )
|
||||
|
||||
|
||||
@cache
def load_settings() -> Settings:
    """Build the application :class:`Settings` once and cache the result.

    Constructing ``Settings()`` resolves every configured source: the
    shipped ``default.toml``, the optional user-level TOML
    (``~/.everos/config.toml`` or the path in ``EVEROS_CONFIG_FILE``), the
    ``.env`` file, and ``EVEROS_*`` environment variables.

    The function is wrapped in :func:`functools.cache`, so every caller
    sees the same instance until ``load_settings.cache_clear()`` is
    called. Tests that monkeypatch environment variables (including
    ``EVEROS_CONFIG_FILE``) or rewrite a config file must call
    ``cache_clear`` after each mutation to pick the change up.

    Returns:
        The cached :class:`Settings` instance.
    """
    return Settings()
|
||||
Reference in New Issue
Block a user