chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

35
src/everos/README.md Normal file
View File

@ -0,0 +1,35 @@
# `everos` package
Source layout for the `everos` Python package. This README is a quick
orientation; full architectural detail lives elsewhere.
## Layout
```
everos/
├── entrypoints/ Presentation: cli + api
├── service/ Application: use case orchestration
├── memory/ Domain: extract + search + cascade + prompt_slots + models
├── infra/ Infrastructure: persistence/{markdown, sqlite, lancedb}
├── component/ Cross-cutting providers: llm / embedding / config / utils
├── core/ Runtime base: observability / lifespan / context
└── config/ Data: Settings + default.toml + prompt_slots templates
```
Each subpackage has a top-level `__init__.py` describing its responsibility
and public API.
## Dependency rule
```
entrypoints → service → memory → infra
component / core / config
```
Single-direction; enforced by `import-linter` in CI.
## Further reading
- Architecture: [../../docs/architecture.md](../../docs/architecture.md)
- Coding rules (auto-loaded by Claude Code): [../../.claude/rules/](../../.claude/rules/)

11
src/everos/__init__.py Normal file
View File

@ -0,0 +1,11 @@
"""everos — md-first memory extraction framework."""
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
try:
    __version__ = _pkg_version("everos")
except PackageNotFoundError:
    # No distribution metadata is available (editable install without
    # dist-info, or a source tree that was never installed). Publish a
    # sentinel version instead of letting the package import fail.
    __version__ = "0.0.0+unknown"

View File

@ -0,0 +1,4 @@
"""Cross-cutting components.
Technical capabilities used by every layer; depend on no business layer.
"""

View File

@ -0,0 +1,14 @@
"""Config processing capability.
YAML loader for category-organised config trees (PromptSlot templates,
etc.). Distinct from :mod:`everos.config` (configuration *data* + Settings
schema, which uses TOML for the Pydantic Settings file) — this subpackage
holds *capability* (how to load), the other holds *data* (what to load).
External usage:
from everos.component.config import YamlConfigLoader
"""
from .loader import YamlConfigLoader as YamlConfigLoader
__all__ = ["YamlConfigLoader"]

View File

@ -0,0 +1,146 @@
"""YAML config loader for category-organised file trees.
Concept: a project keeps several *categories* of YAML config files under
their own subdirectories — for example PromptSlot templates under
``config/prompt_slots/<name>.yaml``. The loader:
1. registers a category → subdirectory mapping
2. resolves ``find(category, name)`` to ``<root>/<subdir>/<name>.yaml``
3. caches parsed contents until ``refresh`` is called
Uses ``yaml.safe_load`` (no arbitrary tags) — PyYAML is already a project
dependency for markdown frontmatter, so no extra cost.
"""
from __future__ import annotations
from collections.abc import Mapping
from pathlib import Path
from typing import Any
import yaml
class YamlConfigLoader:
    """Category-aware YAML loader with an in-memory parse cache.

    Each registered category maps to a subdirectory of ``root``; an entry
    ``(category, name)`` resolves to ``<root>/<subdir>/<name>.yaml``.

    Usage:
        loader = YamlConfigLoader(root=Path("src/everos/config"))
        loader.register_category("prompt_slots")
        # → reads <root>/prompt_slots/episode.yaml
        meta = loader.find("prompt_slots", "episode")
        names = loader.list("prompt_slots")
        loader.refresh()  # next find() re-reads from disk

    Cache semantics:
        * ``find`` parses a file the first time it is requested and keeps
          the resulting dict (the same object is returned on later calls).
        * ``refresh()`` forgets every cached entry.
        * ``refresh(category)`` forgets the entries of one category.
        * ``refresh(category, name)`` forgets exactly one entry.
    """

    def __init__(
        self,
        root: Path,
        categories: Mapping[str, str | None] | None = None,
    ) -> None:
        """Create a loader rooted at ``root``.

        Args:
            root: Base directory containing the category subdirectories.
            categories: Optional pre-registered category map
                (``name → subdir``). A ``None`` subdir means the category
                name doubles as the directory name.
        """
        self._root = Path(root)
        self._subdirs: dict[str, str] = {}
        self._cache: dict[tuple[str, str], dict[str, Any]] = {}
        for category, subdir in (categories or {}).items():
            self.register_category(category, subdir)

    # ── Category management ────────────────────────────────────────────────

    def register_category(self, name: str, subdir: str | None = None) -> None:
        """Register (or re-register) ``name``; ``subdir`` defaults to ``name``."""
        self._subdirs[name] = name if subdir is None else subdir

    def categories(self) -> list[str]:
        """Return the registered category names in sorted order."""
        return sorted(self._subdirs.keys())

    # ── Lookup ─────────────────────────────────────────────────────────────

    def find(self, category: str, name: str) -> dict[str, Any]:
        """Return the parsed ``<root>/<subdir>/<name>.yaml`` of ``category``.

        A cached entry is returned without touching the disk; otherwise the
        file is parsed with ``yaml.safe_load`` and stored in the cache. An
        empty document is treated as an empty mapping.

        Raises:
            KeyError: if ``category`` was not registered.
            FileNotFoundError: if the yaml file does not exist.
            TypeError: if the parsed YAML is not a mapping.
        """
        key = (category, name)
        try:
            return self._cache[key]
        except KeyError:
            pass  # not cached yet — fall through and read from disk

        path = self._path_for(category, name)
        if not path.is_file():
            raise FileNotFoundError(f"yaml not found: {path}")

        with path.open("r", encoding="utf-8") as stream:
            loaded = yaml.safe_load(stream)
        data = {} if loaded is None else loaded
        if not isinstance(data, dict):
            raise TypeError(
                f"yaml top-level must be a mapping, got {type(data).__name__}: {path}"
            )

        self._cache[key] = data
        return data

    def list(self, category: str) -> list[str]:
        """Return the sorted ``*.yaml`` stems found in ``category``'s directory.

        A registered category whose directory does not exist yields ``[]``.

        Raises:
            KeyError: if ``category`` was not registered.
        """
        directory = self._dir_for(category)
        if directory.is_dir():
            return sorted(entry.stem for entry in directory.glob("*.yaml"))
        return []

    # ── Cache control ──────────────────────────────────────────────────────

    def refresh(
        self,
        category: str | None = None,
        name: str | None = None,
    ) -> None:
        """Invalidate cached entries.

        - ``refresh()``               → drop every cached entry
        - ``refresh(category)``       → drop everything in ``category``
        - ``refresh(category, name)`` → drop a single entry
        """
        if category is None:
            self._cache.clear()
        elif name is not None:
            self._cache.pop((category, name), None)
        else:
            # Rebuild the cache keeping only entries of other categories.
            self._cache = {
                key: value for key, value in self._cache.items() if key[0] != category
            }

    # ── Internals ──────────────────────────────────────────────────────────

    def _dir_for(self, category: str) -> Path:
        """Return the directory of ``category`` or raise a descriptive KeyError."""
        try:
            subdir = self._subdirs[category]
        except KeyError as exc:
            raise KeyError(
                f"category not registered: {category!r}; known: {sorted(self._subdirs)}"
            ) from exc
        return self._root / subdir

    def _path_for(self, category: str, name: str) -> Path:
        """Return the yaml file path of entry ``name`` inside ``category``."""
        return self._dir_for(category) / f"{name}.yaml"

View File

@ -0,0 +1,33 @@
"""Embedding provider adapters (one provider per file).
Public surface:
- :class:`EmbeddingProvider` — Protocol every provider satisfies.
- :class:`EmbeddingError` — provider-side failure.
- :class:`OpenAIEmbeddingProvider` — concrete provider for any
OpenAI-protocol embeddings endpoint (DeepInfra, vLLM, OpenAI, …).
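- :func:`build_embedding_provider` — settings-driven factory.
- :func:`get_embedder` — process-wide lazy singleton accessor.
- :class:`EmbeddingNotConfiguredError` — raised by :func:`get_embedder`
when the required embedding settings are missing.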
External usage::
from everos.component.embedding import build_embedding_provider
provider = build_embedding_provider(settings.embedding)
vec = await provider.embed("hello")
"""
from .accessor import EmbeddingNotConfiguredError as EmbeddingNotConfiguredError
from .accessor import get_embedder as get_embedder
from .factory import build_embedding_provider as build_embedding_provider
from .openai_provider import OpenAIEmbeddingProvider as OpenAIEmbeddingProvider
from .protocol import EmbeddingError as EmbeddingError
from .protocol import EmbeddingProvider as EmbeddingProvider
__all__ = [
"EmbeddingError",
"EmbeddingNotConfiguredError",
"EmbeddingProvider",
"OpenAIEmbeddingProvider",
"build_embedding_provider",
"get_embedder",
]

View File

@ -0,0 +1,48 @@
"""Process-wide embedding provider accessor.
Lazy singleton mirror of :func:`everos.component.llm.get_llm_client`:
first call reads settings and builds the OpenAI-protocol embedding
client; subsequent calls return the cached instance. Strategies and
other components that need a process-wide embedder import this rather
than threading the provider through their constructors.
Raises :class:`EmbeddingNotConfiguredError` when credentials are missing
so misconfiguration surfaces at the call site (or at app startup via a
lifespan provider) instead of silently degrading.
"""
from __future__ import annotations
from everos.config import load_settings
from everos.core.observability.logging import get_logger
from .factory import build_embedding_provider
from .protocol import EmbeddingProvider
logger = get_logger(__name__)
class EmbeddingNotConfiguredError(RuntimeError):
    """Signals that ``settings.embedding`` is incomplete.

    Raised when ``model``, ``api_key`` or ``base_url`` is missing, so the
    embedding provider cannot be built.
    """
# Process-wide cache slot; populated by the first successful get_embedder() call.
_embedder: EmbeddingProvider | None = None


def get_embedder() -> EmbeddingProvider:
    """Return the process-wide :class:`EmbeddingProvider`, building it once.

    The first call loads settings and builds the provider; later calls
    return the cached instance. A failed build leaves the cache empty, so
    the next call tries again.

    Raises:
        EmbeddingNotConfiguredError: When loading settings or building the
            provider raises ``ValueError`` (e.g. a required settings field
            is unset). See :func:`build_embedding_provider` for the exact
            keys.
    """
    global _embedder
    if _embedder is None:
        try:
            provider = build_embedding_provider(load_settings().embedding)
        except ValueError as exc:
            raise EmbeddingNotConfiguredError(str(exc)) from exc
        _embedder = provider
        logger.info("embedder_built")
    return _embedder

View File

@ -0,0 +1,56 @@
"""Factory for building an embedding provider from :class:`EmbeddingSettings`."""
from __future__ import annotations
from everos.config import EmbeddingSettings
from .openai_provider import OpenAIEmbeddingProvider
from .protocol import EmbeddingProvider
# Default vector dimension, matching the LanceDB index column — see
# ``17_lancedb_tables_design.md``.
_DEFAULT_DIM = 1024


def build_embedding_provider(
    settings: EmbeddingSettings,
    *,
    dim: int = _DEFAULT_DIM,
) -> EmbeddingProvider:
    """Create the OpenAI-compatible embedding provider described by ``settings``.

    Args:
        settings: The :class:`EmbeddingSettings` slice from
            :func:`everos.config.load_settings`.
        dim: Target vector dimension; defaults to 1024 to match the
            LanceDB ``vector`` column shape.

    Returns:
        An :class:`EmbeddingProvider` ready to call ``embed`` /
        ``embed_batch``.

    Raises:
        ValueError: If ``model``, ``api_key`` or ``base_url`` is unset.
            The checks run in that order and the first failure wins.
    """
    if not settings.model:
        problem = (
            "Embedding model is not configured "
            "(set EVEROS_EMBEDDING__MODEL or [embedding] model in user toml)"
        )
    elif settings.api_key is None:
        problem = "Embedding api_key is not configured (set EVEROS_EMBEDDING__API_KEY)"
    elif not settings.base_url:
        problem = (
            "Embedding base_url is not configured (set EVEROS_EMBEDDING__BASE_URL)"
        )
    else:
        problem = None
    if problem is not None:
        raise ValueError(problem)

    # The secret is unwrapped here so the provider only ever sees a plain str.
    return OpenAIEmbeddingProvider(
        model=settings.model,
        api_key=settings.api_key.get_secret_value(),
        base_url=settings.base_url,
        dim=dim,
        timeout=settings.timeout_seconds,
        max_retries=settings.max_retries,
        batch_size=settings.batch_size,
        max_concurrent=settings.max_concurrent,
    )

View File

@ -0,0 +1,98 @@
"""OpenAI-compatible embedding provider.
Wraps :class:`openai.AsyncOpenAI` so any OpenAI-protocol endpoint
(DeepInfra, OpenAI, Together, Fireworks, …) works without per-provider
forks. Self-hosted vLLM also exposes the same shape; the only quirk it
imposes is that the ``dimensions`` request parameter is ignored — we
truncate client-side to ``dim`` so callers always see the declared
shape regardless of backend.
Concurrency model:
- ``embed_batch`` splits the inputs into chunks of ``batch_size``.
- An :class:`asyncio.Semaphore` capped at ``max_concurrent`` bounds
in-flight requests; remaining chunks queue and start as slots free.
- Retries / timeouts come from the openai SDK (``max_retries``,
``timeout`` constructor args).
"""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
import openai
from .protocol import EmbeddingError
class OpenAIEmbeddingProvider:
    """OpenAI-compatible embedding provider with batching + concurrency.

    Args:
        model: Embedding model id (e.g. ``"Qwen/Qwen3-Embedding-4B"``).
        api_key: Bearer credential as a plain ``str``.
        base_url: OpenAI-protocol endpoint
            (e.g. ``"https://api.deepinfra.com/v1/openai"``).
        dim: Target vector dimension. Vectors longer than this are
            truncated client-side (matches the LanceDB column shape —
            see ``17_lancedb_tables_design.md``).
        timeout: Per-request timeout, seconds.
        max_retries: Retry budget exposed via the openai SDK.
        batch_size: How many inputs per ``/embeddings`` call (must be >= 1).
        max_concurrent: Cap on in-flight chunked requests (must be >= 1).

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrent`` is below 1.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        dim: int = 1024,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        # Fail fast on values that would otherwise misbehave much later:
        # a negative batch_size makes embed_batch return [] for non-empty
        # input, and max_concurrent=0 yields a semaphore nobody can acquire.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")
        self.dim = dim
        self._model = model
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url=base_url,
            timeout=timeout,
            max_retries=max_retries,
        )

    async def embed(self, text: str) -> list[float]:
        """Embed a single string.

        Raises:
            EmbeddingError: On an SDK failure or a malformed response.
        """
        vectors = await self._embed_chunk([text])
        return vectors[0]

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed many strings, preserving input order.

        Inputs are split into chunks of ``batch_size`` that are requested
        concurrently. An empty input returns ``[]`` without any request.

        Raises:
            EmbeddingError: On an SDK failure or a malformed response.
        """
        if not texts:
            return []
        chunks = [
            list(texts[i : i + self._batch_size])
            for i in range(0, len(texts), self._batch_size)
        ]
        results = await asyncio.gather(*(self._embed_chunk(chunk) for chunk in chunks))
        # gather preserves order across awaitables, and each chunk preserves
        # its internal order — so flattening yields the input order back.
        return [vec for chunk in results for vec in chunk]

    async def _embed_chunk(self, chunk: list[str]) -> list[list[float]]:
        """One ``/embeddings`` call, semaphore-guarded.

        Raises:
            EmbeddingError: If the SDK call fails, or if the endpoint
                returns a different number of vectors than inputs sent.
        """
        async with self._semaphore:
            try:
                response = await self._client.embeddings.create(
                    model=self._model,
                    input=chunk,
                )
            except openai.OpenAIError as exc:
                raise EmbeddingError(str(exc)) from exc
        items = response.data
        # A short (or long) response would silently shift every following
        # vector onto the wrong input once chunks are flattened, so reject it.
        if len(items) != len(chunk):
            raise EmbeddingError(
                f"embedding endpoint returned {len(items)} vectors "
                f"for {len(chunk)} inputs"
            )
        # Items are taken in response order; truncate each to ``dim``.
        return [list(item.embedding[: self.dim]) for item in items]

View File

@ -0,0 +1,48 @@
"""Embedding provider protocol.
The cascade worker / retrieval pipeline depend on a single small
contract: turn a string (or list of strings) into a fixed-dimension
vector. Whether the backend is OpenAI, vLLM, DeepInfra, Ollama, or a
local model is the provider's business — the contract is invariant.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Protocol, runtime_checkable
class EmbeddingError(Exception):
    """Provider-side embedding failure.

    The upstream SDK exception is attached as ``__cause__`` (PEP 3134), so
    diagnostic loggers keep the original error chain.
    """
@runtime_checkable
class EmbeddingProvider(Protocol):
    """Contract for asynchronous embedding providers.

    ``dim`` is the dimension of every vector returned after truncation.
    A provider whose backend cannot truncate natively has to truncate
    client-side, so callers always observe the declared shape.
    """

    dim: int

    async def embed(self, text: str) -> list[float]:
        """Embed one string and return a ``[dim]`` vector."""

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed several strings, returning vectors in input order.

        Implementations chunk by ``batch_size`` and bound in-flight
        requests by ``max_concurrent`` (both from settings). On failure,
        raises :class:`EmbeddingError` — the worker treats it as a
        retryable / unrecoverable case per HTTP-status mapping.
        """

View File

@ -0,0 +1,45 @@
"""LLM provider adapters (one provider per file, mem0-style).
Public surface:
- :class:`LLMClient` — Protocol every provider satisfies (re-exported
from :mod:`everalgo.llm`; same shape so everos providers can be
handed to everalgo operators).
- :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage` — chat
payload types (re-exported from :mod:`everalgo.llm`).
- :class:`LLMError` — provider-side failure (re-exported).
- :class:`LLMNotConfiguredError` — raised when credentials are missing.
- :class:`OpenAIProvider` — concrete provider wrapping
``openai.AsyncOpenAI`` against any OpenAI-compatible endpoint.
- :func:`build_llm_provider` — settings-driven factory.
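- :func:`get_llm_client` — process-wide lazy singleton accessor.
- :func:`get_multimodal_llm_client` — process-wide lazy singleton accessor
for the multimodal client used for parsing.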
External usage::
from everos.component.llm import build_llm_provider, LLMClient
provider = build_llm_provider(settings.llm)
"""
from .client import LLMNotConfiguredError as LLMNotConfiguredError
from .client import get_llm_client as get_llm_client
from .client import get_multimodal_llm_client as get_multimodal_llm_client
from .factory import build_llm_provider as build_llm_provider
from .openai_provider import OpenAIProvider as OpenAIProvider
from .protocol import ChatMessage as ChatMessage
from .protocol import ChatResponse as ChatResponse
from .protocol import LLMClient as LLMClient
from .protocol import LLMError as LLMError
from .protocol import Usage as Usage
__all__ = [
"ChatMessage",
"ChatResponse",
"LLMClient",
"LLMError",
"LLMNotConfiguredError",
"OpenAIProvider",
"Usage",
"build_llm_provider",
"get_llm_client",
"get_multimodal_llm_client",
]

View File

@ -0,0 +1,89 @@
"""Process-wide LLM client accessor.
Lazy singleton — first call reads settings and builds the algo LLM
client; subsequent calls return the cached instance. Raises
:class:`LLMNotConfiguredError` when no credentials are present so
misconfiguration surfaces at app startup (via the LLM lifespan
provider) instead of silently failing per-request downstream.
"""
from __future__ import annotations
from everalgo.llm import build_client
from everalgo.llm.config import LLMConfig
from everalgo.llm.protocols import LLMClient
from everos.config import load_settings
from everos.core.observability.logging import get_logger
logger = get_logger(__name__)
class LLMNotConfiguredError(RuntimeError):
    """Signals that an LLM settings section lacks ``api_key`` or ``base_url``.

    Raised for both ``settings.llm`` and ``settings.multimodal``.
    """
# Process-wide cache slots, each filled by the first successful accessor call.
_llm_client: LLMClient | None = None
_multimodal_client: LLMClient | None = None


def get_llm_client() -> LLMClient:
    """Return the process-wide algo LLM client, building it on first use.

    Reads the ``llm`` section of the loaded settings. Nothing is cached
    when the configuration check fails, so a later call re-reads settings.

    Raises:
        LLMNotConfiguredError: When ``settings.llm.api_key`` or
            ``settings.llm.base_url`` is unset or empty.
    """
    global _llm_client
    if _llm_client is None:
        settings = load_settings().llm
        secret = settings.api_key
        api_key = None if secret is None else secret.get_secret_value()
        if not (api_key and settings.base_url):
            raise LLMNotConfiguredError(
                "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
            )
        config = LLMConfig(
            model=settings.model,
            api_key=api_key,
            base_url=settings.base_url,
        )
        _llm_client = build_client(config)
        logger.info("llm_client_built", model=settings.model)
    return _llm_client
def get_multimodal_llm_client() -> LLMClient:
    """Return the process-wide multimodal LLM client (for everalgo.parser).

    Built on first use from the flat ``[multimodal]`` config, which is kept
    apart from the main ``[llm]`` section so parsing can target a
    vision/audio-capable endpoint.

    Raises:
        LLMNotConfiguredError: When ``settings.multimodal.api_key`` or
            ``settings.multimodal.base_url`` is unset or empty.
    """
    global _multimodal_client
    if _multimodal_client is None:
        settings = load_settings().multimodal
        secret = settings.api_key
        api_key = None if secret is None else secret.get_secret_value()
        if not (api_key and settings.base_url):
            raise LLMNotConfiguredError(
                "Multimodal LLM is required for parsing; set "
                "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
            )
        config = LLMConfig(
            model=settings.model,
            api_key=api_key,
            base_url=settings.base_url,
        )
        _multimodal_client = build_client(config)
        logger.info("multimodal_llm_client_built", model=settings.model)
    return _multimodal_client

View File

@ -0,0 +1,45 @@
"""Factory for building an LLM provider from :class:`LLMSettings`."""
from __future__ import annotations
from everos.config import LLMSettings
from .openai_provider import OpenAIProvider
from .protocol import LLMClient
def build_llm_provider(settings: LLMSettings) -> LLMClient:
    """Create the OpenAI-compatible LLM provider described by ``settings``.

    The :class:`pydantic.SecretStr` key is unwrapped at this boundary, so
    downstream callers never handle the raw key themselves. The function
    fails fast when ``api_key`` or ``base_url`` is missing; the caller is
    expected to provide them via ``.env`` / user toml / programmatic init
    beforehand.

    Args:
        settings: The :class:`LLMSettings` slice from
            :func:`everos.config.load_settings`.

    Returns:
        A provider that structurally satisfies
        :class:`everalgo.llm.LLMClient` and can be passed to everalgo
        operators via ``llm=``.

    Raises:
        ValueError: If ``api_key`` or ``base_url`` is unset (``api_key`` is
            checked first).
    """
    secret = settings.api_key
    if secret is None:
        raise ValueError(
            "LLM api_key is not configured "
            "(set EVEROS_LLM__API_KEY or [llm] api_key in user toml)"
        )

    endpoint = settings.base_url
    if not endpoint:
        raise ValueError(
            "LLM base_url is not configured "
            "(set EVEROS_LLM__BASE_URL or [llm] base_url in user toml)"
        )

    return OpenAIProvider(
        model=settings.model,
        api_key=secret.get_secret_value(),
        base_url=endpoint,
    )

View File

@ -0,0 +1,114 @@
"""OpenAI-compatible LLM provider for everos.
Implements the :class:`everalgo.llm.LLMClient` structural contract by
wrapping :class:`openai.AsyncOpenAI` — the same backbone everalgo's own
``OpenAICompatClient`` uses, but defined here in everos so the
provider can be constructed from :class:`everos.config.LLMSettings`
and handed to everalgo extractors via the ``llm=`` per-call parameter.
Keeps the provider lean (matches the everalgo minimum-viable shape):
no multi-key rotation, no scenario-level routing, no token-usage
collector — those are deployment concerns layered on top.
"""
from __future__ import annotations
from collections.abc import Mapping
from typing import Any, Literal
import openai
from .protocol import ChatMessage, ChatResponse, LLMError, Usage
class OpenAIProvider:
    """Thin async wrapper over ``openai.AsyncOpenAI``.

    Structurally satisfies :class:`everalgo.llm.LLMClient` (PEP 544);
    instances can be passed directly to everalgo operators that accept
    ``llm: LLMClient | None``.

    Args:
        model: Default model id (override per-call with ``model=`` on
            :meth:`chat`).
        api_key: Bearer credential. Pass as plain ``str`` — settings
            should unwrap :class:`pydantic.SecretStr` at the factory
            boundary.
        base_url: OpenAI-compatible endpoint (e.g.
            ``"https://openrouter.ai/api/v1"``).
        timeout: Per-request timeout in seconds.
        temperature: Default sampling temperature (overridable per call).
        max_tokens: Default max-tokens cap (overridable per call).
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str | None = None,
        timeout: float = 60.0,
        temperature: float = 0.0,
        max_tokens: int | None = None,
    ) -> None:
        self._model = model
        self._temperature = temperature
        self._max_tokens = max_tokens
        self._client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url=base_url,
            timeout=timeout,
        )

    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: Mapping[str, Any] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        """Send a chat completion request and return the parsed response.

        Args:
            messages: Conversation so far; each message is serialised with
                ``model_dump()``.
            model: Per-call model override; falls back to the instance
                default when omitted or empty.
            temperature: Per-call temperature override (``None`` → default).
            max_tokens: Per-call cap override (``None`` → default; the key
                is omitted from the request when both are ``None``).
            response_format: Optional response-format mapping, copied into
                the request.
            **extra: Additional request fields, applied last so they can
                override any of the fields above.

        Returns:
            A :class:`ChatResponse` built from the first choice; ``raw`` is
            always ``None`` and a missing message content becomes ``""``.

        Raises:
            LLMError: If the SDK call fails or the response carries no
                choices.
        """
        request: dict[str, Any] = {
            "model": model or self._model,
            "messages": [m.model_dump() for m in messages],
            "temperature": (
                temperature if temperature is not None else self._temperature
            ),
        }
        effective_max = max_tokens if max_tokens is not None else self._max_tokens
        if effective_max is not None:
            request["max_tokens"] = effective_max
        if response_format is not None:
            request["response_format"] = dict(response_format)
        request.update(extra)

        try:
            completion = await self._client.chat.completions.create(**request)
        except openai.OpenAIError as exc:
            raise LLMError(str(exc)) from exc

        # OpenAI-compatible backends can answer with an empty (or missing)
        # ``choices`` list; surface that as the documented LLMError instead
        # of a bare IndexError / TypeError from indexing.
        choices = completion.choices
        if not choices:
            raise LLMError("LLM response contained no choices")
        choice = choices[0]

        usage: Usage | None = None
        if completion.usage is not None:
            usage = Usage(
                prompt_tokens=completion.usage.prompt_tokens,
                completion_tokens=completion.usage.completion_tokens,
            )
        return ChatResponse(
            content=choice.message.content or "",
            model=completion.model,
            usage=usage,
            finish_reason=_normalise_finish_reason(choice.finish_reason),
            raw=None,
        )
def _normalise_finish_reason(
    value: str | None,
) -> Literal["stop", "length", "content_filter"] | None:
    """Pass through a recognised finish reason; map anything else to ``None``."""
    recognised = ("stop", "length", "content_filter")
    return value if value in recognised else None  # type: ignore[return-value]

View File

@ -0,0 +1,39 @@
"""LLM client protocol re-export.
The structural contract every everos LLM provider satisfies is the same
:class:`everalgo.llm.LLMClient` Protocol — everos providers must be
pass-through-compatible with the everalgo extractors that accept an
``llm=`` parameter. Re-exporting the type here keeps the import path
stable (``everos.component.llm``) even if the everalgo namespace
shifts later.
The :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage`
shapes are likewise re-exported so callers can build / inspect chat
payloads without reaching into the everalgo package directly.
"""
from __future__ import annotations
from everalgo.llm import (
ChatMessage as ChatMessage,
)
from everalgo.llm import (
ChatResponse as ChatResponse,
)
from everalgo.llm import (
LLMClient as LLMClient,
)
from everalgo.llm import (
LLMError as LLMError,
)
from everalgo.llm import (
Usage as Usage,
)
__all__ = [
"ChatMessage",
"ChatResponse",
"LLMClient",
"LLMError",
"Usage",
]

View File

@ -0,0 +1,34 @@
"""Rerank provider adapters (one provider per file).
Public surface:
- :class:`RerankProvider` — Protocol every provider satisfies.
- :class:`RerankResult` / :class:`RerankError` — value type + error.
- :class:`DeepInfraRerankProvider` — DeepInfra inference-API rerank.
- :class:`VllmRerankProvider` — OpenAI-compat ``/v1/rerank`` (vLLM,
self-hosted, other compatible servers).
- :func:`build_rerank_provider` — settings-driven factory that picks
the concrete provider via ``settings.rerank.provider``.
External usage::
from everos.component.rerank import build_rerank_provider
provider = build_rerank_provider(settings.rerank)
scored = await provider.rerank("how to file a claim", documents)
"""
from .deepinfra_provider import DeepInfraRerankProvider as DeepInfraRerankProvider
from .factory import build_rerank_provider as build_rerank_provider
from .protocol import RerankError as RerankError
from .protocol import RerankProvider as RerankProvider
from .protocol import RerankResult as RerankResult
from .vllm_provider import VllmRerankProvider as VllmRerankProvider
__all__ = [
"DeepInfraRerankProvider",
"RerankError",
"RerankProvider",
"RerankResult",
"VllmRerankProvider",
"build_rerank_provider",
]

View File

@ -0,0 +1,196 @@
"""DeepInfra inference-API rerank provider.
DeepInfra exposes reranker models (e.g. ``Qwen/Qwen3-Reranker-4B``) at::
POST {base_url}/{model}
Authorization: Bearer <api_key>
Content-Type: application/json
The request shape is the inference-API convention used across DeepInfra
reranker / classifier models:
{
"queries": ["<query>"],
"documents": ["<doc 1>", "<doc 2>", ...]
}
The response carries one ``scores`` array per query:
{
"scores": [[0.12, 0.87, 0.43, ...]],
"request_id": "...",
"inference_status": {...}
}
We submit one query at a time (matches the :class:`RerankProvider`
contract) and unwrap the inner score list. Documents longer than the
model's input window are silently truncated server-side.
"""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
from typing import Any
import httpx
from .protocol import RerankError, RerankResult
# Chat template for Qwen3-Reranker. The DeepInfra inference API runs the
# reranker as a yes/no generator, so the prompt scaffolding has to be supplied
# by the client (verbatim mirror of the EverCore benchmark's reranker client).
# Without it the model scores raw, off-template text and the relevance values
# come back uncalibrated.
_QWEN3_PREFIX = (
    "<|im_start|>system\n"
    "Judge whether the Document meets the requirements based on the Query and "
    'the Instruct provided. Note that the answer can only be "yes" or "no".'
    "<|im_end|>\n<|im_start|>user\n"
)
_QWEN3_SUFFIX = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
# Instruction used when the caller supplies none (or an empty one).
_DEFAULT_RERANK_INSTRUCTION = (
    "Given a question and a passage, determine if the passage contains "
    "information relevant to answering the question."
)


def _format_qwen3_inputs(
    query: str, documents: list[str], instruction: str | None
) -> tuple[str, list[str]]:
    """Render the query and each document into the Qwen3-Reranker template.

    Returns:
        ``(formatted_query, formatted_documents)``: the query is prefixed
        with the system prompt and instruction, and every document is
        suffixed with the assistant-turn scaffold.
    """
    effective_instruction = instruction if instruction else _DEFAULT_RERANK_INSTRUCTION
    query_prompt = (
        f"{_QWEN3_PREFIX}<Instruct>: {effective_instruction}\n<Query>: {query}\n"
    )
    document_prompts: list[str] = []
    for document in documents:
        document_prompts.append(f"<Document>: {document}{_QWEN3_SUFFIX}")
    return query_prompt, document_prompts
class DeepInfraRerankProvider:
    """Rerank provider for the DeepInfra inference API.

    Args:
        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
        api_key: Bearer credential as plain ``str``.
        base_url: Inference endpoint root
            (e.g. ``"https://api.deepinfra.com/v1/inference"``). The
            ``/{model}`` suffix is appended when the provider is built.
        timeout: Per-request timeout, seconds.
        max_retries: Soft retry count on transport errors / 5xx / 429.
        batch_size: Cap on documents per request (large doc lists are
            split, scores merged in input order).
        max_concurrent: Cap on in-flight requests across all batches.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        self._model = model
        self._api_key = api_key
        self._url = f"{base_url.rstrip('/')}/{model}"
        self._timeout = timeout
        self._max_retries = max_retries
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score every document against ``query``; return sorted desc.

        Each result's ``index`` is the document's position in
        ``documents``. An empty ``documents`` returns ``[]`` without any
        request.

        Raises:
            RerankError: If any chunk request fails (see
                :meth:`_score_chunk`).
        """
        if not documents:
            return []

        chunks: list[tuple[int, list[str]]] = [
            (offset, list(documents[offset : offset + self._batch_size]))
            for offset in range(0, len(documents), self._batch_size)
        ]
        chunk_scores = await asyncio.gather(
            *(self._score_chunk(query, docs, instruction) for _, docs in chunks)
        )
        # Re-attach each score to its document's index in the original input.
        scored: list[RerankResult] = [
            RerankResult(index=offset + i, score=score)
            for (offset, _), scores in zip(chunks, chunk_scores, strict=True)
            for i, score in enumerate(scores)
        ]
        scored.sort(key=lambda r: r.score, reverse=True)
        return scored

    async def _score_chunk(
        self, query: str, documents: list[str], instruction: str | None
    ) -> list[float]:
        """Score one chunk of documents with a single (retried) POST.

        Transport errors, 5xx and 429 are retried up to ``max_retries``
        times; any other non-200 status is surfaced immediately.

        Raises:
            RerankError: On exhausted retries, a non-retryable HTTP status,
                a 200 response whose body is not valid JSON, or a malformed
                score payload.
        """
        formatted_query, formatted_docs = _format_qwen3_inputs(
            query, documents, instruction
        )
        payload: dict[str, Any] = {
            "queries": [formatted_query],
            "documents": formatted_docs,
        }
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        # One client serves every attempt of this chunk, so retries do not
        # pay for a fresh client / connection pool each time. The semaphore
        # slot is held across retries.
        async with (
            self._semaphore,
            httpx.AsyncClient(timeout=self._timeout) as client,
        ):
            for attempt in range(self._max_retries + 1):
                is_last_attempt = attempt == self._max_retries
                try:
                    response = await client.post(
                        self._url, json=payload, headers=headers
                    )
                except httpx.HTTPError as exc:
                    if is_last_attempt:
                        raise RerankError(
                            f"DeepInfra rerank transport failure: {exc}"
                        ) from exc
                    continue

                status = response.status_code
                if status == 200:
                    try:
                        body = response.json()
                    except ValueError as exc:
                        # A 200 with a non-JSON body (e.g. a proxy error
                        # page) must still surface as the provider's error.
                        raise RerankError(
                            "DeepInfra rerank returned invalid JSON: "
                            f"{response.text[:200]}"
                        ) from exc
                    return _extract_scores(body, len(documents))

                # Retry on 5xx / 429 only; surface other statuses immediately.
                retryable = status >= 500 or status == 429
                if retryable and not is_last_attempt:
                    continue
                raise RerankError(
                    f"DeepInfra rerank HTTP {status}: {response.text[:200]}"
                )
        # Only reachable when the loop body never ran (negative max_retries).
        raise RerankError(
            f"DeepInfra rerank exhausted retries ({self._max_retries})"
        )
def _extract_scores(body: dict[str, Any], expected_len: int) -> list[float]:
"""Unwrap ``scores`` from the DeepInfra response body.
Inference API returns ``scores`` as either:
- ``[[s1, s2, ...]]`` — one score row per query (current single-query
shape); take row 0.
- ``[s1, s2, ...]`` — flat list (fallback for providers that drop
the outer list when only one query is sent).
"""
raw = body.get("scores")
if not isinstance(raw, list):
raise RerankError(f"DeepInfra rerank response missing scores: {body!r}")
row = raw[0] if raw and isinstance(raw[0], list) else raw
if len(row) != expected_len:
raise RerankError(
f"DeepInfra rerank returned {len(row)} scores, expected {expected_len}"
)
return [float(s) for s in row]

View File

@ -0,0 +1,74 @@
"""Factory for building a rerank provider from :class:`RerankSettings`.
The ``provider`` field on :class:`RerankSettings` selects which concrete
implementation to build:
- ``"deepinfra"`` → :class:`DeepInfraRerankProvider`
- ``"vllm"`` → :class:`VllmRerankProvider`
Adding a new provider = one match arm here + one new file under
:mod:`everos.component.rerank`.
"""
from __future__ import annotations
from everos.config import RerankSettings
from .deepinfra_provider import DeepInfraRerankProvider
from .protocol import RerankProvider
from .vllm_provider import VllmRerankProvider
def build_rerank_provider(settings: RerankSettings) -> RerankProvider:
"""Build a rerank provider from settings.
Args:
settings: The :class:`RerankSettings` slice from
:func:`everos.config.load_settings`.
Returns:
A :class:`RerankProvider` ready to call ``rerank``.
Raises:
ValueError: If ``model`` or ``base_url`` is unset, or if
``provider`` does not match a known implementation.
``api_key`` is required for ``deepinfra``; optional (empty
string) for ``vllm`` self-hosted endpoints.
"""
if not settings.model:
raise ValueError(
"Rerank model is not configured "
"(set EVEROS_RERANK__MODEL or [rerank] model in user toml)"
)
if not settings.base_url:
raise ValueError(
"Rerank base_url is not configured (set EVEROS_RERANK__BASE_URL)"
)
api_key = settings.api_key.get_secret_value() if settings.api_key else ""
if settings.provider == "deepinfra":
if not api_key:
raise ValueError(
"DeepInfra rerank api_key is not configured "
"(set EVEROS_RERANK__API_KEY)"
)
return DeepInfraRerankProvider(
model=settings.model,
api_key=api_key,
base_url=settings.base_url,
timeout=settings.timeout_seconds,
max_retries=settings.max_retries,
batch_size=settings.batch_size,
max_concurrent=settings.max_concurrent,
)
if settings.provider == "vllm":
return VllmRerankProvider(
model=settings.model,
api_key=api_key,
base_url=settings.base_url,
timeout=settings.timeout_seconds,
max_retries=settings.max_retries,
batch_size=settings.batch_size,
max_concurrent=settings.max_concurrent,
)
raise ValueError(f"unknown rerank provider: {settings.provider!r}")

View File

@ -0,0 +1,62 @@
"""Rerank provider protocol.
The contract every rerank provider satisfies: given a query and a list
of candidate documents, return a re-ordered list of ``(index, score)``
pairs (highest relevance first). The provider does **not** filter —
that's the caller's job (e.g. drop scores below a threshold, take
``top_k``). Returning every input pair keeps the contract stable
across providers whose backends may not natively support ``top_n``.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import NamedTuple, Protocol, runtime_checkable
class RerankError(Exception):
    """Signals that a rerank provider could not produce scores."""
class RerankResult(NamedTuple):
    """A single ``(index, score)`` pair produced by a rerank call.

    Fields:
        index: Position of the scored document in the list the caller
            passed in, so the caller can look the original text back up.
        score: Relevance as reported by the provider; a larger value
            means a more relevant document.
    """

    index: int
    score: float
@runtime_checkable
class RerankProvider(Protocol):
    """Structural interface implemented by every async rerank backend."""

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score ``documents`` against ``query`` and return them best-first.

        Args:
            query: The search query.
            documents: Passage texts to score against ``query``.
            instruction: Task instruction for instruction-tuned rerankers
                (e.g. Qwen3-Reranker). A provider that builds the model
                prompt itself folds it into that prompt; a provider whose
                endpoint applies the template server-side may ignore it.
                ``None`` means "use the provider's default".

        Returns:
            Exactly one :class:`RerankResult` per input document — no
            filtering or truncation — ordered by ``score`` descending.
        """
        ...

View File

@ -0,0 +1,173 @@
"""vLLM rerank provider.
Self-deployed vLLM (and other OpenAI-compatible rerank servers) expose
the OpenAI-style rerank endpoint::
POST {base_url}/rerank
Authorization: Bearer <api_key> # optional for self-hosted ("EMPTY")
Content-Type: application/json
Request body:
{
"model": "<model>",
"query": "<query>",
"documents": ["<doc 1>", "<doc 2>", ...]
}
Response body:
{
"results": [
{"index": 0, "relevance_score": 0.87},
{"index": 1, "relevance_score": 0.43},
...
],
"id": "...",
...
}
We pass documents through as-is — caller is responsible for any
prompt-template formatting required by the underlying reranker. Output
ordering may already be score-descending; we sort defensively to honour
the :class:`RerankProvider` contract regardless of server behaviour.
"""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
from typing import Any
import httpx
from .protocol import RerankError, RerankResult
class VllmRerankProvider:
    """Rerank provider for vLLM / OpenAI-compat ``/v1/rerank`` endpoints.

    Args:
        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
        api_key: Bearer credential. Pass ``""`` (empty string) for
            self-hosted endpoints that don't require auth — the
            ``Authorization`` header is omitted in that case.
        base_url: API root that *contains* the ``/v1`` prefix
            (e.g. ``"http://localhost:8000/v1"``). The ``/rerank``
            suffix is appended once, at construction time.
        timeout: Per-request timeout, seconds.
        max_retries: Soft retry count on transport errors / 5xx / 429.
        batch_size: Cap on documents per request.
        max_concurrent: Cap on in-flight requests across all batches.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        self._model = model
        self._api_key = api_key
        # Tolerate a trailing slash on the configured root.
        self._url = f"{base_url.rstrip('/')}/rerank"
        self._timeout = timeout
        self._max_retries = max_retries
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score every document against ``query``; return sorted desc.

        ``instruction`` is accepted for protocol parity but not transmitted:
        the OpenAI-compatible ``/rerank`` endpoint applies the reranker's chat
        template (including any task instruction) server-side, so unlike the
        DeepInfra completion-style API there is no client-side template to fill.

        Returns:
            One :class:`RerankResult` per input document with ``index``
            relative to ``documents``; ``[]`` for empty input.

        Raises:
            RerankError: If any batch fails (see ``_score_chunk``).
        """
        if not documents:
            return []
        chunks: list[tuple[int, list[str]]] = [
            (offset, list(documents[offset : offset + self._batch_size]))
            for offset in range(0, len(documents), self._batch_size)
        ]
        chunk_results = await asyncio.gather(
            *(self._score_chunk(query, docs) for _, docs in chunks)
        )
        scored: list[RerankResult] = []
        for (offset, _), partial in zip(chunks, chunk_results, strict=True):
            # Server indices are batch-local; shift them back to the
            # caller's coordinate space.
            scored.extend(
                RerankResult(index=offset + r.index, score=r.score) for r in partial
            )
        scored.sort(key=lambda r: r.score, reverse=True)
        return scored

    async def _score_chunk(
        self, query: str, documents: list[str]
    ) -> list[RerankResult]:
        """POST one batch to the rerank endpoint with bounded retries.

        Transport errors, HTTP 5xx and HTTP 429 are retried up to
        ``self._max_retries`` times; other non-200 statuses fail at once.
        One ``self._semaphore`` slot is held for the whole retry sequence.

        Returns:
            Parsed results with batch-local indices, exactly one per
            document in ``documents``.

        Raises:
            RerankError: On exhausted retries, a non-retryable status, a
                non-JSON or malformed body, or a result set whose indices
                are not exactly ``0 .. len(documents) - 1``.
        """
        payload: dict[str, Any] = {
            "model": self._model,
            "query": query,
            "documents": documents,
        }
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        async with self._semaphore:
            for attempt in range(self._max_retries + 1):
                last_attempt = attempt == self._max_retries
                try:
                    async with httpx.AsyncClient(timeout=self._timeout) as client:
                        response = await client.post(
                            self._url, json=payload, headers=headers
                        )
                except httpx.HTTPError as exc:
                    if last_attempt:
                        raise RerankError(
                            f"vLLM rerank transport failure: {exc}"
                        ) from exc
                    continue
                status = response.status_code
                if status == 200:
                    # ``Response.json()`` raises a ``ValueError`` subclass
                    # on a non-JSON body; keep the error surface uniform.
                    try:
                        body = response.json()
                    except ValueError as exc:
                        raise RerankError(
                            "vLLM rerank returned a non-JSON body: "
                            f"{response.text[:200]}"
                        ) from exc
                    results = _parse_rerank_results(body)
                    # A missing, duplicated or out-of-range index would be
                    # shifted onto the wrong document by ``rerank`` and
                    # break the one-result-per-document contract.
                    indices = sorted(r.index for r in results)
                    if indices != list(range(len(documents))):
                        raise RerankError(
                            f"vLLM rerank returned {len(results)} results with "
                            f"indices {indices[:20]} for {len(documents)} documents"
                        )
                    return results
                retryable = status >= 500 or status == 429
                if retryable and not last_attempt:
                    continue
                raise RerankError(
                    f"vLLM rerank HTTP {status}: {response.text[:200]}"
                )
        # Only reachable when the retry loop never ran (negative max_retries).
        raise RerankError(f"vLLM rerank exhausted retries ({self._max_retries})")
def _parse_rerank_results(body: dict[str, Any]) -> list[RerankResult]:
items = body.get("results")
if not isinstance(items, list):
raise RerankError(f"vLLM rerank response missing results: {body!r}")
parsed: list[RerankResult] = []
for item in items:
try:
parsed.append(
RerankResult(
index=int(item["index"]),
score=float(item["relevance_score"]),
)
)
except (KeyError, TypeError, ValueError) as exc:
raise RerankError(f"malformed rerank result entry: {item!r}") from exc
return parsed

View File

@ -0,0 +1,24 @@
"""Tokenizer provider — sync app-layer tokenisation for BM25 indexing.
Public surface:
- :class:`Tokenizer` — Protocol every provider satisfies.
- :class:`JiebaTokenizer` — default jieba-backed implementation.
- :func:`build_tokenizer` — factory returning the configured tokenizer.
External usage::
from everos.component.tokenizer import build_tokenizer
tk = build_tokenizer()
tokens = tk.tokenize("hello 世界") # ['hello', '世界']
"""
from .factory import build_tokenizer as build_tokenizer
from .jieba_provider import JiebaTokenizer as JiebaTokenizer
from .protocol import Tokenizer as Tokenizer
__all__ = [
"JiebaTokenizer",
"Tokenizer",
"build_tokenizer",
]

View File

@ -0,0 +1,17 @@
"""Factory for the cascade-time tokenizer.
Single implementation today (``JiebaTokenizer``). Lifting this into a
factory keeps callers (cascade handler) decoupled from the concrete
choice, so swapping to char-bigram / hf tokenizer later is a one-file
change — see ``17_lancedb_tables_design.md`` §2.4.1.
"""
from __future__ import annotations
from .jieba_provider import JiebaTokenizer
from .protocol import Tokenizer
def build_tokenizer() -> Tokenizer:
    """Return a new default tokenizer instance (``JiebaTokenizer``)."""
    tokenizer: Tokenizer = JiebaTokenizer()
    return tokenizer

View File

@ -0,0 +1,141 @@
"""Jieba-based tokenizer — covers CJK + English mixed content.
Uses ``jieba.cut_for_search`` (search-mode segmentation: yields both the
greedy max-match segment and its finer sub-segments for compound CJK
words). Same mode as the legacy enterprise keyword-search path uses on
the query side — keeping cascade write and search query symmetric is
the hard contract for BM25 recall to work.
After segmentation we drop:
* whitespace / empty tokens (so the join-on-space output stays clean),
* tokens shorter than ``min_token_length`` (default 2 — same threshold
enterprise's ``filter_stopwords(min_length=2)`` uses; single-char
fragments mostly hurt BM25 precision),
* tokens in a small bilingual stopword set (Chinese function words +
English articles / prepositions / aux verbs).
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Final
import jieba
# Small bilingual stopword set. Intentionally tight (not a full
# Chinese stopword list) so the behaviour is predictable; callers
# tuning recall can subclass / extend.
_DEFAULT_STOPWORDS: Final[frozenset[str]] = frozenset(
{
# English — articles / prepositions / aux verbs that dominate BM25
# idf-noise but add no recall value.
"the",
"a",
"an",
"and",
"or",
"but",
"if",
"of",
"to",
"in",
"on",
"at",
"by",
"for",
"with",
"as",
"is",
"are",
"was",
"were",
"be",
"been",
"being",
"do",
"does",
"did",
"has",
"have",
"had",
"this",
"that",
"these",
"those",
"it",
"its",
# Chinese — function words / particles. ``cut_for_search`` emits
# these as single-char tokens anyway, and the min_length=2 floor
# would drop most; listing them explicitly makes the intent clear
# and is a no-op when min_length filtering also kicks in.
# NOTE(review): every entry below reads as an empty string here, so the
# whole run collapses to a single "" member that filters nothing
# (``tokenize`` already skips empty tokens). Presumably the CJK literals
# were lost to an encoding problem — verify against the repository copy.
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
}
)
# Tokens shorter than this many characters are dropped by ``JiebaTokenizer``
# unless the caller passes a different ``min_token_length``.
_DEFAULT_MIN_TOKEN_LENGTH: Final[int] = 2
class JiebaTokenizer:
    """Tokenizer that calls into ``jieba.cut_for_search`` and filters.

    Every segment is stripped and lower-cased, then dropped when it is
    empty, shorter than ``min_token_length``, or a stopword.

    Args:
        min_token_length: Minimum token length (in characters) to keep.
        extra_stopwords: Additional stopwords merged with the built-in
            set. They are stripped and lower-cased on the way in so they
            compare equal to the normalised tokens regardless of the case
            the caller used.
    """

    def __init__(
        self,
        *,
        min_token_length: int = _DEFAULT_MIN_TOKEN_LENGTH,
        extra_stopwords: frozenset[str] | None = None,
    ) -> None:
        # Touching ``jieba.initialize()`` here would force eager dict load
        # at import time and balloon test-collection latency. ``jieba.cut*``
        # lazy-loads on first call instead.
        self._min_len = min_token_length
        if extra_stopwords:
            # Tokens are normalised with ``strip().lower()`` before the
            # stopword lookup; apply the same normalisation here, otherwise
            # a mixed-case extra stopword could never match.
            normalised = frozenset(word.strip().lower() for word in extra_stopwords)
            self._stopwords = _DEFAULT_STOPWORDS | normalised
        else:
            self._stopwords = _DEFAULT_STOPWORDS

    def tokenize(self, text: str) -> list[str]:
        """Return the filtered, lower-cased tokens of ``text`` in order."""
        if not text:
            return []
        candidates = (raw.strip().lower() for raw in jieba.cut_for_search(text))
        return [
            tok
            for tok in candidates
            # ``strip()`` already removed all whitespace, so an all-space
            # segment shows up here as the empty string.
            if tok and len(tok) >= self._min_len and tok not in self._stopwords
        ]

    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
        """Tokenise each string in ``texts``, preserving input order."""
        return [self.tokenize(t) for t in texts]

View File

@ -0,0 +1,37 @@
"""Tokenizer protocol.
App-layer tokenisation gates every BM25-indexed field in LanceDB
(``17_lancedb_tables_design.md`` §2.4.1): the source surface form lives
in ``<field>`` while the space-joined token stream lives in
``<field>_tokens``, and the FTS index reads only the latter using a
whitespace tokenizer. Keeping the tokenizer decision in the app layer
means it can swap (jieba → unigram → hf) without re-indexing or
touching LanceDB schemas.
The protocol is sync — every concrete tokenizer in scope today (jieba,
char-bigram, regex word-split) is CPU-bound with no IO, so an async
wrapper would just shuffle work onto the event loop. If a future GPU
or remote tokenizer needs IO it should add an async method explicitly.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Protocol, runtime_checkable
@runtime_checkable
class Tokenizer(Protocol):
    """Structural interface for synchronous tokenizers."""

    def tokenize(self, text: str) -> list[str]:
        """Split ``text`` into tokens, keeping their original order.

        Empty and whitespace-only tokens must not appear in the result,
        so that joining the tokens with single spaces never produces two
        spaces in a row.
        """
        ...

    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
        """Tokenise each string; result ``i`` belongs to ``texts[i]``."""
        ...

View File

@ -0,0 +1,22 @@
"""Common utilities (datetime, tokenization, etc.).
Public API:
from everos.component.utils.datetime import (
UtcDatetime,
ensure_utc,
from_iso_format,
from_timestamp,
get_now_with_timezone,
get_utc_now,
to_date_str,
to_display_tz,
to_iso_format,
to_timestamp_ms,
today_with_timezone,
)
from everos.component.utils.tokenize import (
tokens_for_index,
tokens_for_query,
join_tokens,
)
"""

View File

@ -0,0 +1,263 @@
"""Timezone-aware datetime helpers.
EverOS follows a **two-zone discipline**:
* **Storage** (SQLite + LanceDB) is always UTC. Use :func:`get_utc_now`
for any ``default_factory`` / write-path timestamp; if you accept a
``datetime`` from a caller, normalise with :func:`ensure_utc` before
it crosses the persistence boundary.
* **Display** (markdown frontmatter, HTTP API response, date buckets for
daily-log filenames) uses the configured "display timezone" from
:attr:`everos.config.MemorySettings.timezone` (``EVEROS_MEMORY__TIMEZONE``).
Use :func:`get_now_with_timezone` / :func:`today_with_timezone` /
:func:`to_display_tz` here.
The display timezone also serves as the **fallback timezone for naive
input**: if a caller hands us a string / datetime without offset (e.g.
a hand-written ISO timestamp), :func:`from_iso_format` attaches the
display timezone before further processing — that matches a human's
intuition ("if I didn't say a zone, you should assume my zone").
Never call :func:`datetime.datetime.now` /
:func:`datetime.datetime.utcnow` directly — see
:doc:`.claude/rules/datetime-handling`.
Cache invalidation in tests::
load_settings.cache_clear()
_display_tz.cache_clear()
"""
from __future__ import annotations
import datetime as _dt
from functools import cache
from typing import Annotated
from zoneinfo import ZoneInfo
from pydantic import AfterValidator
_MS_THRESHOLD = 1e12 # ts >= this is treated as milliseconds
@cache
def _display_tz() -> _dt.tzinfo:
    """Return the configured **display timezone** as a ``ZoneInfo`` (cached).

    The zone name is read from ``load_settings().memory.timezone``. Because
    the result is memoised with :func:`functools.cache`, a settings change
    only takes effect after ``_display_tz.cache_clear()``.

    This zone is used for rendering datetimes and as the fallback zone
    attached to naive input; storage timestamps use :func:`get_utc_now`
    and do not depend on it.
    """
    # Lazy import to avoid pulling in pydantic-settings at module load.
    from everos.config import load_settings

    zone_name = load_settings().memory.timezone
    return ZoneInfo(zone_name)
def get_utc_now() -> _dt.datetime:
"""Return the current time as a UTC-aware datetime.
Use for any **storage** write-path (SQLite ``default_factory``,
LanceDB row construction, OME event ``ts``, any internal "when
did this happen" record). Independent of the display timezone — a
new deployment that switches ``EVEROS_MEMORY__TIMEZONE`` will not
misalign existing rows.
Display-side code should use :func:`get_now_with_timezone` instead,
or render via :func:`to_display_tz`.
"""
return _dt.datetime.now(tz=_dt.UTC)
def get_now_with_timezone() -> _dt.datetime:
    """Return "now" as an aware datetime in the **display timezone**.

    Intended for **display** write-paths only — markdown frontmatter,
    daily-log date buckets, anything a human reads literally. The result
    carries the display zone's offset, so ``.isoformat()`` yields e.g.
    ``2026-05-29T14:00:00+08:00``.

    Storage timestamps must come from :func:`get_utc_now` instead so the
    display zone never leaks into persisted rows.
    """
    zone = _display_tz()
    return _dt.datetime.now(tz=zone)
def today_with_timezone() -> _dt.date:
    """Return the current calendar date in the **display timezone**.

    This is the date part of :func:`get_now_with_timezone`; use it wherever
    a *date bucket* is needed (e.g. daily-log file boundaries) so every
    caller derives "today" from the same clock.
    """
    now = get_now_with_timezone()
    return now.date()
def ensure_utc(d: _dt.datetime | None) -> _dt.datetime | None:
"""Normalise any datetime to UTC at the **storage boundary**.
Semantics:
* ``None`` → ``None`` (nullable-column convenience: lets callers
pipe ``ensure_utc(row.last_attempt_at)`` without an outer guard).
* Aware input → ``astimezone(UTC)``.
* **Naive input → assume UTC** (attach ``tzinfo=UTC``); no
display-tz fallback.
Why naive→UTC rather than naive→display→UTC? Every caller of this
function sits at the storage boundary, and the dominant naive
source is SQLite reads: SQLAlchemy strips tz on write so what
comes back is a naive value whose bytes are UTC. Treating those
naive reads as display-tz would drift by the configured offset on
every round trip — exactly the bug Q2 prevents.
Caller-supplied datetimes that may genuinely be naive in display
tz (e.g. ISO strings from HTTP request bodies that omitted the
offset) should be funnelled through :func:`from_iso_format` first,
which encodes the "if you didn't say a zone, assume your zone"
rule. The aware result then passes through ``ensure_utc`` as a
pure ``astimezone(UTC)``.
Use the :data:`UtcDatetime` ``Annotated`` type to apply this
automatically on Pydantic model fields.
"""
if d is None:
return None
if d.tzinfo is None:
return d.replace(tzinfo=_dt.UTC)
return d.astimezone(_dt.UTC)
def to_display_tz(d: _dt.datetime | None) -> _dt.datetime | None:
    """Express a datetime in the configured **display timezone**.

    Meant for the **response render boundary** (API responses, markdown
    bodies), so the reader sees their wall-clock time with the matching
    ``+HH:MM`` offset.

    * ``None`` is passed through (nullable-column convenience) without
      resolving the display zone.
    * A naive datetime is taken to already be display-zone local time:
      the zone is attached and the wall-clock fields are unchanged.
    * An aware datetime is converted with ``astimezone(...)``.
    """
    if d is None:
        return None
    zone = _display_tz()
    return d.astimezone(zone) if d.tzinfo is not None else d.replace(tzinfo=zone)
UtcDatetime = Annotated[_dt.datetime, AfterValidator(ensure_utc)]
"""Pydantic-friendly ``datetime`` type that normalises to UTC.
Apply to any SQLModel / Pydantic ``datetime`` field that maps to a
storage column. Both INSERT default values and post-read values pass
through :func:`ensure_utc`, so SQLite's tz-stripping behaviour is
neutralised: rows go in as UTC and come out as UTC-aware.
Usage::
from everos.component.utils.datetime import UtcDatetime, get_utc_now
class MyRow(BaseTable, table=True):
happened_at: UtcDatetime = Field(default_factory=get_utc_now)
"""
def from_timestamp(ts: int | float) -> _dt.datetime:
    """Convert a Unix timestamp into an aware datetime.

    The unit is auto-detected: a value ``>= _MS_THRESHOLD`` (``1e12``) is
    read as milliseconds, anything smaller as seconds. The result is
    expressed in the display timezone.
    """
    looks_like_millis = ts >= _MS_THRESHOLD
    seconds = ts / 1000.0 if looks_like_millis else float(ts)
    return _dt.datetime.fromtimestamp(seconds, tz=_display_tz())
def from_iso_format(value: _dt.datetime | int | float | str) -> _dt.datetime:
"""Parse a value into a timezone-aware datetime (strict).
Accepted inputs:
* ``datetime`` — naive values get the default timezone attached.
* ``int`` / ``float`` — Unix timestamp (auto-detect seconds vs ms).
* ``str`` — ISO-8601, including ``"Z"`` suffix for UTC.
Raises:
TypeError: On unsupported input type.
ValueError: On malformed string / negative timestamp.
"""
if isinstance(value, _dt.datetime):
if value.tzinfo is None:
return value.replace(tzinfo=_display_tz())
return value
if isinstance(value, bool): # bool is an int subclass — reject explicitly
raise TypeError("from_iso_format does not accept bool")
if isinstance(value, int | float):
return from_timestamp(value)
if isinstance(value, str):
s = value.strip()
# Python's fromisoformat accepts "+HH:MM" but not the "Z" suffix; map it.
if s.endswith("Z"):
s = s[:-1] + "+00:00"
parsed = _dt.datetime.fromisoformat(s)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=_display_tz())
return parsed
raise TypeError(
f"from_iso_format: unsupported type {type(value).__name__}; "
"expected datetime / int / float / str"
)
def to_iso_format(
value: _dt.datetime | int | float | str | None,
) -> str | None:
"""Render a value as an ISO-8601 string (timezone-aware).
Accepted inputs:
* ``None`` — returns ``None`` (nullable column convenience).
* ``datetime`` — rendered as-is (must already be tz-aware).
* ``int`` / ``float`` — interpreted via :func:`from_timestamp`.
* ``str`` — re-validated through :func:`from_iso_format`.
"""
if value is None:
return None
if isinstance(value, _dt.datetime):
return value.isoformat()
if isinstance(value, bool): # bool is an int subclass
raise TypeError("to_iso_format does not accept bool")
if isinstance(value, int | float):
return from_timestamp(value).isoformat()
if isinstance(value, str):
if not value:
return None
return from_iso_format(value).isoformat()
raise TypeError(
f"to_iso_format: unsupported type {type(value).__name__}; "
"expected datetime / int / float / str / None"
)
def to_date_str(d: _dt.datetime | None) -> str | None:
    """Return the calendar date of ``d`` formatted as ``YYYY-MM-DD``.

    ``None`` is passed through for nullable database columns. The date is
    taken from the datetime's own wall-clock fields; no timezone conversion
    happens here. For a plain :class:`datetime.date`, call
    ``d.isoformat()`` directly instead.
    """
    return None if d is None else d.date().isoformat()
def to_timestamp_ms(d: _dt.datetime) -> int:
"""Convert a datetime to a Unix timestamp in milliseconds."""
return int(d.timestamp() * 1000)

View File

@ -0,0 +1,37 @@
"""Configuration data and Settings schema.
Public API:
from everos.config import (
Settings, MemorySettings, SqliteSettings, LanceDBSettings,
LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
BoundaryDetectionSettings,
load_settings,
)
Distinct from ``everos.component.config`` (which is a *capability* —
loader / merger / env reader).
"""
from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
from .settings import EmbeddingSettings as EmbeddingSettings
from .settings import LanceDBSettings as LanceDBSettings
from .settings import LLMSettings as LLMSettings
from .settings import MemorySettings as MemorySettings
from .settings import MultimodalSettings as MultimodalSettings
from .settings import RerankSettings as RerankSettings
from .settings import Settings as Settings
from .settings import SqliteSettings as SqliteSettings
from .settings import load_settings as load_settings
__all__ = [
"BoundaryDetectionSettings",
"EmbeddingSettings",
"LLMSettings",
"LanceDBSettings",
"MemorySettings",
"MultimodalSettings",
"RerankSettings",
"Settings",
"SqliteSettings",
"load_settings",
]

View File

@ -0,0 +1,137 @@
# everos default configuration.
#
# Lookup order (later overrides earlier):
# 1. This file (shipped defaults; lowest priority)
# 2. ~/.everos/config.toml — user-level overrides (optional;
# path is overridable via EVEROS_CONFIG_FILE)
# 3. .env file in the working directory
# 4. Environment variables — EVEROS_<SECTION>__<KEY>
# e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
# 5. Programmatic init args (highest priority)
#
# TOML has no `null`: a key that is omitted (or left commented out) here
# falls back to the Pydantic default declared in code.
[memory]
# memory-root is the single directory holding all persisted memory.
# `~` is expanded; the path is resolved when MemoryRoot is constructed.
root = "~/.everos"
# Effective timezone for date buckets and timestamps. Drives
# component.utils.datetime; this is the SOLE source — OS `TZ` is not
# read. Override via `EVEROS_MEMORY__TIMEZONE` env var if needed.
timezone = "UTC"
[api]
# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
# keeps the API on loopback only — EverOS ships no built-in auth (see
# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
# have placed your own gateway / auth layer in front of the server.
# Override via EVEROS_API__HOST and EVEROS_API__PORT.
host = "127.0.0.1"
port = 8000
[sqlite]
# PRAGMA journal_mode — WAL is the recommended high-concurrency mode.
journal_mode = "WAL"
# PRAGMA synchronous — NORMAL is safe under WAL and ~2x faster than FULL.
synchronous = "NORMAL"
# PRAGMA foreign_keys — must be explicitly enabled per connection.
foreign_keys = true
# PRAGMA temp_store — MEMORY keeps query intermediates in RAM (no IO impact
# on durability — only affects sort/group/temp-table calculation buffers).
temp_store = "MEMORY"
# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
busy_timeout_ms = 5000
# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
journal_size_limit_bytes = 67108864
# PRAGMA cache_size — KB of page cache (per connection).
cache_size_kb = 2048
[lancedb]
# Read consistency interval in seconds.
# omitted / null -> no consistency check (fastest reads)
# 0 -> strict (every read checks updates)
# >0 -> eventual (interval seconds between checks)
# Uncomment to override:
# read_consistency_seconds = 5.0
[llm]
# Provider-agnostic OpenAI-protocol client config. Override via env:
# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
# Or via a ``.env`` file next to the project root (auto-loaded).
model = "gpt-4o-mini"
# api_key = ""
# base_url = ""
[multimodal]
# Independent LLM for multimodal parsing (everalgo-parser); must accept
# image / pdf / audio image_url parts. Override via env:
# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
model = "google/gemini-3-flash-preview"
max_concurrency = 4
# api_key = ""
# base_url = ""
# file:// content-item support (read locally by EverOS, not everalgo).
# file_uri_allow_dirs: empty = allow any readable file (local-first default);
# list base dirs to confine reads when the API is exposed.
# file_uri_allow_dirs = ["/srv/uploads"]
# file_uri_max_bytes = 52428800 # 50 MiB cap per file:// asset
[embedding]
# OpenAI-compatible embedding endpoint. Override via env:
# EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
# model / api_key / base_url have no shipped defaults — must be set
# (env or user toml) before the embedding capability is used.
# model = "Qwen/Qwen3-Embedding-4B"
# api_key = ""
# base_url = "https://api.example.com/v1"
timeout_seconds = 30.0
max_retries = 3
batch_size = 10
max_concurrent = 5
[rerank]
# Rerank provider. Override via env:
# EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
# EVEROS_RERANK__BASE_URL
# `provider` picks the request-shape:
# - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
# - "vllm" -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
provider = "deepinfra"
# model = "Qwen/Qwen3-Reranker-4B"
# api_key = ""
# base_url = "https://api.deepinfra.com/v1/inference"
timeout_seconds = 30.0
max_retries = 3
batch_size = 10
max_concurrent = 5
[boundary_detection]
# Passed through to ``everalgo.BoundaryDetector.adetect``.
hard_token_limit = 65536
hard_msg_limit = 500
[search]
# Vector retrieval strategy when SearchMethod.VECTOR is selected.
# "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20),
# max-pool the per-fact cosine by parent memcell, then reverse-resolve
# to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
# episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
# "episode": single-vector ANN over episode.vector (legacy path).
# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
vector_strategy = "maxsim_atomic"
[memorize]
# Conversation mode. Selects the boundary detector and which pipelines run:
# "chat" -> BoundaryDetector + user_memory only
# "agent" -> AgentBoundaryDetector + user_memory + agent_memory
# A single service process serves one mode at a time; switching mode
# requires a restart. Override via EVEROS_MEMORIZE__MODE.
mode = "agent"
# Maximum wall-clock for one memorize() invocation while holding the
# per-session lock. On timeout the outer asyncio.timeout cancels the call
# and the lock auto-releases so subsequent concurrent /add on the same
# session aren't deadlocked. Covers boundary LLM + memcell writes +
# synchronous portion of pipeline dispatch.
# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
session_lock_timeout_seconds = 360.0

View File

@ -0,0 +1,64 @@
# everos OME (Offline Memory Engine) — per-strategy overrides.
#
# This file is materialised at ``<memory-root>/ome.toml`` by
# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
# individual strategies or tweak their gate / retry / cron without
# restarting the server; the engine watches this file and hot-reloads
# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
# your edits — the file is only materialised when absent.
#
# Overrides are partial: only the keys you set replace the in-code
# defaults; omitted keys keep each strategy's coded value. Unknown
# keys (typos) raise StartupValidationError, so you cannot silently
# misconfigure a strategy.
#
# Per-strategy schema (StrategyOverride):
# enabled = bool # disable a strategy entirely
# max_retries = int >= 0 # re-fire on failure
# cron = str # replace the @cron(...) trigger
# idle_seconds = int > 0 # replace @idle(...) idle window
# scan_interval_seconds = int > 0 # paired with idle_seconds; must be <= idle/2
# [strategies.<name>.gate] # only valid on @counter-gated strategies
# threshold = int > 0 # counter trigger threshold
# cooldown_seconds = int >= 0 # min seconds between fires
# event_field = str # dispatch field for counter increment
# ── User-memory pipeline ────────────────────────────────────────────────
# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
# [strategies.extract_atomic_facts]
# enabled = true
# Foresight extraction (runs per memcell). Heavy LLM call — common to
# disable in evaluation / benchmark runs.
# [strategies.extract_foresight]
# enabled = false
# Profile clustering trigger (counter-gated; fires once N user memcells
# accumulate). Lower the threshold to cluster more aggressively.
# [strategies.trigger_profile_clustering]
# enabled = true
# [strategies.trigger_profile_clustering.gate]
# threshold = 5
# User-profile extraction (runs after clustering trigger fires). Common
# to disable in evaluation runs where ground-truth profiles aren't measured.
# [strategies.extract_user_profile]
# enabled = false
# ── Agent-memory pipeline ───────────────────────────────────────────────
# Agent case extraction (runs per agent memcell). One per tool call cycle.
# [strategies.extract_agent_case]
# enabled = true
# Skill clustering trigger (counter-gated; fires once N agent cases
# accumulate per agent).
# [strategies.trigger_skill_clustering]
# enabled = true
# [strategies.trigger_skill_clustering.gate]
# threshold = 5
# Agent skill extraction (runs after skill clustering trigger fires).
# [strategies.extract_agent_skill]
# enabled = true

View File

View File

@ -0,0 +1,20 @@
# Custom prompt slot for BoundaryDetector.adetect.
#
# Default behaviour
# Leave this slot disabled (``enabled: false``). The pipeline will pass
# ``prompt=None`` through to algo, which falls back to the everalgo
# bundled default prompt — see:
# ~/everalgo/packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
# (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
#
# To customise
# 1. Read the algo default at the path above; note the required
# placeholders ``{messages}`` and ``{token_count}``.
# 2. Replace the ``template`` body below with your prompt.
# 3. Flip ``enabled`` to ``true``.
#
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
# ``prompt=None`` and the algo default is used (zero override cost).
enabled: false
template: ""

View File

@ -0,0 +1,23 @@
# Custom prompt slot for EpisodeExtractor.aextract.
#
# Default behaviour
# Leave this slot disabled (``enabled: false``). The pipeline will pass
# ``prompt=None`` through to algo, which falls back to the everalgo
# bundled default prompt — see:
# everalgo/user_memory/prompts/en/episode.py
# (the pipeline calls ``aextract`` with ``sender_id=None``, so the
# whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
# per-user ``USER_EPISODE_GENERATION_PROMPT``)
#
# To customise
# 1. Read the algo default at the path above; note the required
# placeholders ``{conversation_start_time}``, ``{conversation}`` and
# ``{custom_instructions}``.
# 2. Replace the ``template`` body below with your prompt.
# 3. Flip ``enabled`` to ``true``.
#
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
# ``prompt=None`` and the algo default is used (zero override cost).
enabled: false
template: ""

View File

@ -0,0 +1,403 @@
"""Application settings.
Loaded by :func:`load_settings`. Source priority (later wins):
1. ``config/default.toml`` (shipped values; lowest priority)
2. ``~/.everos/config.toml`` (user-level overrides; optional)
3. ``.env`` file in the working directory (secrets / machine-specific)
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
5. Init args passed programmatically (highest priority)
The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""
from __future__ import annotations
import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
BaseSettings,
PydanticBaseSettingsSource,
SettingsConfigDict,
TomlConfigSettingsSource,
)
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
def _resolve_user_toml_path() -> Path:
    """Return the location of the optional user-level ``config.toml``.

    A non-empty ``EVEROS_CONFIG_FILE`` environment variable takes precedence
    (with ``~`` expanded); otherwise the pre-expanded default
    ``~/.everos/config.toml`` is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset and empty are treated alike: fall back to the default path.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
class MemorySettings(BaseModel):
    """memory-root configuration."""

    # NOTE(review): the default keeps a literal ``~``; presumably the consumer
    # expands it before touching the filesystem — confirm.
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone keys that :class:`zoneinfo.ZoneInfo` cannot load.

        Args:
            v: Candidate IANA timezone key (e.g. ``"Asia/Shanghai"``).

        Returns:
            ``v`` unchanged when the key resolves.

        Raises:
            ValueError: The key is malformed or unknown; pydantic turns this
                into a regular validation error.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError covers keys that name a *directory* of the tz database
            # (e.g. "America"): some CPython versions surface those as
            # IsADirectoryError / PermissionError instead of
            # ZoneInfoNotFoundError, which would otherwise escape pydantic
            # as a raw exception rather than a validation error.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
class ApiSettings(BaseModel):
    """HTTP API server bind configuration.

    Default ``host = "127.0.0.1"`` keeps the server on loopback only,
    matching the threat model in ``SECURITY.md``: EverOS ships **no
    built-in authentication**, so binding to a routable interface
    (``0.0.0.0`` etc.) without your own gateway / auth layer in front
    is unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    # Bind address; loopback by default (see the class docstring).
    host: str = "127.0.0.1"
    # TCP port; values outside 1-65535 fail validation.
    port: int = Field(default=8000, ge=1, le=65535)
class SqliteSettings(BaseModel):
    """SQLite tunables applied as PRAGMAs on every new connection.

    This class only validates the values (closed ``Literal`` sets for the
    enumerated modes, ``ge=0`` for the numeric knobs); the code that issues
    the PRAGMA statements lives outside this class.
    """

    # Enumerated settings: only the listed spellings are accepted.
    journal_mode: Literal["WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"] = (
        "WAL"
    )
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    # Numeric settings; the unit is carried by the field-name suffix
    # (``_ms`` / ``_bytes`` / ``_kb``). Negative values are rejected.
    busy_timeout_ms: int = Field(default=5000, ge=0)
    journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)
    cache_size_kb: int = Field(default=2048, ge=0)
class LLMSettings(BaseModel):
    """LLM client configuration.

    Read by the service layer when lazily constructing the LLM client
    handed to algo extractors. Provider-agnostic field names — the
    project follows the OpenAI API protocol so any OpenAI-compatible
    endpoint plugs in via ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    # Model identifier sent to the endpoint.
    model: str = "gpt-4o-mini"
    # Wrapped in SecretStr so the raw key is masked in repr / logs.
    api_key: SecretStr | None = None
    # Endpoint override; ``None`` leaves the choice to the client
    # (presumably its built-in default — confirm against the client factory).
    base_url: str | None = None
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, consistent with the
    embedding / rerank sections: ``max_concurrency`` must be at least 1 and
    ``file_uri_max_bytes`` cannot be negative.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # A concurrency limit below 1 can never admit work, so reject it up front.
    max_concurrency: int = Field(default=4, ge=1)
    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = []
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
class EmbeddingSettings(BaseModel):
    """Embedding client configuration.

    OpenAI-compatible embedding endpoint. ``model`` / ``api_key`` /
    ``base_url`` are required at runtime when the embedding capability
    is enabled; the runtime knobs (``timeout`` etc.) have sensible
    defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Connection fields default to ``None``; this class does not enforce that
    # they are set — that check happens wherever the client is built.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)
class RerankSettings(BaseModel):
    """Rerank client configuration.

    Unlike LLM / embedding (single OpenAI-compatible shape), rerank API
    schemas differ between providers — DeepInfra uses ``POST {base_url}/
    {model}`` with a custom body, vLLM uses ``POST {base_url}/rerank``
    with ``{model, query, documents}``. ``provider`` picks which client
    implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    # Closed set: any other provider name fails validation.
    provider: Literal["deepinfra", "vllm"] = "deepinfra"
    # Connection fields default to ``None``; this class does not enforce that
    # they are set.
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(default=30.0, gt=0)
    max_retries: int = Field(default=3, ge=0)
    batch_size: int = Field(default=10, ge=1)
    max_concurrent: int = Field(default=5, ge=1)
class BoundaryDetectionSettings(BaseModel):
    """Hard limits passed through to ``everalgo`` BoundaryDetector."""

    # Both limits must be at least 1. How the detector reacts when a limit is
    # reached is defined by everalgo, not by this class.
    hard_token_limit: int = Field(default=65536, ge=1)
    hard_msg_limit: int = Field(default=500, ge=1)
class MemorizeSettings(BaseModel):
    """Memorize use-case configuration.

    ``mode`` selects which boundary detector runs and which pipelines are
    dispatched. A service process serves one mode at a time; toggling
    requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector`` and only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
      both user-memory + agent-memory pipelines run.

    ``session_lock_timeout_seconds`` caps how long one ``memorize()``
    invocation can hold the per-session lock. Covers boundary LLM call +
    memcell DB writes + (synchronous portion of) pipeline dispatch. Stops
    a stuck LLM from deadlocking subsequent concurrent calls on the same
    session_id: on timeout the outer ``asyncio.timeout`` cancels the task
    and the lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    # Closed set; defaults to the mode that runs both pipelines.
    mode: Literal["chat", "agent"] = "agent"
    # Must be strictly positive (``gt=0``); the default is six minutes.
    session_lock_timeout_seconds: float = Field(default=360.0, gt=0)
class SearchSettings(BaseModel):
    """Search-pipeline policy knobs.

    ``vector_strategy`` selects the read path taken by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. MaxSim over atomic facts; trades one extra LanceDB
      scan for finer-grained semantic match on long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). The legacy path; kept
      so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    # Closed set: any other strategy name fails validation at load time.
    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    Both fields are range-checked at load time: negative values have no
    defined meaning for either knob and are rejected.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        =========== ================= ====================
        cap         FD upper bound    query latency (100q)
        =========== ================= ====================
        ``2 MB``    ~45               ~5 ms
        ``4 MB``    ~52               ~3 ms
        ``8 MB``    ~140              ~2.4 ms
        ``16 MB``   ~290              ~2.3 ms  ← default
        ``32 MB``   ~630              ~1.4 ms
        ``unbound`` >960 (leaks)      ~1.3 ms
        =========== ================= ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

    Note: the *metadata* cache (``metadata_cache_size_bytes``) is
    **not** exposed — experiment showed it caches in-memory parsed
    manifests / fragment stats with zero impact on FD count; leaving
    it unbounded (lancedb default) is fine.
    """

    # ``None`` stays valid (no consistency check); only negatives are refused.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # A byte cap cannot be negative; 16 MiB is the measured default above.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
class Settings(BaseSettings):
    """Root settings object: one attribute per configuration section."""

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML layers between dotenv and the secrets source.

        Resulting precedence (earlier entries win in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level TOML is only added when the resolved path is an
        existing file; the shipped default TOML (``model_config.toml_file``)
        is always added.
        """
        user_toml = _resolve_user_toml_path()
        toml_layers: list[PydanticBaseSettingsSource] = []
        if user_toml.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_toml)
            )
        # No explicit path: the source falls back to model_config["toml_file"].
        toml_layers.append(TomlConfigSettingsSource(settings_cls))
        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )
@cache
def load_settings() -> Settings:
    """Build the application :class:`Settings` once and return it (cached).

    ``Settings()`` is constructed without init args, so values come from the
    sources registered by ``Settings.settings_customise_sources``:
    environment variables, the ``.env`` file, the optional user-level TOML
    and the shipped ``default.toml``.

    Cached at the module level — every caller sees the same instance until
    something explicitly clears the cache (``load_settings.cache_clear()``).
    Tests that monkeypatch environment variables must call
    ``cache_clear`` after each mutation to pick the new env up.
    """
    return Settings()

View File

View File

33
src/everos/core/errors.py Normal file
View File

@ -0,0 +1,33 @@
"""Cross-cutting domain errors surfaced to API callers.
These live in ``core`` so the ``memory`` layer can raise them and the
``entrypoints`` layer can catch them without crossing the layered import
boundary — ``any -> core`` is the only edge both share (entrypoints must
not import ``memory`` directly).
"""
from __future__ import annotations
class MultimodalError(Exception):
    """Base for multimodal-parsing errors meant to reach the caller.

    Catch this class to handle every multimodal failure defined in this
    module. The API layer maps any ``MultimodalError`` to an aligned
    ``{error: {code, message}}`` envelope (HTTP 415).
    """
class UnsupportedModalityError(MultimodalError):
    """everalgo cannot handle this modality (e.g. video stub, unknown type).

    Wraps everalgo's ``NotImplementedError`` / dispatch ``ValueError`` so the
    caller gets a stable, aligned error instead of a raw 500.
    """
class MultimodalNotEnabledError(MultimodalError):
    """Multimodal capability is not ready in this installation.

    Raised when the ``everos[multimodal]`` extra is not installed, or when a
    required system dependency (LibreOffice for Office documents) is absent.
    """

View File

@ -0,0 +1,27 @@
"""Application lifespan composition (chassis only).
This subpackage holds the *generic* lifespan machinery — the
:class:`LifespanProvider` ABC, :func:`build_lifespan` factory, and
chassis-level providers that are independent of any storage backend
(observability metrics, etc.). Concrete storage-backend providers
(SQLite / LanceDB) live next to the entrypoint that composes them
(see :mod:`everos.entrypoints.api.lifespans`) so ``core`` stays free
of concrete-backend imports.
External usage:
from everos.core.lifespan import (
LifespanProvider,
MetricsLifespanProvider,
build_lifespan,
)
"""
from .base import LifespanProvider as LifespanProvider
from .factory import build_lifespan as build_lifespan
from .metrics_lifespan import MetricsLifespanProvider as MetricsLifespanProvider
__all__ = [
"LifespanProvider",
"MetricsLifespanProvider",
"build_lifespan",
]

View File

@ -0,0 +1,30 @@
"""Lifespan provider abstract base.
A LifespanProvider is one unit of startup / shutdown work invoked by the
FastAPI lifespan factory. Providers are registered explicitly (no DI
auto-discovery) and executed in ``order`` ascending on startup, reverse
on shutdown.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
from fastapi import FastAPI
class LifespanProvider(ABC):
    """One unit of startup / shutdown work.

    Subclasses implement :meth:`startup` and :meth:`shutdown`.
    """

    def __init__(self, name: str, order: int = 0) -> None:
        """Store the provider's identity and ordering key.

        Args:
            name: Key under which the startup result is stored.
            order: Sort key; lower values start earlier.
        """
        self.name = name
        self.order = order

    @abstractmethod
    async def startup(self, app: FastAPI) -> Any:
        """Startup hook; return value is stored on ``app.state.lifespan_data[name]``."""

    @abstractmethod
    async def shutdown(self, app: FastAPI) -> None:
        """Shutdown hook; called in reverse order during application teardown."""

View File

@ -0,0 +1,57 @@
"""Lifespan composition factory.
Builds a FastAPI lifespan context manager from an explicit list of
LifespanProvider instances.
"""
from __future__ import annotations
from collections.abc import AsyncIterator, Callable, Sequence
from contextlib import asynccontextmanager
from fastapi import FastAPI
from everos.core.observability.logging import get_logger
from .base import LifespanProvider
logger = get_logger(__name__)
def build_lifespan(
    providers: Sequence[LifespanProvider],
) -> Callable[[FastAPI], AsyncIterator[None]]:
    """Compose providers into a FastAPI lifespan context manager.

    Providers are run in ``order`` ascending on startup and reverse on
    shutdown. A non-None return value from ``startup`` is stored under
    ``app.state.lifespan_data[provider.name]``.

    Only providers whose ``startup`` completed are shut down. If a provider
    fails to start, the providers started before it are torn down in reverse
    order and the startup exception propagates; providers that never started
    (and the one that failed) do not receive a ``shutdown`` call.

    Args:
        providers: Providers to compose; the input sequence is not mutated.

    Returns:
        A lifespan callable suitable for ``FastAPI(lifespan=...)``.
    """
    sorted_providers = sorted(providers, key=lambda p: p.order)

    @asynccontextmanager
    async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
        lifespan_data: dict[str, object] = {}
        # Tracks successful startups so teardown never touches a provider
        # that holds no resources yet.
        started: list[LifespanProvider] = []
        try:
            for provider in sorted_providers:
                logger.info(
                    "lifespan_provider_startup",
                    name=provider.name,
                    order=provider.order,
                )
                result = await provider.startup(app)
                started.append(provider)
                if result is not None:
                    lifespan_data[provider.name] = result
            app.state.lifespan_data = lifespan_data
            yield
        finally:
            for provider in reversed(started):
                # Best-effort teardown: one failing provider must not keep
                # the remaining ones from releasing their resources.
                try:
                    logger.info("lifespan_provider_shutdown", name=provider.name)
                    await provider.shutdown(app)
                except Exception:
                    logger.exception(
                        "lifespan_provider_shutdown_failed", name=provider.name
                    )

    return _lifespan

View File

@ -0,0 +1,36 @@
"""Metrics lifespan provider.
Confirms the metrics registry is ready and logs that the ``/metrics`` HTTP
endpoint is mounted on the main API. Kept as a placeholder to demonstrate
the lifespan pattern; replace or extend with a standalone metrics server
(e.g. ``prometheus_client.start_http_server`` on a separate port) if you
need to expose metrics on a dedicated socket.
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from everos.core.observability.logging import get_logger
from everos.core.observability.metrics import get_metrics_registry
from .base import LifespanProvider
logger = get_logger(__name__)
class MetricsLifespanProvider(LifespanProvider):
    """Lifespan provider that fetches the metrics registry and logs readiness."""

    def __init__(self, order: int = 5) -> None:
        """Register under the fixed name ``"metrics"`` with the given order."""
        super().__init__(name="metrics", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Fetch the registry, log that ``/metrics`` is ready, return the registry."""
        metrics_registry = get_metrics_registry()
        logger.info("metrics_registry_ready", endpoint="/metrics")
        return metrics_registry

    async def shutdown(self, app: FastAPI) -> None:
        """Log the shutdown event; nothing is released here."""
        logger.info("metrics_lifespan_shutdown")

View File

@ -0,0 +1,31 @@
"""Cross-cutting HTTP middleware components.
External usage:
from everos.core.middleware import (
DEFAULT_CORS_ALLOW_CREDENTIALS,
DEFAULT_CORS_ALLOW_HEADERS,
DEFAULT_CORS_ALLOW_METHODS,
DEFAULT_CORS_ORIGINS,
ProfileMiddleware,
PrometheusMiddleware,
global_exception_handler,
)
"""
from .cors import DEFAULT_CORS_ALLOW_CREDENTIALS as DEFAULT_CORS_ALLOW_CREDENTIALS
from .cors import DEFAULT_CORS_ALLOW_HEADERS as DEFAULT_CORS_ALLOW_HEADERS
from .cors import DEFAULT_CORS_ALLOW_METHODS as DEFAULT_CORS_ALLOW_METHODS
from .cors import DEFAULT_CORS_ORIGINS as DEFAULT_CORS_ORIGINS
from .global_exception import global_exception_handler as global_exception_handler
from .profile import ProfileMiddleware as ProfileMiddleware
from .prometheus import PrometheusMiddleware as PrometheusMiddleware
__all__ = [
"DEFAULT_CORS_ALLOW_CREDENTIALS",
"DEFAULT_CORS_ALLOW_HEADERS",
"DEFAULT_CORS_ALLOW_METHODS",
"DEFAULT_CORS_ORIGINS",
"ProfileMiddleware",
"PrometheusMiddleware",
"global_exception_handler",
]

View File

@ -0,0 +1,12 @@
"""CORS configuration defaults.
The CORS middleware itself is FastAPI's stock ``CORSMiddleware``; this module
centralises the default policy values used by the application factory.
"""
from __future__ import annotations
DEFAULT_CORS_ALLOW_CREDENTIALS: bool = True
DEFAULT_CORS_ALLOW_HEADERS: list[str] = ["*"]
DEFAULT_CORS_ALLOW_METHODS: list[str] = ["*"]
DEFAULT_CORS_ORIGINS: list[str] = ["*"]

View File

@ -0,0 +1,143 @@
"""Global exception handler — uniform error envelope per v1 API brief §1.
Envelope shape (matches the v1 API brief §1 — ``request_id`` at the top
level alongside ``error``; the ``error`` object carries ``code`` /
``message`` plus ops-friendly ``timestamp`` / ``path`` for debugging)::
{
"request_id": "<32 lowercase hex chars — W3C trace_id format>",
"error": {
"code": "HTTP_ERROR" | "SYSTEM_ERROR",
"message": "<reason>",
"timestamp": "<ISO 8601 with tz>",
"path": "<request path>"
}
}
Rules:
- 4xx (DTO / business validation / HTTPException) → ``code="HTTP_ERROR"``
with the human-readable reason in ``message``.
- 5xx (unhandled exception) → ``code="SYSTEM_ERROR"`` with a fixed
``message="Internal server error"`` — internal exception details are
logged but never leak to the client.
- ``request_id`` is sourced from ``request.state.request_id`` (set by
upstream middleware); falls back to a freshly minted id when absent.
"""
from __future__ import annotations
from fastapi import HTTPException, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from starlette.status import (
HTTP_422_UNPROCESSABLE_ENTITY,
HTTP_500_INTERNAL_SERVER_ERROR,
)
from everos.component.utils.datetime import (
get_now_with_timezone,
to_iso_format,
)
from everos.core.observability.logging import get_logger
from everos.core.observability.tracing import gen_request_id
logger = get_logger(__name__)
_INTERNAL_ERROR_MESSAGE = "Internal server error"
def _request_id(request: Request) -> str:
"""Return the request_id set by middleware, or mint a fresh fallback."""
rid = getattr(request.state, "request_id", None)
if rid:
return str(rid)
return gen_request_id()
def _envelope(
    *,
    code: str,
    message: str,
    request: Request,
) -> dict[str, object]:
    """Assemble the error envelope returned by every handler branch.

    The top level holds ``request_id`` and ``error``; ``error`` carries the
    contract fields (``code`` / ``message``) plus the ops-friendly
    ``timestamp`` (current time, ISO formatted) and ``path``.
    """
    request_id = _request_id(request)
    error_body: dict[str, object] = {
        "code": code,
        "message": message,
        "timestamp": to_iso_format(get_now_with_timezone()),
        "path": str(request.url.path),
    }
    return {"request_id": request_id, "error": error_body}
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Convert any exception into a uniform JSON error response.

    Branches, in order:

    - ``RequestValidationError`` → 422 / ``HTTP_ERROR``; the message is built
      from the first reported error (its ``msg`` plus the dotted location,
      with the leading ``"body"`` element dropped).
    - ``HTTPException`` → the exception's own status code. Below 500 the
      ``detail`` is echoed as ``HTTP_ERROR``; at 500 and above the fixed
      internal-error message is used with ``SYSTEM_ERROR``. Headers attached
      to the exception (e.g. ``WWW-Authenticate``, ``Retry-After``) are
      forwarded on the response.
    - Anything else → 500 / ``SYSTEM_ERROR`` with the fixed message; the
      exception is logged with traceback but never echoed to the client.

    Args:
        request: The request being handled; supplies path, method and id.
        exc: The exception raised while handling ``request``.

    Returns:
        A ``JSONResponse`` whose body is the canonical error envelope.
    """
    path = str(request.url.path)
    method = request.method

    if isinstance(exc, RequestValidationError):
        errors = exc.errors()
        if errors:
            first = errors[0]
            loc = ".".join(str(p) for p in first.get("loc", []) if p != "body")
            msg = first.get("msg", "Validation error")
            message = f"{msg}: {loc}" if loc else msg
        else:
            message = "Request validation error"
        logger.warning("validation_error", method=method, path=path, message=message)
        return JSONResponse(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content=_envelope(code="HTTP_ERROR", message=message, request=request),
        )

    if isinstance(exc, HTTPException):
        logger.warning(
            "http_exception",
            method=method,
            path=path,
            status_code=exc.status_code,
            detail=exc.detail,
        )
        # Headers set on the exception are part of the HTTP contract (auth
        # challenges, retry hints); dropping them would silently break
        # clients that depend on them.
        headers = getattr(exc, "headers", None)
        # 5xx routed through HTTPException is rare but valid; still honour
        # the SYSTEM_ERROR code so the envelope is consistent.
        if exc.status_code >= 500:
            return JSONResponse(
                status_code=exc.status_code,
                content=_envelope(
                    code="SYSTEM_ERROR",
                    message=_INTERNAL_ERROR_MESSAGE,
                    request=request,
                ),
                headers=headers,
            )
        return JSONResponse(
            status_code=exc.status_code,
            content=_envelope(
                code="HTTP_ERROR",
                message=str(exc.detail),
                request=request,
            ),
            headers=headers,
        )

    logger.error(
        "unhandled_exception",
        method=method,
        path=path,
        exception_type=type(exc).__name__,
        exc_info=True,
    )
    return JSONResponse(
        status_code=HTTP_500_INTERNAL_SERVER_ERROR,
        content=_envelope(
            code="SYSTEM_ERROR",
            message=_INTERNAL_ERROR_MESSAGE,
            request=request,
        ),
    )

View File

@ -0,0 +1,69 @@
"""Performance profiling middleware (HTML report via pyinstrument).
Triggered with the ``?profile=true`` query parameter when the
``PROFILING_ENABLED`` environment variable (or its ``PROFILING`` fallback) is
set to ``1`` / ``true`` / ``yes``. Gracefully no-ops if pyinstrument is not
installed.
"""
from __future__ import annotations
import os
from collections.abc import Awaitable, Callable
from fastapi import Request
from fastapi.responses import HTMLResponse
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
from starlette.types import ASGIApp
from everos.core.observability.logging import get_logger
logger = get_logger(__name__)
_TRUTHY = frozenset({"1", "true", "yes"})
def _profiling_enabled() -> bool:
    """Report whether profiling is switched on through the environment.

    ``PROFILING_ENABLED`` is consulted first and ``PROFILING`` is the
    fallback; the value is lower-cased and must be one of
    ``1`` / ``true`` / ``yes``. With neither variable set the result is
    ``False``.
    """
    fallback = os.getenv("PROFILING", "false")
    flag = os.getenv("PROFILING_ENABLED", fallback)
    return flag.lower() in _TRUTHY
class ProfileMiddleware(BaseHTTPMiddleware):
    """Returns a pyinstrument HTML report when ``?profile=true`` is set.

    The middleware is inert unless profiling is enabled through the
    environment *and* pyinstrument can be imported; both are checked once,
    at construction time.
    """

    def __init__(self, app: ASGIApp) -> None:
        """Wrap ``app`` and decide once whether profiling can be served."""
        super().__init__(app)
        self._enabled = _profiling_enabled()
        self._available = False
        if self._enabled:
            try:
                import pyinstrument  # noqa: F401
            except ImportError:
                logger.warning("profiling_requested_but_pyinstrument_missing")
                self._enabled = False
            else:
                self._available = True
                logger.info("profiling_middleware_enabled")

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Profile the downstream call and return the HTML report.

        Requests pass through untouched unless profiling is active and the
        ``profile`` query parameter is truthy. For a profiled request the
        downstream response is discarded and the report is returned with
        status 200; a downstream ``Exception`` is logged and the partial
        profile is still returned.
        """
        if not self._enabled or not self._available:
            return await call_next(request)
        if request.query_params.get("profile", "").lower() not in _TRUTHY:
            return await call_next(request)

        from pyinstrument import Profiler

        profiler = Profiler()
        logger.info("profile_started", method=request.method, path=request.url.path)
        profiler.start()
        try:
            await call_next(request)
        except Exception:
            logger.exception("profile_request_failed")
        finally:
            # Always stop, including on task cancellation (a BaseException
            # that the handler above does not catch), so a profiler is never
            # left running after the request is gone.
            profiler.stop()
        return HTMLResponse(content=profiler.output_html(), status_code=200)

View File

@ -0,0 +1,84 @@
"""Prometheus HTTP metrics middleware.
Auto-instruments incoming HTTP requests with a request counter and a
duration histogram. Mounted via ``app.add_middleware(PrometheusMiddleware)``.
Skips internal endpoints (``/metrics``, ``/health``, etc.) so they do not
inflate cardinality or pollute their own statistics.
"""
from __future__ import annotations
import time
from collections.abc import Awaitable, Callable
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from everos.core.observability.logging import get_logger
from everos.core.observability.metrics import Counter, Histogram, HistogramBuckets
logger = get_logger(__name__)
_http_requests_total = Counter(
name="http_requests_total",
description="Total number of HTTP requests handled.",
labelnames=("method", "path", "status"),
namespace="everos",
)
_http_request_duration_seconds = Histogram(
name="http_request_duration_seconds",
description="HTTP request duration in seconds.",
labelnames=("method", "path"),
namespace="everos",
buckets=HistogramBuckets.DEFAULT,
)
_SKIP_PATHS = frozenset({"/metrics", "/health", "/healthz", "/favicon.ico"})
def _normalize_path(request: Request) -> str:
    """Resolve a stable, low-cardinality path label for ``request``.

    Resolution order:

    1. If ``request.scope["route"]`` exists and has a ``path`` attribute,
       return that route template (e.g. ``/users/{user_id}``).
    2. Otherwise, if the request has path params, rebuild a template from
       the concrete URL path by replacing each param value with ``{name}``.
    3. Otherwise return the literal ``"{unmatched}"``.

    The fallback in step 2 substitutes in a single regex pass and only
    where the value is not glued to a neighbouring word character, so:

    * value ``1`` no longer rewrites the ``1`` inside ``/v1/...``;
    * an empty param value is ignored instead of injecting the placeholder
      between every character of the path;
    * placeholders already written are never rescanned, so one param's
      value cannot corrupt another param's placeholder.

    Args:
        request: The incoming request (scope, path params and URL are read).

    Returns:
        The path label to use for metrics.
    """
    # Local import keeps this block self-contained; the lookup is a cheap
    # ``sys.modules`` hit after the first call.
    import re

    scope = getattr(request, "scope", {})
    route = scope.get("route") if isinstance(scope, dict) else None
    if route is not None and hasattr(route, "path"):
        return route.path

    if request.path_params:
        path = request.url.path
        # value text -> param name; the first param wins on duplicate values,
        # and empty values are dropped (nothing sensible to substitute).
        name_by_value: dict[str, str] = {}
        for name, value in request.path_params.items():
            text = str(value)
            if text:
                name_by_value.setdefault(text, name)
        if not name_by_value:
            return path
        # Longest value first so a multi-segment value ("a/b") wins over a
        # shorter value ("a") that happens to be its prefix.
        alternation = "|".join(
            re.escape(text) for text in sorted(name_by_value, key=len, reverse=True)
        )
        pattern = rf"(?<!\w)(?:{alternation})(?!\w)"
        return re.sub(
            pattern,
            lambda match: f"{{{name_by_value[match.group(0)]}}}",
            path,
        )

    return "{unmatched}"
class PrometheusMiddleware(BaseHTTPMiddleware):
    """HTTP middleware feeding the request counter and latency histogram.

    Every request whose path is not in ``_SKIP_PATHS`` increments
    ``http_requests_total`` and records one observation in
    ``http_request_duration_seconds``.
    """

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Time the downstream call and record its outcome.

        The status label starts as ``"500"`` and is only overwritten once
        a response object is available, so a handler that raises is still
        counted (as a 500) before the exception propagates.
        """
        if request.url.path in _SKIP_PATHS:
            return await call_next(request)

        http_method = request.method
        started_at = time.perf_counter()
        status_label = "500"
        try:
            downstream = await call_next(request)
            status_label = str(downstream.status_code)
            return downstream
        finally:
            elapsed = time.perf_counter() - started_at
            # Resolved after the downstream call, once routing has run.
            path_label = _normalize_path(request)
            counter = _http_requests_total.labels(
                method=http_method, path=path_label, status=status_label
            )
            counter.inc()
            histogram = _http_request_duration_seconds.labels(
                method=http_method, path=path_label
            )
            histogram.observe(elapsed)

View File

@ -0,0 +1,13 @@
"""structlog-based logging factory.
External usage:
from everos.core.observability.logging import get_logger, configure_logging
logger = get_logger(__name__)
logger.info("event_name", key=value)
"""
from .factory import configure_logging as configure_logging
from .factory import get_logger as get_logger
__all__ = ["configure_logging", "get_logger"]

View File

@ -0,0 +1,117 @@
"""structlog logger factory.
Provides ``get_logger(__name__)`` for module-level logger acquisition.
``configure_logging()`` is called once at process startup (run.py / lifespan)
to set up the structlog processor chain and route stdlib logging through
the same formatter so output stays uniform regardless of the caller.
The configuration follows structlog's official "Foreign Log Integration"
recipe: a single ``ProcessorFormatter`` renders both everos's own
``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
That way all three of the previously divergent prefixes — ``INFO:``,
``[warning ]``, plus the unconfigured no-prefix output — collapse to
one ``[level] event key=value`` shape.
Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
crate and emit straight to stderr without going through Python; this
module cannot reach them. Control their level with ``RUST_LOG`` env.
"""
from __future__ import annotations
import logging
import sys
from typing import Any
import structlog
def get_logger(name: str) -> Any:
    """Fetch the structlog logger for ``name``.

    Thin pass-through to ``structlog.get_logger``; callers conventionally
    pass ``__name__``.
    """
    return structlog.get_logger(name)
def configure_logging(level: str = "INFO") -> None:
    """Configure structlog and stdlib logging once at process startup.

    After this call:

    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
      message flows through the same ``ProcessorFormatter``, so output
      format is identical regardless of which logging API the caller used.
    * The root logger carries one everos ``StreamHandler`` pointing at
      ``sys.stdout``. Handlers installed by an earlier
      ``configure_logging`` call (recognised by their
      ``ProcessorFormatter``) are dropped so repeated calls do not
      duplicate output. Handlers attached by other parties — pytest's
      ``caplog`` handler in particular — are **kept**.

    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
    on the server entry point — without it, uvicorn re-installs its own
    handlers on every startup and overrides what we set here.

    Args:
        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``),
            case-insensitive. Names that do not resolve to an integer
            level constant on the ``logging`` module fall back to ``INFO``.
    """
    # ``getattr`` alone is not enough: ``logging`` also exposes non-level
    # upper-case attributes (e.g. ``BASIC_FORMAT`` is a str), which would
    # otherwise leak through as a bogus "level".
    resolved = getattr(logging, level.upper(), None)
    log_level = resolved if isinstance(resolved, int) else logging.INFO

    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # structlog's own loggers feed into stdlib's logging, so the root
    # logger handler decides where output lands and how it's rendered.
    structlog.configure(
        processors=[
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # The single formatter shared by both pipelines:
    #   * structlog events arrive already wrapped via ``wrap_for_formatter``;
    #   * foreign records (stdlib LogRecord) get pushed through
    #     ``foreign_pre_chain`` so they pick up the same level / timestamp
    #     fields before hitting ``ConsoleRenderer``.
    formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            structlog.dev.ConsoleRenderer(),
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Drop any handler we installed on a previous ``configure_logging``
    # call (identified by formatter type) so repeated invocations don't
    # produce duplicate output, but keep handlers other parties have
    # attached — pytest's caplog handler in particular has to survive,
    # otherwise tests using the ``caplog`` fixture can't see records
    # that flow through structlog.
    root = logging.getLogger()
    root.handlers = [
        h
        for h in root.handlers
        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
    ]
    root.addHandler(handler)
    root.setLevel(log_level)

    # Third-party HTTP clients log every successful request at INFO level —
    # `httpx` is the worst offender (one line per call, called once per
    # LLM / embedding / rerank request). A single LoCoMo conv run easily
    # produces a thousand such lines, drowning everos's own events. They
    # are useful for debugging API failures, but failures already surface
    # via exceptions + status codes — so demote the success path to WARNING
    # and let real errors still come through.
    for noisy in ("httpx", "httpcore", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

View File

@ -0,0 +1,34 @@
"""Prometheus-style metrics primitives + registry.
External usage:
from everos.core.observability.metrics import (
Counter, Gauge, Histogram, HistogramBuckets,
get_metrics_registry, generate_metrics_response,
)
"""
from .counter import Counter as Counter
from .counter import LabeledCounter as LabeledCounter
from .gauge import Gauge as Gauge
from .gauge import LabeledGauge as LabeledGauge
from .histogram import Histogram as Histogram
from .histogram import HistogramBuckets as HistogramBuckets
from .histogram import LabeledHistogram as LabeledHistogram
from .registry import generate_metrics_response as generate_metrics_response
from .registry import get_metrics_registry as get_metrics_registry
from .registry import reset_metrics_registry as reset_metrics_registry
from .registry import set_metrics_registry as set_metrics_registry
__all__ = [
"Counter",
"Gauge",
"Histogram",
"HistogramBuckets",
"LabeledCounter",
"LabeledGauge",
"LabeledHistogram",
"generate_metrics_response",
"get_metrics_registry",
"reset_metrics_registry",
"set_metrics_registry",
]

View File

@ -0,0 +1,50 @@
"""Counter wrapper around ``prometheus_client.Counter``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Counter as PromCounter
from .registry import get_metrics_registry
class Counter:
    """Thin wrapper over a ``prometheus_client`` counter.

    A counter only ever goes up; use it for totals and error counts. The
    underlying collector is registered on whatever
    ``get_metrics_registry()`` returns at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create and register the underlying Prometheus counter.

        Args:
            name: Metric name.
            description: Forwarded as the Prometheus ``documentation`` text.
            labelnames: Label names for this metric.
            namespace: Forwarded to ``prometheus_client`` unchanged.
            subsystem: Forwarded to ``prometheus_client`` unchanged.
            unit: Forwarded to ``prometheus_client`` unchanged.
        """
        self._counter = PromCounter(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=get_metrics_registry(),
        )
        self._labelnames = tuple(labelnames)

    def labels(self, **labels: str) -> LabeledCounter:
        """Return the child counter for the given label values."""
        child = self._counter.labels(**labels)
        return LabeledCounter(child)

    def inc(self, amount: float = 1.0) -> None:
        """Increase the counter by ``amount`` (default ``1.0``)."""
        self._counter.inc(amount)
class LabeledCounter:
    """A counter child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child returned by the Prometheus counter."""
        self._labeled = labeled

    def inc(self, amount: float = 1.0) -> None:
        """Increase this labelled series by ``amount`` (default ``1.0``)."""
        self._labeled.inc(amount)

View File

@ -0,0 +1,66 @@
"""Gauge wrapper around ``prometheus_client.Gauge``.
Async auto-refresh is intentionally not included in v0.1; subclass
:class:`Gauge` and call :meth:`set` from your own scheduling logic when
needed.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Gauge as PromGauge
from .registry import get_metrics_registry
class Gauge:
    """Thin wrapper over a ``prometheus_client`` gauge.

    A gauge holds a value that can go up and down (queue depth, cache
    size). The underlying collector is registered on whatever
    ``get_metrics_registry()`` returns at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create and register the underlying Prometheus gauge.

        ``description`` is forwarded as the Prometheus ``documentation``
        text; every other argument is passed through unchanged.
        """
        self._gauge = PromGauge(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=get_metrics_registry(),
        )

    def labels(self, **labels: str) -> LabeledGauge:
        """Return the child gauge for the given label values."""
        child = self._gauge.labels(**labels)
        return LabeledGauge(child)

    def set(self, value: float) -> None:
        """Overwrite the gauge with ``value``."""
        self._gauge.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.dec(amount)
class LabeledGauge:
    """A gauge child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child returned by the Prometheus gauge."""
        self._labeled = labeled

    def set(self, value: float) -> None:
        """Overwrite this labelled series with ``value``."""
        self._labeled.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise this labelled series by ``amount`` (default ``1.0``)."""
        self._labeled.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower this labelled series by ``amount`` (default ``1.0``)."""
        self._labeled.dec(amount)

View File

@ -0,0 +1,102 @@
"""Histogram wrapper around ``prometheus_client.Histogram``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Histogram as PromHistogram
from .registry import get_metrics_registry
class HistogramBuckets:
    """Named presets of histogram bucket upper bounds.

    Each preset is an ascending tuple of floats, in whatever unit the
    histogram observes.
    """

    # 0.005 … 10.0 — the default for ``Histogram``.
    DEFAULT: tuple[float, ...] = (
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
    )  # fmt: skip

    # 0.001 … 0.5
    FAST: tuple[float, ...] = (0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5)

    # 0.01 … 30.0
    API_CALL: tuple[float, ...] = (
        0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0,
    )  # fmt: skip

    # 0.1 … 60.0
    BATCH: tuple[float, ...] = (0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)

    # 0.001 … 5.0
    DATABASE: tuple[float, ...] = (
        0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
    )  # fmt: skip
class Histogram:
    """Thin wrapper over a ``prometheus_client`` histogram.

    Records the distribution of observed values (latency, sizes). The
    underlying collector is registered on whatever
    ``get_metrics_registry()`` returns at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
    ) -> None:
        """Create and register the underlying Prometheus histogram.

        ``description`` is forwarded as the Prometheus ``documentation``
        text and ``buckets`` is converted to a tuple before being passed
        on; every other argument is passed through unchanged.
        """
        bucket_bounds = tuple(buckets)
        self._histogram = PromHistogram(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            buckets=bucket_bounds,
            registry=get_metrics_registry(),
        )

    def labels(self, **labels: str) -> LabeledHistogram:
        """Return the child histogram for the given label values."""
        child = self._histogram.labels(**labels)
        return LabeledHistogram(child)

    def observe(self, amount: float) -> None:
        """Record one observation of ``amount``."""
        self._histogram.observe(amount)

    def time(self) -> Any:
        """Return the underlying histogram's ``time()`` helper."""
        return self._histogram.time()
class LabeledHistogram:
    """A histogram child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child returned by the Prometheus histogram."""
        self._labeled = labeled

    def observe(self, amount: float) -> None:
        """Record one observation of ``amount`` on this labelled series."""
        self._labeled.observe(amount)

    def time(self) -> Any:
        """Return the wrapped child's ``time()`` helper."""
        return self._labeled.time()

View File

@ -0,0 +1,35 @@
"""Prometheus metrics registry singleton."""
from __future__ import annotations
from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
# Process-wide registry override. ``None`` means "not resolved yet":
# ``get_metrics_registry`` then falls back to ``prometheus_client.REGISTRY``.
_registry: CollectorRegistry | None = None
def get_metrics_registry() -> CollectorRegistry:
    """Return the registry metrics should be registered on.

    If no registry has been set, ``prometheus_client.REGISTRY`` is
    stored as the module-level registry and returned.
    """
    global _registry
    if _registry is not None:
        return _registry
    _registry = REGISTRY
    return _registry
def set_metrics_registry(registry: CollectorRegistry) -> None:
    """Install ``registry`` as the module-level registry (mainly for tests).

    Only the module-level reference is replaced; nothing else is touched.
    """
    global _registry
    _registry = registry
def generate_metrics_response() -> bytes:
    """Serialise the active registry to Prometheus exposition format.

    Returns:
        The bytes produced by ``prometheus_client.generate_latest``.
    """
    active_registry = get_metrics_registry()
    return generate_latest(active_registry)
def reset_metrics_registry() -> None:
    """Clear the module-level registry reference (mainly for tests).

    Afterwards the reference is ``None`` again.
    """
    global _registry
    _registry = None

View File

@ -0,0 +1,32 @@
"""Tracing utilities — W3C-compatible request id generation.
External usage::
from everos.core.observability.tracing import gen_request_id
"""
from __future__ import annotations
from uuid import uuid4
def gen_request_id() -> str:
    """Mint a fresh request id shaped like a W3C trace-context ``trace_id``.

    The id is a random UUID4 rendered as 32 lowercase hex characters
    (128 bits, no prefix, no dashes) — the same format OpenTelemetry uses
    for trace identifiers. Routes and services that need a new request id
    because upstream middleware did not inject one should call this
    helper instead of inventing their own uuid / prefix scheme, so ids
    stay compatible with OpenTelemetry exporters and standard APM tooling.

    Example::

        >>> rid = gen_request_id()
        >>> len(rid)
        32
    """
    # Same rendering as ``UUID.hex``: zero-padded, lowercase, 32 digits.
    return format(uuid4().int, "032x")
__all__ = ["gen_request_id"]

View File

@ -0,0 +1,106 @@
"""Persistence primitives.
Read/write toolkit for markdown files, async wrappers around the SQLite
system DB and LanceDB index, plus a memory-root path manager. Higher
layers (``memory``, ``infra``) layer business semantics on top of these
building blocks; this subpackage knows nothing about Entry / MemCell /
Episode or any other business model.
External usage:
from everos.core.persistence import (
# Path manager + lock
MemoryRoot, memory_root_lock, LockError,
# Markdown IO toolkit
MarkdownReader, MarkdownWriter, ParsedMarkdown, Entry,
parse_frontmatter, dump_frontmatter, split_entries, find_entry,
# Frontmatter schema chassis
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
DailyLogPathMixin, SkillPathMixin,
# Async SQLite (SQLModel / SA 2.0)
create_system_engine, create_session_factory, session_scope,
SQLModel, Field, Relationship, BaseTable, RepoBase,
# Async LanceDB
open_lancedb_connection, LanceModel, Vector, BaseLanceTable, touch,
LanceRepoBase,
)
"""
from .lancedb import BaseLanceTable as BaseLanceTable
from .lancedb import LanceModel as LanceModel
from .lancedb import LanceRepoBase as LanceRepoBase
from .lancedb import Vector as Vector
from .lancedb import open_lancedb_connection as open_lancedb_connection
from .lancedb import touch as touch
from .locking import LockError as LockError
from .locking import memory_root_lock as memory_root_lock
from .markdown import AgentScopedFrontmatter as AgentScopedFrontmatter
from .markdown import BaseFrontmatter as BaseFrontmatter
from .markdown import DailyLogPathMixin as DailyLogPathMixin
from .markdown import Entry as Entry
from .markdown import EntryId as EntryId
from .markdown import MarkdownReader as MarkdownReader
from .markdown import MarkdownWriter as MarkdownWriter
from .markdown import ParsedMarkdown as ParsedMarkdown
from .markdown import SkillPathMixin as SkillPathMixin
from .markdown import StructuredEntry as StructuredEntry
from .markdown import UserScopedFrontmatter as UserScopedFrontmatter
from .markdown import dump_frontmatter as dump_frontmatter
from .markdown import find_entry as find_entry
from .markdown import parse_frontmatter as parse_frontmatter
from .markdown import parse_structured_entry as parse_structured_entry
from .markdown import render_structured_entry as render_structured_entry
from .markdown import split_entries as split_entries
from .memory_root import MemoryRoot as MemoryRoot
from .memory_root import app_dir_name as app_dir_name
from .memory_root import app_id_from_dir as app_id_from_dir
from .memory_root import project_dir_name as project_dir_name
from .memory_root import project_id_from_dir as project_id_from_dir
from .sqlite import BaseTable as BaseTable
from .sqlite import Field as Field
from .sqlite import Relationship as Relationship
from .sqlite import RepoBase as RepoBase
from .sqlite import SQLModel as SQLModel
from .sqlite import create_session_factory as create_session_factory
from .sqlite import create_system_engine as create_system_engine
from .sqlite import session_scope as session_scope
__all__ = [
"AgentScopedFrontmatter",
"BaseFrontmatter",
"BaseLanceTable",
"BaseTable",
"DailyLogPathMixin",
"Entry",
"EntryId",
"Field",
"LanceModel",
"LanceRepoBase",
"LockError",
"MarkdownReader",
"MarkdownWriter",
"MemoryRoot",
"ParsedMarkdown",
"Relationship",
"RepoBase",
"SkillPathMixin",
"StructuredEntry",
"SQLModel",
"UserScopedFrontmatter",
"Vector",
"app_dir_name",
"app_id_from_dir",
"create_session_factory",
"create_system_engine",
"dump_frontmatter",
"find_entry",
"memory_root_lock",
"project_dir_name",
"project_id_from_dir",
"open_lancedb_connection",
"parse_frontmatter",
"parse_structured_entry",
"render_structured_entry",
"session_scope",
"split_entries",
"touch",
]

View File

@ -0,0 +1,34 @@
"""LanceDB async persistence.
External usage (connection):
from everos.core.persistence.lancedb import open_lancedb_connection
External usage (ORM model basics — re-exported from lancedb.pydantic):
from everos.core.persistence.lancedb import (
LanceModel, Vector, BaseLanceTable, touch,
)
External usage (generic CRUD repository base):
from everos.core.persistence.lancedb import LanceDailyLogRepoBase, LanceRepoBase
"""
# Re-export the LanceDB-flavoured Pydantic primitives so business code has a
# single canonical entry point for table schemas.
from lancedb.pydantic import LanceModel as LanceModel
from lancedb.pydantic import Vector as Vector
from .base import BaseLanceTable as BaseLanceTable
from .base import touch as touch
from .connection import open_lancedb_connection as open_lancedb_connection
from .repository import LanceDailyLogRepoBase as LanceDailyLogRepoBase
from .repository import LanceRepoBase as LanceRepoBase
__all__ = [
"BaseLanceTable",
"LanceDailyLogRepoBase",
"LanceModel",
"LanceRepoBase",
"Vector",
"open_lancedb_connection",
"touch",
]

View File

@ -0,0 +1,158 @@
"""Common LanceDB base for everos tables.
:class:`BaseLanceTable` adds ``created_at`` / ``updated_at`` columns and
the :attr:`BM25_FIELDS` declaration + :meth:`ensure_fts_indexes`
classmethod so each schema owns *both* its column shape **and** its
BM25 index spec — repos stay focused on queries.
Note:
LanceDB has no SQL ``onupdate`` equivalent — the application must
explicitly set ``updated_at = get_utc_now()`` before calling
:meth:`AsyncTable.update` / :meth:`AsyncTable.merge_insert`. The
convenience :func:`touch` helper does this in one call.
**Every datetime column automatically carries ``tz=UTC`` in the
Arrow schema.** LanceDB's Pydantic→PyArrow converter does not
understand ``typing.Annotated`` metadata, so :data:`UtcDatetime`
cannot be used as the field type annotation. Instead,
:meth:`BaseLanceTable.to_arrow_schema` walks the inferred schema
and rewrites every ``timestamp[us]`` (naive) column to
``timestamp[us, tz=UTC]``. PyArrow then auto-``astimezone(UTC)``
aware inputs on write **and** returns aware UTC datetimes on read
— no per-table configuration, no caller-side ``ensure_utc``.
Subclasses just declare ``datetime`` fields normally::
class Episode(BaseLanceTable):
timestamp: dt.datetime
"""
from __future__ import annotations
import datetime as dt
from typing import ClassVar
import pyarrow as pa
from lancedb import AsyncTable
from lancedb.index import FTS
from lancedb.pydantic import LanceModel
from pydantic import Field
from everos.component.utils.datetime import get_utc_now
class BaseLanceTable(LanceModel):
    """Pydantic / LanceDB base with ``created_at`` / ``updated_at`` and
    schema-level LanceDB metadata (``TABLE_NAME`` / ``BM25_FIELDS``).

    The schema is the single source of truth for everything LanceDB
    needs to materialise the table: column shape, table name, vector
    dim (declared per-subclass), and which columns carry an FTS index.
    Repos read these ClassVars; they do not duplicate them.
    """

    TABLE_NAME: ClassVar[str] = ""
    """LanceDB table name. Business schemas must override (e.g.
    ``"episode"``). Left empty on chassis / test schemas that construct
    their table inline."""

    BM25_FIELDS: ClassVar[list[str]] = []
    """Columns to build LanceDB FTS (BM25) indexes on.

    Each declared column must already exist as a ``str`` (or
    ``str | None``) field on the schema. Tokens are assumed to be
    **app-layer pre-tokenised** (space-joined); the FTS index uses
    ``base_tokenizer="whitespace"`` so segmentation is owned by the
    app layer (:class:`JiebaTokenizer`). The same boundary owns stop-
    word filtering (English + Chinese); FTS-side ``remove_stop_words``
    is OFF. FTS *does* keep lightweight English-aware normalisation
    (``lower_case`` / ``stem`` / ``ascii_folding``) as a belt-and-
    braces layer on the same English tokens that survive jieba.

    See ``17_lancedb_tables_design.md`` §2.4.1 and
    :meth:`ensure_fts_indexes` below for the exact knobs."""

    created_at: dt.datetime = Field(default_factory=get_utc_now)
    updated_at: dt.datetime = Field(default_factory=get_utc_now)

    @classmethod
    def to_arrow_schema(cls) -> pa.Schema:
        """Patch the default Arrow schema: force every timestamp to ``tz=UTC``.

        The base ``LanceModel.to_arrow_schema()`` infers Arrow types from
        Pydantic field annotations and emits naive ``timestamp[us]`` for
        every :class:`datetime.datetime` column. We rewrite **every**
        timestamp column to ``timestamp[us, tz=UTC]``:

        * **on write** — PyArrow ``astimezone(UTC)``-s aware input
          automatically before serialising the i64 epoch micros.
        * **on read** — PyArrow returns aware UTC datetimes.

        Zero per-table configuration. The rewrite also **overrides any
        non-UTC tz** a subclass might have declared explicitly, because
        project convention is: storage is always UTC. Mixed-tz columns
        would violate the two-zone discipline (see
        ``docs/datetime.md``); enforcing UTC at the schema level closes
        that loophole.

        Only the *type* of timestamp fields is changed: each field's
        name, nullability and metadata are carried over via
        ``Field.with_type``, and schema-level metadata from the base
        schema is passed through.

        Returns:
            The patched :class:`pyarrow.Schema`.
        """
        base = super().to_arrow_schema()
        utc_micros = pa.timestamp("us", tz="UTC")
        patched_fields = [
            f.with_type(utc_micros) if pa.types.is_timestamp(f.type) else f
            for f in base
        ]
        return pa.schema(patched_fields, metadata=base.metadata)

    @classmethod
    async def ensure_fts_indexes(cls, table: AsyncTable) -> None:
        """Create FTS indexes on every column in :attr:`BM25_FIELDS`.

        Idempotent: columns that already have an index are skipped, so
        this is safe to call on every startup. The FTS config is fixed
        to the app-layer pre-tokenisation + LanceDB normalisation
        convention (designed for **multilingual mixed content**):

        - ``base_tokenizer="whitespace"`` — split on the spaces our
          app-layer tokenizer provider already inserted between tokens.
        - ``lower_case=True`` — Unicode-aware case-fold (English A→a;
          no-op on CJK characters).
        - ``stem=True`` — Porter / Snowball English stemmer per
          ``language="English"`` (tantivy default). CJK tokens have no
          stemmer and pass through untouched.
        - ``remove_stop_words=False`` — **stop-word removal is owned by
          the app-layer** (:class:`JiebaTokenizer`), which already drops
          both Chinese and English stop-words before tokens reach the
          FTS index. Keeping FTS-side filtering off avoids double-
          filtering and a divided source of truth.
        - ``ascii_folding=True`` — strips diacritics (é→e) on Latin
          characters; no-op on CJK.
        - ``with_position=True`` — enables phrase queries.

        Subclasses normally do not need to override this — declaring
        :attr:`BM25_FIELDS` is enough.

        Args:
            table: The opened LanceDB table to index.
        """
        if not cls.BM25_FIELDS:
            return
        indices = await table.list_indices()
        # NOTE(review): this collects columns from *every* listed index,
        # not only FTS ones — a BM25 column that already carries some other
        # index kind would be skipped. Confirm that cannot happen.
        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
        for field in cls.BM25_FIELDS:
            if field in indexed_cols:
                continue
            await table.create_index(
                column=field,
                config=FTS(
                    with_position=True,
                    base_tokenizer="whitespace",
                    lower_case=True,
                    stem=True,
                    remove_stop_words=False,
                    ascii_folding=True,
                ),
            )
def touch(record: BaseLanceTable) -> BaseLanceTable:
    """Stamp ``record.updated_at`` with ``get_utc_now()``.

    The record is mutated in place and the same object is returned, so
    the call can be chained inline.
    """
    record.updated_at = get_utc_now()
    return record

View File

@ -0,0 +1,68 @@
"""Async LanceDB connection factory.
LanceDB does not live inside the SQLAlchemy ecosystem; it has its own
``connect_async`` returning :class:`lancedb.AsyncConnection`. This module
is a thin wrapper that:
1. ensures the lancedb root directory exists
2. converts ``LanceDBSettings.read_consistency_seconds`` into the
:class:`datetime.timedelta` value LanceDB expects
3. installs a capped :class:`lancedb.Session` so the global index
cache cannot grow unbounded and exhaust file descriptors
(see :attr:`LanceDBSettings.index_cache_size_bytes` for the
full rationale)
"""
from __future__ import annotations
import datetime as dt
from pathlib import Path
import lancedb
from lancedb import AsyncConnection
from everos.config import LanceDBSettings
async def open_lancedb_connection(
    lancedb_dir: Path,
    lancedb_settings: LanceDBSettings,
) -> AsyncConnection:
    """Open an async LanceDB connection rooted at ``lancedb_dir``.

    Args:
        lancedb_dir: Filesystem path of the LanceDB root (typically
            ``MemoryRoot.lancedb_dir``). It is created, parents included,
            when it does not exist yet.
        lancedb_settings: Tunables. ``read_consistency_seconds`` becomes a
            :class:`~datetime.timedelta` (or stays ``None``), and
            ``index_cache_size_bytes`` caps the session's index cache.

    Returns:
        An :class:`AsyncConnection` ready for table operations.
    """
    # A synchronous mkdir is deliberate: it is a microsecond-fast syscall
    # that only does work on the first connect, so pulling in
    # anyio.Path / aiofiles for it is not worth it.
    lancedb_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240

    consistency_seconds = lancedb_settings.read_consistency_seconds
    interval: dt.timedelta | None = (
        None
        if consistency_seconds is None
        else dt.timedelta(seconds=consistency_seconds)
    )

    # Cap the index cache so its readers (each one holds the FDs of an
    # opened ``_indices/<uuid>/...`` directory) are LRU-evicted instead of
    # leaking; otherwise a long-running daemon's FD count grows
    # monotonically until ``EMFILE``. The metadata cache deliberately keeps
    # the lancedb default (unbounded): it holds parsed in-memory manifests
    # with zero FD pressure, and a cap there would just thrash. See
    # ``LanceDBSettings`` for the measurement behind the default size.
    capped_session = lancedb.Session(
        index_cache_size_bytes=lancedb_settings.index_cache_size_bytes,
        metadata_cache_size_bytes=None,
    )
    connection = await lancedb.connect_async(
        str(lancedb_dir),
        read_consistency_interval=interval,
        session=capped_session,
    )
    return connection

View File

@ -0,0 +1,530 @@
"""Generic CRUD repository for LanceDB-backed tables.
``LanceRepoBase`` mirrors the SQLite ``RepoBase`` shape: a pure generic
CRUD helper that knows nothing about a storage runtime. Concrete repos
either pass an :class:`AsyncTable` explicitly (typical in tests) or
override :meth:`_table_lookup` to pull the cached table from their
storage manager (typical in
:mod:`everos.infra.persistence.lancedb.repos`).
"""
from __future__ import annotations
import asyncio
import datetime as dt
from collections.abc import Sequence
from typing import Any, ClassVar
from lancedb import AsyncTable
from everos.core.observability.logging import get_logger
from .base import BaseLanceTable
logger = get_logger(__name__)
def _q(value: str) -> str:
    """Make ``value`` safe to embed in a single-quoted ``where`` literal.

    LanceDB predicates are plain strings — there is no parameterised
    query API — so every ``'`` is doubled to ``''``, the SQL-standard
    spelling of a literal quote inside a single-quoted string. everos
    primary keys (``<owner_id>_<entry_id>``) never contain quotes, so
    this is purely defensive.
    """
    # Splitting on the quote and re-joining with a doubled quote is
    # equivalent to replacing each occurrence.
    return "''".join(value.split("'"))
class LanceRepoBase[T: BaseLanceTable]:
"""Generic CRUD repository for one LanceDB table.
Subclass and bind to a schema. Two ways to provide the table:
1. **Explicit (tests / DI)** — pass it to ``__init__``::
repo = EpisodeRepo(table)
2. **Lazy hook (production singletons)** — override
:meth:`_table_lookup` so the repo can be instantiated as a
module-level singleton with no live connection yet::
class _EpisodeRepo(LanceRepoBase[Episode]):
schema = Episode
async def _table_lookup(self):
from everos.infra.persistence.lancedb.lancedb_manager import (
get_table,
)
return await get_table(self.schema.TABLE_NAME, self.schema)
episode_repo = _EpisodeRepo()
await episode_repo.add([Episode(text=..., vector=[...])])
The LanceDB table name lives on the schema (``BaseLanceTable.TABLE_NAME``)
so every LanceDB-side metadatum — column shape, table name,
vector dim, BM25 index spec — sits in one place. ``table_name``
here is a thin pass-through; subclasses normally do **not**
override it.
Write paths (``add`` / ``upsert`` / ``delete`` / ``delete_by_md_path``)
are serialised by a per-``table_name`` :class:`asyncio.Lock`. LanceDB's
``merge_insert`` is a read-modify-write at the storage layer with no
application-visible OCC contract — two concurrent calls against the
same table can race on the version manifest and lose updates even
when the row sets are disjoint (observed: cascade worker
``asyncio.gather`` over a batch of ``user_profile`` rows where one
write disappears). Serialising on the table name closes that window;
reads stay unlocked so search QPS is not impacted by writers.
Locks live in a class-level dict keyed by table name and are never
evicted (mirrors :mod:`everos.memory.strategies._partition_locks`
on bpo-28427 — a lock with pending waiters must outlive any dict
entry that points to it).
"""
schema: type[T]
_table_locks: ClassVar[dict[str, asyncio.Lock]] = {}
"""Per-table-name write lock pool (process-wide, lazily populated)."""
@property
def table_name(self) -> str:
"""LanceDB table name, resolved from :attr:`schema.TABLE_NAME`."""
return self.schema.TABLE_NAME
@classmethod
def _write_lock(cls, table_name: str) -> asyncio.Lock:
"""Return the write lock for ``table_name``; create on first use.
``dict.setdefault`` is atomic under single-threaded asyncio (no
``await`` between check and insert), so no meta-lock is needed.
"""
return cls._table_locks.setdefault(table_name, asyncio.Lock())
@classmethod
def _reset_locks_for_tests(cls) -> None:
"""Test-only: drop the write-lock pool.
``asyncio.Lock`` binds to the current event loop on first
``acquire()``; pytest-asyncio creates a fresh loop per test, so
a module-level lock surviving across tests fails with "bound to
a different event loop". The production cascade worker runs on
one loop forever and does not need this hook. Mirrors
:func:`everos.memory.strategies._partition_locks._reset_for_tests`.
"""
cls._table_locks.clear()
def __init__(self, table: AsyncTable | None = None) -> None:
"""Bind to a table directly; if ``None``, defer to ``_table_lookup``."""
self._table_override = table
async def _table_lookup(self) -> AsyncTable:
"""Resolve the table on first use. Override in subclass.
``LanceRepoBase`` itself has no idea where the runtime singleton
lives. The default raises so a missing override is loud rather
than silently broken.
"""
raise NotImplementedError(
f"{type(self).__name__}: pass table= to __init__ "
"or override _table_lookup() to wire the storage manager."
)
async def _table(self) -> AsyncTable:
if self._table_override is not None:
return self._table_override
return await self._table_lookup()
# ── Create ─────────────────────────────────────────────────────────────
async def add(self, records: Sequence[T]) -> None:
    """Append ``records`` to the table.

    The table is resolved first; the write then runs while holding this
    table's write lock. ``records`` is copied into a list inside the
    lock before being handed to LanceDB.
    """
    target = await self._table()
    write_guard = self._write_lock(self.table_name)
    async with write_guard:
        await target.add(list(records))
# ── Upsert ─────────────────────────────────────────────────────────────
async def upsert(
    self,
    records: Sequence[T],
    *,
    by: str = "id",
) -> None:
    """Insert-or-replace ``records``, matching on column ``by``.

    Built on LanceDB's ``merge_insert(on=...)`` fluent builder and
    equivalent to ``INSERT ... ON CONFLICT(by) DO UPDATE``: a row whose
    key already exists is replaced wholesale, any other row is inserted.

    Cascade relies on this when reconciling md → LanceDB — an entry seen
    for the first time is inserted, an entry edited in md overwrites its
    existing row.

    Args:
        records: Rows to write.
        by: Key column to match on (default ``"id"``).
    """
    target = await self._table()
    write_guard = self._write_lock(self.table_name)
    async with write_guard:
        merge = target.merge_insert(by)
        merge = merge.when_matched_update_all()
        merge = merge.when_not_matched_insert_all()
        await merge.execute(list(records))
# ── Maintenance ────────────────────────────────────────────────────────
async def optimize(self, *, cleanup_older_than: dt.timedelta | None = None) -> None:
    """Compact fragments + merge new data into the FTS / vector indexes.

    LanceDB's ``merge_insert`` writes new data into a fresh fragment.
    The FTS (BM25) index built by :meth:`ensure_fts_indexes` only
    covers fragments visible at index-build time, so rows written
    after the initial build can become **invisible to BM25 queries**
    until ``optimize()`` runs and merges those fragments into the
    index segment that the query engine reads.

    Symptom this guards against (verified on LoCoMo conv0): after
    steady-state cascade ingest, ``nearest_to_text("any_common_word")``
    returns 0 hits even though the column literally contains the
    token in 100% of rows — the new fragments simply hadn't been
    indexed.

    Cascade triggers this through a per-kind throttle + trailing
    edge scheduler (``CascadeWorker._schedule_optimize``): at most
    one run per ~1s window per kind, decoupled from the drain
    loop, with a 60s heartbeat sweep as a safety net. Cost is
    O(N) data-rewrite per optimized fragment; the throttle is how
    we cap it under sustained write pressure.

    Args:
        cleanup_older_than: When set, also prune (physically delete)
            files belonging to dataset versions older than this
            interval. ``None`` (default) compacts only — historical
            manifests, replaced data fragments, and stale index
            UUID files are kept on disk forever, which inflates the
            file count (and FD usage at scan time) without bound.
            Cascade passes a non-None value on a slower beat
            (``CascadeWorker._optimize_prune_interval``) so the
            hot drain path stays cheap. Note: this does *not*
            shrink **active** index internals (FTS ``part_N`` count
            or vector index UUID count) — those only collapse via
            ``drop_index + create_index``, which is not done here.
    """
    table = await self._table()
    # NOTE(review): unlike add / upsert / delete, this call is not wrapped in
    # ``self._write_lock(self.table_name)`` — confirm that is intentional.
    await table.optimize(cleanup_older_than=cleanup_older_than)
async def rebuild_indexes(self) -> None:
    """Drop and re-create every index on this table.

    **Why this exists** — workaround for an upstream Python API gap:
    Lance's Rust ``OptimizeOptions`` has a ``num_indices_to_merge``
    knob (default 1) that bounds the number of active index UUIDs
    per column. With ``Some(1)``, every ``optimize_indices()`` call
    merges its delta into the base — active UUID count stays at 1.

    Two problems block us from using it from the application layer:

    1. ``lancedb.AsyncTable.optimize()`` does **not expose** this
       parameter (verified on lancedb main 2026-05-28). It forwards
       only ``cleanup_since_ms`` and ``delete_unverified`` to Rust.
    2. Even calling Lance directly via ``pylance``, the merge
       behaviour itself is buggy on ``lance crate 4.0`` (what
       lancedb 0.30.2 embeds) — ``num_indices_to_merge=1`` does
       nothing. Fix landed in ``lance 7.x``, but ``pylance 7.x``
       can not collapse indexes on a ``lance 4.0``-format dataset
       (verified by experiment).

    So in our current stack there is **no application-level path**
    to bound active index UUID growth. ``optimize()`` keeps
    accumulating one new UUID (vector) / one new ``part_N`` (FTS)
    per call.

    This method is the workaround: drop every existing index and
    rebuild from the schema's ``ensure_fts_indexes`` contract. The
    rebuild is **O(N) full retrain** but cheap in practice (~0.3s
    for 50k rows × 2 FTS columns on local SSD), and during the
    window LanceDB transparently falls back to brute-force scan so
    queries and writes stay available.

    **Cadence** — :class:`CascadeWorker` runs this on a slow loop
    (default 12h per kind). Frequency is bounded by the rebuild
    cost, not by correctness — even daily is fine functionally;
    12h is a conservative pick to keep file/UUID counts well below
    any FD ceiling under steady-state ingest.

    **When to remove** — once lancedb exposes ``num_indices_to_merge``
    on the async Python API **and** the embedded ``lance crate``
    ships the working merge implementation, delete this method and
    switch to ``optimize(num_indices_to_merge=1)`` in the regular
    ``optimize()`` path. Tracking issues / context:

    - https://github.com/lancedb/lancedb/issues/2193
    - https://github.com/lancedb/lancedb/issues/3177
    - https://github.com/lance-format/lance/pull/6711 (partial fix
      in lance v7.0.0)
    - https://docs.rs/lancedb/latest/lancedb/table/struct.OptimizeOptions.html
    """
    table = await self._table()
    # Drop + rebuild happen under the same write-lock context as the
    # other mutating methods, so they do not interleave with those writers.
    async with self._write_lock(self.table_name):
        # NOTE(review): every index returned by ``list_indices()`` is
        # dropped, but only ``ensure_fts_indexes`` runs afterwards — confirm
        # it also restores any non-FTS (vector / scalar) index, otherwise
        # those stay dropped. A failure after the drops also leaves the
        # table without indexes until the next successful run.
        for idx in await table.list_indices():
            await table.drop_index(idx.name)
        await self.schema.ensure_fts_indexes(table)
# ── Read ───────────────────────────────────────────────────────────────
async def count(self) -> int:
    """Return the table's ``count_rows()`` result (no filter applied)."""
    return await (await self._table()).count_rows()
async def get_by_id(
    self,
    id_value: str,
    *,
    id_field: str = "id",
) -> T | None:
    """Fetch one row by scalar key; ``None`` if nothing matches.

    Runs the scalar filter ``<id_field> = '<id_value>'`` with a limit of
    one row. ``id_value`` goes through ``_q`` before being embedded in
    the predicate; ``id_field`` is interpolated verbatim, so it must be a
    trusted column name.

    Args:
        id_value: Key value to look up.
        id_field: Column holding the key (default ``"id"``).

    Returns:
        The row validated through ``self.schema.model_validate``, or
        ``None`` when the query returns no rows.
    """
    table = await self._table()
    predicate = f"{id_field} = '{_q(id_value)}'"
    matches = await table.query().where(predicate).limit(1).to_list()
    return self.schema.model_validate(matches[0]) if matches else None
async def find_where(
    self,
    where: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Run a scalar-filter query and return *typed* schema instances.

    No vector search is involved; use :meth:`search` for raw row dicts,
    ``_distance``, or ANN combined with a filter.

    Args:
        where: SQL-like predicate passed to the query unchanged.
        limit: Maximum number of rows to fetch (default 100).

    Returns:
        Each fetched row passed through ``self.schema.model_validate``.
    """
    table = await self._table()
    query = table.query().where(where).limit(limit)
    validate = self.schema.model_validate
    return [validate(row) for row in await query.to_list()]
async def find_one_where(self, where: str) -> T | None:
    """Return the first row matching ``where`` via :meth:`find_where`, else ``None``."""
    matches = await self.find_where(where, limit=1)
    if not matches:
        return None
    return matches[0]
async def find_where_paginated(
    self,
    where: str,
    *,
    sort_by: str,
    descending: bool = True,
    page: int = 1,
    page_size: int = 20,
    max_fetch: int = 20000,
) -> tuple[list[T], int]:
    """Paginated scalar query with in-memory sort.

    LanceDB has no native ``ORDER BY``. The chassis fetches up to
    ``max_fetch`` rows matching ``where``, sorts the resulting Arrow
    table by ``sort_by``, then slices ``page`` × ``page_size``. The
    *true* row count of the predicate is returned alongside the
    page so callers can render pagination controls without a second
    query.

    Pages that cannot contain rows (no match at all, ``page_size == 0``,
    or an offset at/after the fetched window) return ``([], total)``
    straight after the count, without fetching or sorting anything.

    Args:
        where: SQL-like scalar predicate. Required (no implicit
            full-table scan from ``find_where_paginated``).
        sort_by: Column name to sort the result set by.
        descending: ``True`` (default) sorts descending; ``False``
            ascending.
        page: 1-indexed page number.
        page_size: Rows per page.
        max_fetch: Cap on rows pulled before the in-memory sort.
            When the predicate matches more rows than this cap the
            page is sorted over an *arbitrary* prefix and the page
            contents are only approximately correct — the chassis
            emits a warning so the caller learns about the
            truncation.

    Returns:
        ``(rows, total)`` — ``rows`` is the typed page,
        ``total`` is ``count_rows(filter=where)`` (the predicate's
        true match count, regardless of ``max_fetch``).

    Raises:
        ValueError: If ``page < 1`` or ``page_size < 0``. Previously such
            values only failed (or misbehaved) at the Arrow slice, after
            the count, fetch and sort had already been paid for.
    """
    # Validate before touching storage: a non-positive page would turn
    # into a negative slice offset further down.
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")
    if page_size < 0:
        raise ValueError(f"page_size must be >= 0, got {page_size}")

    table = await self._table()
    total = await table.count_rows(filter=where)
    if total > max_fetch:
        logger.warning(
            "find_where_paginated truncated",
            extra={
                "table": self.table_name,
                "where": where,
                "total": total,
                "max_fetch": max_fetch,
            },
        )

    offset = (page - 1) * page_size
    # At most min(total, max_fetch) rows are ever fetched; a page starting
    # at or past that window is empty, so skip the fetch and the sort.
    if page_size == 0 or offset >= min(total, max_fetch):
        return [], total

    arrow_tbl = await table.query().where(where).limit(max_fetch).to_arrow()
    order = "descending" if descending else "ascending"
    arrow_tbl = arrow_tbl.sort_by([(sort_by, order)])
    page_rows = arrow_tbl.slice(offset, page_size)
    return (
        [self.schema.model_validate(r) for r in page_rows.to_pylist()],
        total,
    )
async def find_by_owner(
    self,
    owner_id: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Fetch rows whose ``owner_id`` column equals ``owner_id``.

    Args:
        owner_id: Owner to filter on; passed through ``_q`` before being
            embedded in the predicate.
        limit: Maximum number of rows (default 100).
    """
    owner_clause = f"owner_id = '{_q(owner_id)}'"
    return await self.find_where(owner_clause, limit=limit)
async def find_by_md_path(self, md_path: str) -> T | None:
    """Return the first row whose ``md_path`` equals ``md_path``, else ``None``.

    ``md_path`` is passed through ``_q`` before being embedded in the
    predicate handed to :meth:`find_one_where`.
    """
    path_clause = f"md_path = '{_q(md_path)}'"
    return await self.find_one_where(path_clause)
async def search(
    self,
    *,
    vector: Sequence[float] | None = None,
    where: str | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Query with an optional nearest-neighbour step and an optional filter.

    Args:
        vector: Embedding for ``nearest_to``; ``None`` skips the ANN step.
        where: SQL-like predicate (e.g. ``"tags = 'meeting'"``); ``None``
            applies no filter.
        limit: Maximum number of rows (default 10).

    Returns:
        The query's ``to_list()`` result — raw row dicts in the table's
        native shape, not schema instances.
    """
    query = (await self._table()).query()
    if vector is not None:
        query = query.nearest_to(list(vector))
    if where is not None:
        query = query.where(where)
    query = query.limit(limit)
    return await query.to_list()
# ── Delete ─────────────────────────────────────────────────────────────
async def delete(self, predicate: str) -> None:
    """Delete rows matching ``predicate`` inside the ``_write_lock`` context.

    Args:
        predicate: SQL-like filter handed to the table's ``delete``
            unchanged; the caller is responsible for escaping values.
    """
    target = await self._table()
    guard = self._write_lock(self.table_name)
    async with guard:
        await target.delete(predicate)
async def delete_by_md_path(self, md_path: str) -> int:
    """Delete every row whose ``md_path`` matches and report how many went.

    ``md_path`` is passed through ``_q`` before being embedded in the
    predicate; the delete runs inside the ``_write_lock`` context.

    Returns:
        ``num_deleted_rows`` from the table's delete result, as an ``int``.
    """
    target = await self._table()
    async with self._write_lock(self.table_name):
        path_clause = f"md_path = '{_q(md_path)}'"
        outcome = await target.delete(path_clause)
    return int(outcome.num_deleted_rows)
class LanceDailyLogRepoBase[T: BaseLanceTable](LanceRepoBase[T]):
"""LanceRepoBase + queries unique to daily-log tables.
Daily-log tables (``episode`` / ``atomic_fact`` / ``foresight`` /
``agent_case``) share a fixed schema slice: ``entry_id`` (md seq
id), ``session_id`` (conversation scope), and ``parent_type`` /
``parent_id`` (record lineage). The queries below compose those
columns; ``agent_skill`` is *not* a daily-log (it is a named
single-file entity) and uses :class:`LanceRepoBase` directly.
"""
async def find_by_owner_entry(
self,
owner_id: str,
entry_id: str,
*,
app_id: str = "default",
project_id: str = "default",
) -> T | None:
"""Single point-query by ``(app, project, owner_id, entry_id)``.
``entry_id`` is only unique within a (app, project, owner) scope —
the same ``ac_<date>_<seq>`` recurs in another space — so the
scope segments are part of the predicate to avoid a cross-space hit.
"""
return await self.find_one_where(
f"owner_id = '{_q(owner_id)}' AND entry_id = '{_q(entry_id)}' "
f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'"
)
async def find_by_owner_entries(
self,
owner_id: str,
entry_ids: Sequence[str],
*,
app_id: str = "default",
project_id: str = "default",
) -> list[T]:
"""Bulk point-query by ``(app, project, owner_id, entry_id IN ...)``.
Empty ``entry_ids`` short-circuits to ``[]`` rather than emit a
``WHERE entry_id IN ()`` predicate (LanceDB rejects empty
tuples). The query's ``limit`` is bound to ``len(entry_ids)``
because at most one row per id can exist under one (app, project,
owner) scope.
"""
if not entry_ids:
return []
quoted = ", ".join(f"'{_q(eid)}'" for eid in entry_ids)
return await self.find_where(
f"owner_id = '{_q(owner_id)}' AND entry_id IN ({quoted}) "
f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'",
limit=len(entry_ids),
)
async def find_by_session(
self,
owner_id: str,
session_id: str,
*,
limit: int = 100,
) -> list[T]:
"""Every row in one conversation ``session_id`` under ``owner_id``."""
return await self.find_where(
f"owner_id = '{_q(owner_id)}' AND session_id = '{_q(session_id)}'",
limit=limit,
)
async def find_by_parent(
self,
parent_type: str,
parent_id: str,
*,
limit: int = 100,
) -> list[T]:
"""Every row whose parent matches ``(parent_type, parent_id)``."""
return await self.find_where(
f"parent_type = '{_q(parent_type)}' AND parent_id = '{_q(parent_id)}'",
limit=limit,
)

View File

@ -0,0 +1,76 @@
"""Process-wide exclusive lock on a memory-root.
Uses ``fcntl.flock`` (POSIX advisory locking, available on Linux + macOS;
Windows is not supported — see project README on platform scope). The
public surface is an :func:`contextlib.asynccontextmanager` so callers
use ``async with memory_root_lock(mr):``; the underlying syscalls have
no async equivalent so they run in a worker thread via
:func:`anyio.to_thread.run_sync`.
"""
from __future__ import annotations
import fcntl
import os
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
import anyio
from .memory_root import MemoryRoot
class LockError(RuntimeError):
    """Raised when the memory-root lock cannot be acquired in non-blocking mode.

    ``memory_root_lock(..., blocking=False)`` raises this when ``flock``
    reports ``BlockingIOError``, i.e. the lock is already held.
    """
@asynccontextmanager
async def memory_root_lock(
    memory_root: MemoryRoot,
    *,
    blocking: bool = True,
) -> AsyncIterator[None]:
    """Acquire an exclusive process lock on the memory-root.

    The anchor file descriptor is closed on every exit path: a failed
    acquisition (contention, any other ``OSError``, or cancellation) and
    normal / exceptional exit from the ``async with`` body. Cleanup runs
    inside a shielded cancel scope because awaits inside an
    already-cancelled AnyIO scope are cancelled again immediately, which
    would otherwise skip the unlock and leak the descriptor.

    Args:
        memory_root: The memory-root to lock. The lock anchor file
            (``<root>/.lock``) is created on first use.
        blocking: If ``True`` (default), wait until the lock is free. If
            ``False``, raise :class:`LockError` immediately when another
            process holds it.

    Raises:
        LockError: When ``blocking=False`` and the lock is already held.
    """
    await anyio.Path(memory_root.root).mkdir(parents=True, exist_ok=True)
    lock_path = memory_root.lock_file

    # Open the anchor file (create on first use). The fd, not the path, is
    # what fcntl operates on. ``os.open`` is microsecond-fast but offloaded
    # for consistency with the rest of the lock acquisition flow.
    fd = await anyio.to_thread.run_sync(
        lambda: os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
    )

    flags = fcntl.LOCK_EX
    if not blocking:
        flags |= fcntl.LOCK_NB

    try:
        await anyio.to_thread.run_sync(fcntl.flock, fd, flags)
    except BaseException as exc:
        # Acquisition failed for *any* reason: the fd must not outlive this
        # call. Closing it also drops the lock if the flock call had already
        # succeeded when a cancellation was delivered.
        with anyio.CancelScope(shield=True):
            await anyio.to_thread.run_sync(os.close, fd)
        if isinstance(exc, BlockingIOError):
            raise LockError(
                f"another process already holds the memory-root lock at {lock_path}"
            ) from exc
        raise

    # Lock acquired — release + close strictly on exit. The failure path
    # above already closed its fd and re-raised, so it never reaches this
    # block (no double-close).
    try:
        yield
    finally:
        with anyio.CancelScope(shield=True):
            try:
                await anyio.to_thread.run_sync(fcntl.flock, fd, fcntl.LOCK_UN)
            finally:
                await anyio.to_thread.run_sync(os.close, fd)

View File

@ -0,0 +1,62 @@
"""Markdown file IO toolkit.
Atomic write + YAML frontmatter parse/dump + entry marker parse +
audit-form structured-entry parsing. Knows nothing about business
models (no MemCell / Episode); the :class:`Entry` here is a
*marker-delimited* span within a markdown body, not a business record.
External usage (IO + parse):
from everos.core.persistence.markdown import (
Entry, EntryId, StructuredEntry,
MarkdownReader, MarkdownWriter, ParsedMarkdown,
parse_frontmatter, dump_frontmatter,
split_entries, find_entry,
parse_structured_entry, render_structured_entry,
)
External usage (frontmatter schema chassis):
from everos.core.persistence.markdown import (
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
)
"""
from .entries import Entry as Entry
from .entries import EntryId as EntryId
from .entries import StructuredEntry as StructuredEntry
from .entries import find_entry as find_entry
from .entries import parse_structured_entry as parse_structured_entry
from .entries import render_structured_entry as render_structured_entry
from .entries import split_entries as split_entries
from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
from .frontmatter import BaseFrontmatter as BaseFrontmatter
from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
from .frontmatter import ProfilePathMixin as ProfilePathMixin
from .frontmatter import SkillPathMixin as SkillPathMixin
from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
from .frontmatter import dump_frontmatter as dump_frontmatter
from .frontmatter import parse_frontmatter as parse_frontmatter
from .parsed import ParsedMarkdown as ParsedMarkdown
from .reader import MarkdownReader as MarkdownReader
from .writer import MarkdownWriter as MarkdownWriter
__all__ = [
"AgentScopedFrontmatter",
"BaseFrontmatter",
"DailyLogPathMixin",
"Entry",
"EntryId",
"MarkdownReader",
"MarkdownWriter",
"ParsedMarkdown",
"ProfilePathMixin",
"SkillPathMixin",
"StructuredEntry",
"UserScopedFrontmatter",
"dump_frontmatter",
"find_entry",
"parse_frontmatter",
"parse_structured_entry",
"render_structured_entry",
"split_entries",
]

View File

@ -0,0 +1,368 @@
"""Markdown entries — id format, marker spans, and audit-form parsing.
Three closely-related entry concepts live together here so a reader
sees the whole entry surface in one file:
1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
stamped into each daily-log entry's open / close markers. Carries
the prefix declared by the frontmatter schema, the date bucket, and
the in-file zero-padded sequence.
2. :class:`Entry` — a marker-delimited span inside a markdown body::
<!-- entry:abc123 -->
...content...
<!-- /entry:abc123 -->
:func:`split_entries` and :func:`find_entry` locate these spans
without interpreting the inner content. Higher layers (writers,
cascade) parse it per record type.
3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
audit-form body fields (header / inline / sections). Built either
from a raw body string via :func:`parse_structured_entry` or from
an existing :class:`Entry` via :meth:`Entry.as_structured`.
Audit-form layout::
## <header> ← optional H2 (usually entry id, for grep)
**key**: value ← inline fields, one per line
**key2**: value2
### Section Title ← section fields: H3 + free-form text
body content...
### Another Section
more content...
The audit chassis is intentionally **type-agnostic** — every field
round-trips as a string. Inline values are stringified on render
(lists become ``[a, b, c]``, scalars use ``str()``); on parse
everything is the raw text after the colon. Section titles are kept
verbatim. This keeps parsing tolerant of stray fields, wrapped
strings, and manually-typed timestamps; the strong-typed model lives
in business writers + the SQLite/LanceDB indexes.
Cross-user uniqueness is handled at the database layer via a composite
``<user_id>_<entry_id>`` field; it is *not* encoded into the
:class:`EntryId` string itself.
"""
from __future__ import annotations
import datetime as _dt
import re
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Self
# ── EntryId — structured id for marker stamping ─────────────────────────
_DATE_FMT = "%Y%m%d"
_SEQ_DIGITS = 8
"""Minimum zero-padding for the in-file seq.
8 digits keeps lexicographic order == numeric order up to 10**8
entries per file (per user, per day). ``format()`` is "at least 8"
larger seqs emit more digits without truncation. ``parse`` is
permissive: shorter (legacy 4-digit) and longer seq strings both
parse cleanly; format normalises to >= 8 digits on round-trip.
"""
@dataclass(frozen=True, slots=True)
class EntryId:
"""Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
prefix: str
date: _dt.date
seq: int
def format(self) -> str:
"""Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
return (
f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
)
def __str__(self) -> str: # noqa: D401
return self.format()
@classmethod
def parse(cls, s: str) -> Self:
"""Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
possible) is preserved as-is.
"""
parts = s.rsplit("_", 2)
if len(parts) != 3:
raise ValueError(f"invalid entry id format: {s!r}")
prefix, date_str, seq_str = parts
if not prefix:
raise ValueError(f"empty prefix in entry id: {s!r}")
try:
d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
except ValueError as exc:
raise ValueError(f"invalid date in entry id: {s!r}") from exc
try:
seq = int(seq_str)
except ValueError as exc:
raise ValueError(f"invalid seq in entry id: {s!r}") from exc
if seq < 0:
raise ValueError(f"negative seq in entry id: {s!r}")
return cls(prefix=prefix, date=d, seq=seq)
@classmethod
def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
"""Build the id for the next entry given the file's current count.
``current_count`` is the value of ``frontmatter.entry_count``
*before* this append. The new id gets ``seq = current_count + 1``.
"""
if current_count < 0:
raise ValueError(f"current_count must be >= 0, got {current_count}")
return cls(prefix=prefix, date=date, seq=current_count + 1)
# ── Entry — marker-delimited span inside a body ─────────────────────────
# Filename / URL-safe id alphabet for the marker.
_ID_PATTERN = r"[A-Za-z0-9_-]+"
# Open marker ``<!-- entry:<id> -->``; group 1 captures the id. Only ids
# made of the alphabet above are recognised as open markers.
_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
@dataclass(frozen=True)
class Entry:
    """One marker-delimited entry within a markdown body.

    Attributes:
        id: Value between ``entry:`` and ``-->`` in the open marker.
        body: Content between the open and close markers, with one leading
            and one trailing newline removed (typical formatter output).
        start: Offset of the opening ``<!-- entry:id -->`` in the source body.
        end: Offset just past the closing ``<!-- /entry:id -->`` in the source.
    """

    id: str
    body: str
    start: int
    end: int

    def as_structured(self) -> StructuredEntry:
        """Parse my body as audit-form and return a :class:`StructuredEntry`.

        The id / body / start / end fields are preserved; the parsed
        ``header`` / ``inline`` / ``sections`` are added on top.
        """
        # ``_origin=self`` makes the parser copy this entry's marker context
        # instead of falling back to its no-marker defaults.
        return parse_structured_entry(self.body, _origin=self)
def split_entries(body: str) -> list[Entry]:
    """Return every complete entry found in ``body``, in document order.

    The scan jumps from each close marker to the next open marker. The
    first open marker without a matching close marker ends the scan:
    nothing at or after it is returned. Callers needing strict validation
    should layer a dedicated check on top.
    """
    found: list[Entry] = []
    cursor = 0
    while (opener := _OPEN_RE.search(body, cursor)) is not None:
        marker_id = opener.group(1)
        closer = _close_re_for(marker_id).search(body, opener.end())
        if closer is None:
            # Unterminated entry — abort further scanning.
            break
        inner = body[opener.end() : closer.start()]
        found.append(
            Entry(
                id=marker_id,
                body=_strip_one_newline(inner),
                start=opener.start(),
                end=closer.end(),
            )
        )
        cursor = closer.end()
    return found
def find_entry(body: str, entry_id: str) -> Entry | None:
    """Locate the first entry whose marker id is ``entry_id``.

    Only the first open marker with that id is considered; when it has no
    close marker after it the result is ``None``, as it is when the id
    does not occur at all.
    """
    opener = re.compile(rf"<!-- entry:{re.escape(entry_id)} -->").search(body)
    if opener is None:
        return None
    closer = _close_re_for(entry_id).search(body, opener.end())
    if closer is None:
        return None
    inner = body[opener.end() : closer.start()]
    return Entry(
        id=entry_id,
        body=_strip_one_newline(inner),
        start=opener.start(),
        end=closer.end(),
    )
def _close_re_for(entry_id: str) -> re.Pattern[str]:
    """Compile the regex matching ``<!-- /entry:<entry_id> -->`` literally."""
    escaped_id = re.escape(entry_id)
    return re.compile(rf"<!-- /entry:{escaped_id} -->")
def _strip_one_newline(text: str) -> str:
    """Remove at most one leading and one trailing line break.

    Both ``"\\r\\n"`` and ``"\\n"`` count as a line break. The leading one
    is removed first, then the trailing one of whatever remains.
    """
    breaks = ("\r\n", "\n")  # CRLF first so a "\r" is never left behind
    for lead in breaks:
        if text.startswith(lead):
            text = text[len(lead) :]
            break
    for trail in breaks:
        if text.endswith(trail):
            text = text[: -len(trail)]
            break
    return text
# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
# H2 line: ``## <header>``.
_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
# Inline field: ``**key**: value``. Anchored to line start so a stray
# ``**emphasis**`` mid-paragraph isn't mistaken for a field.
# The gap after the colon is ``[ \t]*``, not ``\s*``: ``\s`` also matches
# newlines, so an empty value (``**key**: ``, which is what a ``None`` value
# renders to) would otherwise swallow the entire next line — usually the next
# inline field — as its value.
_INLINE_RE = re.compile(
    r"^\*\*(?P<key>[^*\n]+?)\*\*:[ \t]*(?P<value>.*?)\s*$",
    re.MULTILINE,
)
# H3 line: ``### Title``.
_H3_RE = re.compile(r"^###\s+(.+?)\s*$", re.MULTILINE)
@dataclass(frozen=True)
class StructuredEntry(Entry):
    """:class:`Entry` whose body has been parsed as audit-form data.

    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`
    and adds three parsed views of the body: the optional H2 header,
    the inline ``**key**: value`` map, and the ``### Title`` sections.
    When built from a raw body string with no marker context,
    :func:`parse_structured_entry` fills the inherited fields with
    ``""`` / the raw body / ``0`` / ``len(body)``.

    Audit-form values are strings only; type coercion is the caller's
    job (a strong-typed model lives in the writer / index).
    """

    # Text of the first ``## ...`` line before any section, if present.
    header: str | None = None
    # ``**key**: value`` pairs found before the first ``###`` section.
    inline: dict[str, str] = field(default_factory=dict)
    # ``### Title`` -> section text; a repeated title keeps the last body.
    sections: dict[str, str] = field(default_factory=dict)
def render_structured_entry(
    *,
    header: str | None = None,
    inline: Mapping[str, object] | None = None,
    sections: Mapping[str, str] | None = None,
) -> str:
    """Render an audit-form entry body.

    Args:
        header: Optional H2 line at the top, followed by a blank line.
            Skipped when ``None`` or empty.
        inline: ``{key: value}`` rendered as one ``**key**: value`` line
            each, in mapping order, with values stringified by
            ``_render_value``.
        sections: ``{title: body}`` rendered as a blank line, the
            ``### title`` line, then the body with trailing whitespace
            stripped. Titles are emitted verbatim.

    Returns:
        The rendered lines joined with ``"\\n"``, without a trailing
        newline; adding markers and newlines is left to the caller.
    """
    out: list[str] = []
    if header:
        out += [f"## {header}", ""]
    out.extend(
        f"**{key}**: {_render_value(value)}" for key, value in (inline or {}).items()
    )
    for title, text in (sections or {}).items():
        out += ["", f"### {title}", text.rstrip()]
    return "\n".join(out)
def parse_structured_entry(
    body: str, *, _origin: Entry | None = None
) -> StructuredEntry:
    """Parse an audit-form entry body. Strings only — no type coercion.

    The body is split on ``### Title`` lines. Everything before the first
    one is the head: its first ``## ...`` line becomes ``header`` and its
    ``**key**: value`` lines become ``inline``. Each title maps to the
    text up to the next title; a repeated title keeps the last body.
    Inline-looking lines inside a section stay part of that section's text.

    Args:
        body: Raw audit-form text.
        _origin: Entry the body came from. When given, its ``id`` /
            ``body`` / ``start`` / ``end`` are copied to the result;
            otherwise those are ``""`` / ``body`` / ``0`` / ``len(body)``.

    Returns:
        :class:`StructuredEntry` with everything as strings.
    """
    pieces = _H3_RE.split(body.strip("\n"))
    head = pieces[0]

    # With one capture group, split() yields head, title, text, title, text…
    section_map: dict[str, str] = {}
    for idx in range(1, len(pieces), 2):
        section_text = "".join(pieces[idx + 1 : idx + 2])  # "" if text is missing
        section_map[pieces[idx].strip()] = section_text.strip("\n").rstrip()

    h2_match = _H2_RE.search(head)
    header = h2_match.group(1).strip() if h2_match else None

    inline_map: dict[str, str] = {}
    for field_match in _INLINE_RE.finditer(head):
        inline_map[field_match.group("key").strip()] = field_match.group(
            "value"
        ).strip()

    if _origin is None:
        entry_id, raw_body, start, end = "", body, 0, len(body)
    else:
        entry_id, raw_body = _origin.id, _origin.body
        start, end = _origin.start, _origin.end
    return StructuredEntry(
        id=entry_id,
        body=raw_body,
        start=start,
        end=end,
        header=header,
        inline=inline_map,
        sections=section_map,
    )
def _render_value(value: object) -> str:
    """Stringify an inline value for audit-form output.

    ``None`` becomes ``""``; a ``list`` or ``tuple`` becomes
    ``[a, b, c]`` with each item passed through ``str()``; anything else
    is ``str(value)``.
    """
    if value is None:
        return ""
    if not isinstance(value, (list, tuple)):
        return str(value)
    return "[" + ", ".join(map(str, value)) + "]"

View File

@ -0,0 +1,300 @@
"""Frontmatter — YAML block parse / dump + L1 schema chassis.
Frontmatter is the leading ``---``-delimited YAML block at the top of
a markdown document::
---
title: Hello
tags: [a, b]
---
# Body starts here
Two complementary surfaces live here:
1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
``sort_keys=False`` so caller-controlled key order is preserved).
2. The L1 chassis classes — :class:`BaseFrontmatter`,
:class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
which fix the *absolute-readonly* fields (``id`` / ``type`` /
``schema_version``) plus scope (``user_id`` / ``agent_id`` +
``track``). Every business frontmatter schema in
``infra/persistence/markdown/mds/`` subclasses one of these.
Concrete business schemas (``UserMemcellDailyFrontmatter``,
``SkillFrontmatter``, …) live in ``infra``; they add per-record
business fields plus the path-resolution metadata daily-log writers
need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
"""
from __future__ import annotations
from collections.abc import Mapping
from typing import Any, ClassVar, Literal
import yaml
from pydantic import BaseModel, ConfigDict
# ── YAML helpers ────────────────────────────────────────────────────────
_DELIM = "---"
def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Parse a leading ``---\\n...\\n---\\n`` YAML block.

    Returns:
        (meta, remainder): ``meta`` is the parsed YAML mapping and
        ``remainder`` is everything after the closing delimiter line
        (minus the single newline that follows it), untouched otherwise.

    Notes:
        ``({}, text)`` — the input unchanged — is returned when:

        - the document does not start with ``---``;
        - the opening ``---`` is not immediately followed by a newline;
        - no closing ``---`` line is found;
        - the parsed YAML is not a mapping (e.g. a list or a scalar).

        An empty or whitespace-only block (``---\\n---\\n``) returns
        ``({}, remainder)``.

    Raises:
        yaml.YAMLError: Propagated from ``yaml.safe_load`` when the block
            between the delimiters is not valid YAML; it is not caught here.
    """
    unchanged: tuple[dict[str, Any], str] = ({}, text)
    if not text.startswith(_DELIM):
        return unchanged

    # The opening delimiter must be followed directly by a line break.
    after_open = text[len(_DELIM) :]
    if after_open.startswith("\r\n"):
        after_open = after_open[2:]
    elif after_open.startswith("\n"):
        after_open = after_open[1:]
    else:
        return unchanged

    close_at = _find_closing_delim(after_open)
    if close_at is None:
        return unchanged

    yaml_src = after_open[:close_at]
    tail = after_open[close_at + len(_DELIM) :]
    # Drop the single line break after the closing delimiter, if present.
    for line_break in ("\r\n", "\n"):
        if tail.startswith(line_break):
            tail = tail[len(line_break) :]
            break

    meta: Any = yaml.safe_load(yaml_src) if yaml_src.strip() else {}
    if meta is None:
        meta = {}
    return (meta, tail) if isinstance(meta, dict) else unchanged
def dump_frontmatter(meta: Mapping[str, Any]) -> str:
    """Render a mapping as a ``---\\n<yaml>\\n---\\n`` block.

    Args:
        meta: Mapping to serialise; copied into a plain ``dict`` first.

    Returns:
        The delimited block, or ``""`` (no delimiters) for an empty
        mapping. Keys keep the caller's order (``sort_keys=False``),
        unicode is emitted as-is, and block style is used.
    """
    if not meta:
        return ""
    rendered = yaml.safe_dump(
        dict(meta),
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    return _DELIM + "\n" + rendered + _DELIM + "\n"
def _find_closing_delim(text: str) -> int | None:
    """Return the offset of the first line that is exactly ``---``.

    Lines are separated by ``"\\n"``; trailing ``"\\r"`` characters are
    ignored when comparing, so CRLF documents match too.

    Returns:
        Offset of the first character of the matching line, or ``None``
        when no line matches.
    """
    size = len(text)
    line_start = 0
    while line_start < size:
        newline_at = text.find("\n", line_start)
        line_end = size if newline_at == -1 else newline_at
        if text[line_start:line_end].rstrip("\r") == _DELIM:
            return line_start
        # Past the last line this pushes line_start beyond ``size`` and
        # ends the loop.
        line_start = line_end + 1
    return None
# ── L1 schema chassis ───────────────────────────────────────────────────
class BaseFrontmatter(BaseModel):
    """L1 fields every markdown frontmatter must carry.

    These match the *absolute-readonly* tier in the EverOS Markdown First
    spec — they identify the record across markdown ↔ LanceDB and must
    never be rewritten by a human edit.

    Subclasses add scope (``UserScopedFrontmatter`` /
    ``AgentScopedFrontmatter``) plus per-record business fields.
    """

    SCOPE_DIR: ClassVar[str] = ""
    """Top-level directory under the memory-root that holds this kind.

    Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
    schemas (rare) leave it empty; consumers that need to resolve a path
    (writers, layout reverse-lookup) must reject schemas with empty
    ``SCOPE_DIR``.
    """

    # Required identity fields; ``schema_version`` defaults to 1.
    id: str
    type: str
    schema_version: int = 1

    # Permit additional fields so L2 system-managed metadata
    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can ride along on
    # the same model without forcing every subclass to redeclare them.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def path_glob(cls) -> str:
        """Return an ``fnmatch``-style glob (relative to memory-root)
        covering every markdown file this schema describes.

        Used by the cascade kind registry — the scanner walks every kind's
        ``path_glob()`` to enumerate eligible files without hard-coding
        path patterns in cascade. The schema is the single source of truth
        for both the writer's path resolution and the scanner's enumeration.

        Subclasses must override — typically by mixing in
        :class:`DailyLogPathMixin` or :class:`SkillPathMixin` *before* the
        scope mixin in the MRO so this abstract version is shadowed.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            f"{cls.__name__} must declare path_glob() "
            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
        )
class DailyLogPathMixin:
    """Glob strategy for daily-log markdown files.

    Target layout:
    ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.

    A concrete schema combines this mixin with a scope mixin
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``), which supplies
    ``SCOPE_DIR``, and declares ``DIR_NAME`` / ``FILE_PREFIX`` itself.

    List **this mixin first** among the bases so the MRO picks this
    ``path_glob()`` instead of the ``NotImplementedError`` stub on
    :meth:`BaseFrontmatter.path_glob`::

        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
            DIR_NAME: ClassVar[str] = "episodes"
            FILE_PREFIX: ClassVar[str] = "episode"
            ...
    """

    # Declared without values: concrete subclasses / scope mixins provide them.
    DIR_NAME: ClassVar[str]
    FILE_PREFIX: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the memory-root-relative glob matching every daily-log file."""
        # The leading ``*/*/`` stands for the <app>/<project> partition that
        # precedes every user-visible dir. The scanner's ``root.glob`` is
        # anchored at the root, so without this prefix nothing would match;
        # the watcher's right-anchored ``PurePosixPath.match`` agrees on the
        # same shape.
        owner_dir = f"*/*/{cls.SCOPE_DIR}/*"
        return f"{owner_dir}/{cls.DIR_NAME}/{cls.FILE_PREFIX}-*.md"
class SkillPathMixin:
    """Glob strategy for skill-directory markdown files.

    Target layout: ``<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/
    <SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``. Only each
    skill's main file is matched; sibling ``references/*.md`` and
    ``scripts/*`` fall outside the pattern (they travel with the main file —
    see :class:`AgentSkillFrontmatter`'s docstring for how the index column
    is rebuilt from them).

    List **this mixin first** among the bases so the MRO resolves
    ``path_glob()`` here::

        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
            ...
    """

    # Declared without values: concrete subclasses / scope mixins provide them.
    SKILLS_CONTAINER_NAME: ClassVar[str]
    SKILL_DIR_PREFIX: ClassVar[str]
    SKILL_MAIN_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the memory-root-relative glob matching every skill main file."""
        # The leading ``*/*/`` stands for the <app>/<project> partition.
        container = f"*/*/{cls.SCOPE_DIR}/*/{cls.SKILLS_CONTAINER_NAME}/"
        main_file = f"{cls.SKILL_DIR_PREFIX}*/{cls.SKILL_MAIN_FILENAME}"
        return container + main_file
class ProfilePathMixin:
    """Glob strategy for single-file profile markdown.

    Target layout: ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>`` — one
    fixed-name file sitting directly in the owner directory. There is no
    intermediate ``<dir>/`` segment (as daily-logs have) and no per-name
    subdirectory (as skills have).

    A concrete schema combines this mixin with a scope mixin
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``), which supplies
    ``SCOPE_DIR``, and declares ``PROFILE_FILENAME`` itself.

    List **this mixin first** among the bases so the MRO resolves
    ``path_glob()`` here::

        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
            PROFILE_FILENAME: ClassVar[str] = "user.md"
            ...
    """

    # Declared without values: concrete subclasses / scope mixins provide them.
    PROFILE_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the memory-root-relative glob matching every profile file."""
        # The leading ``*/*/`` stands for the <app>/<project> partition.
        owner_dir = f"*/*/{cls.SCOPE_DIR}/*"
        return f"{owner_dir}/{cls.PROFILE_FILENAME}"
class UserScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one user (``track`` = ``"user"``).

    Only the *file-level* scope is stored here: ``user_id``, which the file
    path already expresses. Business attributes such as ``group_id`` belong
    to each entry's structured body instead — see :class:`StructuredEntry`
    in :mod:`.entries`.
    """

    # Top-level directory holding user-scoped files.
    SCOPE_DIR: ClassVar[str] = "users"

    # Owner of the file.
    user_id: str
    # Fixed discriminator: the only accepted value is "user".
    track: Literal["user"] = "user"
class AgentScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one agent (``track`` = ``"agent"``).

    Follows the same scope-vs-business split as
    :class:`UserScopedFrontmatter`: ``agent_id`` is the file-level scope,
    while ``group_id`` and similar attributes ride on each entry rather
    than on the file frontmatter.
    """

    # Top-level directory holding agent-scoped files.
    SCOPE_DIR: ClassVar[str] = "agents"

    # Owner of the file.
    agent_id: str
    # Fixed discriminator: the only accepted value is "agent".
    track: Literal["agent"] = "agent"

View File

@ -0,0 +1,31 @@
"""Parsed-markdown data type.
The output shape of :class:`MarkdownReader` is held here, separate
from the reader implementation: callers that only consume parse
results don't need to import the reader machinery, and downstream
modules (writer, business readers) can produce :class:`ParsedMarkdown`
without going through ``MarkdownReader.read`` if they already hold
the pieces.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from .entries import Entry
@dataclass(frozen=True)
class ParsedMarkdown:
    """Immutable result of parsing one markdown document.

    Attributes:
        frontmatter: The YAML mapping from the leading frontmatter block;
            an empty dict when the document has none.
        body: Everything after the frontmatter block. Entry markers are
            left in place (the text is not entry-stripped).
        entries: The marker-delimited entries found inside ``body``;
            defaults to a fresh empty list per instance.
    """

    frontmatter: dict[str, Any]
    body: str
    # ``default_factory`` so instances never share one list object.
    entries: list[Entry] = field(default_factory=list)

View File

@ -0,0 +1,42 @@
"""Markdown file reader.
Loads a markdown document and splits it into:
1. ``frontmatter`` — parsed YAML (empty dict if absent)
2. ``body`` — raw text after the closing ``---`` delimiter
3. ``entries`` — marker-delimited spans inside ``body``
The reader is purely parsing; it does not validate frontmatter shape,
entry content, or cross-references. Higher layers add business-aware
checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
``parse`` is sync (pure in-memory string processing). ``read`` is async
and uses :class:`anyio.Path` so file I/O does not block the event loop.
"""
from __future__ import annotations
from pathlib import Path
import anyio
from .entries import split_entries
from .frontmatter import parse_frontmatter
from .parsed import ParsedMarkdown
class MarkdownReader:
    """Turn markdown text or files into :class:`ParsedMarkdown` values."""

    @staticmethod
    def parse(text: str) -> ParsedMarkdown:
        """Split in-memory ``text`` into frontmatter, body and entries.

        Performs no I/O. The entries are discovered in the body that
        remains after the frontmatter block has been removed.
        """
        frontmatter, remainder = parse_frontmatter(text)
        return ParsedMarkdown(
            frontmatter=frontmatter,
            body=remainder,
            entries=split_entries(remainder),
        )

    @staticmethod
    async def read(path: Path) -> ParsedMarkdown:
        """Load the UTF-8 file at ``path`` and parse it with :meth:`parse`."""
        source = anyio.Path(path)
        return MarkdownReader.parse(await source.read_text(encoding="utf-8"))

View File

@ -0,0 +1,269 @@
"""Markdown file writer with atomic write semantics.
Atomicity is provided by writing to a same-directory temp file
(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
the target. Keeping the temp file in the same directory guarantees the
rename is on the same filesystem (POSIX rename is atomic only within a
single fs).
All public I/O methods are async. Path-level operations (``read_text`` /
``mkdir`` / ``is_file`` / the cleanup-path ``unlink``) go through
:class:`anyio.Path`; the blocking calls without a native async equivalent
(the staging-file write + ``os.fsync``, and ``os.replace``) are offloaded
via :func:`anyio.to_thread.run_sync`.
In-process per-path locking
---------------------------
:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
the whole file (load frontmatter+body, merge an entry block, atomic
write the result). The atomic write itself is safe, but the read→write
window crosses ``await`` points. Concurrent asyncio tasks targeting the
same path would otherwise lose-update each other (both read N entries,
both produce N+1, second write overwrites the first → 1 entry lost).
To prevent this, an in-process per-path :class:`asyncio.Lock` is held
across the entire read-modify-write sequence. Lock objects live on the
writer instance (not class-level) and are created lazily on the first
``lock_for`` call for a path, so their lifetime is tied to the writer
that uses them — this avoids the
"Lock bound to different loop" failure mode that surfaces when
pytest-asyncio rebuilds the loop between tests but module-level writer
singletons leak Lock objects across boundaries.
Process-level coordination (multi-process writers against the same
memory-root) remains the job of
:func:`everos.core.persistence.locking.memory_root_lock`, which uses
``fcntl.flock``. The two locks compose: per-path async lock serialises
tasks within one process, ``memory_root_lock`` serialises processes
against each other.
"""
from __future__ import annotations
import asyncio
import contextlib
import os
import uuid
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any
import anyio
from ..memory_root import MemoryRoot
from .entries import EntryId
from .frontmatter import dump_frontmatter
from .reader import MarkdownReader
class MarkdownWriter:
    """Atomic writer for markdown files inside a memory-root.

    The ``memory_root`` reference is held to enable future enforcement that
    targets stay within the configured root; current writes do not depend on
    it for the rename itself (same-dir temp file).
    """

    def __init__(self, memory_root: MemoryRoot) -> None:
        self._memory_root = memory_root
        # Per-path async lock registry, filled lazily by ``lock_for``.
        # ``lock_for`` has no ``await`` between its lookup and its insert,
        # so tasks sharing one event loop cannot interleave there and end
        # up with two different locks for the same path.
        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
        # must outlive any task awaiting it; ref-counted GC would race with
        # those waiters. See Python bpo-28427 for the WeakValueDictionary
        # multithreading hazard that bites the weak-ref approach.
        self._path_locks: dict[Path, asyncio.Lock] = {}

    @property
    def memory_root(self) -> MemoryRoot:
        """The memory-root this writer was constructed with."""
        return self._memory_root

    def lock_for(self, path: Path) -> asyncio.Lock:
        """Return the per-path lock; create on first use.

        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
        can serialise their own multi-step ``read → compute → write``
        sequences against this writer's single-step ``append`` paths.
        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
        re-acquisition of the same lock from within an already-locked
        critical section (``asyncio.Lock`` is *not* reentrant).
        """
        # Resolve to an absolute canonical path so aliases (relative vs.
        # absolute, symlinks) share the same lock object.
        key = Path(path).resolve()
        lock = self._path_locks.get(key)
        if lock is None:
            lock = asyncio.Lock()
            self._path_locks[key] = lock
        return lock

    async def write(self, path: Path, content: str) -> Path:
        """Atomically write ``content`` to ``path``.

        Steps:
            1. ``mkdir -p`` the parent directory.
            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
            3. ``flush`` + ``fsync`` the temp file.
            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).

        On any failure — including task cancellation — the staging file is
        removed on a best-effort basis and the original exception is
        re-raised.

        Returns:
            ``path``, normalised to a :class:`~pathlib.Path` (not resolved).
        """
        target = Path(path)
        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
        try:
            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
            await anyio.to_thread.run_sync(os.replace, tmp, target)
        except BaseException:
            # ``BaseException`` rather than ``Exception``: cancellation is
            # delivered as a ``BaseException`` and would otherwise leave the
            # ``.tmp.<uuid>`` staging file behind. The cleanup itself awaits,
            # so it runs inside a shielded scope — in a cancelled scope it
            # would be interrupted again at its first checkpoint.
            with anyio.CancelScope(shield=True):
                await _unlink_quiet(tmp)
            raise
        return target

    async def write_markdown(
        self,
        path: Path,
        *,
        frontmatter: Mapping[str, Any] | None = None,
        body: str = "",
    ) -> Path:
        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.

        ``None`` or an empty mapping produces a document with no
        frontmatter block at all.
        """
        head = dump_frontmatter(frontmatter or {})
        return await self.write(path, head + body)

    async def append_entry(
        self,
        path: Path,
        *,
        entry_body: str,
        entry_id: EntryId,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append a single entry block to a markdown file, merging frontmatter.

        Convenience wrapper around :meth:`append_entries` for single-entry
        callers. See that method for full semantics.

        Args:
            path: Target markdown file. Created if missing.
            entry_body: Content between the open and close markers.
                One leading and trailing newline are added automatically.
            entry_id: The id to stamp on this entry. The caller normally
                builds it with :meth:`EntryId.next_for`.
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter (later wins). ``None`` skips the merge.

        Returns:
            ``path``, normalised to a :class:`~pathlib.Path`.
        """
        return await self.append_entries(
            path,
            [(entry_body, entry_id)],
            frontmatter_updates=frontmatter_updates,
        )

    async def append_entries(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append ``N`` entry blocks in a single locked read-modify-write cycle.

        Compared with calling :meth:`append_entry` ``N`` times, this:

        * Performs one file read + one file write instead of ``N`` of each
          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
          already holds many entries).
        * Holds the per-path lock for one short critical section instead of
          ``N`` separate acquisitions.
        * Updates ``frontmatter`` once at the end (no intermediate
          ``entry_count`` flapping).

        The caller assigns and supplies all :class:`EntryId` values — see
        :meth:`append_entry` for the rationale. The order in ``entries`` is
        the order the blocks land in the file.

        Args:
            path: Target markdown file. Created if missing.
            entries: ``(entry_body, entry_id)`` pairs to append, in order.
                Empty sequence is allowed; the file is still rewritten
                (with any frontmatter updates applied).
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter once per call.

        Returns:
            ``path``, normalised to a :class:`~pathlib.Path`.
        """
        target = Path(path)
        async with self.lock_for(target):
            return await self._append_entries_unlocked(
                target,
                entries,
                frontmatter_updates=frontmatter_updates,
            )

    async def _append_entries_unlocked(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Same as :meth:`append_entries` but assumes the caller already
        holds :meth:`lock_for` ``(path)``.

        For use by higher-level writers that perform a multi-step
        ``read → compute eid → write`` sequence and need to keep the lock
        held across the read and the write. Public ``append_entries`` /
        ``append_entry`` always wrap this with the lock.

        Reentrant re-acquisition is unsafe — ``asyncio.Lock`` is not
        reentrant, so calling this without holding the lock yourself
        breaks the safety contract.
        """
        target = Path(path)

        # 1. Load existing markdown (or initialise empty).
        if await anyio.Path(target).is_file():
            parsed = await MarkdownReader.read(target)
            meta: dict[str, Any] = dict(parsed.frontmatter)
            body = parsed.body
        else:
            meta = {}
            body = ""

        # 2. Shallow-merge frontmatter updates (later keys win).
        if frontmatter_updates:
            meta.update(frontmatter_updates)

        # 3. Append all entry blocks in order.
        if entries:
            # Make sure the first new marker starts on its own line.
            if body and not body.endswith("\n"):
                body += "\n"
            appended_blocks: list[str] = []
            for entry_body, entry_id in entries:
                eid_str = entry_id.format()
                appended_blocks.append(
                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
                    f"<!-- /entry:{eid_str} -->\n"
                )
            body = body + "".join(appended_blocks)

        # 4. Atomic write.
        return await self.write_markdown(target, frontmatter=meta, body=body)
def _write_and_fsync(tmp: Path, content: str) -> None:
    """Write ``content`` to ``tmp`` as UTF-8 and force it to disk.

    Synchronous on purpose: the writer runs it in a worker thread. The
    Python-level buffer is flushed first so ``os.fsync`` sees every byte.
    """
    with open(tmp, mode="w", encoding="utf-8") as staging:
        descriptor = staging.fileno()
        staging.write(content)
        staging.flush()
        os.fsync(descriptor)
async def _unlink_quiet(tmp: Path) -> None:
    """Remove ``tmp`` if it exists, ignoring any ``OSError``.

    Used on the failure path of a write, where the exception that caused
    the cleanup must stay the one the caller sees.
    """
    try:
        await anyio.Path(tmp).unlink(missing_ok=True)
    except OSError:
        # Best-effort: a failed cleanup must not mask the original error.
        pass

View File

@ -0,0 +1,243 @@
"""memory-root path manager.
Single root directory holding all persisted memory:
User-visible (no dot prefix, edited by humans / agents; each lives under
an ``<app>/<project>/`` partition of the root):
agents/ per-agent records
users/ per-user records
knowledge/ global shared knowledge
System-managed (dotfile prefix, hidden by default in ls / Finder):
.index/ derived indexes (rebuildable from markdown)
sqlite/ system.db (+ WAL/SHM), ome.db, ome.aps.db
lancedb/ LanceDB tables
.tmp/ atomic-write staging directory
.lock single-process lock anchor (created on demand by
``memory_root_lock``)
User-editable (at the root):
ome.toml OME strategy overrides (hot-reloaded)
The cascade queue, LSN watermark, and change audit all live in
``system.db`` (table ``md_change_state``), not in separate dotfiles.
The default location and tunables come from :class:`everos.config.Settings`
(loaded from ``config/default.toml`` + ``EVEROS_*`` environment variables);
:meth:`MemoryRoot.default` resolves the configured path.
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
# ── app / project directory-name convention ──────────────────────────────────
#
# A memory root is partitioned by ``<app>/<project>`` *before* the user-visible
# scope dirs (``agents`` / ``users`` / ``knowledge``), so memory for different
# (app, project) pairs never shares a directory. The reserved id ``"default"``
# materialises as ``default_app`` / ``default_project`` on disk (rather than a
# bare ``default``) so a default space is visually distinct from a user-named
# directory; every other id maps to itself.
#
# The mapping is symmetric: the cascade path parser reverses it (see
# :func:`app_id_from_dir`) to recover the ids from an on-disk path. The write
# side (here) and the read side (cascade) MUST stay in lockstep, or rebuilt
# rows carry app/project that disagree with what was written. ``default_app`` /
# ``default_project`` are therefore reserved directory names.
# Reserved logical id, shared by the app axis and the project axis. It is the
# only id whose on-disk directory name differs from the id itself.
_DEFAULT_SCOPE_ID = "default"
# Directory names that stand in for the reserved id on disk.
_DEFAULT_APP_DIR = "default_app"
_DEFAULT_PROJECT_DIR = "default_project"
# Path to the shipped OME override template; copied to ``<root>/ome.toml`` on
# first ``ensure()`` so users have a real file to edit instead of having to
# create one from scratch. ``parents[2]`` is the ``src/everos/`` package root
# (memory_root.py sits at ``core/persistence/memory_root.py``).
_OME_TEMPLATE_PATH = Path(__file__).parents[2] / "config" / "default_ome.toml"
def app_dir_name(app_id: str) -> str:
    """Return the directory name under which ``app_id`` is stored.

    The reserved default id maps to the dedicated default-app directory;
    every other id is used as the directory name unchanged.
    """
    if app_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_APP_DIR
    return app_id
def project_dir_name(project_id: str) -> str:
    """Return the directory name under which ``project_id`` is stored.

    The reserved default id maps to the dedicated default-project
    directory; every other id is used as the directory name unchanged.
    """
    if project_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_PROJECT_DIR
    return project_id
def app_id_from_dir(dir_name: str) -> str:
    """Recover the ``app_id`` from a directory name.

    Inverse of :func:`app_dir_name`: the default-app directory maps back to
    the reserved default id; any other name is returned as the id.
    """
    if dir_name == _DEFAULT_APP_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
def project_id_from_dir(dir_name: str) -> str:
    """Recover the ``project_id`` from a directory name.

    Inverse of :func:`project_dir_name`: the default-project directory maps
    back to the reserved default id; any other name is returned as the id.
    """
    if dir_name == _DEFAULT_PROJECT_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
@dataclass(frozen=True, init=False)
class MemoryRoot:
    """Path manager for one memory-root directory.

    The constructor takes any path-like (``str`` or ``Path``) and stores it
    expanded (``~``) and resolved to an absolute ``Path``, so equality and
    hashing do not depend on how the caller spelled the path.

    ``init=False`` goes together with the hand-written ``__init__``: it
    decouples the accepted input type (``Path | str``) from the stored field
    type (``Path``). The stdlib dataclass has no converter slot, and Pyright
    would otherwise reject ``MemoryRoot(s)`` for a ``str`` ``s``.
    """

    root: Path

    def __init__(self, root: Path | str) -> None:
        # A frozen dataclass blocks normal attribute assignment, hence the
        # detour through ``object.__setattr__``.
        object.__setattr__(self, "root", Path(root).expanduser().resolve())

    @classmethod
    def default(cls) -> MemoryRoot:
        """Build the memory-root configured in :class:`everos.config.Settings`.

        The effective default lives in ``config/default.toml`` (``[memory]
        root``); environment variable ``EVEROS_MEMORY__ROOT`` overrides it.
        """
        # Imported lazily so this module stays dependency-free at import time.
        from everos.config import load_settings

        settings = load_settings()
        return cls(settings.memory.root)

    # ── User-visible (partitioned by app / project) ──────────────────────────
    #
    # The scope dirs hang off the ``<root>/<app>/<project>/`` prefix, so these
    # accessors take ``(app_id, project_id)`` as request-level inputs rather
    # than instance state. Both default to ``"default"`` so call sites that do
    # not carry scope yet still resolve to the default space.

    def _scope_root(self, app_id: str, project_id: str) -> Path:
        """``<root>/<app>/<project>/`` — shared prefix of the scope dirs."""
        return self.root / app_dir_name(app_id) / project_dir_name(project_id)

    def agents_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """``<root>/<app>/<project>/agents/`` — per-agent records."""
        return self._scope_root(app_id, project_id) / "agents"

    def users_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """``<root>/<app>/<project>/users/`` — per-user records."""
        return self._scope_root(app_id, project_id) / "users"

    def knowledge_dir(
        self, app_id: str = "default", project_id: str = "default"
    ) -> Path:
        """``<root>/<app>/<project>/knowledge/`` — shared knowledge."""
        return self._scope_root(app_id, project_id) / "knowledge"

    # ── System-managed (dotfiles) ───────────────────────────────────────────

    @property
    def index_dir(self) -> Path:
        """``<root>/.index/`` — derived index root."""
        return self.root / ".index"

    @property
    def lancedb_dir(self) -> Path:
        """``<root>/.index/lancedb/`` — LanceDB table root."""
        return self.index_dir / "lancedb"

    @property
    def sqlite_dir(self) -> Path:
        """``<root>/.index/sqlite/`` — SQLite database directory.

        Holds ``system.db`` plus its sidecars (``-wal`` / ``-shm`` in WAL
        mode). Symmetric with :attr:`lancedb_dir`.
        """
        return self.index_dir / "sqlite"

    @property
    def system_db(self) -> Path:
        """``<root>/.index/sqlite/system.db`` — SQLite DB for system
        state, audit log, task queue, LSN watermark, and other metadata.
        """
        return self.sqlite_dir / "system.db"

    @property
    def ome_db(self) -> Path:
        """``<root>/.index/sqlite/ome.db`` — SQLite DB backing the Offline
        Memory Engine's own state: run records, counter store, idle store.
        Symmetric with :attr:`system_db`.
        """
        return self.sqlite_dir / "ome.db"

    @property
    def ome_aps_db(self) -> Path:
        """``<root>/.index/sqlite/ome.aps.db`` — SQLite DB holding the
        APScheduler jobstore for the Offline Memory Engine. Split from
        :attr:`ome_db` so APS's sync SQLAlchemy writer and OME's async
        aiosqlite writer never contend for the same sqlite file lock.
        """
        return self.sqlite_dir / "ome.aps.db"

    @property
    def ome_config(self) -> Path:
        """``<root>/ome.toml`` — user-editable OME strategy overrides.

        Drop a file here to toggle strategies on/off or tweak per-strategy
        knobs (max_retries, gate, cron …) without restarting the server.
        The engine watches this file and hot-reloads changes within ~2 s.

        Example to disable foresight and user-profile extraction::

            [strategies.extract_foresight]
            enabled = false

            [strategies.extract_user_profile]
            enabled = false
        """
        return self.root / "ome.toml"

    @property
    def lock_file(self) -> Path:
        """``<root>/.lock`` — single-process exclusive lock anchor."""
        return self.root / ".lock"

    @property
    def tmp_dir(self) -> Path:
        """``<root>/.tmp/`` — staging directory for batch / multi-step writes.

        Note:
            ``MarkdownWriter`` does *not* use this for atomic single-file
            writes; it uses a same-directory temp file to guarantee a
            same-filesystem rename. This directory is reserved for callers
            that need scratch space outside any single target directory.
        """
        return self.root / ".tmp"

    # ── Operations ──────────────────────────────────────────────────────────

    def ensure(self) -> None:
        """Create the memory-root and the dot-directories the runtime needs.

        The user-visible scope directories (``agents/`` / ``users/`` /
        ``knowledge/``) are *not* pre-created — they appear on first write
        of their records. Only infrastructure directories are made::

            <root>/
            <root>/.index/
            <root>/.index/sqlite/
            <root>/.index/lancedb/
            <root>/.tmp/

        If ``<root>/ome.toml`` does not exist yet, it is seeded from the
        shipped OME template; an existing file is left untouched.
        """
        required = (
            self.root,
            self.index_dir,
            self.sqlite_dir,
            self.lancedb_dir,
            self.tmp_dir,
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)

        # Seed the OME override file on first run only; the existence check
        # preserves whatever the user has already edited.
        overrides = self.ome_config
        if overrides.exists():
            return
        overrides.write_bytes(_OME_TEMPLATE_PATH.read_bytes())

View File

@ -0,0 +1,42 @@
"""SQLite async persistence (SQLModel + SQLAlchemy 2.0 + aiosqlite).
External usage (engine + sessions):
from everos.core.persistence.sqlite import (
create_system_engine, create_session_factory, session_scope,
)
External usage (ORM model basics — re-exported from sqlmodel):
from everos.core.persistence.sqlite import (
SQLModel, Field, Relationship, BaseTable,
)
External usage (generic CRUD repository base):
from everos.core.persistence.sqlite import RepoBase
The ``system_db`` is the everos
``<memory_root>/.index/sqlite/system.db`` SQLite file holding system
state, audit log, task queue, LSN watermark, and other metadata.
"""
# Re-export key sqlmodel symbols so business code has a single canonical
# entry point (``everos.core.persistence.sqlite``) for ORM authoring.
from sqlmodel import Field as Field
from sqlmodel import Relationship as Relationship
from sqlmodel import SQLModel as SQLModel
from .base import BaseTable as BaseTable
from .engine import create_system_engine as create_system_engine
from .repository import RepoBase as RepoBase
from .session import create_session_factory as create_session_factory
from .session import session_scope as session_scope
# Public API of this package: exactly the names re-exported by the imports
# above, listed in sorted order.
__all__ = [
"BaseTable",
"Field",
"Relationship",
"RepoBase",
"SQLModel",
"create_session_factory",
"create_system_engine",
"session_scope",
]

View File

@ -0,0 +1,112 @@
"""Common SQLModel base for everos tables.
:class:`BaseTable` adds ``created_at`` / ``updated_at`` columns. The
``updated_at`` column auto-refreshes on UPDATE through SA's ``onupdate``
hook (no explicit assignment needed in business code).
The **two-zone storage-UTC discipline** is enforced by a SQLAlchemy
:class:`TypeDecorator` (:class:`UtcDateTimeColumn`) used as the SQL
column type for every datetime field:
* **on write** — ``process_bind_param`` converts every datetime to
aware UTC before SQLAlchemy emits the bound parameter. This covers
*every* SQLAlchemy write path uniformly:
- ORM ``session.add()`` / ``session.merge()`` (unit-of-work flush)
- Core ``session.execute(insert(...).values(...))``
- Core ``session.execute(update(...).values(...))``
- Bulk ``bulk_insert_mappings`` / ``bulk_save_objects``
- Raw SQL with bound parameters
Reaching into the column type is the only place SQLAlchemy guarantees
*every* write path passes through. Mapper events (``before_insert`` /
``before_update``) only fire on the ORM unit-of-work path and would
silently miss Core statements — which :mod:`everos.infra.persistence
.sqlite.repos.md_change_state` uses heavily.
* **on read** — ``process_result_value`` re-attaches ``tzinfo=UTC`` to
every naive datetime returned from SQLite (which has no native tz
storage and always returns naive). Callers therefore never observe a
naive datetime regardless of which read API they use.
Subclass with ``table=True`` to declare a real SQLite table::
from sqlmodel import Field
class Sender(BaseTable, table=True):
id: int | None = Field(default=None, primary_key=True)
name: str
"""
from __future__ import annotations
import datetime as _dt
from typing import Any
from sqlalchemy import DateTime
from sqlalchemy import types as sa_types
from sqlmodel import Field, SQLModel
from everos.component.utils.datetime import UtcDatetime, ensure_utc, get_utc_now
class UtcDateTimeColumn(sa_types.TypeDecorator[_dt.datetime]):
    """SQLAlchemy column type that keeps stored datetimes in UTC.

    How it works:

    * ``impl = DateTime`` — delegates storage to the dialect's standard
      DateTime type.
    * ``process_bind_param`` — write hook. Every ``datetime`` is passed
      through :func:`ensure_utc` (see its docstring for how aware and naive
      values are treated); ``None`` and non-datetime values are handed on
      unchanged.
    * ``process_result_value`` — read hook. A naive ``datetime`` gets
      ``tzinfo=UTC`` attached; aware values, ``None`` and anything else are
      returned as they are.

    ``cache_ok = True`` tells SQLAlchemy that statements using this type may
    be cached — the type carries no per-instance state.
    """

    impl = DateTime
    cache_ok = True

    def process_bind_param(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        """Normalise an outgoing ``datetime`` to UTC before it is bound."""
        if isinstance(value, _dt.datetime):
            return ensure_utc(value)
        # ``None`` and non-datetime values are bound as supplied.
        return value

    def process_result_value(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        """Attach ``tzinfo=UTC`` to a naive ``datetime`` read from the DB."""
        is_naive = isinstance(value, _dt.datetime) and value.tzinfo is None
        return value.replace(tzinfo=_dt.UTC) if is_naive else value
class BaseTable(SQLModel):
    """Mixin contributing ``created_at`` / ``updated_at`` columns.

    Both fields use :func:`get_utc_now` as their ``default_factory``, so a
    row created without explicit timestamps gets the current UTC time.
    ``updated_at`` additionally registers :func:`get_utc_now` as the
    column's ``onupdate`` hook, which SQLAlchemy applies on UPDATE — do not
    set it manually unless you mean to override it.

    Both columns are typed with :class:`UtcDateTimeColumn`, so the
    storage-UTC rule is enforced **at the SQLAlchemy bind layer** rather
    than in business code.
    """

    created_at: UtcDatetime = Field(
        sa_type=UtcDateTimeColumn,
        default_factory=get_utc_now,
    )
    updated_at: UtcDatetime = Field(
        sa_type=UtcDateTimeColumn,
        sa_column_kwargs={"onupdate": get_utc_now},
        default_factory=get_utc_now,
    )

View File

@ -0,0 +1,74 @@
"""Async SQLAlchemy engine factory + per-connection PRAGMA listener.
The engine connects through ``aiosqlite`` (SA URL ``sqlite+aiosqlite://``).
PRAGMAs are *per-connection* — they must be re-applied every time the
SA pool opens a new connection. We attach a ``connect`` event listener on
the engine's underlying sync engine for that purpose.
"""
from __future__ import annotations
from pathlib import Path
from sqlalchemy import event
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from everos.config import SqliteSettings
def create_system_engine(
    db_path: Path,
    sqlite_settings: SqliteSettings,
    *,
    echo: bool = False,
) -> AsyncEngine:
    """Create an async SQLAlchemy engine for the everos system DB.

    ``MemoryRoot.system_db`` is the conventional path; the DB holds system
    state, audit log, task queue, LSN watermark, and other metadata.

    Args:
        db_path: Filesystem path to the system DB file. Parent directory is
            created if missing.
        sqlite_settings: Tunables (journal_mode, synchronous, foreign_keys,
            temp_store, busy_timeout, journal_size_limit, cache_size).
        echo: When ``True``, SQLAlchemy logs every statement (development).

    Returns:
        An :class:`AsyncEngine` ready for use with :class:`AsyncSession`,
        with the per-connection PRAGMA listener already attached.
    """
    # Function-scope import: keeps this change self-contained.
    from sqlalchemy.engine import URL

    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Build the URL from components instead of interpolating the path into
    # a string: a path containing ``?`` would otherwise be split into
    # "database" + "query string" when SQLAlchemy parses the URL. For
    # ordinary paths the result is the same ``sqlite+aiosqlite:///<path>``
    # URL as before (four slashes for an absolute path).
    url = URL.create("sqlite+aiosqlite", database=str(db_path))
    engine = create_async_engine(url, echo=echo, future=True)
    _register_pragma_listener(engine, sqlite_settings)
    return engine
def _register_pragma_listener(
    engine: AsyncEngine,
    sqlite_settings: SqliteSettings,
) -> None:
    """Make every new DB-API connection of ``engine`` apply the PRAGMAs.

    PRAGMAs are per-connection, so the listener is hooked onto the
    ``connect`` event of the engine's underlying sync engine and runs each
    time the pool opens a connection.
    """

    def _apply_pragmas(dbapi_connection, _connection_record) -> None:  # type: ignore[no-untyped-def]
        cur = dbapi_connection.cursor()
        try:
            fk_switch = "ON" if sqlite_settings.foreign_keys else "OFF"
            statements = (
                f"PRAGMA journal_mode={sqlite_settings.journal_mode}",
                f"PRAGMA synchronous={sqlite_settings.synchronous}",
                f"PRAGMA foreign_keys={fk_switch}",
                f"PRAGMA temp_store={sqlite_settings.temp_store}",
                f"PRAGMA busy_timeout={sqlite_settings.busy_timeout_ms}",
                f"PRAGMA journal_size_limit={sqlite_settings.journal_size_limit_bytes}",
                # cache_size: negative = KB, positive = pages.
                f"PRAGMA cache_size=-{sqlite_settings.cache_size_kb}",
            )
            # Executed one by one, in the order listed.
            for statement in statements:
                cur.execute(statement)
        finally:
            cur.close()

    event.listen(engine.sync_engine, "connect", _apply_pragmas)

View File

@ -0,0 +1,166 @@
"""Generic CRUD repository for SQLModel-backed tables.
``RepoBase`` is a pure generic CRUD helper that sits alongside
:class:`BaseTable`. It knows nothing about a storage runtime — concrete
repos either pass ``session_factory`` explicitly (typical in tests) or
override :meth:`_factory_lookup` to pull the singleton from their
storage manager (typical in :mod:`everos.infra.persistence.sqlite.repos`).
Each method opens its own ``session_scope`` (auto rollback on exception,
session closed at end). For multi-step transactional work, use the
session factory directly via :attr:`session_factory`.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlmodel import SQLModel, select
from .session import session_scope
class RepoBase[T: SQLModel]:
"""Generic CRUD repository for one SQLModel table.
Subclass and bind to a model. Two ways to provide the session factory:
1. **Explicit (tests / DI)** — pass it to ``__init__``::
repo = SenderRepo(session_factory)
2. **Lazy hook (production singletons)** — override
:meth:`_factory_lookup` so the repo can be instantiated as a
module-level singleton with no factory bound yet::
class _SenderRepo(RepoBase[Sender]):
model = Sender
def _factory_lookup(self):
from everos.infra.persistence.sqlite.sqlite_manager import (
get_session_factory,
)
return get_session_factory()
sender_repo = _SenderRepo()
await sender_repo.add(Sender(name="alice"))
"""
model: type[T]
def __init__(
self,
session_factory: async_sessionmaker[AsyncSession] | None = None,
) -> None:
"""Bind to a session factory; if ``None``, defer to ``_factory_lookup``."""
self._factory_override = session_factory
def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
"""Resolve a session factory on first use. Override in subclass.
``RepoBase`` itself has no idea where the runtime singleton lives
— that knowledge belongs to the infra subclass. The default raises
so a missing override is loud rather than silently broken.
"""
raise NotImplementedError(
f"{type(self).__name__}: pass session_factory= to __init__ "
"or override _factory_lookup() to wire the storage manager."
)
@property
def _factory(self) -> async_sessionmaker[AsyncSession]:
if self._factory_override is not None:
return self._factory_override
return self._factory_lookup()
@property
def session_factory(self) -> async_sessionmaker[AsyncSession]:
"""Underlying session factory (for multi-step transactions)."""
return self._factory
# ── Create ─────────────────────────────────────────────────────────────
async def add(self, instance: T) -> T:
"""Insert one row, commit, refresh, return the instance."""
async with session_scope(self._factory) as s:
s.add(instance)
await s.commit()
await s.refresh(instance)
return instance
async def add_many(self, instances: Sequence[T]) -> list[T]:
"""Insert many rows in one transaction."""
items = list(instances)
async with session_scope(self._factory) as s:
s.add_all(items)
await s.commit()
for inst in items:
await s.refresh(inst)
return items
# ── Read ───────────────────────────────────────────────────────────────
async def get_by_id(self, id_value: Any) -> T | None:
"""Get a row by primary key. Returns ``None`` if not found."""
async with session_scope(self._factory) as s:
return await s.get(self.model, id_value)
async def list_all(self) -> list[T]:
"""Return all rows (no filter, no order)."""
async with session_scope(self._factory) as s:
stmt = select(self.model)
return list((await s.execute(stmt)).scalars().all())
async def find_where(self, **filters: Any) -> list[T]:
"""Equality-only filtering, e.g. ``find_where(name="alice", active=True)``."""
async with session_scope(self._factory) as s:
stmt = select(self.model).filter_by(**filters)
return list((await s.execute(stmt)).scalars().all())
async def find_one(self, **filters: Any) -> T | None:
"""First row matching ``filters`` (no ordering); ``None`` if not found."""
async with session_scope(self._factory) as s:
stmt = select(self.model).filter_by(**filters).limit(1)
return (await s.execute(stmt)).scalars().first()
async def count(self) -> int:
"""Total row count (no filter)."""
async with session_scope(self._factory) as s:
stmt = select(func.count()).select_from(self.model)
return int((await s.execute(stmt)).scalar_one())
# ── Update ─────────────────────────────────────────────────────────────
async def update(self, instance: T) -> T:
"""Persist changes on an instance whose primary key already exists.
Uses ``session.merge`` so detached / fresh-from-Pydantic instances
are reattached. ``BaseTable.updated_at`` auto-bumps via SA's
``onupdate`` hook.
"""
async with session_scope(self._factory) as s:
merged = await s.merge(instance)
await s.commit()
await s.refresh(merged)
return merged
# ── Delete ─────────────────────────────────────────────────────────────
async def delete(self, instance: T) -> None:
"""Delete by instance (primary key must be set)."""
async with session_scope(self._factory) as s:
merged = await s.merge(instance)
await s.delete(merged)
await s.commit()
async def delete_by_id(self, id_value: Any) -> bool:
"""Delete by primary key. Returns ``True`` if a row was removed."""
async with session_scope(self._factory) as s:
instance = await s.get(self.model, id_value)
if instance is None:
return False
await s.delete(instance)
await s.commit()
return True

View File

@ -0,0 +1,45 @@
"""Async session factory + session scope context manager."""
from __future__ import annotations
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Return an :class:`async_sessionmaker` bound to ``engine``.

    The factory is configured with ``expire_on_commit=False`` so instance
    attributes stay readable after a commit — the conventional setup for
    async SQLAlchemy usage.
    """
    factory_options = {
        "bind": engine,
        "class_": AsyncSession,
        "expire_on_commit": False,
    }
    return async_sessionmaker(**factory_options)
@asynccontextmanager
async def session_scope(
    session_factory: async_sessionmaker[AsyncSession],
) -> AsyncIterator[AsyncSession]:
    """Provide an :class:`AsyncSession` wrapped in rollback-on-error handling.

    When the ``async with`` body raises an ``Exception`` the session is
    rolled back and the error is re-raised; leaving the block closes the
    session either way. Committing is the caller's job.

    Usage:
        factory = create_session_factory(engine)
        async with session_scope(factory) as session:
            session.add(some_record)
            await session.commit()
    """
    # The factory's own context manager owns the session's lifetime.
    async with session_factory() as active_session:
        try:
            yield active_session
        except Exception:
            # Undo whatever the failed body left pending, then propagate.
            await active_session.rollback()
            raise

View File

@ -0,0 +1,5 @@
"""Presentation layer.
Translates external requests (CLI / HTTP) into service-layer calls.
Contains no business logic.
"""

View File

@ -0,0 +1,11 @@
"""HTTP REST entry point (FastAPI), routed by resource.
External usage:
from everos.entrypoints.api import create_app
app = create_app()
"""
from .app import create_app as create_app
__all__ = ["create_app"]

View File

@ -0,0 +1,124 @@
"""FastAPI application factory.
Wires CORS + the project's middleware stack + global exception handler +
lifespan, and registers the routes (``/health``, ``/metrics``, and the
``/api/v1/memory`` endpoints).
"""
from __future__ import annotations
import os
from fastapi import FastAPI, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from everos.core.lifespan import (
LifespanProvider,
MetricsLifespanProvider,
build_lifespan,
)
from everos.core.middleware import (
DEFAULT_CORS_ALLOW_CREDENTIALS,
DEFAULT_CORS_ALLOW_HEADERS,
DEFAULT_CORS_ALLOW_METHODS,
DEFAULT_CORS_ORIGINS,
ProfileMiddleware,
PrometheusMiddleware,
global_exception_handler,
)
from everos.core.observability.logging import get_logger
from .lifespans import (
CascadeLifespanProvider,
LanceDBLifespanProvider,
LLMLifespanProvider,
OmeLifespanProvider,
SqliteLifespanProvider,
)
from .routes import (
get,
health,
memorize,
metrics,
search,
)
logger = get_logger(__name__)
def _docs_enabled() -> bool:
    """Report whether /docs, /redoc and /openapi.json should be served.

    They are enabled only when the ``ENV`` environment variable equals
    ``DEV`` (case-insensitive); an unset variable counts as ``prod``.
    """
    env_name = os.environ.get("ENV", "prod")
    return env_name.upper() == "DEV"
def create_app(
    *,
    cors_origins: list[str] | None = None,
    cors_allow_credentials: bool = DEFAULT_CORS_ALLOW_CREDENTIALS,
    cors_allow_methods: list[str] | None = None,
    cors_allow_headers: list[str] | None = None,
    lifespan_providers: list[LifespanProvider] | None = None,
) -> FastAPI:
    """Build the FastAPI application instance.

    For the three CORS list arguments ``None`` means "use the project
    default"; any list that is passed — including an empty one, which
    allows nothing — is forwarded to ``CORSMiddleware`` unchanged.

    Args:
        cors_origins: Allowed CORS origins (``None`` →
            ``DEFAULT_CORS_ORIGINS``).
        cors_allow_credentials: Whether to allow credentials (default:
            ``DEFAULT_CORS_ALLOW_CREDENTIALS``).
        cors_allow_methods: Allowed CORS methods (``None`` →
            ``DEFAULT_CORS_ALLOW_METHODS``).
        cors_allow_headers: Allowed CORS headers (``None`` →
            ``DEFAULT_CORS_ALLOW_HEADERS``).
        lifespan_providers: Optional list of LifespanProvider; defaults to
            ``[MetricsLifespanProvider(), LLMLifespanProvider(),
            SqliteLifespanProvider(), LanceDBLifespanProvider(),
            CascadeLifespanProvider(), OmeLifespanProvider()]``.

    Returns:
        FastAPI: Configured application instance.
    """
    enable_docs = _docs_enabled()

    if lifespan_providers is None:
        lifespan_providers = [
            MetricsLifespanProvider(),
            LLMLifespanProvider(),
            SqliteLifespanProvider(),
            LanceDBLifespanProvider(),
            CascadeLifespanProvider(),
            OmeLifespanProvider(),
        ]

    # ``is None`` rather than ``or``: an explicit empty list is a deliberate
    # "allow nothing" and must not be widened to the permissive defaults.
    allow_origins = DEFAULT_CORS_ORIGINS if cors_origins is None else cors_origins
    allow_methods = (
        DEFAULT_CORS_ALLOW_METHODS if cors_allow_methods is None else cors_allow_methods
    )
    allow_headers = (
        DEFAULT_CORS_ALLOW_HEADERS if cors_allow_headers is None else cors_allow_headers
    )

    app = FastAPI(
        title="everos",
        version="0.1.0",
        description="md-first memory extraction framework",
        lifespan=build_lifespan(lifespan_providers),
        docs_url="/docs" if enable_docs else None,
        redoc_url="/redoc" if enable_docs else None,
        openapi_url="/openapi.json" if enable_docs else None,
    )

    # Exception handlers: HTTPException, validation errors, plus a fallback.
    app.add_exception_handler(HTTPException, global_exception_handler)
    app.add_exception_handler(RequestValidationError, global_exception_handler)
    app.add_exception_handler(Exception, global_exception_handler)

    # Middleware order: earlier `add_middleware` calls become inner, later ones outer.
    # CORS innermost (matches base_app.py legacy pattern).
    app.add_middleware(
        CORSMiddleware,
        allow_origins=allow_origins,
        allow_credentials=cors_allow_credentials,
        allow_methods=allow_methods,
        allow_headers=allow_headers,
    )
    app.add_middleware(PrometheusMiddleware)
    app.add_middleware(ProfileMiddleware)

    # Routes.
    app.include_router(health.router)
    app.include_router(metrics.router)
    app.include_router(memorize.router)
    app.include_router(search.router)
    app.include_router(get.router)

    logger.info("app_created", docs_enabled=enable_docs)
    return app

View File

@ -0,0 +1,35 @@
"""HTTP API lifespan providers.
Concrete :class:`everos.core.lifespan.LifespanProvider` implementations
for the storage + chassis backends this entrypoint composes. They live next to
``app.py`` because they are *application-bootstrap* details, not
generic chassis: a different deployment mode (CLI, embedded, batch
worker) may compose a different set of providers.
Putting these here also keeps ``core.lifespan`` free of concrete-
backend imports — the chassis stays portable.
External usage::
from everos.entrypoints.api.lifespans import (
LLMLifespanProvider,
SqliteLifespanProvider,
LanceDBLifespanProvider,
CascadeLifespanProvider,
OmeLifespanProvider,
)
"""
from .cascade import CascadeLifespanProvider as CascadeLifespanProvider
from .lancedb import LanceDBLifespanProvider as LanceDBLifespanProvider
from .llm import LLMLifespanProvider as LLMLifespanProvider
from .ome import OmeLifespanProvider as OmeLifespanProvider
from .sqlite import SqliteLifespanProvider as SqliteLifespanProvider
__all__ = [
"CascadeLifespanProvider",
"LLMLifespanProvider",
"LanceDBLifespanProvider",
"OmeLifespanProvider",
"SqliteLifespanProvider",
]

View File

@ -0,0 +1,55 @@
"""Cascade lifespan provider — starts/stops :class:`CascadeOrchestrator`.
Ordered after SqliteLifespan + LanceDBLifespan: the orchestrator
depends on both stores being ready before its watcher / scanner /
worker tasks can take the first row.
Construction reads the live :class:`Settings` to build the embedding +
tokenizer providers. If either is misconfigured the lifespan fails
fast — the daemon would be useless without them anyway.
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from everos.component.embedding import build_embedding_provider
from everos.component.tokenizer import build_tokenizer
from everos.config import load_settings
from everos.core.lifespan import LifespanProvider
from everos.core.observability.logging import get_logger
from everos.core.persistence import MemoryRoot
from everos.memory.cascade import CascadeOrchestrator
logger = get_logger(__name__)
class CascadeLifespanProvider(LifespanProvider):
    """Own the cascade orchestrator for the lifetime of the app."""

    def __init__(self, order: int = 12) -> None:
        super().__init__(name="cascade", order=order)
        # Set by ``startup``; ``None`` while nothing is running.
        self._orchestrator: CascadeOrchestrator | None = None

    async def startup(self, app: FastAPI) -> Any:
        """Build the orchestrator from live settings, start it, return it."""
        settings = load_settings()
        memory_root = MemoryRoot.default()
        memory_root.ensure()

        orchestrator = CascadeOrchestrator(
            memory_root=memory_root,
            embedder=build_embedding_provider(settings.embedding),
            tokenizer=build_tokenizer(),
        )
        # Recorded before ``start()`` so ``shutdown`` can still reach it.
        self._orchestrator = orchestrator
        await orchestrator.start()
        logger.info("cascade_lifespan_ready")
        return orchestrator

    async def shutdown(self, app: FastAPI) -> None:
        """Stop the orchestrator if one was created, then forget it."""
        orchestrator = self._orchestrator
        if orchestrator is None:
            return
        await orchestrator.stop()
        self._orchestrator = None

View File

@ -0,0 +1,55 @@
"""LanceDB lifespan provider (HTTP API entrypoint).
Startup:
Open the connection via ``get_connection`` (lazy, idempotent).
Importing :mod:`everos.infra.persistence.lancedb` also triggers the
side-effect import of ``tables`` so business schemas are loaded
(future: preflight registration).
Shutdown:
Close the connection (also clears the table cache).
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from everos.core.lifespan import LifespanProvider
from everos.core.observability.logging import get_logger
from everos.infra.persistence.lancedb import (
dispose_connection,
ensure_business_indexes,
get_connection,
verify_business_schemas,
)
logger = get_logger(__name__)
class LanceDBLifespanProvider(LifespanProvider):
    """Manage the LanceDB connection + table cache for the app lifecycle.

    Startup performs, in order:

    1. ``get_connection`` — lazily open the async connection.
    2. ``verify_business_schemas`` — fail loudly when an on-disk table's
       columns have drifted from the current Pydantic schema. LanceDB has
       no online migration; cascade is rebuildable from md, so the
       documented recovery is ``rm -rf ~/.everos/.index/lancedb``.
    3. ``ensure_business_indexes`` — idempotent FTS index creation.
    """

    def __init__(self, order: int = 11) -> None:
        super().__init__(name="lancedb", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Open the connection, check schemas, ensure indexes; return it."""
        connection = await get_connection()
        # Schemas are verified before any index work is attempted.
        await verify_business_schemas()
        await ensure_business_indexes()
        logger.info("lancedb_ready", uri=connection.uri)
        return connection

    async def shutdown(self, app: FastAPI) -> None:
        """Dispose the shared LanceDB connection."""
        await dispose_connection()

View File

@ -0,0 +1,36 @@
"""LLM lifespan provider — eagerly resolves the LLM singleton at startup.
The framework's core value (memory extraction) is meaningless without
an LLM, so misconfiguration must surface as a startup failure instead
of N silent skips per request downstream. Ordered before the storage
stack so we fail before paying to bring sqlite / lancedb / cascade up.
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from everos.component.llm import get_llm_client
from everos.core.lifespan import LifespanProvider
from everos.core.observability.logging import get_logger
logger = get_logger(__name__)
class LLMLifespanProvider(LifespanProvider):
    """Resolve the LLM client eagerly so bad credentials fail at startup."""

    def __init__(self, order: int = 8) -> None:
        super().__init__(name="llm", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Fetch the LLM client (raising on misconfiguration) and return it."""
        llm_client = get_llm_client()
        logger.info("llm_lifespan_ready")
        return llm_client

    async def shutdown(self, app: FastAPI) -> None:
        """No-op: the client is a stateless facade over openai.AsyncOpenAI."""
        return None

View File

@ -0,0 +1,39 @@
"""OME engine lifespan provider (HTTP API entrypoint).
Startup: build the singleton engine via service.memorize._get_engine
(which also registers strategies) and start it.
Shutdown: stop the engine.
"""
from __future__ import annotations
import importlib
from typing import Any
from fastapi import FastAPI
from everos.core.lifespan import LifespanProvider
from everos.core.observability.logging import get_logger
logger = get_logger(__name__)
class OmeLifespanProvider(LifespanProvider):
    """Manage the OfflineEngine lifecycle for the FastAPI app.

    The engine resolved during ``startup`` is remembered on the provider,
    and ``shutdown`` stops exactly that engine. A shutdown without a prior
    startup is therefore a no-op; it no longer builds a fresh engine via
    ``_get_engine()`` only to stop it.
    """

    def __init__(self, order: int = 50) -> None:
        super().__init__(name="ome", order=order)
        # Engine handed out by ``startup``; ``None`` until then.
        self._engine: Any | None = None

    async def startup(self, app: FastAPI) -> Any:
        """Resolve the singleton engine, start it, and return it."""
        # NOTE(review): the module is loaded by dotted name, presumably
        # because ``everos.service`` also exports a ``memorize`` callable
        # under the same name — confirm.
        svc = importlib.import_module("everos.service.memorize")
        engine = svc._get_engine()  # noqa: SLF001 — service-internal accessor
        # Recorded before ``start()`` so a partially started engine is still
        # stopped on shutdown.
        self._engine = engine
        await engine.start()
        logger.info("ome_engine_started")
        return engine

    async def shutdown(self, app: FastAPI) -> None:
        """Stop the engine obtained in ``startup``, if there is one."""
        engine = self._engine
        if engine is None:
            return
        self._engine = None
        await engine.stop()
        logger.info("ome_engine_stopped")

View File

@ -0,0 +1,45 @@
"""SQLite system-DB lifespan provider (HTTP API entrypoint).
Startup:
1. Build the engine via ``get_engine`` (lazy, idempotent). Importing
:mod:`everos.infra.persistence.sqlite` also triggers the side-
effect import of ``tables`` so every business SQLModel registers
itself in ``SQLModel.metadata``.
2. ``SQLModel.metadata.create_all`` so every registered table exists.
Shutdown:
Dispose the engine + connection pool.
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from sqlmodel import SQLModel
from everos.core.lifespan import LifespanProvider
from everos.core.observability.logging import get_logger
from everos.infra.persistence.sqlite import dispose_engine, get_engine
logger = get_logger(__name__)
class SqliteLifespanProvider(LifespanProvider):
    """Own the SQLite system-DB engine and its schema during the app's life."""

    def __init__(self, order: int = 10) -> None:
        super().__init__(name="sqlite", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Create every registered SQLModel table, then return the engine."""
        engine = get_engine()
        metadata = SQLModel.metadata
        async with engine.begin() as conn:
            await conn.run_sync(metadata.create_all)
        table_count = len(metadata.tables)
        logger.info("sqlite_schema_ready", tables=table_count)
        return engine

    async def shutdown(self, app: FastAPI) -> None:
        """Dispose the engine and its connection pool."""
        await dispose_engine()

View File

@ -0,0 +1,5 @@
"""HTTP route modules.
Each module here exposes an ``APIRouter`` named ``router`` registered by
:func:`everos.entrypoints.api.app.create_app` via ``app.include_router``.
"""

View File

@ -0,0 +1,26 @@
"""POST /api/v1/memory/get — paginated listing endpoint.
Thin adapter: validate the request DTO, dispatch to the service layer,
return the envelope verbatim. ``request_id`` is generated inside the
:class:`GetManager`; we trust the value on the way out.
"""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from everos.memory.get import GetRequest, GetResponse
from everos.memory.search import FilterError
from everos.service import get as get_service
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
@router.post("/get", response_model=GetResponse)
async def post_get(req: GetRequest) -> GetResponse:
    """Paginated listing over the requested ``memory_type``."""
    # Thin adapter: the service builds the whole envelope.
    try:
        envelope = await get_service(req)
    except FilterError as exc:
        # Filter-DSL violations surface as 422 with the compile message.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return envelope

View File

@ -0,0 +1,13 @@
"""Health check route."""
from __future__ import annotations
from fastapi import APIRouter
router = APIRouter(tags=["health"])
@router.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe — returns ``{"status": "ok"}`` with HTTP 200."""
    # Constant payload; no dependency is consulted.
    payload = dict(status="ok")
    return payload

View File

@ -0,0 +1,195 @@
"""POST /api/v1/memory/add and /api/v1/memory/flush.
DTOs follow the v1 API brief (01_v1_api_brief.md §2 / §3). Routes are
thin adapters: validate the DTO, dump to dict, hand to service. No
business logic lives here.
``/flush`` is OSS-only (the cloud edition decides boundary timing
server-side and does not expose this endpoint).
"""
from __future__ import annotations
from typing import Annotated, Any, Literal
from fastapi import APIRouter, HTTPException, Request
from pydantic import AfterValidator, BaseModel, ConfigDict, Field
from everos.core.errors import MultimodalError
from everos.core.observability.tracing import gen_request_id
from everos.service import memorize
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
# ── Path-safe identifier ────────────────────────────────────────────────────
# ``app_id`` / ``project_id`` become directory segments under the memory
# root, so they must reject ``.`` and ``..`` (path traversal). The basic
# character whitelist is enforced via ``pattern`` (pydantic_core uses the
# Rust regex engine, which does NOT support lookaround), and the two
# reserved tokens are filtered out with a follow-up ``AfterValidator``.
# Whitelist applied through ``Field(pattern=...)``: letters, digits, ``_``,
# ``.`` and ``-`` only, at least one character.
_PATH_SAFE_CHARSET = r"^[a-zA-Z0-9_.-]+$"
# Values that satisfy the whitelist but must never become a path segment.
_PATH_TRAVERSAL_TOKENS = frozenset((".", ".."))


def _reject_path_traversal(value: str) -> str:
    """Return ``value`` unchanged unless it is exactly ``.`` or ``..``.

    Raises:
        ValueError: ``value`` is one of the reserved traversal tokens.
    """
    if value not in _PATH_TRAVERSAL_TOKENS:
        return value
    raise ValueError("'.' and '..' are reserved (path traversal)")
# ``str`` alias carrying the reserved-token check; the DTO fields that use it
# add their own length and ``pattern`` constraints through ``Field``.
PathSafeId = Annotated[str, AfterValidator(_reject_path_traversal)]
# DTOs ────────────────────────────────────────────────────────────────────────
# Function part of a tool call: the function name plus its arguments.
# Documented with comments (not a docstring) so the generated JSON schema
# stays unchanged.
class ToolFunctionDTO(BaseModel):
    name: str
    arguments: str  # JSON string per OpenAI Chat Completions spec
# One tool call attached to a message: a call id, a ``type`` that defaults
# to "function", and the function being called.
class ToolCallDTO(BaseModel):
    id: str
    type: str = "function"
    function: ToolFunctionDTO
class ContentItemDTO(BaseModel):
    """Content piece (v1 API brief appendix A)."""

    # ``type`` is the only required field; every other field is optional and
    # defaults to ``None``.
    type: Literal["text", "image", "audio", "doc", "pdf", "html", "email"]
    text: str | None = None
    uri: str | None = None
    base64: str | None = None
    ext: str | None = None
    name: str | None = None
    extras: dict[str, Any] | None = None

    # Unknown keys are rejected instead of being silently dropped.
    model_config = ConfigDict(extra="forbid")
# One message inside an ``/add`` request. Documented with comments (not a
# docstring) so the generated JSON schema stays unchanged.
class MessageItemDTO(BaseModel):
    # Required and non-empty.
    sender_id: str = Field(..., min_length=1)
    sender_name: str | None = None
    role: Literal["user", "assistant", "tool"]
    # Required, strictly positive.
    timestamp: int = Field(
        ...,
        gt=0,
        description=(
            "Message event time as Unix epoch in **milliseconds** "
            "(v1 API contract; the algo layer auto-detects sec vs ms "
            "for backward compat but the contract is ms)."
        ),
    )
    # Either a plain string or a list of typed content items.
    content: str | list[ContentItemDTO]
    tool_calls: list[ToolCallDTO] | None = None
    tool_call_id: str | None = None
# Request body of ``POST /api/v1/memory/add``.
class MemorizeAddRequest(BaseModel):
    session_id: str = Field(..., min_length=1, max_length=128)
    # ``app_id`` / ``project_id`` default to "default" and are limited to
    # 1-128 characters of the path-safe charset.
    app_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    project_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    # Between 1 and 500 messages per request.
    messages: list[MessageItemDTO] = Field(..., min_length=1, max_length=500)
# ``data`` payload returned by ``/add``: both fields are copied from the
# service result by the route.
class AddResponseData(BaseModel):
    message_count: int
    status: Literal["accumulated", "extracted"]
# Request body of ``POST /api/v1/memory/flush``: the same identifiers as the
# add request, without a ``messages`` field.
class MemorizeFlushRequest(BaseModel):
    session_id: str = Field(..., min_length=1, max_length=128)
    # Default "default"; 1-128 characters of the path-safe charset.
    app_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    project_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
# ``data`` payload returned by ``/flush``; the route maps every service
# status other than "extracted" to "no_extraction".
class FlushResponseData(BaseModel):
    status: Literal["extracted", "no_extraction"]
class SuccessEnvelope[T](BaseModel):
    """200 wrapper: ``request_id`` sits at the top level, not inside ``data``."""

    request_id: str
    # Endpoint-specific payload; ``T`` is bound by each route's return type.
    data: T
# Route ──────────────────────────────────────────────────────────────────────
@router.post("/add")
async def add_memory(
    req: Annotated[MemorizeAddRequest, ...],
    request: Request,
) -> SuccessEnvelope[AddResponseData]:
    """Add messages into the user-memory + agent-memory pipelines."""
    # Use the id already on ``request.state`` when present and truthy;
    # otherwise mint a new one.
    state_request_id = getattr(request.state, "request_id", None)
    request_id = state_request_id if state_request_id else _gen_request_id()

    try:
        outcome = await memorize(req.model_dump())
    except MultimodalError as exc:
        # Multimodal errors from the service are reported as HTTP 415.
        raise HTTPException(status_code=415, detail=str(exc)) from exc

    payload = AddResponseData(
        message_count=outcome.message_count,
        status=outcome.status,
    )
    return SuccessEnvelope(request_id=request_id, data=payload)
@router.post("/flush")
async def flush_memory(
    req: Annotated[MemorizeFlushRequest, ...],
    request: Request,
) -> SuccessEnvelope[FlushResponseData]:
    """Force boundary detection over the current ``session_id`` buffer.

    [OSS-only] — cloud edition decides boundary timing server-side and
    does not expose this endpoint.
    """
    state_request_id = getattr(request.state, "request_id", None)
    request_id = state_request_id if state_request_id else _gen_request_id()

    # A flush is a memorize call with no new messages and ``is_final=True``.
    flush_payload = {
        "session_id": req.session_id,
        "app_id": req.app_id,
        "project_id": req.project_id,
        "messages": [],
    }
    result = await memorize(flush_payload, is_final=True)

    # service's ``accumulated`` = nothing to flush (buffer was empty);
    # ``extracted`` = at least one cell carved out.
    status: Literal["extracted", "no_extraction"]
    if result.status == "extracted":
        status = "extracted"
    else:
        status = "no_extraction"

    return SuccessEnvelope(
        request_id=request_id,
        data=FlushResponseData(status=status),
    )
def _gen_request_id() -> str:
    """Mint a request id for requests that arrive without one.

    Used by the routes when ``request.state`` carries no ``request_id``;
    delegates to the tracing helper ``gen_request_id``.
    """
    fallback_id = gen_request_id()
    return fallback_id

View File

@ -0,0 +1,20 @@
"""Prometheus metrics route."""
from __future__ import annotations
from fastapi import APIRouter
from fastapi.responses import Response
from prometheus_client import CONTENT_TYPE_LATEST
from everos.core.observability.metrics import generate_metrics_response
router = APIRouter(tags=["metrics"])
@router.get("/metrics")
async def metrics() -> Response:
    """Render the current Prometheus registry in exposition format."""
    # Body comes from the observability helper; the content type is the one
    # prometheus_client advertises for the exposition format.
    exposition = generate_metrics_response()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)

View File

@ -0,0 +1,27 @@
"""POST /api/v1/memory/search — hybrid retrieval endpoint.
Thin adapter: validate the request DTO, dispatch to the service layer,
return the envelope verbatim. ``request_id`` is generated inside the
:class:`SearchManager` (uniform for OSS + cloud); we trust that value
on the way out.
"""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from everos.memory.search import FilterError, SearchRequest, SearchResponse
from everos.service import search
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
@router.post("/search", response_model=SearchResponse)
async def post_search(req: SearchRequest) -> SearchResponse:
    """Hybrid retrieval across the configured memory backends."""
    # Thin adapter: the service builds the whole envelope.
    try:
        envelope = await search(req)
    except FilterError as exc:
        # Filter-DSL violations surface as 422 with the compile message
        # (mirrors /get's contract).
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return envelope

View File

@ -0,0 +1,5 @@
"""Command line entry point.
Contract-first design, JSON output by default, ``--describe`` machine-readable
mode, granular exit codes.
"""

View File

@ -0,0 +1,5 @@
"""CLI subcommand modules.
Each module here exposes a ``app: typer.Typer`` instance which is mounted
as a subcommand group by :mod:`everos.entrypoints.cli.main`.
"""

View File

@ -0,0 +1,267 @@
"""``everos cascade`` subcommand group.
Three one-shot operations on the cascade subsystem, all run in-process
without standing up the FastAPI app:
- ``cascade sync [PATH]`` — flush the work queue. With ``PATH`` the
command first force-enqueues that single file (used after a manual
md edit when waiting for the watcher is impractical), then drains.
- ``cascade status`` — print the queue + LSN summary that the daemon
sees right now.
- ``cascade fix`` — list every ``failed`` row. With ``--apply``, also
reset ``retryable=TRUE`` rows back to ``pending`` and drain the
worker once so the retry actually runs before the command returns.
CLI is in-process (12 doc §7.1 + 16 doc §9.2): it constructs the same
:class:`CascadeOrchestrator` as the daemon but only calls
``sync_once`` / ``drain_once`` / ``queue_summary``. No watcher /
scanner background task is started.
"""
from __future__ import annotations
import asyncio
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Annotated
import typer
from sqlmodel import SQLModel
from everos.component.embedding import build_embedding_provider
from everos.component.tokenizer import build_tokenizer
from everos.component.utils.datetime import to_display_tz
from everos.config import load_settings
from everos.core.persistence import MemoryRoot
from everos.infra.persistence.lancedb import (
dispose_connection,
ensure_business_indexes,
get_connection,
verify_business_schemas,
)
from everos.infra.persistence.sqlite import (
dispose_engine,
get_engine,
md_change_state_repo,
)
from everos.memory.cascade import CascadeOrchestrator, match_kind
# Typer sub-application for the ``cascade`` command group. Invoking the group
# without arguments prints its help (``no_args_is_help=True``).
app = typer.Typer(
    name="cascade",
    help="Inspect and operate the md → LanceDB sync queue",
    no_args_is_help=True,
)
# ── shared runtime context ───────────────────────────────────────────────
@asynccontextmanager
async def _runtime():  # type: ignore[no-untyped-def]
    """Stand up sqlite + lancedb the same way the API lifespan would.

    Setup order: resolve the engine, create every registered SQLModel
    table, open the LanceDB connection, verify the business schemas, ensure
    the business indexes. Teardown disposes the LanceDB connection first,
    then the engine.

    Each resource is disposed once it has been acquired, even when a later
    setup step or the ``async with`` body raises, and the engine is still
    disposed if disposing the connection fails.

    The stores come from the same lazy, process-wide accessors the API
    lifespan uses. NOTE(review): those singletons live inside this CLI
    process; a separately running server presumably shares state with the
    CLI through the on-disk stores rather than through these objects —
    confirm.
    """
    engine = get_engine()
    try:
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)
        await get_connection()
        try:
            await verify_business_schemas()
            await ensure_business_indexes()
            yield
        finally:
            await dispose_connection()
    finally:
        # Outer ``finally`` so the engine's pool is released on every path.
        await dispose_engine()
def _build_orchestrator() -> CascadeOrchestrator:
    """Assemble a :class:`CascadeOrchestrator` from the current settings.

    Loads the settings, makes sure the default memory root exists, then
    wires in the embedding provider and the tokenizer. The orchestrator is
    returned without being started.
    """
    settings = load_settings()
    root = MemoryRoot.default()
    root.ensure()
    return CascadeOrchestrator(
        memory_root=root,
        embedder=build_embedding_provider(settings.embedding),
        tokenizer=build_tokenizer(),
    )
# ── sync ─────────────────────────────────────────────────────────────────
@app.command("sync")
def sync(
    path: Annotated[
        Path | None,
        typer.Argument(
            help="Optional md path to force-enqueue before draining. "
            "If omitted, only the existing queue is drained.",
        ),
    ] = None,
) -> None:
    """Drain the cascade queue (and optionally re-enqueue a path first)."""

    # Validate the path against the registered cascade kinds and enqueue it;
    # exits with code 1 when no kind matches.
    async def _force_enqueue(target: Path) -> None:
        relative = _resolve_relative(target)
        kind = match_kind(relative)
        if kind is None:
            typer.echo(
                f"error: path does not match any registered cascade "
                f"kind: {relative}",
                err=True,
            )
            raise typer.Exit(code=1)
        await md_change_state_repo.force_enqueue(relative, kind.name)
        typer.echo(f"force-enqueued {relative} (kind={kind.name})")

    # Full command flow inside the shared runtime context.
    async def _drain() -> None:
        async with _runtime():
            orchestrator = _build_orchestrator()
            if path is not None:
                await _force_enqueue(path)
            row_count = await orchestrator.sync_once()
            typer.echo(f"sync complete — processed {row_count} row(s)")

    asyncio.run(_drain())
# ── status ───────────────────────────────────────────────────────────────
@app.command("status")
def status() -> None:
    """Print the queue / LSN summary."""

    # Read the summary once, then print the report line by line.
    async def _report() -> None:
        async with _runtime():
            summary = await md_change_state_repo.queue_summary()

            # Lag is clamped at zero.
            lag = max(0, summary.max_lsn - summary.last_processed_lsn)

            # Hints are appended only when the matching counter is non-zero.
            retry_hint = (
                " (eligible for `cascade fix --apply`)"
                if summary.failed_retryable
                else ""
            )
            permanent_hint = (
                " (fix md and re-save to recover)"
                if summary.failed_permanent
                else ""
            )

            report_lines = [
                "queue:",
                f" pending: {summary.pending}",
                f" done: {summary.done}",
                f" failed (retryable=TRUE): {summary.failed_retryable}" + retry_hint,
                f" failed (retryable=FALSE): {summary.failed_permanent}"
                + permanent_hint,
                "lsn:",
                f" max: {summary.max_lsn}",
                f" last_processed: {summary.last_processed_lsn}",
                f" lag: {lag}",
            ]
            for line in report_lines:
                typer.echo(line)

    asyncio.run(_report())
# ── fix ──────────────────────────────────────────────────────────────────
@app.command("fix")
def fix(
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Re-enqueue every `retryable=TRUE` row and drain the worker.",
        ),
    ] = False,
) -> None:
    """List failed rows (default) or re-enqueue retryable ones (``--apply``)."""

    async def _repair() -> None:
        async with _runtime():
            failed = await md_change_state_repo.list_failed()
            if not failed:
                typer.echo("no failed rows")
                return

            # Rows that are not retryable are only ever reported, never reset.
            stuck = [row for row in failed if not row.retryable]

            if not apply:
                # Dry run: show the table plus a hint per failure class.
                _print_failed_table(failed)
                retryable_count = len(failed) - len(stuck)
                typer.echo("")
                if retryable_count:
                    typer.echo(
                        f"run `everos cascade fix --apply` to re-enqueue "
                        f"the {retryable_count} retryable row(s)."
                    )
                if stuck:
                    typer.echo(
                        f"the {len(stuck)} retryable=FALSE row(s) require "
                        "editing the md and re-saving."
                    )
                return

            moved = await md_change_state_repo.reset_retryable_to_pending()
            typer.echo(f"re-enqueued {moved} retryable row(s)")
            if moved:
                # Only build the orchestrator when there is work to drain.
                runner = _build_orchestrator()
                handled = await runner.drain_once()
                typer.echo(f"[worker] processed {handled} row(s) on drain")
            if stuck:
                typer.echo(f"{len(stuck)} retryable=FALSE row(s) left untouched:")
                for row in stuck:
                    typer.echo(f" {row.md_path}")

    asyncio.run(_repair())
# ── helpers ──────────────────────────────────────────────────────────────
def _resolve_relative(p: Path) -> str:
    """Convert a user-supplied path into its memory-root-relative POSIX form.

    The state table keys rows by paths relative to the memory root, so
    the argument is expanded (``~``), resolved to an absolute path and
    then re-expressed relative to ``MemoryRoot.default().root``.

    Raises:
        typer.BadParameter: If the resolved path is not under the memory
            root.
    """
    store = MemoryRoot.default()
    candidate = p.expanduser().resolve()
    try:
        return candidate.relative_to(store.root).as_posix()
    except ValueError as exc:
        raise typer.BadParameter(
            f"path {p!s} is not under memory root {store.root!s}"
        ) from exc
def _print_failed_table(rows: list) -> None:  # type: ignore[type-arg]
    """Print the failed rows as a left-aligned, column-padded text table.

    Every column is sized from the widest rendered cell (header
    included), so long values such as ISO timestamps in ``last_attempt``
    no longer push the ``error`` column out of alignment. An empty
    ``rows`` list prints just the count line and the header.

    Args:
        rows: Failed-row records exposing ``md_path``, ``retryable``,
            ``retry_count``, ``last_attempt_at`` and ``error``.
    """
    headers = ("md_path", "retryable", "retries", "last_attempt", "error")
    # Render every cell to text first so widths reflect what is printed.
    table = [
        (
            r.md_path,
            "TRUE" if r.retryable else "FALSE",
            str(r.retry_count),
            to_display_tz(r.last_attempt_at).isoformat()
            if r.last_attempt_at
            else "",
            r.error or "",
        )
        for r in rows
    ]
    # zip(headers, *table) yields one tuple per column: header + its cells.
    widths = [max(len(cell) for cell in column) for column in zip(headers, *table)]
    fmt = " ".join(f"{{:<{w}}}" for w in widths)
    typer.echo(f"{len(rows)} failed row(s):\n")
    typer.echo(fmt.format(*headers))
    for cells in table:
        typer.echo(fmt.format(*cells))

View File

@ -0,0 +1,183 @@
"""``everos init`` — generate a starter ``.env`` from the packaged template.
The ``env.template`` ships inside the wheel as package data at
``everos/templates/env.template``. ``init`` reads it via
:mod:`importlib.resources`, so the command works identically for pip-
installed users and source-tree users (the file is the single source
of truth).
Subcommand mounted as ``everos init`` (top-level leaf command — not a
Typer group), to match the idiomatic ``alembic init`` / ``django-admin
startproject`` shape.
"""
from __future__ import annotations
import contextlib
import logging
import os
import sys
import tempfile
from importlib import resources
from pathlib import Path
import typer
# Importable package that carries the template as package data.
_TEMPLATE_PACKAGE = "everos.templates"
# File name of the template inside ``_TEMPLATE_PACKAGE``.
_TEMPLATE_NAME = "env.template"
# Module logger for the ``init`` command.
_log = logging.getLogger("everos.cli.init")
def _read_template() -> str:
    """Return the packaged ``env.template`` contents decoded as UTF-8.

    The file is looked up through :mod:`importlib.resources` under
    ``_TEMPLATE_PACKAGE`` / ``_TEMPLATE_NAME``.

    Raises:
        RuntimeError: If the package or the file cannot be found, which
            points at a wheel built without the template.
    """
    # NOTE(review): the message below mentions a hatch ``force-include``
    # entry, while the original docstring said the file is auto-included
    # via ``packages=["src/everos"]`` — confirm which one is current.
    template = resources.files(_TEMPLATE_PACKAGE).joinpath(_TEMPLATE_NAME)
    try:
        return template.read_text(encoding="utf-8")
    except (FileNotFoundError, ModuleNotFoundError) as exc:
        raise RuntimeError(
            f"packaged template {_TEMPLATE_NAME!r} not found under "
            f"{_TEMPLATE_PACKAGE!r}; the wheel is missing its "
            "force-include entry (see pyproject.toml "
            "[tool.hatch.build.targets.wheel.force-include])."
        ) from exc
def _xdg_default_path() -> Path:
"""``$XDG_CONFIG_HOME/everos/.env`` (default ``~/.config/everos/.env``)."""
xdg = os.environ.get("XDG_CONFIG_HOME") or "~/.config"
return Path(xdg).expanduser() / "everos" / ".env"
def _atomic_write(target: Path, content: str, mode: int = 0o600) -> None:
"""Write ``content`` to ``target`` atomically with ``mode`` permission.
Writes to a tempfile in the same directory then ``os.replace``s it
onto the target — guarantees either the full new file is visible or
the original (if any) is untouched. Permission bits applied before
the rename so the file is never readable by other users.
"""
target.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix=target.name + ".",
dir=target.parent,
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(content)
os.chmod(tmp_path, mode)
os.replace(tmp_path, target)
except Exception:
with contextlib.suppress(OSError):
os.unlink(tmp_path)
raise
def register(parent: typer.Typer) -> None:
    """Attach the ``init`` command to the root CLI app."""

    def _error(message: str) -> None:
        # All failures are reported the same way: red, on stderr.
        typer.secho(f"error: {message}", fg=typer.colors.RED, err=True)

    @parent.command("init")
    def init(
        to: str | None = typer.Option(
            None,
            "--to",
            help=(
                "Target path for the .env file (default: ./.env). "
                "Parent directories are created if needed."
            ),
        ),
        force: bool = typer.Option(
            False,
            "--force",
            help="Overwrite an existing file at the target path.",
        ),
        print_: bool = typer.Option(
            False,
            "--print",
            help="Print the template to stdout instead of writing to disk.",
        ),
        xdg: bool = typer.Option(
            False,
            "--xdg",
            help=(
                "Shortcut for --to=${XDG_CONFIG_HOME:-~/.config}/everos/.env "
                "(mutually exclusive with --to)."
            ),
        ),
    ) -> None:
        """Generate a starter ``.env`` from the packaged template.

        Common flows::

            everos init # writes ./.env
            everos init --xdg # writes ~/.config/everos/.env
            everos init --to /etc/foo.env --force
            everos init --print > custom.env

        Exit codes:

        - 0 — written successfully (or printed to stdout).
        - 1 — target file already exists and ``--force`` was not given.
        - 2 — packaged template missing (wheel build problem).
        - 3 — write failed (permissions / disk full / parent unwritable).
        """
        # Conflicting flags also exit with code 2.
        if xdg and to is not None:
            _error("--xdg and --to are mutually exclusive")
            raise typer.Exit(code=2)

        try:
            template = _read_template()
        except RuntimeError as exc:
            _error(f"{exc}")
            raise typer.Exit(code=2) from exc

        if print_:
            # Raw write: the template goes to stdout untouched.
            sys.stdout.write(template)
            return

        # Destination precedence: --xdg, then --to, then ./.env.
        if xdg:
            target = _xdg_default_path()
        elif to is None:
            target = Path.cwd() / ".env"
        else:
            target = Path(to).expanduser().resolve()

        if not force and target.exists():
            _error(f"{target} already exists; pass --force to overwrite")
            raise typer.Exit(code=1)

        try:
            _atomic_write(target, template)
        except OSError as exc:
            _error(f"failed to write {target}: {exc}")
            raise typer.Exit(code=3) from exc

        # Friendly next-step block (stdout — quiet enough for piping).
        size_kb = target.stat().st_size / 1024
        typer.secho(f"✓ wrote {target} ({size_kb:.1f} KB)", fg=typer.colors.GREEN)
        for line in (
            "Next steps:",
            " 1. Edit the file and fill in the API keys (see comments inside).",
            " 2. Run `everos server start`.",
            "Docs: https://github.com/evermind/everos/blob/master/QUICKSTART.md",
        ):
            typer.echo(line)

View File

@ -0,0 +1,161 @@
"""``everos server`` subcommand group.
Provides ``everos server start`` to run the HTTP API via uvicorn. CLI
parses arguments, configures structured logging, then hands off to
uvicorn pointing at :func:`everos.entrypoints.api.app.create_app` as a
factory.
"""
from __future__ import annotations
import logging
import os
import sys
from pathlib import Path
import typer
import uvicorn
# Typer group for ``everos server``; invoked without a subcommand it prints
# the help text (``no_args_is_help``).
app = typer.Typer(
    name="server",
    help="Run / manage the HTTP API server",
    no_args_is_help=True,
)
def _resolve_env_file(explicit: str | None) -> Path | None:
"""Find the first existing ``.env`` along the four-layer search path.
Search order (highest-wins):
1. ``explicit`` — when the caller passed ``--env-file <path>``.
2. ``./.env`` — the current working directory (project-local convention).
3. ``${XDG_CONFIG_HOME:-~/.config}/everos/.env`` — XDG-standard user config.
4. ``~/.everos/.env`` — the project's default memory-root location.
Returns ``None`` if none of the layers exist (caller may then fall back
to inherited process env / CI secrets).
"""
candidates: list[Path] = []
if explicit:
candidates.append(Path(explicit).expanduser())
candidates.append(Path.cwd() / ".env")
xdg = os.environ.get("XDG_CONFIG_HOME") or "~/.config"
candidates.append(Path(xdg).expanduser() / "everos" / ".env")
candidates.append(Path("~/.everos/.env").expanduser())
for p in candidates:
try:
if p.is_file():
return p
except OSError:
# Path traversal / permission denied on a fallback candidate
# must not crash the search — skip and keep going.
continue
return None
def _load_env_file(path: str | None) -> Path | None:
    """Load environment variables from the resolved ``.env`` file.

    The file is located with ``_resolve_env_file`` and applied with
    ``override=False``, so variables already present in the process
    environment win over the file.

    Args:
        path: Explicit ``--env-file`` value, or ``None`` to use only the
            default search layers.

    Returns:
        The path that was actually loaded, or ``None`` when no ``.env``
        was found or when it could not be applied because
        ``python-dotenv`` is unavailable. A missing ``.env`` is not an
        error — the user may rely entirely on inherited process env
        (e.g. container / CI secret injection).
    """
    resolved = _resolve_env_file(path)
    if resolved is None:
        return None
    # Guard only the import: an ImportError raised inside load_dotenv is a
    # real failure and must not be mistaken for "dotenv not installed".
    try:
        from dotenv import load_dotenv
    except ImportError:
        # python-dotenv is in our deps; tolerate its absence, but say so
        # and do not report the file as loaded.
        logging.getLogger("everos.cli.server").warning(
            "python-dotenv is not installed; ignoring env file %s", resolved
        )
        return None
    load_dotenv(resolved, override=False)
    return resolved
@app.command("start")
def start(
    host: str | None = typer.Option(
        None,
        "--host",
        help="Bind host (env: EVEROS_API__HOST, default: 127.0.0.1)",
    ),
    port: int | None = typer.Option(
        None,
        "--port",
        help="Bind port (env: EVEROS_API__PORT, default: 8000)",
    ),
    env_file: str | None = typer.Option(
        None,
        "--env-file",
        help=(
            "Path to a dotenv file (highest priority). When omitted, "
            "the server searches: ./.env → ${XDG_CONFIG_HOME:-~/.config}"
            "/everos/.env → ~/.everos/.env. Run `everos init` to create one."
        ),
    ),
    reload: bool = typer.Option(
        False,
        "--reload",
        help="Reload on source changes (development)",
    ),
    log_level: str | None = typer.Option(
        None,
        "--log-level",
        help="Log level (env: EVEROS_LOG_LEVEL, default: INFO)",
    ),
) -> None:
    """Start the HTTP API server."""
    env_path = _load_env_file(env_file)

    # Settings are read only once the .env has been applied, so that
    # EVEROS_API__HOST / EVEROS_API__PORT (and any other env override)
    # take effect.
    from everos.config import load_settings

    settings = load_settings()
    # CLI flags win over settings; ``--port 0`` counts as an explicit value.
    bind_host = host if host else settings.api.host
    bind_port = settings.api.port if port is None else port
    level_name = (log_level or os.getenv("EVEROS_LOG_LEVEL", "INFO")).upper()

    from everos.core.observability.logging import configure_logging

    configure_logging(level=level_name)
    boot_log = logging.getLogger("everos.cli.server")

    if env_path is None:
        boot_log.info(
            "no .env found along the search path; relying on inherited env vars "
            "(run `everos init` to generate one)"
        )
    else:
        boot_log.info("loaded env file: %s", env_path)
    boot_log.info("starting everos on %s:%d", bind_host, bind_port)
    if bind_host == "0.0.0.0":
        boot_log.warning(
            "binding to 0.0.0.0 exposes the API on all interfaces; EverOS "
            "ships no built-in auth — see SECURITY.md"
        )

    try:
        uvicorn.run(
            "everos.entrypoints.api.app:create_app",
            host=bind_host,
            port=bind_port,
            reload=reload,
            factory=True,
            log_level=level_name.lower(),
            # Logging is already configured above; ``log_config=None``
            # keeps uvicorn from applying its own ``dictConfig`` on top,
            # which would put uvicorn / fastapi messages back into the
            # plain ``INFO:`` format.
            log_config=None,
        )
    except KeyboardInterrupt:
        boot_log.info("interrupted; shutting down")
    except (OSError, RuntimeError) as exc:
        boot_log.error("startup failed: %s", exc)
        sys.exit(1)

View File

@ -0,0 +1,34 @@
"""everos CLI root entry point.
Exposed as the ``everos`` console script in ``pyproject.toml``. Subcommand
groups live under :mod:`everos.entrypoints.cli.commands` and are registered
here.
CLI subcommands run **in-process** — they call into the service layer
directly rather than through the HTTP API. The HTTP API and CLI are two
sibling surfaces over the same service layer.
"""
from __future__ import annotations
import typer
from .commands import cascade, init_cmd, server
# Root Typer application. Invoked without arguments it prints the help
# text; shell-completion installation options are disabled.
app = typer.Typer(
    name="everos",
    help="everos — md-first memory extraction framework",
    no_args_is_help=True,
    add_completion=False,
)
# Subcommand groups are mounted under their own names.
app.add_typer(server.app, name="server")
app.add_typer(cascade.app, name="cascade")
# ``init`` is a top-level leaf command (not a Typer group) — match the
# idiomatic ``alembic init`` / ``django-admin startproject`` shape.
init_cmd.register(app)
# Allow running the module directly in addition to the console script.
if __name__ == "__main__":
    app()

View File

@ -0,0 +1,5 @@
"""Infrastructure layer.
Adapts to external storage and persists domain models. Contains no
business rules.
"""

View File

@ -0,0 +1,63 @@
"""Async offline strategy scheduling chassis.
Provides decorator-based strategy registration, event-driven triggers
(Cron/Idle/Manual), and gate-based concurrency control.
"""
from everos.infra.ome.config import OMEConfig as OMEConfig
from everos.infra.ome.context import StrategyContext as StrategyContext
from everos.infra.ome.decorator import offline_strategy as offline_strategy
from everos.infra.ome.engine import OfflineEngine as OfflineEngine
from everos.infra.ome.events import BaseEvent as BaseEvent
from everos.infra.ome.events import CronTick as CronTick
from everos.infra.ome.events import IdleTick as IdleTick
from everos.infra.ome.events import ManualTick as ManualTick
from everos.infra.ome.exceptions import (
EmitNotDeclaredError as EmitNotDeclaredError,
)
from everos.infra.ome.exceptions import (
EngineCallFromStrategyError as EngineCallFromStrategyError,
)
from everos.infra.ome.exceptions import (
EngineLockHeldError as EngineLockHeldError,
)
from everos.infra.ome.exceptions import OMEError as OMEError
from everos.infra.ome.exceptions import (
StartupValidationError as StartupValidationError,
)
from everos.infra.ome.exceptions import (
StrategyContractError as StrategyContractError,
)
from everos.infra.ome.gates import Counter as Counter
from everos.infra.ome.records import RunRecord as RunRecord
from everos.infra.ome.records import RunStatus as RunStatus
from everos.infra.ome.records import StrategyRouteInfo as StrategyRouteInfo
from everos.infra.ome.triggers import Cron as Cron
from everos.infra.ome.triggers import Idle as Idle
from everos.infra.ome.triggers import Immediate as Immediate
from everos.infra.ome.triggers import Trigger as Trigger
# Public API of the package: exactly the names re-exported by the
# ``from ... import X as X`` statements above.
__all__ = [
    "BaseEvent",
    "Counter",
    "Cron",
    "CronTick",
    "EmitNotDeclaredError",
    "EngineCallFromStrategyError",
    "EngineLockHeldError",
    "Idle",
    "IdleTick",
    "Immediate",
    "ManualTick",
    "OfflineEngine",
    "OMEConfig",
    "OMEError",
    "RunRecord",
    "RunStatus",
    "StartupValidationError",
    "StrategyContext",
    "StrategyContractError",
    "StrategyRouteInfo",
    "Trigger",
    "offline_strategy",
]

View File

@ -0,0 +1 @@
"""Internal: background loops (idle scan / config reload / crash recovery)."""

View File

@ -0,0 +1,254 @@
"""Config hot-reload — watchfiles + tomllib + shallow merge.
Hot-updatable fields: enabled / max_retries / gate / cron / idle_seconds /
scan_interval_seconds. Trigger type swap (Immediate ↔ Cron ↔ Idle),
event subscription (Immediate.on / Idle.on), and Idle.event_field
remain immutable — these define strategy routing and changing them
requires a code change and redeploy.
Per-strategy two-phase commit: enabled is applied independently for
emergency-stop semantics; max_retries / gate / trigger parameters
form one atomic group that fully rolls back on any failure inside it.
"""
from __future__ import annotations
import asyncio
import tomllib
from contextlib import suppress
from dataclasses import replace
from pathlib import Path
from typing import TYPE_CHECKING
from pydantic import ValidationError
from watchfiles import awatch
from everos.core.observability.logging import get_logger
from everos.infra.ome._dispatch.registry import StrategyRegistry
from everos.infra.ome.config import StrategyOverride, TomlRoot
from everos.infra.ome.decorator import StrategyMeta
from everos.infra.ome.gates import Counter
from everos.infra.ome.triggers import Cron, Idle, Trigger
if TYPE_CHECKING:
from everos.infra.ome.engine import OfflineEngine
logger = get_logger(__name__)
class _SkipAtomicGroupError(Exception):
"""Internal sentinel raised when the non-enabled atomic group for
one strategy must be skipped without affecting other strategies.
"""
def _apply_enabled(
meta: StrategyMeta,
override: StrategyOverride,
name: str,
registry: StrategyRegistry,
) -> StrategyMeta:
"""Step 1: apply `enabled` independently — never blocked by other fields."""
if override.enabled is None or override.enabled == meta.enabled:
return meta
new_meta = replace(meta, enabled=override.enabled)
registry.replace(name, new_meta)
return new_meta
def _build_atomic_meta(
    meta: StrategyMeta,
    override: StrategyOverride,
) -> tuple[StrategyMeta, Trigger]:
    """Step 2, pure compute: derive ``(new_meta, new_trigger)`` from ``meta``.

    Applies ``max_retries``, the gate merge and the trigger parameters
    of ``override`` onto copies; nothing is written to the registry or
    the engine here.

    Raises:
        _SkipAtomicGroupError: When a gate is introduced without a
            threshold, or ``cron`` / ``idle_*`` is given for a strategy
            whose trigger is of a different type.
    """
    candidate = meta
    if override.max_retries is not None:
        candidate = replace(candidate, max_retries=override.max_retries)

    gate_override = override.gate
    if gate_override is not None:
        current_gate = meta.gate
        # A brand-new gate needs an explicit threshold — silently
        # defaulting to 1 would mean "fire on every event", which is
        # almost certainly not what the user intended.
        if current_gate is None and gate_override.threshold is None:
            raise _SkipAtomicGroupError(
                "introducing a gate requires explicit threshold"
            )
        merged = {} if current_gate is None else current_gate.model_dump()
        # Only fields that were explicitly set to a non-None value override.
        merged.update(
            (key, value)
            for key, value in gate_override.model_dump(exclude_unset=True).items()
            if value is not None
        )
        candidate = replace(candidate, gate=Counter(**merged))

    current_trigger = meta.trigger
    trigger: Trigger = current_trigger

    if override.cron is not None:
        if not isinstance(current_trigger, Cron):
            raise _SkipAtomicGroupError(
                f"cron given on non-Cron strategy "
                f"(actual: {type(meta.trigger).__name__})"
            )
        # NOTE(review): rebuilds the trigger from ``expr`` alone, whereas
        # the Idle path below merges into the existing trigger — confirm
        # Cron has no other fields that should be carried over.
        trigger = Cron(expr=override.cron)

    idle_updates: dict[str, int] = {
        field: value
        for field, value in (
            ("idle_seconds", override.idle_seconds),
            ("scan_interval_seconds", override.scan_interval_seconds),
        )
        if value is not None
    }
    if idle_updates:
        if not isinstance(current_trigger, Idle):
            raise _SkipAtomicGroupError(
                f"idle_* given on non-Idle strategy "
                f"(actual: {type(meta.trigger).__name__})"
            )
        # model_validate (not model_copy) so the merged dict goes through
        # Idle's validators again; model_copy(update=...) would skip them
        # and let an invalid event_field reach the registry.
        trigger = Idle.model_validate({**current_trigger.model_dump(), **idle_updates})

    if trigger is not current_trigger:
        candidate = replace(candidate, trigger=trigger)
    return candidate, trigger
def _needs_aps_reschedule(old_trigger: Trigger, new_trigger: Trigger) -> bool:
"""True iff scheduler-driving fields changed (cron expr / Idle scan_interval)."""
if new_trigger is old_trigger:
return False
if isinstance(new_trigger, Cron) and isinstance(old_trigger, Cron):
return new_trigger.expr != old_trigger.expr
if isinstance(new_trigger, Idle) and isinstance(old_trigger, Idle):
return new_trigger.scan_interval_seconds != old_trigger.scan_interval_seconds
return False
def _maybe_reschedule_aps(
    engine: OfflineEngine, name: str, new_trigger: Trigger
) -> None:
    """Forward the trigger's scheduling fields to the engine.

    Cron triggers reschedule the cron job with the new expression; Idle
    triggers reschedule the idle job with the new scan interval. Any
    other trigger type is a no-op.
    """
    if isinstance(new_trigger, Cron):
        engine.reschedule_cron_job(name, new_trigger.expr)
        return
    if isinstance(new_trigger, Idle):
        engine.reschedule_idle_job(
            name, scan_interval_seconds=new_trigger.scan_interval_seconds
        )
def _apply_one_strategy(
    name: str,
    override: StrategyOverride,
    registry: StrategyRegistry,
    engine: OfflineEngine,
) -> None:
    """Apply one strategy's override: ``enabled`` first, then the atomic group.

    The ``enabled`` change is written before the atomic group is built,
    so it sticks even when the group is skipped. Inside the group the
    scheduler is rescheduled before the registry is replaced, so a
    scheduler failure leaves the registry entry untouched. Any exception
    from the group is logged as a warning and swallowed.
    """
    current = _apply_enabled(registry.get(name), override, name, registry)
    try:
        updated, trigger = _build_atomic_meta(current, override)
        if _needs_aps_reschedule(current.trigger, trigger):
            _maybe_reschedule_aps(engine, name, trigger)
        registry.replace(name, updated)
    except Exception as exc:  # noqa: BLE001
        # User-fixable config error (typo / type mismatch / APS runtime
        # failure) — log + skip this strategy's atomic group, never the loop.
        logger.warning(
            "strategy_atomic_group_skipped",
            strategy_name=name,
            error_type=type(exc).__name__,
            exc_info=True,
        )
def apply_overrides(
    registry: StrategyRegistry,
    root: TomlRoot,
    engine: OfflineEngine,
) -> None:
    """Merge the per-strategy overrides of ``root`` into ``registry``.

    Overrides naming a strategy that is not registered are logged and
    ignored. For every known strategy the two-step procedure applies:

    Step 1 (enabled): applied independently — emergency-stop must
        never be blocked by a typo in another field.
    Step 2 (max_retries / gate / trigger params): applied as one
        group; a failure anywhere in it (type mismatch, invalid cron,
        APS reschedule error, ...) leaves the registry entry at its
        prior values.
    """
    registered = {strategy.name for strategy in registry.all()}
    for strategy_name, strategy_override in root.strategies.items():
        if strategy_name in registered:
            _apply_one_strategy(strategy_name, strategy_override, registry, engine)
        else:
            logger.warning(
                "config_override_unknown_strategy", strategy_name=strategy_name
            )
class ConfigReloader:
"""Watch a TOML file and apply overrides on change."""
def __init__(
self,
*,
config_path: Path,
registry: StrategyRegistry,
engine: OfflineEngine,
debounce_ms: int = 1600,
) -> None:
self._path = config_path
self._registry = registry
self._engine = engine
self._debounce_ms = debounce_ms
self._task: asyncio.Task[None] | None = None
def start(self) -> None:
"""Fire-and-forget the watch loop. Idempotent: raises on double-start."""
if self._path is None:
return
if self._task is not None and not self._task.done():
raise RuntimeError("ConfigReloader already started")
self._task = asyncio.create_task(self._loop())
async def stop(self) -> None:
"""Cancel the watch task and await it; safe to call multiple times."""
if self._task is not None:
self._task.cancel()
with suppress(asyncio.CancelledError):
await self._task
self._task = None
async def _loop(self) -> None:
"""Initial load + per-FS-change reload; survives single-iteration failures."""
try:
await self._load_once()
except Exception: # noqa: BLE001
logger.exception("config_reload_iteration_failed")
async for _changes in awatch(self._path, debounce=self._debounce_ms):
try:
await self._load_once()
except Exception: # noqa: BLE001
logger.exception("config_reload_iteration_failed")
async def _load_once(self) -> None:
"""Read TOML off the loop, parse + validate, apply overrides."""
def _read_and_parse() -> TomlRoot:
with open(self._path, "rb") as f:
content = f.read()
parsed = tomllib.loads(content.decode("utf-8"))
return TomlRoot.model_validate(parsed)
try:
root = await asyncio.to_thread(_read_and_parse)
except (OSError, tomllib.TOMLDecodeError, ValidationError) as e:
logger.warning(
"config_reload_failed",
error_type=type(e).__name__,
error=str(e),
path=str(self._path),
)
return
apply_overrides(self._registry, root, self._engine)
logger.info("config_reloaded", path=str(self._path))

View File

@ -0,0 +1,79 @@
"""Startup crash recovery — stale RUNNING rows → CRASHED + re-enqueue.
Runs once at engine.start() before normal dispatching begins. Rows
whose started_at is older than ``timeout_seconds`` are marked CRASHED
and re-enqueued with a fresh run_id reusing the original event payload.
Fresher RUNNING rows are skipped — APScheduler's own jobstore may have
already reattached them.
At-most-once: ``mark_crashed`` and ``add_job`` are not atomic. If
``add_job`` fails after ``mark_crashed``, the row stays CRASHED and
the event is lost. Strategies needing at-least-once must add their own
retry / monitor layer.
"""
from __future__ import annotations
from collections.abc import Awaitable, Callable
from datetime import timedelta
from uuid import uuid4
from everos.component.utils.datetime import get_utc_now
from everos.core.observability.logging import get_logger
from everos.infra.ome._stores.run_record import RunRecordStore
logger = get_logger(__name__)
async def scan_and_resume(
    *,
    run_record_store: RunRecordStore,
    timeout_seconds: int,
    add_job: Callable[[str, str, str, str, int], Awaitable[None]],
) -> None:
    """Mark stale RUNNING rows CRASHED and re-enqueue each under a new run id.

    A row is stale when its ``started_at`` is more than
    ``timeout_seconds`` before now; fresher rows are left alone. Marking
    and re-enqueueing are separate steps: if ``add_job`` fails, the
    failure is logged and the row stays CRASHED (at-most-once).

    ``add_job`` is called with positional args
    ``(strategy_name, run_id, event_topic, event_payload, max_retries)``.

    Raises:
        ValueError: If ``timeout_seconds`` is not positive.
    """
    if timeout_seconds <= 0:
        raise ValueError(f"timeout_seconds must be > 0, got {timeout_seconds}")

    scan_time = get_utc_now()
    stale_before = scan_time - timedelta(seconds=timeout_seconds)
    candidates = await run_record_store.find_running()
    # Lazy filter keeps the per-row order: check, mark, re-enqueue.
    stale_rows = (row for row in candidates if not row.started_at >= stale_before)

    for row in stale_rows:
        await run_record_store.mark_crashed(
            run_id=row.run_id,
            finished_at=scan_time,
            error="crash recovery: marked CRASHED after start scan",
        )
        replacement_id = uuid4().hex
        try:
            await add_job(
                row.strategy_name,
                replacement_id,
                row.event_topic,
                row.event_payload,
                row.max_retries_snapshot,
            )
            logger.info(
                "crash_recovery_resumed",
                strategy_name=row.strategy_name,
                event_topic=row.event_topic,
                old_run_id=row.run_id,
                new_run_id=replacement_id,
            )
        except Exception:  # noqa: BLE001
            # One failed re-enqueue must not stop recovery of the rest.
            logger.exception(
                "crash_recovery_resume_failed",
                strategy_name=row.strategy_name,
                event_topic=row.event_topic,
                old_run_id=row.run_id,
            )

View File

@ -0,0 +1,60 @@
"""IdleScanner — periodic scan of idle_store, emits IdleTick for overdue buckets."""
from __future__ import annotations
from collections.abc import Awaitable, Callable
from datetime import datetime
from everos.component.utils.datetime import get_utc_now
from everos.core.observability.logging import get_logger
from everos.infra.ome._stores.idle import IdleStore
from everos.infra.ome.events import BaseEvent, IdleTick
from everos.infra.ome.triggers import Idle
logger = get_logger(__name__)
class IdleScanner:
    """Emit an ``IdleTick`` for every bucket the idle store reports overdue."""

    def __init__(
        self,
        *,
        strategy_name: str,
        trigger: Idle,
        idle_store: IdleStore,
        emit: Callable[[BaseEvent], Awaitable[None]],
    ) -> None:
        """Bind the scanner to one strategy, its Idle trigger and its sinks."""
        self._name = strategy_name
        self._trigger = trigger
        self._idle_store = idle_store
        self._emit = emit

    async def scan_once(self, *, now: datetime | None = None) -> None:
        """Run one scan pass and notify every overdue bucket.

        Args:
            now: Reference time for the scan; defaults to the current
                UTC time.

        A failing emit is logged and does not stop the pass, so one
        downstream error (e.g. dispatch hitting a transient DB lock)
        cannot keep sibling buckets from being notified this round.
        """
        reference = get_utc_now() if now is None else now
        overdue_buckets = await self._idle_store.scan_idle(
            self._name,
            idle_seconds=self._trigger.idle_seconds,
            now=reference,
        )
        for key in overdue_buckets:
            await self._notify(key)

    async def _notify(self, bucket_key) -> None:  # type: ignore[no-untyped-def]
        """Emit one ``IdleTick`` for ``bucket_key``; log instead of raising."""
        try:
            tick = IdleTick(
                strategy_name=self._name,
                bucket_key=bucket_key,
                idle_seconds=self._trigger.idle_seconds,
            )
            await self._emit(tick)
        except Exception:  # noqa: BLE001
            logger.exception(
                "idle_emit_failed",
                strategy_name=self._name,
                bucket_key=bucket_key,
            )

Some files were not shown because too many files have changed in this diff Show More