chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/README.md
+++ b/src/everos/README.md
@ -0,0 +1,35 @@
+# `everos` package
+
+Source layout for the `everos` Python package. This README is a quick
+orientation; full architectural detail lives elsewhere.
+
+## Layout
+
+```
+everos/
+├── entrypoints/    Presentation: cli + api
+├── service/        Application: use case orchestration
+├── memory/         Domain: extract + search + cascade + prompt_slots + models
+├── infra/          Infrastructure: persistence/{markdown, sqlite, lancedb}
+├── component/      Cross-cutting providers: llm / embedding / rerank / config / utils
+├── core/           Runtime base: observability / lifespan / context
+└── config/         Data: Settings + default.toml + prompt_slots templates
+```
+
+Each subpackage has a top-level `__init__.py` describing its responsibility
+and public API.
+
+## Dependency rule
+
+```
+entrypoints → service → memory → infra
+                          ↓
+                    component / core / config
+```
+
+Single-direction; enforced by `import-linter` in CI.
+
+## Further reading
+
+- Architecture: [../../docs/architecture.md](../../docs/architecture.md)
+- Coding rules (auto-loaded by Claude Code): [../../.claude/rules/](../../.claude/rules/)
--- a/src/everos/init.py
+++ b/src/everos/init.py
@ -0,0 +1,11 @@
+"""everos — md-first memory extraction framework."""
+
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as _pkg_version
+
+# Read the version from the installed distribution's metadata.
+try:
+    __version__ = _pkg_version("everos")
+except PackageNotFoundError:
+    # Editable install without dist-info, or running from a source tree that
+    # was never installed. Fall back to a sentinel rather than crash imports.
+    __version__ = "0.0.0+unknown"
--- a/src/everos/component/init.py
+++ b/src/everos/component/init.py
@ -0,0 +1,4 @@
+"""Cross-cutting components.
+
+Technical capabilities used by every layer; depend on no business layer.
+"""
--- a/src/everos/component/config/init.py
+++ b/src/everos/component/config/init.py
@ -0,0 +1,14 @@
+"""Config processing capability.
+
+YAML loader for category-organised config trees (PromptSlot templates,
+etc.). Distinct from :mod:`everos.config` (configuration *data* + Settings
+schema, which uses TOML for the Pydantic Settings file) — this subpackage
+holds *capability* (how to load), the other holds *data* (what to load).
+
+External usage:
+    from everos.component.config import YamlConfigLoader
+"""
+
+from .loader import YamlConfigLoader as YamlConfigLoader
+
+__all__ = ["YamlConfigLoader"]
--- a/src/everos/component/config/loader.py
+++ b/src/everos/component/config/loader.py
@ -0,0 +1,146 @@
+"""YAML config loader for category-organised file trees.
+
+Concept: a project keeps several *categories* of YAML config files under
+their own subdirectories — for example PromptSlot templates under
+``config/prompt_slots/<name>.yaml``. The loader:
+
+    1. registers a category → subdirectory mapping
+    2. resolves ``find(category, name)`` to ``<root>/<subdir>/<name>.yaml``
+    3. caches parsed contents until ``refresh`` is called
+
+Uses ``yaml.safe_load`` (no arbitrary tags) — PyYAML is already a project
+dependency for markdown frontmatter, so no extra cost.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+class YamlConfigLoader:
+    """Load YAML files organised by category subdirectories.
+
+    Usage:
+        loader = YamlConfigLoader(root=Path("src/everos/config"))
+        loader.register_category("prompt_slots")
+        # → reads <root>/prompt_slots/episode.yaml
+        meta = loader.find("prompt_slots", "episode")
+        names = loader.list("prompt_slots")
+        loader.refresh()  # next find() re-reads from disk
+
+    Cache semantics:
+        * ``find`` parses the file on first access and caches the dict.
+        * ``refresh()`` empties the entire cache.
+        * ``refresh(category)`` empties one category's entries.
+        * ``refresh(category, name)`` empties a single entry.
+    """
+
+    def __init__(
+        self,
+        root: Path,
+        categories: Mapping[str, str | None] | None = None,
+    ) -> None:
+        """
+        Args:
+            root: Base directory containing the category subdirectories.
+            categories: Optional pre-registered category map (``name → subdir``).
+                When ``subdir`` is ``None`` the category name is used as-is.
+        """
+        self._root = Path(root)
+        self._subdirs: dict[str, str] = {}
+        self._cache: dict[tuple[str, str], dict[str, Any]] = {}
+
+        if categories:
+            for name, subdir in categories.items():
+                self.register_category(name, subdir)
+
+    # ── Category management ────────────────────────────────────────────────
+
+    def register_category(self, name: str, subdir: str | None = None) -> None:
+        """Register a category. ``subdir`` defaults to ``name``."""
+        self._subdirs[name] = subdir if subdir is not None else name
+
+    def categories(self) -> list[str]:
+        """Return registered category names (sorted)."""
+        return sorted(self._subdirs)
+
+    # ── Lookup ─────────────────────────────────────────────────────────────
+
+    def find(self, category: str, name: str) -> dict[str, Any]:
+        """Load ``<root>/<subdir>/<name>.yaml`` for ``category``.
+
+        Raises:
+            KeyError: if ``category`` was not registered.
+            FileNotFoundError: if the yaml file does not exist.
+            TypeError: if the parsed YAML is not a mapping.
+        """
+        cache_key = (category, name)
+        if cache_key in self._cache:
+            return self._cache[cache_key]
+
+        path = self._path_for(category, name)
+        if not path.is_file():
+            raise FileNotFoundError(f"yaml not found: {path}")
+
+        with path.open("r", encoding="utf-8") as fh:
+            data = yaml.safe_load(fh)
+        if data is None:
+            data = {}
+        if not isinstance(data, dict):
+            raise TypeError(
+                f"yaml top-level must be a mapping, got {type(data).__name__}: {path}"
+            )
+        self._cache[cache_key] = data
+        return data
+
+    def list(self, category: str) -> list[str]:
+        """Return sorted yaml stems available in ``category`` (no extension).
+
+        Raises:
+            KeyError: if ``category`` was not registered.
+        """
+        directory = self._dir_for(category)
+        if not directory.is_dir():
+            return []
+        return sorted(p.stem for p in directory.glob("*.yaml"))
+
+    # ── Cache control ──────────────────────────────────────────────────────
+
+    def refresh(
+        self,
+        category: str | None = None,
+        name: str | None = None,
+    ) -> None:
+        """Invalidate cached entries.
+
+        - ``refresh()``                  → drop every cached entry
+        - ``refresh(category)``          → drop everything in ``category``
+        - ``refresh(category, name)``    → drop a single entry
+        """
+        if category is None:
+            self._cache.clear()
+            return
+        if name is not None:
+            self._cache.pop((category, name), None)
+            return
+        self._cache = {
+            (cat, n): v for (cat, n), v in self._cache.items() if cat != category
+        }
+
+    # ── Internals ──────────────────────────────────────────────────────────
+
+    def _dir_for(self, category: str) -> Path:
+        try:
+            subdir = self._subdirs[category]
+        except KeyError as exc:
+            raise KeyError(
+                f"category not registered: {category!r}; known: {sorted(self._subdirs)}"
+            ) from exc
+        return self._root / subdir
+
+    def _path_for(self, category: str, name: str) -> Path:
+        return self._dir_for(category) / f"{name}.yaml"
--- a/src/everos/component/embedding/init.py
+++ b/src/everos/component/embedding/init.py
@ -0,0 +1,33 @@
+"""Embedding provider adapters (one provider per file).
+
+
+Public surface:
+
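+- :class:`EmbeddingProvider` — Protocol every provider satisfies.
+- :class:`EmbeddingError` — provider-side failure.
+- :class:`EmbeddingNotConfiguredError` — raised when required embedding
+  settings are missing.
+- :class:`OpenAIEmbeddingProvider` — concrete provider for any
+  OpenAI-protocol embeddings endpoint (DeepInfra, vLLM, OpenAI, …).
+- :func:`build_embedding_provider` — settings-driven factory.
+- :func:`get_embedder` — process-wide lazy singleton accessor.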
+
+External usage::
+
+    from everos.component.embedding import build_embedding_provider
+    provider = build_embedding_provider(settings.embedding)
+    vec = await provider.embed("hello")
+"""
+
+from .accessor import EmbeddingNotConfiguredError as EmbeddingNotConfiguredError
+from .accessor import get_embedder as get_embedder
+from .factory import build_embedding_provider as build_embedding_provider
+from .openai_provider import OpenAIEmbeddingProvider as OpenAIEmbeddingProvider
+from .protocol import EmbeddingError as EmbeddingError
+from .protocol import EmbeddingProvider as EmbeddingProvider
+
+__all__ = [
+    "EmbeddingError",
+    "EmbeddingNotConfiguredError",
+    "EmbeddingProvider",
+    "OpenAIEmbeddingProvider",
+    "build_embedding_provider",
+    "get_embedder",
+]
--- a/src/everos/component/embedding/accessor.py
+++ b/src/everos/component/embedding/accessor.py
@ -0,0 +1,48 @@
+"""Process-wide embedding provider accessor.
+
+Lazy singleton mirror of :func:`everos.component.llm.get_llm_client`:
+first call reads settings and builds the OpenAI-protocol embedding
+client; subsequent calls return the cached instance. Strategies and
+other components that need a process-wide embedder import this rather
+than threading the provider through their constructors.
+
+Raises :class:`EmbeddingNotConfiguredError` when credentials are missing
+so misconfiguration surfaces at the call site (or at app startup via a
+lifespan provider) instead of silently degrading.
+"""
+
+from __future__ import annotations
+
+from everos.config import load_settings
+from everos.core.observability.logging import get_logger
+
+from .factory import build_embedding_provider
+from .protocol import EmbeddingProvider
+
+logger = get_logger(__name__)
+
+
+class EmbeddingNotConfiguredError(RuntimeError):
+    """Raised when ``settings.embedding`` lacks ``model``/``api_key``/``base_url``."""
+
+
+_embedder: EmbeddingProvider | None = None
+
+
+def get_embedder() -> EmbeddingProvider:
+    """Return the singleton :class:`EmbeddingProvider`.
+
+    Raises:
+        EmbeddingNotConfiguredError: When required settings fields are
+            unset. See :func:`build_embedding_provider` for the exact
+            keys.
+    """
+    global _embedder
+    if _embedder is not None:
+        return _embedder
+    try:
+        _embedder = build_embedding_provider(load_settings().embedding)
+    except ValueError as exc:
+        raise EmbeddingNotConfiguredError(str(exc)) from exc
+    logger.info("embedder_built")
+    return _embedder
--- a/src/everos/component/embedding/factory.py
+++ b/src/everos/component/embedding/factory.py
@ -0,0 +1,56 @@
+"""Factory for building an embedding provider from :class:`EmbeddingSettings`."""
+
+from __future__ import annotations
+
+from everos.config import EmbeddingSettings
+
+from .openai_provider import OpenAIEmbeddingProvider
+from .protocol import EmbeddingProvider
+
+# Vector dim for the LanceDB index column — see ``17_lancedb_tables_design.md``.
+_DEFAULT_DIM = 1024
+
+
+def build_embedding_provider(
+    settings: EmbeddingSettings,
+    *,
+    dim: int = _DEFAULT_DIM,
+) -> EmbeddingProvider:
+    """Build an OpenAI-compatible embedding provider from settings.
+
+    Args:
+        settings: The :class:`EmbeddingSettings` slice from
+            :func:`everos.config.load_settings`.
+        dim: Target vector dimension; defaults to 1024 to match the
+            LanceDB ``vector`` column shape.
+
+    Returns:
+        An :class:`EmbeddingProvider` ready to call ``embed`` /
+        ``embed_batch``.
+
+    Raises:
+        ValueError: If ``model``, ``api_key`` or ``base_url`` is unset.
+    """
+    if not settings.model:
+        raise ValueError(
+            "Embedding model is not configured "
+            "(set EVEROS_EMBEDDING__MODEL or [embedding] model in user toml)"
+        )
+    if settings.api_key is None:
+        raise ValueError(
+            "Embedding api_key is not configured (set EVEROS_EMBEDDING__API_KEY)"
+        )
+    if not settings.base_url:
+        raise ValueError(
+            "Embedding base_url is not configured (set EVEROS_EMBEDDING__BASE_URL)"
+        )
+    return OpenAIEmbeddingProvider(
+        model=settings.model,
+        api_key=settings.api_key.get_secret_value(),
+        base_url=settings.base_url,
+        dim=dim,
+        timeout=settings.timeout_seconds,
+        max_retries=settings.max_retries,
+        batch_size=settings.batch_size,
+        max_concurrent=settings.max_concurrent,
+    )
--- a/src/everos/component/embedding/openai_provider.py
+++ b/src/everos/component/embedding/openai_provider.py
@ -0,0 +1,98 @@
+"""OpenAI-compatible embedding provider.
+
+Wraps :class:`openai.AsyncOpenAI` so any OpenAI-protocol endpoint
+(DeepInfra, OpenAI, Together, Fireworks, …) works without per-provider
+forks. Self-hosted vLLM also exposes the same shape; the only quirk it
+imposes is that the ``dimensions`` request parameter is ignored — we
+truncate client-side to ``dim`` so callers always see the declared
+shape regardless of backend.
+
+Concurrency model:
+
+- ``embed_batch`` splits the inputs into chunks of ``batch_size``.
+- An :class:`asyncio.Semaphore` capped at ``max_concurrent`` bounds
+  in-flight requests; remaining chunks queue and start as slots free.
+- Retries / timeouts come from the openai SDK (``max_retries``,
+  ``timeout`` constructor args).
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Sequence
+
+import openai
+
+from .protocol import EmbeddingError
+
+
+class OpenAIEmbeddingProvider:
+    """OpenAI-compatible embedding provider with batching + concurrency.
+
+    Args:
+        model: Embedding model id (e.g. ``"Qwen/Qwen3-Embedding-4B"``).
+        api_key: Bearer credential as a plain ``str``.
+        base_url: OpenAI-protocol endpoint
+            (e.g. ``"https://api.deepinfra.com/v1/openai"``).
+        dim: Target vector dimension. Vectors longer than this are
+            truncated client-side (matches the LanceDB column shape —
+            see ``17_lancedb_tables_design.md``).
+        timeout: Per-request timeout, seconds.
+        max_retries: Retry budget exposed via the openai SDK.
+        batch_size: How many inputs per ``/embeddings`` call.
+        max_concurrent: Cap on in-flight chunked requests.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str,
+        base_url: str,
+        dim: int = 1024,
+        timeout: float = 30.0,
+        max_retries: int = 3,
+        batch_size: int = 10,
+        max_concurrent: int = 5,
+    ) -> None:
+        self.dim = dim
+        self._model = model
+        self._batch_size = batch_size
+        self._semaphore = asyncio.Semaphore(max_concurrent)
+        self._client = openai.AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            timeout=timeout,
+            max_retries=max_retries,
+        )
+
+    async def embed(self, text: str) -> list[float]:
+        """Embed a single string."""
+        vectors = await self._embed_chunk([text])
+        return vectors[0]
+
+    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
+        """Embed many strings, preserving input order."""
+        if not texts:
+            return []
+        chunks = [
+            list(texts[i : i + self._batch_size])
+            for i in range(0, len(texts), self._batch_size)
+        ]
+        results = await asyncio.gather(*(self._embed_chunk(chunk) for chunk in chunks))
+        # gather preserves order across awaitables, and each chunk preserves
+        # its internal order — so flattening yields the input order back.
+        return [vec for chunk in results for vec in chunk]
+
+    async def _embed_chunk(self, chunk: list[str]) -> list[list[float]]:
+        """One ``/embeddings`` call, semaphore-guarded."""
+        async with self._semaphore:
+            try:
+                response = await self._client.embeddings.create(
+                    model=self._model,
+                    input=chunk,
+                )
+            except openai.OpenAIError as exc:
+                raise EmbeddingError(str(exc)) from exc
+        # OpenAI returns ``data`` indexed by request order; truncate to ``dim``.
+        return [list(item.embedding[: self.dim]) for item in response.data]
--- a/src/everos/component/embedding/protocol.py
+++ b/src/everos/component/embedding/protocol.py
@ -0,0 +1,48 @@
+"""Embedding provider protocol.
+
+
+The cascade worker / retrieval pipeline depend on a single small
+contract: turn a string (or list of strings) into a fixed-dimension
+vector. Whether the backend is OpenAI, vLLM, DeepInfra, Ollama, or a
+local model is the provider's business — the contract is invariant.
+
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Protocol, runtime_checkable
+
+
+class EmbeddingError(Exception):
+    """Raised on any provider-side embedding failure.
+
+    Wraps the upstream SDK exception via ``__cause__`` (PEP 3134) so
+    diagnostic loggers preserve the original error chain.
+    """
+
+
+@runtime_checkable
+class EmbeddingProvider(Protocol):
+    """Async embedding provider contract.
+
+    ``dim`` is the post-truncation vector dimension every embed call
+    returns. Providers that don't natively support dimension truncation
+    must truncate client-side so callers see the declared shape.
+    """
+
+    dim: int
+
+    async def embed(self, text: str) -> list[float]:
+        """Embed a single string. Returns a ``[dim]`` vector."""
+        ...
+
+    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
+        """Embed a batch of strings preserving input order.
+
+        Implementations chunk by ``batch_size`` and bound in-flight
+        requests by ``max_concurrent`` (both from settings). On failure,
+        raises :class:`EmbeddingError` — the worker treats it as a
+        retryable / unrecoverable case per HTTP-status mapping.
+        """
+        ...
--- a/src/everos/component/llm/init.py
+++ b/src/everos/component/llm/init.py
@ -0,0 +1,45 @@
+"""LLM provider adapters (one provider per file, mem0-style).
+
+Public surface:
+
+- :class:`LLMClient` — Protocol every provider satisfies (re-exported
+  from :mod:`everalgo.llm`; same shape so everos providers can be
+  handed to everalgo operators).
+- :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage` — chat
+  payload types (re-exported from :mod:`everalgo.llm`).
+- :class:`LLMError` — provider-side failure (re-exported).
+- :class:`LLMNotConfiguredError` — raised when credentials are missing.
+- :class:`OpenAIProvider` — concrete provider wrapping
+  ``openai.AsyncOpenAI`` against any OpenAI-compatible endpoint.
+- :func:`build_llm_provider` — settings-driven factory.
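+- :func:`get_llm_client` — process-wide lazy singleton accessor.
+- :func:`get_multimodal_llm_client` — lazy singleton accessor for the
+  client configured by ``settings.multimodal``.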
+
+External usage::
+
+    from everos.component.llm import build_llm_provider, LLMClient
+    provider = build_llm_provider(settings.llm)
+"""
+
+from .client import LLMNotConfiguredError as LLMNotConfiguredError
+from .client import get_llm_client as get_llm_client
+from .client import get_multimodal_llm_client as get_multimodal_llm_client
+from .factory import build_llm_provider as build_llm_provider
+from .openai_provider import OpenAIProvider as OpenAIProvider
+from .protocol import ChatMessage as ChatMessage
+from .protocol import ChatResponse as ChatResponse
+from .protocol import LLMClient as LLMClient
+from .protocol import LLMError as LLMError
+from .protocol import Usage as Usage
+
+__all__ = [
+    "ChatMessage",
+    "ChatResponse",
+    "LLMClient",
+    "LLMError",
+    "LLMNotConfiguredError",
+    "OpenAIProvider",
+    "Usage",
+    "build_llm_provider",
+    "get_llm_client",
+    "get_multimodal_llm_client",
+]
--- a/src/everos/component/llm/client.py
+++ b/src/everos/component/llm/client.py
@ -0,0 +1,89 @@
+"""Process-wide LLM client accessor.
+
+Lazy singleton — first call reads settings and builds the algo LLM
+client; subsequent calls return the cached instance. Raises
+:class:`LLMNotConfiguredError` when no credentials are present so
+misconfiguration surfaces at app startup (via the LLM lifespan
+provider) instead of silently failing per-request downstream.
+"""
+
+from __future__ import annotations
+
+from everalgo.llm import build_client
+from everalgo.llm.config import LLMConfig
+from everalgo.llm.protocols import LLMClient
+
+from everos.config import load_settings
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+class LLMNotConfiguredError(RuntimeError):
+    """Raised when ``settings.llm`` is missing ``api_key`` or ``base_url``."""
+
+
+_llm_client: LLMClient | None = None
+_multimodal_client: LLMClient | None = None
+
+
+def get_llm_client() -> LLMClient:
+    """Return the singleton algo LLM client.
+
+    Raises:
+        LLMNotConfiguredError: When ``settings.llm.api_key`` or
+            ``settings.llm.base_url`` is unset.
+    """
+    global _llm_client
+    if _llm_client is not None:
+        return _llm_client
+
+    llm_cfg = load_settings().llm
+    api_key = (
+        llm_cfg.api_key.get_secret_value() if llm_cfg.api_key is not None else None
+    )
+    if not api_key or not llm_cfg.base_url:
+        raise LLMNotConfiguredError(
+            "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
+        )
+    _llm_client = build_client(
+        LLMConfig(
+            model=llm_cfg.model,
+            api_key=api_key,
+            base_url=llm_cfg.base_url,
+        )
+    )
+    logger.info("llm_client_built", model=llm_cfg.model)
+    return _llm_client
+
+
+def get_multimodal_llm_client() -> LLMClient:
+    """Return the singleton multimodal LLM client (for everalgo.parser).
+
+    Reads the flat ``[multimodal]`` config — kept separate from the main
+    ``[llm]`` so parsing can target a vision/audio-capable endpoint.
+
+    Raises:
+        LLMNotConfiguredError: When ``settings.multimodal.api_key`` or
+            ``settings.multimodal.base_url`` is unset.
+    """
+    global _multimodal_client
+    if _multimodal_client is not None:
+        return _multimodal_client
+
+    cfg = load_settings().multimodal
+    api_key = cfg.api_key.get_secret_value() if cfg.api_key is not None else None
+    if not api_key or not cfg.base_url:
+        raise LLMNotConfiguredError(
+            "Multimodal LLM is required for parsing; set "
+            "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
+        )
+    _multimodal_client = build_client(
+        LLMConfig(
+            model=cfg.model,
+            api_key=api_key,
+            base_url=cfg.base_url,
+        )
+    )
+    logger.info("multimodal_llm_client_built", model=cfg.model)
+    return _multimodal_client
--- a/src/everos/component/llm/factory.py
+++ b/src/everos/component/llm/factory.py
@ -0,0 +1,45 @@
+"""Factory for building an LLM provider from :class:`LLMSettings`."""
+
+from __future__ import annotations
+
+from everos.config import LLMSettings
+
+from .openai_provider import OpenAIProvider
+from .protocol import LLMClient
+
+
+def build_llm_provider(settings: LLMSettings) -> LLMClient:
+    """Build an OpenAI-compatible LLM provider from settings.
+
+    Unwraps :class:`pydantic.SecretStr` here so downstream callers never
+    touch the raw key directly. Fails fast if either ``api_key`` or
+    ``base_url`` is missing — caller is expected to set them via
+    ``.env`` / user toml / programmatic init before calling.
+
+    Args:
+        settings: The :class:`LLMSettings` slice from
+            :func:`everos.config.load_settings`.
+
+    Returns:
+        A provider that structurally satisfies
+        :class:`everalgo.llm.LLMClient` and can be passed to everalgo
+        operators via ``llm=``.
+
+    Raises:
+        ValueError: If ``api_key`` or ``base_url`` is unset.
+    """
+    if settings.api_key is None:
+        raise ValueError(
+            "LLM api_key is not configured "
+            "(set EVEROS_LLM__API_KEY or [llm] api_key in user toml)"
+        )
+    if not settings.base_url:
+        raise ValueError(
+            "LLM base_url is not configured "
+            "(set EVEROS_LLM__BASE_URL or [llm] base_url in user toml)"
+        )
+    return OpenAIProvider(
+        model=settings.model,
+        api_key=settings.api_key.get_secret_value(),
+        base_url=settings.base_url,
+    )
--- a/src/everos/component/llm/openai_provider.py
+++ b/src/everos/component/llm/openai_provider.py
@ -0,0 +1,114 @@
+"""OpenAI-compatible LLM provider for everos.
+
+Implements the :class:`everalgo.llm.LLMClient` structural contract by
+wrapping :class:`openai.AsyncOpenAI` — the same backbone everalgo's own
+``OpenAICompatClient`` uses, but defined here in everos so the
+provider can be constructed from :class:`everos.config.LLMSettings`
+and handed to everalgo extractors via the ``llm=`` per-call parameter.
+
+Keeps the provider lean (matches the everalgo minimum-viable shape):
+no multi-key rotation, no scenario-level routing, no token-usage
+collector — those are deployment concerns layered on top.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, Literal
+
+import openai
+
+from .protocol import ChatMessage, ChatResponse, LLMError, Usage
+
+
+class OpenAIProvider:
+    """Thin async wrapper over ``openai.AsyncOpenAI``.
+
+    Structurally satisfies :class:`everalgo.llm.LLMClient` (PEP 544);
+    instances can be passed directly to everalgo operators that accept
+    ``llm: LLMClient | None``.
+
+    Args:
+        model: Default model id (override per-call with ``model=`` on
+            :meth:`chat`).
+        api_key: Bearer credential. Pass as plain ``str`` — settings
+            should unwrap :class:`pydantic.SecretStr` at the factory
+            boundary.
+        base_url: OpenAI-compatible endpoint (e.g.
+            ``"https://openrouter.ai/api/v1"``).
+        timeout: Per-request timeout in seconds.
+        temperature: Default sampling temperature (overridable per call).
+        max_tokens: Default max-tokens cap (overridable per call).
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str,
+        base_url: str | None = None,
+        timeout: float = 60.0,
+        temperature: float = 0.0,
+        max_tokens: int | None = None,
+    ) -> None:
+        self._model = model
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+        self._client = openai.AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            timeout=timeout,
+        )
+
+    async def chat(
+        self,
+        messages: list[ChatMessage],
+        *,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        response_format: Mapping[str, Any] | None = None,
+        **extra: Any,
+    ) -> ChatResponse:
+        """Send a chat completion request and return the parsed response."""
+        request: dict[str, Any] = {
+            "model": model or self._model,
+            "messages": [m.model_dump() for m in messages],
+            "temperature": (
+                temperature if temperature is not None else self._temperature
+            ),
+        }
+        effective_max = max_tokens if max_tokens is not None else self._max_tokens
+        if effective_max is not None:
+            request["max_tokens"] = effective_max
+        if response_format is not None:
+            request["response_format"] = dict(response_format)
+        request.update(extra)
+
+        try:
+            completion = await self._client.chat.completions.create(**request)
+        except openai.OpenAIError as exc:
+            raise LLMError(str(exc)) from exc
+
+        choice = completion.choices[0]
+        usage: Usage | None = None
+        if completion.usage is not None:
+            usage = Usage(
+                prompt_tokens=completion.usage.prompt_tokens,
+                completion_tokens=completion.usage.completion_tokens,
+            )
+        return ChatResponse(
+            content=choice.message.content or "",
+            model=completion.model,
+            usage=usage,
+            finish_reason=_normalise_finish_reason(choice.finish_reason),
+            raw=None,
+        )
+
+
+def _normalise_finish_reason(
+    value: str | None,
+) -> Literal["stop", "length", "content_filter"] | None:
+    if value in ("stop", "length", "content_filter"):
+        return value  # type: ignore[return-value]
+    return None
--- a/src/everos/component/llm/protocol.py
+++ b/src/everos/component/llm/protocol.py
@ -0,0 +1,39 @@
+"""LLM client protocol re-export.
+
+The structural contract every everos LLM provider satisfies is the same
+:class:`everalgo.llm.LLMClient` Protocol — everos providers must be
+pass-through-compatible with the everalgo extractors that accept an
+``llm=`` parameter. Re-exporting the type here keeps the import path
+stable (``everos.component.llm``) even if the everalgo namespace
+shifts later.
+
+The :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage`
+shapes are likewise re-exported so callers can build / inspect chat
+payloads without reaching into the everalgo package directly.
+"""
+
+from __future__ import annotations
+
+from everalgo.llm import (
+    ChatMessage as ChatMessage,
+)
+from everalgo.llm import (
+    ChatResponse as ChatResponse,
+)
+from everalgo.llm import (
+    LLMClient as LLMClient,
+)
+from everalgo.llm import (
+    LLMError as LLMError,
+)
+from everalgo.llm import (
+    Usage as Usage,
+)
+
+__all__ = [
+    "ChatMessage",
+    "ChatResponse",
+    "LLMClient",
+    "LLMError",
+    "Usage",
+]
--- a/src/everos/component/rerank/init.py
+++ b/src/everos/component/rerank/init.py
@ -0,0 +1,34 @@
+"""Rerank provider adapters (one provider per file).
+
+Public surface:
+
+- :class:`RerankProvider` — Protocol every provider satisfies.
+- :class:`RerankResult` / :class:`RerankError` — value type + error.
+- :class:`DeepInfraRerankProvider` — DeepInfra inference-API rerank.
+- :class:`VllmRerankProvider` — OpenAI-compat ``/v1/rerank`` (vLLM,
+  self-hosted, other compatible servers).
+- :func:`build_rerank_provider` — settings-driven factory that picks
+  the concrete provider via ``settings.rerank.provider``.
+
+External usage::
+
+    from everos.component.rerank import build_rerank_provider
+    provider = build_rerank_provider(settings.rerank)
+    scored = await provider.rerank("how to file a claim", documents)
+"""
+
+from .deepinfra_provider import DeepInfraRerankProvider as DeepInfraRerankProvider
+from .factory import build_rerank_provider as build_rerank_provider
+from .protocol import RerankError as RerankError
+from .protocol import RerankProvider as RerankProvider
+from .protocol import RerankResult as RerankResult
+from .vllm_provider import VllmRerankProvider as VllmRerankProvider
+
+# Public surface of the rerank package; one entry per name imported above.
+__all__ = [
+    "DeepInfraRerankProvider",
+    "RerankError",
+    "RerankProvider",
+    "RerankResult",
+    "VllmRerankProvider",
+    "build_rerank_provider",
+]
--- a/src/everos/component/rerank/deepinfra_provider.py
+++ b/src/everos/component/rerank/deepinfra_provider.py
@ -0,0 +1,196 @@
+"""DeepInfra inference-API rerank provider.
+
+DeepInfra exposes reranker models (e.g. ``Qwen/Qwen3-Reranker-4B``) at::
+
+    POST {base_url}/{model}
+    Authorization: Bearer <api_key>
+    Content-Type: application/json
+
+The request shape is the inference-API convention used across DeepInfra
+reranker / classifier models:
+
+    {
+        "queries":   ["<query>"],
+        "documents": ["<doc 1>", "<doc 2>", ...]
+    }
+
+The response carries one ``scores`` array per query:
+
+    {
+        "scores":          [[0.12, 0.87, 0.43, ...]],
+        "request_id":      "...",
+        "inference_status": {...}
+    }
+
+We submit one query at a time (matches the :class:`RerankProvider`
+contract) and unwrap the inner score list. Documents longer than the
+model's input window are silently truncated server-side.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Sequence
+from typing import Any
+
+import httpx
+
+from .protocol import RerankError, RerankResult
+
+# Qwen3-Reranker chat template. The DeepInfra inference API treats the reranker
+# as a yes/no generator, so the prompt scaffolding must be supplied client-side
+# (verbatim mirror of the EverCore benchmark's reranker client). Without it the
+# model scores raw text off-template and returns uncalibrated relevance.
+#
+# ``_QWEN3_PREFIX`` holds the system turn plus the opening of the user turn;
+# ``_format_qwen3_inputs`` appends the instruction and query after it.
+_QWEN3_PREFIX = (
+    "<|im_start|>system\n"
+    "Judge whether the Document meets the requirements based on the Query and "
+    'the Instruct provided. Note that the answer can only be "yes" or "no".'
+    "<|im_end|>\n<|im_start|>user\n"
+)
+# Appended after every document: closes the user turn and opens the assistant
+# turn with an empty ``<think>`` block.
+_QWEN3_SUFFIX = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+# Instruction used when the caller supplies none (``None`` or empty string).
+_DEFAULT_RERANK_INSTRUCTION = (
+    "Given a question and a passage, determine if the passage contains "
+    "information relevant to answering the question."
+)
+
+
+def _format_qwen3_inputs(
+    query: str, documents: list[str], instruction: str | None
+) -> tuple[str, list[str]]:
+    """Wrap query + documents in the Qwen3-Reranker chat template."""
+    instr = instruction or _DEFAULT_RERANK_INSTRUCTION
+    formatted_query = f"{_QWEN3_PREFIX}<Instruct>: {instr}\n<Query>: {query}\n"
+    formatted_docs = [f"<Document>: {doc}{_QWEN3_SUFFIX}" for doc in documents]
+    return formatted_query, formatted_docs
+
+
+class DeepInfraRerankProvider:
+    """Rerank provider for the DeepInfra inference API.
+
+    Args:
+        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
+        api_key: Bearer credential as plain ``str``.
+        base_url: Inference endpoint root
+            (e.g. ``"https://api.deepinfra.com/v1/inference"``). The
+            ``/{model}`` suffix is appended at request time.
+        timeout: Per-request timeout, seconds.
+        max_retries: Soft retry count on transport errors / 5xx.
+        batch_size: Cap on documents per request (large doc lists are
+            split, scores merged in input order).
+        max_concurrent: Cap on in-flight requests across all batches.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str,
+        base_url: str,
+        timeout: float = 30.0,
+        max_retries: int = 3,
+        batch_size: int = 10,
+        max_concurrent: int = 5,
+    ) -> None:
+        self._model = model
+        self._api_key = api_key
+        self._url = f"{base_url.rstrip('/')}/{model}"
+        self._timeout = timeout
+        self._max_retries = max_retries
+        self._batch_size = batch_size
+        self._semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def rerank(
+        self,
+        query: str,
+        documents: Sequence[str],
+        *,
+        instruction: str | None = None,
+    ) -> list[RerankResult]:
+        """Score every document against ``query``; return sorted desc."""
+        if not documents:
+            return []
+
+        chunks: list[tuple[int, list[str]]] = [
+            (offset, list(documents[offset : offset + self._batch_size]))
+            for offset in range(0, len(documents), self._batch_size)
+        ]
+        chunk_scores = await asyncio.gather(
+            *(self._score_chunk(query, docs, instruction) for _, docs in chunks)
+        )
+        scored: list[RerankResult] = []
+        for (offset, _), scores in zip(chunks, chunk_scores, strict=True):
+            scored.extend(
+                RerankResult(index=offset + i, score=score)
+                for i, score in enumerate(scores)
+            )
+        scored.sort(key=lambda r: r.score, reverse=True)
+        return scored
+
+    async def _score_chunk(
+        self, query: str, documents: list[str], instruction: str | None
+    ) -> list[float]:
+        formatted_query, formatted_docs = _format_qwen3_inputs(
+            query, documents, instruction
+        )
+        payload: dict[str, Any] = {
+            "queries": [formatted_query],
+            "documents": formatted_docs,
+        }
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+        async with self._semaphore:
+            for attempt in range(self._max_retries + 1):
+                try:
+                    async with httpx.AsyncClient(timeout=self._timeout) as client:
+                        response = await client.post(
+                            self._url, json=payload, headers=headers
+                        )
+                except httpx.HTTPError as exc:
+                    if attempt == self._max_retries:
+                        raise RerankError(
+                            f"DeepInfra rerank transport failure: {exc}"
+                        ) from exc
+                    continue
+
+                if response.status_code == 200:
+                    return _extract_scores(response.json(), len(documents))
+
+                # Retry on 5xx / 429 only; surface 4xx immediately.
+                if response.status_code >= 500 or response.status_code == 429:
+                    if attempt == self._max_retries:
+                        raise RerankError(
+                            f"DeepInfra rerank HTTP {response.status_code}: "
+                            f"{response.text[:200]}"
+                        )
+                    continue
+                raise RerankError(
+                    f"DeepInfra rerank HTTP {response.status_code}: "
+                    f"{response.text[:200]}"
+                )
+
+            raise RerankError(
+                f"DeepInfra rerank exhausted retries ({self._max_retries})"
+            )
+
+
+def _extract_scores(body: dict[str, Any], expected_len: int) -> list[float]:
+    """Unwrap ``scores`` from the DeepInfra response body.
+
+    Inference API returns ``scores`` as either:
+
+    - ``[[s1, s2, ...]]`` — one score row per query (current single-query
+      shape); take row 0.
+    - ``[s1, s2, ...]`` — flat list (fallback for providers that drop
+      the outer list when only one query is sent).
+    """
+    raw = body.get("scores")
+    if not isinstance(raw, list):
+        raise RerankError(f"DeepInfra rerank response missing scores: {body!r}")
+    row = raw[0] if raw and isinstance(raw[0], list) else raw
+    if len(row) != expected_len:
+        raise RerankError(
+            f"DeepInfra rerank returned {len(row)} scores, expected {expected_len}"
+        )
+    return [float(s) for s in row]
--- a/src/everos/component/rerank/factory.py
+++ b/src/everos/component/rerank/factory.py
@ -0,0 +1,74 @@
+"""Factory for building a rerank provider from :class:`RerankSettings`.
+
+The ``provider`` field on :class:`RerankSettings` selects which concrete
+implementation to build:
+
+    - ``"deepinfra"`` → :class:`DeepInfraRerankProvider`
+    - ``"vllm"``      → :class:`VllmRerankProvider`
+
+Adding a new provider = one match arm here + one new file under
+:mod:`everos.component.rerank`.
+"""
+
+from __future__ import annotations
+
+from everos.config import RerankSettings
+
+from .deepinfra_provider import DeepInfraRerankProvider
+from .protocol import RerankProvider
+from .vllm_provider import VllmRerankProvider
+
+
+def build_rerank_provider(settings: RerankSettings) -> RerankProvider:
+    """Build a rerank provider from settings.
+
+    Args:
+        settings: The :class:`RerankSettings` slice from
+            :func:`everos.config.load_settings`.
+
+    Returns:
+        A :class:`RerankProvider` ready to call ``rerank``.
+
+    Raises:
+        ValueError: If ``model`` or ``base_url`` is unset, or if
+            ``provider`` does not match a known implementation.
+            ``api_key`` is required for ``deepinfra``; optional (empty
+            string) for ``vllm`` self-hosted endpoints.
+    """
+    if not settings.model:
+        raise ValueError(
+            "Rerank model is not configured "
+            "(set EVEROS_RERANK__MODEL or [rerank] model in user toml)"
+        )
+    if not settings.base_url:
+        raise ValueError(
+            "Rerank base_url is not configured (set EVEROS_RERANK__BASE_URL)"
+        )
+    api_key = settings.api_key.get_secret_value() if settings.api_key else ""
+
+    if settings.provider == "deepinfra":
+        if not api_key:
+            raise ValueError(
+                "DeepInfra rerank api_key is not configured "
+                "(set EVEROS_RERANK__API_KEY)"
+            )
+        return DeepInfraRerankProvider(
+            model=settings.model,
+            api_key=api_key,
+            base_url=settings.base_url,
+            timeout=settings.timeout_seconds,
+            max_retries=settings.max_retries,
+            batch_size=settings.batch_size,
+            max_concurrent=settings.max_concurrent,
+        )
+    if settings.provider == "vllm":
+        return VllmRerankProvider(
+            model=settings.model,
+            api_key=api_key,
+            base_url=settings.base_url,
+            timeout=settings.timeout_seconds,
+            max_retries=settings.max_retries,
+            batch_size=settings.batch_size,
+            max_concurrent=settings.max_concurrent,
+        )
+    raise ValueError(f"unknown rerank provider: {settings.provider!r}")
--- a/src/everos/component/rerank/protocol.py
+++ b/src/everos/component/rerank/protocol.py
@ -0,0 +1,62 @@
+"""Rerank provider protocol.
+
+The contract every rerank provider satisfies: given a query and a list
+of candidate documents, return a re-ordered list of ``(index, score)``
+pairs (highest relevance first). The provider does **not** filter —
+that's the caller's job (e.g. drop scores below a threshold, take
+``top_k``). Returning every input pair keeps the contract stable
+across providers whose backends may not natively support ``top_n``.
+
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import NamedTuple, Protocol, runtime_checkable
+
+
+class RerankError(Exception):
+    """Raised on any provider-side rerank failure."""
+
+
+class RerankResult(NamedTuple):
+    """One scored document from a rerank call.
+
+    ``index`` is the position of the document in the *input* list (so
+    callers can map back to the original document text). ``score`` is
+    provider-defined; higher = more relevant.
+    """
+
+    index: int
+    score: float
+
+
+@runtime_checkable
+class RerankProvider(Protocol):
+    """Async rerank provider contract."""
+
+    async def rerank(
+        self,
+        query: str,
+        documents: Sequence[str],
+        *,
+        instruction: str | None = None,
+    ) -> list[RerankResult]:
+        """Score and re-order ``documents`` against ``query``.
+
+        Args:
+            query: The search query.
+            documents: Passage texts to score against ``query``.
+            instruction: Task instruction for instruction-tuned rerankers
+                (e.g. Qwen3-Reranker). Providers that wrap the model's chat
+                template fold this into the prompt; providers backed by a
+                dedicated rerank endpoint that handles templating server-side
+                may ignore it. ``None`` defers to the provider's default.
+
+        Returns:
+            One :class:`RerankResult` per input document, sorted by
+            ``score`` descending. The returned list length equals
+            ``len(documents)``.
+        """
+
+        ...
--- a/src/everos/component/rerank/vllm_provider.py
+++ b/src/everos/component/rerank/vllm_provider.py
@ -0,0 +1,173 @@
+"""vLLM rerank provider.
+
+Self-deployed vLLM (and other OpenAI-compatible rerank servers) expose
+the OpenAI-style rerank endpoint::
+
+    POST {base_url}/rerank
+    Authorization: Bearer <api_key>  # optional for self-hosted ("EMPTY")
+    Content-Type: application/json
+
+Request body:
+
+    {
+        "model":     "<model>",
+        "query":     "<query>",
+        "documents": ["<doc 1>", "<doc 2>", ...]
+    }
+
+Response body:
+
+    {
+        "results": [
+            {"index": 0, "relevance_score": 0.87},
+            {"index": 1, "relevance_score": 0.43},
+            ...
+        ],
+        "id": "...",
+        ...
+    }
+
+We pass documents through as-is — caller is responsible for any
+prompt-template formatting required by the underlying reranker. Output
+ordering may already be score-descending; we sort defensively to honour
+the :class:`RerankProvider` contract regardless of server behaviour.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Sequence
+from typing import Any
+
+import httpx
+
+from .protocol import RerankError, RerankResult
+
+
+class VllmRerankProvider:
+    """Rerank provider for vLLM / OpenAI-compat ``/v1/rerank`` endpoints.
+
+    Args:
+        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
+        api_key: Bearer credential. Pass ``""`` (empty string) for
+            self-hosted endpoints that don't require auth — the
+            ``Authorization`` header is omitted in that case.
+        base_url: API root that *contains* the ``/v1`` prefix
+            (e.g. ``"http://localhost:8000/v1"``). The ``/rerank``
+            suffix is appended at request time.
+        timeout: Per-request timeout, seconds.
+        max_retries: Soft retry count on transport errors / 5xx.
+        batch_size: Cap on documents per request.
+        max_concurrent: Cap on in-flight requests across all batches.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str,
+        api_key: str,
+        base_url: str,
+        timeout: float = 30.0,
+        max_retries: int = 3,
+        batch_size: int = 10,
+        max_concurrent: int = 5,
+    ) -> None:
+        self._model = model
+        self._api_key = api_key
+        self._url = f"{base_url.rstrip('/')}/rerank"
+        self._timeout = timeout
+        self._max_retries = max_retries
+        self._batch_size = batch_size
+        self._semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def rerank(
+        self,
+        query: str,
+        documents: Sequence[str],
+        *,
+        instruction: str | None = None,
+    ) -> list[RerankResult]:
+        """Score every document against ``query``; return sorted desc.
+
+        ``instruction`` is accepted for protocol parity but not transmitted:
+        the OpenAI-compatible ``/rerank`` endpoint applies the reranker's chat
+        template (including any task instruction) server-side, so unlike the
+        DeepInfra completion-style API there is no client-side template to fill.
+        """
+        if not documents:
+            return []
+
+        chunks: list[tuple[int, list[str]]] = [
+            (offset, list(documents[offset : offset + self._batch_size]))
+            for offset in range(0, len(documents), self._batch_size)
+        ]
+        chunk_results = await asyncio.gather(
+            *(self._score_chunk(query, docs) for _, docs in chunks)
+        )
+        scored: list[RerankResult] = []
+        for (offset, _), partial in zip(chunks, chunk_results, strict=True):
+            scored.extend(
+                RerankResult(index=offset + r.index, score=r.score) for r in partial
+            )
+        scored.sort(key=lambda r: r.score, reverse=True)
+        return scored
+
+    async def _score_chunk(
+        self, query: str, documents: list[str]
+    ) -> list[RerankResult]:
+        payload: dict[str, Any] = {
+            "model": self._model,
+            "query": query,
+            "documents": documents,
+        }
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if self._api_key:
+            headers["Authorization"] = f"Bearer {self._api_key}"
+
+        async with self._semaphore:
+            for attempt in range(self._max_retries + 1):
+                try:
+                    async with httpx.AsyncClient(timeout=self._timeout) as client:
+                        response = await client.post(
+                            self._url, json=payload, headers=headers
+                        )
+                except httpx.HTTPError as exc:
+                    if attempt == self._max_retries:
+                        raise RerankError(
+                            f"vLLM rerank transport failure: {exc}"
+                        ) from exc
+                    continue
+
+                if response.status_code == 200:
+                    return _parse_rerank_results(response.json())
+
+                if response.status_code >= 500 or response.status_code == 429:
+                    if attempt == self._max_retries:
+                        raise RerankError(
+                            f"vLLM rerank HTTP {response.status_code}: "
+                            f"{response.text[:200]}"
+                        )
+                    continue
+                raise RerankError(
+                    f"vLLM rerank HTTP {response.status_code}: {response.text[:200]}"
+                )
+
+            raise RerankError(f"vLLM rerank exhausted retries ({self._max_retries})")
+
+
+def _parse_rerank_results(body: dict[str, Any]) -> list[RerankResult]:
+    items = body.get("results")
+    if not isinstance(items, list):
+        raise RerankError(f"vLLM rerank response missing results: {body!r}")
+    parsed: list[RerankResult] = []
+    for item in items:
+        try:
+            parsed.append(
+                RerankResult(
+                    index=int(item["index"]),
+                    score=float(item["relevance_score"]),
+                )
+            )
+        except (KeyError, TypeError, ValueError) as exc:
+            raise RerankError(f"malformed rerank result entry: {item!r}") from exc
+    return parsed
--- a/src/everos/component/tokenizer/__init__.py
+++ b/src/everos/component/tokenizer/__init__.py
@ -0,0 +1,24 @@
+"""Tokenizer provider — sync app-layer tokenisation for BM25 indexing.
+
+Public surface:
+
+- :class:`Tokenizer` — Protocol every provider satisfies.
+- :class:`JiebaTokenizer` — default jieba-backed implementation.
+- :func:`build_tokenizer` — factory returning the configured tokenizer.
+
+External usage::
+
+    from everos.component.tokenizer import build_tokenizer
+    tk = build_tokenizer()
+    tokens = tk.tokenize("hello 世界")  # ['hello', '世界']
+"""
+
+from .factory import build_tokenizer as build_tokenizer
+from .jieba_provider import JiebaTokenizer as JiebaTokenizer
+from .protocol import Tokenizer as Tokenizer
+
+# Public surface of the tokenizer package; one entry per name imported above.
+__all__ = [
+    "JiebaTokenizer",
+    "Tokenizer",
+    "build_tokenizer",
+]
--- a/src/everos/component/tokenizer/factory.py
+++ b/src/everos/component/tokenizer/factory.py
@ -0,0 +1,17 @@
+"""Factory for the cascade-time tokenizer.
+
+Single implementation today (``JiebaTokenizer``). Lifting this into a
+factory keeps callers (cascade handler) decoupled from the concrete
+choice, so swapping to char-bigram / hf tokenizer later is a one-file
+change — see ``17_lancedb_tables_design.md`` §2.4.1.
+"""
+
+from __future__ import annotations
+
+from .jieba_provider import JiebaTokenizer
+from .protocol import Tokenizer
+
+
+def build_tokenizer() -> Tokenizer:
+    """Build the default tokenizer (``JiebaTokenizer``)."""
+    return JiebaTokenizer()
--- a/src/everos/component/tokenizer/jieba_provider.py
+++ b/src/everos/component/tokenizer/jieba_provider.py
@ -0,0 +1,141 @@
+"""Jieba-based tokenizer — covers CJK + English mixed content.
+
+Uses ``jieba.cut_for_search`` (search-mode segmentation: yields both the
+greedy max-match segment and its finer sub-segments for compound CJK
+words). Same mode as the legacy enterprise keyword-search path uses on
+the query side — keeping cascade write and search query symmetric is
+the hard contract for BM25 recall to work.
+
+After segmentation we drop:
+
+* whitespace / empty tokens (so the join-on-space output stays clean),
+* tokens shorter than ``min_token_length`` (default 2 — same threshold
+  enterprise's ``filter_stopwords(min_length=2)`` uses; single-char
+  fragments mostly hurt BM25 precision),
+* tokens in a small bilingual stopword set (Chinese function words +
+  English articles / prepositions / aux verbs).
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Final
+
+import jieba
+
+# Small bilingual stopword set. Intentionally tight (not a full
+# Chinese stopword list) so the behaviour is predictable; callers
+# tuning recall can pass ``extra_stopwords`` to ``JiebaTokenizer`` or
+# subclass it. Entries are lower-case because tokens are lower-cased
+# before the lookup.
+_DEFAULT_STOPWORDS: Final[frozenset[str]] = frozenset(
+    {
+        # English — articles / prepositions / aux verbs that dominate BM25
+        # idf-noise but add no recall value.
+        "the",
+        "a",
+        "an",
+        "and",
+        "or",
+        "but",
+        "if",
+        "of",
+        "to",
+        "in",
+        "on",
+        "at",
+        "by",
+        "for",
+        "with",
+        "as",
+        "is",
+        "are",
+        "was",
+        "were",
+        "be",
+        "been",
+        "being",
+        "do",
+        "does",
+        "did",
+        "has",
+        "have",
+        "had",
+        "this",
+        "that",
+        "these",
+        "those",
+        "it",
+        "its",
+        # Chinese — function words / particles. Every entry below is a
+        # single character, so the default minimum token length of 2
+        # already drops them; listing them explicitly makes the intent
+        # clear and only changes the output when a tokenizer is built
+        # with a minimum token length below 2.
+        "的",
+        "了",
+        "和",
+        "是",
+        "在",
+        "我",
+        "你",
+        "他",
+        "她",
+        "它",
+        "也",
+        "都",
+        "就",
+        "还",
+        "或",
+        "及",
+        "与",
+        "对",
+        "把",
+        "被",
+        "有",
+        "没",
+        "不",
+        "啊",
+        "吗",
+        "呢",
+        "吧",
+        "哦",
+    }
+)
+
+# Default lower bound on token length; ``JiebaTokenizer.tokenize`` drops
+# tokens shorter than this.
+_DEFAULT_MIN_TOKEN_LENGTH: Final[int] = 2
+
+
+class JiebaTokenizer:
+    """Tokenizer that calls into ``jieba.cut_for_search`` and filters."""
+
+    def __init__(
+        self,
+        *,
+        min_token_length: int = _DEFAULT_MIN_TOKEN_LENGTH,
+        extra_stopwords: frozenset[str] | None = None,
+    ) -> None:
+        # Touching ``jieba.initialize()`` here would force eager dict load
+        # at import time and balloon test-collection latency. ``jieba.cut*``
+        # lazy-loads on first call instead.
+        self._min_len = min_token_length
+        self._stopwords = (
+            _DEFAULT_STOPWORDS | extra_stopwords
+            if extra_stopwords
+            else _DEFAULT_STOPWORDS
+        )
+
+    def tokenize(self, text: str) -> list[str]:
+        if not text:
+            return []
+        out: list[str] = []
+        for raw in jieba.cut_for_search(text):
+            tok = raw.strip().lower()
+            if not tok or tok.isspace():
+                continue
+            if len(tok) < self._min_len:
+                continue
+            if tok in self._stopwords:
+                continue
+            out.append(tok)
+        return out
+
+    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
+        return [self.tokenize(t) for t in texts]
--- a/src/everos/component/tokenizer/protocol.py
+++ b/src/everos/component/tokenizer/protocol.py
@ -0,0 +1,37 @@
+"""Tokenizer protocol.
+
+App-layer tokenisation gates every BM25-indexed field in LanceDB
+(``17_lancedb_tables_design.md`` §2.4.1): the source surface form lives
+in ``<field>`` while the space-joined token stream lives in
+``<field>_tokens``, and the FTS index reads only the latter using a
+whitespace tokenizer. Keeping the tokenizer decision in the app layer
+means it can swap (jieba → unigram → hf) without re-indexing or
+touching LanceDB schemas.
+
+The protocol is sync — every concrete tokenizer in scope today (jieba,
+char-bigram, regex word-split) is CPU-bound with no IO, so an async
+wrapper would just shuffle work onto the event loop. If a future GPU
+or remote tokenizer needs IO it should add an async method explicitly.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Protocol, runtime_checkable
+
+
+@runtime_checkable
+class Tokenizer(Protocol):
+    """Sync tokeniser contract used by the cascade handler."""
+
+    def tokenize(self, text: str) -> list[str]:
+        """Return the ordered token list for ``text``.
+
+        Implementations must drop empty / whitespace-only tokens so the
+        resulting space-joined string never carries adjacent spaces.
+        """
+        ...
+
+    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
+        """Tokenise many strings, preserving input order."""
+        ...
--- a/src/everos/component/utils/__init__.py
+++ b/src/everos/component/utils/__init__.py
@ -0,0 +1,22 @@
+"""Common utilities (datetime, tokenization, etc.).
+
+Public API:
+    from everos.component.utils.datetime import (
+        UtcDatetime,
+        ensure_utc,
+        from_iso_format,
+        from_timestamp,
+        get_now_with_timezone,
+        get_utc_now,
+        to_date_str,
+        to_display_tz,
+        to_iso_format,
+        to_timestamp_ms,
+        today_with_timezone,
+    )
+    from everos.component.utils.tokenize import (
+        tokens_for_index,
+        tokens_for_query,
+        join_tokens,
+    )
+"""
--- a/src/everos/component/utils/datetime.py
+++ b/src/everos/component/utils/datetime.py
@ -0,0 +1,263 @@
+"""Timezone-aware datetime helpers.
+
+EverOS follows a **two-zone discipline**:
+
+* **Storage** (SQLite + LanceDB) is always UTC. Use :func:`get_utc_now`
+  for any ``default_factory`` / write-path timestamp; if you accept a
+  ``datetime`` from a caller, normalise with :func:`ensure_utc` before
+  it crosses the persistence boundary.
+* **Display** (markdown frontmatter, HTTP API response, date buckets for
+  daily-log filenames) uses the configured "display timezone" from
+  :attr:`everos.config.MemorySettings.timezone` (``EVEROS_MEMORY__TIMEZONE``).
+  Use :func:`get_now_with_timezone` / :func:`today_with_timezone` /
+  :func:`to_display_tz` here.
+
+The display timezone also serves as the **fallback timezone for naive
+input**: if a caller hands us a string / datetime without offset (e.g.
+a hand-written ISO timestamp), :func:`from_iso_format` attaches the
+display timezone before further processing — that matches a human's
+intuition ("if I didn't say a zone, you should assume my zone").
+
+Never call :func:`datetime.datetime.now` /
+:func:`datetime.datetime.utcnow` directly — see
+:doc:`.claude/rules/datetime-handling`.
+
+Cache invalidation in tests::
+
+    load_settings.cache_clear()
+    _display_tz.cache_clear()
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from functools import cache
+from typing import Annotated
+from zoneinfo import ZoneInfo
+
+from pydantic import AfterValidator
+
+_MS_THRESHOLD = 1e12  # ts >= this is treated as milliseconds
+
+
+@cache
+def _display_tz() -> _dt.tzinfo:
+    """Resolve the configured **display timezone** (cached).
+
+    Reads :attr:`everos.config.MemorySettings.timezone`; that field
+    validates the name with :class:`zoneinfo.ZoneInfo` at load time, so
+    by the time we reach here the value is guaranteed valid. This
+    timezone governs:
+
+    1. ISO output rendered in markdown / API responses.
+    2. The fallback zone attached to naive-input datetimes.
+
+    It does **not** govern storage — see :func:`get_utc_now`.
+    """
+    # Lazy import to avoid pulling in pydantic-settings at module load.
+    from everos.config import load_settings
+
+    return ZoneInfo(load_settings().memory.timezone)
+
+
+def get_utc_now() -> _dt.datetime:
+    """Return the current time as a UTC-aware datetime.
+
+    Use for any **storage** write-path (SQLite ``default_factory``,
+    LanceDB row construction, OME event ``ts``, any internal "when
+    did this happen" record). Independent of the display timezone — a
+    new deployment that switches ``EVEROS_MEMORY__TIMEZONE`` will not
+    misalign existing rows.
+
+    Display-side code should use :func:`get_now_with_timezone` instead,
+    or render via :func:`to_display_tz`.
+    """
+    return _dt.datetime.now(tz=_dt.UTC)
+
+
+def get_now_with_timezone() -> _dt.datetime:
+    """Return the current time in the **display timezone** (configured).
+
+    Use for **display** write-paths only — markdown frontmatter values,
+    daily-log date buckets, places where a human will see the literal
+    string. The returned datetime carries the display timezone offset
+    so ``.isoformat()`` produces something like
+    ``2026-05-29T14:00:00+08:00``.
+
+    For storage / internal "when did this happen" timestamps use
+    :func:`get_utc_now` instead — display timezone must not bleed into
+    persisted rows.
+    """
+    return _dt.datetime.now(tz=_display_tz())
+
+
+def today_with_timezone() -> _dt.date:
+    """Return today's date in the **display timezone**.
+
+    Use this anywhere a *date bucket* is needed (e.g. daily-log file
+    boundaries) — it normalises ``get_now_with_timezone().date()`` so
+    the timezone fallback rules are applied consistently.
+    """
+    return get_now_with_timezone().date()
+
+
+def ensure_utc(d: _dt.datetime | None) -> _dt.datetime | None:
+    """Normalise any datetime to UTC at the **storage boundary**.
+
+    Semantics:
+
+    * ``None`` → ``None`` (nullable-column convenience: lets callers
+      pipe ``ensure_utc(row.last_attempt_at)`` without an outer guard).
+    * Aware input → ``astimezone(UTC)``.
+    * **Naive input → assume UTC** (attach ``tzinfo=UTC``); no
+      display-tz fallback.
+
+    Why naive→UTC rather than naive→display→UTC? Every caller of this
+    function sits at the storage boundary, and the dominant naive
+    source is SQLite reads: SQLAlchemy strips tz on write so what
+    comes back is a naive value whose bytes are UTC. Treating those
+    naive reads as display-tz would drift by the configured offset on
+    every round trip — exactly the bug Q2 prevents.
+
+    Caller-supplied datetimes that may genuinely be naive in display
+    tz (e.g. ISO strings from HTTP request bodies that omitted the
+    offset) should be funnelled through :func:`from_iso_format` first,
+    which encodes the "if you didn't say a zone, assume your zone"
+    rule. The aware result then passes through ``ensure_utc`` as a
+    pure ``astimezone(UTC)``.
+
+    Use the :data:`UtcDatetime` ``Annotated`` type to apply this
+    automatically on Pydantic model fields.
+    """
+    if d is None:
+        return None
+    if d.tzinfo is None:
+        return d.replace(tzinfo=_dt.UTC)
+    return d.astimezone(_dt.UTC)
+
+
+def to_display_tz(d: _dt.datetime | None) -> _dt.datetime | None:
+    """Convert a datetime to the **display timezone** (configured).
+
+    Used at the **response render boundary**: any datetime leaving the
+    system through an API response or markdown body passes through
+    here so the user sees their wall-clock time with the matching
+    ``+HH:MM`` offset.
+
+    * ``None`` → ``None`` (nullable-column convenience).
+    * Naive input is treated as already display-tz local (the fallback
+      rule) — attach the zone and return as-is.
+    * Aware input is ``astimezone(...)``-d to the display tz.
+    """
+    if d is None:
+        return None
+    if d.tzinfo is None:
+        return d.replace(tzinfo=_display_tz())
+    return d.astimezone(_display_tz())
+
+
+# ``AfterValidator`` applies ``ensure_utc`` to the value once pydantic's own
+# ``datetime`` validation has produced it.
+# NOTE(review): pydantic skips validators for field defaults unless
+# ``validate_default`` is enabled, and SQLModel ``table=True`` models may not
+# run validation at all — confirm ``BaseTable`` really routes INSERT defaults
+# and post-read values through this validator as the description below states.
+UtcDatetime = Annotated[_dt.datetime, AfterValidator(ensure_utc)]
+"""Pydantic-friendly ``datetime`` type that normalises to UTC.
+
+Apply to any SQLModel / Pydantic ``datetime`` field that maps to a
+storage column. Both INSERT default values and post-read values pass
+through :func:`ensure_utc`, so SQLite's tz-stripping behaviour is
+neutralised: rows go in as UTC and come out as UTC-aware.
+
+Usage::
+
+    from everos.component.utils.datetime import UtcDatetime, get_utc_now
+
+    class MyRow(BaseTable, table=True):
+        happened_at: UtcDatetime = Field(default_factory=get_utc_now)
+"""
+
+
+def from_timestamp(ts: int | float) -> _dt.datetime:
+    """Parse a Unix timestamp into a timezone-aware datetime.
+
+    Auto-detects seconds vs milliseconds: values ``>= 1e12`` are treated as
+    milliseconds. Returned datetime is in the default timezone.
+    """
+    seconds = ts / 1000.0 if ts >= _MS_THRESHOLD else float(ts)
+    return _dt.datetime.fromtimestamp(seconds, tz=_display_tz())
+
+
+def from_iso_format(value: _dt.datetime | int | float | str) -> _dt.datetime:
+    """Parse a value into a timezone-aware datetime (strict).
+
+    Accepted inputs:
+        * ``datetime`` — naive values get the default timezone attached.
+        * ``int`` / ``float`` — Unix timestamp (auto-detect seconds vs ms).
+        * ``str`` — ISO-8601, including ``"Z"`` suffix for UTC.
+
+    Raises:
+        TypeError: On unsupported input type.
+        ValueError: On malformed string / negative timestamp.
+    """
+    if isinstance(value, _dt.datetime):
+        if value.tzinfo is None:
+            return value.replace(tzinfo=_display_tz())
+        return value
+    if isinstance(value, bool):  # bool is an int subclass — reject explicitly
+        raise TypeError("from_iso_format does not accept bool")
+    if isinstance(value, int | float):
+        return from_timestamp(value)
+    if isinstance(value, str):
+        s = value.strip()
+        # Python's fromisoformat accepts "+HH:MM" but not the "Z" suffix; map it.
+        if s.endswith("Z"):
+            s = s[:-1] + "+00:00"
+        parsed = _dt.datetime.fromisoformat(s)
+        if parsed.tzinfo is None:
+            parsed = parsed.replace(tzinfo=_display_tz())
+        return parsed
+    raise TypeError(
+        f"from_iso_format: unsupported type {type(value).__name__}; "
+        "expected datetime / int / float / str"
+    )
+
+
+def to_iso_format(
+    value: _dt.datetime | int | float | str | None,
+) -> str | None:
+    """Render a value as an ISO-8601 string (timezone-aware).
+
+    Accepted inputs:
+        * ``None`` — returns ``None`` (nullable column convenience).
+        * ``datetime`` — rendered as-is (must already be tz-aware).
+        * ``int`` / ``float`` — interpreted via :func:`from_timestamp`.
+        * ``str`` — re-validated through :func:`from_iso_format`.
+    """
+    if value is None:
+        return None
+    if isinstance(value, _dt.datetime):
+        return value.isoformat()
+    if isinstance(value, bool):  # bool is an int subclass
+        raise TypeError("to_iso_format does not accept bool")
+    if isinstance(value, int | float):
+        return from_timestamp(value).isoformat()
+    if isinstance(value, str):
+        if not value:
+            return None
+        return from_iso_format(value).isoformat()
+    raise TypeError(
+        f"to_iso_format: unsupported type {type(value).__name__}; "
+        "expected datetime / int / float / str / None"
+    )
+
+
+def to_date_str(d: _dt.datetime | None) -> str | None:
+    """Render the date portion of a datetime as ``YYYY-MM-DD``.
+
+    Accepts ``None`` for nullable database columns. When the input is
+    already a :class:`datetime.date`, call ``d.isoformat()`` directly.
+    """
+    if d is None:
+        return None
+    return d.date().isoformat()
+
+
+def to_timestamp_ms(d: _dt.datetime) -> int:
+    """Convert a datetime to a Unix timestamp in milliseconds."""
+    return int(d.timestamp() * 1000)
--- a/src/everos/config/__init__.py
+++ b/src/everos/config/__init__.py
@ -0,0 +1,37 @@
+"""Configuration data and Settings schema.
+
+Public API:
+    from everos.config import (
+        Settings, MemorySettings, SqliteSettings, LanceDBSettings,
+        LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
+        BoundaryDetectionSettings,
+        load_settings,
+    )
+
+Distinct from ``everos.component.config`` (which is a *capability* —
+loader / merger / env reader).
+"""
+
+from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
+from .settings import EmbeddingSettings as EmbeddingSettings
+from .settings import LanceDBSettings as LanceDBSettings
+from .settings import LLMSettings as LLMSettings
+from .settings import MemorySettings as MemorySettings
+from .settings import MultimodalSettings as MultimodalSettings
+from .settings import RerankSettings as RerankSettings
+from .settings import Settings as Settings
+from .settings import SqliteSettings as SqliteSettings
+from .settings import load_settings as load_settings
+
+# Explicit public API of the package; mirrors the re-exports imported above.
+# NOTE(review): settings.py also defines ApiSettings, MemorizeSettings and
+# SearchSettings, which are not re-exported here — confirm that is intended.
+__all__ = [
+    "BoundaryDetectionSettings",
+    "EmbeddingSettings",
+    "LLMSettings",
+    "LanceDBSettings",
+    "MemorySettings",
+    "MultimodalSettings",
+    "RerankSettings",
+    "Settings",
+    "SqliteSettings",
+    "load_settings",
+]
--- a/src/everos/config/default.toml
+++ b/src/everos/config/default.toml
@ -0,0 +1,137 @@
+# everos default configuration.
+#
+# Lookup order (later overrides earlier):
+#   1. This file (shipped defaults; lowest priority)
+#   2. ~/.everos/config.toml — user-level overrides (optional;
+#      path is overridable via EVEROS_CONFIG_FILE)
+#   3. .env file in the working directory
+#   4. Environment variables — EVEROS_<SECTION>__<KEY>
+#         e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
+#   5. Programmatic init args (highest priority)
+#
+# `null` (omitted in TOML) means "use the Pydantic default declared in code".
+
+[memory]
+# memory-root is the single directory holding all persisted memory.
+# `~` is expanded; the path is resolved when MemoryRoot is constructed.
+root = "~/.everos"
+# Display timezone for date buckets and rendered timestamps (storage is
+# always UTC). Drives component.utils.datetime; this is the SOLE source —
+# OS `TZ` is not read. Override via `EVEROS_MEMORY__TIMEZONE` if needed.
+timezone = "UTC"
+
+[api]
+# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
+# keeps the API on loopback only — EverOS ships no built-in auth (see
+# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
+# have placed your own gateway / auth layer in front of the server.
+# Override via EVEROS_API__HOST and EVEROS_API__PORT.
+host = "127.0.0.1"
+port = 8000
+
+[sqlite]
+# PRAGMA journal_mode  — WAL is the recommended high-concurrency mode.
+journal_mode = "WAL"
+# PRAGMA synchronous  — NORMAL is safe under WAL and ~2x faster than FULL.
+synchronous = "NORMAL"
+# PRAGMA foreign_keys — must be explicitly enabled per connection.
+foreign_keys = true
+# PRAGMA temp_store   — MEMORY keeps query intermediates in RAM (no IO impact
+# on durability — only affects sort/group/temp-table calculation buffers).
+temp_store = "MEMORY"
+# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
+busy_timeout_ms = 5000
+# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
+journal_size_limit_bytes = 67108864
+# PRAGMA cache_size  — KB of page cache (per connection).
+cache_size_kb = 2048
+
+[lancedb]
+# Read consistency interval in seconds.
+#   omitted / null -> no consistency check (fastest reads)
+#   0              -> strict (every read checks updates)
+#   >0             -> eventual (interval seconds between checks)
+# Uncomment to override:
+# read_consistency_seconds = 5.0
+
+[llm]
+# Provider-agnostic OpenAI-protocol client config. Override via env:
+#   EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
+# Or via a ``.env`` file in the working directory (auto-loaded).
+model = "gpt-4o-mini"
+# api_key = ""
+# base_url = ""
+
+[multimodal]
+# Independent LLM for multimodal parsing (everalgo-parser); must accept
+# image / pdf / audio image_url parts. Override via env:
+#   EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
+model = "google/gemini-3-flash-preview"
+max_concurrency = 4
+# api_key = ""
+# base_url = ""
+# file:// content-item support (read locally by EverOS, not everalgo).
+# file_uri_allow_dirs: empty = allow any readable file (local-first default);
+# list base dirs to confine reads when the API is exposed.
+# file_uri_allow_dirs = ["/srv/uploads"]
+# file_uri_max_bytes = 52428800   # 50 MiB cap per file:// asset
+
+[embedding]
+# OpenAI-compatible embedding endpoint. Override via env:
+#   EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
+# model / api_key / base_url have no shipped defaults — must be set
+# (env or user toml) before the embedding capability is used.
+# model     = "Qwen/Qwen3-Embedding-4B"
+# api_key   = ""
+# base_url  = "https://api.example.com/v1"
+timeout_seconds = 30.0
+max_retries     = 3
+batch_size      = 10
+max_concurrent  = 5
+
+[rerank]
+# Rerank provider. Override via env:
+#   EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
+#   EVEROS_RERANK__BASE_URL
+# `provider` picks the request-shape:
+#   - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
+#   - "vllm"      -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
+provider = "deepinfra"
+# model     = "Qwen/Qwen3-Reranker-4B"
+# api_key   = ""
+# base_url  = "https://api.deepinfra.com/v1/inference"
+timeout_seconds = 30.0
+max_retries     = 3
+batch_size      = 10
+max_concurrent  = 5
+
+[boundary_detection]
+# Passed through to ``everalgo.BoundaryDetector.adetect``.
+hard_token_limit = 65536
+hard_msg_limit = 500
+
+[search]
+# Vector retrieval strategy when SearchMethod.VECTOR is selected.
+#   "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20),
+#       max-pool the per-fact cosine by parent memcell, then reverse-resolve
+#       to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
+#       episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
+#   "episode": single-vector ANN over episode.vector (legacy path).
+# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
+vector_strategy = "maxsim_atomic"
+
+[memorize]
+# Conversation mode. Selects the boundary detector and which pipelines run:
+#   "chat"  -> BoundaryDetector       + user_memory only
+#   "agent" -> AgentBoundaryDetector  + user_memory + agent_memory
+# A single service process serves one mode at a time; switching mode
+# requires a restart. Override via EVEROS_MEMORIZE__MODE.
+mode = "agent"
+
+# Maximum wall-clock for one memorize() invocation while holding the
+# per-session lock. On timeout the outer asyncio.timeout cancels the call
+# and the lock auto-releases so subsequent concurrent /add on the same
+# session aren't deadlocked. Covers boundary LLM + memcell writes +
+# synchronous portion of pipeline dispatch.
+# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
+session_lock_timeout_seconds = 360.0
--- a/src/everos/config/default_ome.toml
+++ b/src/everos/config/default_ome.toml
@ -0,0 +1,64 @@
+# everos OME (Offline Memory Engine) — per-strategy overrides.
+#
+# This file is materialised at ``<memory-root>/ome.toml`` by
+# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
+# individual strategies or tweak their gate / retry / cron without
+# restarting the server; the engine watches this file and hot-reloads
+# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
+# your edits — the file is only materialised when absent.
+#
+# Overrides are partial: only the keys you set replace the in-code
+# defaults; omitted keys keep each strategy's coded value. Unknown
+# keys (typos) raise StartupValidationError, so you cannot silently
+# misconfigure a strategy.
+#
+# Per-strategy schema (StrategyOverride):
+#   enabled                = bool           # disable a strategy entirely
+#   max_retries            = int >= 0       # re-fire on failure
+#   cron                   = str            # replace the @cron(...) trigger
+#   idle_seconds           = int > 0        # replace @idle(...) idle window
+#   scan_interval_seconds  = int > 0        # paired with idle_seconds; must be <= idle_seconds / 2
+#   [strategies.<name>.gate]                # only valid on @counter-gated strategies
+#   threshold              = int > 0        # counter trigger threshold
+#   cooldown_seconds       = int >= 0       # min seconds between fires
+#   event_field            = str            # dispatch field for counter increment
+
+# ── User-memory pipeline ────────────────────────────────────────────────
+
+# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
+# [strategies.extract_atomic_facts]
+# enabled = true
+
+# Foresight extraction (runs per memcell). Heavy LLM call — common to
+# disable in evaluation / benchmark runs.
+# [strategies.extract_foresight]
+# enabled = false
+
+# Profile clustering trigger (counter-gated; fires once N user memcells
+# accumulate). Lower the threshold to cluster more aggressively.
+# [strategies.trigger_profile_clustering]
+# enabled = true
+# [strategies.trigger_profile_clustering.gate]
+# threshold = 5
+
+# User-profile extraction (runs after clustering trigger fires). Common
+# to disable in evaluation runs where ground-truth profiles aren't measured.
+# [strategies.extract_user_profile]
+# enabled = false
+
+# ── Agent-memory pipeline ───────────────────────────────────────────────
+
+# Agent case extraction (runs per agent memcell). One per tool call cycle.
+# [strategies.extract_agent_case]
+# enabled = true
+
+# Skill clustering trigger (counter-gated; fires once N agent cases
+# accumulate per agent).
+# [strategies.trigger_skill_clustering]
+# enabled = true
+# [strategies.trigger_skill_clustering.gate]
+# threshold = 5
+
+# Agent skill extraction (runs after skill clustering trigger fires).
+# [strategies.extract_agent_skill]
+# enabled = true
--- a/src/everos/config/prompt_slots/.gitkeep
+++ b/src/everos/config/prompt_slots/.gitkeep
--- a/src/everos/config/prompt_slots/__init__.py
+++ b/src/everos/config/prompt_slots/__init__.py
--- a/src/everos/config/prompt_slots/boundary_detection.yaml
+++ b/src/everos/config/prompt_slots/boundary_detection.yaml
@ -0,0 +1,20 @@
+# Custom prompt slot for BoundaryDetector.adetect.
+#
+# Default behaviour
+#   Leave this slot disabled (``enabled: false``). The pipeline will pass
+#   ``prompt=None`` through to algo, which falls back to the everalgo
+#   bundled default prompt — see:
+#     ~/everalgo/packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
+#     (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
+#
+# To customise
+#   1. Read the algo default at the path above; note the required
+#      placeholders ``{messages}`` and ``{token_count}``.
+#   2. Replace the ``template`` body below with your prompt.
+#   3. Flip ``enabled`` to ``true``.
+#
+# When ``enabled: false`` or ``template`` is empty, the pipeline sends
+# ``prompt=None`` and the algo default is used (zero override cost).
+
+enabled: false
+template: ""
--- a/src/everos/config/prompt_slots/episode_extract.yaml
+++ b/src/everos/config/prompt_slots/episode_extract.yaml
@ -0,0 +1,23 @@
+# Custom prompt slot for EpisodeExtractor.aextract.
+#
+# Default behaviour
+#   Leave this slot disabled (``enabled: false``). The pipeline will pass
+#   ``prompt=None`` through to algo, which falls back to the everalgo
+#   bundled default prompt — see:
+#     everalgo/user_memory/prompts/en/episode.py
+#     (the pipeline calls ``aextract`` with ``sender_id=None``, so the
+#      whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
+#      per-user ``USER_EPISODE_GENERATION_PROMPT``)
+#
+# To customise
+#   1. Read the algo default at the path above; note the required
+#      placeholders ``{conversation_start_time}``, ``{conversation}`` and
+#      ``{custom_instructions}``.
+#   2. Replace the ``template`` body below with your prompt.
+#   3. Flip ``enabled`` to ``true``.
+#
+# When ``enabled: false`` or ``template`` is empty, the pipeline sends
+# ``prompt=None`` and the algo default is used (zero override cost).
+
+enabled: false
+template: ""
--- a/src/everos/config/settings.py
+++ b/src/everos/config/settings.py
@ -0,0 +1,403 @@
+"""Application settings.
+
+Loaded by :func:`load_settings`. Source priority (later wins):
+
+    1. ``config/default.toml`` (shipped values; lowest priority)
+    2. ``~/.everos/config.toml`` (user-level overrides; optional)
+    3. ``.env`` file in the working directory (secrets / machine-specific)
+    4. ``EVEROS_<SECTION>__<KEY>`` environment variables
+    5. Init args passed programmatically (highest priority)
+
+The user-level toml path defaults to ``~/.everos/config.toml``. Override
+with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
+optional — if it does not exist, the source is silently skipped.
+
+The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
+maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
+
+``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
+:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
+call. Tests that mutate environment variables must call
+``load_settings.cache_clear()`` after the mutation to invalidate.
+"""
+
+from __future__ import annotations
+
+import os
+from functools import cache
+from pathlib import Path
+from typing import Literal
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+from pydantic import BaseModel, Field, SecretStr, field_validator
+from pydantic_settings import (
+    BaseSettings,
+    PydanticBaseSettingsSource,
+    SettingsConfigDict,
+    TomlConfigSettingsSource,
+)
+
+_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
+_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
+_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
+
+
+def _resolve_user_toml_path() -> Path:
+    """Resolve the user-level ``config.toml`` path.
+
+    Defaults to ``~/.everos/config.toml``; override with the
+    ``EVEROS_CONFIG_FILE`` environment variable.
+    """
+    override = os.environ.get(_USER_TOML_ENV_VAR)
+    return Path(override).expanduser() if override else _DEFAULT_USER_TOML_PATH
+
+
+class MemorySettings(BaseModel):
+    """memory-root configuration."""
+
+    root: Path = Path("~/.everos")
+    timezone: str = "UTC"
+    """Effective timezone for date buckets and timestamps.
+
+    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
+    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
+    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
+    fast (no silent fallback). This is the **sole** source of truth for
+    the project's effective timezone — the OS ``TZ`` env var is *not*
+    consulted, keeping the configuration deterministic.
+    """
+
+    @field_validator("timezone")
+    @classmethod
+    def _validate_timezone(cls, v: str) -> str:
+        try:
+            ZoneInfo(v)
+        except (ZoneInfoNotFoundError, ValueError) as exc:
+            raise ValueError(f"invalid timezone: {v!r}") from exc
+        return v
+
+
+class ApiSettings(BaseModel):
+    """HTTP API server bind configuration.
+
+    Default ``host = "127.0.0.1"`` keeps the server on loopback only,
+    matching the threat model in ``SECURITY.md``: EverOS ships **no
+    built-in authentication**, so binding to a routable interface
+    (``0.0.0.0`` etc.) without your own gateway / auth layer in front
+    is unsupported.
+
+    Env binding:
+        EVEROS_API__HOST
+        EVEROS_API__PORT
+    """
+
+    host: str = "127.0.0.1"
+    port: int = Field(default=8000, ge=1, le=65535)
+
+
+class SqliteSettings(BaseModel):
+    """SQLite tunables applied as PRAGMAs on every new connection."""
+
+    journal_mode: Literal["WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"] = (
+        "WAL"
+    )
+    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
+    foreign_keys: bool = True
+    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
+    busy_timeout_ms: int = Field(default=5000, ge=0)
+    journal_size_limit_bytes: int = Field(default=64 * 1024 * 1024, ge=0)
+    cache_size_kb: int = Field(default=2048, ge=0)
+
+
+class LLMSettings(BaseModel):
+    """LLM client configuration.
+
+    Read by the service layer when lazily constructing the LLM client
+    handed to algo extractors. Provider-agnostic field names — the
+    project follows the OpenAI API protocol so any OpenAI-compatible
+    endpoint plugs in via ``base_url``.
+
+    Env binding (via parent ``Settings``):
+        EVEROS_LLM__MODEL
+        EVEROS_LLM__API_KEY
+        EVEROS_LLM__BASE_URL
+    """
+
+    model: str = "gpt-4o-mini"
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+
+
+class MultimodalSettings(BaseModel):
+    """Multimodal parsing LLM config (everalgo-parser).
+
+    Flat section mirroring ``[llm]``. The model must accept multimodal
+    ``image_url`` parts (image / pdf / audio); it is kept independent from
+    the main ``[llm]`` so parsing can target a vision/audio-capable
+    endpoint without affecting boundary / extraction.
+
+    Env binding (via parent ``Settings``):
+        EVEROS_MULTIMODAL__MODEL
+        EVEROS_MULTIMODAL__API_KEY
+        EVEROS_MULTIMODAL__BASE_URL
+        EVEROS_MULTIMODAL__MAX_CONCURRENCY
+        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
+        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
+    """
+
+    model: str = "google/gemini-3-flash-preview"
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    max_concurrency: int = 4
+
+    # ``file://`` content-item support (read locally by EverOS, not everalgo).
+    file_uri_allow_dirs: list[str] = []
+    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
+    file (local-first default); set to confine reads when the API is exposed."""
+    file_uri_max_bytes: int = 50 * 1024 * 1024
+    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
+
+
+class EmbeddingSettings(BaseModel):
+    """Embedding client configuration.
+
+    OpenAI-compatible embedding endpoint. ``model`` / ``api_key`` /
+    ``base_url`` are required at runtime when the embedding capability
+    is enabled; the runtime knobs (``timeout`` etc.) have sensible
+    defaults.
+
+    Env binding:
+        EVEROS_EMBEDDING__MODEL
+        EVEROS_EMBEDDING__API_KEY
+        EVEROS_EMBEDDING__BASE_URL
+        EVEROS_EMBEDDING__TIMEOUT_SECONDS
+        EVEROS_EMBEDDING__MAX_RETRIES
+        EVEROS_EMBEDDING__BATCH_SIZE
+        EVEROS_EMBEDDING__MAX_CONCURRENT
+    """
+
+    model: str | None = None
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    timeout_seconds: float = Field(default=30.0, gt=0)
+    max_retries: int = Field(default=3, ge=0)
+    batch_size: int = Field(default=10, ge=1)
+    max_concurrent: int = Field(default=5, ge=1)
+
+
+class RerankSettings(BaseModel):
+    """Rerank client configuration.
+
+    Unlike LLM / embedding (single OpenAI-compatible shape), rerank API
+    schemas differ between providers — DeepInfra uses ``POST {base_url}/
+    {model}`` with a custom body, vLLM uses ``POST {base_url}/rerank``
+    with ``{model, query, documents}``. ``provider`` picks which client
+    implementation the factory builds.
+
+    Env binding:
+        EVEROS_RERANK__PROVIDER
+        EVEROS_RERANK__MODEL
+        EVEROS_RERANK__API_KEY
+        EVEROS_RERANK__BASE_URL
+        EVEROS_RERANK__TIMEOUT_SECONDS
+        EVEROS_RERANK__MAX_RETRIES
+        EVEROS_RERANK__BATCH_SIZE
+        EVEROS_RERANK__MAX_CONCURRENT
+    """
+
+    provider: Literal["deepinfra", "vllm"] = "deepinfra"
+    model: str | None = None
+    api_key: SecretStr | None = None
+    base_url: str | None = None
+    timeout_seconds: float = Field(default=30.0, gt=0)
+    max_retries: int = Field(default=3, ge=0)
+    batch_size: int = Field(default=10, ge=1)
+    max_concurrent: int = Field(default=5, ge=1)
+
+
+class BoundaryDetectionSettings(BaseModel):
+    """Hard limits passed through to ``everalgo`` BoundaryDetector."""
+
+    hard_token_limit: int = Field(default=65536, ge=1)
+    hard_msg_limit: int = Field(default=500, ge=1)
+
+
+class MemorizeSettings(BaseModel):
+    """Memorize use-case configuration.
+
+    ``mode`` selects which boundary detector runs and which pipelines are
+    dispatched. A service process serves one mode at a time; toggling
+    requires a restart.
+
+        - ``"chat"``  -> ``everalgo.user_memory.BoundaryDetector`` and only the
+          user-memory pipeline runs.
+        - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
+          both user-memory + agent-memory pipelines run.
+
+    ``session_lock_timeout_seconds`` caps how long one ``memorize()``
+    invocation can hold the per-session lock. Covers boundary LLM call +
+    memcell DB writes + (synchronous portion of) pipeline dispatch. Stops
+    a stuck LLM from deadlocking subsequent concurrent calls on the same
+    session_id: on timeout the outer ``asyncio.timeout`` cancels the task
+    and the lock auto-releases.
+
+    Env binding:
+        EVEROS_MEMORIZE__MODE
+        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
+    """
+
+    mode: Literal["chat", "agent"] = "agent"
+    session_lock_timeout_seconds: float = Field(default=360.0, gt=0)
+
+
+class SearchSettings(BaseModel):
+    """Search-pipeline policy knobs.
+
+    ``vector_strategy`` selects the read path taken by
+    ``SearchMethod.VECTOR``:
+
+    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
+      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
+      cosine by parent memcell, then reverse-resolve the top memcells back
+      to episode rows. MaxSim over atomic facts; trades one extra LanceDB
+      scan for finer-grained semantic match on long episodes.
+    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
+      per episode = the embedded Content section). The legacy path; kept
+      so deployments can opt out via env.
+
+    Env binding:
+        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
+    """
+
+    # Any value other than the two literals below fails validation.
+    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
+
+
+class LanceDBSettings(BaseModel):
+    """LanceDB tunables.
+
+    ``read_consistency_seconds``:
+      ``None`` (omitted) → no consistency check (highest performance).
+      ``0``              → strict consistency (every read).
+      ``>0``             → eventual (interval between checks).
+
+    ``index_cache_size_bytes``:
+      Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
+      in lance crate). Each cached entry is one opened FTS / vector /
+      scalar index reader and **holds the file descriptors of its on-disk
+      ``_indices/<uuid>/...`` files**.
+
+      LanceDB's own default is ``None`` (unbounded), which on a long-
+      running daemon means every new index UUID created by an
+      ``optimize()`` call adds a fresh reader to the cache, and its
+      FDs are never released — they leak monotonically until
+      ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
+      take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
+      of 256 / Linux's 1024.
+
+      Setting a byte cap turns the cache into a real LRU: when it
+      exceeds the cap, the oldest readers are dropped, Rust ``Drop``
+      runs ``close(fd)``, and the FD pressure resolves itself.
+
+      Cap → steady-state FD upper bound (measured under 30 add+optimize
+      cycles with the real ``Episode`` schema and 100-query stress):
+
+      ===========  =================  ===================
+      cap          FD upper bound     query latency (100q)
+      ===========  =================  ===================
+      ``2 MB``     ~45                ~5 ms
+      ``4 MB``     ~52                ~3 ms
+      ``8 MB``     ~140               ~2.4 ms
+      ``16 MB``    ~290               ~2.3 ms   ← default
+      ``32 MB``    ~630               ~1.4 ms
+      ``unbound``  >960 (leaks)       ~1.3 ms
+      ===========  =================  ===================
+
+      EverOS's measured steady-state working set after a 12 h
+      ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
+      (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
+      ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
+      yet-evicted readers, while the FD ceiling (~290) stays well below
+      common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
+      Linux default 1024 is fine out of the box).
+
+      Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
+      working set is much larger (heavier table count or much wider
+      indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
+      boxes).
+
+      Note: the *metadata* cache (``metadata_cache_size_bytes``) is
+      **not** exposed — experiment showed it caches in-memory parsed
+      manifests / fragment stats with zero impact on FD count; leaving
+      it unbounded (lancedb default) is fine.
+    """
+
+    read_consistency_seconds: float | None = None
+    index_cache_size_bytes: int = 16 * 1024 * 1024
+
+
+class Settings(BaseSettings):
+    """Top-level application settings."""
+
+    memory: MemorySettings = MemorySettings()
+    api: ApiSettings = ApiSettings()
+    sqlite: SqliteSettings = SqliteSettings()
+    lancedb: LanceDBSettings = LanceDBSettings()
+    llm: LLMSettings = LLMSettings()
+    embedding: EmbeddingSettings = EmbeddingSettings()
+    rerank: RerankSettings = RerankSettings()
+    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
+    memorize: MemorizeSettings = MemorizeSettings()
+    search: SearchSettings = SearchSettings()
+    multimodal: MultimodalSettings = MultimodalSettings()
+
+    model_config = SettingsConfigDict(
+        env_prefix="EVEROS_",
+        env_nested_delimiter="__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        toml_file=_DEFAULT_TOML_PATH,
+        extra="ignore",
+    )
+
+    @classmethod
+    def settings_customise_sources(
+        cls,
+        settings_cls: type[BaseSettings],
+        init_settings: PydanticBaseSettingsSource,
+        env_settings: PydanticBaseSettingsSource,
+        dotenv_settings: PydanticBaseSettingsSource,
+        file_secret_settings: PydanticBaseSettingsSource,
+    ) -> tuple[PydanticBaseSettingsSource, ...]:
+        """Layer TOML sources between env / dotenv and the secret store.
+
+        Order (earlier wins in pydantic-settings):
+            init_args > env > .env > user_toml > default_toml > secrets
+
+        The user-level toml (default ``~/.everos/config.toml``) is only
+        registered when the file exists, so the source list stays tight.
+        """
+        sources: list[PydanticBaseSettingsSource] = [
+            init_settings,
+            env_settings,
+            dotenv_settings,
+        ]
+        user_toml_path = _resolve_user_toml_path()
+        if user_toml_path.is_file():
+            sources.append(
+                TomlConfigSettingsSource(settings_cls, toml_file=user_toml_path)
+            )
+        sources.append(TomlConfigSettingsSource(settings_cls))
+        sources.append(file_secret_settings)
+        return tuple(sources)
+
+
+@cache
+def load_settings() -> Settings:
+    """Build the application ``Settings`` once and return the cached instance.
+
+    Values come from the sources layered by
+    ``Settings.settings_customise_sources`` (environment, ``.env``, the
+    optional user TOML, the default TOML, secrets).
+
+    Cached at the module level — every caller sees the same instance until
+    something explicitly clears the cache (``load_settings.cache_clear()``).
+    Tests that monkeypatch environment variables must call
+    ``cache_clear`` after each mutation to pick the new env up.
+    """
+    return Settings()
--- a/src/everos/core/init.py
+++ b/src/everos/core/init.py
--- a/src/everos/core/context/init.py
+++ b/src/everos/core/context/init.py
--- a/src/everos/core/errors.py
+++ b/src/everos/core/errors.py
@ -0,0 +1,33 @@
+"""Cross-cutting domain errors surfaced to API callers.
+
+These live in ``core`` so the ``memory`` layer can raise them and the
+``entrypoints`` layer can catch them without crossing the layered import
+boundary — ``any -> core`` is the only edge both share (entrypoints must
+not import ``memory`` directly).
+"""
+
+from __future__ import annotations
+
+
+class MultimodalError(Exception):
+    """Base for multimodal-parsing errors meant to reach the caller.
+
+    The API layer maps any ``MultimodalError`` to an aligned
+    ``{error: {code, message}}`` envelope (HTTP 415).
+
+    Adds no attributes or behaviour of its own; catch this type to handle
+    every multimodal failure defined in this module uniformly.
+    """
+
+
+class UnsupportedModalityError(MultimodalError):
+    """everalgo cannot handle this modality (e.g. video stub, unknown type).
+
+    Wraps everalgo's ``NotImplementedError`` / dispatch ``ValueError`` so the
+    caller gets a stable, aligned error instead of a raw 500.
+
+    Being a ``MultimodalError`` subclass, it is covered by any handler
+    written against the base class.
+    """
+
+
+class MultimodalNotEnabledError(MultimodalError):
+    """Multimodal capability is not ready.
+
+    Raised when the ``everos[multimodal]`` extra is not installed, or when a
+    required system dependency (LibreOffice for Office documents) is absent.
+
+    Being a ``MultimodalError`` subclass, it is covered by any handler
+    written against the base class.
+    """
--- a/src/everos/core/lifespan/init.py
+++ b/src/everos/core/lifespan/init.py
@ -0,0 +1,27 @@
+"""Application lifespan composition (chassis only).
+
+This subpackage holds the *generic* lifespan machinery — the
+:class:`LifespanProvider` ABC, :func:`build_lifespan` factory, and
+chassis-level providers that are independent of any storage backend
+(observability metrics, etc.). Concrete storage-backend providers
+(SQLite / LanceDB) live next to the entrypoint that composes them
+(see :mod:`everos.entrypoints.api.lifespans`) so ``core`` stays free
+of concrete-backend imports.
+
+External usage:
+    from everos.core.lifespan import (
+        LifespanProvider,
+        MetricsLifespanProvider,
+        build_lifespan,
+    )
+"""
+
+from .base import LifespanProvider as LifespanProvider
+from .factory import build_lifespan as build_lifespan
+from .metrics_lifespan import MetricsLifespanProvider as MetricsLifespanProvider
+
+__all__ = [
+    "LifespanProvider",
+    "MetricsLifespanProvider",
+    "build_lifespan",
+]
--- a/src/everos/core/lifespan/base.py
+++ b/src/everos/core/lifespan/base.py
@ -0,0 +1,30 @@
+"""Lifespan provider abstract base.
+
+A LifespanProvider is one unit of startup / shutdown work invoked by the
+FastAPI lifespan factory. Providers are registered explicitly (no DI
+auto-discovery) and executed in ``order`` ascending on startup, reverse
+on shutdown.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from fastapi import FastAPI
+
+
+class LifespanProvider(ABC):
+    """One unit of startup / shutdown work.
+
+    Subclasses must implement both :meth:`startup` and :meth:`shutdown`.
+
+    Attributes:
+        name: Identifier of the provider.
+        order: Sort key; defaults to ``0``.
+    """
+
+    def __init__(self, name: str, order: int = 0) -> None:
+        """Record the provider's ``name`` and its ``order`` sort key."""
+        self.name = name
+        self.order = order
+
+    @abstractmethod
+    async def startup(self, app: FastAPI) -> Any:
+        """Startup hook.
+
+        The lifespan factory stores a non-``None`` return value on
+        ``app.state.lifespan_data[name]``; returning ``None`` stores nothing.
+        """
+
+    @abstractmethod
+    async def shutdown(self, app: FastAPI) -> None:
+        """Shutdown hook; called in reverse order during application teardown."""
--- a/src/everos/core/lifespan/factory.py
+++ b/src/everos/core/lifespan/factory.py
@ -0,0 +1,57 @@
+"""Lifespan composition factory.
+
+Builds a FastAPI lifespan context manager from an explicit list of
+LifespanProvider instances.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator, Callable, Sequence
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+
+from everos.core.observability.logging import get_logger
+
+from .base import LifespanProvider
+
+logger = get_logger(__name__)
+
+
+def build_lifespan(
+    providers: Sequence[LifespanProvider],
+) -> Callable[[FastAPI], AsyncIterator[None]]:
+    """Compose providers into a FastAPI lifespan context manager.
+
+    Providers are run in ``order`` ascending on startup and reverse on
+    shutdown. A non-None return value from ``startup`` is stored under
+    ``app.state.lifespan_data[provider.name]``.
+    """
+    sorted_providers = sorted(providers, key=lambda p: p.order)
+
+    @asynccontextmanager
+    async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
+        lifespan_data: dict[str, object] = {}
+        try:
+            for provider in sorted_providers:
+                logger.info(
+                    "lifespan_provider_startup",
+                    name=provider.name,
+                    order=provider.order,
+                )
+                result = await provider.startup(app)
+                if result is not None:
+                    lifespan_data[provider.name] = result
+            app.state.lifespan_data = lifespan_data
+            yield
+        finally:
+            for provider in reversed(sorted_providers):
+                try:
+                    logger.info("lifespan_provider_shutdown", name=provider.name)
+                    await provider.shutdown(app)
+                except Exception:
+                    logger.exception(
+                        "lifespan_provider_shutdown_failed", name=provider.name
+                    )
+
+    return _lifespan
--- a/src/everos/core/lifespan/metrics_lifespan.py
+++ b/src/everos/core/lifespan/metrics_lifespan.py
@ -0,0 +1,36 @@
+"""Metrics lifespan provider.
+
+Confirms the metrics registry is ready and logs that the ``/metrics`` HTTP
+endpoint is mounted on the main API. Kept as a placeholder to demonstrate
+the lifespan pattern; replace or extend with a standalone metrics server
+(e.g. ``prometheus_client.start_http_server`` on a separate port) if you
+need to expose metrics on a dedicated socket.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.core.observability.logging import get_logger
+from everos.core.observability.metrics import get_metrics_registry
+
+from .base import LifespanProvider
+
+logger = get_logger(__name__)
+
+
+class MetricsLifespanProvider(LifespanProvider):
+    """No-op startup that warms the metrics registry and logs readiness."""
+
+    def __init__(self, order: int = 5) -> None:
+        """Register under the fixed name ``"metrics"`` with the given ``order``."""
+        super().__init__(name="metrics", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Fetch the metrics registry, log readiness, and return the registry.
+
+        ``app`` is not used.
+        """
+        registry = get_metrics_registry()
+        logger.info("metrics_registry_ready", endpoint="/metrics")
+        return registry
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Log the shutdown event; nothing is released here."""
+        logger.info("metrics_lifespan_shutdown")
--- a/src/everos/core/middleware/init.py
+++ b/src/everos/core/middleware/init.py
@ -0,0 +1,31 @@
+"""Cross-cutting HTTP middleware components.
+
+External usage:
+    from everos.core.middleware import (
+        DEFAULT_CORS_ALLOW_CREDENTIALS,
+        DEFAULT_CORS_ALLOW_HEADERS,
+        DEFAULT_CORS_ALLOW_METHODS,
+        DEFAULT_CORS_ORIGINS,
+        ProfileMiddleware,
+        PrometheusMiddleware,
+        global_exception_handler,
+    )
+"""
+
+from .cors import DEFAULT_CORS_ALLOW_CREDENTIALS as DEFAULT_CORS_ALLOW_CREDENTIALS
+from .cors import DEFAULT_CORS_ALLOW_HEADERS as DEFAULT_CORS_ALLOW_HEADERS
+from .cors import DEFAULT_CORS_ALLOW_METHODS as DEFAULT_CORS_ALLOW_METHODS
+from .cors import DEFAULT_CORS_ORIGINS as DEFAULT_CORS_ORIGINS
+from .global_exception import global_exception_handler as global_exception_handler
+from .profile import ProfileMiddleware as ProfileMiddleware
+from .prometheus import PrometheusMiddleware as PrometheusMiddleware
+
+__all__ = [
+    "DEFAULT_CORS_ALLOW_CREDENTIALS",
+    "DEFAULT_CORS_ALLOW_HEADERS",
+    "DEFAULT_CORS_ALLOW_METHODS",
+    "DEFAULT_CORS_ORIGINS",
+    "ProfileMiddleware",
+    "PrometheusMiddleware",
+    "global_exception_handler",
+]
--- a/src/everos/core/middleware/cors.py
+++ b/src/everos/core/middleware/cors.py
@ -0,0 +1,12 @@
+"""CORS configuration defaults.
+
+The CORS middleware itself is FastAPI's stock ``CORSMiddleware``; this module
+centralises the default policy values used by the application factory.
+"""
+
+from __future__ import annotations
+
+# Default CORS policy: every origin, method and header is allowed, and
+# credentialed requests are enabled.
+# NOTE(review): wildcard origins together with credentials is the most
+# permissive combination possible — confirm that deployments exposed to
+# browsers override these defaults with an explicit origin list.
+DEFAULT_CORS_ALLOW_CREDENTIALS: bool = True
+DEFAULT_CORS_ALLOW_HEADERS: list[str] = ["*"]
+DEFAULT_CORS_ALLOW_METHODS: list[str] = ["*"]
+DEFAULT_CORS_ORIGINS: list[str] = ["*"]
--- a/src/everos/core/middleware/global_exception.py
+++ b/src/everos/core/middleware/global_exception.py
@ -0,0 +1,143 @@
+"""Global exception handler — uniform error envelope per v1 API brief §1.
+
+Envelope shape (matches the v1 API brief §1 — ``request_id`` at the top
+level alongside ``error``; the ``error`` object carries ``code`` /
+``message`` plus ops-friendly ``timestamp`` / ``path`` for debugging)::
+
+    {
+      "request_id": "<32 lowercase hex chars — W3C trace_id format>",
+      "error": {
+        "code": "HTTP_ERROR" | "SYSTEM_ERROR",
+        "message": "<reason>",
+        "timestamp": "<ISO 8601 with tz>",
+        "path": "<request path>"
+      }
+    }
+
+Rules:
+- 4xx (DTO / business validation / HTTPException) → ``code="HTTP_ERROR"``
+  with the human-readable reason in ``message``.
+- 5xx (unhandled exception) → ``code="SYSTEM_ERROR"`` with a fixed
+  ``message="Internal server error"`` — internal exception details are
+  logged but never leak to the client.
+- ``request_id`` is sourced from ``request.state.request_id`` (set by
+  upstream middleware); falls back to a freshly minted id when absent.
+"""
+
+from __future__ import annotations
+
+from fastapi import HTTPException, Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from starlette.status import (
+    HTTP_422_UNPROCESSABLE_ENTITY,
+    HTTP_500_INTERNAL_SERVER_ERROR,
+)
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.observability.logging import get_logger
+from everos.core.observability.tracing import gen_request_id
+
+logger = get_logger(__name__)
+
+_INTERNAL_ERROR_MESSAGE = "Internal server error"
+
+
+def _request_id(request: Request) -> str:
+    """Return the request_id set by middleware, or mint a fresh fallback."""
+    rid = getattr(request.state, "request_id", None)
+    if rid:
+        return str(rid)
+    return gen_request_id()
+
+
+def _envelope(
+    *,
+    code: str,
+    message: str,
+    request: Request,
+) -> dict[str, object]:
+    """Build the canonical error envelope (wiki §1 shape — nested ``error``).
+
+    ``request_id`` at the top level, ``error`` object carries the
+    contract fields (``code`` / ``message``) plus ops-friendly
+    ``timestamp`` / ``path``.
+    """
+    return {
+        "request_id": _request_id(request),
+        "error": {
+            "code": code,
+            "message": message,
+            "timestamp": to_iso_format(get_now_with_timezone()),
+            "path": str(request.url.path),
+        },
+    }
+
+
+async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
+    """Convert any exception into a uniform JSON error response."""
+    path = str(request.url.path)
+    method = request.method
+
+    if isinstance(exc, RequestValidationError):
+        errors = exc.errors()
+        if errors:
+            first = errors[0]
+            loc = ".".join(str(p) for p in first.get("loc", []) if p != "body")
+            msg = first.get("msg", "Validation error")
+            message = f"{msg}: {loc}" if loc else msg
+        else:
+            message = "Request validation error"
+
+        logger.warning("validation_error", method=method, path=path, message=message)
+        return JSONResponse(
+            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+            content=_envelope(code="HTTP_ERROR", message=message, request=request),
+        )
+
+    if isinstance(exc, HTTPException):
+        logger.warning(
+            "http_exception",
+            method=method,
+            path=path,
+            status_code=exc.status_code,
+            detail=exc.detail,
+        )
+        # 5xx routed through HTTPException is rare but valid; still honour
+        # the SYSTEM_ERROR code so the envelope is consistent.
+        if exc.status_code >= 500:
+            return JSONResponse(
+                status_code=exc.status_code,
+                content=_envelope(
+                    code="SYSTEM_ERROR",
+                    message=_INTERNAL_ERROR_MESSAGE,
+                    request=request,
+                ),
+            )
+        return JSONResponse(
+            status_code=exc.status_code,
+            content=_envelope(
+                code="HTTP_ERROR",
+                message=str(exc.detail),
+                request=request,
+            ),
+        )
+
+    logger.error(
+        "unhandled_exception",
+        method=method,
+        path=path,
+        exception_type=type(exc).__name__,
+        exc_info=True,
+    )
+    return JSONResponse(
+        status_code=HTTP_500_INTERNAL_SERVER_ERROR,
+        content=_envelope(
+            code="SYSTEM_ERROR",
+            message=_INTERNAL_ERROR_MESSAGE,
+            request=request,
+        ),
+    )
--- a/src/everos/core/middleware/profile.py
+++ b/src/everos/core/middleware/profile.py
@ -0,0 +1,69 @@
+"""Performance profiling middleware (HTML report via pyinstrument).
+
+Triggered with ``?profile=true`` query parameter when ``PROFILING_ENABLED=true``
+is set. Gracefully no-ops if pyinstrument is not installed.
+"""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Awaitable, Callable
+
+from fastapi import Request
+from fastapi.responses import HTMLResponse
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import Response
+from starlette.types import ASGIApp
+
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+_TRUTHY = frozenset({"1", "true", "yes"})
+
+
+def _profiling_enabled() -> bool:
+    """Read ``PROFILING_ENABLED`` env var (1 / true / yes => enabled)."""
+    raw = os.getenv("PROFILING_ENABLED", os.getenv("PROFILING", "false")).lower()
+    return raw in _TRUTHY
+
+
+class ProfileMiddleware(BaseHTTPMiddleware):
+    """Returns a pyinstrument HTML report when ``?profile=true`` is set."""
+
+    def __init__(self, app: ASGIApp) -> None:
+        super().__init__(app)
+        self._enabled = _profiling_enabled()
+        self._available = False
+        if self._enabled:
+            try:
+                import pyinstrument  # noqa: F401
+
+                self._available = True
+                logger.info("profiling_middleware_enabled")
+            except ImportError:
+                logger.warning("profiling_requested_but_pyinstrument_missing")
+                self._enabled = False
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        if not self._enabled or not self._available:
+            return await call_next(request)
+
+        if request.query_params.get("profile", "").lower() not in _TRUTHY:
+            return await call_next(request)
+
+        from pyinstrument import Profiler
+
+        profiler = Profiler()
+        profiler.start()
+        logger.info("profile_started", method=request.method, path=request.url.path)
+        try:
+            await call_next(request)
+        except Exception:
+            logger.exception("profile_request_failed")
+        profiler.stop()
+        return HTMLResponse(content=profiler.output_html(), status_code=200)
--- a/src/everos/core/middleware/prometheus.py
+++ b/src/everos/core/middleware/prometheus.py
@ -0,0 +1,84 @@
+"""Prometheus HTTP metrics middleware.
+
+Auto-instruments incoming HTTP requests with a request counter and a
+duration histogram. Mounted via ``app.add_middleware(PrometheusMiddleware)``.
+
+Skips internal endpoints (``/metrics``, ``/health``, etc.) so they do not
+inflate cardinality or pollute their own statistics.
+"""
+
+from __future__ import annotations
+
+import time
+from collections.abc import Awaitable, Callable
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+from everos.core.observability.logging import get_logger
+from everos.core.observability.metrics import Counter, Histogram, HistogramBuckets
+
+logger = get_logger(__name__)
+
+
+# Request counter, labelled by HTTP method, normalised path and status code.
+# Created at import time.
+_http_requests_total = Counter(
+    name="http_requests_total",
+    description="Total number of HTTP requests handled.",
+    labelnames=("method", "path", "status"),
+    namespace="everos",
+)
+
+# Request latency histogram, labelled by HTTP method and normalised path.
+_http_request_duration_seconds = Histogram(
+    name="http_request_duration_seconds",
+    description="HTTP request duration in seconds.",
+    labelnames=("method", "path"),
+    namespace="everos",
+    buckets=HistogramBuckets.DEFAULT,
+)
+
+
+# Exact request paths the middleware does not instrument.
+_SKIP_PATHS = frozenset({"/metrics", "/health", "/healthz", "/favicon.ico"})
+
+
+def _normalize_path(request: Request) -> str:
+    """Resolve the route template (e.g. ``/users/{user_id}``) for stable labels."""
+    scope = getattr(request, "scope", {})
+    route = scope.get("route") if isinstance(scope, dict) else None
+    if route is not None and hasattr(route, "path"):
+        return route.path
+    if request.path_params:
+        path = request.url.path
+        for name, value in request.path_params.items():
+            if str(value) in path:
+                path = path.replace(str(value), f"{{{name}}}")
+        return path
+    return "{unmatched}"
+
+
+class PrometheusMiddleware(BaseHTTPMiddleware):
+    """Records ``http_requests_total`` and ``http_request_duration_seconds``."""
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        if request.url.path in _SKIP_PATHS:
+            return await call_next(request)
+
+        method = request.method
+        start = time.perf_counter()
+        status = "500"
+        response: Response | None = None
+        try:
+            response = await call_next(request)
+            status = str(response.status_code)
+            return response
+        finally:
+            duration = time.perf_counter() - start
+            path = _normalize_path(request)
+            _http_requests_total.labels(method=method, path=path, status=status).inc()
+            _http_request_duration_seconds.labels(method=method, path=path).observe(
+                duration
+            )
--- a/src/everos/core/observability/init.py
+++ b/src/everos/core/observability/init.py
--- a/src/everos/core/observability/logging/init.py
+++ b/src/everos/core/observability/logging/init.py
@ -0,0 +1,13 @@
+"""structlog-based logging factory.
+
+External usage:
+    from everos.core.observability.logging import get_logger, configure_logging
+
+    logger = get_logger(__name__)
+    logger.info("event_name", key=value)
+"""
+
+from .factory import configure_logging as configure_logging
+from .factory import get_logger as get_logger
+
+__all__ = ["configure_logging", "get_logger"]
--- a/src/everos/core/observability/logging/factory.py
+++ b/src/everos/core/observability/logging/factory.py
@ -0,0 +1,117 @@
+"""structlog logger factory.
+
+Provides ``get_logger(__name__)`` for module-level logger acquisition.
+``configure_logging()`` is called once at process startup (run.py / lifespan)
+to set up the structlog processor chain and route stdlib logging through
+the same formatter so output stays uniform regardless of the caller.
+
+The configuration follows structlog's official "Foreign Log Integration"
+recipe: a single ``ProcessorFormatter`` renders both everos's own
+``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
+made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
+That way all three of the previously divergent prefixes — ``INFO:``,
+``[warning  ]``, plus the unconfigured no-prefix output — collapse to
+one ``[level] event key=value`` shape.
+
+Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
+crate and emit straight to stderr without going through Python; this
+module cannot reach them. Control their level with ``RUST_LOG`` env.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from typing import Any
+
+import structlog
+
+
+def get_logger(name: str) -> Any:
+    """Return a structlog logger bound to the given module name.
+
+    Thin pass-through to ``structlog.get_logger(name)``; callers normally
+    pass ``__name__``.
+    """
+    return structlog.get_logger(name)
+
+
+def configure_logging(level: str = "INFO") -> None:
+    """Configure structlog and stdlib logging once at process startup.
+
+    After this call:
+
+    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
+      message flows through the same ``ProcessorFormatter``, so output
+      format is identical regardless of which logging API the caller used.
+    * The root logger gains a single ``StreamHandler`` pointing at
+      ``sys.stdout`` that carries this formatter. Handlers left over from
+      an earlier ``configure_logging`` call (recognised by their
+      ``ProcessorFormatter``) are dropped first, so repeated calls do not
+      duplicate output; handlers attached by other parties (e.g. pytest's
+      ``caplog``) are kept.
+    * The root logger level is set to ``level``, and the ``httpx`` /
+      ``httpcore`` / ``urllib3`` loggers are raised to ``WARNING``.
+
+    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
+    on the server entry point — without it, uvicorn re-installs its own
+    handlers on every startup and overrides what we set here.
+
+    Args:
+        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``).
+            Unknown names silently fall back to ``INFO`` via
+            ``getattr(logging, ..., INFO)``.
+    """
+    log_level = getattr(logging, level.upper(), logging.INFO)
+
+    # Processors applied to both structlog events and foreign stdlib records.
+    shared_processors: list[Any] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+    ]
+
+    # structlog's own loggers feed into stdlib's logging, so the root
+    # logger handler decides where output lands and how it's rendered.
+    structlog.configure(
+        processors=[
+            *shared_processors,
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(log_level),
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    # The single formatter shared by both pipelines:
+    # * structlog events arrive already wrapped via ``wrap_for_formatter``;
+    # * foreign records (stdlib LogRecord) get pushed through
+    #   ``foreign_pre_chain`` so they pick up the same level / timestamp
+    #   fields before hitting ``ConsoleRenderer``.
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            structlog.dev.ConsoleRenderer(),
+        ],
+    )
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+
+    # Drop any handler we installed on a previous ``configure_logging``
+    # call (identified by formatter type) so repeated invocations don't
+    # produce duplicate output, but keep handlers other parties have
+    # attached — pytest's caplog handler in particular has to survive,
+    # otherwise tests using the ``caplog`` fixture can't see records
+    # that flow through structlog.
+    root = logging.getLogger()
+    root.handlers = [
+        h
+        for h in root.handlers
+        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
+    ]
+    root.addHandler(handler)
+    root.setLevel(log_level)
+
+    # Third-party HTTP clients log every successful request at INFO level —
+    # `httpx` is the worst offender (one line per call, called once per
+    # LLM / embedding / rerank request). A single LoCoMo conv run easily
+    # produces a thousand such lines, drowning everos's own events. They
+    # are useful for debugging API failures, but failures already surface
+    # via exceptions + status codes — so demote the success path to WARNING
+    # and let real errors still come through.
+    for noisy in ("httpx", "httpcore", "urllib3"):
+        logging.getLogger(noisy).setLevel(logging.WARNING)
--- a/src/everos/core/observability/metrics/init.py
+++ b/src/everos/core/observability/metrics/init.py
@ -0,0 +1,34 @@
+"""Prometheus-style metrics primitives + registry.
+
+External usage:
+    from everos.core.observability.metrics import (
+        Counter, Gauge, Histogram, HistogramBuckets,
+        get_metrics_registry, generate_metrics_response,
+    )
+"""
+
+from .counter import Counter as Counter
+from .counter import LabeledCounter as LabeledCounter
+from .gauge import Gauge as Gauge
+from .gauge import LabeledGauge as LabeledGauge
+from .histogram import Histogram as Histogram
+from .histogram import HistogramBuckets as HistogramBuckets
+from .histogram import LabeledHistogram as LabeledHistogram
+from .registry import generate_metrics_response as generate_metrics_response
+from .registry import get_metrics_registry as get_metrics_registry
+from .registry import reset_metrics_registry as reset_metrics_registry
+from .registry import set_metrics_registry as set_metrics_registry
+
+__all__ = [
+    "Counter",
+    "Gauge",
+    "Histogram",
+    "HistogramBuckets",
+    "LabeledCounter",
+    "LabeledGauge",
+    "LabeledHistogram",
+    "generate_metrics_response",
+    "get_metrics_registry",
+    "reset_metrics_registry",
+    "set_metrics_registry",
+]
--- a/src/everos/core/observability/metrics/counter.py
+++ b/src/everos/core/observability/metrics/counter.py
@ -0,0 +1,50 @@
+"""Counter wrapper around ``prometheus_client.Counter``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Counter as PromCounter
+
+from .registry import get_metrics_registry
+
+
+class Counter:
+    """Monotonically-increasing counter (totals, error counts)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        self._counter = PromCounter(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=get_metrics_registry(),
+        )
+        self._labelnames = tuple(labelnames)
+
+    def labels(self, **labels: str) -> LabeledCounter:
+        return LabeledCounter(self._counter.labels(**labels))
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._counter.inc(amount)
+
+
+class LabeledCounter:
+    """Counter slice with labels applied.
+
+    Wraps the child object returned by the underlying counter's
+    ``labels(...)`` call and forwards increments to it.
+    """
+
+    def __init__(self, labeled: Any) -> None:
+        # ``labeled`` is whatever ``Counter.labels`` obtained from the wrapped
+        # prometheus counter; this class only ever calls ``inc`` on it.
+        self._labeled = labeled
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Increase this labeled series by ``amount`` (default ``1.0``)."""
+        self._labeled.inc(amount)
--- a/src/everos/core/observability/metrics/gauge.py
+++ b/src/everos/core/observability/metrics/gauge.py
@ -0,0 +1,66 @@
+"""Gauge wrapper around ``prometheus_client.Gauge``.
+
+Async auto-refresh is intentionally not included in v0.1; subclass
+:class:`Gauge` and call :meth:`set` from your own scheduling logic when
+needed.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Gauge as PromGauge
+
+from .registry import get_metrics_registry
+
+
+class Gauge:
+    """Instantaneous numeric value (queue depth, cache size)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        self._gauge = PromGauge(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=get_metrics_registry(),
+        )
+
+    def labels(self, **labels: str) -> LabeledGauge:
+        return LabeledGauge(self._gauge.labels(**labels))
+
+    def set(self, value: float) -> None:
+        self._gauge.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._gauge.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        self._gauge.dec(amount)
+
+
+class LabeledGauge:
+    """Gauge slice with labels applied.
+
+    Wraps the child object returned by the underlying gauge's
+    ``labels(...)`` call and forwards every operation to it.
+    """
+
+    def __init__(self, labeled: Any) -> None:
+        # ``labeled`` is whatever ``Gauge.labels`` obtained from the wrapped
+        # prometheus gauge; this class calls ``set`` / ``inc`` / ``dec`` on it.
+        self._labeled = labeled
+
+    def set(self, value: float) -> None:
+        """Overwrite this labeled series with ``value``."""
+        self._labeled.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        """Raise this labeled series by ``amount`` (default ``1.0``)."""
+        self._labeled.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        """Lower this labeled series by ``amount`` (default ``1.0``)."""
+        self._labeled.dec(amount)
--- a/src/everos/core/observability/metrics/histogram.py
+++ b/src/everos/core/observability/metrics/histogram.py
@ -0,0 +1,102 @@
+"""Histogram wrapper around ``prometheus_client.Histogram``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Histogram as PromHistogram
+
+from .registry import get_metrics_registry
+
+
+class HistogramBuckets:
+    """Predefined bucket configurations for common workloads.
+
+    Each preset is an ascending tuple of bucket upper bounds.
+    NOTE(review): the values are presumably seconds (the usual Prometheus
+    latency convention) — confirm against the call sites.
+    """
+
+    # General-purpose preset: 0.005 up to 10.0.
+    DEFAULT: tuple[float, ...] = (
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+        10.0,
+    )
+    # Finest resolution at the low end: 0.001 up to 0.5.
+    FAST: tuple[float, ...] = (0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5)
+    # Wider tail for remote calls: 0.01 up to 30.0.
+    API_CALL: tuple[float, ...] = (
+        0.01,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.0,
+        5.0,
+        10.0,
+        30.0,
+    )
+    # Coarse buckets for long-running work: 0.1 up to 60.0.
+    BATCH: tuple[float, ...] = (0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)
+    # Same shape as DEFAULT but starting at 0.001 and stopping at 5.0.
+    DATABASE: tuple[float, ...] = (
+        0.001,
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+    )
+
+
+class Histogram:
+    """Distribution of observed values (latency, sizes)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
+    ) -> None:
+        self._histogram = PromHistogram(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            buckets=tuple(buckets),
+            registry=get_metrics_registry(),
+        )
+
+    def labels(self, **labels: str) -> LabeledHistogram:
+        return LabeledHistogram(self._histogram.labels(**labels))
+
+    def observe(self, amount: float) -> None:
+        self._histogram.observe(amount)
+
+    def time(self) -> Any:
+        return self._histogram.time()
+
+
+class LabeledHistogram:
+    """Histogram slice with labels applied.
+
+    Wraps the child object returned by the underlying histogram's
+    ``labels(...)`` call and forwards every operation to it.
+    """
+
+    def __init__(self, labeled: Any) -> None:
+        # ``labeled`` is whatever ``Histogram.labels`` obtained from the
+        # wrapped prometheus histogram; this class calls ``observe`` / ``time``.
+        self._labeled = labeled
+
+    def observe(self, amount: float) -> None:
+        """Record one observation of ``amount`` on this labeled series."""
+        self._labeled.observe(amount)
+
+    def time(self) -> Any:
+        """Return whatever the wrapped child's ``time()`` returns."""
+        return self._labeled.time()
--- a/src/everos/core/observability/metrics/registry.py
+++ b/src/everos/core/observability/metrics/registry.py
@ -0,0 +1,35 @@
+"""Prometheus metrics registry singleton."""
+
+from __future__ import annotations
+
+from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
+
+_registry: CollectorRegistry | None = None
+
+
+def get_metrics_registry() -> CollectorRegistry:
+    """Return the global metrics registry.
+
+    Defaults to ``prometheus_client.REGISTRY``.
+    """
+    global _registry
+    if _registry is None:
+        _registry = REGISTRY
+    return _registry
+
+
+def set_metrics_registry(registry: CollectorRegistry) -> None:
+    """Override the global registry (mainly for tests).
+
+    Args:
+        registry: Registry that subsequent ``get_metrics_registry()`` calls
+            will return until it is replaced or reset.
+    """
+    global _registry
+    _registry = registry
+
+
+def generate_metrics_response() -> bytes:
+    """Render the current registry into Prometheus exposition format."""
+    return generate_latest(get_metrics_registry())
+
+
+def reset_metrics_registry() -> None:
+    """Reset the global registry override (mainly for tests).
+
+    After this call the next ``get_metrics_registry()`` falls back to
+    ``prometheus_client.REGISTRY`` again.
+    """
+    global _registry
+    # ``None`` is the "no override" marker that ``get_metrics_registry`` checks.
+    _registry = None
--- a/src/everos/core/observability/tracing/init.py
+++ b/src/everos/core/observability/tracing/init.py
@ -0,0 +1,32 @@
+"""Tracing utilities — W3C-compatible request id generation.
+
+External usage::
+
+    from everos.core.observability.tracing import gen_request_id
+"""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+
+def gen_request_id() -> str:
+    """Generate a request id matching the W3C trace-context spec.
+
+    Returns 32 lowercase hex characters (128-bit, no prefix) — the same
+    format as a W3C ``trace_id`` / OpenTelemetry trace identifier. Routes
+    and services that mint a fresh request id (when one wasn't injected
+    by upstream middleware) should call this helper rather than rolling
+    their own uuid / prefix format, so the id layer stays compatible
+    with OpenTelemetry exporters and standard APM tooling.
+
+    Example::
+
+        >>> rid = gen_request_id()
+        >>> len(rid)
+        32
+    """
+    return uuid4().hex
+
+
+__all__ = ["gen_request_id"]
--- a/src/everos/core/persistence/init.py
+++ b/src/everos/core/persistence/init.py
@ -0,0 +1,106 @@
+"""Persistence primitives.
+
+Read/write toolkit for markdown files, async wrappers around the SQLite
+system DB and LanceDB index, plus a memory-root path manager. Higher
+layers (``memory``, ``infra``) layer business semantics on top of these
+building blocks; this subpackage knows nothing about Entry / MemCell /
+Episode or any other business model.
+
+External usage:
+    from everos.core.persistence import (
+        # Path manager + lock
+        MemoryRoot, memory_root_lock, LockError,
+        # Markdown IO toolkit
+        MarkdownReader, MarkdownWriter, ParsedMarkdown, Entry,
+        parse_frontmatter, dump_frontmatter, split_entries, find_entry,
+        # Frontmatter schema chassis
+        BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
+        DailyLogPathMixin, SkillPathMixin,
+        # Async SQLite (SQLModel / SA 2.0)
+        create_system_engine, create_session_factory, session_scope,
+        SQLModel, Field, Relationship, BaseTable, RepoBase,
+        # Async LanceDB
+        open_lancedb_connection, LanceModel, Vector, BaseLanceTable, touch,
+        LanceRepoBase,
+    )
+"""
+
+from .lancedb import BaseLanceTable as BaseLanceTable
+from .lancedb import LanceModel as LanceModel
+from .lancedb import LanceRepoBase as LanceRepoBase
+from .lancedb import Vector as Vector
+from .lancedb import open_lancedb_connection as open_lancedb_connection
+from .lancedb import touch as touch
+from .locking import LockError as LockError
+from .locking import memory_root_lock as memory_root_lock
+from .markdown import AgentScopedFrontmatter as AgentScopedFrontmatter
+from .markdown import BaseFrontmatter as BaseFrontmatter
+from .markdown import DailyLogPathMixin as DailyLogPathMixin
+from .markdown import Entry as Entry
+from .markdown import EntryId as EntryId
+from .markdown import MarkdownReader as MarkdownReader
+from .markdown import MarkdownWriter as MarkdownWriter
+from .markdown import ParsedMarkdown as ParsedMarkdown
+from .markdown import SkillPathMixin as SkillPathMixin
+from .markdown import StructuredEntry as StructuredEntry
+from .markdown import UserScopedFrontmatter as UserScopedFrontmatter
+from .markdown import dump_frontmatter as dump_frontmatter
+from .markdown import find_entry as find_entry
+from .markdown import parse_frontmatter as parse_frontmatter
+from .markdown import parse_structured_entry as parse_structured_entry
+from .markdown import render_structured_entry as render_structured_entry
+from .markdown import split_entries as split_entries
+from .memory_root import MemoryRoot as MemoryRoot
+from .memory_root import app_dir_name as app_dir_name
+from .memory_root import app_id_from_dir as app_id_from_dir
+from .memory_root import project_dir_name as project_dir_name
+from .memory_root import project_id_from_dir as project_id_from_dir
+from .sqlite import BaseTable as BaseTable
+from .sqlite import Field as Field
+from .sqlite import Relationship as Relationship
+from .sqlite import RepoBase as RepoBase
+from .sqlite import SQLModel as SQLModel
+from .sqlite import create_session_factory as create_session_factory
+from .sqlite import create_system_engine as create_system_engine
+from .sqlite import session_scope as session_scope
+
+__all__ = [
+    "AgentScopedFrontmatter",
+    "BaseFrontmatter",
+    "BaseLanceTable",
+    "BaseTable",
+    "DailyLogPathMixin",
+    "Entry",
+    "EntryId",
+    "Field",
+    "LanceModel",
+    "LanceRepoBase",
+    "LockError",
+    "MarkdownReader",
+    "MarkdownWriter",
+    "MemoryRoot",
+    "ParsedMarkdown",
+    "Relationship",
+    "RepoBase",
+    "SkillPathMixin",
+    "StructuredEntry",
+    "SQLModel",
+    "UserScopedFrontmatter",
+    "Vector",
+    "app_dir_name",
+    "app_id_from_dir",
+    "create_session_factory",
+    "create_system_engine",
+    "dump_frontmatter",
+    "find_entry",
+    "memory_root_lock",
+    "project_dir_name",
+    "project_id_from_dir",
+    "open_lancedb_connection",
+    "parse_frontmatter",
+    "parse_structured_entry",
+    "render_structured_entry",
+    "session_scope",
+    "split_entries",
+    "touch",
+]
--- a/src/everos/core/persistence/lancedb/init.py
+++ b/src/everos/core/persistence/lancedb/init.py
@ -0,0 +1,34 @@
+"""LanceDB async persistence.
+
+External usage (connection):
+    from everos.core.persistence.lancedb import open_lancedb_connection
+
+External usage (ORM model basics — re-exported from lancedb.pydantic):
+    from everos.core.persistence.lancedb import (
+        LanceModel, Vector, BaseLanceTable, touch,
+    )
+
+External usage (generic CRUD repository base):
+    from everos.core.persistence.lancedb import LanceRepoBase
+"""
+
+# Re-export the LanceDB-flavoured Pydantic primitives so business code has a
+# single canonical entry point for table schemas.
+from lancedb.pydantic import LanceModel as LanceModel
+from lancedb.pydantic import Vector as Vector
+
+from .base import BaseLanceTable as BaseLanceTable
+from .base import touch as touch
+from .connection import open_lancedb_connection as open_lancedb_connection
+from .repository import LanceDailyLogRepoBase as LanceDailyLogRepoBase
+from .repository import LanceRepoBase as LanceRepoBase
+
+__all__ = [
+    "BaseLanceTable",
+    "LanceDailyLogRepoBase",
+    "LanceModel",
+    "LanceRepoBase",
+    "Vector",
+    "open_lancedb_connection",
+    "touch",
+]
--- a/src/everos/core/persistence/lancedb/base.py
+++ b/src/everos/core/persistence/lancedb/base.py
@ -0,0 +1,158 @@
+"""Common LanceDB base for everos tables.
+
+:class:`BaseLanceTable` adds ``created_at`` / ``updated_at`` columns and
+the :attr:`BM25_FIELDS` declaration + :meth:`ensure_fts_indexes`
+classmethod so each schema owns *both* its column shape **and** its
+BM25 index spec — repos stay focused on queries.
+
+Note:
+    LanceDB has no SQL ``onupdate`` equivalent — the application must
+    explicitly set ``updated_at = get_utc_now()`` before calling
+    :meth:`AsyncTable.update` / :meth:`AsyncTable.merge_insert`. The
+    convenience :func:`touch` helper does this in one call.
+
+    **Every datetime column automatically carries ``tz=UTC`` in the
+    Arrow schema.** LanceDB's Pydantic→PyArrow converter does not
+    understand ``typing.Annotated`` metadata, so :data:`UtcDatetime`
+    cannot be used as the field type annotation. Instead,
+    :meth:`BaseLanceTable.to_arrow_schema` walks the inferred schema
+    and rewrites every ``timestamp[us]`` (naive) column to
+    ``timestamp[us, tz=UTC]``. PyArrow then auto-``astimezone(UTC)``
+    aware inputs on write **and** returns aware UTC datetimes on read
+    — no per-table configuration, no caller-side ``ensure_utc``.
+
+    Subclasses just declare ``datetime`` fields normally::
+
+        class Episode(BaseLanceTable):
+            timestamp: dt.datetime
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from typing import ClassVar
+
+import pyarrow as pa
+from lancedb import AsyncTable
+from lancedb.index import FTS
+from lancedb.pydantic import LanceModel
+from pydantic import Field
+
+from everos.component.utils.datetime import get_utc_now
+
+
+class BaseLanceTable(LanceModel):
+    """Pydantic / LanceDB base with ``created_at`` / ``updated_at`` and
+    schema-level LanceDB metadata (``TABLE_NAME`` / ``BM25_FIELDS``).
+
+    The schema is the single source of truth for everything LanceDB
+    needs to materialise the table: column shape, table name, vector
+    dim (declared per-subclass), and which columns carry an FTS index.
+    Repos read these ClassVars; they do not duplicate them.
+    """
+
+    TABLE_NAME: ClassVar[str] = ""
+    """LanceDB table name. Business schemas must override (e.g.
+    ``"episode"``). Left empty on chassis / test schemas that construct
+    their table inline."""
+
+    BM25_FIELDS: ClassVar[list[str]] = []
+    """Columns to build LanceDB FTS (BM25) indexes on.
+
+    Each declared column must already exist as a ``str`` (or
+    ``str | None``) field on the schema. Tokens are assumed to be
+    **app-layer pre-tokenised** (space-joined); the FTS index uses
+    ``base_tokenizer="whitespace"`` so segmentation is owned by the
+    app layer (:class:`JiebaTokenizer`). The same boundary owns stop-
+    word filtering (English + Chinese); FTS-side ``remove_stop_words``
+    is OFF. FTS *does* keep lightweight English-aware normalisation
+    (``lower_case`` / ``stem`` / ``ascii_folding``) as a belt-and-
+    braces layer on the same English tokens that survive jieba.
+    See ``17_lancedb_tables_design.md`` §2.4.1 and
+    :meth:`ensure_fts_indexes` below for the exact knobs."""
+
+    created_at: dt.datetime = Field(default_factory=get_utc_now)
+    updated_at: dt.datetime = Field(default_factory=get_utc_now)
+
+    @classmethod
+    def to_arrow_schema(cls) -> pa.Schema:
+        """Patch the default Arrow schema: force every timestamp to ``tz=UTC``.
+
+        The base ``LanceModel.to_arrow_schema()`` infers Arrow types from
+        Pydantic field annotations and emits naive ``timestamp[us]`` for
+        every :class:`datetime.datetime` column. We rewrite **every**
+        timestamp column to ``timestamp[us, tz=UTC]``:
+
+        * **on write** — PyArrow ``astimezone(UTC)``-s aware input
+          automatically before serialising the i64 epoch micros.
+        * **on read** — PyArrow returns aware UTC datetimes.
+
+        Zero per-table configuration. The rewrite also **overrides any
+        non-UTC tz** a subclass might have declared explicitly, because
+        project convention is: storage is always UTC. Mixed-tz columns
+        would violate the two-zone discipline (see
+        ``docs/datetime.md``); enforcing UTC at the schema level closes
+        that loophole.
+        """
+        base = super().to_arrow_schema()
+        return pa.schema(
+            [
+                pa.field(f.name, pa.timestamp("us", tz="UTC"), nullable=f.nullable)
+                if pa.types.is_timestamp(f.type)
+                else f
+                for f in base
+            ]
+        )
+
+    @classmethod
+    async def ensure_fts_indexes(cls, table: AsyncTable) -> None:
+        """Create FTS indexes on every column in :attr:`BM25_FIELDS`.
+
+        Idempotent: columns that already have an index are skipped, so
+        this is safe to call on every startup. The FTS config is fixed
+        to the app-layer pre-tokenisation + LanceDB normalisation
+        convention (designed for **multilingual mixed content**):
+
+        - ``base_tokenizer="whitespace"`` — split on the spaces our
+          app-layer tokenizer provider already inserted between tokens.
+        - ``lower_case=True`` — Unicode-aware case-fold (English A→a;
+          no-op on CJK characters).
+        - ``stem=True`` — Porter / Snowball English stemmer per
+          ``language="English"`` (tantivy default). CJK tokens have no
+          stemmer and pass through untouched.
+        - ``remove_stop_words=False`` — **stop-word removal is owned by
+          the app-layer** (:class:`JiebaTokenizer`), which already drops
+          both Chinese and English stop-words before tokens reach the
+          FTS index. Keeping FTS-side filtering off avoids double-
+          filtering and a divided source of truth.
+        - ``ascii_folding=True`` — strips diacritics (é→e) on Latin
+          characters; no-op on CJK.
+        - ``with_position=True`` — enables phrase queries.
+
+        Subclasses normally do not need to override this — declaring
+        :attr:`BM25_FIELDS` is enough.
+        """
+        if not cls.BM25_FIELDS:
+            return
+        indices = await table.list_indices()
+        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
+        for field in cls.BM25_FIELDS:
+            if field in indexed_cols:
+                continue
+            await table.create_index(
+                column=field,
+                config=FTS(
+                    with_position=True,
+                    base_tokenizer="whitespace",
+                    lower_case=True,
+                    stem=True,
+                    remove_stop_words=False,
+                    ascii_folding=True,
+                ),
+            )
+
+
+def touch(record: BaseLanceTable) -> BaseLanceTable:
+    """Set ``record.updated_at = now`` and return the record (chainable).
+
+    The record is mutated in place; the returned object is the same
+    instance that was passed in. "now" is whatever ``get_utc_now()``
+    returns at call time.
+    """
+    record.updated_at = get_utc_now()
+    return record
--- a/src/everos/core/persistence/lancedb/connection.py
+++ b/src/everos/core/persistence/lancedb/connection.py
@ -0,0 +1,68 @@
+"""Async LanceDB connection factory.
+
+LanceDB does not live inside the SQLAlchemy ecosystem; it has its own
+``connect_async`` returning :class:`lancedb.AsyncConnection`. This module
+is a thin wrapper that:
+
+    1. ensures the lancedb root directory exists
+    2. converts ``LanceDBSettings.read_consistency_seconds`` into the
+       :class:`datetime.timedelta` value LanceDB expects
+    3. installs a capped :class:`lancedb.Session` so the global index
+       cache cannot grow unbounded and exhaust file descriptors
+       (see :attr:`LanceDBSettings.index_cache_size_bytes` for the
+       full rationale)
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from pathlib import Path
+
+import lancedb
+from lancedb import AsyncConnection
+
+from everos.config import LanceDBSettings
+
+
+async def open_lancedb_connection(
+    lancedb_dir: Path,
+    lancedb_settings: LanceDBSettings,
+) -> AsyncConnection:
+    """Open an async LanceDB connection rooted at ``lancedb_dir``.
+
+    Args:
+        lancedb_dir: Filesystem path to the LanceDB root (typically
+            ``MemoryRoot.lancedb_dir``). Created if missing.
+        lancedb_settings: Tunables; the ``read_consistency_seconds`` field
+            is converted to a :class:`~datetime.timedelta`, and
+            ``index_cache_size_bytes`` caps the global index cache.
+
+    Returns:
+        An :class:`AsyncConnection` ready for table operations.
+    """
+    # mkdir is a microsecond-fast syscall and only fires on first connect;
+    # not worth pulling in anyio.Path / aiofiles for it.
+    lancedb_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240
+
+    interval: dt.timedelta | None = None
+    if lancedb_settings.read_consistency_seconds is not None:
+        interval = dt.timedelta(seconds=lancedb_settings.read_consistency_seconds)
+
+    # Bound the index cache so its readers (each one holds the FDs of
+    # an opened ``_indices/<uuid>/...`` directory) get LRU-evicted
+    # rather than leaking. Without this, a long-running daemon's FD
+    # count grows monotonically until ``EMFILE``. The metadata cache
+    # is intentionally left at the lancedb default (unbounded): it
+    # holds parsed in-memory manifests with zero FD pressure, and a
+    # cap there would just thrash. See ``LanceDBSettings`` for the
+    # measurement that picked the default size.
+    session = lancedb.Session(
+        index_cache_size_bytes=lancedb_settings.index_cache_size_bytes,
+        metadata_cache_size_bytes=None,
+    )
+
+    return await lancedb.connect_async(
+        str(lancedb_dir),
+        read_consistency_interval=interval,
+        session=session,
+    )
--- a/src/everos/core/persistence/lancedb/repository.py
+++ b/src/everos/core/persistence/lancedb/repository.py
@ -0,0 +1,530 @@
+"""Generic CRUD repository for LanceDB-backed tables.
+
+``LanceRepoBase`` mirrors the SQLite ``RepoBase`` shape: a pure generic
+CRUD helper that knows nothing about a storage runtime. Concrete repos
+either pass an :class:`AsyncTable` explicitly (typical in tests) or
+override :meth:`_table_lookup` to pull the cached table from their
+storage manager (typical in
+:mod:`everos.infra.persistence.lancedb.repos`).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import datetime as dt
+from collections.abc import Sequence
+from typing import Any, ClassVar
+
+from lancedb import AsyncTable
+
+from everos.core.observability.logging import get_logger
+
+from .base import BaseLanceTable
+
+logger = get_logger(__name__)
+
+
+def _q(value: str) -> str:
+    """Escape single quotes for a LanceDB SQL-like ``where`` predicate.
+
+    LanceDB has no parameterised query API; predicates are strings.
+    Doubling the quote (``'`` → ``''``) is the SQL-standard way to keep
+    a literal single quote inside a single-quoted string. everos's PK
+    convention (``<owner_id>_<entry_id>``) never carries quotes — this
+    is defensive.
+    """
+    return value.replace("'", "''")
+
+
+class LanceRepoBase[T: BaseLanceTable]:
+    """Generic CRUD repository for one LanceDB table.
+
+    Subclass and bind to a schema. Two ways to provide the table:
+
+    1. **Explicit (tests / DI)** — pass it to ``__init__``::
+
+           repo = EpisodeRepo(table)
+
+    2. **Lazy hook (production singletons)** — override
+       :meth:`_table_lookup` so the repo can be instantiated as a
+       module-level singleton with no live connection yet::
+
+           class _EpisodeRepo(LanceRepoBase[Episode]):
+               schema = Episode
+
+               async def _table_lookup(self):
+                   from everos.infra.persistence.lancedb.lancedb_manager import (
+                       get_table,
+                   )
+                   return await get_table(self.schema.TABLE_NAME, self.schema)
+
+           episode_repo = _EpisodeRepo()
+           await episode_repo.add([Episode(text=..., vector=[...])])
+
+    The LanceDB table name lives on the schema (``BaseLanceTable.TABLE_NAME``)
+    so every LanceDB-side metadatum — column shape, table name,
+    vector dim, BM25 index spec — sits in one place. ``table_name``
+    here is a thin pass-through; subclasses normally do **not**
+    override it.
+
+    Write paths (``add`` / ``upsert`` / ``delete`` / ``delete_by_md_path``)
+    are serialised by a per-``table_name`` :class:`asyncio.Lock`. LanceDB's
+    ``merge_insert`` is a read-modify-write at the storage layer with no
+    application-visible OCC contract — two concurrent calls against the
+    same table can race on the version manifest and lose updates even
+    when the row sets are disjoint (observed: cascade worker
+    ``asyncio.gather`` over a batch of ``user_profile`` rows where one
+    write disappears). Serialising on the table name closes that window;
+    reads stay unlocked so search QPS is not impacted by writers.
+
+    Locks live in a class-level dict keyed by table name and are never
+    evicted (mirrors :mod:`everos.memory.strategies._partition_locks`
+    on bpo-28427 — a lock with pending waiters must outlive any dict
+    entry that points to it).
+    """
+
+    schema: type[T]
+
+    _table_locks: ClassVar[dict[str, asyncio.Lock]] = {}
+    """Per-table-name write lock pool (process-wide, lazily populated)."""
+
+    @property
+    def table_name(self) -> str:
+        """LanceDB table name, resolved from :attr:`schema.TABLE_NAME`.
+
+        Read from the schema on every access (nothing is cached here).
+        The write methods also use this value as the key into the
+        per-table write-lock pool.
+        """
+        return self.schema.TABLE_NAME
+
+    @classmethod
+    def _write_lock(cls, table_name: str) -> asyncio.Lock:
+        """Return the write lock for ``table_name``; create on first use.
+
+        ``dict.setdefault`` is atomic under single-threaded asyncio (no
+        ``await`` between check and insert), so no meta-lock is needed.
+        """
+        return cls._table_locks.setdefault(table_name, asyncio.Lock())
+
+    @classmethod
+    def _reset_locks_for_tests(cls) -> None:
+        """Test-only: drop the write-lock pool.
+
+        ``asyncio.Lock`` binds to the current event loop on first
+        ``acquire()``; pytest-asyncio creates a fresh loop per test, so
+        a module-level lock surviving across tests fails with "bound to
+        a different event loop". The production cascade worker runs on
+        one loop forever and does not need this hook. Mirrors
+        :func:`everos.memory.strategies._partition_locks._reset_for_tests`.
+        """
+        # In-place clear: the pool dict object is kept, only its entries go,
+        # so the next ``_write_lock`` call creates a fresh lock per table.
+        cls._table_locks.clear()
+
+    def __init__(self, table: AsyncTable | None = None) -> None:
+        """Bind to a table directly; if ``None``, defer to ``_table_lookup``.
+
+        Args:
+            table: Table to use for every operation. When omitted, each
+                operation resolves its table through :meth:`_table_lookup`.
+        """
+        self._table_override = table
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Resolve the table on first use. Override in subclass.
+
+        ``LanceRepoBase`` itself has no idea where the runtime singleton
+        lives. The default raises so a missing override is loud rather
+        than silently broken.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__}: pass table= to __init__ "
+            "or override _table_lookup() to wire the storage manager."
+        )
+
+    async def _table(self) -> AsyncTable:
+        if self._table_override is not None:
+            return self._table_override
+        return await self._table_lookup()
+
+    # ── Create ─────────────────────────────────────────────────────────────
+
+    async def add(self, records: Sequence[T]) -> None:
+        """Insert one or more records."""
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await table.add(list(records))
+
+    # ── Upsert ─────────────────────────────────────────────────────────────
+
+    async def upsert(
+        self,
+        records: Sequence[T],
+        *,
+        by: str = "id",
+    ) -> None:
+        """Upsert records keyed by ``by`` (PK column, default ``"id"``).
+
+        Wraps LanceDB's ``merge_insert(on=...)`` fluent builder with the
+        equivalent of ``INSERT ... ON CONFLICT(by) DO UPDATE`` — matching
+        rows are replaced wholesale, non-matching rows inserted.
+
+        Cascade uses this when reconciling md → LanceDB: an entry seen
+        for the first time inserts; an entry that was edited in md
+        updates its existing row.
+        """
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await (
+                table.merge_insert(by)
+                .when_matched_update_all()
+                .when_not_matched_insert_all()
+                .execute(list(records))
+            )
+
+    # ── Maintenance ────────────────────────────────────────────────────────
+
+    async def optimize(self, *, cleanup_older_than: dt.timedelta | None = None) -> None:
+        """Compact fragments + merge new data into the FTS / vector indexes.
+
+        LanceDB's ``merge_insert`` writes new data into a fresh fragment.
+        The FTS (BM25) index built by :meth:`ensure_fts_indexes` only
+        covers fragments visible at index-build time, so rows written
+        after the initial build can become **invisible to BM25 queries**
+        until ``optimize()`` runs and merges those fragments into the
+        index segment that the query engine reads.
+
+        Symptom this guards against (verified on LoCoMo conv0): after
+        steady-state cascade ingest, ``nearest_to_text("any_common_word")``
+        returns 0 hits even though the column literally contains the
+        token in 100% of rows — the new fragments simply hadn't been
+        indexed.
+
+        Cascade triggers this through a per-kind throttle + trailing
+        edge scheduler (``CascadeWorker._schedule_optimize``): at most
+        one run per ~1s window per kind, decoupled from the drain
+        loop, with a 60s heartbeat sweep as a safety net. Cost is
+        O(N) data-rewrite per optimized fragment; the throttle is how
+        we cap it under sustained write pressure.
+
+        Args:
+            cleanup_older_than: When set, also prune (physically delete)
+                files belonging to dataset versions older than this
+                interval. ``None`` (default) compacts only — historical
+                manifests, replaced data fragments, and stale index
+                UUID files are kept on disk forever, which inflates the
+                file count (and FD usage at scan time) without bound.
+                Cascade passes a non-None value on a slower beat
+                (``CascadeWorker._optimize_prune_interval``) so the
+                hot drain path stays cheap. Note: this does *not*
+                shrink **active** index internals (FTS ``part_N`` count
+                or vector index UUID count) — those only collapse via
+                ``drop_index + create_index``, which is not done here.
+        """
+        table = await self._table()
+        # NOTE(review): unlike ``add`` / ``upsert`` this call does not take
+        # the per-table write lock — confirm that running compaction
+        # concurrently with a ``merge_insert`` on the same table is safe.
+        await table.optimize(cleanup_older_than=cleanup_older_than)
+
+    async def rebuild_indexes(self) -> None:
+        """Drop and re-create every index on this table.
+
+        **Why this exists** — workaround for an upstream Python API gap:
+
+        Lance's Rust ``OptimizeOptions`` has a ``num_indices_to_merge``
+        knob (default 1) that bounds the number of active index UUIDs
+        per column. With ``Some(1)``, every ``optimize_indices()`` call
+        merges its delta into the base — active UUID count stays at 1.
+
+        Two problems block us from using it from the application layer:
+
+        1. ``lancedb.AsyncTable.optimize()`` does **not expose** this
+           parameter (verified on lancedb main 2026-05-28). It forwards
+           only ``cleanup_since_ms`` and ``delete_unverified`` to Rust.
+        2. Even calling Lance directly via ``pylance``, the merge
+           behaviour itself is buggy on ``lance crate 4.0`` (what
+           lancedb 0.30.2 embeds) — ``num_indices_to_merge=1`` does
+           nothing. Fix landed in ``lance 7.x``, but ``pylance 7.x``
+           can not collapse indexes on a ``lance 4.0``-format dataset
+           (verified by experiment).
+
+        So in our current stack there is **no application-level path**
+        to bound active index UUID growth. ``optimize()`` keeps
+        accumulating one new UUID (vector) / one new ``part_N`` (FTS)
+        per call.
+
+        This method is the workaround: drop every existing index and
+        rebuild from the schema's ``ensure_fts_indexes`` contract. The
+        rebuild is **O(N) full retrain** but cheap in practice (~0.3s
+        for 50k rows × 2 FTS columns on local SSD), and during the
+        window LanceDB transparently falls back to brute-force scan so
+        queries and writes stay available.
+
+        **Cadence** — :class:`CascadeWorker` runs this on a slow loop
+        (default 12h per kind). Frequency is bounded by the rebuild
+        cost, not by correctness — even daily is fine functionally;
+        12h is a conservative pick to keep file/UUID counts well below
+        any FD ceiling under steady-state ingest.
+
+        **When to remove** — once lancedb exposes ``num_indices_to_merge``
+        on the async Python API **and** the embedded ``lance crate``
+        ships the working merge implementation, delete this method and
+        switch to ``optimize(num_indices_to_merge=1)`` in the regular
+        ``optimize()`` path. Tracking issues / context:
+
+        - https://github.com/lancedb/lancedb/issues/2193
+        - https://github.com/lancedb/lancedb/issues/3177
+        - https://github.com/lance-format/lance/pull/6711 (partial fix
+          in lance v7.0.0)
+        - https://docs.rs/lancedb/latest/lancedb/table/struct.OptimizeOptions.html
+        """
+        table = await self._table()
+        # The drop + rebuild pair runs under the same per-table write lock
+        # that the delete paths take.
+        async with self._write_lock(self.table_name):
+            # NOTE(review): every index reported by ``list_indices()`` is
+            # dropped, but only ``ensure_fts_indexes`` runs afterwards —
+            # confirm it also restores any vector / scalar indexes (or that
+            # none exist on these tables). If the rebuild raises, the table
+            # is left without the dropped indexes until the next run.
+            for idx in await table.list_indices():
+                await table.drop_index(idx.name)
+            await self.schema.ensure_fts_indexes(table)
+
+    # ── Read ───────────────────────────────────────────────────────────────
+
+    async def count(self) -> int:
+        """Total row count."""
+        table = await self._table()
+        return await table.count_rows()
+
+    async def get_by_id(
+        self,
+        id_value: str,
+        *,
+        id_field: str = "id",
+    ) -> T | None:
+        """Fetch one row by scalar PK; ``None`` if missing.
+
+        Uses LanceDB scalar filter ``<id_field> = '<id_value>'``. Single
+        quotes in ``id_value`` are doubled to avoid breaking the SQL-like
+        predicate; everos's PK convention is ``<owner_id>_<entry_id>``
+        which never contains quotes, so the escape is defensive.
+        """
+        table = await self._table()
+        rows = (
+            await table.query()
+            .where(f"{id_field} = '{_q(id_value)}'")
+            .limit(1)
+            .to_list()
+        )
+        if not rows:
+            return None
+        return self.schema.model_validate(rows[0])
+
+    async def find_where(
+        self,
+        where: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Scalar query returning *typed* schema instances.
+
+        Like :meth:`search` but returns ``list[T]`` rather than raw
+        LanceDB row dicts. No vector ANN; pure scalar filter only.
+        Use :meth:`search` when you need ``_distance`` or want to mix
+        ANN with filters.
+        """
+        table = await self._table()
+        rows = await table.query().where(where).limit(limit).to_list()
+        return [self.schema.model_validate(r) for r in rows]
+
+    async def find_one_where(self, where: str) -> T | None:
+        """Single-row variant of :meth:`find_where` (``None`` if no match)."""
+        rows = await self.find_where(where, limit=1)
+        return rows[0] if rows else None
+
+    async def find_where_paginated(
+        self,
+        where: str,
+        *,
+        sort_by: str,
+        descending: bool = True,
+        page: int = 1,
+        page_size: int = 20,
+        max_fetch: int = 20000,
+    ) -> tuple[list[T], int]:
+        """Paginated scalar query with in-memory sort.
+
+        LanceDB has no native ``ORDER BY``. The chassis fetches up to
+        ``max_fetch`` rows matching ``where``, sorts the resulting Arrow
+        table by ``sort_by``, then slices ``page`` × ``page_size``. The
+        *true* row count of the predicate is returned alongside the
+        page so callers can render pagination controls without a second
+        query.
+
+        Args:
+            where: SQL-like scalar predicate. Required (no implicit
+                full-table scan from ``find_where_paginated``).
+            sort_by: Column name to sort the result set by.
+            descending: ``True`` (default) → newest first; ``False`` →
+                ascending.
+            page: 1-indexed page number.
+            page_size: Rows per page.
+            max_fetch: Cap on rows pulled before the in-memory sort.
+                When the predicate matches more rows than this cap the
+                page is sorted over an *arbitrary* prefix and the page
+                contents are only approximately correct — the chassis
+                emits a warning so the caller learns about the
+                truncation.
+
+        Returns:
+            ``(rows, total)`` — ``rows`` is the typed page,
+            ``total`` is ``count_rows(filter=where)`` (the predicate's
+            true match count, regardless of ``max_fetch``).
+        """
+        table = await self._table()
+        total = await table.count_rows(filter=where)
+        if total > max_fetch:
+            logger.warning(
+                "find_where_paginated truncated",
+                extra={
+                    "table": self.table_name,
+                    "where": where,
+                    "total": total,
+                    "max_fetch": max_fetch,
+                },
+            )
+        arrow_tbl = await table.query().where(where).limit(max_fetch).to_arrow()
+        order = "descending" if descending else "ascending"
+        arrow_tbl = arrow_tbl.sort_by([(sort_by, order)])
+        offset = (page - 1) * page_size
+        page_rows = arrow_tbl.slice(offset, page_size)
+        return (
+            [self.schema.model_validate(r) for r in page_rows.to_pylist()],
+            total,
+        )
+
+    async def find_by_owner(
+        self,
+        owner_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Fetch rows by ``owner_id`` (5 business tables share this column)."""
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}'",
+            limit=limit,
+        )
+
+    async def find_by_md_path(self, md_path: str) -> T | None:
+        """Reverse-lookup from md path (cascade maps md edit → row)."""
+        return await self.find_one_where(f"md_path = '{_q(md_path)}'")
+
+    async def search(
+        self,
+        *,
+        vector: Sequence[float] | None = None,
+        where: str | None = None,
+        limit: int = 10,
+    ) -> list[dict[str, Any]]:
+        """Hybrid search: optional vector ANN + scalar SQL-like predicate.
+
+        Args:
+            vector: Embedding to find nearest rows for; ``None`` skips ANN.
+            where: SQL-like predicate (e.g. ``"tags = 'meeting'"``).
+            limit: Max rows.
+
+        Returns:
+            List of row dicts (LanceDB native shape — fields depend on
+            ``schema``; ``_distance`` added when ``vector`` is given).
+        """
+        table = await self._table()
+        q = table.query()
+        if vector is not None:
+            q = q.nearest_to(list(vector))
+        if where is not None:
+            q = q.where(where)
+        return await q.limit(limit).to_list()
+
+    # ── Delete ─────────────────────────────────────────────────────────────
+
+    async def delete(self, predicate: str) -> None:
+        """Delete rows matching a SQL-like predicate."""
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await table.delete(predicate)
+
+    async def delete_by_md_path(self, md_path: str) -> int:
+        """Delete every row whose ``md_path`` matches; return rows deleted.
+
+        Cascade handler calls this when an md file is removed on disk
+        (or when reverse-reconcile discovers an orphaned LanceDB row).
+        Single quotes in ``md_path`` are doubled defensively.
+        """
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            result = await table.delete(f"md_path = '{_q(md_path)}'")
+        return int(result.num_deleted_rows)
+
+
+class LanceDailyLogRepoBase[T: BaseLanceTable](LanceRepoBase[T]):
+    """LanceRepoBase + queries unique to daily-log tables.
+
+    Daily-log tables (``episode`` / ``atomic_fact`` / ``foresight`` /
+    ``agent_case``) share a fixed schema slice: ``entry_id`` (md seq
+    id), ``session_id`` (conversation scope), and ``parent_type`` /
+    ``parent_id`` (record lineage). The queries below compose those
+    columns; ``agent_skill`` is *not* a daily-log (it is a named
+    single-file entity) and uses :class:`LanceRepoBase` directly.
+    """
+
+    async def find_by_owner_entry(
+        self,
+        owner_id: str,
+        entry_id: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> T | None:
+        """Single point-query by ``(app, project, owner_id, entry_id)``.
+
+        ``entry_id`` is only unique within a (app, project, owner) scope —
+        the same ``ac_<date>_<seq>`` recurs in another space — so the
+        scope segments are part of the predicate to avoid a cross-space hit.
+        """
+        return await self.find_one_where(
+            f"owner_id = '{_q(owner_id)}' AND entry_id = '{_q(entry_id)}' "
+            f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'"
+        )
+
+    async def find_by_owner_entries(
+        self,
+        owner_id: str,
+        entry_ids: Sequence[str],
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> list[T]:
+        """Bulk point-query by ``(app, project, owner_id, entry_id IN ...)``.
+
+        Empty ``entry_ids`` short-circuits to ``[]`` rather than emit a
+        ``WHERE entry_id IN ()`` predicate (LanceDB rejects empty
+        tuples). The query's ``limit`` is bound to ``len(entry_ids)``
+        because at most one row per id can exist under one (app, project,
+        owner) scope.
+        """
+        if not entry_ids:
+            return []
+        quoted = ", ".join(f"'{_q(eid)}'" for eid in entry_ids)
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}' AND entry_id IN ({quoted}) "
+            f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'",
+            limit=len(entry_ids),
+        )
+
+    async def find_by_session(
+        self,
+        owner_id: str,
+        session_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Every row in one conversation ``session_id`` under ``owner_id``."""
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}' AND session_id = '{_q(session_id)}'",
+            limit=limit,
+        )
+
+    async def find_by_parent(
+        self,
+        parent_type: str,
+        parent_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Every row whose parent matches ``(parent_type, parent_id)``."""
+        return await self.find_where(
+            f"parent_type = '{_q(parent_type)}' AND parent_id = '{_q(parent_id)}'",
+            limit=limit,
+        )
--- a/src/everos/core/persistence/locking.py
+++ b/src/everos/core/persistence/locking.py
@ -0,0 +1,76 @@
+"""Process-wide exclusive lock on a memory-root.
+
+Uses ``fcntl.flock`` (POSIX advisory locking, available on Linux + macOS;
+Windows is not supported — see project README on platform scope). The
+public surface is an :func:`contextlib.asynccontextmanager` so callers
+use ``async with memory_root_lock(mr):``; the underlying syscalls have
+no async equivalent so they run in a worker thread via
+:func:`anyio.to_thread.run_sync`.
+"""
+
+from __future__ import annotations
+
+import fcntl
+import os
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+import anyio
+
+from .memory_root import MemoryRoot
+
+
+class LockError(RuntimeError):
+    """Raised when the memory-root lock cannot be acquired in non-blocking mode.
+
+    :func:`memory_root_lock` raises it (chained from the underlying
+    ``BlockingIOError``) when called with ``blocking=False`` while the
+    lock is already held.
+    """
+
+
+@asynccontextmanager
+async def memory_root_lock(
+    memory_root: MemoryRoot,
+    *,
+    blocking: bool = True,
+) -> AsyncIterator[None]:
+    """Acquire an exclusive process lock on the memory-root.
+
+    Args:
+        memory_root: The memory-root to lock. The lock anchor file
+            (``<root>/.lock``) is created on first use.
+        blocking: If ``True`` (default), wait until the lock is free. If
+            ``False``, raise :class:`LockError` immediately when another
+            process holds it.
+
+    Raises:
+        LockError: When ``blocking=False`` and the lock is already held.
+    """
+    await anyio.Path(memory_root.root).mkdir(parents=True, exist_ok=True)
+    lock_path = memory_root.lock_file
+
+    # Open the anchor file (create on first use). The fd, not the path, is
+    # what fcntl operates on. ``os.open`` is microsecond-fast but offloaded
+    # for consistency with the rest of the lock acquisition flow.
+    fd = await anyio.to_thread.run_sync(
+        lambda: os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
+    )
+
+    flags = fcntl.LOCK_EX
+    if not blocking:
+        flags |= fcntl.LOCK_NB
+
+    try:
+        await anyio.to_thread.run_sync(fcntl.flock, fd, flags)
+    except BlockingIOError as exc:
+        await anyio.to_thread.run_sync(os.close, fd)
+        raise LockError(
+            f"another process already holds the memory-root lock at {lock_path}"
+        ) from exc
+
+    # Lock acquired — release + close strictly on exit. The BlockingIOError
+    # path above already cleaned up its fd, so it must NOT enter this
+    # finally block (otherwise we'd double-close).
+    try:
+        yield
+    finally:
+        try:
+            await anyio.to_thread.run_sync(fcntl.flock, fd, fcntl.LOCK_UN)
+        finally:
+            await anyio.to_thread.run_sync(os.close, fd)
--- a/src/everos/core/persistence/markdown/init.py
+++ b/src/everos/core/persistence/markdown/init.py
@ -0,0 +1,62 @@
+"""Markdown file IO toolkit.
+
+Atomic write + YAML frontmatter parse/dump + entry marker parse +
+audit-form structured-entry parsing. Knows nothing about business
+models (no MemCell / Episode); the :class:`Entry` here is a
+*marker-delimited* span within a markdown body, not a business record.
+
+External usage (IO + parse):
+    from everos.core.persistence.markdown import (
+        Entry, EntryId, StructuredEntry,
+        MarkdownReader, MarkdownWriter, ParsedMarkdown,
+        parse_frontmatter, dump_frontmatter,
+        split_entries, find_entry,
+        parse_structured_entry, render_structured_entry,
+    )
+
+External usage (frontmatter schema chassis):
+    from everos.core.persistence.markdown import (
+        BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
+        DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
+    )
+"""
+
+from .entries import Entry as Entry
+from .entries import EntryId as EntryId
+from .entries import StructuredEntry as StructuredEntry
+from .entries import find_entry as find_entry
+from .entries import parse_structured_entry as parse_structured_entry
+from .entries import render_structured_entry as render_structured_entry
+from .entries import split_entries as split_entries
+from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
+from .frontmatter import BaseFrontmatter as BaseFrontmatter
+from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
+from .frontmatter import ProfilePathMixin as ProfilePathMixin
+from .frontmatter import SkillPathMixin as SkillPathMixin
+from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
+from .frontmatter import dump_frontmatter as dump_frontmatter
+from .frontmatter import parse_frontmatter as parse_frontmatter
+from .parsed import ParsedMarkdown as ParsedMarkdown
+from .reader import MarkdownReader as MarkdownReader
+from .writer import MarkdownWriter as MarkdownWriter
+
+# Public API of the markdown toolkit: exactly the names re-exported by the
+# imports above, kept in sorted order.
+__all__ = [
+    "AgentScopedFrontmatter",
+    "BaseFrontmatter",
+    "DailyLogPathMixin",
+    "Entry",
+    "EntryId",
+    "MarkdownReader",
+    "MarkdownWriter",
+    "ParsedMarkdown",
+    "ProfilePathMixin",
+    "SkillPathMixin",
+    "StructuredEntry",
+    "UserScopedFrontmatter",
+    "dump_frontmatter",
+    "find_entry",
+    "parse_frontmatter",
+    "parse_structured_entry",
+    "render_structured_entry",
+    "split_entries",
+]
--- a/src/everos/core/persistence/markdown/entries.py
+++ b/src/everos/core/persistence/markdown/entries.py
@ -0,0 +1,368 @@
+"""Markdown entries — id format, marker spans, and audit-form parsing.
+
+Three closely-related entry concepts live together here so a reader
+sees the whole entry surface in one file:
+
+1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
+   stamped into each daily-log entry's open / close markers. Carries
+   the prefix declared by the frontmatter schema, the date bucket, and
+   the in-file sequence (``<NNNN>`` is shorthand: the sequence is
+   zero-padded to at least 8 digits when formatted).
+
+2. :class:`Entry` — a marker-delimited span inside a markdown body::
+
+       <!-- entry:abc123 -->
+       ...content...
+       <!-- /entry:abc123 -->
+
+   :func:`split_entries` and :func:`find_entry` locate these spans
+   without interpreting the inner content. Higher layers (writers,
+   cascade) parse it per record type.
+
+3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
+   audit-form body fields (header / inline / sections). Built either
+   from a raw body string via :func:`parse_structured_entry` or from
+   an existing :class:`Entry` via :meth:`Entry.as_structured`.
+
+Audit-form layout::
+
+    ## <header>                ← optional H2 (usually entry id, for grep)
+
+    **key**: value             ← inline fields, one per line
+    **key2**: value2
+
+    ### Section Title          ← section fields: H3 + free-form text
+    body content...
+
+    ### Another Section
+    more content...
+
+The audit chassis is intentionally **type-agnostic** — every field
+round-trips as a string. Inline values are stringified on render
+(lists become ``[a, b, c]``, scalars use ``str()``); on parse
+everything is the raw text after the colon. Section titles are kept
+verbatim. This keeps parsing tolerant of stray fields, wrapped
+strings, and manually-typed timestamps; the strong-typed model lives
+in business writers + the SQLite/LanceDB indexes.
+
+Cross-user uniqueness is handled at the database layer via a composite
+``<user_id>_<entry_id>`` field; it is *not* encoded into the
+:class:`EntryId` string itself.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+import re
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Self
+
+# ── EntryId — structured id for marker stamping ─────────────────────────
+
+_DATE_FMT = "%Y%m%d"
+_SEQ_DIGITS = 8
+"""Minimum zero-padding for the in-file seq.
+
+8 digits keeps lexicographic order == numeric order up to 10**8
+entries per file (per user, per day). ``format()`` is "at least 8" —
+larger seqs emit more digits without truncation. ``parse`` is
+permissive: shorter (legacy 4-digit) and longer seq strings both
+parse cleanly; format normalises to >= 8 digits on round-trip.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class EntryId:
+    """Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
+
+    prefix: str
+    date: _dt.date
+    seq: int
+
+    def format(self) -> str:
+        """Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
+        return (
+            f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
+        )
+
+    def __str__(self) -> str:  # noqa: D401
+        return self.format()
+
+    @classmethod
+    def parse(cls, s: str) -> Self:
+        """Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
+
+        Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
+        possible) is preserved as-is.
+        """
+        parts = s.rsplit("_", 2)
+        if len(parts) != 3:
+            raise ValueError(f"invalid entry id format: {s!r}")
+        prefix, date_str, seq_str = parts
+        if not prefix:
+            raise ValueError(f"empty prefix in entry id: {s!r}")
+        try:
+            d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
+        except ValueError as exc:
+            raise ValueError(f"invalid date in entry id: {s!r}") from exc
+        try:
+            seq = int(seq_str)
+        except ValueError as exc:
+            raise ValueError(f"invalid seq in entry id: {s!r}") from exc
+        if seq < 0:
+            raise ValueError(f"negative seq in entry id: {s!r}")
+        return cls(prefix=prefix, date=d, seq=seq)
+
+    @classmethod
+    def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
+        """Build the id for the next entry given the file's current count.
+
+        ``current_count`` is the value of ``frontmatter.entry_count``
+        *before* this append. The new id gets ``seq = current_count + 1``.
+        """
+        if current_count < 0:
+            raise ValueError(f"current_count must be >= 0, got {current_count}")
+        return cls(prefix=prefix, date=date, seq=current_count + 1)
+
+
+# ── Entry — marker-delimited span inside a body ─────────────────────────
+
+# Filename / URL-safe id alphabet for the marker.
+_ID_PATTERN = r"[A-Za-z0-9_-]+"
+# Open marker ``<!-- entry:<id> -->``; group 1 captures the id.
+_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
+
+
+@dataclass(frozen=True)
+class Entry:
+    """One marker-delimited entry within a markdown body.
+
+    Attributes:
+        id: Value between ``entry:`` and ``-->`` in the open marker.
+        body: Content between the open and close markers, with one leading
+            and one trailing newline removed (typical formatter output).
+        start: Offset of the opening ``<!-- entry:id -->`` in the source body.
+        end: Offset just past the closing ``<!-- /entry:id -->`` in the source.
+    """
+
+    id: str
+    body: str
+    start: int
+    end: int
+
+    def as_structured(self) -> StructuredEntry:
+        """Parse my body as audit-form and return a :class:`StructuredEntry`.
+
+        The id / body / start / end fields are preserved; the parsed
+        ``header`` / ``inline`` / ``sections`` are added on top.
+        """
+        return parse_structured_entry(self.body, _origin=self)
+
+
+def split_entries(body: str) -> list[Entry]:
+    """Scan ``body`` and return every entry in order.
+
+    Unmatched / unterminated open markers stop the scan at the first
+    such marker — partial entries are not returned. Callers needing
+    strict validation should layer a dedicated check on top.
+    """
+    entries: list[Entry] = []
+    pos = 0
+    while True:
+        open_match = _OPEN_RE.search(body, pos)
+        if open_match is None:
+            break
+        entry_id = open_match.group(1)
+        close_match = _close_re_for(entry_id).search(body, open_match.end())
+        if close_match is None:
+            # Unterminated entry — abort further scanning.
+            break
+        entries.append(
+            Entry(
+                id=entry_id,
+                body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+                start=open_match.start(),
+                end=close_match.end(),
+            )
+        )
+        pos = close_match.end()
+    return entries
+
+
+def find_entry(body: str, entry_id: str) -> Entry | None:
+    """Find the first entry with ``entry_id``, or ``None``."""
+    open_re = re.compile(rf"<!-- entry:{re.escape(entry_id)} -->")
+    open_match = open_re.search(body)
+    if open_match is None:
+        return None
+    close_match = _close_re_for(entry_id).search(body, open_match.end())
+    if close_match is None:
+        return None
+    return Entry(
+        id=entry_id,
+        body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+        start=open_match.start(),
+        end=close_match.end(),
+    )
+
+
+def _close_re_for(entry_id: str) -> re.Pattern[str]:
+    """Build the close-marker regex for a specific id."""
+    return re.compile(rf"<!-- /entry:{re.escape(entry_id)} -->")
+
+
+def _strip_one_newline(text: str) -> str:
+    """Strip one leading and one trailing newline (typical formatter padding)."""
+    if text.startswith("\r\n"):
+        text = text[2:]
+    elif text.startswith("\n"):
+        text = text[1:]
+    if text.endswith("\r\n"):
+        text = text[:-2]
+    elif text.endswith("\n"):
+        text = text[:-1]
+    return text
+
+
+# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
+
+# H2 line: ``## <header>``.
+_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
+# Inline field: ``**key**: value``. Anchored to line start so a stray
+# ``**emphasis**`` mid-paragraph isn't mistaken for a field.
+_INLINE_RE = re.compile(
+    r"^\*\*(?P<key>[^*\n]+?)\*\*:\s*(?P<value>.*?)\s*$",
+    re.MULTILINE,
+)
+# H3 line: ``### Title``.
+_H3_RE = re.compile(r"^###\s+(.+?)\s*$", re.MULTILINE)
+
+
+@dataclass(frozen=True)
+class StructuredEntry(Entry):
+    """:class:`Entry` whose body has been parsed as audit-form data.
+
+    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`.
+    When built by :func:`parse_structured_entry` from a raw body string
+    with no marker context these are ``""`` / the raw body / ``0`` /
+    ``len(body)``. Three parsed views of the body are added on top: the
+    optional H2 header, the inline ``**key**: value`` map, and the
+    ``### Title`` sections.
+
+    Audit-form values are strings only; type coercion is the caller's
+    job (a strong-typed model lives in the writer / index).
+    """
+
+    # Text of the ``## <header>`` line, or ``None`` when the body has none.
+    header: str | None = None
+    # ``**key**: value`` pairs, keyed by field name.
+    inline: dict[str, str] = field(default_factory=dict)
+    # ``### Title`` blocks, keyed by title.
+    sections: dict[str, str] = field(default_factory=dict)
+
+
+def render_structured_entry(
+    *,
+    header: str | None = None,
+    inline: Mapping[str, object] | None = None,
+    sections: Mapping[str, str] | None = None,
+) -> str:
+    """Render an audit-form entry body.
+
+    Args:
+        header: Optional H2 line at the top (typically the entry id —
+            redundant with the marker but useful for plain-text grep).
+        inline: ``{key: value}`` rendered as ``**key**: value``. Values
+            are stringified: ``list``/``tuple`` become ``[a, b, c]``;
+            ``None`` becomes the empty string; everything else uses
+            ``str()``.
+        sections: ``{title: body}`` rendered as ``### Title`` plus the
+            body text. Title is verbatim; body's trailing whitespace is
+            stripped.
+
+    Returns:
+        The rendered string, no trailing newline (the caller — typically
+        :meth:`MarkdownWriter.append_entry` — handles markers + newlines).
+    """
+    inline = inline or {}
+    sections = sections or {}
+    lines: list[str] = []
+
+    if header:
+        lines.append(f"## {header}")
+        lines.append("")
+
+    for key, value in inline.items():
+        lines.append(f"**{key}**: {_render_value(value)}")
+
+    for title, body in sections.items():
+        lines.append("")
+        lines.append(f"### {title}")
+        lines.append(body.rstrip())
+
+    return "\n".join(lines)
+
+
+def parse_structured_entry(
+    body: str, *, _origin: Entry | None = None
+) -> StructuredEntry:
+    """Parse an audit-form entry body. Strings only — no type coercion.
+
+    Tolerant of:
+
+    - missing H2 (``header`` will be ``None``)
+    - inline fields appearing before, between or after sections
+      (only matches before the first H3 are taken as the inline block)
+    - extra whitespace and stray lines (silently kept inside the
+      enclosing section's body)
+
+    When called via :meth:`Entry.as_structured`, the ``_origin`` Entry
+    contributes its ``id`` / ``start`` / ``end``; otherwise those fall
+    back to ``""`` / ``0`` / ``len(body)``.
+
+    Returns:
+        :class:`StructuredEntry` with everything as strings.
+    """
+    text = body.strip("\n")
+
+    # Split on H3 lines.
+    parts = _H3_RE.split(text)
+    head = parts[0]
+    sections_dict: dict[str, str] = {}
+    for i in range(1, len(parts), 2):
+        title = parts[i].strip()
+        content = parts[i + 1] if i + 1 < len(parts) else ""
+        sections_dict[title] = content.strip("\n").rstrip()
+
+    header: str | None = None
+    h2 = _H2_RE.search(head)
+    if h2:
+        header = h2.group(1).strip()
+
+    inline_dict: dict[str, str] = {
+        m.group("key").strip(): m.group("value").strip()
+        for m in _INLINE_RE.finditer(head)
+    }
+
+    if _origin is not None:
+        return StructuredEntry(
+            id=_origin.id,
+            body=_origin.body,
+            start=_origin.start,
+            end=_origin.end,
+            header=header,
+            inline=inline_dict,
+            sections=sections_dict,
+        )
+    return StructuredEntry(
+        id="",
+        body=body,
+        start=0,
+        end=len(body),
+        header=header,
+        inline=inline_dict,
+        sections=sections_dict,
+    )
+
+
+def _render_value(value: object) -> str:
+    """Stringify an inline value the audit-friendly way."""
+    if value is None:
+        return ""
+    if isinstance(value, list | tuple):
+        return "[" + ", ".join(str(item) for item in value) + "]"
+    return str(value)
--- a/src/everos/core/persistence/markdown/frontmatter.py
+++ b/src/everos/core/persistence/markdown/frontmatter.py
@ -0,0 +1,300 @@
+"""Frontmatter — YAML block parse / dump + L1 schema chassis.
+
+Frontmatter is the leading ``---``-delimited YAML block at the top of
+a markdown document::
+
+    ---
+    title: Hello
+    tags: [a, b]
+    ---
+    # Body starts here
+
+Two complementary surfaces live here:
+
+1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
+   YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
+   ``sort_keys=False`` so caller-controlled key order is preserved).
+
+2. The L1 chassis classes — :class:`BaseFrontmatter`,
+   :class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
+   which fix the *absolute-readonly* fields (``id`` / ``type`` /
+   ``schema_version``) plus scope (``user_id`` / ``agent_id`` +
+   ``track``). Every business frontmatter schema in
+   ``infra/persistence/markdown/mds/`` subclasses one of these.
+
+Concrete business schemas (``UserMemcellDailyFrontmatter``,
+``SkillFrontmatter``, …) live in ``infra``; they add per-record
+business fields plus the path-resolution metadata daily-log writers
+need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, ClassVar, Literal
+
+import yaml
+from pydantic import BaseModel, ConfigDict
+
+# ── YAML helpers ────────────────────────────────────────────────────────
+
+# Delimiter line that both opens and closes a frontmatter block.
+_DELIM = "---"
+
+
+def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
+    """Parse a leading ``---\\n...\\n---\\n`` YAML block.
+
+    Returns:
+        (meta, remainder): ``meta`` is the parsed YAML mapping (empty dict
+        if no frontmatter present, malformed, or non-mapping). ``remainder``
+        is everything after the closing delimiter line — including the body's
+        leading content as-is.
+
+    Notes:
+        - If the document does not start with ``---``, returns ``({}, text)``
+          unchanged.
+        - If a closing ``---`` line is not found, returns ``({}, text)``.
+        - If the YAML block is empty (``---\\n---\\n``), returns
+          ``({}, remainder)``.
+        - If the parsed YAML is not a mapping (e.g. a scalar list), returns
+          ``({}, text)`` — frontmatter must be a mapping.
+    """
+    if not text.startswith(_DELIM):
+        return {}, text
+
+    # Skip the opening "---" and the newline that must follow it.
+    rest = text[len(_DELIM) :]
+    if rest.startswith("\r\n"):
+        rest = rest[2:]
+    elif rest.startswith("\n"):
+        rest = rest[1:]
+    else:
+        # Opening "---" not followed by a newline → not a valid frontmatter.
+        return {}, text
+
+    closing_idx = _find_closing_delim(rest)
+    if closing_idx is None:
+        return {}, text
+
+    yaml_block = rest[:closing_idx]
+    remainder = rest[closing_idx + len(_DELIM) :]
+    # Drop the newline that follows the closing delimiter, if any.
+    if remainder.startswith("\r\n"):
+        remainder = remainder[2:]
+    elif remainder.startswith("\n"):
+        remainder = remainder[1:]
+
+    parsed: Any = yaml.safe_load(yaml_block) if yaml_block.strip() else {}
+    if parsed is None:
+        parsed = {}
+    if not isinstance(parsed, dict):
+        return {}, text
+    return parsed, remainder
+
+
+def dump_frontmatter(meta: Mapping[str, Any]) -> str:
+    """Render a mapping as a ``---\\n<yaml>\\n---\\n`` block.
+
+    An empty mapping yields the empty string (no delimiters). The YAML
+    payload preserves caller-supplied key order (``sort_keys=False``).
+    """
+    if not meta:
+        return ""
+    yaml_block = yaml.safe_dump(
+        dict(meta),
+        sort_keys=False,
+        allow_unicode=True,
+        default_flow_style=False,
+    )
+    return f"{_DELIM}\n{yaml_block}{_DELIM}\n"
+
+
+def _find_closing_delim(text: str) -> int | None:
+    """Find the offset of a line that is exactly ``---``.
+
+    A "line" is text between two newlines (or string boundaries).
+    Returns the offset of the first character of the matching line, or
+    ``None`` if no such line exists.
+    """
+    pos = 0
+    while pos < len(text):
+        nl = text.find("\n", pos)
+        line = text[pos:nl] if nl != -1 else text[pos:]
+        if line.rstrip("\r") == _DELIM:
+            return pos
+        if nl == -1:
+            return None
+        pos = nl + 1
+    return None
+
+
+# ── L1 schema chassis ───────────────────────────────────────────────────
+
+
+class BaseFrontmatter(BaseModel):
+    """L1 fields every markdown frontmatter must carry.
+
+    These match the *absolute-readonly* tier in the EverOS Markdown First
+    spec — they identify the record across markdown ↔ LanceDB and must
+    never be rewritten by a human edit.
+
+    Subclasses add scope (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``) plus per-record business fields.
+
+    Keys not declared on the model are accepted and kept
+    (``extra="allow"``).
+    """
+
+    SCOPE_DIR: ClassVar[str] = ""
+    """Top-level directory under the memory-root that holds this kind.
+
+    Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
+    schemas (rare) leave it empty; consumers that need to resolve a path
+    (writers, layout reverse-lookup) must reject schemas with empty
+    ``SCOPE_DIR``.
+    """
+
+    # Identity fields: ``id`` and ``type`` are required; ``schema_version``
+    # falls back to 1 when the frontmatter omits it.
+    id: str
+    type: str
+    schema_version: int = 1
+
+    # Permit additional fields so L2 system-managed metadata
+    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can ride along on
+    # the same model without forcing every subclass to redeclare them.
+    model_config = ConfigDict(extra="allow")
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return an ``fnmatch``-style glob (relative to memory-root)
+        covering every markdown file this schema describes.
+
+        Used by the cascade kind registry — the scanner walks every kind's
+        ``path_glob()`` to enumerate eligible files without hard-coding
+        path patterns in cascade. The schema is the single source of truth
+        for both the writer's path resolution and the scanner's enumeration.
+
+        Subclasses must override — typically by mixing in
+        :class:`DailyLogPathMixin` or :class:`SkillPathMixin` *before* the
+        scope mixin in the MRO so this abstract version is shadowed.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError(
+            f"{cls.__name__} must declare path_glob() "
+            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
+        )
+
+
+class DailyLogPathMixin:
+    """Path strategy for daily-log files.
+
+    Files live at ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.
+    Subclasses must inherit a scope mixin (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``) supplying ``SCOPE_DIR``, and must declare
+    their own ``DIR_NAME`` / ``FILE_PREFIX`` ClassVars.
+
+    Place **this mixin first** so Python's MRO resolves ``path_glob()`` to
+    the mixin's concrete implementation rather than
+    :meth:`BaseFrontmatter.path_glob`'s ``NotImplementedError`` stub::
+
+        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+            DIR_NAME: ClassVar[str] = "episodes"
+            FILE_PREFIX: ClassVar[str] = "episode"
+            ...
+    """
+
+    DIR_NAME: ClassVar[str]
+    FILE_PREFIX: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix that
+        # precedes every user-visible dir; the scanner's ``root.glob`` is
+        # anchored at root, so the prefix is mandatory (without it nothing
+        # matches), and the watcher's right-anchored ``PurePosixPath.match``
+        # agrees on the same shape.
+        return f"*/*/{cls.SCOPE_DIR}/*/{cls.DIR_NAME}/{cls.FILE_PREFIX}-*.md"
+
+
+class SkillPathMixin:
+    """Path strategy for skill-directory files.
+
+    Each skill lives at ``<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/
+    <SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``. The glob covers
+    every skill's main file; sibling ``references/*.md`` and ``scripts/*``
+    are excluded (they ride alongside the main file and the cascade
+    daemon rebuilds the index column by concatenation, see
+    :class:`AgentSkillFrontmatter`'s docstring).
+
+    Place **this mixin first** so MRO resolves ``path_glob()`` here::
+
+        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
+            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
+            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
+            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
+            ...
+    """
+
+    SKILLS_CONTAINER_NAME: ClassVar[str]
+    SKILL_DIR_PREFIX: ClassVar[str]
+    SKILL_MAIN_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix.
+        return (
+            f"*/*/{cls.SCOPE_DIR}/*/{cls.SKILLS_CONTAINER_NAME}/"
+            f"{cls.SKILL_DIR_PREFIX}*/{cls.SKILL_MAIN_FILENAME}"
+        )
+
+
+class ProfilePathMixin:
+    """Path strategy for single-file profile markdown.
+
+    Profiles live at ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>`` —
+    one fixed-name file directly under the scope's owner directory, no
+    intermediate ``<dir>/`` segment (unlike daily-logs) and no per-name
+    subdir (unlike skills). Subclasses must inherit a scope mixin
+    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``) supplying
+    ``SCOPE_DIR`` and declare their own ``PROFILE_FILENAME``.
+
+    Place **this mixin first** so MRO resolves ``path_glob()`` here::
+
+        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
+            PROFILE_FILENAME: ClassVar[str] = "user.md"
+            ...
+    """
+
+    PROFILE_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        # Leading ``*/*/`` matches the <app>/<project> scope prefix.
+        return f"*/*/{cls.SCOPE_DIR}/*/{cls.PROFILE_FILENAME}"
+
+
+class UserScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single user (track = ``user``).
+
+    The frontmatter only carries the *file-level* scope (``user_id``,
+    which the path itself already expresses); business attributes like
+    ``group_id`` live inside each entry's structured body — see
+    :class:`StructuredEntry` in :mod:`.entries`.
+    """
+
+    # Scope directory segment read by the path mixins' ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "users"
+
+    # ``user_id`` is required. ``track`` is typed as the single literal
+    # ``"user"`` and defaults to it.
+    user_id: str
+    track: Literal["user"] = "user"
+
+
+class AgentScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single agent (track = ``agent``).
+
+    Same scope-vs-business split as :class:`UserScopedFrontmatter`:
+    ``agent_id`` is the file-level scope; ``group_id`` etc. ride on
+    each entry, not on the file frontmatter.
+    """
+
+    # Scope directory segment read by the path mixins' ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "agents"
+
+    # ``agent_id`` is required. ``track`` is typed as the single literal
+    # ``"agent"`` and defaults to it.
+    agent_id: str
+    track: Literal["agent"] = "agent"
--- a/src/everos/core/persistence/markdown/parsed.py
+++ b/src/everos/core/persistence/markdown/parsed.py
@ -0,0 +1,31 @@
+"""Parsed-markdown data type.
+
+The output shape of :class:`MarkdownReader` is held here, separate
+from the reader implementation: callers that only consume parse
+results don't need to import the reader machinery, and downstream
+modules (writer, business readers) can produce :class:`ParsedMarkdown`
+without going through ``MarkdownReader.read`` if they already hold
+the pieces.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from .entries import Entry
+
+
+@dataclass(frozen=True)
+class ParsedMarkdown:
+    """A markdown document after parsing.
+
+    The dataclass is frozen, so the three attributes cannot be rebound
+    after construction; the ``dict`` / ``list`` they hold are ordinary
+    mutable containers.
+
+    Attributes:
+        frontmatter: Parsed YAML mapping (empty dict if no frontmatter block).
+        body: Document text after the frontmatter block; not entry-stripped.
+        entries: Marker-delimited entries discovered inside ``body``.
+            Defaults to a fresh empty list per instance.
+    """
+
+    frontmatter: dict[str, Any]
+    body: str
+    entries: list[Entry] = field(default_factory=list)
--- a/src/everos/core/persistence/markdown/reader.py
+++ b/src/everos/core/persistence/markdown/reader.py
@ -0,0 +1,42 @@
+"""Markdown file reader.
+
+Loads a markdown document and splits it into:
+
+    1. ``frontmatter`` — parsed YAML (empty dict if absent)
+    2. ``body`` — raw text after the closing ``---`` delimiter
+    3. ``entries`` — marker-delimited spans inside ``body``
+
+The reader is purely parsing; it does not validate frontmatter shape,
+entry content, or cross-references. Higher layers add business-aware
+checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
+
+``parse`` is sync (pure in-memory string processing). ``read`` is async
+and uses :class:`anyio.Path` so file I/O does not block the event loop.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import anyio
+
+from .entries import split_entries
+from .frontmatter import parse_frontmatter
+from .parsed import ParsedMarkdown
+
+
+class MarkdownReader:
+    """Parse markdown files / strings into :class:`ParsedMarkdown`."""
+
+    @staticmethod
+    def parse(text: str) -> ParsedMarkdown:
+        """Parse already-loaded text (no IO)."""
+        meta, body = parse_frontmatter(text)
+        entries = split_entries(body)
+        return ParsedMarkdown(frontmatter=meta, body=body, entries=entries)
+
+    @staticmethod
+    async def read(path: Path) -> ParsedMarkdown:
+        """Read the file at ``path`` and parse its content."""
+        text = await anyio.Path(path).read_text(encoding="utf-8")
+        return MarkdownReader.parse(text)
--- a/src/everos/core/persistence/markdown/writer.py
+++ b/src/everos/core/persistence/markdown/writer.py
@ -0,0 +1,269 @@
+"""Markdown file writer with atomic write semantics.
+
+Atomicity is provided by writing to a same-directory temp file
+(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
+the target. Keeping the temp file in the same directory guarantees the
+rename is on the same filesystem (POSIX rename is atomic only within a
+single fs).
+
+All public methods are async. File I/O (``read_text`` / ``write_text``
+/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
+a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
+in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
+
+In-process per-path locking
+---------------------------
+:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
+the whole file (load frontmatter+body, merge an entry block, atomic
+write the result). The atomic write itself is safe, but the read→write
+window crosses ``await`` points. Concurrent asyncio tasks targeting the
+same path would otherwise lose-update each other (both read N entries,
+both produce N+1, second write overwrites the first → 1 entry lost).
+
+To prevent this, an in-process per-path :class:`asyncio.Lock` is held
+across the entire read-modify-write sequence. Lock objects live on the
+writer instance (not class-level) so they bind to the event loop active
+when the writer was constructed — this avoids the
+"Lock bound to different loop" failure mode that surfaces when
+pytest-asyncio rebuilds the loop between tests but module-level writer
+singletons leak Lock objects across boundaries.
+
+Process-level coordination (multi-process writers against the same
+memory-root) remains the job of
+:func:`everos.core.persistence.locking.memory_root_lock`, which uses
+``fcntl.flock``. The two locks compose: per-path async lock serialises
+tasks within one process, ``memory_root_lock`` serialises processes
+against each other.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import os
+import uuid
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from ..memory_root import MemoryRoot
+from .entries import EntryId
+from .frontmatter import dump_frontmatter
+from .reader import MarkdownReader
+
+
+class MarkdownWriter:
+    """Atomic writer for markdown files inside a memory-root.
+
+    The ``memory_root`` reference is held to enable future enforcement that
+    targets stay within the configured root; current writes do not depend on
+    it for the rename itself (same-dir temp file).
+    """
+
+    def __init__(self, memory_root: MemoryRoot) -> None:
+        self._memory_root = memory_root
+        # Per-path async lock registry. ``setdefault`` is GIL-atomic, so
+        # concurrent callers race only on the dict insert (resolved by
+        # ``setdefault`` returning the existing value), not on the Lock.
+        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
+        # must outlive any task awaiting it; ref-counted GC would race with
+        # those waiters. See Python bpo-28427 for the WeakValueDictionary
+        # multithreading hazard that bites the weak-ref approach.
+        self._path_locks: dict[Path, asyncio.Lock] = {}
+
+    @property
+    def memory_root(self) -> MemoryRoot:
+        return self._memory_root
+
+    def lock_for(self, path: Path) -> asyncio.Lock:
+        """Return the per-path lock; create on first use.
+
+        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
+        can serialise their own multi-step ``read → compute → write``
+        sequences against this writer's single-step ``append`` paths.
+        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
+        re-acquisition of the same lock from within an already-locked
+        critical section (``asyncio.Lock`` is *not* reentrant).
+        """
+        # Resolve to an absolute canonical path so aliases (relative vs.
+        # absolute, symlinks) share the same lock object.
+        key = Path(path).resolve()
+        lock = self._path_locks.get(key)
+        if lock is None:
+            lock = asyncio.Lock()
+            self._path_locks[key] = lock
+        return lock
+
+    async def write(self, path: Path, content: str) -> Path:
+        """Atomically write ``content`` to ``path``.
+
+        Steps:
+            1. ``mkdir -p`` the parent directory.
+            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
+            3. ``flush`` + ``fsync`` the temp file.
+            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).
+
+        Returns:
+            ``path`` (resolved as written).
+        """
+        target = Path(path)
+        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
+        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
+        try:
+            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
+            await anyio.to_thread.run_sync(os.replace, tmp, target)
+        except Exception:
+            # Best-effort cleanup of the staging file on failure.
+            await _unlink_quiet(tmp)
+            raise
+        return target
+
+    async def write_markdown(
+        self,
+        path: Path,
+        *,
+        frontmatter: Mapping[str, Any] | None = None,
+        body: str = "",
+    ) -> Path:
+        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``."""
+        head = dump_frontmatter(frontmatter or {})
+        return await self.write(path, head + body)
+
+    async def append_entry(
+        self,
+        path: Path,
+        *,
+        entry_body: str,
+        entry_id: EntryId,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append a single entry block to a markdown file, merging frontmatter.
+
+        Convenience wrapper around :meth:`append_entries` for single-entry
+        callers. See that method for full semantics.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entry_body: Content between the open and close markers.
+                One leading and trailing newline are added automatically.
+            entry_id: The id to stamp on this entry. The caller normally
+                builds it with :meth:`EntryId.next_for`.
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter (later wins). ``None`` skips the merge.
+
+        Returns:
+            ``path`` (resolved as written).
+        """
+        return await self.append_entries(
+            path,
+            [(entry_body, entry_id)],
+            frontmatter_updates=frontmatter_updates,
+        )
+
+    async def append_entries(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append ``N`` entry blocks in a single locked read-modify-write cycle.
+
+        Compared with calling :meth:`append_entry` ``N`` times, this:
+
+        * Performs one file read + one file write instead of ``N`` of each
+          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
+          already holds many entries).
+        * Holds the per-path lock for one short critical section instead of
+          ``N`` separate acquisitions.
+        * Updates ``frontmatter`` once at the end (no intermediate
+          ``entry_count`` flapping).
+
+        The caller assigns and supplies all :class:`EntryId` values — see
+        :meth:`append_entry` for the rationale. The order in ``entries`` is
+        the order the blocks land in the file.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entries: ``(entry_body, entry_id)`` pairs to append, in order.
+                Empty sequence is allowed; the file is still touched for
+                frontmatter updates if any are supplied.
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter once after all entries are appended.
+
+        Returns:
+            ``path`` (resolved as written).
+        """
+        target = Path(path)
+        async with self.lock_for(target):
+            return await self._append_entries_unlocked(
+                target,
+                entries,
+                frontmatter_updates=frontmatter_updates,
+            )
+
+    async def _append_entries_unlocked(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Same as :meth:`append_entries` but assumes the caller already
+        holds :meth:`lock_for` ``(path)``.
+
+        For use by higher-level writers that perform a multi-step
+        ``read → compute eid → write`` sequence and need to keep the lock
+        held across the read and the write. Public ``append_entries`` /
+        ``append_entry`` always wrap this with the lock.
+
+        Reentrant re-acquisition is unsafe — ``asyncio.Lock`` is not
+        reentrant, so calling this without holding the lock yourself
+        breaks the safety contract.
+        """
+        target = Path(path)
+
+        # 1. Load existing markdown (or initialise empty).
+        if await anyio.Path(target).is_file():
+            parsed = await MarkdownReader.read(target)
+            meta: dict[str, Any] = dict(parsed.frontmatter)
+            body = parsed.body
+        else:
+            meta = {}
+            body = ""
+
+        # 2. Shallow-merge frontmatter updates.
+        if frontmatter_updates:
+            meta.update(frontmatter_updates)
+
+        # 3. Append all entry blocks in order.
+        if entries:
+            if body and not body.endswith("\n"):
+                body += "\n"
+            appended_blocks: list[str] = []
+            for entry_body, entry_id in entries:
+                eid_str = entry_id.format()
+                appended_blocks.append(
+                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
+                    f"<!-- /entry:{eid_str} -->\n"
+                )
+            body = body + "".join(appended_blocks)
+
+        # 4. Atomic write.
+        return await self.write_markdown(target, frontmatter=meta, body=body)
+
+
+def _write_and_fsync(tmp: Path, content: str) -> None:
+    """Sync helper: write + fsync the staging file. Offloaded to a thread."""
+    with open(tmp, "w", encoding="utf-8") as fh:
+        fh.write(content)
+        fh.flush()
+        os.fsync(fh.fileno())
+
+
+async def _unlink_quiet(tmp: Path) -> None:
+    """Best-effort unlink — swallow OSError so the original exception wins."""
+    with contextlib.suppress(OSError):
+        await anyio.Path(tmp).unlink(missing_ok=True)
--- a/src/everos/core/persistence/memory_root.py
+++ b/src/everos/core/persistence/memory_root.py
@ -0,0 +1,243 @@
+"""memory-root path manager.
+
+Single root directory holding all persisted memory:
+
+    User-visible (no dot prefix, edited by humans / agents):
+        agents/      per-agent records
+        users/       per-user records
+        knowledge/   global shared knowledge
+
+    System-managed (dotfile prefix, hidden by default in ls / Finder):
+        .index/             derived indexes (rebuildable from markdown)
+            sqlite/         system.db (+ WAL/SHM), ome.db, ome.aps.db
+            lancedb/        LanceDB tables
+        .tmp/               atomic-write staging directory
+        .lock               single-process lock anchor (created on demand by
+                            ``memory_root_lock``)
+
+    User-editable (at the root):
+        ome.toml            OME strategy overrides (hot-reloaded)
+
+The cascade queue, LSN watermark, and change audit all live in
+``system.db`` (table ``md_change_state``), not in separate dotfiles.
+
+The default location and tunables come from :class:`everos.config.Settings`
+(loaded from ``config/default.toml`` + ``EVEROS_*`` environment variables);
+:meth:`MemoryRoot.default` resolves the configured path.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+# ── app / project directory-name convention ──────────────────────────────────
+#
+# A memory root is partitioned by ``<app>/<project>`` *before* the user-visible
+# scope dirs (``agents`` / ``users`` / ``knowledge``), so memory for different
+# (app, project) pairs never shares a directory. The reserved id ``"default"``
+# materialises as ``default_app`` / ``default_project`` on disk (rather than a
+# bare ``default``) so a default space is visually distinct from a user-named
+# directory; every other id maps to itself.
+#
+# The mapping is symmetric: the cascade path parser reverses it (see
+# :func:`app_id_from_dir`) to recover the ids from an on-disk path. The write
+# side (here) and the read side (cascade) MUST stay in lockstep, or rebuilt
+# rows carry app/project that disagree with what was written. ``default_app`` /
+# ``default_project`` are therefore reserved directory names.
+_DEFAULT_SCOPE_ID = "default"  # reserved id, shared by app and project
+_DEFAULT_APP_DIR = "default_app"  # on-disk dir for app_id == "default"
+_DEFAULT_PROJECT_DIR = "default_project"  # on-disk dir for project_id == "default"
+
+# Path to the shipped OME override template; copied to ``<root>/ome.toml`` on
+# first ``ensure()`` so users have a real file to edit instead of having to
+# create one from scratch. ``parents[2]`` is the ``src/everos/`` package root
+# (memory_root.py sits at ``core/persistence/memory_root.py``).
+_OME_TEMPLATE_PATH = Path(__file__).parents[2] / "config" / "default_ome.toml"
+
+
+def app_dir_name(app_id: str) -> str:
+    """Map an ``app_id`` to its on-disk directory name."""
+    return _DEFAULT_APP_DIR if app_id == _DEFAULT_SCOPE_ID else app_id
+
+
+def project_dir_name(project_id: str) -> str:
+    """Map a ``project_id`` to its on-disk directory name."""
+    return _DEFAULT_PROJECT_DIR if project_id == _DEFAULT_SCOPE_ID else project_id
+
+
+def app_id_from_dir(dir_name: str) -> str:
+    """Inverse of :func:`app_dir_name` — recover the ``app_id`` from a dir name."""
+    return _DEFAULT_SCOPE_ID if dir_name == _DEFAULT_APP_DIR else dir_name
+
+
+def project_id_from_dir(dir_name: str) -> str:
+    """Inverse of :func:`project_dir_name` — recover the ``project_id``."""
+    return _DEFAULT_SCOPE_ID if dir_name == _DEFAULT_PROJECT_DIR else dir_name
+
+
+@dataclass(frozen=True, init=False)
+class MemoryRoot:
+    """Path manager for a memory-root directory.
+
+    Constructor accepts any path-like (``str`` or ``Path``); it is normalised
+    to an absolute, resolved ``Path`` so equality and hashing are stable
+    regardless of how the caller spells the path. ``init=False`` is paired
+    with a hand-written ``__init__`` so the input type (``Path | str``) is
+    decoupled from the stored field type (``Path``) — stdlib dataclass has
+    no converter slot, and Pyright would otherwise reject ``MemoryRoot(s)``
+    where ``s`` is a ``str``.
+    """
+
+    root: Path
+
+    def __init__(self, root: Path | str) -> None:
+        # ``frozen=True`` forbids attribute assignment, so go through
+        # ``object.__setattr__`` to install the normalised Path field.
+        resolved = Path(root).expanduser().resolve()
+        object.__setattr__(self, "root", resolved)
+
+    @classmethod
+    def default(cls) -> MemoryRoot:
+        """Return the memory-root from :class:`everos.config.Settings`.
+
+        The effective default lives in ``config/default.toml`` (``[memory]
+        root``); environment variable ``EVEROS_MEMORY__ROOT`` overrides it.
+        """
+        # Lazy import to keep this module dependency-free at import time.
+        from everos.config import load_settings
+
+        return cls(load_settings().memory.root)
+
+    # ── User-visible (partitioned by app / project) ──────────────────────────
+    #
+    # These take ``(app_id, project_id)`` because the scope dirs hang off the
+    # ``<root>/<app>/<project>/`` prefix; they are request-level inputs, never
+    # instance state. Both default to ``"default"`` so call sites that don't
+    # yet carry scope still resolve to the default space.
+
+    def agents_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
+        """``<root>/<app>/<project>/agents/`` — per-agent records."""
+        return (
+            self.root / app_dir_name(app_id) / project_dir_name(project_id) / "agents"
+        )
+
+    def users_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
+        """``<root>/<app>/<project>/users/`` — per-user records."""
+        return self.root / app_dir_name(app_id) / project_dir_name(project_id) / "users"
+
+    def knowledge_dir(
+        self, app_id: str = "default", project_id: str = "default"
+    ) -> Path:
+        """``<root>/<app>/<project>/knowledge/`` — shared knowledge."""
+        return (
+            self.root
+            / app_dir_name(app_id)
+            / project_dir_name(project_id)
+            / "knowledge"
+        )
+
+    # ── System-managed (dotfiles) ───────────────────────────────────────────
+
+    @property
+    def index_dir(self) -> Path:
+        """``<root>/.index/`` — derived index root."""
+        return self.root / ".index"
+
+    @property
+    def lancedb_dir(self) -> Path:
+        """``<root>/.index/lancedb/`` — LanceDB table root."""
+        return self.index_dir / "lancedb"
+
+    @property
+    def sqlite_dir(self) -> Path:
+        """``<root>/.index/sqlite/`` — SQLite system DB root.
+
+        Holds ``system.db`` plus its sidecars (``-wal`` / ``-shm`` in WAL
+        mode). Symmetric with :attr:`lancedb_dir`.
+        """
+        return self.index_dir / "sqlite"
+
+    @property
+    def system_db(self) -> Path:
+        """``<root>/.index/sqlite/system.db`` — SQLite DB for system
+        state, audit log, task queue, LSN watermark, and other metadata.
+        """
+        return self.sqlite_dir / "system.db"
+
+    @property
+    def ome_db(self) -> Path:
+        """``<root>/.index/sqlite/ome.db`` — SQLite DB backing the Offline
+        Memory Engine's own state: run records, counter store, idle store.
+        Symmetric with :attr:`system_db`.
+        """
+        return self.sqlite_dir / "ome.db"
+
+    @property
+    def ome_aps_db(self) -> Path:
+        """``<root>/.index/sqlite/ome.aps.db`` — SQLite DB holding the
+        APScheduler jobstore for the Offline Memory Engine. Split from
+        :attr:`ome_db` so APS's sync SQLAlchemy writer and OME's async
+        aiosqlite writer never contend for the same sqlite file lock.
+        """
+        return self.sqlite_dir / "ome.aps.db"
+
+    @property
+    def ome_config(self) -> Path:
+        """``<root>/ome.toml`` — user-editable OME strategy overrides.
+
+        Drop a file here to toggle strategies on/off or tweak per-strategy
+        knobs (max_retries, gate, cron …) without restarting the server.
+        The engine watches this file and hot-reloads changes within ~2 s.
+
+        Example to disable foresight and user-profile extraction::
+
+            [strategies.extract_foresight]
+            enabled = false
+
+            [strategies.extract_user_profile]
+            enabled = false
+        """
+        return self.root / "ome.toml"
+
+    @property
+    def lock_file(self) -> Path:
+        """``<root>/.lock`` — single-process exclusive lock anchor."""
+        return self.root / ".lock"
+
+    @property
+    def tmp_dir(self) -> Path:
+        """``<root>/.tmp/`` — staging directory for batch / multi-step writes.
+
+        Note:
+            ``MarkdownWriter`` does *not* use this for atomic single-file
+            writes; it uses a same-directory temp file to guarantee a
+            same-filesystem rename. This directory is reserved for callers
+            that need scratch space outside any single target directory.
+        """
+        return self.root / ".tmp"
+
+    # ── Operations ──────────────────────────────────────────────────────────
+
+    def ensure(self) -> None:
+        """Create the memory-root and the runtime-required dotfile dirs.
+
+        User-visible directories (``agents/`` / ``users/`` / ``knowledge/``)
+        are *not* pre-created — they appear on first write of their records.
+        Only directories the runtime infrastructure requires are made:
+
+            <root>/
+            <root>/.index/
+            <root>/.index/sqlite/
+            <root>/.index/lancedb/
+            <root>/.tmp/
+        """
+        self.root.mkdir(parents=True, exist_ok=True)
+        self.index_dir.mkdir(parents=True, exist_ok=True)
+        self.sqlite_dir.mkdir(parents=True, exist_ok=True)
+        self.lancedb_dir.mkdir(parents=True, exist_ok=True)
+        self.tmp_dir.mkdir(parents=True, exist_ok=True)
+        # Materialize the OME override template on first run; existence-only
+        # check preserves any edits the user has already made.
+        if not self.ome_config.exists():
+            self.ome_config.write_bytes(_OME_TEMPLATE_PATH.read_bytes())
--- a/src/everos/core/persistence/sqlite/__init__.py
+++ b/src/everos/core/persistence/sqlite/__init__.py
@ -0,0 +1,42 @@
+"""SQLite async persistence (SQLModel + SQLAlchemy 2.0 + aiosqlite).
+
+External usage (engine + sessions):
+    from everos.core.persistence.sqlite import (
+        create_system_engine, create_session_factory, session_scope,
+    )
+
+External usage (ORM model basics — re-exported from sqlmodel):
+    from everos.core.persistence.sqlite import (
+        SQLModel, Field, Relationship, BaseTable,
+    )
+
+External usage (generic CRUD repository base):
+    from everos.core.persistence.sqlite import RepoBase
+
+The ``system_db`` is the everos
+``<memory_root>/.index/sqlite/system.db`` SQLite file holding system
+state, audit log, task queue, LSN watermark, and other metadata.
+"""
+
+# Re-export key sqlmodel symbols so business code has a single canonical
+# entry point (``everos.core.persistence.sqlite``) for ORM authoring.
+from sqlmodel import Field as Field
+from sqlmodel import Relationship as Relationship
+from sqlmodel import SQLModel as SQLModel
+
+from .base import BaseTable as BaseTable
+from .engine import create_system_engine as create_system_engine
+from .repository import RepoBase as RepoBase
+from .session import create_session_factory as create_session_factory
+from .session import session_scope as session_scope
+
+__all__ = [
+    "BaseTable",
+    "Field",
+    "Relationship",
+    "RepoBase",
+    "SQLModel",
+    "create_session_factory",
+    "create_system_engine",
+    "session_scope",
+]
--- a/src/everos/core/persistence/sqlite/base.py
+++ b/src/everos/core/persistence/sqlite/base.py
@ -0,0 +1,112 @@
+"""Common SQLModel base for everos tables.
+
+:class:`BaseTable` adds ``created_at`` / ``updated_at`` columns. The
+``updated_at`` column auto-refreshes on UPDATE through SA's ``onupdate``
+hook (no explicit assignment needed in business code).
+
+The **two-zone storage-UTC discipline** is enforced by a SQLAlchemy
+:class:`TypeDecorator` (:class:`UtcDateTimeColumn`) used as the SQL
+column type for every datetime field:
+
+* **on write** — ``process_bind_param`` converts every datetime to
+  aware UTC before SQLAlchemy emits the bound parameter. This covers
+  *every* SQLAlchemy write path uniformly:
+
+  - ORM ``session.add()`` / ``session.merge()`` (unit-of-work flush)
+  - Core ``session.execute(insert(...).values(...))``
+  - Core ``session.execute(update(...).values(...))``
+  - Bulk ``bulk_insert_mappings`` / ``bulk_save_objects``
+  - Raw SQL with bound parameters
+
+  Reaching into the column type is the only place SQLAlchemy guarantees
+  *every* write path passes through. Mapper events (``before_insert`` /
+  ``before_update``) only fire on the ORM unit-of-work path and would
+  silently miss Core statements — which
+  :mod:`everos.infra.persistence.sqlite.repos.md_change_state` uses heavily.
+
+* **on read** — ``process_result_value`` re-attaches ``tzinfo=UTC`` to
+  every naive datetime returned from SQLite (which has no native tz
+  storage and always returns naive). Callers therefore never observe a
+  naive datetime regardless of which read API they use.
+
+Subclass with ``table=True`` to declare a real SQLite table::
+
+    from sqlmodel import Field
+
+    class Sender(BaseTable, table=True):
+        id: int | None = Field(default=None, primary_key=True)
+        name: str
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import Any
+
+from sqlalchemy import DateTime
+from sqlalchemy import types as sa_types
+from sqlmodel import Field, SQLModel
+
+from everos.component.utils.datetime import UtcDatetime, ensure_utc, get_utc_now
+
+
+class UtcDateTimeColumn(sa_types.TypeDecorator[_dt.datetime]):
+    """Datetime column type that keeps stored values in UTC.
+
+    How it works:
+
+    * ``impl = DateTime`` — delegates the SQL type to the dialect's
+      standard DateTime (TEXT ISO-8601 on SQLite; ``TIMESTAMP`` on
+      Postgres etc.).
+    * ``process_bind_param`` — write hook. Every ``datetime`` is passed
+      through :func:`ensure_utc` (aware values are converted to UTC,
+      naive values are taken as already-UTC per that helper's storage-
+      boundary convention). ``None`` and non-datetime values are handed
+      to the dialect untouched.
+    * ``process_result_value`` — read hook. A naive ``datetime`` gets
+      ``tzinfo=UTC`` attached; anything else (aware datetimes, ``None``)
+      is returned as-is.
+
+    ``cache_ok = True`` tells SQLAlchemy that statements using this type
+    may be cached: the type carries no per-instance mutable state.
+    """
+
+    impl = DateTime
+    cache_ok = True
+
+    def process_bind_param(
+        self, value: _dt.datetime | None, _dialect: Any
+    ) -> _dt.datetime | None:
+        # Only real datetimes are normalized; ``None`` and foreign values
+        # fall through unchanged.
+        if isinstance(value, _dt.datetime):
+            return ensure_utc(value)
+        return value
+
+    def process_result_value(
+        self, value: _dt.datetime | None, _dialect: Any
+    ) -> _dt.datetime | None:
+        # A naive datetime coming back from storage is stamped as UTC.
+        is_naive = isinstance(value, _dt.datetime) and value.tzinfo is None
+        return value.replace(tzinfo=_dt.UTC) if is_naive else value
+
+
+class BaseTable(SQLModel):
+    """Mixin providing ``created_at`` / ``updated_at`` columns.
+
+    Both fields default to :func:`get_utc_now` when an instance is built
+    without an explicit value, i.e. on INSERT.
+    ``updated_at`` additionally carries an ``onupdate`` callable, so
+    SQLAlchemy refreshes it on every UPDATE — do not set it manually
+    unless overriding intentionally.
+
+    Both columns use :class:`UtcDateTimeColumn` as the SQL column type
+    so storage-UTC is enforced **at the SQLAlchemy bind layer** on every
+    write path (ORM + Core + bulk + raw bound params).
+    """
+
+    # Row creation timestamp; filled by ``get_utc_now`` when omitted.
+    created_at: UtcDatetime = Field(
+        default_factory=get_utc_now,
+        sa_type=UtcDateTimeColumn,
+    )
+    # Last-modification timestamp; same default as ``created_at`` and
+    # re-evaluated through the column-level ``onupdate`` hook on UPDATE.
+    updated_at: UtcDatetime = Field(
+        default_factory=get_utc_now,
+        sa_type=UtcDateTimeColumn,
+        sa_column_kwargs={"onupdate": get_utc_now},
+    )
--- a/src/everos/core/persistence/sqlite/engine.py
+++ b/src/everos/core/persistence/sqlite/engine.py
@ -0,0 +1,74 @@
+"""Async SQLAlchemy engine factory + per-connection PRAGMA listener.
+
+The engine connects through ``aiosqlite`` (SA URL ``sqlite+aiosqlite://``).
+PRAGMAs are *per-connection* — they must be re-applied every time the
+SA pool opens a new connection. We attach a ``connect`` event listener on
+the engine's underlying sync engine for that purpose.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from sqlalchemy import event
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
+
+from everos.config import SqliteSettings
+
+
+def create_system_engine(
+    db_path: Path,
+    sqlite_settings: SqliteSettings,
+    *,
+    echo: bool = False,
+) -> AsyncEngine:
+    """Build the async SQLAlchemy engine backing the everos system DB.
+
+    By convention the path is ``MemoryRoot.system_db``; the database
+    stores system state, audit log, task queue, LSN watermark, and other
+    metadata.
+
+    Args:
+        db_path: Filesystem location of the system DB file. Its parent
+            directory is created when absent.
+        sqlite_settings: PRAGMA tunables (journal_mode, synchronous,
+            foreign_keys, temp_store, busy_timeout, journal_size_limit,
+            cache_size) applied to every new connection.
+        echo: When ``True``, SQLAlchemy logs every statement (development).
+
+    Returns:
+        An :class:`AsyncEngine` ready for use with :class:`AsyncSession`.
+    """
+    parent_dir = db_path.parent
+    parent_dir.mkdir(parents=True, exist_ok=True)
+
+    # URL form: three slashes introduce a relative path, four an absolute
+    # one. An absolute ``db_path`` stringifies with a leading ``/``, which
+    # supplies the fourth slash.
+    system_engine = create_async_engine(
+        f"sqlite+aiosqlite:///{db_path}",
+        echo=echo,
+        future=True,
+    )
+
+    _register_pragma_listener(system_engine, sqlite_settings)
+    return system_engine
+
+
+def _register_pragma_listener(
+    engine: AsyncEngine,
+    sqlite_settings: SqliteSettings,
+) -> None:
+    """Hook PRAGMA setup into the engine's ``connect`` event.
+
+    The listener is attached to ``engine.sync_engine`` and runs for every
+    new DBAPI connection, issuing one PRAGMA per tunable taken from
+    ``sqlite_settings``.
+    """
+    cfg = sqlite_settings
+
+    def _apply_pragmas(dbapi_connection, _connection_record) -> None:  # type: ignore[no-untyped-def]
+        pragma_cursor = dbapi_connection.cursor()
+        try:
+            pragma_cursor.execute(f"PRAGMA journal_mode={cfg.journal_mode}")
+            pragma_cursor.execute(f"PRAGMA synchronous={cfg.synchronous}")
+            pragma_cursor.execute(
+                f"PRAGMA foreign_keys={'ON' if cfg.foreign_keys else 'OFF'}"
+            )
+            pragma_cursor.execute(f"PRAGMA temp_store={cfg.temp_store}")
+            pragma_cursor.execute(f"PRAGMA busy_timeout={cfg.busy_timeout_ms}")
+            pragma_cursor.execute(
+                f"PRAGMA journal_size_limit={cfg.journal_size_limit_bytes}"
+            )
+            # cache_size sign convention: negative = KB, positive = pages.
+            pragma_cursor.execute(f"PRAGMA cache_size=-{cfg.cache_size_kb}")
+        finally:
+            # The cursor is released even when a PRAGMA statement fails.
+            pragma_cursor.close()
+
+    event.listen(engine.sync_engine, "connect", _apply_pragmas)
--- a/src/everos/core/persistence/sqlite/repository.py
+++ b/src/everos/core/persistence/sqlite/repository.py
@ -0,0 +1,166 @@
+"""Generic CRUD repository for SQLModel-backed tables.
+
+``RepoBase`` is a pure generic CRUD helper that sits alongside
+:class:`BaseTable`. It knows nothing about a storage runtime — concrete
+repos either pass ``session_factory`` explicitly (typical in tests) or
+override :meth:`_factory_lookup` to pull the singleton from their
+storage manager (typical in :mod:`everos.infra.persistence.sqlite.repos`).
+
+Each method opens its own ``session_scope`` (auto rollback on exception,
+session closed at end). For multi-step transactional work, use the
+session factory directly via :attr:`session_factory`.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from sqlalchemy import func
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+from sqlmodel import SQLModel, select
+
+from .session import session_scope
+
+
+class RepoBase[T: SQLModel]:
+    """Reusable CRUD helper bound to a single SQLModel table.
+
+    Concrete repositories subclass this and set ``model``. The session
+    factory can be supplied in either of two ways:
+
+    1. **Explicit (tests / DI)** — hand it to the constructor::
+
+           repo = SenderRepo(session_factory)
+
+    2. **Lazy hook (production singletons)** — override
+       :meth:`_factory_lookup`, which lets the repo live as a
+       module-level singleton before any factory exists::
+
+           class _SenderRepo(RepoBase[Sender]):
+               model = Sender
+               def _factory_lookup(self):
+                   from everos.infra.persistence.sqlite.sqlite_manager import (
+                       get_session_factory,
+                   )
+                   return get_session_factory()
+
+           sender_repo = _SenderRepo()
+           await sender_repo.add(Sender(name="alice"))
+
+    Every public method opens its own ``session_scope``; use
+    :attr:`session_factory` directly for multi-step transactions.
+    """
+
+    model: type[T]
+
+    def __init__(
+        self,
+        session_factory: async_sessionmaker[AsyncSession] | None = None,
+    ) -> None:
+        """Remember an explicit factory; ``None`` defers to ``_factory_lookup``."""
+        self._factory_override = session_factory
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Locate the session factory lazily. Meant to be overridden.
+
+        The base class does not know where a runtime singleton lives —
+        that belongs to the infra subclass. The default implementation
+        raises so that a forgotten override fails loudly.
+
+        Raises:
+            NotImplementedError: Always, unless overridden.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__}: pass session_factory= to __init__ "
+            "or override _factory_lookup() to wire the storage manager."
+        )
+
+    @property
+    def _factory(self) -> async_sessionmaker[AsyncSession]:
+        # The explicit constructor argument wins; the lookup hook is only
+        # consulted when no factory was passed in.
+        explicit = self._factory_override
+        return explicit if explicit is not None else self._factory_lookup()
+
+    @property
+    def session_factory(self) -> async_sessionmaker[AsyncSession]:
+        """The session factory in use (for multi-step transactions)."""
+        return self._factory
+
+    # ── Create ─────────────────────────────────────────────────────────────
+
+    async def add(self, instance: T) -> T:
+        """Insert a single row, commit, refresh it, and return it."""
+        async with session_scope(self._factory) as session:
+            session.add(instance)
+            await session.commit()
+            await session.refresh(instance)
+        return instance
+
+    async def add_many(self, instances: Sequence[T]) -> list[T]:
+        """Insert several rows within one transaction and return them."""
+        rows = list(instances)
+        async with session_scope(self._factory) as session:
+            session.add_all(rows)
+            await session.commit()
+            # Refresh after the commit so each returned row is reloaded.
+            for row in rows:
+                await session.refresh(row)
+        return rows
+
+    # ── Read ───────────────────────────────────────────────────────────────
+
+    async def get_by_id(self, id_value: Any) -> T | None:
+        """Look a row up by primary key; ``None`` when it does not exist."""
+        async with session_scope(self._factory) as session:
+            return await session.get(self.model, id_value)
+
+    async def list_all(self) -> list[T]:
+        """Fetch every row of the table (unfiltered, unordered)."""
+        async with session_scope(self._factory) as session:
+            result = await session.execute(select(self.model))
+            return list(result.scalars().all())
+
+    async def find_where(self, **filters: Any) -> list[T]:
+        """Equality-only filtering, e.g. ``find_where(name="alice", active=True)``."""
+        query = select(self.model).filter_by(**filters)
+        async with session_scope(self._factory) as session:
+            result = await session.execute(query)
+            return list(result.scalars().all())
+
+    async def find_one(self, **filters: Any) -> T | None:
+        """Return the first row matching ``filters`` (unordered), else ``None``."""
+        query = select(self.model).filter_by(**filters).limit(1)
+        async with session_scope(self._factory) as session:
+            result = await session.execute(query)
+            return result.scalars().first()
+
+    async def count(self) -> int:
+        """Number of rows in the table (unfiltered)."""
+        query = select(func.count()).select_from(self.model)
+        async with session_scope(self._factory) as session:
+            total = (await session.execute(query)).scalar_one()
+            return int(total)
+
+    # ── Update ─────────────────────────────────────────────────────────────
+
+    async def update(self, instance: T) -> T:
+        """Write back an instance whose primary key already exists.
+
+        ``session.merge`` is used so that detached or freshly constructed
+        instances are attached to the session first; the merged copy is
+        what gets refreshed and returned. ``BaseTable.updated_at`` is
+        bumped by SA's ``onupdate`` hook.
+        """
+        async with session_scope(self._factory) as session:
+            attached = await session.merge(instance)
+            await session.commit()
+            await session.refresh(attached)
+        return attached
+
+    # ── Delete ─────────────────────────────────────────────────────────────
+
+    async def delete(self, instance: T) -> None:
+        """Remove the row behind ``instance`` (its primary key must be set)."""
+        async with session_scope(self._factory) as session:
+            # Merge first so the delete acts on a session-attached object.
+            attached = await session.merge(instance)
+            await session.delete(attached)
+            await session.commit()
+
+    async def delete_by_id(self, id_value: Any) -> bool:
+        """Remove a row by primary key; ``True`` only if a row was deleted."""
+        async with session_scope(self._factory) as session:
+            target = await session.get(self.model, id_value)
+            if target is not None:
+                await session.delete(target)
+                await session.commit()
+                return True
+            return False
--- a/src/everos/core/persistence/sqlite/session.py
+++ b/src/everos/core/persistence/sqlite/session.py
@ -0,0 +1,45 @@
+"""Async session factory + session scope context manager."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
+
+
+def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
+    """Return an :class:`async_sessionmaker` bound to ``engine``.
+
+    Sessions are created with ``expire_on_commit=False`` so attributes of
+    loaded instances stay readable after a commit — the conventional
+    setup for async SA usage.
+    """
+    factory = async_sessionmaker(
+        bind=engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
+    )
+    return factory
+
+
+@asynccontextmanager
+async def session_scope(
+    session_factory: async_sessionmaker[AsyncSession],
+) -> AsyncIterator[AsyncSession]:
+    """Provide an :class:`AsyncSession` guarded by rollback-on-error.
+
+    If the ``async with`` body raises an ``Exception``, the session is
+    rolled back and the exception is re-raised. The session is released
+    when the scope exits. Nothing is committed implicitly: the caller
+    must ``await session.commit()`` on the success path.
+
+    Usage:
+        factory = create_session_factory(engine)
+        async with session_scope(factory) as session:
+            session.add(some_record)
+            await session.commit()
+    """
+    async with session_factory() as db_session:
+        try:
+            yield db_session
+        except Exception:
+            # Undo any pending work, then let the caller see the error.
+            await db_session.rollback()
+            raise
--- a/src/everos/entrypoints/__init__.py
+++ b/src/everos/entrypoints/__init__.py
@ -0,0 +1,5 @@
+"""Presentation layer.
+
+Translates external requests (CLI / HTTP) into service-layer calls.
+Contains no business logic.
+"""
--- a/src/everos/entrypoints/api/__init__.py
+++ b/src/everos/entrypoints/api/__init__.py
@ -0,0 +1,11 @@
+"""HTTP REST entry point (FastAPI), routed by resource.
+
+External usage:
+    from everos.entrypoints.api import create_app
+
+    app = create_app()
+"""
+
+from .app import create_app as create_app
+
+__all__ = ["create_app"]
--- a/src/everos/entrypoints/api/app.py
+++ b/src/everos/entrypoints/api/app.py
@ -0,0 +1,124 @@
+"""FastAPI application factory.
+
+Wires CORS + the project's middleware stack + global exception handler +
+lifespan, and registers the route modules (``health``, ``metrics``,
+``memorize``, ``search``, ``get``).
+"""
+
+from __future__ import annotations
+
+import os
+
+from fastapi import FastAPI, HTTPException
+from fastapi.exceptions import RequestValidationError
+from fastapi.middleware.cors import CORSMiddleware
+
+from everos.core.lifespan import (
+    LifespanProvider,
+    MetricsLifespanProvider,
+    build_lifespan,
+)
+from everos.core.middleware import (
+    DEFAULT_CORS_ALLOW_CREDENTIALS,
+    DEFAULT_CORS_ALLOW_HEADERS,
+    DEFAULT_CORS_ALLOW_METHODS,
+    DEFAULT_CORS_ORIGINS,
+    ProfileMiddleware,
+    PrometheusMiddleware,
+    global_exception_handler,
+)
+from everos.core.observability.logging import get_logger
+
+from .lifespans import (
+    CascadeLifespanProvider,
+    LanceDBLifespanProvider,
+    LLMLifespanProvider,
+    OmeLifespanProvider,
+    SqliteLifespanProvider,
+)
+from .routes import (
+    get,
+    health,
+    memorize,
+    metrics,
+    search,
+)
+
+logger = get_logger(__name__)
+
+
+def _docs_enabled() -> bool:
+    """Report whether the docs endpoints should be served.
+
+    ``/docs``, ``/redoc`` and ``/openapi.json`` are enabled only when the
+    ``ENV`` environment variable equals ``DEV`` (case-insensitive); an
+    unset variable is treated as ``prod``.
+    """
+    env_name = os.getenv("ENV", "prod")
+    return env_name.upper() == "DEV"
+
+
+def create_app(
+    *,
+    cors_origins: list[str] | None = None,
+    cors_allow_credentials: bool = DEFAULT_CORS_ALLOW_CREDENTIALS,
+    cors_allow_methods: list[str] | None = None,
+    cors_allow_headers: list[str] | None = None,
+    lifespan_providers: list[LifespanProvider] | None = None,
+) -> FastAPI:
+    """Build the FastAPI application instance.
+
+    For every list argument ``None`` means "use the project default". An
+    explicitly passed list — including an empty one — is used as given,
+    so a caller can lock CORS down without silently falling back to the
+    default.
+
+    Args:
+        cors_origins: Allowed CORS origins; ``None`` selects
+            ``DEFAULT_CORS_ORIGINS``.
+        cors_allow_credentials: Whether credentials are allowed; defaults
+            to ``DEFAULT_CORS_ALLOW_CREDENTIALS``.
+        cors_allow_methods: Allowed CORS methods; ``None`` selects
+            ``DEFAULT_CORS_ALLOW_METHODS``.
+        cors_allow_headers: Allowed CORS headers; ``None`` selects
+            ``DEFAULT_CORS_ALLOW_HEADERS``.
+        lifespan_providers: Optional list of LifespanProvider; ``None``
+            selects ``[MetricsLifespanProvider(), LLMLifespanProvider(),
+            SqliteLifespanProvider(), LanceDBLifespanProvider(),
+            CascadeLifespanProvider(), OmeLifespanProvider()]``.
+
+    Returns:
+        FastAPI: Configured application instance.
+    """
+    enable_docs = _docs_enabled()
+
+    if lifespan_providers is None:
+        lifespan_providers = [
+            MetricsLifespanProvider(),
+            LLMLifespanProvider(),
+            SqliteLifespanProvider(),
+            LanceDBLifespanProvider(),
+            CascadeLifespanProvider(),
+            OmeLifespanProvider(),
+        ]
+
+    # Resolve CORS defaults with ``is None`` rather than truthiness: an
+    # empty list is a legitimate "allow nothing" setting and must not be
+    # replaced by the default.
+    allow_origins = DEFAULT_CORS_ORIGINS if cors_origins is None else cors_origins
+    allow_methods = (
+        DEFAULT_CORS_ALLOW_METHODS if cors_allow_methods is None else cors_allow_methods
+    )
+    allow_headers = (
+        DEFAULT_CORS_ALLOW_HEADERS if cors_allow_headers is None else cors_allow_headers
+    )
+
+    app = FastAPI(
+        title="everos",
+        version="0.1.0",
+        description="md-first memory extraction framework",
+        lifespan=build_lifespan(lifespan_providers),
+        # Docs endpoints are disabled entirely (``None``) outside dev.
+        docs_url="/docs" if enable_docs else None,
+        redoc_url="/redoc" if enable_docs else None,
+        openapi_url="/openapi.json" if enable_docs else None,
+    )
+
+    # Exception handlers: HTTPException, validation errors, plus a fallback.
+    app.add_exception_handler(HTTPException, global_exception_handler)
+    app.add_exception_handler(RequestValidationError, global_exception_handler)
+    app.add_exception_handler(Exception, global_exception_handler)
+
+    # Middleware order: earlier `add_middleware` calls become inner, later ones outer.
+    # CORS innermost (matches base_app.py legacy pattern).
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=allow_origins,
+        allow_credentials=cors_allow_credentials,
+        allow_methods=allow_methods,
+        allow_headers=allow_headers,
+    )
+    app.add_middleware(PrometheusMiddleware)
+    app.add_middleware(ProfileMiddleware)
+
+    # Routes.
+    app.include_router(health.router)
+    app.include_router(metrics.router)
+    app.include_router(memorize.router)
+    app.include_router(search.router)
+    app.include_router(get.router)
+
+    logger.info("app_created", docs_enabled=enable_docs)
+    return app
--- a/src/everos/entrypoints/api/lifespans/__init__.py
+++ b/src/everos/entrypoints/api/lifespans/__init__.py
@ -0,0 +1,35 @@
+"""HTTP API lifespan providers.
+
+Concrete :class:`everos.core.lifespan.LifespanProvider` implementations
+for the storage + chassis backends this entrypoint composes. They live next to
+``app.py`` because they are *application-bootstrap* details, not
+generic chassis: a different deployment mode (CLI, embedded, batch
+worker) may compose a different set of providers.
+
+Putting these here also keeps ``core.lifespan`` free of concrete-
+backend imports — the chassis stays portable.
+
+External usage::
+
+    from everos.entrypoints.api.lifespans import (
+        LLMLifespanProvider,
+        SqliteLifespanProvider,
+        LanceDBLifespanProvider,
+        CascadeLifespanProvider,
+        OmeLifespanProvider,
+    )
+"""
+
+from .cascade import CascadeLifespanProvider as CascadeLifespanProvider
+from .lancedb import LanceDBLifespanProvider as LanceDBLifespanProvider
+from .llm import LLMLifespanProvider as LLMLifespanProvider
+from .ome import OmeLifespanProvider as OmeLifespanProvider
+from .sqlite import SqliteLifespanProvider as SqliteLifespanProvider
+
+__all__ = [
+    "CascadeLifespanProvider",
+    "LLMLifespanProvider",
+    "LanceDBLifespanProvider",
+    "OmeLifespanProvider",
+    "SqliteLifespanProvider",
+]
--- a/src/everos/entrypoints/api/lifespans/cascade.py
+++ b/src/everos/entrypoints/api/lifespans/cascade.py
@ -0,0 +1,55 @@
+"""Cascade lifespan provider — starts/stops :class:`CascadeOrchestrator`.
+
+Ordered after SqliteLifespan + LanceDBLifespan: the orchestrator
+depends on both stores being ready before its watcher / scanner /
+worker tasks can take the first row.
+
+Construction reads the live :class:`Settings` to build the embedding +
+tokenizer providers. If either is misconfigured the lifespan fails
+fast — the daemon would be useless without them anyway.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.component.embedding import build_embedding_provider
+from everos.component.tokenizer import build_tokenizer
+from everos.config import load_settings
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+from everos.core.persistence import MemoryRoot
+from everos.memory.cascade import CascadeOrchestrator
+
+logger = get_logger(__name__)
+
+
+class CascadeLifespanProvider(LifespanProvider):
+    """Own the cascade orchestrator for the lifetime of the app.
+
+    ``startup`` builds a :class:`CascadeOrchestrator` from the default
+    memory root plus freshly built embedding / tokenizer providers and
+    starts it; ``shutdown`` stops it again.
+    """
+
+    def __init__(self, order: int = 12) -> None:
+        super().__init__(name="cascade", order=order)
+        # Populated by ``startup``; ``None`` while nothing is running.
+        self._orchestrator: CascadeOrchestrator | None = None
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Build and start the orchestrator; returns the orchestrator."""
+        cfg = load_settings()
+        root = MemoryRoot.default()
+        root.ensure()
+
+        embedding_provider = build_embedding_provider(cfg.embedding)
+        token_counter = build_tokenizer()
+        orchestrator = CascadeOrchestrator(
+            memory_root=root,
+            embedder=embedding_provider,
+            tokenizer=token_counter,
+        )
+        # Keep the handle before starting, so it is retained even when
+        # ``start`` raises.
+        self._orchestrator = orchestrator
+        await orchestrator.start()
+        logger.info("cascade_lifespan_ready")
+        return orchestrator
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Stop the orchestrator if one was created; otherwise do nothing."""
+        orchestrator = self._orchestrator
+        if orchestrator is None:
+            return
+        await orchestrator.stop()
+        self._orchestrator = None
--- a/src/everos/entrypoints/api/lifespans/lancedb.py
+++ b/src/everos/entrypoints/api/lifespans/lancedb.py
@ -0,0 +1,55 @@
+"""LanceDB lifespan provider (HTTP API entrypoint).
+
+Startup:
+    Open the connection via ``get_connection`` (lazy, idempotent).
+    Importing :mod:`everos.infra.persistence.lancedb` also triggers the
+    side-effect import of ``tables`` so business schemas are loaded
+    (future: preflight registration).
+
+Shutdown:
+    Close the connection (also clears the table cache).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+from everos.infra.persistence.lancedb import (
+    dispose_connection,
+    ensure_business_indexes,
+    get_connection,
+    verify_business_schemas,
+)
+
+logger = get_logger(__name__)
+
+
+class LanceDBLifespanProvider(LifespanProvider):
+    """Own the LanceDB connection (and its table cache) for the app lifecycle.
+
+    Startup performs three steps in order:
+
+    1. ``get_connection`` — lazily open the async connection.
+    2. ``verify_business_schemas`` — fail loudly when an on-disk table's
+       columns have drifted from the current Pydantic schema. LanceDB
+       has no online migration; since cascade can be rebuilt from md, the
+       documented recovery is ``rm -rf ~/.everos/.index/lancedb``.
+    3. ``ensure_business_indexes`` — idempotent FTS index creation.
+
+    Shutdown disposes the connection.
+    """
+
+    def __init__(self, order: int = 11) -> None:
+        super().__init__(name="lancedb", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Open the connection, check schemas, ensure indexes; return the connection."""
+        connection = await get_connection()
+        await verify_business_schemas()
+        await ensure_business_indexes()
+        logger.info("lancedb_ready", uri=connection.uri)
+        return connection
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Dispose the LanceDB connection."""
+        await dispose_connection()
--- a/src/everos/entrypoints/api/lifespans/llm.py
+++ b/src/everos/entrypoints/api/lifespans/llm.py
@ -0,0 +1,36 @@
+"""LLM lifespan provider — eagerly resolves the LLM singleton at startup.
+
+The framework's core value (memory extraction) is meaningless without
+an LLM, so misconfiguration must surface as a startup failure instead
+of N silent skips per request downstream. Ordered before the storage
+stack so we fail before paying to bring sqlite / lancedb / cascade up.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.component.llm import get_llm_client
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+class LLMLifespanProvider(LifespanProvider):
+    """Resolve the LLM client eagerly so misconfiguration fails at startup."""
+
+    def __init__(self, order: int = 8) -> None:
+        super().__init__(name="llm", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Resolve the LLM client once and return it.
+
+        Any error raised by ``get_llm_client`` propagates and aborts startup.
+        """
+        llm_client = get_llm_client()
+        logger.info("llm_lifespan_ready")
+        return llm_client
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """No-op teardown."""
+        # Nothing to release: the client is described as a stateless algo
+        # facade over openai.AsyncOpenAI.
+        return None
--- a/src/everos/entrypoints/api/lifespans/ome.py
+++ b/src/everos/entrypoints/api/lifespans/ome.py
@ -0,0 +1,39 @@
+"""OME engine lifespan provider (HTTP API entrypoint).
+
+Startup: build the singleton engine via service.memorize._get_engine
+(which also registers strategies) and start it.
+
+Shutdown: stop the engine.
+"""
+
+from __future__ import annotations
+
+import importlib
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+class OmeLifespanProvider(LifespanProvider):
+    """Manage the OfflineEngine lifecycle for the FastAPI app.
+
+    The engine handle obtained during ``startup`` is kept on the
+    provider, so ``shutdown`` stops exactly that engine. Teardown never
+    calls ``_get_engine()`` again: when startup did not get as far as
+    resolving the engine, there is nothing to stop, and re-resolving it
+    would build a fresh engine only to stop it.
+    """
+
+    def __init__(self, order: int = 50) -> None:
+        super().__init__(name="ome", order=order)
+        # Set by ``startup``; ``None`` while no engine has been resolved.
+        self._engine: Any | None = None
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Resolve the engine via the service module, start it, return it."""
+        svc = importlib.import_module("everos.service.memorize")
+        engine = svc._get_engine()  # noqa: SLF001 — service-internal accessor
+        # Record the handle before starting so a failure inside ``start``
+        # still leaves ``shutdown`` able to stop the engine.
+        self._engine = engine
+        await engine.start()
+        logger.info("ome_engine_started")
+        return engine
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Stop the engine resolved by ``startup``; no-op if there is none."""
+        engine = self._engine
+        if engine is None:
+            return
+        # Clear the handle first so a repeated shutdown is a no-op.
+        self._engine = None
+        await engine.stop()
+        logger.info("ome_engine_stopped")
--- a/src/everos/entrypoints/api/lifespans/sqlite.py
+++ b/src/everos/entrypoints/api/lifespans/sqlite.py
@ -0,0 +1,45 @@
+"""SQLite system-DB lifespan provider (HTTP API entrypoint).
+
+Startup:
+    1. Build the engine via ``get_engine`` (lazy, idempotent). Importing
+       :mod:`everos.infra.persistence.sqlite` also triggers the side-
+       effect import of ``tables`` so every business SQLModel registers
+       itself in ``SQLModel.metadata``.
+    2. ``SQLModel.metadata.create_all`` so every registered table exists.
+
+Shutdown:
+    Dispose the engine + connection pool.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+from sqlmodel import SQLModel
+
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+from everos.infra.persistence.sqlite import dispose_engine, get_engine
+
+logger = get_logger(__name__)
+
+
+class SqliteLifespanProvider(LifespanProvider):
+    """Own the SQLite system-DB engine and its schema for the app lifecycle."""
+
+    def __init__(self, order: int = 10) -> None:
+        super().__init__(name="sqlite", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Obtain the engine, create all registered tables, return the engine."""
+        system_engine = get_engine()
+        metadata = SQLModel.metadata
+        # ``create_all`` is a sync API; run it on the connection opened
+        # by ``engine.begin()``.
+        async with system_engine.begin() as connection:
+            await connection.run_sync(metadata.create_all)
+        table_count = len(metadata.tables)
+        logger.info(
+            "sqlite_schema_ready",
+            tables=table_count,
+        )
+        return system_engine
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Dispose the engine."""
+        await dispose_engine()
--- a/src/everos/entrypoints/api/routes/__init__.py
+++ b/src/everos/entrypoints/api/routes/__init__.py
@ -0,0 +1,5 @@
+"""HTTP route modules.
+
+Each module here exposes an ``APIRouter`` named ``router`` registered by
+:func:`everos.entrypoints.api.app.create_app` via ``app.include_router``.
+"""
--- a/src/everos/entrypoints/api/routes/get.py
+++ b/src/everos/entrypoints/api/routes/get.py
@ -0,0 +1,26 @@
+"""POST /api/v1/memory/get — paginated listing endpoint.
+
+Thin adapter: validate the request DTO, dispatch to the service layer,
+return the envelope verbatim. ``request_id`` is generated inside the
+:class:`GetManager`; we trust the value on the way out.
+"""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+
+from everos.memory.get import GetRequest, GetResponse
+from everos.memory.search import FilterError
+from everos.service import get as get_service
+
+# Router for this module's endpoint; every route on it is served under
+# ``/api/v1/memory`` and tagged ``memory``.
+router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
+
+
+@router.post("/get", response_model=GetResponse)
+async def post_get(req: GetRequest) -> GetResponse:
+    """Paginated listing over the requested ``memory_type``."""
+    # The validated request goes straight to the service layer; its
+    # response is returned untouched.
+    try:
+        response = await get_service(req)
+    except FilterError as filter_exc:
+        # A rejected filter expression is reported as HTTP 422 with the
+        # error's message as ``detail``.
+        raise HTTPException(
+            status_code=422,
+            detail=str(filter_exc),
+        ) from filter_exc
+    return response
--- a/src/everos/entrypoints/api/routes/health.py
+++ b/src/everos/entrypoints/api/routes/health.py
@ -0,0 +1,13 @@
+"""Health check route."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+# Prefix-less router carrying the health route, tagged ``health``.
+router = APIRouter(tags=["health"])
+
+
+@router.get("/health")
+async def health() -> dict[str, str]:
+    """Liveness probe — returns ``{"status": "ok"}`` with HTTP 200."""
+    # Constant payload: the handler inspects no dependency.
+    payload: dict[str, str] = {"status": "ok"}
+    return payload
--- a/src/everos/entrypoints/api/routes/memorize.py
+++ b/src/everos/entrypoints/api/routes/memorize.py
@ -0,0 +1,195 @@
+"""POST /api/v1/memory/add and /api/v1/memory/flush.
+
+DTOs follow the v1 API brief (01_v1_api_brief.md §2 / §3). Routes are
+thin adapters: validate the DTO, dump to dict, hand to service. No
+business logic lives here.
+
+``/flush`` is OSS-only (the cloud edition decides boundary timing
+server-side and does not expose this endpoint).
+"""
+
+from __future__ import annotations
+
+from typing import Annotated, Any, Literal
+
+from fastapi import APIRouter, HTTPException, Request
+from pydantic import AfterValidator, BaseModel, ConfigDict, Field
+
+from everos.core.errors import MultimodalError
+from everos.core.observability.tracing import gen_request_id
+from everos.service import memorize
+
+# Router shared by ``/add`` and ``/flush``; both are served under
+# ``/api/v1/memory`` and tagged ``memory``.
+router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
+
+
+# ── Path-safe identifier ────────────────────────────────────────────────────
+# ``app_id`` / ``project_id`` become directory segments under the memory
+# root, so they must reject ``.`` and ``..`` (path traversal). The basic
+# character whitelist is enforced via ``pattern`` (pydantic_core uses the
+# Rust regex engine, which does NOT support lookaround), and the two
+# reserved tokens are filtered out with a follow-up ``AfterValidator``.
+# Whitelist: one or more ASCII letters, digits, ``_``, ``.`` or ``-``.
+_PATH_SAFE_CHARSET = r"^[a-zA-Z0-9_.-]+$"
+# The whitelist alone still admits these two tokens, so they are rejected
+# separately by ``_reject_path_traversal``.
+_PATH_TRAVERSAL_TOKENS = frozenset({".", ".."})
+
+
+def _reject_path_traversal(value: str) -> str:
+    """Return ``value`` unchanged unless it is a reserved traversal token.
+
+    Raises:
+        ValueError: if ``value`` is a member of ``_PATH_TRAVERSAL_TOKENS``.
+    """
+    if value not in _PATH_TRAVERSAL_TOKENS:
+        return value
+    raise ValueError("'.' and '..' are reserved (path traversal)")
+
+
+# ``str`` alias that additionally runs ``_reject_path_traversal`` as an
+# ``AfterValidator``; the character whitelist is attached per field via
+# ``Field(pattern=...)``.
+PathSafeId = Annotated[str, AfterValidator(_reject_path_traversal)]
+
+
+# DTOs ────────────────────────────────────────────────────────────────────────
+
+
+# Function part of a tool call: the function name plus its arguments.
+class ToolFunctionDTO(BaseModel):
+    name: str
+    arguments: str  # JSON string per OpenAI Chat Completions spec
+
+
+# One tool call attached to a message: an id, a type tag and the function
+# payload.
+class ToolCallDTO(BaseModel):
+    id: str
+    type: str = "function"  # defaults to "function" when omitted
+    function: ToolFunctionDTO
+
+
+class ContentItemDTO(BaseModel):
+    """Content piece (v1 API brief appendix A)."""
+
+    # Discriminates the kind of content; only the listed literals validate.
+    type: Literal["text", "image", "audio", "doc", "pdf", "html", "email"]
+    # Every remaining field is optional and defaults to ``None``.
+    # NOTE(review): which fields are expected for which ``type`` is not
+    # enforced here — confirm against the API brief.
+    text: str | None = None
+    uri: str | None = None
+    base64: str | None = None
+    ext: str | None = None
+    name: str | None = None
+    extras: dict[str, Any] | None = None
+
+    # Unknown keys are rejected instead of being silently dropped.
+    model_config = ConfigDict(extra="forbid")
+
+
+# One message inside an ``/add`` request.
+class MessageItemDTO(BaseModel):
+    # Required and non-empty.
+    sender_id: str = Field(..., min_length=1)
+    sender_name: str | None = None
+    role: Literal["user", "assistant", "tool"]
+    # Required, strictly positive integer; the unit is spelled out in the
+    # field description below.
+    timestamp: int = Field(
+        ...,
+        gt=0,
+        description=(
+            "Message event time as Unix epoch in **milliseconds** "
+            "(v1 API contract; the algo layer auto-detects sec vs ms "
+            "for backward compat but the contract is ms)."
+        ),
+    )
+    # Either a plain string or a list of structured content items.
+    content: str | list[ContentItemDTO]
+    # Optional tool-call metadata; both default to ``None``.
+    tool_calls: list[ToolCallDTO] | None = None
+    tool_call_id: str | None = None
+
+
+# Request body of ``POST /api/v1/memory/add``.
+class MemorizeAddRequest(BaseModel):
+    # Required, 1..128 characters.
+    session_id: str = Field(..., min_length=1, max_length=128)
+    # ``app_id`` / ``project_id``: default "default", 1..128 characters,
+    # restricted to the path-safe character set and never ``.`` / ``..``.
+    app_id: PathSafeId = Field(
+        default="default",
+        min_length=1,
+        max_length=128,
+        pattern=_PATH_SAFE_CHARSET,
+    )
+    project_id: PathSafeId = Field(
+        default="default",
+        min_length=1,
+        max_length=128,
+        pattern=_PATH_SAFE_CHARSET,
+    )
+    # Between 1 and 500 messages per request.
+    messages: list[MessageItemDTO] = Field(..., min_length=1, max_length=500)
+
+
+# ``data`` payload returned by ``/add``; both values are copied from the
+# service result by the route handler.
+class AddResponseData(BaseModel):
+    message_count: int
+    status: Literal["accumulated", "extracted"]
+
+
+# Request body of ``POST /api/v1/memory/flush``: the same identifiers and
+# constraints as the add request, without a ``messages`` field.
+class MemorizeFlushRequest(BaseModel):
+    # Required, 1..128 characters.
+    session_id: str = Field(..., min_length=1, max_length=128)
+    # Path-safe identifiers; both default to "default".
+    app_id: PathSafeId = Field(
+        default="default",
+        min_length=1,
+        max_length=128,
+        pattern=_PATH_SAFE_CHARSET,
+    )
+    project_id: PathSafeId = Field(
+        default="default",
+        min_length=1,
+        max_length=128,
+        pattern=_PATH_SAFE_CHARSET,
+    )
+
+
+# ``data`` payload returned by ``/flush``. The route maps the service
+# status "extracted" to "extracted" and anything else to "no_extraction".
+class FlushResponseData(BaseModel):
+    status: Literal["extracted", "no_extraction"]
+
+
+class SuccessEnvelope[T](BaseModel):
+    """200 wrapper: ``request_id`` sits at the top level, not inside ``data``."""
+
+    # Id of the request this envelope answers.
+    request_id: str
+    # Endpoint-specific payload; ``T`` is the concrete response-data model.
+    data: T
+
+
+# Route ──────────────────────────────────────────────────────────────────────
+
+
+@router.post("/add")
+async def add_memory(
+    req: Annotated[MemorizeAddRequest, ...],
+    request: Request,
+) -> SuccessEnvelope[AddResponseData]:
+    """Add messages into the user-memory + agent-memory pipelines."""
+    # Prefer an id already stored on ``request.state``; mint one otherwise.
+    request_id = getattr(request.state, "request_id", None) or _gen_request_id()
+    try:
+        # The service receives the request as a plain dict.
+        outcome = await memorize(req.model_dump())
+    except MultimodalError as exc:
+        # Multimodal failures are reported as HTTP 415 with the error text.
+        raise HTTPException(status_code=415, detail=str(exc)) from exc
+    payload = AddResponseData(
+        message_count=outcome.message_count,
+        status=outcome.status,
+    )
+    return SuccessEnvelope(request_id=request_id, data=payload)
+
+
+@router.post("/flush")
+async def flush_memory(
+    req: Annotated[MemorizeFlushRequest, ...],
+    request: Request,
+) -> SuccessEnvelope[FlushResponseData]:
+    """Force boundary detection over the current ``session_id`` buffer.
+
+    [OSS-only] — cloud edition decides boundary timing server-side and
+    does not expose this endpoint.
+    """
+    # Prefer an id already stored on ``request.state``; mint one otherwise.
+    request_id = getattr(request.state, "request_id", None) or _gen_request_id()
+    # A flush is a memorize call with no new messages and ``is_final=True``.
+    flush_payload = dict(
+        session_id=req.session_id,
+        app_id=req.app_id,
+        project_id=req.project_id,
+        messages=[],
+    )
+    outcome = await memorize(flush_payload, is_final=True)
+    # service's ``accumulated`` = nothing to flush (buffer was empty);
+    # ``extracted`` = at least one cell carved out.
+    status: Literal["extracted", "no_extraction"]
+    if outcome.status == "extracted":
+        status = "extracted"
+    else:
+        status = "no_extraction"
+    payload = FlushResponseData(status=status)
+    return SuccessEnvelope(request_id=request_id, data=payload)
+
+
+def _gen_request_id() -> str:
+    """Fallback request id when no middleware set one."""
+    # Thin indirection over the tracing helper used by both routes above.
+    fresh_id = gen_request_id()
+    return fresh_id
--- a/src/everos/entrypoints/api/routes/metrics.py
+++ b/src/everos/entrypoints/api/routes/metrics.py
@ -0,0 +1,20 @@
+"""Prometheus metrics route."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+from fastapi.responses import Response
+from prometheus_client import CONTENT_TYPE_LATEST
+
+from everos.core.observability.metrics import generate_metrics_response
+
+# Prefix-less router carrying the metrics route, tagged ``metrics``.
+router = APIRouter(tags=["metrics"])
+
+
+@router.get("/metrics")
+async def metrics() -> Response:
+    """Render the current Prometheus registry in exposition format."""
+    # Body comes from the project's metrics helper; the media type is the
+    # constant exported by ``prometheus_client``.
+    body = generate_metrics_response()
+    return Response(content=body, media_type=CONTENT_TYPE_LATEST)
--- a/src/everos/entrypoints/api/routes/search.py
+++ b/src/everos/entrypoints/api/routes/search.py
@ -0,0 +1,27 @@
+"""POST /api/v1/memory/search — hybrid retrieval endpoint.
+
+Thin adapter: validate the request DTO, dispatch to the service layer,
+return the envelope verbatim. ``request_id`` is generated inside the
+:class:`SearchManager` (uniform for OSS + cloud); we trust that value
+on the way out.
+"""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException
+
+from everos.memory.search import FilterError, SearchRequest, SearchResponse
+from everos.service import search
+
+# Router for the search endpoint; served under ``/api/v1/memory`` and
+# tagged ``memory``.
+router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
+
+
+@router.post("/search", response_model=SearchResponse)
+async def post_search(req: SearchRequest) -> SearchResponse:
+    """Hybrid retrieval across the configured memory backends."""
+    # The validated request goes straight to the service layer; its
+    # response is returned untouched.
+    try:
+        response = await search(req)
+    except FilterError as filter_exc:
+        # Filter-DSL violations surface as 422 with the compile message
+        # (mirrors /get's contract).
+        raise HTTPException(
+            status_code=422,
+            detail=str(filter_exc),
+        ) from filter_exc
+    return response
--- a/src/everos/entrypoints/cli/__init__.py
+++ b/src/everos/entrypoints/cli/__init__.py
@ -0,0 +1,5 @@
+"""Command line entry point.
+
+Contract-first design, JSON output by default, ``--describe`` machine-readable
+mode, granular exit codes.
+"""
--- a/src/everos/entrypoints/cli/commands/__init__.py
+++ b/src/everos/entrypoints/cli/commands/__init__.py
@ -0,0 +1,5 @@
+"""CLI subcommand modules.
+
+Each module here exposes a ``app: typer.Typer`` instance which is mounted
+as a subcommand group by :mod:`everos.entrypoints.cli.main`.
+"""
--- a/src/everos/entrypoints/cli/commands/cascade.py
+++ b/src/everos/entrypoints/cli/commands/cascade.py
@ -0,0 +1,267 @@
+"""``everos cascade`` subcommand group.
+
+Three one-shot operations on the cascade subsystem, all run in-process
+without standing up the FastAPI app:
+
+- ``cascade sync [PATH]`` — flush the work queue. With ``PATH`` the
+  command first force-enqueues that single file (used after a manual
+  md edit when waiting for the watcher is impractical), then drains.
+- ``cascade status`` — print the queue + LSN summary that the daemon
+  sees right now.
+- ``cascade fix`` — list every ``failed`` row. With ``--apply``, also
+  reset ``retryable=TRUE`` rows back to ``pending`` and drain the
+  worker once so the retry actually runs before the command returns.
+
+CLI is in-process (12 doc §7.1 + 16 doc §9.2): it constructs the same
+:class:`CascadeOrchestrator` as the daemon but only calls
+``sync_once`` / ``drain_once`` / ``queue_summary``. No watcher /
+scanner background task is started.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from contextlib import asynccontextmanager
+from pathlib import Path
+from typing import Annotated
+
+import typer
+from sqlmodel import SQLModel
+
+from everos.component.embedding import build_embedding_provider
+from everos.component.tokenizer import build_tokenizer
+from everos.component.utils.datetime import to_display_tz
+from everos.config import load_settings
+from everos.core.persistence import MemoryRoot
+from everos.infra.persistence.lancedb import (
+    dispose_connection,
+    ensure_business_indexes,
+    get_connection,
+    verify_business_schemas,
+)
+from everos.infra.persistence.sqlite import (
+    dispose_engine,
+    get_engine,
+    md_change_state_repo,
+)
+from everos.memory.cascade import CascadeOrchestrator, match_kind
+
+# Typer group for the ``cascade`` subcommands (``sync`` / ``status`` /
+# ``fix``). Invoking the group without a subcommand prints its help.
+app = typer.Typer(
+    name="cascade",
+    help="Inspect and operate the md → LanceDB sync queue",
+    no_args_is_help=True,
+)
+
+
+# ── shared runtime context ───────────────────────────────────────────────
+
+
+@asynccontextmanager
+async def _runtime():  # type: ignore[no-untyped-def]
+    """Stand up sqlite + lancedb the same way the API lifespan would.
+
+    Setup, in order: create every table registered on ``SQLModel.metadata``
+    on the SQLite engine, open the LanceDB connection, verify the business
+    schemas, ensure the business indexes. Teardown disposes the LanceDB
+    connection first and the SQLite engine second.
+
+    Teardown also runs when setup fails part-way: the engine is disposed
+    whenever it was obtained, and the LanceDB connection is disposed
+    whenever ``get_connection`` succeeded. A schema or index error
+    therefore cannot leave either resource open.
+
+    NOTE(review): the accessors used here are the same ones the server
+    code path uses, but a separately running server is a different
+    process. Any state shared with it goes through the on-disk stores —
+    confirm.
+    """
+    engine = get_engine()
+    try:
+        async with engine.begin() as conn:
+            await conn.run_sync(SQLModel.metadata.create_all)
+        await get_connection()
+        try:
+            await verify_business_schemas()
+            await ensure_business_indexes()
+            yield
+        finally:
+            # Runs on normal exit, on errors raised by the caller's body,
+            # and on verify/ensure failures.
+            await dispose_connection()
+    finally:
+        # Outermost so the engine is released even if the LanceDB steps
+        # (or their teardown) raise.
+        await dispose_engine()
+
+
+def _build_orchestrator() -> CascadeOrchestrator:
+    """Assemble a ``CascadeOrchestrator`` for one-shot CLI use.
+
+    Loads settings, makes sure the default memory root exists via
+    ``MemoryRoot.ensure``, then wires in an embedding provider built from
+    ``settings.embedding`` and a tokenizer.
+    """
+    cfg = load_settings()
+    root = MemoryRoot.default()
+    root.ensure()
+    return CascadeOrchestrator(
+        memory_root=root,
+        embedder=build_embedding_provider(cfg.embedding),
+        tokenizer=build_tokenizer(),
+    )
+
+
+# ── sync ─────────────────────────────────────────────────────────────────
+
+
+@app.command("sync")
+def sync(
+    path: Annotated[
+        Path | None,
+        typer.Argument(
+            help="Optional md path to force-enqueue before draining. "
+            "If omitted, only the existing queue is drained.",
+        ),
+    ] = None,
+) -> None:
+    """Drain the cascade queue (and optionally re-enqueue a path first)."""
+
+    async def _force_enqueue(target: Path) -> None:
+        # Translate the argument into the memory-root-relative form and
+        # find the cascade kind registered for it.
+        relative = _resolve_relative(target)
+        kind = match_kind(relative)
+        if kind is None:
+            # No registered kind matches: report on stderr, exit with 1.
+            typer.echo(
+                "error: path does not match any registered cascade "
+                f"kind: {relative}",
+                err=True,
+            )
+            raise typer.Exit(code=1)
+        await md_change_state_repo.force_enqueue(relative, kind.name)
+        typer.echo(f"force-enqueued {relative} (kind={kind.name})")
+
+    async def _run() -> None:
+        async with _runtime():
+            orchestrator = _build_orchestrator()
+            if path is not None:
+                await _force_enqueue(path)
+            processed = await orchestrator.sync_once()
+            typer.echo(f"sync complete — processed {processed} row(s)")
+
+    asyncio.run(_run())
+
+
+# ── status ───────────────────────────────────────────────────────────────
+
+
+@app.command("status")
+def status() -> None:
+    """Print the queue / LSN summary."""
+
+    async def _run() -> None:
+        async with _runtime():
+            summary = await md_change_state_repo.queue_summary()
+            # Clamp at zero so a processed LSN ahead of ``max_lsn`` never
+            # prints a negative lag.
+            lag = max(0, summary.max_lsn - summary.last_processed_lsn)
+            typer.echo("queue:")
+            typer.echo(f"  pending:                  {summary.pending}")
+            typer.echo(f"  done:                     {summary.done}")
+            # The two ``failed`` rows carry a recovery hint only when their
+            # count is non-zero.
+            typer.echo(
+                f"  failed (retryable=TRUE):  {summary.failed_retryable}"
+                + (
+                    "     (eligible for `cascade fix --apply`)"
+                    if summary.failed_retryable
+                    else ""
+                )
+            )
+            typer.echo(
+                f"  failed (retryable=FALSE): {summary.failed_permanent}"
+                + (
+                    "     (fix md and re-save to recover)"
+                    if summary.failed_permanent
+                    else ""
+                )
+            )
+            typer.echo("lsn:")
+            # Labels are padded to one shared width (the longest label plus
+            # a space) so the three values line up in the same column.
+            lsn_rows = (
+                ("max:", summary.max_lsn),
+                ("last_processed:", summary.last_processed_lsn),
+                ("lag:", lag),
+            )
+            for label, value in lsn_rows:
+                typer.echo(f"  {label:<16}{value}")
+
+    asyncio.run(_run())
+
+
+# ── fix ──────────────────────────────────────────────────────────────────
+
+
+@app.command("fix")
+def fix(
+    apply: Annotated[
+        bool,
+        typer.Option(
+            "--apply",
+            help="Re-enqueue every `retryable=TRUE` row and drain the worker.",
+        ),
+    ] = False,
+) -> None:
+    """List failed rows (default) or re-enqueue retryable ones (``--apply``)."""
+
+    def _report(failed: list) -> None:  # type: ignore[type-arg]
+        # Read-only mode: print the table, then one hint per row category.
+        _print_failed_table(failed)
+        n_retryable = sum(1 for row in failed if row.retryable)
+        n_permanent = len(failed) - n_retryable
+        typer.echo("")
+        if n_retryable:
+            typer.echo(
+                "run `everos cascade fix --apply` to re-enqueue "
+                f"the {n_retryable} retryable row(s)."
+            )
+        if n_permanent:
+            typer.echo(
+                f"the {n_permanent} retryable=FALSE row(s) require "
+                "editing the md and re-saving."
+            )
+
+    async def _retry(failed: list) -> None:  # type: ignore[type-arg]
+        # Apply mode: reset retryable rows, then drain once so the retry
+        # has actually run by the time the command returns.
+        moved = await md_change_state_repo.reset_retryable_to_pending()
+        typer.echo(f"re-enqueued {moved} retryable row(s)")
+        if moved:
+            processed = await _build_orchestrator().drain_once()
+            typer.echo(f"[worker] processed {processed} row(s) on drain")
+        untouched = [row for row in failed if not row.retryable]
+        if not untouched:
+            return
+        typer.echo(f"{len(untouched)} retryable=FALSE row(s) left untouched:")
+        for row in untouched:
+            typer.echo(f"  {row.md_path}")
+
+    async def _run() -> None:
+        async with _runtime():
+            failed = await md_change_state_repo.list_failed()
+            if not failed:
+                typer.echo("no failed rows")
+            elif apply:
+                await _retry(failed)
+            else:
+                _report(failed)
+
+    asyncio.run(_run())
+
+
+# ── helpers ──────────────────────────────────────────────────────────────
+
+
+def _resolve_relative(p: Path) -> str:
+    """Translate an absolute / relative path arg into the memory-root rel form.
+
+    The state table stores paths relative to memory root, so the CLI
+    must match that convention before calling :meth:`force_enqueue`.
+
+    Both the argument and the memory root are expanded and resolved before
+    they are compared. Resolving only the argument would reject every path
+    whenever the root is reached through a symlink or is given relatively.
+
+    Returns:
+        The path relative to the memory root, with ``/`` separators.
+
+    Raises:
+        typer.BadParameter: if ``p`` does not lie under the memory root.
+    """
+    memory_root = MemoryRoot.default()
+    absolute = p.expanduser().resolve()
+    # NOTE(review): a no-op if ``MemoryRoot.root`` is already canonical.
+    canonical_root = Path(memory_root.root).expanduser().resolve()
+    try:
+        rel = absolute.relative_to(canonical_root)
+    except ValueError as exc:
+        raise typer.BadParameter(
+            f"path {p!s} is not under memory root {memory_root.root!s}"
+        ) from exc
+    return rel.as_posix()
+
+
+def _print_failed_table(rows: list) -> None:  # type: ignore[type-arg]
+    """Print the failed rows as a left-aligned, space-padded table.
+
+    Columns: ``md_path``, ``retryable`` (TRUE/FALSE), ``retries``,
+    ``last_attempt`` (ISO timestamp in the display timezone, empty when
+    unset) and ``error`` (empty when unset).
+
+    Every column is as wide as its longest cell, header included, so long
+    timestamps or retry counts cannot push later columns out of alignment.
+    An empty ``rows`` prints only the count line and the header.
+    """
+    headers = ("md_path", "retryable", "retries", "last_attempt", "error")
+    # Render all cells to text first so widths are measured on what is
+    # actually printed.
+    table = [
+        (
+            r.md_path,
+            "TRUE" if r.retryable else "FALSE",
+            str(r.retry_count),
+            to_display_tz(r.last_attempt_at).isoformat()
+            if r.last_attempt_at
+            else "",
+            r.error or "",
+        )
+        for r in rows
+    ]
+    # ``zip(headers, *table)`` yields one tuple per column, header first.
+    widths = [max(len(cell) for cell in column) for column in zip(headers, *table)]
+    fmt = "  ".join(f"{{:<{w}}}" for w in widths)
+    typer.echo(f"{len(rows)} failed row(s):\n")
+    typer.echo(fmt.format(*headers))
+    for cells in table:
+        typer.echo(fmt.format(*cells))
--- a/src/everos/entrypoints/cli/commands/init_cmd.py
+++ b/src/everos/entrypoints/cli/commands/init_cmd.py
@ -0,0 +1,183 @@
+"""``everos init`` — generate a starter ``.env`` from the packaged template.
+
+The ``env.template`` ships inside the wheel as package data at
+``everos/templates/env.template``. ``init`` reads it via
+:mod:`importlib.resources`, so the command works identically for pip-
+installed users and source-tree users (the file is the single source
+of truth).
+
+Subcommand mounted as ``everos init`` (top-level leaf command — not a
+Typer group), to match the idiomatic ``alembic init`` / ``django-admin
+startproject`` shape.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import logging
+import os
+import sys
+import tempfile
+from importlib import resources
+from pathlib import Path
+
+import typer
+
+# Package and resource name of the packaged template, looked up through
+# ``importlib.resources`` in ``_read_template``.
+_TEMPLATE_PACKAGE = "everos.templates"
+_TEMPLATE_NAME = "env.template"
+
+# Stdlib logger for this command module.
+_log = logging.getLogger("everos.cli.init")
+
+
+def _read_template() -> str:
+    """Load the packaged ``env.template`` through ``importlib.resources``.
+
+    Returns:
+        The template text, decoded as UTF-8.
+
+    Raises:
+        RuntimeError: if the template file or its package cannot be found
+            (``FileNotFoundError`` / ``ModuleNotFoundError`` underneath),
+            i.e. the installed distribution lacks the template.
+
+    NOTE(review): the error text points at a ``force-include`` entry, while
+    the packaging is otherwise described as auto-including
+    ``src/everos/templates/env.template`` via ``packages=["src/everos"]``.
+    Confirm which one matches ``pyproject.toml``.
+    """
+    try:
+        package_root = resources.files(_TEMPLATE_PACKAGE)
+        template_file = package_root.joinpath(_TEMPLATE_NAME)
+        return template_file.read_text(encoding="utf-8")
+    except (FileNotFoundError, ModuleNotFoundError) as exc:
+        message = (
+            f"packaged template {_TEMPLATE_NAME!r} not found under "
+            f"{_TEMPLATE_PACKAGE!r}; the wheel is missing its "
+            "force-include entry (see pyproject.toml "
+            "[tool.hatch.build.targets.wheel.force-include])."
+        )
+        raise RuntimeError(message) from exc
+
+
+def _xdg_default_path() -> Path:
+    """``$XDG_CONFIG_HOME/everos/.env`` (default ``~/.config/everos/.env``)."""
+    config_home = os.environ.get("XDG_CONFIG_HOME")
+    if not config_home:
+        # Unset and empty are treated alike: fall back to ``~/.config``.
+        config_home = "~/.config"
+    return Path(config_home).expanduser().joinpath("everos", ".env")
+
+
+def _atomic_write(target: Path, content: str, mode: int = 0o600) -> None:
+    """Write ``content`` to ``target`` atomically with ``mode`` permission.
+
+    Writes to a tempfile in the same directory then ``os.replace``s it
+    onto the target — guarantees either the full new file is visible or
+    the original (if any) is untouched. Permission bits are applied before
+    the rename, so the target never appears with other permissions.
+
+    Args:
+        target: Destination path; missing parent directories are created.
+        content: Text to write, encoded as UTF-8.
+        mode: Permission bits applied to the file (default ``0o600``).
+
+    Raises:
+        OSError: if creating the directory, writing, chmod-ing or renaming
+            fails. The temp file is removed before the error propagates.
+    """
+    target.parent.mkdir(parents=True, exist_ok=True)
+    fd, tmp_path = tempfile.mkstemp(
+        prefix=target.name + ".",
+        dir=target.parent,
+    )
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(content)
+        os.chmod(tmp_path, mode)
+        os.replace(tmp_path, target)
+    except BaseException:
+        # ``BaseException`` rather than ``Exception`` so an interrupted
+        # write (Ctrl-C / SystemExit) also removes the temp file. The
+        # error is always re-raised.
+        with contextlib.suppress(OSError):
+            os.unlink(tmp_path)
+        raise
+
+
+def register(parent: typer.Typer) -> None:
+    """Attach the ``init`` command to the root CLI app.
+
+    The command is defined as a closure and registered on ``parent``
+    through ``parent.command("init")``; nothing is returned.
+    """
+
+    @parent.command("init")
+    def init(
+        to: str | None = typer.Option(
+            None,
+            "--to",
+            help=(
+                "Target path for the .env file (default: ./.env). "
+                "Parent directories are created if needed."
+            ),
+        ),
+        force: bool = typer.Option(
+            False,
+            "--force",
+            help="Overwrite an existing file at the target path.",
+        ),
+        print_: bool = typer.Option(
+            False,
+            "--print",
+            help="Print the template to stdout instead of writing to disk.",
+        ),
+        xdg: bool = typer.Option(
+            False,
+            "--xdg",
+            help=(
+                "Shortcut for --to=${XDG_CONFIG_HOME:-~/.config}/everos/.env "
+                "(mutually exclusive with --to)."
+            ),
+        ),
+    ) -> None:
+        """Generate a starter ``.env`` from the packaged template.
+
+        Common flows::
+
+            everos init                  # writes ./.env
+            everos init --xdg            # writes ~/.config/everos/.env
+            everos init --to /etc/foo.env --force
+            everos init --print > custom.env
+
+        Exit codes:
+
+        - 0 — written successfully (or printed to stdout).
+        - 1 — target file already exists and ``--force`` was not given.
+        - 2 — packaged template missing (wheel build problem), or
+          ``--xdg`` and ``--to`` were both given.
+        - 3 — write failed (permissions / disk full / parent unwritable).
+        """
+        # Flag conflicts are rejected before any file is read or written.
+        if xdg and to is not None:
+            typer.secho(
+                "error: --xdg and --to are mutually exclusive",
+                fg=typer.colors.RED,
+                err=True,
+            )
+            raise typer.Exit(code=2)
+
+        try:
+            template = _read_template()
+        except RuntimeError as exc:
+            typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True)
+            raise typer.Exit(code=2) from exc
+
+        # ``--print`` never touches the disk, so it ignores the target flags.
+        if print_:
+            sys.stdout.write(template)
+            return
+
+        # Target precedence: --xdg, then --to, then ./.env.
+        if xdg:
+            target = _xdg_default_path()
+        elif to is not None:
+            target = Path(to).expanduser().resolve()
+        else:
+            target = Path.cwd() / ".env"
+
+        if target.exists() and not force:
+            typer.secho(
+                f"error: {target} already exists; pass --force to overwrite",
+                fg=typer.colors.RED,
+                err=True,
+            )
+            raise typer.Exit(code=1)
+
+        try:
+            _atomic_write(target, template)
+        except OSError as exc:
+            typer.secho(
+                f"error: failed to write {target}: {exc}",
+                fg=typer.colors.RED,
+                err=True,
+            )
+            raise typer.Exit(code=3) from exc
+
+        # Friendly next-step block (stdout — quiet enough for piping).
+        size_kb = target.stat().st_size / 1024
+        typer.secho(f"✓ wrote {target} ({size_kb:.1f} KB)", fg=typer.colors.GREEN)
+        typer.echo("Next steps:")
+        typer.echo("  1. Edit the file and fill in the API keys (see comments inside).")
+        typer.echo("  2. Run `everos server start`.")
+        typer.echo("Docs: https://github.com/evermind/everos/blob/master/QUICKSTART.md")
--- a/src/everos/entrypoints/cli/commands/server.py
+++ b/src/everos/entrypoints/cli/commands/server.py
@ -0,0 +1,161 @@
+"""``everos server`` subcommand group.
+
+Provides ``everos server start`` to run the HTTP API via uvicorn. CLI
+parses arguments, configures structured logging, then hands off to
+uvicorn pointing at :func:`everos.entrypoints.api.app.create_app` as a
+factory.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from pathlib import Path
+
+import typer
+import uvicorn
+
+# Typer group for the ``server`` subcommands (currently ``start``).
+# Invoking the group without a subcommand prints its help.
+app = typer.Typer(
+    name="server",
+    help="Run / manage the HTTP API server",
+    no_args_is_help=True,
+)
+
+
+def _resolve_env_file(explicit: str | None) -> Path | None:
+    """Return the first existing ``.env`` along the search path, if any.
+
+    Candidates, in priority order:
+
+    1. ``explicit`` — the ``--env-file <path>`` value, when given.
+    2. ``./.env`` in the current working directory.
+    3. ``${XDG_CONFIG_HOME:-~/.config}/everos/.env``.
+    4. ``~/.everos/.env``.
+
+    An ``explicit`` path that is not an existing file is skipped like any
+    other candidate, so the search continues with the remaining layers.
+    Returns ``None`` when no candidate is a regular file.
+    """
+    # List elements are evaluated top to bottom, i.e. in priority order.
+    search_path: list[Path] = [
+        *([Path(explicit).expanduser()] if explicit else []),
+        Path.cwd() / ".env",
+        Path(os.environ.get("XDG_CONFIG_HOME") or "~/.config").expanduser()
+        / "everos"
+        / ".env",
+        Path("~/.everos/.env").expanduser(),
+    ]
+    for candidate in search_path:
+        try:
+            found = candidate.is_file()
+        except OSError:
+            # An unreadable candidate (e.g. permission denied) must not
+            # abort the search; move on to the next layer.
+            continue
+        if found:
+            return candidate
+    return None
+
+
+def _load_env_file(path: str | None) -> Path | None:
+    """Load variables from the resolved ``.env`` file into the process env.
+
+    ``path`` is the optional ``--env-file`` value; resolution is delegated
+    to ``_resolve_env_file``. The file is loaded with ``override=False``.
+
+    Returns:
+        The resolved path, or ``None`` when no ``.env`` exists anywhere on
+        the search path. A ``.env`` is optional — the process may rely on
+        inherited variables alone. If ``python-dotenv`` cannot be imported,
+        the path is still returned although nothing was loaded.
+    """
+    env_path = _resolve_env_file(path)
+    if env_path is not None:
+        try:
+            from dotenv import load_dotenv
+
+            load_dotenv(env_path, override=False)
+        except ImportError:
+            # python-dotenv is in our deps; tolerate its absence anyway.
+            pass
+    return env_path
+
+
+@app.command("start")
+def start(
+    host: str | None = typer.Option(
+        None,
+        "--host",
+        help="Bind host (env: EVEROS_API__HOST, default: 127.0.0.1)",
+    ),
+    port: int | None = typer.Option(
+        None,
+        "--port",
+        help="Bind port (env: EVEROS_API__PORT, default: 8000)",
+    ),
+    env_file: str | None = typer.Option(
+        None,
+        "--env-file",
+        help=(
+            "Path to a dotenv file (highest priority). When omitted, "
+            "the server searches: ./.env → ${XDG_CONFIG_HOME:-~/.config}"
+            "/everos/.env → ~/.everos/.env. Run `everos init` to create one."
+        ),
+    ),
+    reload: bool = typer.Option(
+        False,
+        "--reload",
+        help="Reload on source changes (development)",
+    ),
+    log_level: str | None = typer.Option(
+        None,
+        "--log-level",
+        help="Log level (env: EVEROS_LOG_LEVEL, default: INFO)",
+    ),
+) -> None:
+    """Start the HTTP API server."""
+    loaded_env = _load_env_file(env_file)
+
+    # Load settings AFTER .env is in place so EVEROS_API__HOST and
+    # EVEROS_API__PORT (and any other env override) are honored.
+    from everos.config import load_settings
+
+    settings = load_settings()
+
+    # CLI flags win over settings; settings already include env overrides.
+    host_resolved = host or settings.api.host
+    port_resolved = port if port is not None else settings.api.port
+    # ``or "INFO"`` also covers EVEROS_LOG_LEVEL being set but empty, which
+    # would otherwise produce an empty log-level string.
+    log_level_resolved = (
+        log_level or os.getenv("EVEROS_LOG_LEVEL") or "INFO"
+    ).upper()
+
+    from everos.core.observability.logging import configure_logging
+
+    configure_logging(level=log_level_resolved)
+
+    bootstrap_logger = logging.getLogger("everos.cli.server")
+    # An explicit --env-file that does not exist is skipped by the search;
+    # say so instead of silently loading a different file (or none).
+    if env_file and loaded_env != Path(env_file).expanduser():
+        bootstrap_logger.warning(
+            "--env-file %s was not found; falling back to the default "
+            "search path",
+            env_file,
+        )
+    if loaded_env is not None:
+        bootstrap_logger.info("loaded env file: %s", loaded_env)
+    else:
+        bootstrap_logger.info(
+            "no .env found along the search path; relying on inherited env vars "
+            "(run `everos init` to generate one)"
+        )
+    bootstrap_logger.info("starting everos on %s:%d", host_resolved, port_resolved)
+    # Both the IPv4 and the IPv6 wildcard address listen on every interface.
+    if host_resolved in ("0.0.0.0", "::"):
+        bootstrap_logger.warning(
+            "binding to %s exposes the API on all interfaces; EverOS "
+            "ships no built-in auth — see SECURITY.md",
+            host_resolved,
+        )
+
+    try:
+        uvicorn.run(
+            "everos.entrypoints.api.app:create_app",
+            host=host_resolved,
+            port=port_resolved,
+            reload=reload,
+            factory=True,
+            log_level=log_level_resolved.lower(),
+            # ``configure_logging()`` above already installed the root
+            # handler + structlog ProcessorFormatter. ``log_config=None``
+            # stops uvicorn from running its own ``dictConfig`` over
+            # ours; otherwise uvicorn / fastapi messages revert to the
+            # ``INFO:`` no-structlog format on every restart.
+            log_config=None,
+        )
+    except KeyboardInterrupt:
+        bootstrap_logger.info("interrupted; shutting down")
+    except (OSError, RuntimeError) as exc:
+        # Startup failures exit with status 1 after logging the cause.
+        bootstrap_logger.error("startup failed: %s", exc)
+        sys.exit(1)
--- a/src/everos/entrypoints/cli/main.py
+++ b/src/everos/entrypoints/cli/main.py
@ -0,0 +1,34 @@
+"""everos CLI root entry point.
+
+Exposed as the ``everos`` console script in ``pyproject.toml``. Subcommand
+groups live under :mod:`everos.entrypoints.cli.commands` and are registered
+here.
+
+CLI subcommands run **in-process** — they call into the service layer
+directly rather than through the HTTP API. The HTTP API and CLI are two
+sibling surfaces over the same service layer.
+"""
+
+from __future__ import annotations
+
+import typer
+
+from .commands import cascade, init_cmd, server
+
+# Root Typer application for the CLI. Per the module docstring this object
+# backs the ``everos`` console script declared in ``pyproject.toml``.
+app = typer.Typer(
+    name="everos",
+    help="everos — md-first memory extraction framework",
+    # Invoking the CLI with no arguments prints the help text.
+    no_args_is_help=True,
+    # Do not add Typer's shell-completion install/show options.
+    add_completion=False,
+)
+
+# Subcommand groups, exposed as ``everos server ...`` and
+# ``everos cascade ...``.
+app.add_typer(server.app, name="server")
+app.add_typer(cascade.app, name="cascade")
+
+# ``init`` is a top-level leaf command (not a Typer group) — match the
+# idiomatic ``alembic init`` / ``django-admin startproject`` shape.
+init_cmd.register(app)
+
+
+# Allow running this module directly in addition to the console script.
+if __name__ == "__main__":
+    app()
--- a/src/everos/infra/__init__.py
+++ b/src/everos/infra/__init__.py
@ -0,0 +1,5 @@
+"""Infrastructure layer.
+
+Adapts to external storage and persists domain models. Contains no
+business rules.
+"""
--- a/src/everos/infra/ome/__init__.py
+++ b/src/everos/infra/ome/__init__.py
@ -0,0 +1,63 @@
+"""Async offline strategy scheduling chassis.
+
+Provides decorator-based strategy registration, event-driven triggers
+(Cron/Idle/Manual), and gate-based concurrency control.
+"""
+
+from everos.infra.ome.config import OMEConfig as OMEConfig
+from everos.infra.ome.context import StrategyContext as StrategyContext
+from everos.infra.ome.decorator import offline_strategy as offline_strategy
+from everos.infra.ome.engine import OfflineEngine as OfflineEngine
+from everos.infra.ome.events import BaseEvent as BaseEvent
+from everos.infra.ome.events import CronTick as CronTick
+from everos.infra.ome.events import IdleTick as IdleTick
+from everos.infra.ome.events import ManualTick as ManualTick
+from everos.infra.ome.exceptions import (
+    EmitNotDeclaredError as EmitNotDeclaredError,
+)
+from everos.infra.ome.exceptions import (
+    EngineCallFromStrategyError as EngineCallFromStrategyError,
+)
+from everos.infra.ome.exceptions import (
+    EngineLockHeldError as EngineLockHeldError,
+)
+from everos.infra.ome.exceptions import OMEError as OMEError
+from everos.infra.ome.exceptions import (
+    StartupValidationError as StartupValidationError,
+)
+from everos.infra.ome.exceptions import (
+    StrategyContractError as StrategyContractError,
+)
+from everos.infra.ome.gates import Counter as Counter
+from everos.infra.ome.records import RunRecord as RunRecord
+from everos.infra.ome.records import RunStatus as RunStatus
+from everos.infra.ome.records import StrategyRouteInfo as StrategyRouteInfo
+from everos.infra.ome.triggers import Cron as Cron
+from everos.infra.ome.triggers import Idle as Idle
+from everos.infra.ome.triggers import Immediate as Immediate
+from everos.infra.ome.triggers import Trigger as Trigger
+
+# Public API of the package: exactly the names re-exported by the imports
+# above (config, context, decorator, engine, events, exceptions, gates,
+# records, triggers). Keep this list in sync when adding a re-export.
+__all__ = [
+    "BaseEvent",
+    "Counter",
+    "Cron",
+    "CronTick",
+    "EmitNotDeclaredError",
+    "EngineCallFromStrategyError",
+    "EngineLockHeldError",
+    "Idle",
+    "IdleTick",
+    "Immediate",
+    "ManualTick",
+    "OfflineEngine",
+    "OMEConfig",
+    "OMEError",
+    "RunRecord",
+    "RunStatus",
+    "StartupValidationError",
+    "StrategyContext",
+    "StrategyContractError",
+    "StrategyRouteInfo",
+    "Trigger",
+    "offline_strategy",
+]
--- a/src/everos/infra/ome/_background/__init__.py
+++ b/src/everos/infra/ome/_background/__init__.py
@ -0,0 +1 @@
+"""Internal: background loops (idle scan / config reload / crash recovery)."""
--- a/src/everos/infra/ome/_background/config_reloader.py
+++ b/src/everos/infra/ome/_background/config_reloader.py
@ -0,0 +1,254 @@
+"""Config hot-reload — watchfiles + tomllib + shallow merge.
+
+Hot-updatable fields: enabled / max_retries / gate / cron / idle_seconds /
+scan_interval_seconds. Trigger type swap (Immediate ↔ Cron ↔ Idle),
+event subscription (Immediate.on / Idle.on), and Idle.event_field
+remain immutable — these define strategy routing and changing them
+requires a code change and redeploy.
+
+Per-strategy two-phase commit: enabled is applied independently for
+emergency-stop semantics; max_retries / gate / trigger parameters
+form one atomic group that fully rolls back on any failure inside it.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import tomllib
+from contextlib import suppress
+from dataclasses import replace
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from pydantic import ValidationError
+from watchfiles import awatch
+
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome.config import StrategyOverride, TomlRoot
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Cron, Idle, Trigger
+
+if TYPE_CHECKING:
+    from everos.infra.ome.engine import OfflineEngine
+
+logger = get_logger(__name__)
+
+
+class _SkipAtomicGroupError(Exception):
+    """Signal that one strategy's non-``enabled`` atomic group is skipped.
+
+    Internal sentinel only: raising it abandons the atomic group for a
+    single strategy and must not affect any other strategy.
+    """
+
+
+def _apply_enabled(
+    meta: StrategyMeta,
+    override: StrategyOverride,
+    name: str,
+    registry: StrategyRegistry,
+) -> StrategyMeta:
+    """Step 1: commit the ``enabled`` flag on its own, ahead of other fields.
+
+    Returns ``meta`` unchanged (and leaves the registry untouched) when the
+    override does not set ``enabled`` or sets it to the current value.
+    Otherwise a copy with the new flag is written to the registry under
+    ``name`` and returned.
+    """
+    desired = override.enabled
+    if desired is not None and desired != meta.enabled:
+        updated = replace(meta, enabled=desired)
+        registry.replace(name, updated)
+        return updated
+    return meta
+
+
+def _build_atomic_meta(
+    meta: StrategyMeta,
+    override: StrategyOverride,
+) -> tuple[StrategyMeta, Trigger]:
+    """Step 2 (pure compute): derive the post-override ``(meta, trigger)``.
+
+    Covers the atomic group only — ``max_retries``, ``gate`` and the trigger
+    parameters (``cron`` / ``idle_seconds`` / ``scan_interval_seconds``).
+    Nothing is written to the registry or the engine here.
+
+    Returns:
+        The candidate meta and its trigger. The trigger is ``meta.trigger``
+        itself (same object) when no trigger parameter was overridden.
+
+    Raises:
+        _SkipAtomicGroupError: A gate is introduced without a threshold, or
+            a trigger parameter targets a strategy of another trigger type.
+    """
+    candidate = meta
+    trigger: Trigger = meta.trigger
+
+    if override.max_retries is not None:
+        candidate = replace(candidate, max_retries=override.max_retries)
+
+    gate_override = override.gate
+    if gate_override is not None:
+        if meta.gate is None:
+            # A brand-new gate must state its threshold explicitly: silently
+            # defaulting to 1 would mean "fire on every event", which is
+            # almost certainly not what the user intended.
+            if gate_override.threshold is None:
+                raise _SkipAtomicGroupError(
+                    "introducing a gate requires explicit threshold"
+                )
+            merged = {}
+        else:
+            merged = meta.gate.model_dump()
+        # Only fields the user actually set (and set to a value) win over
+        # the existing gate.
+        merged |= {
+            field: value
+            for field, value in gate_override.model_dump(exclude_unset=True).items()
+            if value is not None
+        }
+        candidate = replace(candidate, gate=Counter(**merged))
+
+    if override.cron is not None:
+        if not isinstance(meta.trigger, Cron):
+            raise _SkipAtomicGroupError(
+                f"cron given on non-Cron strategy "
+                f"(actual: {type(meta.trigger).__name__})"
+            )
+        trigger = Cron(expr=override.cron)
+
+    idle_updates = {
+        field: value
+        for field, value in (
+            ("idle_seconds", override.idle_seconds),
+            ("scan_interval_seconds", override.scan_interval_seconds),
+        )
+        if value is not None
+    }
+    if idle_updates:
+        if not isinstance(meta.trigger, Idle):
+            raise _SkipAtomicGroupError(
+                f"idle_* given on non-Idle strategy "
+                f"(actual: {type(meta.trigger).__name__})"
+            )
+        # model_validate (not model_copy) re-runs Idle._validate_event_field
+        # on the merged dict; model_copy(update=...) would skip it and let an
+        # invalid event_field reach the registry.
+        trigger = Idle.model_validate(meta.trigger.model_dump() | idle_updates)
+
+    if trigger is not meta.trigger:
+        candidate = replace(candidate, trigger=trigger)
+
+    return candidate, trigger
+
+
+def _needs_aps_reschedule(old_trigger: Trigger, new_trigger: Trigger) -> bool:
+    """Tell whether a scheduler-driving field differs between two triggers.
+
+    Only the cron expression (Cron → Cron) and the scan interval
+    (Idle → Idle) count. The same trigger object, or any other pairing of
+    trigger types, yields ``False``.
+    """
+    if new_trigger is old_trigger:
+        return False
+    match old_trigger, new_trigger:
+        case Cron(expr=before), Cron(expr=after):
+            return after != before
+        case Idle(scan_interval_seconds=before), Idle(scan_interval_seconds=after):
+            return after != before
+        case _:
+            return False
+
+
+def _maybe_reschedule_aps(
+    engine: OfflineEngine, name: str, new_trigger: Trigger
+) -> None:
+    """Forward the trigger's scheduler-relevant field to the engine.
+
+    A Cron trigger reschedules the cron job with its expression; an Idle
+    trigger reschedules the idle job with its scan interval. Any other
+    trigger type is ignored.
+    """
+    match new_trigger:
+        case Cron():
+            engine.reschedule_cron_job(name, new_trigger.expr)
+        case Idle():
+            engine.reschedule_idle_job(
+                name, scan_interval_seconds=new_trigger.scan_interval_seconds
+            )
+
+
+def _apply_one_strategy(
+    name: str,
+    override: StrategyOverride,
+    registry: StrategyRegistry,
+    engine: OfflineEngine,
+) -> None:
+    """Apply one strategy's override: ``enabled`` first, then the atomic group.
+
+    The ``enabled`` flag is committed before anything else. The remaining
+    fields are computed, pushed to the scheduler if needed, and only then
+    written to the registry; any exception along that path is logged and
+    swallowed so that other strategies are still processed.
+    """
+    current = _apply_enabled(registry.get(name), override, name, registry)
+
+    try:
+        candidate, trigger = _build_atomic_meta(current, override)
+        reschedule = _needs_aps_reschedule(current.trigger, trigger)
+        if reschedule:
+            _maybe_reschedule_aps(engine, name, trigger)
+        # Registry write comes last, after the scheduler accepted the change.
+        registry.replace(name, candidate)
+    except Exception as exc:  # noqa: BLE001
+        # Treated as a user-fixable config error (typo / type mismatch / APS
+        # runtime failure): report it and skip this strategy's atomic group
+        # without aborting the caller's loop.
+        logger.warning(
+            "strategy_atomic_group_skipped",
+            strategy_name=name,
+            error_type=type(exc).__name__,
+            exc_info=True,
+        )
+
+
+def apply_overrides(
+    registry: StrategyRegistry,
+    root: TomlRoot,
+    engine: OfflineEngine,
+) -> None:
+    """Merge every per-strategy override from ``root`` into the registry.
+
+    Overrides naming a strategy the registry does not know are logged and
+    ignored. Each known strategy is handled in two phases:
+
+      Step 1 (enabled): applied independently — emergency-stop must
+        never be blocked by a typo in another field.
+      Step 2 (max_retries / gate / trigger params): applied as an atomic
+        group. Any failure (type mismatch, invalid cron, APS reschedule
+        error, ...) rolls the whole group back to the prior values.
+    """
+    # Snapshot of registered names, taken once before any override is applied.
+    registered = {meta.name for meta in registry.all()}
+    for strategy_name, strategy_override in root.strategies.items():
+        if strategy_name in registered:
+            _apply_one_strategy(strategy_name, strategy_override, registry, engine)
+        else:
+            logger.warning(
+                "config_override_unknown_strategy", strategy_name=strategy_name
+            )
+
+
+class ConfigReloader:
+    """Watch a TOML file and apply strategy overrides whenever it changes.
+
+    ``start()`` spawns the watch task on the running event loop and
+    ``stop()`` cancels and awaits it. The file is loaded once up front and
+    then again after every change reported by ``awatch``.
+    """
+
+    def __init__(
+        self,
+        *,
+        config_path: Path,
+        registry: StrategyRegistry,
+        engine: OfflineEngine,
+        debounce_ms: int = 1600,
+    ) -> None:
+        """Store collaborators; no I/O happens until ``start()``.
+
+        Args:
+            config_path: TOML file to load and watch.
+            registry: Strategy registry the overrides are applied to.
+            engine: Engine handed to ``apply_overrides``.
+            debounce_ms: Forwarded to ``awatch`` as its ``debounce`` argument.
+        """
+        self._path = config_path
+        self._registry = registry
+        self._engine = engine
+        self._debounce_ms = debounce_ms
+        self._task: asyncio.Task[None] | None = None
+
+    def start(self) -> None:
+        """Spawn the watch loop as a background task.
+
+        Does nothing when no config path is set. Not idempotent: calling it
+        again while the previous task is still running raises. Must be
+        called with an event loop running, as ``asyncio.create_task``
+        requires one.
+
+        Raises:
+            RuntimeError: If the watch task is already running.
+        """
+        # NOTE(review): defensive guard — the annotation says ``Path``, but
+        # callers apparently may pass ``None``; confirm and widen the type.
+        if self._path is None:
+            return
+        if self._task is not None and not self._task.done():
+            raise RuntimeError("ConfigReloader already started")
+        self._task = asyncio.create_task(self._loop())
+
+    async def stop(self) -> None:
+        """Cancel the watch task and await it; safe to call multiple times.
+
+        If the task had already ended with an error other than cancellation,
+        that error propagates from this call. The task handle is cleared
+        either way, so subsequent calls are no-ops.
+        """
+        task = self._task
+        if task is None:
+            return
+        task.cancel()
+        try:
+            with suppress(asyncio.CancelledError):
+                await task
+        finally:
+            # Clear the handle even when awaiting re-raises the task's own
+            # failure; otherwise every later stop() would raise it again.
+            self._task = None
+
+    async def _loop(self) -> None:
+        """Initial load + per-FS-change reload; survives single-iteration failures."""
+        await self._reload_safely()
+        async for _changes in awatch(self._path, debounce=self._debounce_ms):
+            await self._reload_safely()
+
+    async def _reload_safely(self) -> None:
+        """Run one load and log (never propagate) any unexpected failure."""
+        try:
+            await self._load_once()
+        except Exception:  # noqa: BLE001
+            logger.exception("config_reload_iteration_failed")
+
+    async def _load_once(self) -> None:
+        """Read TOML off the loop, parse + validate, apply overrides.
+
+        An unreadable, non-UTF-8, syntactically invalid or schema-invalid
+        file is reported as a ``config_reload_failed`` warning and leaves
+        the registry untouched.
+        """
+
+        def _read_and_parse() -> TomlRoot:
+            # tomllib.load takes a binary file object and decodes it as
+            # UTF-8 itself, so no manual read/decode step is needed.
+            with open(self._path, "rb") as f:
+                parsed = tomllib.load(f)
+            return TomlRoot.model_validate(parsed)
+
+        try:
+            # Blocking file I/O and parsing run in a worker thread.
+            root = await asyncio.to_thread(_read_and_parse)
+        except (
+            OSError,
+            UnicodeDecodeError,
+            tomllib.TOMLDecodeError,
+            ValidationError,
+        ) as e:
+            logger.warning(
+                "config_reload_failed",
+                error_type=type(e).__name__,
+                error=str(e),
+                path=str(self._path),
+            )
+            return
+        apply_overrides(self._registry, root, self._engine)
+        logger.info("config_reloaded", path=str(self._path))
--- a/src/everos/infra/ome/_background/crash_recovery.py
+++ b/src/everos/infra/ome/_background/crash_recovery.py
@ -0,0 +1,79 @@
+"""Startup crash recovery — stale RUNNING rows → CRASHED + re-enqueue.
+
+Runs once at engine.start() before normal dispatching begins. Rows
+whose started_at is older than ``timeout_seconds`` are marked CRASHED
+and re-enqueued with a fresh run_id reusing the original event payload.
+Fresher RUNNING rows are skipped — APScheduler's own jobstore may have
+already reattached them.
+
+At-most-once: ``mark_crashed`` and ``add_job`` are not atomic. If
+``add_job`` fails after ``mark_crashed``, the row stays CRASHED and
+the event is lost. Strategies needing at-least-once must add their own
+retry / monitor layer.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from datetime import timedelta
+from uuid import uuid4
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._stores.run_record import RunRecordStore
+
+logger = get_logger(__name__)
+
+
+async def scan_and_resume(
+    *,
+    run_record_store: RunRecordStore,
+    timeout_seconds: int,
+    add_job: Callable[[str, str, str, str, int], Awaitable[None]],
+) -> None:
+    """Mark stale RUNNING rows CRASHED and re-enqueue each of them once.
+
+    A row is stale when its ``started_at`` is earlier than
+    ``now - timeout_seconds``; fresher rows are skipped. Each stale row is
+    first marked CRASHED and then passed to ``add_job`` under a freshly
+    generated run id. A failing ``add_job`` is logged and the scan moves on
+    to the next row — see the module docstring for the at-most-once caveat.
+
+    ``add_job`` is called with positional args
+    ``(strategy_name, run_id, event_topic, event_payload, max_retries)``.
+
+    Raises:
+        ValueError: If ``timeout_seconds`` is not positive.
+    """
+    if timeout_seconds <= 0:
+        raise ValueError(f"timeout_seconds must be > 0, got {timeout_seconds}")
+
+    # One timestamp serves as both the staleness reference and the
+    # ``finished_at`` value written on every crashed row.
+    scan_time = get_utc_now()
+    stale_before = scan_time - timedelta(seconds=timeout_seconds)
+
+    for record in await run_record_store.find_running():
+        if record.started_at >= stale_before:
+            continue
+        await run_record_store.mark_crashed(
+            run_id=record.run_id,
+            finished_at=scan_time,
+            error="crash recovery: marked CRASHED after start scan",
+        )
+        replacement_run_id = uuid4().hex
+        try:
+            job_args = (
+                record.strategy_name,
+                replacement_run_id,
+                record.event_topic,
+                record.event_payload,
+                record.max_retries_snapshot,
+            )
+            await add_job(*job_args)
+            logger.info(
+                "crash_recovery_resumed",
+                strategy_name=record.strategy_name,
+                event_topic=record.event_topic,
+                old_run_id=record.run_id,
+                new_run_id=replacement_run_id,
+            )
+        except Exception:  # noqa: BLE001
+            # Re-enqueue is best-effort: the row stays CRASHED and the scan
+            # continues with the remaining records.
+            logger.exception(
+                "crash_recovery_resume_failed",
+                strategy_name=record.strategy_name,
+                event_topic=record.event_topic,
+                old_run_id=record.run_id,
+            )
--- a/src/everos/infra/ome/_background/idle_scanner.py
+++ b/src/everos/infra/ome/_background/idle_scanner.py
@ -0,0 +1,60 @@
+"""IdleScanner — periodic scan of idle_store, emits IdleTick for overdue buckets."""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from datetime import datetime
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._stores.idle import IdleStore
+from everos.infra.ome.events import BaseEvent, IdleTick
+from everos.infra.ome.triggers import Idle
+
+logger = get_logger(__name__)
+
+
+class IdleScanner:
+    """Emit an ``IdleTick`` for every bucket the idle store reports overdue."""
+
+    def __init__(
+        self,
+        *,
+        strategy_name: str,
+        trigger: Idle,
+        idle_store: IdleStore,
+        emit: Callable[[BaseEvent], Awaitable[None]],
+    ) -> None:
+        """Remember the strategy, its Idle trigger, the store and the emitter."""
+        self._emit = emit
+        self._idle_store = idle_store
+        self._trigger = trigger
+        self._name = strategy_name
+
+    async def scan_once(self, *, now: datetime | None = None) -> None:
+        """Run one scan: look up overdue buckets and notify each of them.
+
+        ``now`` defaults to the current UTC time. A failure while emitting
+        for one bucket (e.g. dispatch hitting a transient DB lock) is
+        logged and does not stop the remaining buckets from being notified
+        in the same round.
+        """
+        if now is None:
+            now = get_utc_now()
+        overdue_buckets = await self._idle_store.scan_idle(
+            self._name,
+            idle_seconds=self._trigger.idle_seconds,
+            now=now,
+        )
+        for bucket_key in overdue_buckets:
+            await self._emit_tick(bucket_key)
+
+    async def _emit_tick(self, bucket_key) -> None:
+        """Emit the IdleTick for one bucket, logging instead of raising."""
+        try:
+            tick = IdleTick(
+                strategy_name=self._name,
+                bucket_key=bucket_key,
+                idle_seconds=self._trigger.idle_seconds,
+            )
+            await self._emit(tick)
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "idle_emit_failed",
+                strategy_name=self._name,
+                bucket_key=bucket_key,
+            )
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`"""Internal: background loops (idle scan / config reload / crash recovery)."""`