chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/infra/init.py
+++ b/src/everos/infra/init.py
@ -0,0 +1,5 @@
+"""Infrastructure layer.
+
+Adapts to external storage and persists domain models. Contains no
+business rules.
+"""
--- a/src/everos/infra/ome/init.py
+++ b/src/everos/infra/ome/init.py
@ -0,0 +1,63 @@
+"""Async offline strategy scheduling chassis.
+
+Provides decorator-based strategy registration, event-driven triggers
+(Cron/Idle/Manual), and gate-based concurrency control.
+"""
+
+from everos.infra.ome.config import OMEConfig as OMEConfig
+from everos.infra.ome.context import StrategyContext as StrategyContext
+from everos.infra.ome.decorator import offline_strategy as offline_strategy
+from everos.infra.ome.engine import OfflineEngine as OfflineEngine
+from everos.infra.ome.events import BaseEvent as BaseEvent
+from everos.infra.ome.events import CronTick as CronTick
+from everos.infra.ome.events import IdleTick as IdleTick
+from everos.infra.ome.events import ManualTick as ManualTick
+from everos.infra.ome.exceptions import (
+    EmitNotDeclaredError as EmitNotDeclaredError,
+)
+from everos.infra.ome.exceptions import (
+    EngineCallFromStrategyError as EngineCallFromStrategyError,
+)
+from everos.infra.ome.exceptions import (
+    EngineLockHeldError as EngineLockHeldError,
+)
+from everos.infra.ome.exceptions import OMEError as OMEError
+from everos.infra.ome.exceptions import (
+    StartupValidationError as StartupValidationError,
+)
+from everos.infra.ome.exceptions import (
+    StrategyContractError as StrategyContractError,
+)
+from everos.infra.ome.gates import Counter as Counter
+from everos.infra.ome.records import RunRecord as RunRecord
+from everos.infra.ome.records import RunStatus as RunStatus
+from everos.infra.ome.records import StrategyRouteInfo as StrategyRouteInfo
+from everos.infra.ome.triggers import Cron as Cron
+from everos.infra.ome.triggers import Idle as Idle
+from everos.infra.ome.triggers import Immediate as Immediate
+from everos.infra.ome.triggers import Trigger as Trigger
+
+__all__ = [
+    "BaseEvent",
+    "Counter",
+    "Cron",
+    "CronTick",
+    "EmitNotDeclaredError",
+    "EngineCallFromStrategyError",
+    "EngineLockHeldError",
+    "Idle",
+    "IdleTick",
+    "Immediate",
+    "ManualTick",
+    "OfflineEngine",
+    "OMEConfig",
+    "OMEError",
+    "RunRecord",
+    "RunStatus",
+    "StartupValidationError",
+    "StrategyContext",
+    "StrategyContractError",
+    "StrategyRouteInfo",
+    "Trigger",
+    "offline_strategy",
+]
--- a/src/everos/infra/ome/_background/init.py
+++ b/src/everos/infra/ome/_background/init.py
@ -0,0 +1 @@
+"""Internal: background loops (idle scan / config reload / crash recovery)."""
--- a/src/everos/infra/ome/_background/config_reloader.py
+++ b/src/everos/infra/ome/_background/config_reloader.py
@ -0,0 +1,254 @@
+"""Config hot-reload — watchfiles + tomllib + shallow merge.
+
+Hot-updatable fields: enabled / max_retries / gate / cron / idle_seconds /
+scan_interval_seconds. Trigger type swap (Immediate ↔ Cron ↔ Idle),
+event subscription (Immediate.on / Idle.on), and Idle.event_field
+remain immutable — these define strategy routing and changing them
+requires a code change and redeploy.
+
+Per-strategy two-phase commit: enabled is applied independently for
+emergency-stop semantics; max_retries / gate / trigger parameters
+form one atomic group that fully rolls back on any failure inside it.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import tomllib
+from contextlib import suppress
+from dataclasses import replace
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from pydantic import ValidationError
+from watchfiles import awatch
+
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome.config import StrategyOverride, TomlRoot
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Cron, Idle, Trigger
+
+if TYPE_CHECKING:
+    from everos.infra.ome.engine import OfflineEngine
+
+logger = get_logger(__name__)
+
+
+class _SkipAtomicGroupError(Exception):
+    """Internal sentinel raised when the non-enabled atomic group for
+    one strategy must be skipped without affecting other strategies.
+    """
+
+
+def _apply_enabled(
+    meta: StrategyMeta,
+    override: StrategyOverride,
+    name: str,
+    registry: StrategyRegistry,
+) -> StrategyMeta:
+    """Step 1: apply `enabled` independently — never blocked by other fields."""
+    if override.enabled is None or override.enabled == meta.enabled:
+        return meta
+    new_meta = replace(meta, enabled=override.enabled)
+    registry.replace(name, new_meta)
+    return new_meta
+
+
+def _build_atomic_meta(
+    meta: StrategyMeta,
+    override: StrategyOverride,
+) -> tuple[StrategyMeta, Trigger]:
+    """Step 2 pure-compute: build (new_meta, new_trigger) from current state.
+
+    No registry / engine writes happen here.
+
+    Args:
+        meta: Current strategy meta (after the ``enabled`` step).
+        override: Per-strategy override parsed from the TOML file; fields
+            left as ``None`` are ignored.
+
+    Returns:
+        ``(new_meta, new_trigger)``. ``new_trigger`` is ``meta.trigger``
+        itself when no trigger parameter was overridden; in that case
+        ``new_meta.trigger`` is left untouched as well.
+
+    Raises:
+        _SkipAtomicGroupError: A gate is introduced without a threshold,
+            ``cron`` is given for a non-Cron strategy, or ``idle_*`` is
+            given for a non-Idle strategy.
+
+    NOTE(review): ``Counter(...)``, ``Cron(...)`` and ``Idle.model_validate``
+    presumably raise their own validation errors on bad values; nothing
+    here catches them, so they propagate to the caller — confirm.
+    """
+    new_meta = meta
+    new_trigger: Trigger = meta.trigger
+
+    if override.max_retries is not None:
+        new_meta = replace(new_meta, max_retries=override.max_retries)
+
+    if override.gate is not None:
+        # Introducing a gate on a strategy that has none requires an explicit
+        # threshold — silently defaulting to 1 would mean "fire on every
+        # event", which is almost certainly not what the user intended.
+        if meta.gate is None and override.gate.threshold is None:
+            raise _SkipAtomicGroupError(
+                "introducing a gate requires explicit threshold"
+            )
+        # Shallow merge: start from the existing gate (if any) and overlay
+        # only the override fields that were explicitly set to a non-None
+        # value.
+        base = meta.gate.model_dump() if meta.gate is not None else {}
+        for k, v in override.gate.model_dump(exclude_unset=True).items():
+            if v is not None:
+                base[k] = v
+        new_meta = replace(new_meta, gate=Counter(**base))
+
+    if override.cron is not None:
+        # Trigger type is immutable under hot-reload; only the expression
+        # of an existing Cron trigger may change.
+        if not isinstance(meta.trigger, Cron):
+            raise _SkipAtomicGroupError(
+                f"cron given on non-Cron strategy "
+                f"(actual: {type(meta.trigger).__name__})"
+            )
+        new_trigger = Cron(expr=override.cron)
+
+    if override.idle_seconds is not None or override.scan_interval_seconds is not None:
+        if not isinstance(meta.trigger, Idle):
+            raise _SkipAtomicGroupError(
+                f"idle_* given on non-Idle strategy "
+                f"(actual: {type(meta.trigger).__name__})"
+            )
+        updates: dict[str, int] = {}
+        if override.idle_seconds is not None:
+            updates["idle_seconds"] = override.idle_seconds
+        if override.scan_interval_seconds is not None:
+            updates["scan_interval_seconds"] = override.scan_interval_seconds
+        # model_validate (not model_copy) re-runs Idle._validate_event_field on
+        # the merged dict; model_copy(update=...) would skip it and let an
+        # invalid event_field reach the registry.
+        new_trigger = Idle.model_validate({**meta.trigger.model_dump(), **updates})
+
+    # Only touch meta.trigger when a new trigger object was actually built.
+    if new_trigger is not meta.trigger:
+        new_meta = replace(new_meta, trigger=new_trigger)
+
+    return new_meta, new_trigger
+
+
+def _needs_aps_reschedule(old_trigger: Trigger, new_trigger: Trigger) -> bool:
+    """True iff scheduler-driving fields changed (cron expr / Idle scan_interval)."""
+    if new_trigger is old_trigger:
+        return False
+    if isinstance(new_trigger, Cron) and isinstance(old_trigger, Cron):
+        return new_trigger.expr != old_trigger.expr
+    if isinstance(new_trigger, Idle) and isinstance(old_trigger, Idle):
+        return new_trigger.scan_interval_seconds != old_trigger.scan_interval_seconds
+    return False
+
+
+def _maybe_reschedule_aps(
+    engine: OfflineEngine, name: str, new_trigger: Trigger
+) -> None:
+    """Push the new trigger's APS-relevant fields to the scheduler."""
+    if isinstance(new_trigger, Cron):
+        engine.reschedule_cron_job(name, new_trigger.expr)
+    elif isinstance(new_trigger, Idle):
+        engine.reschedule_idle_job(
+            name, scan_interval_seconds=new_trigger.scan_interval_seconds
+        )
+
+
+def _apply_one_strategy(
+    name: str,
+    override: StrategyOverride,
+    registry: StrategyRegistry,
+    engine: OfflineEngine,
+) -> None:
+    """Two-phase commit for one strategy: enabled, then atomic group."""
+    meta = registry.get(name)
+    meta = _apply_enabled(meta, override, name, registry)
+
+    try:
+        new_meta, new_trigger = _build_atomic_meta(meta, override)
+        if _needs_aps_reschedule(meta.trigger, new_trigger):
+            _maybe_reschedule_aps(engine, name, new_trigger)
+        registry.replace(name, new_meta)
+    except Exception as e:  # noqa: BLE001
+        # User-fixable config error (typo / type mismatch / APS runtime
+        # failure) — log + skip this strategy's atomic group, never the loop.
+        logger.warning(
+            "strategy_atomic_group_skipped",
+            strategy_name=name,
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+
+
+def apply_overrides(
+    registry: StrategyRegistry,
+    root: TomlRoot,
+    engine: OfflineEngine,
+) -> None:
+    """Shallow-merge TomlRoot overrides into registry strategies in place.
+
+    Two-phase per-strategy semantics:
+      Step 1 (enabled): applied independently — emergency-stop must
+        never be blocked by a typo in another field.
+      Step 2 (max_retries / gate / trigger params): applied as an atomic
+        group. Any failure (type mismatch, invalid cron, APS reschedule
+        error, ...) rolls the whole group back to the prior values.
+    """
+    known = {m.name for m in registry.all()}
+    for name, override in root.strategies.items():
+        if name not in known:
+            logger.warning("config_override_unknown_strategy", strategy_name=name)
+            continue
+        _apply_one_strategy(name, override, registry, engine)
+
+
+class ConfigReloader:
+    """Watch a TOML file and apply overrides on change."""
+
+    def __init__(
+        self,
+        *,
+        config_path: Path,
+        registry: StrategyRegistry,
+        engine: OfflineEngine,
+        debounce_ms: int = 1600,
+    ) -> None:
+        self._path = config_path
+        self._registry = registry
+        self._engine = engine
+        self._debounce_ms = debounce_ms
+        self._task: asyncio.Task[None] | None = None
+
+    def start(self) -> None:
+        """Fire-and-forget the watch loop. Idempotent: raises on double-start."""
+        if self._path is None:
+            return
+        if self._task is not None and not self._task.done():
+            raise RuntimeError("ConfigReloader already started")
+        self._task = asyncio.create_task(self._loop())
+
+    async def stop(self) -> None:
+        """Cancel the watch task and await it; safe to call multiple times."""
+        if self._task is not None:
+            self._task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self._task
+            self._task = None
+
+    async def _loop(self) -> None:
+        """Initial load + per-FS-change reload; survives single-iteration failures."""
+        try:
+            await self._load_once()
+        except Exception:  # noqa: BLE001
+            logger.exception("config_reload_iteration_failed")
+        async for _changes in awatch(self._path, debounce=self._debounce_ms):
+            try:
+                await self._load_once()
+            except Exception:  # noqa: BLE001
+                logger.exception("config_reload_iteration_failed")
+
+    async def _load_once(self) -> None:
+        """Read TOML off the loop, parse + validate, apply overrides."""
+
+        def _read_and_parse() -> TomlRoot:
+            with open(self._path, "rb") as f:
+                content = f.read()
+            parsed = tomllib.loads(content.decode("utf-8"))
+            return TomlRoot.model_validate(parsed)
+
+        try:
+            root = await asyncio.to_thread(_read_and_parse)
+        except (OSError, tomllib.TOMLDecodeError, ValidationError) as e:
+            logger.warning(
+                "config_reload_failed",
+                error_type=type(e).__name__,
+                error=str(e),
+                path=str(self._path),
+            )
+            return
+        apply_overrides(self._registry, root, self._engine)
+        logger.info("config_reloaded", path=str(self._path))
--- a/src/everos/infra/ome/_background/crash_recovery.py
+++ b/src/everos/infra/ome/_background/crash_recovery.py
@ -0,0 +1,79 @@
+"""Startup crash recovery — stale RUNNING rows → CRASHED + re-enqueue.
+
+Runs once at engine.start() before normal dispatching begins. Rows
+whose started_at is older than ``timeout_seconds`` are marked CRASHED
+and re-enqueued with a fresh run_id reusing the original event payload.
+Fresher RUNNING rows are skipped — APScheduler's own jobstore may have
+already reattached them.
+
+At-most-once: ``mark_crashed`` and ``add_job`` are not atomic. If
+``add_job`` fails after ``mark_crashed``, the row stays CRASHED and
+the event is lost. Strategies needing at-least-once must add their own
+retry / monitor layer.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from datetime import timedelta
+from uuid import uuid4
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._stores.run_record import RunRecordStore
+
+logger = get_logger(__name__)
+
+
+async def scan_and_resume(
+    *,
+    run_record_store: RunRecordStore,
+    timeout_seconds: int,
+    add_job: Callable[[str, str, str, str, int], Awaitable[None]],
+) -> None:
+    """Scan ``run_record`` for stale RUNNING rows, mark them CRASHED, and
+    re-enqueue each via ``add_job``. See module docstring for the
+    at-most-once caveat.
+
+    ``add_job`` is called with positional args
+    ``(strategy_name, run_id, event_topic, event_payload, max_retries)``.
+
+    Raises:
+        ValueError: If ``timeout_seconds`` is not positive.
+    """
+    if timeout_seconds <= 0:
+        raise ValueError(f"timeout_seconds must be > 0, got {timeout_seconds}")
+    now = get_utc_now()
+    cutoff = now - timedelta(seconds=timeout_seconds)
+    running = await run_record_store.find_running()
+    for rec in running:
+        if rec.started_at >= cutoff:
+            continue
+        await run_record_store.mark_crashed(
+            run_id=rec.run_id,
+            finished_at=now,
+            error="crash recovery: marked CRASHED after start scan",
+        )
+        new_run_id = uuid4().hex
+        try:
+            await add_job(
+                rec.strategy_name,
+                new_run_id,
+                rec.event_topic,
+                rec.event_payload,
+                rec.max_retries_snapshot,
+            )
+            logger.info(
+                "crash_recovery_resumed",
+                strategy_name=rec.strategy_name,
+                event_topic=rec.event_topic,
+                old_run_id=rec.run_id,
+                new_run_id=new_run_id,
+            )
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "crash_recovery_resume_failed",
+                strategy_name=rec.strategy_name,
+                event_topic=rec.event_topic,
+                old_run_id=rec.run_id,
+            )
--- a/src/everos/infra/ome/_background/idle_scanner.py
+++ b/src/everos/infra/ome/_background/idle_scanner.py
@ -0,0 +1,60 @@
+"""IdleScanner — periodic scan of idle_store, emits IdleTick for overdue buckets."""
+
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable
+from datetime import datetime
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._stores.idle import IdleStore
+from everos.infra.ome.events import BaseEvent, IdleTick
+from everos.infra.ome.triggers import Idle
+
+logger = get_logger(__name__)
+
+
+class IdleScanner:
+    """Scans idle_store for overdue buckets and emits IdleTick events."""
+
+    def __init__(
+        self,
+        *,
+        strategy_name: str,
+        trigger: Idle,
+        idle_store: IdleStore,
+        emit: Callable[[BaseEvent], Awaitable[None]],
+    ) -> None:
+        self._name = strategy_name
+        self._trigger = trigger
+        self._idle_store = idle_store
+        self._emit = emit
+
+    async def scan_once(self, *, now: datetime | None = None) -> None:
+        """Find overdue buckets and emit IdleTick for each.
+
+        Per-bucket emit failures are caught and logged so a single
+        downstream error (e.g. dispatch hitting a transient DB lock)
+        cannot prevent sibling buckets from being notified this round.
+        """
+        effective_now = now if now is not None else get_utc_now()
+        overdue = await self._idle_store.scan_idle(
+            self._name,
+            idle_seconds=self._trigger.idle_seconds,
+            now=effective_now,
+        )
+        for bucket_key in overdue:
+            try:
+                await self._emit(
+                    IdleTick(
+                        strategy_name=self._name,
+                        bucket_key=bucket_key,
+                        idle_seconds=self._trigger.idle_seconds,
+                    )
+                )
+            except Exception:  # noqa: BLE001
+                logger.exception(
+                    "idle_emit_failed",
+                    strategy_name=self._name,
+                    bucket_key=bucket_key,
+                )
--- a/src/everos/infra/ome/_dispatch/init.py
+++ b/src/everos/infra/ome/_dispatch/init.py
@ -0,0 +1 @@
+"""Internal: event dispatch core (registry / dispatcher / runner)."""
--- a/src/everos/infra/ome/_dispatch/_state.py
+++ b/src/everos/infra/ome/_dispatch/_state.py
@ -0,0 +1,23 @@
+"""ContextVar shared between Runner and OfflineEngine.
+
+Python copies ContextVar values into child tasks at
+``asyncio.create_task`` (by design, for trace propagation), so
+``@_refuse_inside_strategy`` reliably catches only *same-task* calls.
+Never attach it to APS callback methods (``dispatch_run`` /
+``run_idle_scan``) — cascade emits would misfire.
+``test_engine_chain_emit_through_ctx`` is the regression.
+
+TODO: ``sys._getframe`` walk for a ``Runner.run`` frame is leak-proof.
+"""
+
+from __future__ import annotations
+
+from contextvars import ContextVar
+
+from everos.infra.ome.decorator import StrategyMeta
+
+# Module-level marker for "which strategy body is executing right now".
+# The default of None means no strategy is active in the current context.
+_CURRENT_STRATEGY: ContextVar[StrategyMeta | None] = ContextVar(
+    "current_strategy", default=None
+)
+"""Set by ``Runner.run`` around ``meta.func(event, ctx)``; read by
+``@_refuse_inside_strategy``. ``None`` = not inside a strategy frame."""
--- a/src/everos/infra/ome/_dispatch/dispatcher.py
+++ b/src/everos/infra/ome/_dispatch/dispatcher.py
@ -0,0 +1,205 @@
+"""EventDispatcher — routing layer applying the three OME gates.
+
+For each dispatched event, every candidate strategy is run through three
+gates in order:
+
+  1. ``enabled``     — strategy may be hot-disabled via config
+  2. ``applies_to``  — per-strategy predicate over the event payload
+  3. ``Counter``     — N-of-M rate/threshold gate against
+                       :class:`CounterStore`
+
+:meth:`dispatch` is the read-write entry point — passing the counter
+gate increments the counter and returns ``(meta, run_id)`` pairs to
+enqueue. :meth:`inspect` is its dry-run twin — same gates, no counter
+mutation; returns one :class:`StrategyRouteInfo` per matched strategy
+including a snapshot of the counter so debug callers can see why a
+strategy will or won't fire.
+
+By design ``inspect`` does not accept ``force_enabled`` /
+``strategy_filter``: those are runtime overrides for the routing side
+(``trigger_manual``), not properties a debugger should second-guess.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from uuid import uuid4
+
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome._stores.counter import CounterStore
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.records import CounterProgress, StrategyRouteInfo
+
+logger = get_logger(__name__)
+
+
+class EventDispatcher:
+    """Apply ``enabled / applies_to / Counter`` gates to one event.
+
+    :meth:`dispatch` mutates counter state through
+    ``CounterStore.incr_and_check``; :meth:`inspect` only reads it through
+    ``CounterStore.get_progress``.
+    """
+
+    def __init__(
+        self,
+        *,
+        registry: StrategyRegistry,
+        counter_store: CounterStore,
+    ) -> None:
+        """Store the registry used for lookups and the counter-gate store."""
+        self._registry = registry
+        self._counter_store = counter_store
+
+    async def dispatch(
+        self,
+        event: BaseEvent,
+        *,
+        force_enabled: bool = False,
+        strategy_filter: str | None = None,
+    ) -> list[tuple[StrategyMeta, str]]:
+        """Run gates and return ``(meta, run_id)`` pairs to enqueue.
+
+        Args:
+            event: The event to route.
+            force_enabled: Bypass the ``meta.enabled`` gate. ``applies_to``
+                and the counter still apply. Used by manual triggers
+                with ``force=True``.
+            strategy_filter: Restrict to one strategy name regardless of
+                whether it subscribes to ``type(event)``. Manual triggers
+                use this when targeting a strategy with a caller-supplied
+                event. Raises ``KeyError`` if the name is not registered.
+
+        Returns:
+            One ``(meta, run_id)`` pair per strategy that passed every
+            gate; each ``run_id`` is a fresh ``uuid4().hex``.
+
+        ``applies_to`` callables raised by a single strategy are caught,
+        logged, and treated as ``False`` for that strategy alone — sibling
+        strategies still dispatch. Framework errors (e.g. CounterStore
+        I/O) propagate.
+        """
+        if strategy_filter is not None:
+            metas: list[StrategyMeta] = [self._registry.get(strategy_filter)]
+        else:
+            metas = list(self._registry.lookup_by_event(type(event)))
+        out: list[tuple[StrategyMeta, str]] = []
+        for meta in metas:
+            # Targeted ticks only reach the strategy they name.
+            if not _routes_to(event, meta):
+                continue
+            if not force_enabled and not meta.enabled:
+                continue
+            if not _safe_applies(meta, event):
+                continue
+            # The counter is touched last, so events rejected by an earlier
+            # gate never increment it.
+            if meta.gate is not None:
+                bucket = _bucket_key(event, meta.gate.event_field)
+                passed, _ = await self._counter_store.incr_and_check(
+                    meta.name,
+                    bucket,
+                    threshold=meta.gate.threshold,
+                    cooldown_seconds=meta.gate.cooldown_seconds,
+                )
+                if not passed:
+                    continue
+            out.append((meta, uuid4().hex))
+        return out
+
+    async def inspect(self, event: BaseEvent) -> list[StrategyRouteInfo]:
+        """Dry-run twin of :meth:`dispatch` — no counter mutation.
+
+        Returns one :class:`StrategyRouteInfo` per matched strategy with
+        per-gate pass flags and a counter snapshot (read-only via
+        ``get_progress``). Same exception policy as :meth:`dispatch`:
+        a strategy's faulty ``applies_to`` callable is logged and that
+        strategy reports ``applies_to_pass=False`` rather than tanking
+        the whole inspection.
+
+        Gate flags cascade: ``applies_to`` is only evaluated when the
+        strategy is enabled, and the counter is only read when
+        ``applies_to`` passed. The reported counter progress is the stored
+        value plus one, i.e. the count this event would produce.
+
+        NOTE(review): ``counter_pass`` compares only against
+        ``gate.threshold``; ``gate.cooldown_seconds`` is not consulted
+        here, so the result may differ from :meth:`dispatch` while a
+        cooldown is active — confirm against CounterStore semantics.
+        """
+        out: list[StrategyRouteInfo] = []
+        for meta in self._registry.lookup_by_event(type(event)):
+            if not _routes_to(event, meta):
+                continue
+            enabled_pass = bool(meta.enabled)
+            applies_pass = enabled_pass and _safe_applies(meta, event)
+            # Without a gate the counter stage passes whenever applies_to did.
+            counter_pass = applies_pass and (meta.gate is None)
+            progress: CounterProgress | None = None
+            if applies_pass and meta.gate is not None:
+                bucket = _bucket_key(event, meta.gate.event_field)
+                cur = await self._counter_store.get_progress(
+                    meta.name,
+                    bucket,
+                )
+                # Project the count as if this event had been dispatched.
+                next_cur = cur + 1
+                progress = CounterProgress(
+                    current=next_cur, threshold=meta.gate.threshold
+                )
+                counter_pass = next_cur >= meta.gate.threshold
+            out.append(
+                StrategyRouteInfo(
+                    strategy_name=meta.name,
+                    enabled_pass=enabled_pass,
+                    applies_to_pass=applies_pass,
+                    counter_pass=counter_pass,
+                    counter_progress=progress,
+                )
+            )
+        return out
+
+
+def _routes_to(event: BaseEvent, meta: StrategyMeta) -> bool:
+    """Narrow engine-emitted ticks to their single target strategy.
+
+    Cron / Idle / Manual ticks carry a ``strategy_name`` naming the
+    intended recipient — without this filter two strategies listening
+    on the same tick class would cross-fire. Business events have no
+    such field and therefore fan out to every matching strategy.
+    """
+    target = getattr(event, "strategy_name", None)
+    return target is None or target == meta.name
+
+
+def _safe_applies(meta: StrategyMeta, event: BaseEvent) -> bool:
+    """Evaluate ``meta.applies_to`` with user-callable exceptions isolated.
+
+    A faulty ``applies_to`` callable is logged at exception level with
+    ``strategy_name`` + ``event_topic`` context and treated as
+    ``False`` so that a single buggy predicate cannot tank the entire
+    fan-out for an event.
+    """
+    try:
+        return _applies(meta.applies_to, event)
+    except Exception:  # noqa: BLE001
+        logger.exception(
+            "applies_to_callable_raised",
+            strategy_name=meta.name,
+            event_topic=type(event).topic(),
+        )
+        return False
+
+
+def _applies(
+    spec: str | Callable[[BaseEvent], bool] | None,
+    event: BaseEvent,
+) -> bool:
+    """Resolve ``applies_to`` semantics.
+
+      * ``None`` — strategy applies to every event in its subscription
+      * callable — invoke and bool-cast the result
+      * str — read the named event attribute and bool-cast it; falsy
+        values (``""``, ``0``, ``None``, empty containers) are treated
+        as "field unset", so the strategy does NOT apply
+
+    Exceptions raised by a user callable propagate; the dispatcher wraps
+    this call in :func:`_safe_applies` to localise blast radius.
+    """
+    if spec is None:
+        return True
+    if callable(spec):
+        return bool(spec(event))
+    return bool(getattr(event, spec, None))
+
+
+def _bucket_key(event: BaseEvent, field: str | None) -> str:
+    """Compute a Counter-store bucket key from an event field.
+
+    ``field=None`` means the gate is un-bucketed → single shared bucket
+    ``"__all__"``. Missing or ``None`` field values map to ``"__none__"``
+    so a typo doesn't accidentally collapse every event into ``"__all__"``
+    (the StrategyRegistry validator catches typos at startup; the sentinel
+    here is the runtime safety net).
+    """
+    if field is None:
+        return "__all__"
+    val = getattr(event, field, None)
+    return str(val) if val is not None else "__none__"
--- a/src/everos/infra/ome/_dispatch/registry.py
+++ b/src/everos/infra/ome/_dispatch/registry.py
@ -0,0 +1,152 @@
+"""StrategyRegistry — registration + DAG cycle detection.
+
+Mutated at startup via :meth:`register` / :meth:`validate`, and at
+runtime via :meth:`replace` (config hot-reload). Cycle detection is a
+Kahn-style topological pass on the event-flow DAG implied by
+``trigger.on`` (incoming) and ``emits`` (outgoing).
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict, deque
+from collections.abc import Callable
+from typing import Any
+
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.events import BaseEvent, CronTick, IdleTick
+from everos.infra.ome.exceptions import StartupValidationError
+from everos.infra.ome.triggers import Cron, Idle, Immediate, Trigger
+
+
+class StrategyRegistry:
+    """Startup-time registry for offline strategies with cycle detection."""
+
+    def __init__(self) -> None:
+        self._strategies: dict[str, StrategyMeta] = {}
+
+    def register(self, func: Callable[..., Any]) -> None:
+        """Register a strategy function (reads ``_ome_strategy_meta``).
+
+        Raises ``StartupValidationError`` if ``func`` is not decorated
+        with ``@offline_strategy`` or if its name is already registered.
+        """
+        meta = getattr(func, "_ome_strategy_meta", None)
+        if not isinstance(meta, StrategyMeta):
+            fn_name = getattr(func, "__name__", repr(func))
+            raise StartupValidationError(
+                f"register: {fn_name} is not decorated with @offline_strategy"
+            )
+        if meta.name in self._strategies:
+            raise StartupValidationError(
+                f"register: duplicate strategy name {meta.name!r}"
+            )
+        self._strategies[meta.name] = meta
+
+    def replace(self, name: str, new_meta: StrategyMeta) -> None:
+        """Swap an already-registered strategy's meta in place (hot-reload entry).
+
+        Cycle / gate validation is **not** re-run; callers (currently
+        :func:`apply_overrides`) must only feed metas where the
+        DAG-shaping fields (``trigger.on``, ``emits``, trigger type)
+        match the original. Raises ``KeyError`` if ``name`` is not yet
+        registered.
+        """
+        if name not in self._strategies:
+            raise KeyError(name)
+        self._strategies[name] = new_meta
+
+    def get(self, name: str) -> StrategyMeta:
+        """Return meta by name (raises ``KeyError`` if absent)."""
+        return self._strategies[name]
+
+    def all(self) -> list[StrategyMeta]:
+        """Return a snapshot list of every registered strategy."""
+        return list(self._strategies.values())
+
+    def lookup_by_event(self, event_cls: type[BaseEvent]) -> list[StrategyMeta]:
+        """Return strategies that may receive an event of ``event_cls``.
+
+        Resolution:
+          * ``Immediate`` strategy listening on the class → match
+          * ``CronTick``  → all Cron strategies (narrowed later by name)
+          * ``IdleTick``  → all Idle strategies (narrowed later by name)
+
+        Engine-emitted ticks carry a ``strategy_name`` field; dispatcher
+        narrows the returned set to the single target via ``_routes_to``.
+        """
+        out: list[StrategyMeta] = []
+        for m in self._strategies.values():
+            if (
+                (isinstance(m.trigger, Immediate) and event_cls in m.trigger.on)
+                or (isinstance(m.trigger, Cron) and event_cls is CronTick)
+                or (isinstance(m.trigger, Idle) and event_cls is IdleTick)
+            ):
+                out.append(m)
+        return out
+
+    def validate(self) -> None:
+        """Validate the strategy DAG for cycles and gate field existence."""
+        self._validate_no_cycles()
+        self._validate_gate_event_fields()
+
+    def _validate_no_cycles(self) -> None:
+        """Kahn topological sort over the event-flow DAG.
+
+        Edge ``s_a → s_b`` exists iff ``s_a.emits`` intersects
+        ``s_b.trigger.on``.
+        """
+        adj: dict[str, set[str]] = defaultdict(set)
+        indeg: dict[str, int] = dict.fromkeys(self._strategies, 0)
+
+        for src in self._strategies.values():
+            for ev in src.emits:
+                for dst in self._strategies.values():
+                    if (
+                        isinstance(dst.trigger, Immediate)
+                        and ev in dst.trigger.on
+                        and dst.name not in adj[src.name]
+                    ):
+                        adj[src.name].add(dst.name)
+                        indeg[dst.name] += 1
+
+        queue = deque(n for n, d in indeg.items() if d == 0)
+        visited = 0
+        while queue:
+            n = queue.popleft()
+            visited += 1
+            for nbr in adj[n]:
+                indeg[nbr] -= 1
+                if indeg[nbr] == 0:
+                    queue.append(nbr)
+
+        if visited < len(self._strategies):
+            raise StartupValidationError("cycle detected in strategy DAG")
+
+    def _validate_gate_event_fields(self) -> None:
+        """Reject any ``gate.event_field`` missing from a receivable event class.
+
+        Without this check a typo silently collapses every event into one
+        shared bucket and the rate gate stops segmenting.
+        """
+        for meta in self._strategies.values():
+            if meta.gate is None or meta.gate.event_field is None:
+                continue
+            field = meta.gate.event_field
+            for ev_cls in _event_classes_for_trigger(meta.trigger):
+                if field not in ev_cls.model_fields:  # type: ignore[operator]  # Pydantic model_fields → dict via @deprecated_instance_property (pydantic/main.py:277)
+                    raise StartupValidationError(
+                        f"strategy {meta.name!r}: gate.event_field {field!r} "
+                        f"not found in {ev_cls.__name__} fields "
+                        f"(available: {list(ev_cls.model_fields)})"  # type: ignore[arg-type]  # same as above
+                    )
+
+
+def _event_classes_for_trigger(trigger: Trigger) -> list[type[BaseEvent]]:
+    """Enumerate event classes a strategy with the given trigger receives."""
+    if isinstance(trigger, Immediate):
+        return list(trigger.on)
+    if isinstance(trigger, Cron):
+        return [CronTick]
+    if isinstance(trigger, Idle):
+        return [IdleTick]
+    raise NotImplementedError(f"unknown trigger type: {type(trigger).__name__}")
--- a/src/everos/infra/ome/_dispatch/runner.py
+++ b/src/everos/infra/ome/_dispatch/runner.py
@ -0,0 +1,247 @@
+"""Runner — single-strategy execution with attempt-level retry + DLQ.
+
+Acquires ``engine_sem`` (FIFO), drives the per-attempt RunRecord state
+machine (``RUNNING → SUCCESS / FAILED / DEAD_LETTER``), and fires
+``on_dead_letter`` after exhausted retries or contract violations.
+
+Per attempt, binds ``strategy_name`` / ``run_id`` / ``attempt`` into
+``structlog.contextvars`` (so every log record carries those fields
+automatically) and sets ``_CURRENT_STRATEGY`` ContextVar around
+``meta.func`` (so ``engine.emit`` can refuse direct calls from inside
+a strategy — strategies emit via ``ctx.emit``).
+
+**Idempotency contract**: if ``mark_success`` / ``mark_failed`` /
+``mark_dead_letter`` fails after the strategy body returned, the
+``RUNNING`` row stays and crash recovery on next start will treat the
+run as crashed and re-enqueue the same event. Strategy bodies must
+therefore be safe to re-execute with the same payload.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import traceback
+from collections.abc import Awaitable, Callable
+from uuid import uuid4
+
+from structlog.contextvars import bound_contextvars
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._dispatch._state import _CURRENT_STRATEGY
+from everos.infra.ome._stores.run_record import RunRecordStore
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.exceptions import EmitNotDeclaredError, StrategyContractError
+from everos.infra.ome.records import RunRecord
+
+logger = get_logger(__name__)
+
+
+class _RunCtx:
+    """Per-invocation context handed to ``meta.func(event, ctx)``.
+
+    Carries ``run_id``, a strategy-scoped logger, and the ``emit``
+    callback that enforces the declared ``emits=[...]`` contract.
+    """
+
+    def __init__(
+        self,
+        *,
+        run_id: str,
+        strategy_name: str,
+        emit_hook: Callable[[BaseEvent], Awaitable[None]],
+        declared_emits: frozenset[type[BaseEvent]],
+    ) -> None:
+        self.run_id = run_id
+        self.logger = get_logger("ome.strategy")
+        self._emit_hook = emit_hook
+        self._declared = declared_emits
+        self._strategy_name = strategy_name
+
+    async def emit(self, event: BaseEvent) -> None:
+        if type(event) not in self._declared:
+            raise EmitNotDeclaredError(
+                strategy=self._strategy_name,
+                event=event,
+            )
+        await self._emit_hook(event)
+
+
+class Runner:
+    """Drive one strategy invocation through retries to a terminal state."""
+
+    def __init__(
+        self,
+        *,
+        run_record_store: RunRecordStore,
+        engine_sem: asyncio.Semaphore,
+        emit_hook: Callable[[BaseEvent], Awaitable[None]],
+        on_dead_letter: Callable[[RunRecord], None] | None = None,
+    ) -> None:
+        """Store the collaborators shared by every run.
+
+        Args:
+            run_record_store: Persists the per-attempt ``RunRecord`` rows.
+            engine_sem: Semaphore held for a run's whole retry chain.
+            emit_hook: Coroutine function each ``_RunCtx`` forwards
+                declared events to.
+            on_dead_letter: Optional callback, invoked synchronously
+                with the dead-lettered ``RunRecord``.
+        """
+        self._rec = run_record_store
+        self._sem = engine_sem
+        self._emit_hook = emit_hook
+        self._on_dead_letter = on_dead_letter
+
+    async def run(
+        self,
+        meta: StrategyMeta,
+        event: BaseEvent,
+        *,
+        run_id: str,
+        max_retries_snapshot: int,
+    ) -> None:
+        """Execute ``meta.func(event, ctx)`` with the attempt retry loop.
+
+        Holds ``engine_sem`` for the full retry chain so concurrency cap
+        applies end-to-end. Each attempt gets a fresh ``run_id`` after
+        the first, so the run history records every try.
+
+        Raises:
+            ValueError: If ``max_retries_snapshot`` is negative.
+        """
+        if max_retries_snapshot < 0:
+            raise ValueError(
+                f"max_retries_snapshot must be >= 0, got {max_retries_snapshot}"
+            )
+
+        async with self._sem:
+            # Topic and payload are computed once; every attempt's row
+            # stores the same values.
+            event_topic = type(event).topic()
+            event_payload = event.model_dump_json()
+            current_run_id = run_id
+
+            # One initial attempt plus ``max_retries_snapshot`` retries.
+            for attempt in range(max_retries_snapshot + 1):
+                if attempt > 0:
+                    current_run_id = uuid4().hex
+                terminated = await self._run_one_attempt(
+                    meta=meta,
+                    event=event,
+                    current_run_id=current_run_id,
+                    attempt=attempt,
+                    event_topic=event_topic,
+                    event_payload=event_payload,
+                    max_retries_snapshot=max_retries_snapshot,
+                )
+                if terminated:
+                    return
+
+    async def _run_one_attempt(
+        self,
+        *,
+        meta: StrategyMeta,
+        event: BaseEvent,
+        current_run_id: str,
+        attempt: int,
+        event_topic: str,
+        event_payload: str,
+        max_retries_snapshot: int,
+    ) -> bool:
+        """Run one attempt; return ``True`` if a terminal state was
+        written (success / dead-letter or persistence failure), ``False``
+        if FAILED and the caller should loop into the next attempt.
+        """
+        ctx = _RunCtx(
+            run_id=current_run_id,
+            strategy_name=meta.name,
+            emit_hook=self._emit_hook,
+            declared_emits=meta.emits,
+        )
+        with bound_contextvars(  # type: ignore[arg-type]  # structlog typed as Generator; @contextmanager wraps at runtime (structlog/contextvars.py:170)
+            strategy_name=meta.name,
+            run_id=current_run_id,
+            attempt=attempt,
+        ):
+            if not await self._record_start(
+                run_id=current_run_id,
+                strategy_name=meta.name,
+                attempt=attempt,
+                event_topic=event_topic,
+                event_payload=event_payload,
+                max_retries_snapshot=max_retries_snapshot,
+            ):
+                return True  # mark_running failed; abort run, no DB row exists
+            try:
+                # Mark the strategy as current only while its body runs;
+                # the inner ``finally`` resets the ContextVar even if the
+                # body raises.
+                token = _CURRENT_STRATEGY.set(meta)
+                try:
+                    await meta.func(event, ctx)
+                finally:
+                    _CURRENT_STRATEGY.reset(token)
+            except StrategyContractError as e:
+                # Contract violations are never retried: dead-letter now.
+                await self._terminate_dead_letter(current_run_id, _format_error(e))
+                return True
+            except Exception as e:  # noqa: BLE001
+                # Format while still inside the handler: ``_format_error``
+                # reads the traceback of the exception being handled.
+                err = _format_error(e)
+                if attempt < max_retries_snapshot:
+                    await self._rec.mark_failed(
+                        run_id=current_run_id,
+                        finished_at=get_utc_now(),
+                        error=err,
+                    )
+                    return False  # caller will retry
+                # Retry budget exhausted on this attempt.
+                await self._terminate_dead_letter(current_run_id, err)
+                return True
+            else:
+                await self._rec.mark_success(
+                    run_id=current_run_id,
+                    finished_at=get_utc_now(),
+                )
+                return True
+
+    async def _record_start(
+        self,
+        *,
+        run_id: str,
+        strategy_name: str,
+        attempt: int,
+        event_topic: str,
+        event_payload: str,
+        max_retries_snapshot: int,
+    ) -> bool:
+        """Persist this attempt as RUNNING; return ``False`` on write failure.
+
+        When the write fails (DB lock, disk full, ...) the caller
+        aborts the retry loop — without a RUNNING row crash recovery
+        cannot rediscover the run, and it is silently lost. The
+        exception log emitted here is the only audit trail.
+        """
+        try:
+            await self._rec.mark_running(
+                run_id=run_id,
+                strategy_name=strategy_name,
+                attempt=attempt,
+                event_topic=event_topic,
+                event_payload=event_payload,
+                max_retries_snapshot=max_retries_snapshot,
+            )
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "mark_running_failed",
+                run_id=run_id,
+                strategy_name=strategy_name,
+                attempt=attempt,
+            )
+            return False
+        return True
+
+    async def _terminate_dead_letter(self, run_id: str, error: str) -> None:
+        """Mark DEAD_LETTER and fire ``on_dead_letter`` callback if set."""
+        await self._rec.mark_dead_letter(
+            run_id=run_id,
+            finished_at=get_utc_now(),
+            error=error,
+        )
+        await self._fire_dead_letter_callback(run_id)
+
+    async def _fire_dead_letter_callback(self, run_id: str) -> None:
+        """Load the stored record for ``run_id`` and pass it to the callback.
+
+        Does nothing when no callback is configured or the record is
+        missing. Exceptions raised by the callback are logged and
+        swallowed; a failure of the record lookup itself propagates.
+        """
+        if self._on_dead_letter is None:
+            return
+        rec = await self._rec.get(run_id)
+        if rec is None:
+            return
+        try:
+            self._on_dead_letter(rec)
+        except Exception:  # noqa: BLE001
+            logger.exception("on_dead_letter_failed")
+
+
+def _format_error(e: BaseException) -> str:
+    """Format an exception with type, message, and full traceback."""
+    return f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
--- a/src/everos/infra/ome/_stores/init.py
+++ b/src/everos/infra/ome/_stores/init.py
@ -0,0 +1 @@
+"""Internal: SQLite-backed state stores (counter / idle / run_record)."""
--- a/src/everos/infra/ome/_stores/counter.py
+++ b/src/everos/infra/ome/_stores/counter.py
@ -0,0 +1,107 @@
+"""CounterStore — persistent (strategy_name, bucket_key) → counter rows.
+
+Backs the ``Counter`` gate in OME's dispatch pipeline: each call to
+:meth:`CounterStore.incr_and_check` atomically increments the bucket's
+counter and reports whether the strategy should fire this time.
+
+Pass semantics:
+  - ``counter >= threshold`` AND cooldown elapsed → ``passed=True``
+  - On pass, the row's counter resets to 0 and ``last_passed_ts``
+    advances to ``now``; the next pass needs a fresh accumulation.
+  - ``cooldown_seconds=0`` disables the cooldown gate (threshold alone).
+"""
+
+from __future__ import annotations
+
+from datetime import timedelta
+
+from everos.component.utils.datetime import (
+    from_iso_format,
+    get_utc_now,
+    to_iso_format,
+)
+from everos.infra.ome._stores.storage import OMEStorage
+
+
+class CounterStore:
+    """SQLite-backed counter for the ``Counter`` gate (see module docstring)."""
+
+    def __init__(self, storage: OMEStorage) -> None:
+        self._storage = storage
+
+    async def incr_and_check(
+        self,
+        strategy_name: str,
+        bucket_key: str,
+        *,
+        threshold: int,
+        cooldown_seconds: int,
+    ) -> tuple[bool, int]:
+        """Increment ``(strategy_name, bucket_key)``'s counter atomically.
+
+        Args:
+            strategy_name: Strategy whose counter to update.
+            bucket_key: The bucket value derived from the event field
+                (or ``"__all__"`` when the gate is unbucketed).
+            threshold: Pass once the counter reaches this value
+                (``>=``).
+            cooldown_seconds: Minimum seconds since the last pass for
+                the strategy/bucket; ``0`` disables the cooldown check.
+
+        Returns:
+            ``(passed, counter)``. ``counter`` is the counter value at
+            the moment of the check (i.e. pre-reset on pass). Useful for
+            diagnostics — ``threshold`` is *not* substituted, so callers
+            observing ``counter > threshold`` learn the gate is
+            over-armed (e.g. threshold was lowered via hot reload while
+            the counter had already accumulated past the new value).
+        """
+        now = get_utc_now()
+        async with self._storage.transaction() as conn:
+            cur = await conn.execute(
+                "SELECT counter, last_passed_ts FROM counter_store "
+                "WHERE strategy_name = ? AND bucket_key = ?",
+                (strategy_name, bucket_key),
+            )
+            row = await cur.fetchone()
+            counter = (row[0] if row else 0) + 1
+            last_passed = from_iso_format(row[1]) if row and row[1] else None
+
+            cooldown_ok = (
+                cooldown_seconds == 0
+                or last_passed is None
+                or now - last_passed >= timedelta(seconds=cooldown_seconds)
+            )
+            passed = counter >= threshold and cooldown_ok
+
+            new_counter = 0 if passed else counter
+            new_last_passed_ts = (
+                to_iso_format(now)
+                if passed
+                else (to_iso_format(last_passed) if last_passed else None)
+            )
+            await conn.execute(
+                "INSERT INTO counter_store (strategy_name, bucket_key, "
+                "counter, last_passed_ts) "
+                "VALUES (?, ?, ?, ?) "
+                "ON CONFLICT(strategy_name, bucket_key) DO UPDATE SET "
+                "counter = excluded.counter, "
+                "last_passed_ts = excluded.last_passed_ts",
+                (strategy_name, bucket_key, new_counter, new_last_passed_ts),
+            )
+            return passed, counter
+
+    async def get_progress(self, strategy_name: str, bucket_key: str) -> int:
+        """Return the counter value persisted for this bucket (0 if absent).
+
+        Read-only; does not increment. Used by dispatcher inspect-mode
+        to report progress without mutating state.
+        """
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(
+                "SELECT counter FROM counter_store "
+                "WHERE strategy_name = ? AND bucket_key = ?",
+                (strategy_name, bucket_key),
+            )
+            row = await cur.fetchone()
+            return row[0] if row else 0
--- a/src/everos/infra/ome/_stores/idle.py
+++ b/src/everos/infra/ome/_stores/idle.py
@ -0,0 +1,64 @@
+"""IdleStore — last_activity_ts rows backing the Idle trigger.
+
+All writes pass through ``to_iso_format`` over a tz-aware datetime, so
+``last_activity_ts`` is a fixed-format ISO 8601 string whose
+lexicographic order matches temporal order — :meth:`scan_idle` relies
+on this to keep the column un-wrapped in its predicate so SQLite can
+use ``idx_idle_scan``.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+from everos.component.utils.datetime import from_iso_format, to_iso_format
+from everos.infra.ome._stores.storage import OMEStorage
+
+
+class IdleStore:
+    """SQLite-backed last-activity tracker for the ``Idle`` trigger."""
+
+    def __init__(self, storage: OMEStorage) -> None:
+        self._storage = storage
+
+    async def touch(self, strategy_name: str, bucket_key: str, *, at: datetime) -> None:
+        """UPSERT ``last_activity_ts = at`` for ``(strategy_name, bucket_key)``."""
+        async with self._storage.connect() as conn:
+            await conn.execute(
+                "INSERT INTO idle_store "
+                "(strategy_name, bucket_key, last_activity_ts) "
+                "VALUES (?, ?, ?) "
+                "ON CONFLICT(strategy_name, bucket_key) DO UPDATE SET "
+                "last_activity_ts = excluded.last_activity_ts",
+                (strategy_name, bucket_key, to_iso_format(at)),
+            )
+            await conn.commit()
+
+    async def scan_idle(
+        self, strategy_name: str, *, idle_seconds: int, now: datetime
+    ) -> list[str]:
+        """Return bucket_keys with ``last_activity_ts`` older than ``idle_seconds``."""
+        # Cutoff on the RHS so the indexed column stays un-wrapped.
+        cutoff = to_iso_format(now - timedelta(seconds=idle_seconds))
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(
+                "SELECT bucket_key FROM idle_store "
+                "WHERE strategy_name = ? AND last_activity_ts <= ? "
+                "ORDER BY last_activity_ts ASC",
+                (strategy_name, cutoff),
+            )
+            rows = await cur.fetchall()
+        return [r[0] for r in rows]
+
+    async def get_last_activity(
+        self, strategy_name: str, bucket_key: str
+    ) -> datetime | None:
+        """Return the stored ``last_activity_ts`` (``None`` if never touched)."""
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(
+                "SELECT last_activity_ts FROM idle_store "
+                "WHERE strategy_name = ? AND bucket_key = ?",
+                (strategy_name, bucket_key),
+            )
+            row = await cur.fetchone()
+        return from_iso_format(row[0]) if row else None
--- a/src/everos/infra/ome/_stores/run_record.py
+++ b/src/everos/infra/ome/_stores/run_record.py
@ -0,0 +1,168 @@
+"""RunRecord persistence — state machine writes + same-transaction ring-buffer trim.
+
+State machine (one row per ``run_id``):
+    RUNNING  →  SUCCESS / FAILED / DEAD_LETTER / CRASHED
+
+Every :meth:`RunRecordStore.mark_running` INSERT runs inside one
+``BEGIN IMMEDIATE`` transaction with a paired DELETE that keeps only
+the newest ``max_records_per_strategy`` rows for that strategy. Bound
+is enforced atomically — no background sweeper, no transient
+over-budget state.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+from everos.component.utils.datetime import (
+    from_iso_format,
+    get_utc_now,
+    to_iso_format,
+)
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.records import RunRecord, RunStatus
+
+
+class RunRecordStore:
+    """SQLite-backed persistence for ``RunRecord`` (see module docstring)."""
+
+    def __init__(self, storage: OMEStorage, max_records_per_strategy: int) -> None:
+        self._storage = storage
+        self._max = max_records_per_strategy
+
+    async def mark_running(
+        self,
+        *,
+        run_id: str,
+        strategy_name: str,
+        attempt: int,
+        event_topic: str,
+        event_payload: str,
+        max_retries_snapshot: int,
+    ) -> None:
+        """Insert a new RUNNING row and trim the strategy's ring buffer atomically."""
+        async with self._storage.transaction() as conn:
+            await conn.execute(
+                "INSERT INTO run_record "
+                "(run_id, strategy_name, status, attempt, started_at, "
+                " event_topic, event_payload, max_retries_snapshot) "
+                "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+                (
+                    run_id,
+                    strategy_name,
+                    RunStatus.RUNNING.value,
+                    attempt,
+                    to_iso_format(get_utc_now()),
+                    event_topic,
+                    event_payload,
+                    max_retries_snapshot,
+                ),
+            )
+            await conn.execute(
+                "DELETE FROM run_record "
+                "WHERE strategy_name = ? AND run_id NOT IN ("
+                "  SELECT run_id FROM run_record WHERE strategy_name = ? "
+                "  ORDER BY started_at DESC LIMIT ?)",
+                (strategy_name, strategy_name, self._max),
+            )
+
+    async def mark_success(self, *, run_id: str, finished_at: datetime) -> None:
+        """Mark RUNNING → SUCCESS."""
+        await self._update_status(run_id, RunStatus.SUCCESS, finished_at, None)
+
+    async def mark_failed(
+        self, *, run_id: str, finished_at: datetime, error: str
+    ) -> None:
+        """Mark RUNNING → FAILED (retry pending)."""
+        await self._update_status(run_id, RunStatus.FAILED, finished_at, error)
+
+    async def mark_dead_letter(
+        self, *, run_id: str, finished_at: datetime, error: str
+    ) -> None:
+        """Mark RUNNING → DEAD_LETTER (retries exhausted or non-retryable)."""
+        await self._update_status(run_id, RunStatus.DEAD_LETTER, finished_at, error)
+
+    async def mark_crashed(
+        self, *, run_id: str, finished_at: datetime, error: str
+    ) -> None:
+        """Mark RUNNING → CRASHED (called by crash-recovery sweep)."""
+        await self._update_status(run_id, RunStatus.CRASHED, finished_at, error)
+
+    async def _update_status(
+        self,
+        run_id: str,
+        status: RunStatus,
+        finished_at: datetime,
+        error: str | None,
+    ) -> None:
+        async with self._storage.connect() as conn:
+            await conn.execute(
+                "UPDATE run_record "
+                "SET status = ?, finished_at = ?, error = ? "
+                "WHERE run_id = ?",
+                (status.value, to_iso_format(finished_at), error, run_id),
+            )
+            await conn.commit()
+
+    async def get(self, run_id: str) -> RunRecord | None:
+        """Return the record for ``run_id`` (``None`` if absent)."""
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(
+                _SELECT_COLUMNS + " WHERE run_id = ?",
+                (run_id,),
+            )
+            row = await cur.fetchone()
+        return _row_to_record(row) if row else None
+
+    async def list_runs(
+        self,
+        *,
+        strategy_name: str,
+        status: RunStatus | None = None,
+        limit: int = 100,
+    ) -> list[RunRecord]:
+        """Return ``strategy_name``'s records, newest first; optional status filter."""
+        sql = _SELECT_COLUMNS + " WHERE strategy_name = ?"
+        args: list[Any] = [strategy_name]
+        if status is not None:
+            sql += " AND status = ?"
+            args.append(status.value)
+        sql += " ORDER BY started_at DESC LIMIT ?"
+        args.append(limit)
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(sql, args)
+            rows = await cur.fetchall()
+        return [_row_to_record(r) for r in rows]
+
+    async def find_running(self) -> list[RunRecord]:
+        """Return every row still in RUNNING — used by crash recovery at start()."""
+        async with self._storage.connect() as conn:
+            cur = await conn.execute(
+                _SELECT_COLUMNS + " WHERE status = ?",
+                (RunStatus.RUNNING.value,),
+            )
+            rows = await cur.fetchall()
+        return [_row_to_record(r) for r in rows]
+
+
+# Shared SELECT prefix for every run_record read. The column order here
+# is what ``_row_to_record`` indexes into positionally (row[0]..row[9]),
+# so the two must be changed together.
+_SELECT_COLUMNS = (
+    "SELECT run_id, strategy_name, status, attempt, started_at, finished_at, "
+    "       error, event_topic, event_payload, max_retries_snapshot "
+    "FROM run_record"
+)
+
+
+def _row_to_record(row: tuple) -> RunRecord:
+    return RunRecord(
+        run_id=row[0],
+        strategy_name=row[1],
+        status=RunStatus(row[2]),
+        attempt=row[3],
+        started_at=from_iso_format(row[4]),
+        finished_at=from_iso_format(row[5]) if row[5] else None,
+        error=row[6],
+        event_topic=row[7],
+        event_payload=row[8],
+        max_retries_snapshot=row[9],
+    )
--- a/src/everos/infra/ome/_stores/storage.py
+++ b/src/everos/infra/ome/_stores/storage.py
@ -0,0 +1,115 @@
+"""OME SQLite storage — schema initialization + connection factory.
+
+Single file (default ``MemoryRoot.default().ome_db`` ≡
+``<memory-root>/.index/sqlite/ome.db``). Holds 3 OME-managed tables
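+(counter_store / idle_store / run_record). The APS jobstore table is
+created by APScheduler itself when its SQLAlchemyJobStore connects; by
+default it lives in a sibling ``<stem>.aps.db`` file (see
+``OMEConfig.aps_jobstore_path``), not in this db.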
+
+PRAGMA scopes (see https://www.sqlite.org/pragma.html):
+  - ``journal_mode=WAL`` is file-level — persisted in the db header,
+    applied once in :meth:`OMEStorage.init`.
+  - ``synchronous=NORMAL``, ``cache_size=-65536``, ``busy_timeout=5000``
+    are connection-level and reset on every new connection, so they are
+    re-applied inside :meth:`OMEStorage.connect` (which is why
+    ``connect`` is an ``@asynccontextmanager`` rather than a passthrough).
+    This mirrors SQLAlchemy's canonical ``@event.listens_for(Engine,
+    "connect")`` pattern for SQLite — aiosqlite exposes no equivalent
+    hook. ``busy_timeout=5000`` matters whenever another writer holds
+    the file lock — a concurrent OME store connection, or the APS
+    jobstore if ``aps_jobstore_path`` is pointed at this same file;
+    without it, WAL writer-vs-writer contention surfaces as
+    ``SQLITE_BUSY`` instead of brief backoff.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+import aiosqlite
+
+# DDL run by ``OMEStorage.init`` via ``executescript``. Every statement is
+# ``IF NOT EXISTS``, so re-running init against an existing db is harmless.
+# ``idx_idle_scan`` covers the (strategy_name, last_activity_ts) predicate
+# used by ``IdleStore.scan_idle``; the two run_record indexes cover the
+# "newest first" reads by strategy and by status.
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS counter_store (
+    strategy_name   TEXT NOT NULL,
+    bucket_key      TEXT NOT NULL,
+    counter         INTEGER NOT NULL DEFAULT 0,
+    last_passed_ts  TIMESTAMP,
+    PRIMARY KEY (strategy_name, bucket_key)
+);
+
+CREATE TABLE IF NOT EXISTS idle_store (
+    strategy_name      TEXT NOT NULL,
+    bucket_key         TEXT NOT NULL,
+    last_activity_ts   TIMESTAMP NOT NULL,
+    PRIMARY KEY (strategy_name, bucket_key)
+);
+CREATE INDEX IF NOT EXISTS idx_idle_scan
+    ON idle_store (strategy_name, last_activity_ts);
+
+CREATE TABLE IF NOT EXISTS run_record (
+    run_id                          TEXT PRIMARY KEY,
+    strategy_name                   TEXT NOT NULL,
+    status                          TEXT NOT NULL,
+    attempt                         INTEGER NOT NULL DEFAULT 0,
+    started_at                      TIMESTAMP NOT NULL,
+    finished_at                     TIMESTAMP,
+    error                           TEXT,
+    event_topic                     TEXT NOT NULL,
+    event_payload                   TEXT NOT NULL,
+    max_retries_snapshot            INTEGER NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_run_strategy_started
+    ON run_record (strategy_name, started_at DESC);
+CREATE INDEX IF NOT EXISTS idx_run_status_started
+    ON run_record (status, started_at DESC);
+"""
+
+# File-level pragmas: applied once, in ``OMEStorage.init``.
+_INIT_PRAGMAS = ("PRAGMA journal_mode=WAL",)
+# Connection-level pragmas: re-applied on every ``OMEStorage.connect``.
+# A negative ``cache_size`` is expressed in KiB (-65536 ≈ 64 MiB);
+# ``busy_timeout`` is in milliseconds.
+_CONN_PRAGMAS = (
+    "PRAGMA synchronous=NORMAL",
+    "PRAGMA cache_size=-65536",
+    "PRAGMA busy_timeout=5000",
+)
+
+
+class OMEStorage:
+    """Connection factory + schema init for the OME SQLite db."""
+
+    def __init__(self, db_path: Path) -> None:
+        self.db_path = db_path
+
+    async def init(self) -> None:
+        """Create parent dirs + apply file-level pragmas + create schema."""
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        async with aiosqlite.connect(self.db_path) as conn:
+            for pragma in _INIT_PRAGMAS:
+                await conn.execute(pragma)
+            await conn.executescript(_SCHEMA)
+            await conn.commit()
+
+    @asynccontextmanager
+    async def connect(self) -> AsyncIterator[aiosqlite.Connection]:
+        """Yield an aiosqlite connection with per-connection pragmas applied."""
+        async with aiosqlite.connect(self.db_path) as conn:
+            for pragma in _CONN_PRAGMAS:
+                await conn.execute(pragma)
+            yield conn
+
+    @asynccontextmanager
+    async def transaction(self) -> AsyncIterator[aiosqlite.Connection]:
+        """Yield a connection inside an ``IMMEDIATE`` transaction.
+
+        Commits on success, rolls back on any exception. Mirrors
+        SQLAlchemy's ``conn.begin()`` for raw aiosqlite, which exposes
+        no built-in transaction context manager. ``BEGIN IMMEDIATE``
+        (rather than ``DEFERRED``) acquires the write lock upfront so
+        a read-modify-write block cannot lose to a competing writer
+        between its SELECT and its UPDATE.
+        """
+        async with self.connect() as conn:
+            try:
+                await conn.execute("BEGIN IMMEDIATE")
+                yield conn
+                await conn.commit()
+            except Exception:
+                await conn.rollback()
+                raise
--- a/src/everos/infra/ome/config.py
+++ b/src/everos/infra/ome/config.py
@ -0,0 +1,157 @@
+"""OMEConfig (engine-level) + TomlRoot (per-strategy override schema).
+
+All models forbid extra keys so configuration typos surface at startup
+as StartupValidationError instead of being silently ignored.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated, Self
+
+from apscheduler.triggers.cron import CronTrigger
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+from everos.core.persistence.memory_root import MemoryRoot
+
+
+def _default_jobstore_path() -> Path:
+    return MemoryRoot.default().ome_db
+
+
+class CounterOverride(BaseModel):
+    """TOML override for a strategy's Counter gate (per-key None means keep)."""
+
+    # Unknown keys are rejected rather than ignored.
+    model_config = ConfigDict(extra="forbid")
+
+    # Must be > 0 when given.
+    threshold: Annotated[int, Field(gt=0)] | None = None
+    # Must be >= 0 when given.
+    cooldown_seconds: Annotated[int, Field(ge=0)] | None = None
+    # Must be a non-empty string when given.
+    event_field: Annotated[str, Field(min_length=1)] | None = None
+
+
+class StrategyOverride(BaseModel):
+    """TOML override for one strategy's decorator parameters."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    enabled: bool | None = None
+    max_retries: Annotated[int, Field(ge=0)] | None = None
+    gate: CounterOverride | None = None
+    cron: str | None = None
+    idle_seconds: Annotated[int, Field(gt=0)] | None = None
+    scan_interval_seconds: Annotated[int, Field(gt=0)] | None = None
+
+    @field_validator("cron")
+    @classmethod
+    def _validate_crontab(cls, v: str | None) -> str | None:
+        if v is not None:
+            CronTrigger.from_crontab(v)
+        return v
+
+    @model_validator(mode="after")
+    def _check_idle_pair_consistency(self) -> Self:
+        # One-sided overrides are merged with existing meta downstream,
+        # so cross-check only when both fields are in this payload.
+        if (
+            self.idle_seconds is not None
+            and self.scan_interval_seconds is not None
+            and self.scan_interval_seconds > self.idle_seconds // 2
+        ):
+            raise ValueError(
+                "StrategyOverride: scan_interval_seconds "
+                f"({self.scan_interval_seconds}) must be <= idle_seconds // 2 "
+                f"({self.idle_seconds // 2})"
+            )
+        return self
+
+
+class TomlRoot(BaseModel):
+    """Top-level TOML schema for ome.toml."""
+
+    # Unknown top-level keys are rejected rather than ignored.
+    model_config = ConfigDict(extra="forbid")
+
+    # Maps a key (presumably the strategy name — confirm against the
+    # loader) to its overrides; an absent table yields an empty dict.
+    strategies: dict[str, StrategyOverride] = Field(default_factory=dict)
+
+
+class OMEConfig(BaseModel):
+    """Engine-level configuration consumed by OfflineEngine.
+
+    Unknown keys are rejected. After validation ``aps_jobstore_path`` is
+    always populated: when the caller leaves it unset it is derived from
+    ``jobstore_path``.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    jobstore_path: Path = Field(
+        default_factory=_default_jobstore_path,
+        description="SQLite DB path holding OME's own state (run records, "
+        "counter store, idle store). Defaults to "
+        "``MemoryRoot.default().ome_db`` (``<memory-root>/.index/sqlite/ome.db``).",
+    )
+    aps_jobstore_path: Path | None = Field(
+        default=None,
+        description="SQLite DB path holding the APScheduler jobstore. Kept "
+        "in a separate file from ``jobstore_path`` so APS's sync SQLAlchemy "
+        "writer never contends with OME's async aiosqlite writer for the "
+        "same SQLite file lock. When unset, defaults to a sibling "
+        "``<stem>.aps.db`` next to ``jobstore_path``.",
+    )
+    max_concurrent_runs: Annotated[
+        int,
+        Field(
+            gt=0,
+            description="Engine-wide cap on concurrent strategy invocations "
+            "(asyncio.Semaphore in Runner).",
+        ),
+    ] = 20
+    max_retries: Annotated[
+        int,
+        Field(
+            ge=0,
+            description="Default retry budget per run, overridable via "
+            "@offline_strategy(max_retries=...) or StrategyOverride.max_retries. "
+            "0 disables retries.",
+        ),
+    ] = 1
+    max_records_per_strategy: Annotated[
+        int,
+        Field(
+            gt=0,
+            description="Per-strategy RunRecord ring-buffer size; oldest "
+            "entries are pruned on insert.",
+        ),
+    ] = 1000
+    crash_recovery_timeout_seconds: Annotated[
+        int,
+        Field(
+            gt=0,
+            description="A run lingering in RUNNING longer than this is "
+            "treated as crashed, marked CRASHED, and re-enqueued with a "
+            "fresh run_id.",
+        ),
+    ] = 1800
+    config_path: Path | None = Field(
+        default=None,
+        description="Path to ome.toml for per-strategy overrides. None "
+        "disables TOML-driven hot reload.",
+    )
+    config_watch: bool = Field(
+        default=True,
+        description="When true and config_path is set, watch the file for "
+        "edits and apply overrides at runtime.",
+    )
+    config_watch_debounce_ms: Annotated[
+        int,
+        Field(
+            gt=0,
+            description="Debounce window collapsing bursts of filesystem "
+            "events (e.g. editor saves) into one reload.",
+        ),
+    ] = 1600
+
+    @model_validator(mode="after")
+    def _derive_aps_jobstore_path(self) -> Self:
+        """Fill in ``aps_jobstore_path`` from ``jobstore_path`` when unset.
+
+        The derived path keeps the directory of ``jobstore_path`` and
+        replaces the file name with ``<stem>.aps.db`` (e.g. ``ome.db``
+        becomes ``ome.aps.db``). An explicitly supplied value is left
+        untouched.
+        """
+        # When unset, materialize as a sibling of jobstore_path so callers
+        # that pass only jobstore_path (e.g. tests using tmp_path) still get
+        # an isolated APS db rather than the global default root.
+        if self.aps_jobstore_path is None:
+            self.aps_jobstore_path = self.jobstore_path.with_name(
+                self.jobstore_path.stem + ".aps.db"
+            )
+        return self
--- a/src/everos/infra/ome/context.py
+++ b/src/everos/infra/ome/context.py
@ -0,0 +1,33 @@
+"""StrategyContext Protocol — injected as second arg to every strategy.
+
+Strategies access run-local state through `run_id` and `logger`, and
+chain-emit follow-up events via `emit(event)`. Business IO is NOT mediated
+by this Protocol — strategies directly import their persistence adapters
+(memory → infra is allowed under the project's DDD layering).
+"""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from structlog.types import FilteringBoundLogger
+
+from everos.infra.ome.events import BaseEvent
+
+
+class StrategyContext(Protocol):
+    """Structural type of the per-run context handed to a strategy function.
+
+    Members:
+
+    - ``run_id``: id of the current RunRecord, as a string.
+    - ``logger``: a structlog logger. ``strategy_name`` / ``run_id`` /
+      ``attempt`` are auto-injected into every log record produced during
+      the call, so a strategy gets those fields even when it logs through
+      some other logger.
+    - ``emit(event)``: chain-emit a follow-up event. The event's class must
+      appear in the decorator's ``emits=[...]``; otherwise
+      EmitNotDeclaredError is raised.
+    """
+
+    # Id of the RunRecord this invocation belongs to.
+    run_id: str
+    # structlog logger for use inside the strategy body.
+    logger: FilteringBoundLogger
+
+    # Declared-only Protocol member: implementations supply the behaviour.
+    async def emit(self, event: BaseEvent) -> None: ...
--- a/src/everos/infra/ome/decorator.py
+++ b/src/everos/infra/ome/decorator.py
@ -0,0 +1,69 @@
+"""@offline_strategy decorator — attaches StrategyMeta to the function.
+
+Decorator is side-effect-free; engine collects via explicit
+`engine.register(func)`.
+"""
+
+from __future__ import annotations
+
+import inspect
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Trigger
+
+# Accepted forms of ``offline_strategy(applies_to=...)``: a string, a
+# predicate called with the event, or None.
+type AppliesTo = str | Callable[[BaseEvent], bool] | None
+# Shape every strategy function must have: an async callable that takes
+# the event plus its StrategyContext and returns nothing.
+type StrategyFn = Callable[[BaseEvent, StrategyContext], Awaitable[None]]
+
+
+@dataclass(frozen=True)
+class StrategyMeta:
+    """Immutable record of one ``@offline_strategy(...)`` declaration.
+
+    Captured at decoration time and attached to the decorated function;
+    consumed by engine.register().
+    """
+
+    # Strategy name as passed to the decorator (validated non-blank there).
+    name: str
+    # Trigger object given to the decorator.
+    trigger: Trigger
+    # Event classes declared via ``emits=[...]``, frozen into a set.
+    emits: frozenset[type[BaseEvent]]
+    # Optional string / predicate from ``applies_to=``; None when omitted.
+    applies_to: AppliesTo
+    # Optional Counter gate; None when omitted.
+    gate: Counter | None
+    # Per-strategy retry budget; None when the decorator did not set one.
+    max_retries: int | None
+    # Value of the decorator's ``enabled`` flag (defaults to True there).
+    enabled: bool
+    # The decorated coroutine function itself.
+    func: StrategyFn
+
+
+def offline_strategy(
+    *,
+    name: str,
+    trigger: Trigger,
+    emits: list[type[BaseEvent]],
+    applies_to: AppliesTo = None,
+    gate: Counter | None = None,
+    max_retries: int | None = None,
+    enabled: bool = True,
+) -> Callable[[StrategyFn], StrategyFn]:
+    """Mark an async function as an OME strategy."""
+
+    if not name or not name.strip():
+        raise ValueError("offline_strategy: name must be a non-empty string")
+
+    def wrap(func: StrategyFn) -> StrategyFn:
+        if not inspect.iscoroutinefunction(func):
+            raise TypeError(
+                f"offline_strategy: {func.__name__} must be async (coroutine function)"
+            )
+        meta = StrategyMeta(
+            name=name,
+            trigger=trigger,
+            emits=frozenset(emits),
+            applies_to=applies_to,
+            gate=gate,
+            max_retries=max_retries,
+            enabled=enabled,
+            func=func,
+        )
+        func._ome_strategy_meta = meta  # type: ignore[attr-defined]
+        return func
+
+    return wrap
--- a/src/everos/infra/ome/engine.py
+++ b/src/everos/infra/ome/engine.py
@ -0,0 +1,797 @@
+"""OfflineEngine — OME runtime and scheduler.
+
+Manages strategy registration, start-stop lifecycle, event dispatch, and
+scheduling of Cron and Idle triggers via APScheduler. Enforces single-engine
+guard via portalocker for concurrent access safety.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import functools
+import inspect
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+import portalocker
+from apscheduler.executors.asyncio import AsyncIOExecutor
+from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.cron import CronTrigger
+from apscheduler.triggers.interval import IntervalTrigger
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.observability.logging import get_logger
+from everos.infra.ome._background.config_reloader import ConfigReloader
+from everos.infra.ome._background.crash_recovery import scan_and_resume
+from everos.infra.ome._background.idle_scanner import IdleScanner
+from everos.infra.ome._dispatch._state import _CURRENT_STRATEGY
+from everos.infra.ome._dispatch.dispatcher import EventDispatcher
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome._dispatch.runner import Runner
+from everos.infra.ome._stores.counter import CounterStore
+from everos.infra.ome._stores.idle import IdleStore
+from everos.infra.ome._stores.run_record import RunRecordStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.config import OMEConfig
+from everos.infra.ome.decorator import StrategyMeta
+from everos.infra.ome.events import BaseEvent, CronTick, ManualTick, resolve_topic
+from everos.infra.ome.exceptions import (
+    EngineCallFromStrategyError,
+    EngineLockHeldError,
+    OMEError,
+)
+from everos.infra.ome.records import RunRecord, RunStatus, StrategyRouteInfo
+from everos.infra.ome.triggers import Cron, Idle
+
+logger = get_logger(__name__)
+
+# Engine lookup table keyed by ``OfflineEngine._engine_id``. An engine adds
+# itself in start() and removes itself in stop() (or when a failed start is
+# rolled back). The module-level APS callbacks below receive only the id in
+# their job args and use this table to find the engine.
+_ENGINES: dict[str, OfflineEngine] = {}
+
+
+def _refuse_inside_strategy(method: Any) -> Any:
+    """Raise :class:`EngineCallFromStrategyError` when called from a strategy.
+
+    Strategies must interact with the engine only via the ``(event, ctx)``
+    parameters Runner provides; direct calls bypass the declared
+    ``emits=[...]`` contract enforced by ``ctx.emit``. Wraps sync and async
+    methods alike.
+    """
+    if inspect.iscoroutinefunction(method):
+
+        @functools.wraps(method)
+        async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+            current = _CURRENT_STRATEGY.get()
+            if current is not None:
+                raise EngineCallFromStrategyError(
+                    strategy=current.name, method=method.__name__
+                )
+            return await method(self, *args, **kwargs)
+
+        return async_wrapper
+
+    @functools.wraps(method)
+    def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
+        current = _CURRENT_STRATEGY.get()
+        if current is not None:
+            raise EngineCallFromStrategyError(
+                strategy=current.name, method=method.__name__
+            )
+        return method(self, *args, **kwargs)
+
+    return sync_wrapper
+
+
+async def _runner_entry(
+    engine_id: str,
+    strategy_name: str,
+    run_id: str,
+    event_topic: str,
+    event_payload: str,
+    max_retries_snapshot: int,
+) -> None:
+    """Module-level APS jobstore callback for a single run.
+
+    Looks the engine up by id and hands off to
+    :meth:`OfflineEngine.dispatch_run`. Pickle-safe (no closures, no
+    bound methods captured into APS jobstore args).
+    """
+    engine = _ENGINES.get(engine_id)
+    if engine is None:
+        logger.error(
+            "no_engine_for_runner",
+            engine_id=engine_id,
+            run_id=run_id,
+        )
+        return
+    await engine.dispatch_run(
+        strategy_name=strategy_name,
+        run_id=run_id,
+        event_topic=event_topic,
+        event_payload=event_payload,
+        max_retries_snapshot=max_retries_snapshot,
+    )
+
+
+async def _cron_entry(engine_id: str, strategy_name: str) -> None:
+    """Module-level APS jobstore callback for Cron triggers.
+
+    Looks the engine up by id and emits ``CronTick`` so the event flows
+    back through the standard dispatch pipeline.
+    """
+    engine = _ENGINES.get(engine_id)
+    if engine is None:
+        logger.error(
+            "no_engine_for_cron",
+            engine_id=engine_id,
+            strategy_name=strategy_name,
+        )
+        return
+    await engine.emit(CronTick(strategy_name=strategy_name))
+
+
+async def _idle_entry(engine_id: str, strategy_name: str) -> None:
+    """Module-level APS jobstore callback for Idle IntervalTriggers.
+
+    Looks the engine up by id and hands off to
+    :meth:`OfflineEngine.run_idle_scan`.
+    """
+    engine = _ENGINES.get(engine_id)
+    if engine is None:
+        logger.error(
+            "no_engine_for_idle",
+            engine_id=engine_id,
+            strategy_name=strategy_name,
+        )
+        return
+    await engine.run_idle_scan(strategy_name)
+
+
+class OfflineEngine:
+    """Offline Memory Engine — orchestrates strategy registration, scheduling,
+    and event dispatch.
+
+    Lifecycle::
+
+        engine = OfflineEngine(config=cfg)
+        engine.register(my_strategy)        # before start()
+        engine.on_dead_letter(cb)           # before start()
+        await engine.start()                # acquires file lock, boots scheduler
+        await engine.emit(SomeEvent(...))   # fan out through dispatcher
+        await engine.stop()                 # graceful shutdown
+
+    Single-process invariant: a file lock on
+    ``<jobstore_path>.lock`` guarantees at most one engine per jobstore
+    at any time (cross-process safe via ``portalocker``).
+    """
+
+    def __init__(
+        self,
+        *,
+        config: OMEConfig,
+    ) -> None:
+        self._config = config
+        self._registry = StrategyRegistry()
+        self._storage = OMEStorage(db_path=config.jobstore_path)
+        self._lock_handle: Any = None
+        self._started = False
+        self._on_dead_letter: Callable[[RunRecord], None] | None = None
+
+        # late-bound (set in start())
+        self._counter_store: CounterStore | None = None
+        self._run_record_store: RunRecordStore | None = None
+        self._dispatcher: EventDispatcher | None = None
+        self._runner: Runner | None = None
+        self._engine_sem: asyncio.Semaphore | None = None
+        self._idle_store: IdleStore | None = None
+        self._engine_id = uuid4().hex
+        self._scheduler: AsyncIOScheduler | None = None
+        self._config_reloader: ConfigReloader | None = None
+
+        # In-flight strategy-run accounting. Incremented at the moment a
+        # run is enqueued onto APS (so callers that emit-then-wait observe
+        # a non-zero count immediately), decremented in dispatch_run's
+        # finally. APS 3.x AsyncIOExecutor.shutdown(wait=True) does NOT
+        # honor wait for async coroutines (see apscheduler/executors/
+        # asyncio.py:24); this counter is how stop() / drain() learn the
+        # engine is genuinely idle.
+        self._active_runs = 0
+        self._idle_event: asyncio.Event | None = None
+
+    def register(self, func: Callable[..., Any]) -> None:
+        """Register a strategy decorated with :func:`offline_strategy`.
+
+        Must be called before :meth:`start`; registering after start raises
+        :class:`OMEError` because the scheduler has already snapshotted
+        the strategy set for Cron / Idle job creation.
+        """
+        if self._started:
+            raise OMEError("register: cannot register after start()")
+        self._registry.register(func)
+
+    @_refuse_inside_strategy
+    def reschedule_cron_job(self, name: str, expr: str) -> None:
+        """Reschedule a Cron strategy's APScheduler job to a new crontab.
+
+        APS reschedule_job is atomic: on success, pending invocations are
+        recomputed against the new trigger; on failure it raises and APS
+        state is unchanged, so callers can roll back paired registry
+        mutations.
+        """
+        if self._scheduler is None:
+            raise OMEError("reschedule_cron_job: engine not started")
+        self._scheduler.reschedule_job(
+            job_id=f"cron::{name}",
+            trigger=CronTrigger.from_crontab(expr),
+        )
+
+    @_refuse_inside_strategy
+    def reschedule_idle_job(self, name: str, scan_interval_seconds: int) -> None:
+        """Reschedule an Idle strategy's APScheduler scan job to a new interval."""
+        if self._scheduler is None:
+            raise OMEError("reschedule_idle_job: engine not started")
+        self._scheduler.reschedule_job(
+            job_id=f"idle::{name}",
+            trigger=IntervalTrigger(seconds=scan_interval_seconds),
+        )
+
+    def on_dead_letter(self, callback: Callable[[RunRecord], None]) -> None:
+        """Register a callback invoked after a run is marked DEAD_LETTER.
+
+        Must be set before start(); calls after start() are silently ignored
+        (logged at WARNING) to avoid racing with the already-instantiated
+        Runner that captured a snapshot of the callback. If called multiple
+        times before start(), only the last callback wins (no chaining).
+        """
+        if self._started:
+            logger.warning("on_dead_letter_after_start_ignored")
+            return
+        self._on_dead_letter = callback
+
+    async def start(self) -> None:
+        """Boot the engine: acquire the jobstore lock, validate the strategy
+        DAG, wire up late-bound stores, launch APScheduler, run crash
+        recovery, register Cron / Idle jobs, and optionally start the
+        config-reloader.
+
+        Idempotent: a second call while running is a no-op. On failure,
+        every partially-initialised resource (lock, scheduler thread,
+        :data:`_ENGINES` slot, config reloader) is rolled back so a retry
+        starts from a clean state.
+        """
+        if self._started:
+            return
+        await self._storage.init()
+        self._acquire_lock()
+        try:
+            self._registry.validate()
+            self._init_components()
+            self._idle_event = asyncio.Event()
+            self._idle_event.set()
+            self._launch_scheduler()
+            _ENGINES[self._engine_id] = self
+            await self._run_crash_recovery()
+            self._register_scheduled_jobs()
+            self._start_config_reloader()
+            self._started = True
+        except Exception:
+            await self._rollback_partial_start()
+            raise
+
+    def _init_components(self) -> None:
+        """Instantiate stores / dispatcher / runner / semaphore.
+
+        Called from :meth:`start` after the file lock is held and DAG
+        validation passed; never from anywhere else.
+        """
+        self._counter_store = CounterStore(storage=self._storage)
+        self._run_record_store = RunRecordStore(
+            storage=self._storage,
+            max_records_per_strategy=self._config.max_records_per_strategy,
+        )
+        self._dispatcher = EventDispatcher(
+            registry=self._registry,
+            counter_store=self._counter_store,
+        )
+        self._engine_sem = asyncio.Semaphore(self._config.max_concurrent_runs)
+        self._runner = Runner(
+            run_record_store=self._run_record_store,
+            engine_sem=self._engine_sem,
+            emit_hook=self._dispatch_event,
+            on_dead_letter=self._on_dead_letter,
+        )
+        self._idle_store = IdleStore(storage=self._storage)
+
+    def _launch_scheduler(self) -> None:
+        """Wire up AsyncIOScheduler + SQLAlchemyJobStore and start it.
+
+        The APS jobstore lives in its own SQLite file
+        (``aps_jobstore_path``) so APS's sync SQLAlchemy writes never
+        contend with OME's async aiosqlite writes for the same file lock
+        — both writers had previously raced on a single ``ome.db`` and
+        manifested as flaky ``SQLITE_BUSY: database is locked`` during
+        concurrent strategy dispatch.
+        """
+        self._scheduler = AsyncIOScheduler(
+            jobstores={
+                "default": SQLAlchemyJobStore(
+                    url=f"sqlite:///{self._config.aps_jobstore_path}",
+                ),
+            },
+            executors={"default": AsyncIOExecutor()},
+        )
+        self._scheduler.start()
+
+    async def _run_crash_recovery(self) -> None:
+        """Scan ``run_record`` for stale RUNNING rows and re-enqueue them.
+
+        Treats rows whose ``started_at`` is older than
+        ``crash_recovery_timeout_seconds`` as crashes from a previous
+        engine session: they are marked CRASHED and re-added to APS with
+        a fresh ``run_id`` reusing the original event payload.
+        """
+        await scan_and_resume(
+            run_record_store=self._run_record_store,
+            timeout_seconds=self._config.crash_recovery_timeout_seconds,
+            add_job=self._enqueue_recovery_job,
+        )
+
+    async def _enqueue_recovery_job(
+        self,
+        name: str,
+        run_id: str,
+        event_topic: str,
+        event_payload: str,
+        max_retries: int,
+    ) -> None:
+        """Add one APS job for a re-enqueued crashed run (callback for
+        :func:`scan_and_resume`).
+
+        Same enqueue-time bookkeeping as :meth:`_enqueue_run`: the run
+        will reach :meth:`dispatch_run` like any other, so the +1/-1
+        pair must wrap the ``add_job`` call here too.
+        """
+        self._on_run_enqueued()
+        try:
+            self._scheduler.add_job(
+                _runner_entry,
+                trigger="date",
+                run_date=get_utc_now(),
+                args=[
+                    self._engine_id,
+                    name,
+                    run_id,
+                    event_topic,
+                    event_payload,
+                    max_retries,
+                ],
+                id=run_id,
+                replace_existing=False,
+                misfire_grace_time=None,  # type: ignore[arg-type]  # APS accepts None ("no expiry"); stub omits it (apscheduler/job.py:213)
+            )
+        except Exception:
+            self._on_run_completed()
+            raise
+
+    def _register_scheduled_jobs(self) -> None:
+        """Add Cron / Idle APS jobs for every strategy with such a trigger.
+
+        Immediate-trigger strategies have nothing scheduled here — they
+        fire only when their declared event class is dispatched.
+        """
+        for meta in self._registry.all():
+            if isinstance(meta.trigger, Cron):
+                self._scheduler.add_job(
+                    _cron_entry,
+                    trigger=CronTrigger.from_crontab(meta.trigger.expr),
+                    args=[self._engine_id, meta.name],
+                    id=f"cron::{meta.name}",
+                    replace_existing=True,
+                )
+            elif isinstance(meta.trigger, Idle):
+                self._scheduler.add_job(
+                    _idle_entry,
+                    trigger=IntervalTrigger(seconds=meta.trigger.scan_interval_seconds),
+                    args=[self._engine_id, meta.name],
+                    id=f"idle::{meta.name}",
+                    replace_existing=True,
+                )
+
+    def _start_config_reloader(self) -> None:
+        """Start :class:`ConfigReloader` iff ``config_watch`` is on and a
+        ``config_path`` is provided.
+        """
+        if self._config.config_watch and self._config.config_path is not None:
+            self._config_reloader = ConfigReloader(
+                config_path=self._config.config_path,
+                registry=self._registry,
+                engine=self,
+                debounce_ms=self._config.config_watch_debounce_ms,
+            )
+            self._config_reloader.start()
+
+    async def _rollback_partial_start(self) -> None:
+        """Reverse-order cleanup of whatever :meth:`start` had already
+        wired up before the failure: stop reloader, drain in-flight runs
+        (best-effort, short timeout — startup failure shouldn't block on
+        recovery jobs), shut the scheduler, drop ``_ENGINES`` slot, and
+        release the file lock.
+
+        Same ``wait_idle → shutdown(wait=False)`` order as :meth:`stop`
+        for the same reasons (pause would freeze recovery jobs that
+        already own a +1).
+        """
+        # Each resource is checked for None because start() may have failed
+        # before creating it.
+        if self._config_reloader is not None:
+            try:
+                await self._config_reloader.stop()
+            finally:
+                # Drop the reference even if stop() raised.
+                self._config_reloader = None
+        if self._scheduler is not None:
+            try:
+                # Best-effort drain: the True/False result is deliberately
+                # ignored, so a timeout still proceeds to shutdown.
+                await self.wait_idle(timeout=5.0)
+                self._scheduler.shutdown(wait=False)
+            finally:
+                self._scheduler = None
+        # pop() with a default: the slot may never have been set.
+        _ENGINES.pop(self._engine_id, None)
+        self._release_lock()
+        # Reset run accounting so a retried start() begins from zero.
+        self._idle_event = None
+        self._active_runs = 0
+
+    async def wait_idle(self, *, timeout: float = 30.0) -> bool:  # noqa: ASYNC109
+        """Block until every in-flight strategy run has settled.
+
+        Returns ``True`` on idle, ``False`` if ``timeout`` elapses with
+        runs still active. "In flight" means anywhere between
+        :meth:`_enqueue_run` (which bumps the counter just before the
+        ``add_job`` call) and the end of :meth:`dispatch_run` (which
+        releases it in ``finally``).
+
+        Why this exists: APS 3.x ``AsyncIOExecutor.shutdown(wait=True)``
+        documents — in the executor source — that it cannot honor wait
+        for async coroutines and simply cancels their futures
+        (``apscheduler/executors/asyncio.py:24``). Anything depending on
+        "all jobs really completed" has to drain through this counter,
+        not the scheduler.
+        """
+        if self._idle_event is None:
+            return self._active_runs == 0
+        try:
+            await asyncio.wait_for(self._idle_event.wait(), timeout=timeout)
+            return True
+        except TimeoutError:
+            return False
+
+    async def stop(self) -> None:
+        """Shut the engine down gracefully: stop the config reloader, drain
+        in-flight strategy runs, shut the scheduler, drop the global
+        ``_ENGINES`` slot, and release the jobstore lock.
+
+        Idempotent: calling stop on an already-stopped engine is a no-op.
+
+        Drain ordering matters and is *deliberately* not
+        ``pause → wait_idle → shutdown``.
+
+        - We cannot ``pause()`` first: APS ``pause()`` freezes jobstore
+          dispatch including jobs already enqueued (see
+          ``apscheduler/schedulers/base.py:pause``: "prevent the scheduler
+          from waking up to do job processing"). Each such job already
+          owns a +1 in ``_active_runs`` from :meth:`_enqueue_run`, so
+          freezing dispatch deadlocks :meth:`wait_idle`.
+
+        - We cannot use ``shutdown(wait=True)``: APS 3.x
+          ``AsyncIOExecutor.shutdown`` documents in its own source that
+          it cannot honor wait for async coroutines and cancels their
+          futures (``apscheduler/executors/asyncio.py:24``). Cascade
+          ``CancelledError`` / "Event loop is closed" warnings follow.
+
+        Order used here: ``wait_idle`` first (lets APS finish dispatching
+        everything in the jobstore and lets every dispatch_run release its
+        counter), then ``shutdown(wait=False)`` (drops the executor cleanly
+        because there is nothing left in flight).
+
+        ``_ENGINES`` is popped only after the drain so ``_runner_entry``
+        can still find this engine via its id while finishing the last
+        few jobs.
+        """
+        if not self._started:
+            return
+        # NOTE(review): an exception from the reloader's stop() propagates
+        # from here and skips the scheduler / _ENGINES / lock teardown below,
+        # leaving _started True. The failed-start rollback wraps the same
+        # call in try/finally -- confirm the difference is intended.
+        if self._config_reloader is not None:
+            await self._config_reloader.stop()
+            self._config_reloader = None
+        if self._scheduler is not None:
+            # Drain with a fixed 30 s budget; a timeout is only logged and
+            # shutdown goes ahead regardless.
+            drained = await self.wait_idle(timeout=30.0)
+            if not drained:
+                logger.warning(
+                    "ome_stop_drain_timeout",
+                    engine_id=self._engine_id,
+                    active_runs=self._active_runs,
+                )
+            self._scheduler.shutdown(wait=False)
+            self._scheduler = None
+        _ENGINES.pop(self._engine_id, None)
+        self._release_lock()
+        self._started = False
+        # Reset run accounting; the counter is zeroed even if the drain
+        # above timed out with runs still active.
+        self._idle_event = None
+        self._active_runs = 0
+
+    def _acquire_lock(self) -> None:
+        lock_path = Path(str(self._config.jobstore_path) + ".lock")
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            handle = open(lock_path, "a+")  # noqa: SIM115
+            portalocker.lock(handle, portalocker.LOCK_EX | portalocker.LOCK_NB)
+            self._lock_handle = handle
+        except portalocker.LockException as e:
+            raise EngineLockHeldError(
+                f"another OfflineEngine instance already holds {lock_path}"
+            ) from e
+
+    def _release_lock(self) -> None:
+        if self._lock_handle is not None:
+            try:
+                portalocker.unlock(self._lock_handle)
+            finally:
+                self._lock_handle.close()
+                self._lock_handle = None
+
+    @_refuse_inside_strategy
+    async def emit(self, event: BaseEvent) -> None:
+        """Public engine event entry point.
+
+        Strategies must NOT call this directly; use ``ctx.emit`` instead.
+        The :func:`_refuse_inside_strategy` guard raises
+        :class:`EngineCallFromStrategyError` on in-strategy calls — only
+        ``ctx.emit`` enforces the strategy's declared ``emits=[...]``
+        contract.
+        """
+        await self._dispatch_event(event)
+
+    async def _dispatch_event(self, event: BaseEvent) -> None:
+        """Internal: actually run an event through dispatch.
+
+        Used by Runner's ``emit_hook`` so ``ctx.emit`` flows through
+        dispatch without tripping the public-method guard.
+        """
+        if not self._started:
+            raise OMEError("emit: engine not started")
+        # Touch idle_store for any Idle strategy listening on this event type
+        # (best-effort; errors do not block dispatch)
+        for meta in self._registry.all():
+            if isinstance(meta.trigger, Idle) and type(event) in meta.trigger.on:
+                bucket = getattr(event, meta.trigger.event_field, None)
+                if bucket is not None:
+                    try:
+                        await self._idle_store.touch(  # type: ignore[union-attr]
+                            meta.name,
+                            str(bucket),
+                            at=get_utc_now(),
+                        )
+                    except Exception as e:
+                        logger.warning(
+                            "idle_touch_failed",
+                            strategy_name=meta.name,
+                            event_field=meta.trigger.event_field,
+                            error=str(e),
+                        )
+        routes = await self._dispatcher.dispatch(event)
+        for meta, run_id in routes:
+            self._enqueue_run(meta, event, run_id)
+
+    @_refuse_inside_strategy
+    async def trigger_manual(
+        self,
+        name: str,
+        *,
+        event: BaseEvent | None = None,
+        force: bool = False,
+    ) -> None:
+        """Manually trigger one strategy.
+
+        - ``event=None`` → engine self-emits ``ManualTick(strategy_name=name)``
+        - ``force=True`` → bypass the ``enabled`` gate (``applies_to`` and
+          ``Counter`` still apply)
+
+        Routes through :meth:`EventDispatcher.dispatch` with
+        ``strategy_filter=name`` so the same three-gate logic is applied
+        as for engine-driven dispatch.
+        """
+        if not self._started:
+            raise OMEError("trigger_manual: engine not started")
+        if event is None:
+            event = ManualTick(strategy_name=name)
+        routes = await self._dispatcher.dispatch(
+            event,
+            force_enabled=force,
+            strategy_filter=name,
+        )
+        for meta, run_id in routes:
+            self._enqueue_run(meta, event, run_id)
+
+    def _enqueue_run(self, meta: StrategyMeta, event: BaseEvent, run_id: str) -> None:
+        """Add a one-shot APScheduler job that hands the event to Runner.
+
+        Computes ``max_retries_snapshot`` from meta or engine default and
+        packages a pickle-safe args tuple — the dispatch tail shared by
+        ``_dispatch_event``, ``trigger_manual``, and crash recovery.
+
+        Counter ``self._active_runs`` is bumped *before* ``add_job`` so a
+        caller that ``emit`` s then immediately ``wait_idle`` s observes a
+        non-zero count; the matching decrement lives in
+        :meth:`dispatch_run` (which is guaranteed to run for every job
+        APS dispatches). If ``add_job`` itself raises, the counter is
+        rolled back here.
+        """
+        max_retries_snapshot = (
+            meta.max_retries
+            if meta.max_retries is not None
+            else self._config.max_retries
+        )
+        event_topic = type(event).topic()
+        self._on_run_enqueued()
+        try:
+            self._scheduler.add_job(
+                _runner_entry,
+                trigger="date",
+                run_date=get_utc_now(),
+                args=[
+                    self._engine_id,
+                    meta.name,
+                    run_id,
+                    event_topic,
+                    event.model_dump_json(),
+                    max_retries_snapshot,
+                ],
+                id=run_id,
+                replace_existing=False,
+                misfire_grace_time=None,  # type: ignore[arg-type]  # APS accepts None ("no expiry"); stub omits it (apscheduler/job.py:213)
+            )
+        except Exception:
+            self._on_run_completed()
+            raise
+
+    def _on_run_enqueued(self) -> None:
+        """Bump in-flight count and mark the engine non-idle."""
+        self._active_runs += 1
+        if self._idle_event is not None:
+            self._idle_event.clear()
+
+    def _on_run_completed(self) -> None:
+        """Drop in-flight count; mark the engine idle if the count hit zero.
+
+        Never lets the counter dip below zero — that would mask a bookkeeping
+        bug rather than fix it, and a stuck-clear idle_event would deadlock
+        ``wait_idle``.
+        """
+        if self._active_runs <= 0:
+            logger.error(
+                "active_runs_underflow",
+                engine_id=self._engine_id,
+            )
+            self._active_runs = 0
+            if self._idle_event is not None:
+                self._idle_event.set()
+            return
+        self._active_runs -= 1
+        if self._active_runs == 0 and self._idle_event is not None:
+            self._idle_event.set()
+
+    async def dispatch_run(
+        self,
+        *,
+        strategy_name: str,
+        run_id: str,
+        event_topic: str,
+        event_payload: str,
+        max_retries_snapshot: int,
+    ) -> None:
+        """APS jobstore callback target for one strategy run.
+
+        Public because the module-level :func:`_runner_entry` callback
+        must cross the pickle boundary — a bound method on ``self`` is
+        not picklable into the APS jobstore. Not part of the
+        strategy-author API; intended to be called only by
+        ``_runner_entry`` (and crash recovery). Not guarded with
+        ``_refuse_inside_strategy`` because APS executors may inherit
+        the calling task's ContextVar — a strategy that ``ctx.emit``s
+        and triggers a cascade would falsely trip the guard here.
+
+        Closes the +1 the matching enqueue path opened, in ``finally``
+        so cancellation, retries, and crashes all release the count.
+        """
+        try:
+            cls = resolve_topic(event_topic)
+            event = cls.model_validate_json(event_payload)
+            meta = self._registry.get(strategy_name)
+            await self._runner.run(
+                meta,
+                event,
+                run_id=run_id,
+                max_retries_snapshot=max_retries_snapshot,
+            )
+        finally:
+            self._on_run_completed()
+
+    async def run_idle_scan(self, strategy_name: str) -> None:
+        """APS IntervalTrigger callback target for one Idle strategy.
+
+        Constructs an :class:`IdleScanner` against the engine's idle_store
+        and runs one scan, emitting :class:`IdleTick` for each overdue
+        bucket. Public for the same APS-pickle reason as
+        :meth:`dispatch_run`; unguarded for the same ContextVar-
+        inheritance reason.
+        """
+        meta = self._registry.get(strategy_name)
+        if not isinstance(meta.trigger, Idle):
+            logger.error(
+                "idle_entry_bad_trigger_type",
+                strategy_name=strategy_name,
+                trigger_type=type(meta.trigger).__name__,
+            )
+            return
+        scanner = IdleScanner(
+            strategy_name=strategy_name,
+            trigger=meta.trigger,
+            idle_store=self._idle_store,  # type: ignore[arg-type]
+            emit=self.emit,
+        )
+        await scanner.scan_once()
+
+    @_refuse_inside_strategy
+    async def inspect_dispatch(self, event: BaseEvent) -> list[StrategyRouteInfo]:
+        """Return per-strategy routing info for event (read-only).
+
+        Calls the dispatcher in inspect mode (no counter mutation).
+        """
+        if not self._started:
+            raise OMEError("inspect_dispatch: engine not started")
+        return await self._dispatcher.inspect(event)
+
+    @_refuse_inside_strategy
+    async def list_runs(
+        self,
+        strategy_name: str,
+        *,
+        status: RunStatus | None = None,
+        limit: int = 100,
+    ) -> list[RunRecord]:
+        """Return run records for ``strategy_name``, optionally filtered by status.
+
+        Args:
+            strategy_name: Strategy whose runs to fetch.
+            status: Terminal status filter (e.g., ``RunStatus.SUCCESS``); ``None``
+                returns runs in any state.
+            limit: Maximum number of records to return; results are ordered
+                ``started_at DESC``.
+
+        Returns:
+            Up to ``limit`` ``RunRecord`` instances, newest first.
+
+        Raises:
+            OMEError: Engine has not been started.
+        """
+        if not self._started:
+            raise OMEError("list_runs: engine not started")
+        return await self._run_record_store.list_runs(
+            strategy_name=strategy_name,
+            status=status,
+            limit=limit,
+        )
+
+    @_refuse_inside_strategy
+    async def get_run_status(self, run_id: str) -> RunRecord | None:
+        """Fetch a single run record by ``run_id``.
+
+        Args:
+            run_id: The 32-character ``uuid4().hex`` assigned at dispatch.
+
+        Returns:
+            The matching ``RunRecord``, or ``None`` if no row exists for that id.
+
+        Raises:
+            OMEError: Engine has not been started.
+        """
+        if not self._started:
+            raise OMEError("get_run_status: engine not started")
+        return await self._run_record_store.get(run_id)
--- a/src/everos/infra/ome/events.py
+++ b/src/everos/infra/ome/events.py
@ -0,0 +1,78 @@
+"""OME event base class + built-in tick events.
+
+All business events should subclass BaseEvent. OME emits three built-in
+ticks for engine-driven triggers (Cron / Idle / Manual).
+"""
+
+from __future__ import annotations
+
+import importlib
+from datetime import datetime
+from functools import cache
+from typing import Any
+from uuid import uuid4
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from everos.component.utils.datetime import get_utc_now
+
+
+class BaseEvent(BaseModel):
+    """Base for all events flowing through OME.
+
+    Subclasses must be Pydantic v2 models (immutable) so `model_dump_json` /
+    `model_validate_json` work for crash-recovery payload persistence.
+    """
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    event_id: str = Field(default_factory=lambda: uuid4().hex)
+    ts: datetime = Field(default_factory=get_utc_now)
+
+    @classmethod
+    def topic(cls) -> str:
+        """Stable cross-process identifier of this event class.
+
+        Returns ``"<module>:<class>"`` (colon-separated, mirroring the
+        Python event-sourcing community convention). Used by OME to
+        persist event identity into RunRecord.event_topic and to re-import
+        the class during crash recovery via ``resolve_topic``.
+        """
+        return f"{cls.__module__}:{cls.__name__}"
+
+
+@cache
+def resolve_topic(topic: str) -> type[BaseEvent]:
+    """Inverse of ``BaseEvent.topic()``; imports and returns the class.
+
+    Cached because crash recovery may resolve the same topic many times in
+    a tight loop, and ``importlib.import_module`` is non-trivial.
+    """
+    module_name, sep, cls_name = topic.partition(":")
+    if not sep or not cls_name:
+        raise ValueError(f"invalid event topic: {topic!r}")
+    mod: Any = importlib.import_module(module_name)
+    cls = getattr(mod, cls_name, None)
+    if not (isinstance(cls, type) and issubclass(cls, BaseEvent)):
+        raise TypeError(f"topic {topic!r} did not resolve to a BaseEvent subclass")
+    return cls
+
+
+class CronTick(BaseEvent):
+    """Engine-emitted event for a strategy with `trigger=Cron(...)`."""
+
+    # Required; presumably the name of the strategy whose cron schedule
+    # fired — confirm against the engine's cron job.
+    strategy_name: str
+
+
+class IdleTick(BaseEvent):
+    """Engine-emitted event for a strategy with `trigger=Idle(...)`."""
+
+    # All three fields are required. Their meaning is inferred from the names
+    # and the Idle trigger's fields — confirm against the idle scanner:
+    # the target strategy, the bucket that went quiet, and how long it has
+    # been silent.
+    strategy_name: str
+    bucket_key: str
+    idle_seconds: int
+
+
+class ManualTick(BaseEvent):
+    """Engine-emitted event for `engine.trigger_manual(name, event=None)`."""
+
+    # Required; presumably the ``name`` passed to trigger_manual — confirm
+    # against the engine.
+    strategy_name: str
--- a/src/everos/infra/ome/exceptions.py
+++ b/src/everos/infra/ome/exceptions.py
@ -0,0 +1,61 @@
+"""OME exception hierarchy."""
+
+from __future__ import annotations
+
+from everos.infra.ome.events import BaseEvent
+
+
+class OMEError(Exception):
+    """Base for all OME-internal errors.
+
+    Every other exception class in this module derives from it, so
+    ``except OMEError`` handles the whole family.
+    """
+
+
+class StartupValidationError(OMEError):
+    """Raised by engine.start() for any startup-time validation failure.
+
+    Carries no extra attributes; the failure detail is in the message.
+    """
+
+
+class EngineLockHeldError(OMEError):
+    """Raised when another OfflineEngine instance holds the jobstore lock.
+
+    Carries no extra attributes; the failure detail is in the message.
+    """
+
+
+class StrategyContractError(OMEError):
+    """Base for strategy-side contract violations.
+
+    Subclasses indicate a programming bug in the strategy code that no
+    retry can fix (wrong API usage, undeclared emit). Runner
+    short-circuits the attempt loop on these and dead-letters
+    immediately — consuming the retry budget would only delay the
+    inevitable and spam logs. External callers can ``except
+    StrategyContractError`` to handle the whole category at once.
+
+    Concrete subclasses in this module: ``EngineCallFromStrategyError``
+    and ``EmitNotDeclaredError``.
+    """
+
+
+class EngineCallFromStrategyError(StrategyContractError):
+    """A strategy called a public OfflineEngine method directly.
+
+    The convention is: strategy code interacts with the engine only via
+    the ``(event, ctx)`` parameters Runner supplies. Engine methods
+    (``emit``, ``trigger_manual``, ``inspect_dispatch``, ``list_runs``,
+    ``get_run_status``, ``reschedule_*``) are for external callers —
+    strategies invoking them bypass the framework's contracts.
+    """
+
+    def __init__(self, strategy: str, method: str) -> None:
+        self.strategy = strategy
+        self.method = method
+        super().__init__(
+            f"strategy {strategy!r} called engine.{method}() directly; "
+            "strategies must interact with the engine only via the "
+            "(event, ctx) parameters"
+        )
+
+
+class EmitNotDeclaredError(StrategyContractError):
+    """Raised when a strategy emits an event not listed in its decorator's emits."""
+
+    def __init__(self, strategy: str, event: BaseEvent) -> None:
+        self.strategy = strategy
+        self.event = event
+        super().__init__(
+            f"strategy {strategy!r} emitted {type(event).__name__!r} "
+            "which is not in its declared emits"
+        )
--- a/src/everos/infra/ome/gates.py
+++ b/src/everos/infra/ome/gates.py
@ -0,0 +1,52 @@
+"""OME gate types — declarative configuration only.
+
+Counter is the only built-in gate. The actual N-counting lives in
+_stores/counter.py keyed by (strategy_name, bucket_key).
+"""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class Counter(BaseModel):
+    """Counter gate: batch trigger by accumulated event count per bucket.
+    Each event increments the bucket counter; the `threshold`-th event
+    passes and resets.
+    """
+
+    # Declarative only: instances are immutable and unknown keyword
+    # arguments are rejected. No counting happens in this class.
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    # Required; must be strictly positive (gt=0).
+    threshold: Annotated[
+        int,
+        Field(
+            gt=0,
+            description=(
+                "Pass once every `threshold` events; threshold=1 lets every event pass."
+            ),
+        ),
+    ]
+    # Optional; must be non-negative (ge=0). Defaults to 0.
+    cooldown_seconds: Annotated[
+        int,
+        Field(
+            ge=0,
+            description=(
+                "Minimum seconds between consecutive passes per bucket; 0 disables."
+            ),
+        ),
+    ] = 0
+    # Optional; defaults to None. The name is not validated against any
+    # event class here.
+    event_field: Annotated[
+        str | None,
+        Field(
+            description=(
+                'Bucket dimension on the event (e.g. "user_id"); '
+                "None means a single global bucket."
+            ),
+        ),
+    ] = None
+
+
+# Single-member alias today; becomes a union as more gate types land.
+# Until then ``Gate`` and ``Counter`` are the same object.
+Gate = Counter
--- a/src/everos/infra/ome/records.py
+++ b/src/everos/infra/ome/records.py
@ -0,0 +1,99 @@
+"""RunRecord / RunStatus / StrategyRouteInfo / CounterProgress — pure data classes.
+
+Persistence in _stores/run_record.py.
+"""
+
+from __future__ import annotations
+
+from enum import StrEnum
+from typing import Annotated, NamedTuple, Self
+
+from pydantic import (
+    AwareDatetime,
+    BaseModel,
+    ConfigDict,
+    Field,
+    computed_field,
+    model_validator,
+)
+
+
+class RunStatus(StrEnum):
+    """Terminal-or-running state of a single strategy run."""
+
+    # The only non-terminal state: RunRecord requires finished_at and error
+    # to be None while a run is RUNNING.
+    RUNNING = "running"
+    # Terminal without an error: RunRecord requires finished_at to be set
+    # and error to be None.
+    SUCCESS = "success"
+    # FAILED / DEAD_LETTER / CRASHED are terminal with an error: RunRecord
+    # requires both finished_at and error to be set for each of them.
+    FAILED = "failed"
+    DEAD_LETTER = "dead_letter"
+    CRASHED = "crashed"
+
+
+class RunRecord(BaseModel):
+    """One row of the run_record table."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    run_id: Annotated[str, Field(min_length=1)]
+    strategy_name: Annotated[str, Field(min_length=1)]
+    status: RunStatus
+    attempt: Annotated[int, Field(ge=0)]
+    started_at: AwareDatetime
+    finished_at: AwareDatetime | None = None
+    error: Annotated[str, Field(min_length=1)] | None = None
+    event_topic: Annotated[
+        str,
+        Field(
+            min_length=1,
+            description="Stable cross-process event identifier in "
+            "``<module>:<class>`` form (see ``BaseEvent.topic()``).",
+        ),
+    ]
+    event_payload: Annotated[
+        str,
+        Field(
+            min_length=1,
+            description="JSON-encoded event (``BaseEvent.model_dump_json`` output).",
+        ),
+    ]
+    max_retries_snapshot: Annotated[int, Field(ge=0)]
+
+    @model_validator(mode="after")
+    def _check_status_invariants(self) -> Self:
+        if self.status == RunStatus.RUNNING:
+            if self.finished_at is not None:
+                raise ValueError("RunRecord: RUNNING must have finished_at=None")
+            if self.error is not None:
+                raise ValueError("RunRecord: RUNNING must have error=None")
+        else:
+            if self.finished_at is None:
+                raise ValueError(f"RunRecord: {self.status} must have finished_at set")
+            if self.status == RunStatus.SUCCESS:
+                if self.error is not None:
+                    raise ValueError("RunRecord: SUCCESS must have error=None")
+            elif self.error is None:
+                raise ValueError(f"RunRecord: {self.status} must have error set")
+        return self
+
+
+class CounterProgress(NamedTuple):
+    """Per-bucket counter progress at inspect_dispatch time."""
+
+    # Presumably the bucket's accumulated event count and the gate's
+    # configured ``Counter.threshold`` — confirm against the dispatcher.
+    current: int
+    threshold: int
+
+
+class StrategyRouteInfo(BaseModel):
+    """Per-strategy dispatch decision — returned by inspect_dispatch."""
+
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    strategy_name: Annotated[str, Field(min_length=1)]
+    enabled_pass: bool
+    applies_to_pass: bool
+    counter_pass: bool
+    counter_progress: CounterProgress | None = None
+
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def will_run(self) -> bool:
+        return self.enabled_pass and self.applies_to_pass and self.counter_pass
--- a/src/everos/infra/ome/testing/init.py
+++ b/src/everos/infra/ome/testing/init.py
@ -0,0 +1,9 @@
+"""OME testing helpers.
+
+Fake strategy context and test harness for unit testing strategies.
+"""
+
+from everos.infra.ome.testing.fakes import FakeStrategyContext as FakeStrategyContext
+from everos.infra.ome.testing.harness import StrategyTestHarness as StrategyTestHarness
+
+__all__ = ["FakeStrategyContext", "StrategyTestHarness"]
--- a/src/everos/infra/ome/testing/fakes.py
+++ b/src/everos/infra/ome/testing/fakes.py
@ -0,0 +1,38 @@
+"""In-memory test doubles for the OME StrategyContext Protocol.
+
+Use FakeStrategyContext when you want to unit-test a strategy function
+in isolation without spinning up a full OfflineEngine.
+"""
+
+from __future__ import annotations
+
+from everos.core.observability.logging import get_logger
+from everos.infra.ome.events import BaseEvent
+
+
+class FakeStrategyContext:
+    """Implements StrategyContext Protocol; collects emit() calls in a list.
+
+    Nothing is dispatched or persisted: ``emit`` only appends to
+    ``emitted``, and it performs no check against a strategy's declared
+    emits. Assert on ``emitted`` after calling the strategy under test.
+
+    Attributes:
+        run_id: Unique identifier for this run (default: "fake_run").
+        logger: A structlog BoundLogger for test logging.
+        emitted: List of BaseEvent objects passed to emit().
+    """
+
+    def __init__(self, *, run_id: str = "fake_run") -> None:
+        """Initialize a FakeStrategyContext.
+
+        Args:
+            run_id: Run identifier, defaults to "fake_run".
+        """
+        self.run_id = run_id
+        self.logger = get_logger("ome.fake_ctx")
+        # Events are kept in the order emit() was called.
+        self.emitted: list[BaseEvent] = []
+
+    async def emit(self, event: BaseEvent) -> None:
+        """Collect an event into the emitted list.
+
+        Args:
+            event: The BaseEvent to emit.
+        """
+        self.emitted.append(event)
--- a/src/everos/infra/ome/testing/harness.py
+++ b/src/everos/infra/ome/testing/harness.py
@ -0,0 +1,118 @@
+"""StrategyTestHarness — full OfflineEngine on a tmp SQLite db.
+
+Designed for end-to-end strategy tests: register, start, emit, drain
+until terminal, inspect run records. Cleans up the tmp directory on exit.
+"""
+
+from __future__ import annotations
+
+import shutil
+from pathlib import Path
+from tempfile import mkdtemp
+from typing import Any
+
+from everos.infra.ome.config import OMEConfig
+from everos.infra.ome.engine import OfflineEngine
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.records import RunRecord, RunStatus
+
+
+class StrategyTestHarness:
+    """Async context manager wrapping OfflineEngine on a tmp SQLite db.
+
+    Provides a test-friendly interface to register strategies, emit events,
+    and inspect run records.
+
+    Example:
+        async with StrategyTestHarness() as h:
+            h.register(my_strategy_func)
+            await h.start()
+            await h.emit(MyEvent())
+            await h.drain(timeout=5)
+            runs = await h.list_runs("my_strategy")
+            assert len(runs) == 1
+    """
+
+    def __init__(self) -> None:
+        """Initialize a StrategyTestHarness with a temp SQLite db."""
+        self._tmpdir = Path(mkdtemp(prefix="ome_test_"))
+        cfg = OMEConfig(
+            jobstore_path=self._tmpdir / "ome.db",
+            config_watch=False,
+            max_concurrent_runs=20,
+            max_retries=1,
+        )
+        self._engine = OfflineEngine(config=cfg)
+
+    async def __aenter__(self) -> StrategyTestHarness:
+        """Enter the async context."""
+        return self
+
+    async def __aexit__(self, *exc: Any) -> None:
+        """Exit the async context and clean up temp resources."""
+        try:
+            await self._engine.stop()
+        finally:
+            shutil.rmtree(self._tmpdir, ignore_errors=True)  # noqa: SLF001
+
+    def register(self, func: Any) -> None:
+        """Register a strategy function.
+
+        Args:
+            func: A function decorated with @offline_strategy.
+        """
+        self._engine.register(func)
+
+    async def start(self) -> None:
+        """Start the OfflineEngine."""
+        await self._engine.start()
+
+    async def emit(self, event: BaseEvent) -> None:
+        """Emit an event to the engine.
+
+        Args:
+            event: A BaseEvent subclass instance.
+        """
+        await self._engine.emit(event)
+
+    async def drain(self, *, timeout: float = 30.0) -> None:  # noqa: ASYNC109
+        """Wait until every enqueued strategy run has finished.
+
+        Delegates to :meth:`OfflineEngine.wait_idle`, which tracks runs
+        from the moment ``_enqueue_run`` bumps the counter (so a caller
+        that ``emit``s then immediately ``drain``s does NOT see false-
+        idle while APS is still launching the coroutine). Polling
+        ``find_running`` alone — the previous implementation — missed
+        that gap between ``add_job`` and ``mark_running`` and let tests
+        race past in-flight jobs.
+
+        Args:
+            timeout: Maximum seconds to wait, defaults to 30.0.
+
+        Raises:
+            TimeoutError: if runs remain in flight after ``timeout`` seconds.
+        """
+        if not await self._engine.wait_idle(timeout=timeout):
+            raise TimeoutError(
+                f"drain: engine still has "
+                f"{self._engine._active_runs} in-flight runs after {timeout}s"  # noqa: SLF001
+            )
+
+    async def list_runs(
+        self,
+        strategy_name: str,
+        status: RunStatus | None = None,
+    ) -> list[RunRecord]:
+        """List run records for a strategy, optionally filtered by status.
+
+        Args:
+            strategy_name: The name of the strategy.
+            status: Optional status filter (e.g. RunStatus.SUCCESS).
+
+        Returns:
+            A list of RunRecord objects.
+        """
+        return await self._engine._run_record_store.list_runs(  # noqa: SLF001
+            strategy_name=strategy_name,
+            status=status,
+        )
--- a/src/everos/infra/ome/triggers.py
+++ b/src/everos/infra/ome/triggers.py
@ -0,0 +1,76 @@
+"""OME trigger types — declarative descriptors of when a strategy fires.
+
+Three concrete triggers: Immediate / Cron / Idle. Engine dispatches via
+`isinstance(meta.trigger, ...)` to pick the registration path.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated, Self
+
+from apscheduler.triggers.cron import CronTrigger
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+from everos.infra.ome.events import BaseEvent
+
+
+# Shared base for the trigger descriptors: instances are immutable and
+# unknown keyword arguments are rejected.
+class _TriggerBase(BaseModel):
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+
+class Immediate(_TriggerBase):
+    """Fire as soon as an event of any class in `on` is dispatched."""
+
+    # Required; must list at least one BaseEvent subclass (min_length=1).
+    on: Annotated[list[type[BaseEvent]], Field(min_length=1)]
+
+
+class Cron(_TriggerBase):
+    """Fire on a cron schedule. Engine emits CronTick to the strategy."""
+
+    # Required, non-empty crontab expression; checked by the validator below.
+    expr: Annotated[str, Field(min_length=1)]
+
+    @field_validator("expr")
+    @classmethod
+    def _validate_crontab(cls, v: str) -> str:
+        # Delegates to APS's own parser so the trigger object cannot
+        # represent any crontab that APS would later refuse.
+        # The parsed trigger is discarded; the call is made only so that the
+        # parser's exception propagates for an unparsable expression.
+        CronTrigger.from_crontab(v)
+        return v
+
+
+class Idle(_TriggerBase):
+    """Fire after every class in `on` has been silent (bucketed by
+    `event_field`) for `idle_seconds` — AND across classes. Engine
+    emits IdleTick.
+    """
+
+    # Required; must list at least one BaseEvent subclass (min_length=1).
+    on: Annotated[list[type[BaseEvent]], Field(min_length=1)]
+    # Required; must name a field that exists on every class in ``on``.
+    event_field: str
+    # Required; strictly positive.
+    idle_seconds: Annotated[int, Field(gt=0)]
+    # Strictly positive. With the default of 60 the bound check below only
+    # accepts idle_seconds >= 120; pass a smaller value for shorter windows.
+    scan_interval_seconds: Annotated[
+        int,
+        Field(gt=0, description="Per-strategy scan cadence; <= idle_seconds / 2."),
+    ] = 60
+
+    @model_validator(mode="after")
+    def _validate_event_field(self) -> Self:
+        # Fail at declaration time if any subscribed event class lacks the
+        # bucketing field; the error lists that class's available fields.
+        for event_cls in self.on:
+            if self.event_field not in event_cls.model_fields:  # type: ignore[operator]  # Pydantic model_fields → dict via @deprecated_instance_property (pydantic/main.py:277)
+                available = list(event_cls.model_fields)  # type: ignore[arg-type]  # same as above
+                raise ValueError(
+                    f"event_field {self.event_field!r} not found in "
+                    f"{event_cls.__name__} fields (available: {available})"
+                )
+        return self
+
+    @model_validator(mode="after")
+    def _validate_scan_interval_bound(self) -> Self:
+        # Integer division: for idle_seconds == 1 the bound is 0, so no
+        # positive scan interval can satisfy it.
+        if self.scan_interval_seconds > self.idle_seconds // 2:
+            raise ValueError(
+                f"Idle: scan_interval_seconds ({self.scan_interval_seconds}) "
+                f"must be <= idle_seconds // 2 ({self.idle_seconds // 2})"
+            )
+        return self
+
+
+# Union of every concrete trigger type defined in this module.
+Trigger = Immediate | Cron | Idle
--- a/src/everos/infra/persistence/init.py
+++ b/src/everos/infra/persistence/init.py
--- a/src/everos/infra/persistence/lancedb/init.py
+++ b/src/everos/infra/persistence/lancedb/init.py
@ -0,0 +1,132 @@
+"""LanceDB business persistence layer.
+
+Sits on top of :mod:`everos.core.persistence.lancedb` (connection
+factory + ``BaseLanceTable`` + ``LanceRepoBase``) and provides:
+
+    * lazy process-wide connection + per-name table cache
+      (:mod:`.lancedb_manager`)
+    * concrete schemas under :mod:`.tables`
+    * concrete repository singletons under :mod:`.repos`
+
+External usage::
+
+    from everos.infra.persistence.lancedb import (
+        get_connection, get_table, dispose_connection,
+        Episode, AtomicFact, Foresight, AgentCase, AgentSkill, UserProfile,
+        episode_repo, atomic_fact_repo, foresight_repo,
+        agent_case_repo, agent_skill_repo, user_profile_repo,
+    )
+
+Three index kinds: scalar / BM25 / vector. Tables are created lazily on
+first access; row population is the cascade daemon's job (see
+``12_cascade_design.md``).
+"""
+
+# Importing ``tables`` registers every business :class:`BaseLanceTable`
+# schema so callers can rely on the package alone to surface every schema.
+from . import tables as tables  # noqa: F401
+from .lancedb_manager import dispose_connection as dispose_connection
+from .lancedb_manager import get_connection as get_connection
+from .lancedb_manager import get_table as get_table
+from .repos import agent_case_repo as agent_case_repo
+from .repos import agent_skill_repo as agent_skill_repo
+from .repos import atomic_fact_repo as atomic_fact_repo
+from .repos import episode_repo as episode_repo
+from .repos import foresight_repo as foresight_repo
+from .repos import user_profile_repo as user_profile_repo
+from .tables import AgentCase as AgentCase
+from .tables import AgentSkill as AgentSkill
+from .tables import AtomicFact as AtomicFact
+from .tables import Episode as Episode
+from .tables import Foresight as Foresight
+from .tables import ParentType as ParentType
+from .tables import UserProfile as UserProfile
+
+# Every business table schema. ``ensure_business_indexes`` and
+# ``verify_business_schemas`` both iterate this tuple, so a new table must
+# be added here to be indexed and checked at startup.
+_BUSINESS_SCHEMAS = (
+    Episode,
+    AtomicFact,
+    Foresight,
+    AgentCase,
+    AgentSkill,
+    UserProfile,
+)
+
+
+class LanceDBSchemaMismatchError(RuntimeError):
+    """Raised at startup when an on-disk LanceDB table's columns drift
+    from the corresponding Pydantic schema.
+
+    Cascade re-builds LanceDB from md (the SoT), so the recovery is
+    deterministic: delete the index directory and let it reindex.
+    The error message carries the explicit
+    ``rm -rf ~/.everos/.index/lancedb`` instruction; see
+    ``docs/cascade_runbook.md`` for the wider context.
+    """
+
+
+async def ensure_business_indexes() -> None:
+    """Ensure FTS (BM25) indexes for every business table (idempotent).
+
+    Called once at startup by :class:`LanceDBLifespanProvider`. Walks
+    every schema in ``_BUSINESS_SCHEMAS`` (each schema owns its
+    ``TABLE_NAME`` + ``BM25_FIELDS``), opens each table via
+    :func:`get_table`, and delegates to ``schema.ensure_fts_indexes(table)``.
+    Already-indexed columns are skipped, so re-runs are no-ops.
+
+    Adding a new business table = adding it to ``_BUSINESS_SCHEMAS``;
+    everything else (table name, columns to index) reads off the
+    schema's ClassVars.
+    """
+    for schema in _BUSINESS_SCHEMAS:
+        # get_table creates the table from the schema when it does not exist.
+        table = await get_table(schema.TABLE_NAME, schema)
+        await schema.ensure_fts_indexes(table)
+
+
+async def verify_business_schemas() -> None:
+    """Fail loud at startup if an existing LanceDB table's columns don't
+    match its current Pydantic schema.
+
+    LanceDB doesn't migrate columns automatically; an older index dir
+    (e.g. with the pre-``content_sha256`` shape) would fail
+    unpredictably on upsert. Checking column names up-front turns that
+    into a clean startup error pointing the user at the recovery path
+    (``rm -rf ~/.everos/.index/lancedb`` — the index is rebuildable
+    from md, see ``12_cascade_design.md``).
+    """
+    for schema in _BUSINESS_SCHEMAS:
+        table = await get_table(schema.TABLE_NAME, schema)
+        arrow_schema = await table.schema()
+        actual = set(arrow_schema.names)
+        expected = set(schema.model_fields.keys())
+        missing = expected - actual
+        extra = actual - expected
+        if missing or extra:
+            raise LanceDBSchemaMismatchError(
+                f"LanceDB table {schema.TABLE_NAME!r} schema drift: "
+                f"missing={sorted(missing)}, extra={sorted(extra)}. "
+                "The index is rebuildable from md — recover with "
+                "`rm -rf ~/.everos/.index/lancedb` and restart."
+            )
+
+
+__all__ = [
+    "AgentCase",
+    "AgentSkill",
+    "AtomicFact",
+    "Episode",
+    "Foresight",
+    "LanceDBSchemaMismatchError",
+    "ParentType",
+    "UserProfile",
+    "agent_case_repo",
+    "agent_skill_repo",
+    "atomic_fact_repo",
+    "dispose_connection",
+    "ensure_business_indexes",
+    "episode_repo",
+    "foresight_repo",
+    "get_connection",
+    "get_table",
+    "user_profile_repo",
+    "verify_business_schemas",
+]
--- a/src/everos/infra/persistence/lancedb/lancedb_manager.py
+++ b/src/everos/infra/persistence/lancedb/lancedb_manager.py
@ -0,0 +1,82 @@
+"""LanceDB connection + table singletons (lazy + process-wide, async).
+
+The single place that owns the LanceDB **runtime state**: the async
+connection and per-name table cache. Connection opens lazily on first
+:func:`get_connection` call; tables are cached after first
+:func:`get_table`. The :class:`LanceDBLifespanProvider` calls
+:func:`dispose_connection` on shutdown; in scripts you can call it
+manually.
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+from lancedb import AsyncConnection, AsyncTable
+
+from everos.config import load_settings
+from everos.core.observability.logging import get_logger
+from everos.core.persistence import BaseLanceTable, MemoryRoot, open_lancedb_connection
+
+logger = get_logger(__name__)
+
+# Process-wide runtime state. The functions in this module read and write
+# it only while holding ``_lock``.
+_conn: AsyncConnection | None = None
+# Open table handles keyed by table name; filled by ``get_table``.
+_tables: dict[str, AsyncTable] = {}
+# Serialises connection setup, table lookup/creation and disposal.
+_lock = asyncio.Lock()
+
+
+async def get_connection() -> AsyncConnection:
+    """Return the process-wide async LanceDB connection.
+
+    Built on first call from ``MemoryRoot.default().lancedb_dir`` and
+    ``Settings.lancedb``. Subsequent calls return the same instance.
+    """
+    async with _lock:
+        return await _ensure_connection_locked()
+
+
+async def get_table(
+    name: str,
+    schema: type[BaseLanceTable],
+) -> AsyncTable:
+    """Open the named table (creating from ``schema`` if missing). Cached."""
+    async with _lock:
+        if name not in _tables:
+            conn = await _ensure_connection_locked()
+            existing = await conn.list_tables()
+            if name in list(existing.tables):
+                _tables[name] = await conn.open_table(name)
+                logger.info("lancedb_table_opened", name=name)
+            else:
+                _tables[name] = await conn.create_table(name, schema=schema)
+                logger.info("lancedb_table_created", name=name)
+        return _tables[name]
+
+
+async def dispose_connection() -> None:
+    """Close the connection + clear table cache. Idempotent."""
+    global _conn
+    async with _lock:
+        if _conn is not None:
+            try:
+                _conn.close()  # AsyncConnection.close() is sync in lancedb 0.30
+            except Exception:
+                logger.exception("lancedb_close_failed")
+            logger.info("lancedb_connection_closed")
+        _conn = None
+        _tables.clear()
+
+
+async def _ensure_connection_locked() -> AsyncConnection:
+    """Open the connection if not yet open. Caller must hold ``_lock``."""
+    global _conn
+    if _conn is None:
+        settings = load_settings()
+        memory_root = MemoryRoot.default()
+        memory_root.ensure()
+        _conn = await open_lancedb_connection(memory_root.lancedb_dir, settings.lancedb)
+        logger.info(
+            "lancedb_connection_opened",
+            path=str(memory_root.lancedb_dir),
+        )
+    return _conn
--- a/src/everos/infra/persistence/lancedb/repos/init.py
+++ b/src/everos/infra/persistence/lancedb/repos/init.py
@ -0,0 +1,37 @@
+"""LanceDB repo singletons (one per business table).
+
+Each repo is a module-level singleton — the table connection is
+resolved lazily on first call via :func:`..lancedb_manager.get_table`.
+Subclassing :class:`LanceRepoBase` lets each repo carry table-specific
+helpers later (e.g. ``find_by_owner``, ``search_for_owner``) without
+needing a separate factory.
+
+External usage::
+
+    from everos.infra.persistence.lancedb.repos import (
+        episode_repo,
+        atomic_fact_repo,
+        foresight_repo,
+        agent_case_repo,
+        agent_skill_repo,
+        user_profile_repo,
+    )
+
+    await episode_repo.add([Episode(...)])
+"""
+
+from .agent_case import agent_case_repo as agent_case_repo
+from .agent_skill import agent_skill_repo as agent_skill_repo
+from .atomic_fact import atomic_fact_repo as atomic_fact_repo
+from .episode import episode_repo as episode_repo
+from .foresight import foresight_repo as foresight_repo
+from .user_profile import user_profile_repo as user_profile_repo
+
+__all__ = [
+    "agent_case_repo",
+    "agent_skill_repo",
+    "atomic_fact_repo",
+    "episode_repo",
+    "foresight_repo",
+    "user_profile_repo",
+]
--- a/src/everos/infra/persistence/lancedb/repos/agent_case.py
+++ b/src/everos/infra/persistence/lancedb/repos/agent_case.py
@ -0,0 +1,20 @@
+"""LanceDB repo singleton for the ``agent_case`` table."""
+
+from __future__ import annotations
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceDailyLogRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.agent_case import AgentCase
+
+
+class _AgentCaseRepo(LanceDailyLogRepoBase[AgentCase]):
+    """Repository for the ``agent_case`` table; adds no helpers of its own."""
+
+    schema = AgentCase
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Resolve the table through ``get_table`` (cached; created if missing)."""
+        return await get_table(self.schema.TABLE_NAME, self.schema)
+
+
+# Module-level singleton; import this rather than instantiating the class.
+agent_case_repo = _AgentCaseRepo()
--- a/src/everos/infra/persistence/lancedb/repos/agent_skill.py
+++ b/src/everos/infra/persistence/lancedb/repos/agent_skill.py
@ -0,0 +1,84 @@
+"""LanceDB repo singleton for the ``agent_skill`` table."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.agent_skill import AgentSkill
+
+
+class _AgentSkillRepo(LanceRepoBase[AgentSkill]):
+    schema = AgentSkill
+
+    async def _table_lookup(self) -> AsyncTable:
+        return await get_table(self.schema.TABLE_NAME, self.schema)
+
+    async def count_in_cluster(self, *, owner_id: str, cluster_id: str) -> int:
+        """Count skills under one ``(owner_id, cluster_id)``."""
+        table = await self._table()
+        return await table.count_rows(filter=_in_cluster(owner_id, cluster_id))
+
+    async def find_in_cluster(
+        self, *, owner_id: str, cluster_id: str, limit: int
+    ) -> list[AgentSkill]:
+        """Scalar fetch within one cluster; no ranking, capped at ``limit``."""
+        return await self.find_where(_in_cluster(owner_id, cluster_id), limit=limit)
+
+    async def find_topk_relevant_in_cluster(
+        self,
+        *,
+        owner_id: str,
+        cluster_id: str,
+        query_vector: Sequence[float],
+        top_k: int,
+    ) -> list[AgentSkill]:
+        """Top-K cosine-relevant skills inside one cluster.
+
+        Cosine ranking is pushed down to LanceDB native ANN.
+        ``distance_type("cosine")`` matches
+        :class:`AgentSkillRecaller.dense_recall`, keeping ranking
+        semantics consistent across read paths.
+
+        Raises:
+            ValueError: When ``query_vector`` is empty — no relevance
+                signal is a caller-side policy decision; use
+                :meth:`find_in_cluster` for the scalar fallback.
+        """
+        if not query_vector:
+            raise ValueError(
+                "query_vector must be non-empty; "
+                "call find_in_cluster for the scalar fallback"
+            )
+        table = await self._table()
+        rows = await (
+            table.query()
+            .nearest_to(list(query_vector))
+            .distance_type("cosine")
+            .where(_in_cluster(owner_id, cluster_id))
+            .limit(top_k)
+            .to_list()
+        )
+        # LanceDB appends ``_distance`` to ranked rows; strip it before
+        # ``model_validate`` so this stays robust regardless of
+        # pydantic ``extra`` mode on the schema.
+        return [
+            self.schema.model_validate({k: v for k, v in r.items() if k != "_distance"})
+            for r in rows
+        ]
+
+
+def _q(value: str) -> str:
+    """SQL single-quote escape for LanceDB ``where`` predicate literals."""
+    return value.replace("'", "''")
+
+
+def _in_cluster(owner_id: str, cluster_id: str) -> str:
+    return f"owner_id = '{_q(owner_id)}' AND cluster_id = '{_q(cluster_id)}'"
+
+
+# Module-level singleton; import this rather than instantiating the class.
+agent_skill_repo = _AgentSkillRepo()
--- a/src/everos/infra/persistence/lancedb/repos/atomic_fact.py
+++ b/src/everos/infra/persistence/lancedb/repos/atomic_fact.py
@ -0,0 +1,20 @@
+"""LanceDB repo singleton for the ``atomic_fact`` table."""
+
+from __future__ import annotations
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceDailyLogRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.atomic_fact import AtomicFact
+
+
+class _AtomicFactRepo(LanceDailyLogRepoBase[AtomicFact]):
+    """Repository for the ``atomic_fact`` table; adds no helpers of its own."""
+
+    schema = AtomicFact
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Resolve the table through ``get_table`` (cached; created if missing)."""
+        return await get_table(self.schema.TABLE_NAME, self.schema)
+
+
+# Module-level singleton; import this rather than instantiating the class.
+atomic_fact_repo = _AtomicFactRepo()
--- a/src/everos/infra/persistence/lancedb/repos/episode.py
+++ b/src/everos/infra/persistence/lancedb/repos/episode.py
@ -0,0 +1,20 @@
+"""LanceDB repo singleton for the ``episode`` table."""
+
+from __future__ import annotations
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceDailyLogRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.episode import Episode
+
+
+class _EpisodeRepo(LanceDailyLogRepoBase[Episode]):
+    """Repo over the ``episode`` table, typed by :class:`Episode`."""
+
+    schema = Episode
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Fetch the table via ``get_table`` using the schema's table name."""
+        row_schema = self.schema
+        table = await get_table(row_schema.TABLE_NAME, row_schema)
+        return table
+
+
+# Module-level singleton instance of the repo.
+episode_repo = _EpisodeRepo()
--- a/src/everos/infra/persistence/lancedb/repos/foresight.py
+++ b/src/everos/infra/persistence/lancedb/repos/foresight.py
@ -0,0 +1,20 @@
+"""LanceDB repo singleton for the ``foresight`` table."""
+
+from __future__ import annotations
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceDailyLogRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.foresight import Foresight
+
+
+class _ForesightRepo(LanceDailyLogRepoBase[Foresight]):
+    """Repo over the ``foresight`` table, typed by :class:`Foresight`."""
+
+    schema = Foresight
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Fetch the table via ``get_table`` using the schema's table name."""
+        row_schema = self.schema
+        table = await get_table(row_schema.TABLE_NAME, row_schema)
+        return table
+
+
+# Module-level singleton instance of the repo.
+foresight_repo = _ForesightRepo()
--- a/src/everos/infra/persistence/lancedb/repos/user_profile.py
+++ b/src/everos/infra/persistence/lancedb/repos/user_profile.py
@ -0,0 +1,20 @@
+"""LanceDB repo singleton for the ``user_profile`` table."""
+
+from __future__ import annotations
+
+from lancedb import AsyncTable
+
+from everos.core.persistence.lancedb import LanceRepoBase
+
+from ..lancedb_manager import get_table
+from ..tables.user_profile import UserProfile
+
+
+class _UserProfileRepo(LanceRepoBase[UserProfile]):
+    """Repo over the ``user_profile`` table, typed by :class:`UserProfile`."""
+
+    schema = UserProfile
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Fetch the table via ``get_table`` using the schema's table name."""
+        row_schema = self.schema
+        table = await get_table(row_schema.TABLE_NAME, row_schema)
+        return table
+
+
+# Module-level singleton instance of the repo.
+user_profile_repo = _UserProfileRepo()
--- a/src/everos/infra/persistence/lancedb/tables/init.py
+++ b/src/everos/infra/persistence/lancedb/tables/init.py
@ -0,0 +1,35 @@
+"""LanceDB table schemas (one ``BaseLanceTable`` subclass per business table).
+
+Schemas live here; cascade-daemon-driven row population is wired
+through the matching repo singletons in :mod:`..repos`.
+
+External usage::
+
+    from everos.infra.persistence.lancedb.tables import (
+        Episode,
+        AtomicFact,
+        Foresight,
+        AgentCase,
+        AgentSkill,
+        UserProfile,
+        ParentType,
+    )
+"""
+
+from ._parent_type import ParentType as ParentType
+from .agent_case import AgentCase as AgentCase
+from .agent_skill import AgentSkill as AgentSkill
+from .atomic_fact import AtomicFact as AtomicFact
+from .episode import Episode as Episode
+from .foresight import Foresight as Foresight
+from .user_profile import UserProfile as UserProfile
+
+# Public names re-exported by this package (kept in alphabetical order).
+__all__ = [
+    "AgentCase",
+    "AgentSkill",
+    "AtomicFact",
+    "Episode",
+    "Foresight",
+    "ParentType",
+    "UserProfile",
+]
--- a/src/everos/infra/persistence/lancedb/tables/_parent_type.py
+++ b/src/everos/infra/persistence/lancedb/tables/_parent_type.py
@ -0,0 +1,24 @@
+"""``ParentType`` — provenance label for memory records linked back to a source.
+
+Currently the only value is :attr:`ParentType.MEMCELL`: every business row
+(episode / foresight / atomic_fact / agent_case) points back to a source
+MemCell. The earlier opensource design enumerated ``"episode"`` as an
+alternative parent but the production path never wrote that value, so the
+new framework collapses the enum to its single in-use member.
+
+Kept as an :class:`enum.StrEnum` (rather than a bare string constant) so that
+adding a future parent kind stays a non-breaking enum extension. LanceDB's
+pydantic-to-arrow conversion does not accept ``Enum`` field annotations,
+so table schemas declare ``parent_type: str = ParentType.MEMCELL.value``
+and reference the enum only at the default-value level.
+"""
+
+from __future__ import annotations
+
+from enum import StrEnum
+
+
+class ParentType(StrEnum):
+    """Provenance label of a memory record's parent.
+
+    Subclasses :class:`~enum.StrEnum`, so each member is also a ``str``.
+    ``MEMCELL`` is the only member defined here.
+    """
+
+    # String value is the lowercase label ``"memcell"``.
+    MEMCELL = "memcell"
--- a/src/everos/infra/persistence/lancedb/tables/agent_case.py
+++ b/src/everos/infra/persistence/lancedb/tables/agent_case.py
@ -0,0 +1,84 @@
+"""LanceDB ``agent_case`` table schema.
+
+Field set per 17_lancedb_tables_design.md §3.4. Each row records one
+task an agent worked on: intent, approach, optional pivotal insight,
+and a quality score. A MemCell extracted on the agent's own execution
+log yields at most one AgentCase.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable, Vector
+
+from ._parent_type import ParentType
+
+# Width passed to ``Vector(...)`` for the ``vector`` column below.
+_DIM = 1024
+
+
+class AgentCase(BaseLanceTable):
+    """One agent case indexed in LanceDB.
+
+    Schema of the ``agent_case`` table. The display texts
+    (``task_intent`` / ``approach``) are paired with ``*_tokens``
+    columns, and those token columns are the ones listed in
+    ``BM25_FIELDS``.
+    """
+
+    TABLE_NAME: ClassVar[str] = "agent_case"
+    BM25_FIELDS: ClassVar[list[str]] = ["task_intent_tokens", "approach_tokens"]
+
+    id: str
+    """PK = ``<owner_id>_<entry_id>``."""
+
+    entry_id: str
+    """md-side seq id ``ac_<YYYYMMDD>_<NNNN>``."""
+
+    owner_id: str
+    """The owning ``agent_id``."""
+
+    owner_type: str
+    """Fixed ``"agent"`` for this table."""
+
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+
+    session_id: str
+    timestamp: _dt.datetime
+
+    parent_type: str = ParentType.MEMCELL.value
+    """Source pointer — always :attr:`ParentType.MEMCELL` for agent case."""
+
+    parent_id: str
+    """Source memcell id (one memcell ↔ one case)."""
+
+    quality_score: float
+    """0.0–1.0; task completion / quality estimate."""
+
+    task_intent: str
+    """≤ 50 tokens; original surface form (returned for display)."""
+
+    task_intent_tokens: str
+    """App-layer pre-tokenised ``task_intent`` — BM25 main field
+    (whitespace tokenizer); display goes through ``task_intent``."""
+
+    approach: str
+    """≤ 1000 tokens; step-by-step approach (display)."""
+
+    approach_tokens: str
+    """App-layer pre-tokenised ``approach`` — BM25 secondary field
+    (whitespace tokenizer). Searched in parallel with
+    ``task_intent_tokens`` then merged by max score in the recall
+    layer; task_intent typically scores higher because it's the
+    retrieval anchor, but approach catches queries that match a step
+    detail."""
+
+    key_insight: str | None = None
+    """≤ 40 tokens; pivotal strategy shift, optional."""
+
+    # NOTE(review): presumably the path of the source markdown file for
+    # this row — confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 hex digest over the **content-bearing fields only** of
+    the md entry — TaskIntent / Approach / KeyInsight sections plus
+    the ``quality_score`` inline. Audit inline (owner_id /
+    session_id / timestamp / parent_id) is NOT in the hash. See
+    :attr:`AgentCaseHandler.content_change_keys`."""
+
+    # ``Vector(_DIM)`` is a call expression in annotation position, hence
+    # the ``valid-type`` ignore for the type checker.
+    vector: Vector(_DIM)  # type: ignore[valid-type]
--- a/src/everos/infra/persistence/lancedb/tables/agent_skill.py
+++ b/src/everos/infra/persistence/lancedb/tables/agent_skill.py
@ -0,0 +1,80 @@
+"""LanceDB ``agent_skill`` table schema.
+
+Field set per 17_lancedb_tables_design.md §3.5. AgentSkill is a *named
+entity* rather than a daily-log entry — PK is ``<owner_id>_<skill_name>``
+(no date / seq), and same agent + same name is the same row (upsert).
+
+``content`` is cascade-assembled from ``SKILL.md`` body plus every
+``references/*.md`` sibling; ``scripts/`` is not indexed.
+"""
+
+from __future__ import annotations
+
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable, Vector
+
+# Width passed to ``Vector(...)`` for the ``vector`` column below.
+_DIM = 1024
+
+
+class AgentSkill(BaseLanceTable):
+    """One agent skill indexed in LanceDB.
+
+    Schema of the ``agent_skill`` table. Unlike the daily-log tables in
+    this package there is no ``entry_id`` / ``timestamp`` column; the
+    primary key is built from ``owner_id`` and ``name``. The two
+    ``*_tokens`` columns are the ones listed in ``BM25_FIELDS``.
+    """
+
+    TABLE_NAME: ClassVar[str] = "agent_skill"
+    BM25_FIELDS: ClassVar[list[str]] = ["description_tokens", "content_tokens"]
+
+    id: str
+    """PK = ``<owner_id>_<skill_name>``."""
+
+    owner_id: str
+    """The owning ``agent_id``."""
+
+    owner_type: str
+    """Fixed ``"agent"`` for this table."""
+
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+
+    name: str
+    """Skill identifier; half of the PK."""
+
+    description: str
+    """When-to-use / purpose — original surface form (Tier-1 ad copy)."""
+
+    description_tokens: str
+    """App-layer pre-tokenised ``description`` — BM25 main field
+    (whitespace tokenizer); display goes through ``description``."""
+
+    content: str
+    """Cascade-assembled body: ``SKILL.md`` main text concatenated with
+    every ``references/*.md`` sibling. ``scripts/`` files are excluded."""
+
+    content_tokens: str
+    """App-layer pre-tokenised ``content`` (secondary BM25 field).
+    Tokenised by cascade when assembling ``content`` from md sources."""
+
+    confidence: float
+    """0.0–1.0; LLM-emitted confidence in the skill."""
+
+    maturity_score: float
+    """0.0–1.0; LLM-emitted maturity score. The retrieval-time threshold
+    (``maturity_threshold``) lives in MemorizeConfig, not in this row."""
+
+    source_case_ids: list[str]
+    """AgentCase ids that fed into this skill's synthesis (lineage)."""
+
+    cluster_id: str | None = None
+    """Optional MemScene clustering tag."""
+
+    # NOTE(review): presumably the path of the skill's ``SKILL.md`` —
+    # confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 hex digest over the **content-bearing fields only** of
+    the skill: ``name`` / ``description`` (frontmatter) + SKILL.md
+    body + concatenated references content + ``confidence`` /
+    ``maturity_score``. Cascade handler diffs by this digest to skip
+    re-upsert + re-embed when neither retrieval-anchor text nor scores
+    changed (e.g. the watcher fires for unrelated stat updates). See
+    :attr:`AgentSkillHandler.content_change_keys`."""
+
+    # ``Vector(_DIM)`` is a call expression in annotation position, hence
+    # the ``valid-type`` ignore for the type checker.
+    vector: Vector(_DIM)  # type: ignore[valid-type]
--- a/src/everos/infra/persistence/lancedb/tables/atomic_fact.py
+++ b/src/everos/infra/persistence/lancedb/tables/atomic_fact.py
@ -0,0 +1,62 @@
+"""LanceDB ``atomic_fact`` table schema.
+
+Field set per 17_lancedb_tables_design.md §3.2. Each row carries one
+atomic fact extracted by the algo layer; the parent is always the source
+MemCell — recorded via ``parent_type`` / ``parent_id``.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable, Vector
+
+from ._parent_type import ParentType
+
+# Width passed to ``Vector(...)`` for the ``vector`` column below.
+_DIM = 1024
+
+
+class AtomicFact(BaseLanceTable):
+    """One atomic fact indexed in LanceDB.
+
+    Schema of the ``atomic_fact`` table. ``fact`` holds the display
+    text; ``fact_tokens`` is its pre-tokenised twin and the only column
+    listed in ``BM25_FIELDS``.
+    """
+
+    TABLE_NAME: ClassVar[str] = "atomic_fact"
+    BM25_FIELDS: ClassVar[list[str]] = ["fact_tokens"]
+
+    id: str
+    """PK = ``<owner_id>_<entry_id>``."""
+
+    entry_id: str
+    """md-side seq id ``af_<YYYYMMDD>_<NNNN>``."""
+
+    owner_id: str
+    owner_type: str
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+    session_id: str
+    timestamp: _dt.datetime
+
+    parent_type: str = ParentType.MEMCELL.value
+    """Source pointer — always :attr:`ParentType.MEMCELL` for atomic fact."""
+
+    parent_id: str
+    """Source memcell id."""
+    # NOTE(review): presumably the senders behind the source memcell —
+    # not documented in this schema; confirm upstream.
+    sender_ids: list[str]
+    fact: str
+    """Atomic fact text — original surface form (returned for display)."""
+
+    fact_tokens: str
+    """App-layer pre-tokenised ``fact`` text — space-joined tokens.
+    BM25 index is built on this column (whitespace tokenizer);
+    ``fact`` itself is what callers display."""
+
+    # NOTE(review): presumably the path of the source markdown file for
+    # this row — confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 hex digest over the **content-bearing fields only** of
+    the md entry (per :attr:`AtomicFactHandler.content_change_keys`).
+    Matching digest → skip re-upsert + re-embed. Audit inline fields
+    (owner_id / session_id / timestamp / parent_id / sender_ids) are
+    NOT in the hash."""
+
+    # ``Vector(_DIM)`` is a call expression in annotation position, hence
+    # the ``valid-type`` ignore for the type checker.
+    vector: Vector(_DIM)  # type: ignore[valid-type]
--- a/src/everos/infra/persistence/lancedb/tables/episode.py
+++ b/src/everos/infra/persistence/lancedb/tables/episode.py
@ -0,0 +1,78 @@
+"""LanceDB ``episode`` table schema.
+
+Field set is fixed by the LanceDB tables design spec. Rows are populated
+by the cascade daemon from ``users/<owner_id>/episodes/episode-<YYYY-MM-DD>.md``
+and from ``agents/<owner_id>/episodes/...`` symmetrically.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable, Vector
+
+from ._parent_type import ParentType
+
+# Vector dimension is settings-managed at runtime; the class-level
+# constant pins the schema dim used at table creation.
+_DIM = 1024
+
+
+class Episode(BaseLanceTable):
+    """One episode record indexed in LanceDB.
+
+    Schema of the ``episode`` table. ``episode`` holds the display
+    narrative; ``episode_tokens`` is its pre-tokenised twin and the only
+    column listed in ``BM25_FIELDS``.
+    """
+
+    TABLE_NAME: ClassVar[str] = "episode"
+    BM25_FIELDS: ClassVar[list[str]] = ["episode_tokens"]
+
+    id: str
+    """PK = ``<owner_id>_<entry_id>`` (scalar PK)."""
+
+    entry_id: str
+    """md-side seq id ``ep_<YYYYMMDD>_<NNNN>`` (cascade reverse-lookup)."""
+
+    owner_id: str
+    owner_type: str
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+    session_id: str
+    timestamp: _dt.datetime
+
+    parent_type: str = ParentType.MEMCELL.value
+    """Source pointer — always :attr:`ParentType.MEMCELL` for episode."""
+
+    parent_id: str
+    """Source memcell id. The pipeline knows the memcell currently being
+    processed and writes its id into the md entry's inline block; the
+    cascade handler reads it back. The new everalgo Episode type no
+    longer emits ``parent_id`` itself (collapsed to caller-managed),
+    so this is filled entirely from everos's engineering context."""
+
+    sender_ids: list[str]
+    """Distinct ``role=user|assistant`` senders behind the episode."""
+
+    # ``subject`` and ``summary`` are optional; both default to ``None``.
+    subject: str | None = None
+    summary: str | None = None
+    episode: str
+    """Full narrative text — original surface form (returned for display)."""
+
+    episode_tokens: str
+    """App-layer pre-tokenised ``episode`` text — space-joined tokens
+    (e.g. produced by jieba). LanceDB FTS index is built on **this**
+    column using a whitespace tokenizer; the original ``episode`` field
+    is what callers display. Two-field BM25 scheme keeps tokenisation
+    deterministic and provider-pluggable at the app layer."""
+
+    # NOTE(review): presumably the path of the source markdown file for
+    # this row — confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 hex digest over the **content-bearing fields only** of the
+    md entry (per :attr:`EpisodeHandler.content_change_keys`). On
+    re-reconcile, a matching digest means none of the persistence /
+    embedding-relevant fields changed — the entry is skipped (no
+    re-upsert, no re-embed). Inline audit fields (owner_id /
+    session_id / timestamp / parent_id / sender_ids) are intentionally
+    NOT in the hash so editing them doesn't waste an embedding call.
+    See ``16_cascade_impl_design.md`` §3.3."""
+
+    # ``Vector(_DIM)`` is a call expression in annotation position, hence
+    # the ``valid-type`` ignore for the type checker.
+    vector: Vector(_DIM)  # type: ignore[valid-type]
--- a/src/everos/infra/persistence/lancedb/tables/foresight.py
+++ b/src/everos/infra/persistence/lancedb/tables/foresight.py
@ -0,0 +1,79 @@
+"""LanceDB ``foresight`` table schema.
+
+Field set per 17_lancedb_tables_design.md §3.3. Each row carries a
+forward-looking inference about the user (intent window, planned
+action, projected need); ``start_time`` / ``end_time`` describe the
+window the foresight applies to.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable, Vector
+
+from ._parent_type import ParentType
+
+# Width passed to ``Vector(...)`` for the ``vector`` column below.
+_DIM = 1024
+
+
+class Foresight(BaseLanceTable):
+    """One foresight record indexed in LanceDB.
+
+    Schema of the ``foresight`` table. ``foresight`` / ``evidence`` hold
+    the display texts; their ``*_tokens`` twins are the columns listed
+    in ``BM25_FIELDS``. The coverage-window fields (``start_time`` /
+    ``end_time`` / ``duration_days``) are all optional.
+    """
+
+    TABLE_NAME: ClassVar[str] = "foresight"
+    BM25_FIELDS: ClassVar[list[str]] = ["foresight_tokens", "evidence_tokens"]
+
+    id: str
+    """PK = ``<owner_id>_<entry_id>``."""
+
+    entry_id: str
+    """md-side seq id ``fs_<YYYYMMDD>_<NNNN>``."""
+
+    owner_id: str
+    owner_type: str
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+    session_id: str
+    timestamp: _dt.datetime
+    """Foresight generation time."""
+
+    start_time: _dt.datetime | None = None
+    """Foresight coverage window start; tz-aware."""
+
+    end_time: _dt.datetime | None = None
+    """Foresight coverage window end; tz-aware."""
+
+    # NOTE(review): presumably the window length in days — the schema
+    # does not say how it relates to start_time / end_time; confirm.
+    duration_days: int | None = None
+
+    parent_type: str = ParentType.MEMCELL.value
+    """Source pointer — always :attr:`ParentType.MEMCELL` for foresight."""
+
+    parent_id: str
+    """Source memcell id."""
+
+    sender_ids: list[str]
+    foresight: str
+    """Foresight body — original surface form (returned for display)."""
+
+    foresight_tokens: str
+    """App-layer pre-tokenised ``foresight`` text — space-joined tokens.
+    BM25 index is built on this column (whitespace tokenizer)."""
+
+    evidence: str | None = None
+    """Supporting evidence excerpt; may be empty."""
+
+    evidence_tokens: str | None = None
+    """App-layer pre-tokenised ``evidence`` (secondary BM25 field).
+    ``None`` whenever ``evidence`` is None."""
+
+    # NOTE(review): presumably the path of the source markdown file for
+    # this row — confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 hex digest over the **content-bearing fields only** of
+    the md entry — Foresight / Evidence sections plus the time-window
+    inline fields (start_time / end_time / duration_days). Audit inline
+    (owner_id / session_id / timestamp / parent_id / sender_ids) is NOT
+    in the hash. See :attr:`ForesightHandler.content_change_keys`."""
+
+    # ``Vector(_DIM)`` is a call expression in annotation position, hence
+    # the ``valid-type`` ignore for the type checker.
+    vector: Vector(_DIM)  # type: ignore[valid-type]
--- a/src/everos/infra/persistence/lancedb/tables/user_profile.py
+++ b/src/everos/infra/persistence/lancedb/tables/user_profile.py
@ -0,0 +1,68 @@
+"""LanceDB ``user_profile`` table schema.
+
+Profile is a single-file kind: one ``users/<user_id>/user.md`` per
+user, replaced wholesale on edit (mirrors ``AgentSkill`` for the
+upsert/single-row contract). The LanceDB row is a typed projection
+of the md frontmatter that the cascade keeps in sync; it carries no
+vector / no BM25 because the recall surface is pure KV-by-owner
+(``fetch(owner_id)``) — when query-aware profile lookup ships later
+the schema will gain ``vector`` + ``*_tokens`` columns then.
+
+``explicit_info`` / ``implicit_traits`` are heterogeneous LLM
+emissions (mostly small dicts mixed with strings) — LanceDB has no
+``list[dict]`` column type, so we stash them as JSON strings and
+unpack at the recall boundary into ``profile_data`` of the DTO.
+"""
+
+from __future__ import annotations
+
+from typing import ClassVar
+
+from everos.core.persistence.lancedb import BaseLanceTable
+
+
+class UserProfile(BaseLanceTable):
+    """One ``users/<user_id>/user.md`` indexed in LanceDB.
+
+    Schema of the ``user_profile`` table. It declares no ``BM25_FIELDS``
+    and no ``vector`` column; the two evidence buckets are stored as
+    JSON strings (``explicit_info_json`` / ``implicit_traits_json``).
+    """
+
+    TABLE_NAME: ClassVar[str] = "user_profile"
+    # No BM25 columns: profile recall is KV-by-owner today.
+
+    id: str
+    """PK = ``owner_id`` (one row per user)."""
+
+    owner_id: str
+    owner_type: str
+    """Always ``"user"`` for this schema; agent-side profiles would
+    live in a sibling table once that schema lands."""
+
+    app_id: str = "default"
+    project_id: str = "default"
+    """App / project scope (default ``"default"``); cascade fills from md path."""
+
+    summary: str
+    """Free-form one-paragraph user summary (retrieval anchor for the
+    future query-aware lookup; today returned verbatim to the caller)."""
+
+    explicit_info_json: str
+    """JSON-serialised ``list[Any]`` — the algo's verbatim evidence
+    bucket. Stored as a string because LanceDB has no
+    ``list[dict]`` column type. The recaller json-decodes it back into
+    ``profile_data['explicit_info']`` at the DTO boundary."""
+
+    implicit_traits_json: str
+    """Same shape as :attr:`explicit_info_json`, for the LLM-inferred
+    preference bucket."""
+
+    profile_timestamp_ms: int
+    """Algo-emitted profile timestamp (ms epoch) — pinned to the
+    timestamp of the freshest MemCell that fed into the synthesis.
+    Mirrored from :attr:`UserProfileFrontmatter.profile_timestamp_ms`
+    so downstream code can compare freshness without re-reading md."""
+
+    # NOTE(review): presumably the path of the user's ``user.md`` —
+    # confirm against the cascade handler.
+    md_path: str
+    content_sha256: str
+    """SHA-256 over the content-bearing frontmatter fields (summary +
+    explicit_info_json + implicit_traits_json). Matches → cascade
+    skips re-upsert. ``profile_timestamp_ms`` is intentionally not in
+    the hash: it drifts with every synthesis even when the underlying
+    content is identical, and the LanceDB row treats it as audit."""
--- a/src/everos/infra/persistence/markdown/init.py
+++ b/src/everos/infra/persistence/markdown/init.py
@ -0,0 +1,73 @@
+"""Markdown business persistence layer.
+
+Sits on top of :mod:`everos.core.persistence.markdown` (atomic write +
+parse + frontmatter chassis) and provides:
+
+    * concrete frontmatter schemas under :mod:`.mds`
+    * concrete business writers under :mod:`.writers`
+      (``BaseDailyWriter`` + subclasses, ``AgentSkillWriter``,
+      ``ProfileWriter``)
+    * concrete business readers under :mod:`.readers`
+      (``BaseDailyReader`` + subclasses, ``AgentSkillReader``,
+      ``ProfileReader``)
+
+External usage::
+
+    from everos.infra.persistence.markdown import (
+        BaseDailyWriter, BaseDailyReader,
+        EpisodeWriter, EpisodeReader, EpisodeDailyFrontmatter,
+        AtomicFactDailyFrontmatter,
+        ForesightDailyFrontmatter,
+        AgentCaseDailyFrontmatter,
+        AgentSkillFrontmatter, AgentSkillWriter, AgentSkillReader,
+        ProfileWriter, ProfileReader,
+    )
+
+Outer layers MUST go through this top-level package because
+``infra.persistence.markdown.**`` (sub-packages) are forbidden to outer
+layers by import-linter.
+"""
+
+from .mds import AgentCaseDailyFrontmatter as AgentCaseDailyFrontmatter
+from .mds import AgentSkillFrontmatter as AgentSkillFrontmatter
+from .mds import AtomicFactDailyFrontmatter as AtomicFactDailyFrontmatter
+from .mds import EpisodeDailyFrontmatter as EpisodeDailyFrontmatter
+from .mds import ForesightDailyFrontmatter as ForesightDailyFrontmatter
+from .mds import UserProfileFrontmatter as UserProfileFrontmatter
+from .readers import AgentCaseReader as AgentCaseReader
+from .readers import AgentSkillReader as AgentSkillReader
+from .readers import AtomicFactReader as AtomicFactReader
+from .readers import BaseDailyReader as BaseDailyReader
+from .readers import EpisodeReader as EpisodeReader
+from .readers import ForesightReader as ForesightReader
+from .readers import ProfileReader as ProfileReader
+from .writers import AgentCaseWriter as AgentCaseWriter
+from .writers import AgentSkillWriter as AgentSkillWriter
+from .writers import AtomicFactWriter as AtomicFactWriter
+from .writers import BaseDailyWriter as BaseDailyWriter
+from .writers import EpisodeWriter as EpisodeWriter
+from .writers import ForesightWriter as ForesightWriter
+from .writers import ProfileWriter as ProfileWriter
+
+# Public names re-exported by this package (kept in alphabetical order).
+__all__ = [
+    "AgentCaseDailyFrontmatter",
+    "AgentCaseReader",
+    "AgentCaseWriter",
+    "AgentSkillFrontmatter",
+    "AgentSkillReader",
+    "AgentSkillWriter",
+    "AtomicFactDailyFrontmatter",
+    "AtomicFactReader",
+    "AtomicFactWriter",
+    "BaseDailyReader",
+    "BaseDailyWriter",
+    "EpisodeDailyFrontmatter",
+    "EpisodeReader",
+    "EpisodeWriter",
+    "ForesightDailyFrontmatter",
+    "ForesightReader",
+    "ForesightWriter",
+    "ProfileReader",
+    "ProfileWriter",
+    "UserProfileFrontmatter",
+]
--- a/src/everos/infra/persistence/markdown/mds/init.py
+++ b/src/everos/infra/persistence/markdown/mds/init.py
@ -0,0 +1,40 @@
+"""Business markdown frontmatter schemas (mds = "markdown schemas").
+
+Each business record kind that is stored as markdown gets a concrete
+frontmatter class here, subclassing one of the chassis classes from
+:mod:`everos.core.persistence.markdown`:
+
+    * :class:`UserScopedFrontmatter` for user-track records
+    * :class:`AgentScopedFrontmatter` for agent-track records
+    * :class:`BaseFrontmatter` for scope-agnostic records (rare)
+
+Schemas drive path resolution via ClassVars; each storage strategy has
+its own conventions:
+
+- **Daily-log** schemas declare ``ENTRY_ID_PREFIX`` (token in
+  ``<prefix>_<date>_<seq>``), ``DIR_NAME`` (sub-directory under
+  ``<scope>/<id>/``) and ``FILE_PREFIX`` (leading token of the daily
+  filename joined with ``-<YYYY-MM-DD>.md``).
+- **Skill** schemas (:class:`AgentSkillFrontmatter`) pin the directory
+  layout via five ClassVars: ``SKILLS_CONTAINER_NAME`` (container) plus
+  four ``SKILL_*`` names (dir prefix / main filename / references /
+  scripts).
+- **Profile** schemas declare ``PROFILE_FILENAME`` (``"user.md"`` /
+  ``"agent.md"`` / …) and inherit ``SCOPE_DIR`` from a scope mixin; no
+  profile base class — the writer/reader pair is duck-typed.
+"""
+
+from .agent_case import AgentCaseDailyFrontmatter as AgentCaseDailyFrontmatter
+from .agent_skill import AgentSkillFrontmatter as AgentSkillFrontmatter
+from .atomic_fact import AtomicFactDailyFrontmatter as AtomicFactDailyFrontmatter
+from .episode import EpisodeDailyFrontmatter as EpisodeDailyFrontmatter
+from .foresight import ForesightDailyFrontmatter as ForesightDailyFrontmatter
+from .profile import UserProfileFrontmatter as UserProfileFrontmatter
+
+# Public names re-exported by this package (kept in alphabetical order).
+__all__ = [
+    "AgentCaseDailyFrontmatter",
+    "AgentSkillFrontmatter",
+    "AtomicFactDailyFrontmatter",
+    "EpisodeDailyFrontmatter",
+    "ForesightDailyFrontmatter",
+    "UserProfileFrontmatter",
+]
--- a/src/everos/infra/persistence/markdown/mds/agent_case.py
+++ b/src/everos/infra/persistence/markdown/mds/agent_case.py
@ -0,0 +1,37 @@
+"""AgentCase frontmatter — daily-log markdown for agent-scoped cases.
+
+Path: ``agents/<scope_id>/.cases/agent_case-<YYYY-MM-DD>.md``.
+
+The directory is dotfile-hidden (``.cases``) so users only see the
+curated ``agent_skills/`` view, not the raw per-task case log — same
+convention as ``.atomic_facts`` / ``.foresights``.
+
+Each entry records one task an agent worked on: intent, approach taken,
+quality score, and an optional pivotal insight. A MemCell extracted on
+the agent's own execution log yields at most one AgentCase.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar, Literal
+
+from everos.core.persistence.markdown import (
+    AgentScopedFrontmatter,
+    DailyLogPathMixin,
+)
+
+
+class AgentCaseDailyFrontmatter(DailyLogPathMixin, AgentScopedFrontmatter):
+    """Frontmatter for ``agents/<scope>/.cases/agent_case-<YYYY-MM-DD>.md``.
+
+    The three ClassVars supply the tokens visible in that path:
+    ``DIR_NAME`` is the sub-directory and ``FILE_PREFIX`` the leading
+    token of the filename; ``ENTRY_ID_PREFIX`` is the prefix of entry ids.
+    """
+
+    ENTRY_ID_PREFIX: ClassVar[str] = "ac"
+    DIR_NAME: ClassVar[str] = ".cases"
+    FILE_PREFIX: ClassVar[str] = "agent_case"
+
+    # ``type`` and ``file_type`` are both pinned to the same literal.
+    type: Literal["agent_case_daily"] = "agent_case_daily"
+    file_type: Literal["agent_case_daily"] = "agent_case_daily"
+    # Required; presumably the calendar day named in the filename — confirm.
+    date: _dt.date
+    # NOTE(review): the counters / timestamps below are presumably
+    # maintained by the daily writer on append — confirm against it.
+    entry_count: int = 0
+    created_at: _dt.datetime | None = None
+    last_appended_at: _dt.datetime | None = None
--- a/src/everos/infra/persistence/markdown/mds/agent_skill.py
+++ b/src/everos/infra/persistence/markdown/mds/agent_skill.py
@ -0,0 +1,63 @@
+"""AgentSkill frontmatter — single SKILL.md inside a skill directory.
+
+Path: ``agents/<scope_id>/skills/skill_<name>/SKILL.md`` (plus sibling
+``references/*.md`` and ``scripts/*.<ext>`` files that are not part of
+the frontmatter contract).
+
+Skills are *named entities* rather than daily-log entries: the
+LanceDB primary key is ``<owner_id>_<skill_name>`` (no date / seq).
+Upserts replace the file wholesale; the cascade daemon recomputes the
+``content`` index column by concatenating ``SKILL.md`` body with every
+``references/*.md`` sibling.
+
+Five directory-shape ClassVars pin the layout in one place so the
+writer / reader pair reads off them — no duplicated string literals.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar, Literal
+
+from everos.core.persistence.markdown import (
+    AgentScopedFrontmatter,
+    SkillPathMixin,
+)
+
+
+class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
+    """Frontmatter for ``agents/<scope>/skills/skill_<name>/SKILL.md``.
+
+    The five ClassVars spell out the pieces of that layout: the
+    container directory, the per-skill directory prefix, the main
+    filename, and the names of the references / scripts directories.
+    """
+
+    SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
+    SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
+    SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
+    SKILL_REFERENCES_DIR_NAME: ClassVar[str] = "references"
+    SKILL_SCRIPTS_DIR_NAME: ClassVar[str] = "scripts"
+
+    type: Literal["agent_skill"] = "agent_skill"
+
+    name: str
+    """Skill identifier — also the directory suffix
+    (``skills/skill_<name>/``). Keep snake_case so it is filesystem-safe
+    and ID-stable."""
+
+    description: str
+    """One-line summary surfaced at Tier-1 prompt injection. Short — the
+    agent's startup-time scanner reads ``(name, description)`` for every
+    skill, so the token budget is tight."""
+
+    confidence: float
+    """LLM-emitted confidence in the skill's correctness, 0.0–1.0."""
+
+    maturity_score: float
+    """LLM-emitted maturity score, 0.0–1.0. The retrieval-time threshold
+    (``maturity_threshold``) lives in MemorizeConfig, not on this file."""
+
+    # NOTE(review): mutable ``[]`` default — only safe if the base model
+    # copies field defaults per instance (pydantic models do); confirm the
+    # frontmatter base class before relying on it.
+    source_case_ids: list[str] = []
+    """AgentCase ids that fed into this skill's synthesis (lineage)."""
+
+    cluster_id: str | None = None
+    """Optional MemScene clustering tag; may be unset early on."""
+
+    # Both timestamps are optional and default to ``None``.
+    created_at: _dt.datetime | None = None
+    updated_at: _dt.datetime | None = None
--- a/src/everos/infra/persistence/markdown/mds/atomic_fact.py
+++ b/src/everos/infra/persistence/markdown/mds/atomic_fact.py
@ -0,0 +1,38 @@
+"""AtomicFact frontmatter — daily-log markdown for user-scoped atomic facts.
+
+Path: ``users/<scope_id>/.atomic_facts/atomic_fact-<YYYY-MM-DD>.md``.
+
+The directory is dot-prefixed so it is hidden from end users (same
+convention as ``.index``); atomic facts are framework-internal derived md,
+not material the user is expected to read by hand.
+
+Each entry carries one atomic fact extracted by the algo layer; the fact
+always hangs off the source MemCell (see ``parent_type`` in each entry's
+inline fields — handled at the StructuredEntry layer, not on the
+file-level frontmatter).
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar, Literal
+
+from everos.core.persistence.markdown import (
+    DailyLogPathMixin,
+    UserScopedFrontmatter,
+)
+
+
+class AtomicFactDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+    """Frontmatter for ``users/<scope>/.atomic_facts/atomic_fact-<YYYY-MM-DD>.md``.
+
+    The three ClassVars supply the tokens visible in that path:
+    ``DIR_NAME`` is the sub-directory and ``FILE_PREFIX`` the leading
+    token of the filename; ``ENTRY_ID_PREFIX`` is the prefix of entry ids.
+    """
+
+    ENTRY_ID_PREFIX: ClassVar[str] = "af"
+    DIR_NAME: ClassVar[str] = ".atomic_facts"
+    FILE_PREFIX: ClassVar[str] = "atomic_fact"
+
+    # ``type`` and ``file_type`` are both pinned to the same literal.
+    type: Literal["atomic_fact_daily"] = "atomic_fact_daily"
+    file_type: Literal["atomic_fact_daily"] = "atomic_fact_daily"
+    # Required; presumably the calendar day named in the filename — confirm.
+    date: _dt.date
+    # NOTE(review): the counters / timestamps below are presumably
+    # maintained by the daily writer on append — confirm against it.
+    entry_count: int = 0
+    created_at: _dt.datetime | None = None
+    last_appended_at: _dt.datetime | None = None
--- a/src/everos/infra/persistence/markdown/mds/episode.py
+++ b/src/everos/infra/persistence/markdown/mds/episode.py
@ -0,0 +1,33 @@
+"""Episode frontmatter — daily-log markdown for user-scoped episodes.
+
+Path: ``users/<scope_id>/episodes/episode-<YYYY-MM-DD>.md``.
+
+This milestone uses ``session_id`` as the scope key (since owner inference
+is out of scope). When owner inference lands the scope key will switch to
+``owner_id`` while the schema stays compatible.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar, Literal
+
+from everos.core.persistence.markdown import (
+    DailyLogPathMixin,
+    UserScopedFrontmatter,
+)
+
+
+class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+    """Frontmatter for ``users/<scope>/episodes/episode-<YYYY-MM-DD>.md``.
+
+    The three ClassVars supply the tokens visible in that path:
+    ``DIR_NAME`` is the sub-directory and ``FILE_PREFIX`` the leading
+    token of the filename; ``ENTRY_ID_PREFIX`` is the prefix of entry ids.
+    """
+
+    ENTRY_ID_PREFIX: ClassVar[str] = "ep"
+    DIR_NAME: ClassVar[str] = "episodes"
+    FILE_PREFIX: ClassVar[str] = "episode"
+
+    # ``type`` and ``file_type`` are both pinned to the same literal.
+    type: Literal["episode_daily"] = "episode_daily"
+    file_type: Literal["episode_daily"] = "episode_daily"
+    # Required; presumably the calendar day named in the filename — confirm.
+    date: _dt.date
+    # NOTE(review): the counters / timestamps below are presumably
+    # maintained by the daily writer on append — confirm against it.
+    entry_count: int = 0
+    created_at: _dt.datetime | None = None
+    last_appended_at: _dt.datetime | None = None
--- a/src/everos/infra/persistence/markdown/mds/foresight.py
+++ b/src/everos/infra/persistence/markdown/mds/foresight.py
@ -0,0 +1,38 @@
+"""Foresight frontmatter — daily-log markdown for user-scoped foresights.
+
+Path: ``users/<scope_id>/.foresights/foresight-<YYYY-MM-DD>.md``.
+
+The directory is dot-prefixed so it is hidden from end users (same
+convention as ``.index``); foresights are framework-internal derived md,
+not material the user is expected to read by hand.
+
+Each entry carries a forward-looking inference about the user (intent
+window, planned action, projected need) with ``start_time`` /
+``end_time`` describing the covered time range. ``parent_type`` always
+points back to a MemCell.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import ClassVar, Literal
+
+from everos.core.persistence.markdown import (
+    DailyLogPathMixin,
+    UserScopedFrontmatter,
+)
+
+
+class ForesightDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+    """Frontmatter for ``users/<scope>/.foresights/foresight-<YYYY-MM-DD>.md``.
+
+    Describes one per-day foresight log file. The ``ClassVar`` members
+    are class-level constants; the annotated attributes below them are
+    the frontmatter keys.
+    """
+
+    # DIR_NAME / FILE_PREFIX match the directory and filename stem in the
+    # path above. ENTRY_ID_PREFIX is presumably the prefix of entry ids
+    # written into this log — confirm against the writer.
+    ENTRY_ID_PREFIX: ClassVar[str] = "fs"
+    DIR_NAME: ClassVar[str] = ".foresights"
+    FILE_PREFIX: ClassVar[str] = "foresight"
+
+    # Discriminators pinned to one literal; ``type`` and ``file_type``
+    # carry the same value.
+    type: Literal["foresight_daily"] = "foresight_daily"
+    file_type: Literal["foresight_daily"] = "foresight_daily"
+    # The only field declared here without a default.
+    date: _dt.date
+    # NOTE(review): presumably the number of entries in the file — confirm
+    # against the writer that maintains it.
+    entry_count: int = 0
+    # Optional timestamps; both default to ``None``.
+    created_at: _dt.datetime | None = None
+    last_appended_at: _dt.datetime | None = None
--- a/src/everos/infra/persistence/markdown/mds/profile.py
+++ b/src/everos/infra/persistence/markdown/mds/profile.py
@ -0,0 +1,40 @@
+"""UserProfile frontmatter — single-file profile markdown for users.
+
+Path: ``users/<user_id>/user.md``.
+
+Carries the LLM-synthesised user profile: a free-form ``summary`` plus the
+two evidence buckets emitted by :class:`everalgo.user_memory.ProfileExtractor`
+(``explicit_info`` / ``implicit_traits``). ``profile_timestamp_ms``
+mirrors :attr:`everalgo.types.Profile.timestamp` so the
+``extract_user_profile`` strategy can compare per-user freshness against
+cluster ``last_ts`` without re-parsing the body.
+"""
+
+from __future__ import annotations
+
+from typing import Any, ClassVar, Literal
+
+from everos.core.persistence.markdown import ProfilePathMixin, UserScopedFrontmatter
+
+
+class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
+    """Frontmatter for ``users/<user_id>/user.md``.
+
+    Single-file profile schema: one summary string, two list-valued
+    evidence buckets and an integer timestamp. Every field declared here
+    has a default.
+    """
+
+    # Fixed filename of the profile file (the ``user.md`` in the path above).
+    PROFILE_FILENAME: ClassVar[str] = "user.md"
+
+    # Discriminator pinned to a single literal value.
+    type: Literal["user_profile"] = "user_profile"
+
+    summary: str = ""
+    """Free-form one-paragraph summary of the user — the retrieval anchor."""
+
+    # NOTE(review): the bare ``[]`` defaults on the two buckets below are
+    # only safe if the base class is a pydantic model (pydantic copies field
+    # defaults per instance); on a plain class they would be shared — confirm.
+    explicit_info: list[Any] = []
+    """Algo-side ``explicit_info`` bucket (verbatim facts the user stated)."""
+
+    implicit_traits: list[Any] = []
+    """Algo-side ``implicit_traits`` bucket (LLM-inferred preferences)."""
+
+    # Defaults to 0, i.e. the epoch, when no timestamp has been supplied.
+    profile_timestamp_ms: int = 0
+    """Algo-emitted profile timestamp (ms epoch); equals the timestamp of
+    the most recent MemCell that fed into the synthesis. Compared with
+    :attr:`everos.infra.persistence.sqlite.Cluster.last_ts_ms` to decide
+    whether a cluster is fresh enough to drive a profile re-extraction."""
--- a/src/everos/infra/persistence/markdown/readers/init.py
+++ b/src/everos/infra/persistence/markdown/readers/init.py
@ -0,0 +1,49 @@
+"""Business markdown readers — symmetric with the writers.
+
+Daily-log markdown is parsed via :class:`MarkdownReader` from ``core``
+(the base reader returns frontmatter dict + body + entry markers, all
+schema-agnostic). Reader classes here add the **business-aware
+locator** layer:
+
+    * :class:`BaseDailyReader` + subclasses — bind a daily-log schema,
+      resolve ``(scope_id, date)`` to a file, locate entries by id,
+      and optionally upgrade to :class:`StructuredEntry`. Symmetric
+      with :class:`BaseDailyWriter`.
+    * :class:`AgentSkillReader` — reads ``SKILL.md`` and parses the
+      frontmatter into the caller-supplied ``AgentSkillFrontmatter``
+      subclass; also reads individual reference / script files.
+    * :class:`ProfileReader` — reads a fixed-name profile file
+      (``user.md`` / ``agent.md`` / ``soul.md`` / …) and parses its
+      frontmatter into the caller-supplied schema.
+
+By design, no batch / list APIs live here: bulk enumeration for
+prompt-budget or cross-record queries goes through sqlite/lancedb
+(see the cascade daemon's index sync), not a markdown directory walk.
+
+External usage::
+
+    from everos.infra.persistence.markdown.readers import (
+        BaseDailyReader,
+        EpisodeReader,
+        AgentSkillReader,
+        ProfileReader,
+    )
+"""
+
+from .agent_case_reader import AgentCaseReader as AgentCaseReader
+from .agent_skill_reader import AgentSkillReader as AgentSkillReader
+from .atomic_fact_reader import AtomicFactReader as AtomicFactReader
+from .base import BaseDailyReader as BaseDailyReader
+from .episode_reader import EpisodeReader as EpisodeReader
+from .foresight_reader import ForesightReader as ForesightReader
+from .profile_reader import ProfileReader as ProfileReader
+
+__all__ = [
+    "AgentCaseReader",
+    "AgentSkillReader",
+    "AtomicFactReader",
+    "BaseDailyReader",
+    "EpisodeReader",
+    "ForesightReader",
+    "ProfileReader",
+]
--- a/src/everos/infra/persistence/markdown/readers/agent_case_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/agent_case_reader.py
@ -0,0 +1,31 @@
+"""AgentCase daily-log reader — symmetric with :class:`AgentCaseWriter`."""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+
+from everos.core.persistence import MemoryRoot
+
+from ..mds import AgentCaseDailyFrontmatter
+from .base import BaseDailyReader
+
+
+class AgentCaseReader(BaseDailyReader):
+    """Daily-log reader bound to :class:`AgentCaseDailyFrontmatter`.
+
+    All read behaviour is inherited from :class:`BaseDailyReader`; this
+    subclass only pins the schema and names the scope parameter
+    ``agent_id``.
+    """
+
+    schema = AgentCaseDailyFrontmatter
+
+    def __init__(self, root: MemoryRoot) -> None:
+        """Hand ``root`` to the base reader unchanged."""
+        super().__init__(root)
+
+    def path_for(
+        self,
+        agent_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the agent-case log path for ``agent_id`` on ``date``.
+
+        Pure delegation to :meth:`BaseDailyReader.path_for`; ``date`` and
+        the ``<app>/<project>`` segments are forwarded untouched.
+        """
+        prefix_kwargs = {"app_id": app_id, "project_id": project_id}
+        return super().path_for(agent_id, date, **prefix_kwargs)
--- a/src/everos/infra/persistence/markdown/readers/agent_skill_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/agent_skill_reader.py
@ -0,0 +1,161 @@
+"""AgentSkillReader — typed read for the AgentSkill directory layout.
+
+Pairs with :class:`AgentSkillWriter`:
+
+- :meth:`read_main` reads ``SKILL.md`` and returns the caller's
+  :class:`AgentSkillFrontmatter` subclass instance + the Tier-2 body, so
+  the caller never deals with raw dicts.
+- :meth:`read_reference` / :meth:`read_script` are plain text reads;
+  no frontmatter, no schema.
+
+All three return ``None`` when the target is missing — readers do not
+raise on absence, since "skill not yet created" is a normal state for
+the upsert-style workflow. Callers that need to distinguish "missing"
+from "empty body" check for ``None`` explicitly.
+
+Path resolution mirrors :class:`AgentSkillWriter` and reads the same
+ClassVars off :class:`AgentSkillFrontmatter`.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TypeVar
+
+import anyio
+
+from everos.core.persistence import MarkdownReader, MemoryRoot
+
+from ..mds import AgentSkillFrontmatter
+
+T = TypeVar("T", bound=AgentSkillFrontmatter)
+
+
+class AgentSkillReader:
+    """Reader for one skill directory: ``SKILL.md``, references and scripts.
+
+    Each public method returns ``None`` rather than raising when the
+    target path is not an existing regular file.
+    """
+
+    def __init__(self, root: MemoryRoot) -> None:
+        self._root = root
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def read_main(
+        self,
+        agent_id: str,
+        skill_name: str,
+        *,
+        schema: type[T],
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> tuple[T, str] | None:
+        """Load ``SKILL.md`` as a typed frontmatter instance plus its body.
+
+        Args:
+            agent_id: Agent directory that owns the skill.
+            skill_name: Skill name; the directory prefix is added here.
+            schema: Concrete :class:`AgentSkillFrontmatter` subclass used
+                to validate the frontmatter via ``model_validate``.
+            app_id: App scope segment.
+            project_id: Project scope segment.
+
+        Returns:
+            ``(frontmatter, body)`` where ``body`` has its trailing
+            newlines removed, or ``None`` when ``SKILL.md`` is not a file.
+        """
+        main_file = self._main_path(agent_id, skill_name, app_id, project_id)
+        present = await anyio.Path(main_file).is_file()
+        if not present:
+            return None
+        doc = await MarkdownReader.read(main_file)
+        return schema.model_validate(doc.frontmatter), doc.body.rstrip("\n")
+
+    async def read_reference(
+        self,
+        agent_id: str,
+        skill_name: str,
+        reference_name: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> str | None:
+        """Return the text of ``references/<reference_name>.md``.
+
+        Trailing newlines are stripped; ``None`` when the file is absent.
+        """
+        return await self._read_text_or_none(
+            self._reference_path(
+                agent_id, skill_name, reference_name, app_id, project_id
+            )
+        )
+
+    async def read_script(
+        self,
+        agent_id: str,
+        skill_name: str,
+        script_filename: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> str | None:
+        """Return the source text of ``scripts/<script_filename>``.
+
+        Trailing newlines are stripped; ``None`` when the file is absent.
+        This only reads the text — nothing is executed here.
+        """
+        return await self._read_text_or_none(
+            self._script_path(
+                agent_id, skill_name, script_filename, app_id, project_id
+            )
+        )
+
+    # ── Internals — same shape as AgentSkillWriter ────────────────────────────
+
+    @staticmethod
+    async def _read_text_or_none(path: Path) -> str | None:
+        """Read ``path`` as UTF-8 minus trailing newlines, ``None`` if no file."""
+        target = anyio.Path(path)
+        if await target.is_file():
+            return (await target.read_text(encoding="utf-8")).rstrip("\n")
+        return None
+
+    def _skill_dir(
+        self, agent_id: str, skill_name: str, app_id: str, project_id: str
+    ) -> Path:
+        """Directory that holds every file of one skill."""
+        agent_home = self._root.agents_dir(app_id, project_id) / agent_id
+        container = agent_home / AgentSkillFrontmatter.SKILLS_CONTAINER_NAME
+        return container / f"{AgentSkillFrontmatter.SKILL_DIR_PREFIX}{skill_name}"
+
+    def _main_path(
+        self, agent_id: str, skill_name: str, app_id: str, project_id: str
+    ) -> Path:
+        """Path of the skill's main markdown file."""
+        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
+        return skill_home / AgentSkillFrontmatter.SKILL_MAIN_FILENAME
+
+    def _reference_path(
+        self,
+        agent_id: str,
+        skill_name: str,
+        reference_name: str,
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Path of one reference file; the ``.md`` suffix is appended here."""
+        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
+        references = skill_home / AgentSkillFrontmatter.SKILL_REFERENCES_DIR_NAME
+        return references / f"{reference_name}.md"
+
+    def _script_path(
+        self,
+        agent_id: str,
+        skill_name: str,
+        script_filename: str,
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Path of one script file; ``script_filename`` is used as given."""
+        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
+        scripts = skill_home / AgentSkillFrontmatter.SKILL_SCRIPTS_DIR_NAME
+        return scripts / script_filename
--- a/src/everos/infra/persistence/markdown/readers/atomic_fact_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/atomic_fact_reader.py
@ -0,0 +1,31 @@
+"""AtomicFact daily-log reader — symmetric with :class:`AtomicFactWriter`."""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+
+from everos.core.persistence import MemoryRoot
+
+from ..mds import AtomicFactDailyFrontmatter
+from .base import BaseDailyReader
+
+
+class AtomicFactReader(BaseDailyReader):
+    """Daily-log reader bound to :class:`AtomicFactDailyFrontmatter`.
+
+    All read behaviour is inherited from :class:`BaseDailyReader`; this
+    subclass only pins the schema and names the scope parameter
+    ``owner_id``.
+    """
+
+    schema = AtomicFactDailyFrontmatter
+
+    def __init__(self, root: MemoryRoot) -> None:
+        """Hand ``root`` to the base reader unchanged."""
+        super().__init__(root)
+
+    def path_for(
+        self,
+        owner_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the atomic-fact log path for ``owner_id`` on ``date``.
+
+        Pure delegation to :meth:`BaseDailyReader.path_for`; ``date`` and
+        the ``<app>/<project>`` segments are forwarded untouched.
+        """
+        prefix_kwargs = {"app_id": app_id, "project_id": project_id}
+        return super().path_for(owner_id, date, **prefix_kwargs)
--- a/src/everos/infra/persistence/markdown/readers/base.py
+++ b/src/everos/infra/persistence/markdown/readers/base.py
@ -0,0 +1,177 @@
+"""Base business reader for daily-log markdown files.
+
+Symmetric to :class:`BaseDailyWriter`: reads the daily-log file for
+a given ``(scope_id, date)``, locates entries by id within it, and
+optionally upgrades them to :class:`StructuredEntry` so service-layer
+callers don't have to re-do that plumbing each time.
+
+Subclass usage::
+
+    class _MemcellReader(BaseDailyReader):
+        schema = UserMemcellDailyFrontmatter
+
+    reader = _MemcellReader(root)
+    parsed = await reader.read_for("u_jason")         # today's file
+    entry = await reader.find_entry("u_jason", "umc_20260422_0001")
+    structured = await reader.find_structured("u_jason", entry.id)
+
+The reader does **not** typed-parse the file's frontmatter dict — the
+schema is used only for path resolution (matching what the appender
+writes). Frontmatter validation belongs to higher-level callers that
+know the business rules.
+
+Path resolution is identical to :class:`BaseDailyWriter` (same
+``SCOPE_DIR`` / ``DIR_NAME`` / ``FILE_PREFIX`` ClassVars), so a
+reader and writer bound to the same schema agree on every path.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+from typing import ClassVar
+
+import anyio
+
+from everos.component.utils.datetime import today_with_timezone
+from everos.core.persistence import (
+    BaseFrontmatter,
+    Entry,
+    EntryId,
+    MarkdownReader,
+    MemoryRoot,
+    ParsedMarkdown,
+    StructuredEntry,
+    find_entry,
+)
+
+
+class BaseDailyReader:
+    """Reader for per-day log markdown files, one record at a time.
+
+    A subclass sets the ``schema`` ClassVar to a :class:`BaseFrontmatter`
+    subclass that defines non-empty ``SCOPE_DIR``, ``DIR_NAME`` and
+    ``FILE_PREFIX``. Those three values are all the reader takes from
+    the schema; entry ids always come from the caller.
+    """
+
+    schema: ClassVar[type[BaseFrontmatter]]  # subclass must declare
+
+    def __init__(self, root: MemoryRoot) -> None:
+        """Check the bound schema, then remember ``root``.
+
+        Raises:
+            TypeError: If the subclass has no ``schema``, or the schema
+                lacks (or leaves empty) one of the path ClassVars.
+        """
+        cls = type(self)
+        bound = getattr(cls, "schema", None)
+        if bound is None:
+            raise TypeError(
+                f"{cls.__name__} must declare a class-level ``schema`` attribute"
+            )
+        required = ("SCOPE_DIR", "DIR_NAME", "FILE_PREFIX")
+        # First offending name in declaration order, if any.
+        absent = next(
+            (name for name in required if not getattr(bound, name, None)), None
+        )
+        if absent is not None:
+            raise TypeError(f"{bound.__name__} missing ClassVar {absent!r}")
+        self._root = root
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def read_for(
+        self,
+        scope_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> ParsedMarkdown | None:
+        """Parse the log file addressed by ``(scope_id, date)``.
+
+        Args:
+            scope_id: Directory name under the scope root.
+            date: Day of the log; ``today_with_timezone()`` when omitted.
+            app_id: App scope segment.
+            project_id: Project scope segment.
+
+        Returns:
+            Whatever :meth:`MarkdownReader.read` yields for the file, or
+            ``None`` when the path is not an existing regular file.
+        """
+        day = date or today_with_timezone()
+        target = self._resolve_path(scope_id, day, app_id, project_id)
+        if await anyio.Path(target).is_file():
+            return await MarkdownReader.read(target)
+        return None
+
+    async def find_entry(
+        self,
+        scope_id: str,
+        entry_id: str | EntryId,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Entry | None:
+        """Look up one entry by id in the log file for the id's own date.
+
+        A string id is parsed with :meth:`EntryId.parse`; the file to
+        open is chosen from the parsed id's ``date``. Returns ``None``
+        when the file is missing, otherwise whatever the module-level
+        ``find_entry`` helper returns for the file body.
+        """
+        if isinstance(entry_id, EntryId):
+            parsed_id = entry_id
+        else:
+            parsed_id = EntryId.parse(entry_id)
+        wanted = parsed_id.format()
+        document = await self.read_for(
+            scope_id, parsed_id.date, app_id=app_id, project_id=project_id
+        )
+        return None if document is None else find_entry(document.body, wanted)
+
+    async def find_structured(
+        self,
+        scope_id: str,
+        entry_id: str | EntryId,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> StructuredEntry | None:
+        """Run :meth:`find_entry`, then convert the hit with ``as_structured``.
+
+        Returns ``None`` when :meth:`find_entry` does.
+        """
+        located = await self.find_entry(
+            scope_id, entry_id, app_id=app_id, project_id=project_id
+        )
+        return located.as_structured() if located is not None else None
+
+    def path_for(
+        self,
+        scope_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Compute the log path for ``scope_id`` on ``date`` without any IO.
+
+        ``date`` falls back to ``today_with_timezone()``. The file's
+        existence is not checked.
+        """
+        day = date or today_with_timezone()
+        return self._resolve_path(scope_id, day, app_id, project_id)
+
+    # ── Internals ─────────────────────────────────────────────────────────
+
+    def _resolve_path(
+        self, scope_id: str, date: _dt.date, app_id: str, project_id: str
+    ) -> Path:
+        """Assemble ``<scope root>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<date>.md``."""
+        # The scope root comes from the MemoryRoot method named
+        # ``<SCOPE_DIR>_dir``, called with the app / project segments.
+        root_method = getattr(self._root, f"{self.schema.SCOPE_DIR}_dir")
+        log_dir = root_method(app_id, project_id) / scope_id / self.schema.DIR_NAME
+        return log_dir / f"{self.schema.FILE_PREFIX}-{date.isoformat()}.md"
--- a/src/everos/infra/persistence/markdown/readers/episode_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/episode_reader.py
@ -0,0 +1,41 @@
+"""Episode daily-log reader — symmetric with :class:`EpisodeWriter`.
+
+md is the source of truth for Episode memories; this reader gives
+cascade / search / verification scripts a typed locator instead of
+raw :class:`MarkdownReader` calls.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+
+from everos.core.persistence import MemoryRoot
+
+from ..mds import EpisodeDailyFrontmatter
+from .base import BaseDailyReader
+
+
+class EpisodeReader(BaseDailyReader):
+    """Daily-log reader bound to :class:`EpisodeDailyFrontmatter`.
+
+    All read behaviour is inherited from :class:`BaseDailyReader`; this
+    subclass only pins the schema and names the scope parameter
+    ``owner_id``.
+    """
+
+    schema = EpisodeDailyFrontmatter
+
+    def __init__(self, root: MemoryRoot) -> None:
+        """Hand ``root`` to the base reader unchanged."""
+        super().__init__(root)
+
+    def path_for(
+        self,
+        owner_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the episode log path for ``owner_id`` on ``date``.
+
+        Pure delegation to :meth:`BaseDailyReader.path_for`, so callers
+        can locate a day's file without going through the writer;
+        ``date`` and the ``<app>/<project>`` segments are forwarded
+        untouched.
+        """
+        prefix_kwargs = {"app_id": app_id, "project_id": project_id}
+        return super().path_for(owner_id, date, **prefix_kwargs)
--- a/src/everos/infra/persistence/markdown/readers/foresight_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/foresight_reader.py
@ -0,0 +1,31 @@
+"""Foresight daily-log reader — symmetric with :class:`ForesightWriter`."""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+
+from everos.core.persistence import MemoryRoot
+
+from ..mds import ForesightDailyFrontmatter
+from .base import BaseDailyReader
+
+
+class ForesightReader(BaseDailyReader):
+    """Daily-log reader bound to :class:`ForesightDailyFrontmatter`.
+
+    All read behaviour is inherited from :class:`BaseDailyReader`; this
+    subclass only pins the schema and names the scope parameter
+    ``owner_id``.
+    """
+
+    schema = ForesightDailyFrontmatter
+
+    def __init__(self, root: MemoryRoot) -> None:
+        """Hand ``root`` to the base reader unchanged."""
+        super().__init__(root)
+
+    def path_for(
+        self,
+        owner_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the foresight log path for ``owner_id`` on ``date``.
+
+        Pure delegation to :meth:`BaseDailyReader.path_for`; ``date`` and
+        the ``<app>/<project>`` segments are forwarded untouched.
+        """
+        prefix_kwargs = {"app_id": app_id, "project_id": project_id}
+        return super().path_for(owner_id, date, **prefix_kwargs)
--- a/src/everos/infra/persistence/markdown/readers/profile_reader.py
+++ b/src/everos/infra/persistence/markdown/readers/profile_reader.py
@ -0,0 +1,96 @@
+"""ProfileReader — typed read for the single-file profile layout.
+
+Pairs with :class:`ProfileWriter`. The schema (concrete profile
+frontmatter class) is supplied per call; the reader pulls
+``SCOPE_DIR`` + ``PROFILE_FILENAME`` ClassVars off it to build the
+path, then ``MarkdownReader.read`` + ``schema.model_validate`` give
+back a typed frontmatter instance plus the body string.
+
+Returns ``None`` when the profile file does not exist — "not yet
+written" is a normal early state for the upsert-style workflow.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TypeVar
+
+import anyio
+
+from everos.core.persistence import BaseFrontmatter, MarkdownReader, MemoryRoot
+
+T = TypeVar("T", bound=BaseFrontmatter)
+
+
+class ProfileReader:
+    """Reader for single-file profiles whose filename is fixed by the schema."""
+
+    def __init__(self, root: MemoryRoot) -> None:
+        self._root = root
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def read(
+        self,
+        scope_id: str,
+        *,
+        schema: type[T],
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> tuple[T, str] | None:
+        """Load the profile file as a typed frontmatter instance plus body.
+
+        Args:
+            scope_id: Directory name under the schema's scope root.
+            schema: Profile frontmatter class; supplies ``SCOPE_DIR`` and
+                ``PROFILE_FILENAME`` and validates the parsed frontmatter
+                via ``model_validate``.
+            app_id: App scope segment.
+            project_id: Project scope segment.
+
+        Returns:
+            ``(frontmatter, body)`` where ``body`` has its trailing
+            newlines removed, or ``None`` when the path is not a file.
+
+        Raises:
+            TypeError: If ``schema`` lacks either path ClassVar.
+        """
+        target = self._resolve_path(scope_id, schema, app_id, project_id)
+        if not await anyio.Path(target).is_file():
+            return None
+        document = await MarkdownReader.read(target)
+        typed = schema.model_validate(document.frontmatter)
+        return typed, document.body.rstrip("\n")
+
+    def path_for(
+        self,
+        scope_id: str,
+        *,
+        schema: type[BaseFrontmatter],
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Compute the profile path for ``scope_id``; existence is not checked."""
+        location = self._resolve_path(scope_id, schema, app_id, project_id)
+        return location
+
+    # ── Internals — same shape as ProfileWriter ───────────────────────────
+
+    def _resolve_path(
+        self,
+        scope_id: str,
+        schema: type[BaseFrontmatter],
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Assemble ``<scope root>/<scope_id>/<PROFILE_FILENAME>``.
+
+        Raises:
+            TypeError: If ``SCOPE_DIR`` or ``PROFILE_FILENAME`` is missing
+                or empty on ``schema`` (``SCOPE_DIR`` is checked first).
+        """
+        scope_name = getattr(schema, "SCOPE_DIR", "")
+        if not scope_name:
+            raise TypeError(
+                f"{schema.__name__} missing ``SCOPE_DIR`` ClassVar — "
+                "must inherit a scope mixin (UserScopedFrontmatter / "
+                "AgentScopedFrontmatter)."
+            )
+        profile_name = getattr(schema, "PROFILE_FILENAME", None)
+        if not profile_name:
+            raise TypeError(f"{schema.__name__} missing ``PROFILE_FILENAME`` ClassVar.")
+        # The scope root comes from the MemoryRoot method named
+        # ``<SCOPE_DIR>_dir``, called with the app / project segments.
+        root_method = getattr(self._root, f"{scope_name}_dir")
+        return root_method(app_id, project_id) / scope_id / profile_name
--- a/src/everos/infra/persistence/markdown/writers/init.py
+++ b/src/everos/infra/persistence/markdown/writers/init.py
@ -0,0 +1,43 @@
+"""Business markdown writers.
+
+Each storage strategy from the EverOS Markdown First spec gets a class
+here:
+
+    * :class:`BaseDailyWriter` — daily-log append (episode / atomic
+      fact / foresight / agent case). Subclass and bind ``schema``.
+    * :class:`AgentSkillWriter` — directory + progressive disclosure
+      (``skills/skill_<name>/{SKILL.md, references/, scripts/}``).
+      Single class, no subclassing.
+    * :class:`ProfileWriter` — single-file rewrite at a fixed name
+      (``user.md`` / ``behaviors.md`` / ``agent.md`` / ``soul.md`` /
+      ``tools.md``). Single class, no subclassing — caller hands in a
+      frontmatter instance whose ``PROFILE_FILENAME`` ClassVar pins
+      the filename.
+
+External usage::
+
+    from everos.infra.persistence.markdown.writers import (
+        BaseDailyWriter,
+        EpisodeWriter,
+        AgentSkillWriter,
+        ProfileWriter,
+    )
+"""
+
+from .agent_case_writer import AgentCaseWriter as AgentCaseWriter
+from .agent_skill_writer import AgentSkillWriter as AgentSkillWriter
+from .atomic_fact_writer import AtomicFactWriter as AtomicFactWriter
+from .base import BaseDailyWriter as BaseDailyWriter
+from .episode_writer import EpisodeWriter as EpisodeWriter
+from .foresight_writer import ForesightWriter as ForesightWriter
+from .profile_writer import ProfileWriter as ProfileWriter
+
+__all__ = [
+    "AgentCaseWriter",
+    "AgentSkillWriter",
+    "AtomicFactWriter",
+    "BaseDailyWriter",
+    "EpisodeWriter",
+    "ForesightWriter",
+    "ProfileWriter",
+]
--- a/src/everos/infra/persistence/markdown/writers/agent_case_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/agent_case_writer.py
@ -0,0 +1,63 @@
+"""AgentCase daily-log writer — md is the SoT for agent cases.
+
+Lives on the agent track (``agents/<agent_id>/.cases/...``).
+Inline carries audit + scoring fields (``owner_id`` / ``session_id`` /
+``timestamp`` / ``parent_id`` / ``quality_score``); sections carry
+``TaskIntent`` (required, primary BM25/embed), ``Approach`` (verbatim,
+not indexed — too long), and optional ``KeyInsight`` (verbatim).
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.persistence import MarkdownReader
+
+from ..mds import AgentCaseDailyFrontmatter
+from .base import BaseDailyWriter
+
+
+class AgentCaseWriter(BaseDailyWriter):
+    """Daily-log writer bound to :class:`AgentCaseDailyFrontmatter`.
+
+    This class adds no append methods of its own; it pins the schema and
+    supplies two hooks. The base class's generic ``scope_id`` parameter
+    carries the ``agent_id`` on this track.
+    """
+
+    schema = AgentCaseDailyFrontmatter
+
+    def _frontmatter_updates(
+        self,
+        scope_id: str,
+        date: _dt.date,
+        *,
+        next_count: int,
+    ) -> Mapping[str, Any] | None:
+        """Build the frontmatter values for the ``(scope_id, date)`` log file.
+
+        ``next_count`` is stored as ``entry_count``; ``last_appended_at``
+        is stamped with the current time. The other values are constants
+        or derived from ``scope_id`` / ``date``.
+        """
+        day = date.isoformat()
+        updates: dict[str, Any] = {
+            "id": f"agent_case_log_{scope_id}_{day}",
+            "type": "agent_case_daily",
+            "file_type": "agent_case_daily",
+            "schema_version": 1,
+            "agent_id": scope_id,
+            "track": "agent",
+            "date": day,
+            "entry_count": next_count,
+            "last_appended_at": to_iso_format(get_now_with_timezone()),
+        }
+        return updates
+
+    async def _current_count(self, path: Path) -> int:
+        """Return the ``entry_count`` stored in ``path``'s frontmatter.
+
+        Yields ``0`` when ``path`` is not an existing file or the key is
+        absent.
+        """
+        if await anyio.Path(path).is_file():
+            document = await MarkdownReader.read(path)
+            return document.frontmatter.get("entry_count", 0)
+        return 0
--- a/src/everos/infra/persistence/markdown/writers/agent_skill_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/agent_skill_writer.py
@ -0,0 +1,204 @@
+"""AgentSkillWriter — upsert skill main file + reference / script attachments.
+
+Skill storage is **directory + progressive disclosure** (wiki "Memory
+Types Markdown Format" v4): each skill lives under
+``agents/<agent_id>/skills/skill_<name>/`` with a ``SKILL.md`` main
+file plus ``references/*.md`` and ``scripts/*.<ext>`` siblings.
+
+This writer is intentionally distinct from :class:`BaseDailyWriter`:
+
+- **Upsert, not append.** Each ``write_*`` call overwrites the target
+  file in full. Skills don't accumulate entry markers — the body of
+  ``SKILL.md`` is the latest revision; references / scripts are
+  individually replaceable files.
+- **Single-skill API.** The chassis is *not* responsible for bulk
+  enumeration (Tier-1 prompt scanning is a sqlite/lancedb concern,
+  not a markdown-walk concern). One skill in, one skill out.
+- **No counters / hooks.** No frontmatter merging, no entry-id
+  generation, no _frontmatter_updates hook — the caller hands in a
+  fully-built :class:`AgentSkillFrontmatter` subclass instance and the body
+  string; the writer atomically replaces the file.
+
+Path resolution comes from :class:`MemoryRoot` + the ClassVars on
+:class:`AgentSkillFrontmatter` (``SKILLS_CONTAINER_NAME`` /
+``SKILL_DIR_PREFIX`` / etc.). The writer + reader pair is the single
+addressing API for skills.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from everos.core.persistence import MarkdownWriter, MemoryRoot
+
+from ..mds import AgentSkillFrontmatter
+
+
+class AgentSkillWriter:
+    """Upsert-style writer for a single skill's directory.
+
+    ``SKILL.md`` (frontmatter + body) is written through
+    ``MarkdownWriter.write_markdown``; reference and script files carry
+    no frontmatter and are written through ``MarkdownWriter.write``.
+    All paths are derived from :class:`MemoryRoot` and the ClassVars on
+    :class:`AgentSkillFrontmatter`.
+    """
+
+    def __init__(
+        self,
+        root: MemoryRoot,
+        *,
+        writer: MarkdownWriter | None = None,
+    ) -> None:
+        """Keep the memory root and the low-level writer.
+
+        Args:
+            root: Memory root used to resolve the agents directory.
+            writer: Optional pre-built :class:`MarkdownWriter`; one bound
+                to ``root`` is created when omitted.
+        """
+        self._root = root
+        self._writer = writer or MarkdownWriter(root)
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def write_main(
+        self,
+        agent_id: str,
+        skill_name: str,
+        *,
+        frontmatter: AgentSkillFrontmatter,
+        body: str,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Upsert ``skills/skill_<name>/SKILL.md``.
+
+        The whole file is rewritten: ``frontmatter`` supplies the YAML
+        head and ``body`` supplies the body, so earlier content
+        (including manual edits) does not survive the call.
+
+        Args:
+            agent_id: Owning agent.
+            skill_name: Unprefixed identifier (``"contract_risk_scan"``,
+                not ``"skill_contract_risk_scan"``).
+            frontmatter: Fully-built schema instance; its ``model_dump``
+                output (``None`` values included) becomes the YAML head.
+            body: Tier-2 body text; a missing final newline is added.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            Whatever ``MarkdownWriter.write_markdown`` returns for the
+            written ``SKILL.md`` (declared as a :class:`~pathlib.Path`).
+        """
+        target = self._main_path(agent_id, skill_name, app_id, project_id)
+        yaml_head = frontmatter.model_dump(exclude_none=False)
+        normalised_body = _ensure_trailing_newline(body)
+        return await self._writer.write_markdown(
+            target, frontmatter=yaml_head, body=normalised_body
+        )
+
+    async def write_reference(
+        self,
+        agent_id: str,
+        skill_name: str,
+        reference_name: str,
+        content: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Upsert ``skills/skill_<name>/references/<reference_name>.md``.
+
+        References are plain markdown without frontmatter or entry
+        markers; ``content`` is written as given, with a final newline
+        added when it is missing.
+
+        Args:
+            agent_id: Owning agent.
+            skill_name: Unprefixed skill identifier.
+            reference_name: Filename stem (no ``.md`` extension).
+            content: Full text of the reference file.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            Whatever ``MarkdownWriter.write`` returns for the written file
+            (declared as a :class:`~pathlib.Path`).
+        """
+        target = self._reference_path(
+            agent_id, skill_name, reference_name, app_id, project_id
+        )
+        text = _ensure_trailing_newline(content)
+        return await self._writer.write(target, text)
+
+    async def write_script(
+        self,
+        agent_id: str,
+        skill_name: str,
+        script_filename: str,
+        content: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Upsert ``skills/skill_<name>/scripts/<script_filename>``.
+
+        The caller supplies the *full* filename including its extension
+        (``redline.py`` / ``redline.sh`` / etc.) because scripts may be in
+        any language. ``content`` is written as given, with a final
+        newline added when it is missing.
+
+        Args:
+            agent_id: Owning agent.
+            skill_name: Unprefixed skill identifier.
+            script_filename: Complete filename of the script.
+            content: Full text of the script file.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            Whatever ``MarkdownWriter.write`` returns for the written file
+            (declared as a :class:`~pathlib.Path`).
+        """
+        target = self._script_path(
+            agent_id, skill_name, script_filename, app_id, project_id
+        )
+        text = _ensure_trailing_newline(content)
+        return await self._writer.write(target, text)
+
+    # ── Path API (callers that need to echo paths in responses) ──────────
+
+    def main_path(
+        self,
+        agent_id: str,
+        skill_name: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the ``SKILL.md`` path for a skill without touching disk."""
+        return self._main_path(agent_id, skill_name, app_id, project_id)
+
+    # ── Internals — path resolution from AgentSkillFrontmatter ClassVars ──────
+
+    def _skill_dir(
+        self, agent_id: str, skill_name: str, app_id: str, project_id: str
+    ) -> Path:
+        """Return ``<agents_dir>/<agent_id>/<container>/<prefix><skill_name>``."""
+        schema = AgentSkillFrontmatter
+        agent_root = self._root.agents_dir(app_id, project_id) / agent_id
+        dirname = f"{schema.SKILL_DIR_PREFIX}{skill_name}"
+        return agent_root / schema.SKILLS_CONTAINER_NAME / dirname
+
+    def _main_path(
+        self, agent_id: str, skill_name: str, app_id: str, project_id: str
+    ) -> Path:
+        """Return the skill's main-file path inside its directory."""
+        skill_root = self._skill_dir(agent_id, skill_name, app_id, project_id)
+        return skill_root / AgentSkillFrontmatter.SKILL_MAIN_FILENAME
+
+    def _reference_path(
+        self,
+        agent_id: str,
+        skill_name: str,
+        reference_name: str,
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Return ``<skill_dir>/<references_dir>/<reference_name>.md``."""
+        references_dir = (
+            self._skill_dir(agent_id, skill_name, app_id, project_id)
+            / AgentSkillFrontmatter.SKILL_REFERENCES_DIR_NAME
+        )
+        return references_dir / f"{reference_name}.md"
+
+    def _script_path(
+        self,
+        agent_id: str,
+        skill_name: str,
+        script_filename: str,
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Return ``<skill_dir>/<scripts_dir>/<script_filename>``."""
+        scripts_dir = (
+            self._skill_dir(agent_id, skill_name, app_id, project_id)
+            / AgentSkillFrontmatter.SKILL_SCRIPTS_DIR_NAME
+        )
+        return scripts_dir / script_filename
+
+
+def _ensure_trailing_newline(text: str) -> str:
+    """Return ``text`` terminated by a newline (POSIX text-file convention).
+
+    Empty input yields an empty string. Text that already ends with a
+    newline is returned unchanged; extra trailing newlines are kept, not
+    collapsed.
+    """
+    if not text:
+        return ""
+    needs_newline = not text.endswith("\n")
+    return text + "\n" if needs_newline else text
--- a/src/everos/infra/persistence/markdown/writers/atomic_fact_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/atomic_fact_writer.py
@ -0,0 +1,58 @@
+"""AtomicFact daily-log writer — md is the SoT for atomic facts.
+
+Caller hands pre-built ``inline`` (``owner_id`` / ``session_id`` /
+``timestamp`` / ``parent_id`` / ``sender_ids``) plus the single
+``Fact`` section. The chassis manages the in-file ``entry_id`` sequence
+(``af_<YYYYMMDD>_<NNNN>``). ``append_entry`` / ``append_entries`` come
+from :class:`BaseDailyWriter`; this subclass only declares the schema
+and the per-schema frontmatter / counter hooks.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.persistence import MarkdownReader
+
+from ..mds import AtomicFactDailyFrontmatter
+from .base import BaseDailyWriter
+
+
+class AtomicFactWriter(BaseDailyWriter):
+    """Daily-log writer for the AtomicFact schema (md = SoT)."""
+
+    schema = AtomicFactDailyFrontmatter
+
+    def _frontmatter_updates(
+        self,
+        scope_id: str,
+        date: _dt.date,
+        *,
+        next_count: int,
+    ) -> Mapping[str, Any] | None:
+        """Build the frontmatter fields written alongside each append.
+
+        Args:
+            scope_id: User id that owns the daily log.
+            date: Date bucket of the file being appended to.
+            next_count: Entry count the file holds once the append lands.
+
+        Returns:
+            Identity fields for the daily file plus the refreshed
+            ``entry_count`` and a ``last_appended_at`` timestamp.
+        """
+        return {
+            "id": f"atomic_fact_log_{scope_id}_{date.isoformat()}",
+            "type": "atomic_fact_daily",
+            "file_type": "atomic_fact_daily",
+            "schema_version": 1,
+            "user_id": scope_id,
+            "track": "user",
+            "date": date.isoformat(),
+            "entry_count": next_count,
+            "last_appended_at": to_iso_format(get_now_with_timezone()),
+        }
+
+    async def _current_count(self, path: Path) -> int:
+        """Return how many entries ``path`` already holds.
+
+        The frontmatter ``entry_count`` is preferred. When the key is
+        absent or null (for example after a manual edit of the file), the
+        count falls back to the number of parsed entry blocks so the next
+        entry id does not restart at zero in a file that already has
+        entries.
+
+        Args:
+            path: Daily-log file to inspect; a missing file counts as 0.
+
+        Returns:
+            The current entry count as an ``int``.
+        """
+        if not await anyio.Path(path).is_file():
+            return 0
+        parsed = await MarkdownReader.read(path)
+        recorded = parsed.frontmatter.get("entry_count")
+        if recorded is None:
+            # No usable counter in the YAML head: trust the entry markers.
+            return len(parsed.entries)
+        return int(recorded)
--- a/src/everos/infra/persistence/markdown/writers/base.py
+++ b/src/everos/infra/persistence/markdown/writers/base.py
@ -0,0 +1,301 @@
+"""Base business writer for daily-log markdown files.
+
+Daily-log files (memcell / episode / case / atomic_fact / foresight)
+share three things:
+
+    * scope (user-track or agent-track, derived from the schema)
+    * filename pattern: ``<FILE_PREFIX>-<YYYY-MM-DD>.md`` under
+      ``<scope_root>/<scope_id>/<DIR_NAME>/``
+    * entry id pattern: ``<ENTRY_ID_PREFIX>_<YYYYMMDD>_<NNN>``
+
+:class:`BaseDailyWriter` factors out **path resolution + entry-id
+construction + today's date default**, leaving frontmatter field
+maintenance (e.g. ``entry_count`` / ``last_appended_at``) to concrete
+business subclasses.
+
+Subclass usage::
+
+    class _MemcellWriter(BaseDailyWriter):
+        schema = UserMemcellDailyFrontmatter
+
+    writer = _MemcellWriter(root)
+    eid = await writer.append("u_jason", entry_body="...")
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+from typing import Any, ClassVar
+
+import anyio
+
+from everos.component.utils.datetime import today_with_timezone
+from everos.core.persistence import (
+    BaseFrontmatter,
+    EntryId,
+    MarkdownReader,
+    MarkdownWriter,
+    MemoryRoot,
+    render_structured_entry,
+)
+
+
+class BaseDailyWriter:
+    """Append a new entry to today's (or a given date's) daily-log file.
+
+    Subclasses bind a single :class:`BaseFrontmatter` subclass via the
+    ``schema`` ClassVar. The schema must declare ``SCOPE_DIR``,
+    ``ENTRY_ID_PREFIX``, ``DIR_NAME``, and ``FILE_PREFIX`` —
+    ``SCOPE_DIR`` is provided by inheriting :class:`UserScopedFrontmatter`
+    or :class:`AgentScopedFrontmatter` (or by a custom scope mixin).
+
+    Path resolution is driven entirely by the schema's ClassVars and
+    the given :class:`MemoryRoot` — write, read, and addressing for a
+    single record kind all live in this writer + its reader twin, no
+    separate layout layer.
+    """
+
+    schema: ClassVar[type[BaseFrontmatter]]  # subclass must declare
+
+    def __init__(
+        self,
+        root: MemoryRoot,
+        *,
+        writer: MarkdownWriter | None = None,
+    ) -> None:
+        """Validate the bound schema and keep the root / low-level writer.
+
+        Args:
+            root: Memory root that path resolution is delegated to.
+            writer: Optional pre-built :class:`MarkdownWriter`; one bound
+                to ``root`` is created when omitted.
+
+        Raises:
+            TypeError: If the subclass declares no ``schema``, or the
+                schema lacks (or leaves empty) any of ``SCOPE_DIR`` /
+                ``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``.
+        """
+        schema = getattr(type(self), "schema", None)
+        if schema is None:
+            raise TypeError(
+                f"{type(self).__name__} must declare a class-level ``schema`` attribute"
+            )
+        # Falsy values (missing attribute, empty string) are rejected alike.
+        for attr in ("SCOPE_DIR", "ENTRY_ID_PREFIX", "DIR_NAME", "FILE_PREFIX"):
+            if not getattr(schema, attr, None):
+                raise TypeError(f"{schema.__name__} missing ClassVar {attr!r}")
+        self._root = root
+        self._writer = writer or MarkdownWriter(root)
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def append_entry(
+        self,
+        scope_id: str,
+        *,
+        inline: Mapping[str, object],
+        sections: Mapping[str, str],
+        date: _dt.date | None = None,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> EntryId:
+        """Append a single rendered entry; return the freshly minted ``EntryId``.
+
+        Unifies the per-schema ``append_entry`` previously duplicated across
+        :class:`AtomicFactWriter` / :class:`ForesightWriter` /
+        :class:`EpisodeWriter` / :class:`AgentCaseWriter`. The call is a
+        one-item :meth:`append_entries`, so the whole flow (read the
+        current count, allocate ``EntryId``, render entry body, update
+        frontmatter, write) runs inside one per-path lock, eliminating
+        the read-modify-write race that previously allowed concurrent
+        callers to silently overwrite each other's appends.
+
+        Args:
+            scope_id: ``user_id`` or ``agent_id`` (matches the schema's
+                scope flavour).
+            inline: Inline metadata (``**key**: value`` lines under the
+                H2 header).
+            sections: ``{title: body}`` blocks rendered as ``### Title`` +
+                body text.
+            date: Date bucket — defaults to today in the configured TZ.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            The :class:`EntryId` assigned to the new entry. Caller can
+            use it to write downstream state (sqlite row, lance index).
+        """
+        eids = await self.append_entries(
+            scope_id,
+            [(inline, sections)],
+            date=date,
+            app_id=app_id,
+            project_id=project_id,
+        )
+        return eids[0]
+
+    async def append_entries(
+        self,
+        scope_id: str,
+        items: Sequence[tuple[Mapping[str, object], Mapping[str, str]]],
+        *,
+        date: _dt.date | None = None,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> list[EntryId]:
+        """Append ``N`` rendered entries in one locked read-modify-write cycle.
+
+        Compared with looping :meth:`append_entry` ``N`` times, this:
+
+        * Performs one file read + one file write instead of ``N`` of each.
+        * Holds the per-path lock for one short critical section.
+        * Updates ``frontmatter`` (``entry_count`` / ``last_appended_at``)
+          once at the end (no intermediate flapping).
+
+        Order in ``items`` is the order entries land in the file (and the
+        order ``EntryId``s are allocated). Empty ``items`` is a no-op
+        that returns ``[]`` without touching the file.
+
+        Args:
+            scope_id: Subject scope (user / agent id).
+            items: Sequence of ``(inline, sections)`` pairs.
+            date: Date bucket — defaults to today in the configured TZ.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            ``N`` :class:`EntryId`s in the same order as ``items``.
+        """
+        bucket = date or today_with_timezone()
+        path = self._resolve_path(scope_id, bucket, app_id, project_id)
+        if not items:
+            return []
+
+        # Everything from the count read to the write stays under the
+        # per-path lock so allocated ids cannot interleave between callers.
+        async with self._writer.lock_for(path):
+            base_count = await self._current_count(path)
+            # Ids are allocated from consecutive indices after the current count.
+            eids = [
+                EntryId.next_for(self.schema.ENTRY_ID_PREFIX, bucket, base_count + i)
+                for i in range(len(items))
+            ]
+            rendered = [
+                (
+                    render_structured_entry(
+                        header=eid.format(),
+                        inline=inline,
+                        sections=sections,
+                    ),
+                    eid,
+                )
+                for eid, (inline, sections) in zip(eids, items, strict=True)
+            ]
+            # The subclass hook is consulted once per batch, with the final count.
+            frontmatter_updates = self._frontmatter_updates(
+                scope_id, bucket, next_count=base_count + len(items)
+            )
+            # The lock is already held here, hence the unlocked writer variant.
+            await self._writer._append_entries_unlocked(  # noqa: SLF001
+                path,
+                rendered,
+                frontmatter_updates=frontmatter_updates,
+            )
+            return eids
+
+    async def append(
+        self,
+        scope_id: str,
+        entry_body: str,
+        *,
+        date: _dt.date | None = None,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> EntryId:
+        """Append a pre-rendered ``entry_body`` to the daily-log file.
+
+        Kept for back-compat with callers that hand in fully rendered
+        bodies (rare — most callers should use :meth:`append_entry` and
+        let this class do the rendering). The whole sequence (read the
+        current count, allocate eid, write) runs inside the per-path
+        lock.
+
+        Args:
+            scope_id: ``user_id`` or ``agent_id`` (matches the schema's
+                scope flavour).
+            entry_body: Content placed between the entry markers.
+            date: Date bucket — defaults to today in the configured TZ.
+            frontmatter_updates: Optional fields to merge into the file's
+                frontmatter (e.g. ``entry_count`` / ``last_appended_at``).
+                When ``None``, the subclass hook
+                :meth:`_frontmatter_updates` is consulted to build
+                default updates.
+            app_id: App scope segment of the path.
+            project_id: Project scope segment of the path.
+
+        Returns:
+            The :class:`EntryId` assigned to the new entry.
+        """
+        # NOTE(review): when a caller supplies ``frontmatter_updates``, the
+        # hook is skipped; a subclass whose ``_current_count`` reads
+        # ``entry_count`` then depends on the caller advancing that field —
+        # confirm callers do so.
+        bucket = date or today_with_timezone()
+        path = self._resolve_path(scope_id, bucket, app_id, project_id)
+
+        async with self._writer.lock_for(path):
+            count = await self._current_count(path)
+            eid = EntryId.next_for(self.schema.ENTRY_ID_PREFIX, bucket, count)
+
+            # Subclass hook: derive defaults if caller passes nothing.
+            if frontmatter_updates is None:
+                frontmatter_updates = self._frontmatter_updates(
+                    scope_id, bucket, next_count=count + 1
+                )
+
+            await self._writer._append_entries_unlocked(  # noqa: SLF001
+                path,
+                [(entry_body, eid)],
+                frontmatter_updates=frontmatter_updates,
+            )
+            return eid
+
+    # ── Hooks (subclass override) ─────────────────────────────────────────
+
+    async def _current_count(self, path: Path) -> int:
+        """Return the current entry count for the file.
+
+        Default: the number of entries :class:`MarkdownReader` parses out
+        of the file, or 0 when the file does not exist. Subclasses may
+        override to read a frontmatter field (e.g. ``entry_count``) when
+        they trust that field over a marker scan.
+        """
+        if not await anyio.Path(path).is_file():
+            return 0
+        parsed = await MarkdownReader.read(path)
+        return len(parsed.entries)
+
+    def _frontmatter_updates(
+        self,
+        scope_id: str,
+        date: _dt.date,
+        *,
+        next_count: int,
+    ) -> Mapping[str, Any] | None:
+        """Build the per-append frontmatter dict (subclass override).
+
+        Called by :meth:`append_entries` for every non-empty batch, and
+        by :meth:`append` only when its ``frontmatter_updates`` argument
+        is ``None``. Default returns ``None`` (no frontmatter mutation).
+        Concrete business subclasses override to maintain fields like
+        ``id`` / ``entry_count`` / ``last_appended_at`` automatically,
+        so callers don't repeat themselves on every append.
+
+        Args:
+            scope_id: Subject scope (user / agent id).
+            date: Date bucket of the file being appended to.
+            next_count: Entry count the file holds after the append.
+        """
+        return None
+
+    # ── Path API ──────────────────────────────────────────────────────────
+
+    def path_for(
+        self,
+        scope_id: str,
+        date: _dt.date | None = None,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the daily-log path for ``scope_id`` on ``date`` (today default).
+
+        Public counterpart of :meth:`_resolve_path` — callers (services,
+        scripts) should use this rather than poking at private attrs.
+        """
+        return self._resolve_path(
+            scope_id, date or today_with_timezone(), app_id, project_id
+        )
+
+    # ── Internals ─────────────────────────────────────────────────────────
+
+    def _resolve_path(
+        self, scope_id: str, date: _dt.date, app_id: str, project_id: str
+    ) -> Path:
+        """Build the daily-log path for ``scope_id`` on ``date``.
+
+        The result is ``<scope_dir>/<scope_id>/<DIR_NAME>/`` followed by
+        ``<FILE_PREFIX>-<YYYY-MM-DD>.md``.
+        """
+        # SCOPE_DIR ("users" / "agents") names the matching MemoryRoot method,
+        # which prepends the <app>/<project> business prefix.
+        scope_dir = getattr(self._root, f"{self.schema.SCOPE_DIR}_dir")
+        return (
+            scope_dir(app_id, project_id)
+            / scope_id
+            / self.schema.DIR_NAME
+            / f"{self.schema.FILE_PREFIX}-{date.isoformat()}.md"
+        )
--- a/src/everos/infra/persistence/markdown/writers/episode_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/episode_writer.py
@ -0,0 +1,69 @@
+"""Episode daily-log writer — md is the SoT for Episode memories.
+
+Stays in the chassis style: caller hands in pre-built ``inline`` and
+``sections`` dicts plus the scope id (``owner_id``). Domain →
+structured-entry shaping lives in the calling pipeline (cf. architecture
+rule: ``infra`` may not import ``memory``).
+
+This milestone assumes well-behaved callers (no retransmit dedupe needed).
+The writer just appends; the chassis manages the in-file ``entry_id``
+sequence, which is the single source of identity for an md entry.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.persistence import MarkdownReader
+
+from ..mds import EpisodeDailyFrontmatter
+from .base import BaseDailyWriter
+
+
+class EpisodeWriter(BaseDailyWriter):
+    """Daily-log writer for the Episode schema (md = SoT).
+
+    ``append_entry`` / ``append_entries`` come from
+    :class:`BaseDailyWriter`; the ``entry_id`` (``ep_<YYYYMMDD>_<NNNN>``)
+    is the in-file identity allocated under the per-path lock. Callers
+    can derive a globally-unique id from ``(owner_id, entry_id)``
+    without persisting any algo-side uuid.
+    """
+
+    schema = EpisodeDailyFrontmatter
+
+    # ── Frontmatter override (entry_count + last_appended_at) ────────────
+
+    def _frontmatter_updates(
+        self,
+        scope_id: str,
+        date: _dt.date,
+        *,
+        next_count: int,
+    ) -> Mapping[str, Any] | None:
+        """Build the frontmatter fields written alongside each append.
+
+        Args:
+            scope_id: User id that owns the daily log.
+            date: Date bucket of the file being appended to.
+            next_count: Entry count the file holds once the append lands.
+
+        Returns:
+            Identity fields for the daily file plus the refreshed
+            ``entry_count`` and a ``last_appended_at`` timestamp.
+        """
+        return {
+            "id": f"episode_log_{scope_id}_{date.isoformat()}",
+            "type": "episode_daily",
+            "file_type": "episode_daily",
+            "schema_version": 1,
+            "user_id": scope_id,
+            "track": "user",
+            "date": date.isoformat(),
+            "entry_count": next_count,
+            "last_appended_at": to_iso_format(get_now_with_timezone()),
+        }
+
+    async def _current_count(self, path: Path) -> int:
+        """Return how many entries ``path`` already holds.
+
+        The frontmatter ``entry_count`` is preferred. When the key is
+        absent or null (for example after a manual edit of the file), the
+        count falls back to the number of parsed entry blocks so the next
+        entry id does not restart at zero in a file that already has
+        entries.
+
+        Args:
+            path: Daily-log file to inspect; a missing file counts as 0.
+
+        Returns:
+            The current entry count as an ``int``.
+        """
+        if not await anyio.Path(path).is_file():
+            return 0
+        parsed = await MarkdownReader.read(path)
+        recorded = parsed.frontmatter.get("entry_count")
+        if recorded is None:
+            # No usable counter in the YAML head: trust the entry markers.
+            return len(parsed.entries)
+        return int(recorded)
--- a/src/everos/infra/persistence/markdown/writers/foresight_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/foresight_writer.py
@ -0,0 +1,58 @@
+"""Foresight daily-log writer — md is the SoT for foresights.
+
+Inline carries the audit / scope + time-window fields (``owner_id`` /
+``session_id`` / ``timestamp`` / ``parent_id`` / ``sender_ids`` plus
+optional ``start_time`` / ``end_time`` / ``duration_days``). Sections
+carry the BM25-indexed content: ``Foresight`` (required, primary
+field) and optional ``Evidence`` (secondary BM25 field).
+``append_entry`` / ``append_entries`` come from :class:`BaseDailyWriter`.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.persistence import MarkdownReader
+
+from ..mds import ForesightDailyFrontmatter
+from .base import BaseDailyWriter
+
+
+class ForesightWriter(BaseDailyWriter):
+    """Daily-log writer for the Foresight schema (md = SoT)."""
+
+    schema = ForesightDailyFrontmatter
+
+    def _frontmatter_updates(
+        self,
+        scope_id: str,
+        date: _dt.date,
+        *,
+        next_count: int,
+    ) -> Mapping[str, Any] | None:
+        """Build the frontmatter fields written alongside each append.
+
+        Args:
+            scope_id: User id that owns the daily log.
+            date: Date bucket of the file being appended to.
+            next_count: Entry count the file holds once the append lands.
+
+        Returns:
+            Identity fields for the daily file plus the refreshed
+            ``entry_count`` and a ``last_appended_at`` timestamp.
+        """
+        return {
+            "id": f"foresight_log_{scope_id}_{date.isoformat()}",
+            "type": "foresight_daily",
+            "file_type": "foresight_daily",
+            "schema_version": 1,
+            "user_id": scope_id,
+            "track": "user",
+            "date": date.isoformat(),
+            "entry_count": next_count,
+            "last_appended_at": to_iso_format(get_now_with_timezone()),
+        }
+
+    async def _current_count(self, path: Path) -> int:
+        """Return how many entries ``path`` already holds.
+
+        The frontmatter ``entry_count`` is preferred. When the key is
+        absent or null (for example after a manual edit of the file), the
+        count falls back to the number of parsed entry blocks so the next
+        entry id does not restart at zero in a file that already has
+        entries.
+
+        Args:
+            path: Daily-log file to inspect; a missing file counts as 0.
+
+        Returns:
+            The current entry count as an ``int``.
+        """
+        if not await anyio.Path(path).is_file():
+            return 0
+        parsed = await MarkdownReader.read(path)
+        recorded = parsed.frontmatter.get("entry_count")
+        if recorded is None:
+            # No usable counter in the YAML head: trust the entry markers.
+            return len(parsed.entries)
+        return int(recorded)
--- a/src/everos/infra/persistence/markdown/writers/profile_writer.py
+++ b/src/everos/infra/persistence/markdown/writers/profile_writer.py
@ -0,0 +1,127 @@
+"""ProfileWriter — upsert a single-file, fixed-name profile markdown.
+
+Profile storage is **single-file rewrite** (the third storage strategy
+in the EverOS Markdown First spec). Each profile lives at a fixed
+filename under the agent or user directory::
+
+    users/<user_id>/user.md          ← user profile
+    users/<user_id>/behaviors.md     ← user behaviour patterns
+    agents/<agent_id>/agent.md       ← agent playbook
+    agents/<agent_id>/soul.md        ← agent identity / values
+    agents/<agent_id>/tools.md       ← agent tool declarations
+
+Compared with :class:`AgentSkillWriter` (directory + progressive
+disclosure) and :class:`BaseDailyWriter` (per-date append + entry
+markers), the profile writer is the simplest of the three:
+
+- **Upsert, not append.** Each ``write`` overwrites the file in full.
+- **Fixed path.** Caller passes ``scope_id`` only — no ``name``
+  parameter; the filename is fixed by the schema's
+  ``PROFILE_FILENAME`` ClassVar.
+- **No business hooks.** No frontmatter merging, no entry-id
+  generation. The caller hands in a fully-built schema instance.
+
+The schema must declare two ClassVars:
+
+- ``SCOPE_DIR`` (``"users"`` / ``"agents"``) — inherited from
+  :class:`UserScopedFrontmatter` / :class:`AgentScopedFrontmatter`.
+- ``PROFILE_FILENAME`` (``"user.md"`` / ``"agent.md"`` / …) —
+  declared on the concrete profile schema itself.
+
+There is no ``ProfileFrontmatter`` base class: profile schemas are
+duck-typed via the two ClassVars. Subclasses inherit the scope mixin
+and add ``PROFILE_FILENAME`` plus their business fields directly.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from everos.core.persistence import BaseFrontmatter, MarkdownWriter, MemoryRoot
+
+
+class ProfileWriter:
+    """Upsert writer for profiles stored as one fixed-name file per scope id."""
+
+    def __init__(
+        self,
+        root: MemoryRoot,
+        *,
+        writer: MarkdownWriter | None = None,
+    ) -> None:
+        """Keep the memory root and the low-level writer.
+
+        Args:
+            root: Memory root used to resolve the scope directory.
+            writer: Optional pre-built :class:`MarkdownWriter`; one bound
+                to ``root`` is created when omitted.
+        """
+        self._root = root
+        self._writer = writer or MarkdownWriter(root)
+
+    # ── Public API ────────────────────────────────────────────────────────
+
+    async def write(
+        self,
+        scope_id: str,
+        *,
+        frontmatter: BaseFrontmatter,
+        body: str,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Upsert ``<app>/<project>/<scope>/<scope_id>/<PROFILE_FILENAME>``.
+
+        The target path is derived from the concrete type of
+        ``frontmatter``, so the schema instance alone decides both the
+        scope directory and the filename.
+
+        Args:
+            scope_id: ``user_id`` or ``agent_id`` (must match the
+                schema's scope mixin).
+            frontmatter: Fully-built schema instance; its ``model_dump``
+                output (``None`` values included) becomes the YAML head.
+            body: Profile body text; a missing final newline is added.
+            app_id: App scope segment (defaults to the ``"default"`` space).
+            project_id: Project scope segment (defaults to ``"default"``).
+
+        Returns:
+            Whatever ``MarkdownWriter.write_markdown`` returns for the
+            written profile (declared as a :class:`~pathlib.Path`).
+
+        Raises:
+            TypeError: If the schema lacks ``SCOPE_DIR`` or
+                ``PROFILE_FILENAME``.
+        """
+        target = self._resolve_path(scope_id, type(frontmatter), app_id, project_id)
+        yaml_head = frontmatter.model_dump(exclude_none=False)
+        normalised_body = _ensure_trailing_newline(body)
+        return await self._writer.write_markdown(
+            target, frontmatter=yaml_head, body=normalised_body
+        )
+
+    def path_for(
+        self,
+        scope_id: str,
+        *,
+        schema: type[BaseFrontmatter],
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> Path:
+        """Return the profile path for ``schema`` without touching disk."""
+        return self._resolve_path(scope_id, schema, app_id, project_id)
+
+    # ── Internals ─────────────────────────────────────────────────────────
+
+    def _resolve_path(
+        self,
+        scope_id: str,
+        schema: type[BaseFrontmatter],
+        app_id: str,
+        project_id: str,
+    ) -> Path:
+        """Build ``<scope_root>/<scope_id>/<PROFILE_FILENAME>`` for ``schema``.
+
+        Raises:
+            TypeError: If ``schema`` has no (or an empty) ``SCOPE_DIR`` or
+                ``PROFILE_FILENAME`` ClassVar; ``SCOPE_DIR`` is checked first.
+        """
+        scope_dir = getattr(schema, "SCOPE_DIR", "")
+        if not scope_dir:
+            raise TypeError(
+                f"{schema.__name__} missing ``SCOPE_DIR`` ClassVar — "
+                "must inherit a scope mixin (UserScopedFrontmatter / "
+                "AgentScopedFrontmatter)."
+            )
+        filename = getattr(schema, "PROFILE_FILENAME", None)
+        if not filename:
+            raise TypeError(f"{schema.__name__} missing ``PROFILE_FILENAME`` ClassVar.")
+        # SCOPE_DIR names the matching MemoryRoot method (<app>/<project> prefix).
+        resolve_scope_root = getattr(self._root, f"{scope_dir}_dir")
+        return resolve_scope_root(app_id, project_id) / scope_id / filename
+
+
+def _ensure_trailing_newline(text: str) -> str:
+    """Return ``text`` terminated by a newline (POSIX text-file convention).
+
+    Empty input yields an empty string. Text that already ends with a
+    newline is returned unchanged; extra trailing newlines are kept, not
+    collapsed.
+    """
+    if not text:
+        return ""
+    needs_newline = not text.endswith("\n")
+    return text + "\n" if needs_newline else text
--- a/src/everos/infra/persistence/sqlite/init.py
+++ b/src/everos/infra/persistence/sqlite/init.py
@ -0,0 +1,66 @@
+"""SQLite business persistence layer.
+
+Sits on top of :mod:`everos.core.persistence.sqlite` (engine + sessions +
+``BaseTable`` + ``RepoBase``) and provides:
+
+    * lazy process-wide engine + session-factory singletons
+      (:mod:`.sqlite_manager`)
+    * concrete table schemas under :mod:`.tables`
+    * concrete repository singletons under :mod:`.repos`
+
+External usage::
+
+    from everos.infra.persistence.sqlite import (
+        get_engine, get_session_factory, dispose_engine,
+        # business tables / repos are re-exported here too —
+        # callers MUST go through this top-level package because
+        # ``infra.persistence.sqlite.**`` (sub-packages) are forbidden
+        # to ``service`` / ``memory`` / ``entrypoints`` by import-linter.
+        UnprocessedBuffer, Memcell, ConversationStatus,
+        unprocessed_buffer_repo, memcell_repo, conversation_status_repo,
+    )
+
+The :class:`SqliteLifespanProvider` runs ``SQLModel.metadata.create_all``
+on app startup and ``dispose_engine`` on shutdown, so business code does
+not need to manage either.
+"""
+
+# Importing ``tables`` registers every business SQLModel in
+# ``SQLModel.metadata`` so ``SqliteLifespanProvider.startup`` can
+# ``create_all`` without callers having to import each model module.
+from . import tables as tables  # noqa: F401
+from .repos import QueueSummary as QueueSummary
+from .repos import cluster_repo as cluster_repo
+from .repos import conversation_status_repo as conversation_status_repo
+from .repos import md_change_state_repo as md_change_state_repo
+from .repos import memcell_repo as memcell_repo
+from .repos import mint_cluster_id as mint_cluster_id
+from .repos import unprocessed_buffer_repo as unprocessed_buffer_repo
+from .sqlite_manager import dispose_engine as dispose_engine
+from .sqlite_manager import get_engine as get_engine
+from .sqlite_manager import get_session_factory as get_session_factory
+from .tables import Cluster as Cluster
+from .tables import ClusterMember as ClusterMember
+from .tables import ConversationStatus as ConversationStatus
+from .tables import MdChangeState as MdChangeState
+from .tables import Memcell as Memcell
+from .tables import UnprocessedBuffer as UnprocessedBuffer
+
+__all__ = [
+    "Cluster",
+    "ClusterMember",
+    "ConversationStatus",
+    "MdChangeState",
+    "Memcell",
+    "QueueSummary",
+    "UnprocessedBuffer",
+    "cluster_repo",
+    "conversation_status_repo",
+    "dispose_engine",
+    "get_engine",
+    "get_session_factory",
+    "md_change_state_repo",
+    "memcell_repo",
+    "mint_cluster_id",
+    "unprocessed_buffer_repo",
+]
--- a/src/everos/infra/persistence/sqlite/repos/init.py
+++ b/src/everos/infra/persistence/sqlite/repos/init.py
@ -0,0 +1,23 @@
+"""Business SQLite repository singletons.
+
+Repository instances for business tables, wired to the process-wide
+engine singleton.
+"""
+
+from .cluster import cluster_repo as cluster_repo
+from .cluster import mint_cluster_id as mint_cluster_id
+from .conversation_status import conversation_status_repo as conversation_status_repo
+from .md_change_state import QueueSummary as QueueSummary
+from .md_change_state import md_change_state_repo as md_change_state_repo
+from .memcell import memcell_repo as memcell_repo
+from .unprocessed_buffer import unprocessed_buffer_repo as unprocessed_buffer_repo
+
+# Public API of the ``repos`` package: exactly the names imported above.
+__all__ = [
+    "QueueSummary",
+    "cluster_repo",
+    "conversation_status_repo",
+    "md_change_state_repo",
+    "memcell_repo",
+    "mint_cluster_id",
+    "unprocessed_buffer_repo",
+]
--- a/src/everos/infra/persistence/sqlite/repos/cluster.py
+++ b/src/everos/infra/persistence/sqlite/repos/cluster.py
@ -0,0 +1,240 @@
+"""Repository for the ``cluster`` + ``cluster_member`` pair.
+
+Bridges between the storage row shape and the algo-side
+:class:`everalgo.clustering.Cluster` value object. Callers always work in
+the algo type — this repo handles the centroid bytes round-trip, the
+preview JSON round-trip, and the membership join so the algo's
+``members: list[str]`` field is always fully populated on read. The
+``last_ts`` field is stored as int milliseconds (matches the algo type
+exactly) to keep the round-trip lossless across SQLite's tz-naive
+``DateTime`` storage.
+
+The single ``upsert_with_members`` write path is what every cluster
+strategy invokes after a merge / new-cluster decision: it stamps the
+``cluster`` row (UPSERT) and reconciles the ``cluster_member`` rows
+(diff-then-insert; pre-existing members are kept, new members appended)
+so calls are idempotent even if a strategy retries.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+
+import numpy as np
+from everalgo.clustering import Cluster as AlgoCluster
+from sqlalchemy import select
+from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.persistence.sqlite import RepoBase, session_scope
+
+from ..sqlite_manager import get_session_factory
+from ..tables import Cluster, ClusterMember
+
+# Element dtype for centroid vectors. The same dtype is used to serialise a
+# centroid to bytes on write and to decode the stored bytes on read, so both
+# directions must agree on it.
+_CENTROID_DTYPE = np.float32
+
+
+def mint_cluster_id() -> str:
+    """Return a new random cluster id shaped ``cl_<12hex>``.
+
+    The suffix is the first 12 hex digits of a fresh UUID4; the format
+    mirrors ``_mint_memcell_id``.
+    """
+    suffix = uuid.uuid4().hex[:12]
+    return "cl_" + suffix
+
+
+class _ClusterRepo(RepoBase[Cluster]):
+    model = Cluster
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Return the session factory obtained from ``get_session_factory()``."""
+        return get_session_factory()
+
+    # ── Reads ──────────────────────────────────────────────────────────────
+
+    async def get_with_members(self, cluster_id: str) -> AlgoCluster | None:
+        """Load a single cluster, with its members, as an algo value object.
+
+        Returns:
+            The hydrated ``AlgoCluster``, or ``None`` when no ``cluster``
+            row has this ``cluster_id``. Downstream strategies that race
+            the writer should treat ``None`` as a transient miss and let
+            OME retry the run.
+        """
+        async with session_scope(self._factory) as session:
+            cluster_row = await session.get(Cluster, cluster_id)
+            if cluster_row is None:
+                return None
+            grouped = await _load_members_by_cluster(session, [cluster_id])
+        # A cluster without membership rows hydrates with an empty list.
+        member_ids = grouped.get(cluster_id, [])
+        return _row_to_algo(cluster_row, member_ids)
+
+    async def list_for_owner(
+        self,
+        owner_id: str,
+        kind: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> list[AlgoCluster]:
+        """Return every cluster of ``(app, project, owner, kind)`` as algo objects.
+
+        Hot path for the cluster strategies (``cluster_by_geometry`` /
+        ``cluster_by_llm`` need the full ``existing_clusters`` list). All
+        four scope columns are matched exactly, which keeps one space's
+        clusters from merging into another's. Members for all matched
+        clusters are fetched with a single extra query, so each returned
+        cluster carries its full ``members`` list.
+
+        Returns:
+            One ``AlgoCluster`` per matching row; an empty list when
+            nothing matches.
+        """
+        scoped = select(Cluster).where(
+            Cluster.app_id == app_id,
+            Cluster.project_id == project_id,
+            Cluster.owner_id == owner_id,
+            Cluster.kind == kind,
+        )
+        async with session_scope(self._factory) as session:
+            result = await session.execute(scoped)
+            cluster_rows = list(result.scalars().all())
+            if not cluster_rows:
+                return []
+            grouped = await _load_members_by_cluster(
+                session, [found.cluster_id for found in cluster_rows]
+            )
+        return [
+            _row_to_algo(found, grouped.get(found.cluster_id, []))
+            for found in cluster_rows
+        ]
+
+    async def find_cluster_id_for_member(
+        self,
+        member_type: str,
+        member_id: str,
+    ) -> str | None:
+        """Resolve ``(member_type, member_id)`` to the owning ``cluster_id``.
+
+        Returns ``None`` when the entity is not attached to any cluster.
+        The query is capped at one row, so if several membership rows
+        matched only one id would be returned. The lookup is meant to be
+        served by the ``ix_cluster_member_reverse`` index (declared with
+        the table, not in this module).
+        """
+        reverse_lookup = (
+            select(ClusterMember.cluster_id)
+            .where(
+                ClusterMember.member_type == member_type,
+                ClusterMember.member_id == member_id,
+            )
+            .limit(1)
+        )
+        async with session_scope(self._factory) as session:
+            result = await session.execute(reverse_lookup)
+            return result.scalar_one_or_none()
+
+    # ── Write ──────────────────────────────────────────────────────────────
+
+    async def upsert_with_members(
+        self,
+        algo_cluster: AlgoCluster,
+        *,
+        owner_id: str,
+        owner_type: str,
+        kind: str,
+        member_type: str,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> None:
+        """Persist one algo cluster snapshot + its membership rows.
+
+        The ``cluster`` row is written with an UPSERT keyed on
+        ``cluster_id``. On conflict only ``centroid_blob``, ``count``,
+        ``last_ts_ms`` and ``preview_json`` are refreshed, so the scope
+        columns (app / project / owner / kind) keep the values from the
+        first insert.
+
+        Membership is additive: ids in ``algo_cluster.members`` that are
+        not yet stored for this cluster are inserted, and stored rows are
+        never removed or modified. Ids repeated inside
+        ``algo_cluster.members`` are collapsed to one row, so the call
+        stays idempotent under OME's at-least-once retry semantics even
+        when the snapshot lists a member twice.
+
+        Args:
+            algo_cluster: Snapshot to persist. ``id`` must be set
+                (caller-minted via :func:`mint_cluster_id` for a new
+                cluster, or carried through from a merge return) and
+                ``members`` is the full member list.
+            owner_id: Owner scope column for a newly inserted row.
+            owner_type: Owner type column for a newly inserted row.
+            kind: Cluster kind column for a newly inserted row.
+            member_type: Stored on every membership row inserted here.
+            app_id: Application scope for a newly inserted row.
+            project_id: Project scope for a newly inserted row.
+
+        Raises:
+            ValueError: If ``algo_cluster.id`` is empty or ``None``.
+        """
+        cluster_id = algo_cluster.id
+        if not cluster_id:
+            raise ValueError(
+                "upsert_with_members requires algo_cluster.id (mint via "
+                "mint_cluster_id() before passing in)."
+            )
+        now = get_utc_now()
+        # Centroid is stored as raw bytes in the module-wide centroid dtype.
+        centroid_blob = np.asarray(
+            algo_cluster.centroid, dtype=_CENTROID_DTYPE
+        ).tobytes()
+        preview_json = json.dumps(list(algo_cluster.preview), ensure_ascii=False)
+
+        async with session_scope(self._factory) as s:
+            cluster_stmt = (
+                sqlite_insert(Cluster)
+                .values(
+                    cluster_id=cluster_id,
+                    app_id=app_id,
+                    project_id=project_id,
+                    owner_id=owner_id,
+                    owner_type=owner_type,
+                    kind=kind,
+                    centroid_blob=centroid_blob,
+                    count=algo_cluster.count,
+                    last_ts_ms=algo_cluster.last_ts,
+                    preview_json=preview_json,
+                )
+                .on_conflict_do_update(
+                    index_elements=["cluster_id"],
+                    set_={
+                        "centroid_blob": centroid_blob,
+                        "count": algo_cluster.count,
+                        "last_ts_ms": algo_cluster.last_ts,
+                        "preview_json": preview_json,
+                    },
+                )
+            )
+            await s.execute(cluster_stmt)
+
+            # NOTE(review): existing membership is looked up by member_id
+            # only; this assumes one cluster holds a single member_type --
+            # confirm against the ClusterMember key.
+            existing = set(
+                (
+                    await s.execute(
+                        select(ClusterMember.member_id).where(
+                            ClusterMember.cluster_id == cluster_id
+                        )
+                    )
+                )
+                .scalars()
+                .all()
+            )
+            # ``dict.fromkeys`` drops repeated ids inside ``members`` while
+            # keeping first-seen order, so one call never tries to insert
+            # the same membership row twice.
+            new_member_rows = [
+                ClusterMember(
+                    cluster_id=cluster_id,
+                    member_id=mid,
+                    member_type=member_type,
+                    added_ts=now,
+                )
+                for mid in dict.fromkeys(algo_cluster.members)
+                if mid not in existing
+            ]
+            if new_member_rows:
+                s.add_all(new_member_rows)
+            await s.commit()
+
+
+def _row_to_algo(row: Cluster, members: list[str]) -> AlgoCluster:
+    """Build an ``AlgoCluster`` from a stored row plus its member ids.
+
+    The centroid is decoded from ``centroid_blob`` using the module-wide
+    centroid dtype, and an empty / ``None`` ``preview_json`` becomes an
+    empty preview list. ``members`` is copied, so the result does not
+    alias the caller's list.
+
+    NOTE: ``np.frombuffer`` returns an array backed by the stored buffer;
+    when that buffer is immutable ``bytes`` the array is read-only.
+    """
+    if row.preview_json:
+        preview = json.loads(row.preview_json)
+    else:
+        preview = []
+    return AlgoCluster(
+        id=row.cluster_id,
+        centroid=np.frombuffer(row.centroid_blob, dtype=_CENTROID_DTYPE),
+        count=row.count,
+        last_ts=row.last_ts_ms,
+        preview=preview,
+        members=list(members),
+    )
+
+
+async def _load_members_by_cluster(
+    session: AsyncSession,
+    cluster_ids: list[str],
+) -> dict[str, list[str]]:
+    """Fetch membership for many clusters with a single query.
+
+    Returns ``{cluster_id: [member_id, ...]}`` with each list ordered by
+    ``added_ts``. Clusters that have no membership rows get no key at
+    all, so callers should read with ``.get(cluster_id, [])``. Members
+    written by the same ``upsert_with_members`` call share one
+    ``added_ts``, so their relative order is whatever the database
+    returns for ties.
+    """
+    membership_query = (
+        select(ClusterMember.cluster_id, ClusterMember.member_id)
+        .where(ClusterMember.cluster_id.in_(cluster_ids))
+        .order_by(ClusterMember.added_ts)
+    )
+    result = await session.execute(membership_query)
+    grouped: dict[str, list[str]] = {}
+    for owning_cluster, member in result.all():
+        if owning_cluster not in grouped:
+            grouped[owning_cluster] = []
+        grouped[owning_cluster].append(member)
+    return grouped
+
+
+# Module-level singleton instance of the private repository class.
+cluster_repo = _ClusterRepo()
--- a/src/everos/infra/persistence/sqlite/repos/conversation_status.py
+++ b/src/everos/infra/persistence/sqlite/repos/conversation_status.py
@ -0,0 +1,90 @@
+"""Repository for ``conversation_status`` — singleton bound to ``sqlite_manager``.
+
+Upsert helpers for the (session_id, track) window pointer.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from everos.core.persistence.sqlite import RepoBase, session_scope
+
+from ..sqlite_manager import get_session_factory
+from ..tables import ConversationStatus
+
+
+class _ConversationStatusRepo(RepoBase[ConversationStatus]):
+    """Upsert helpers for the ``conversation_status`` window pointers.
+
+    A row is identified by ``(app_id, project_id, session_id, track)``
+    and carries two timestamps, ``last_message_ts`` and
+    ``last_memcell_ts``, each with its own public ``touch_*`` method.
+    """
+
+    model = ConversationStatus
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Return the session factory obtained from ``get_session_factory()``."""
+        return get_session_factory()
+
+    async def touch_last_message_ts(
+        self,
+        session_id: str,
+        track: str,
+        ts: dt.datetime,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> None:
+        """Create or update the row and record ``ts`` as ``last_message_ts``."""
+        await self._upsert(
+            session_id,
+            track,
+            app_id=app_id,
+            project_id=project_id,
+            last_message_ts=ts,
+        )
+
+    async def touch_last_memcell_ts(
+        self,
+        session_id: str,
+        track: str,
+        ts: dt.datetime,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> None:
+        """Create or update the row and record ``ts`` as ``last_memcell_ts``."""
+        await self._upsert(
+            session_id,
+            track,
+            app_id=app_id,
+            project_id=project_id,
+            last_memcell_ts=ts,
+        )
+
+    async def _upsert(
+        self,
+        session_id: str,
+        track: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+        last_message_ts: dt.datetime | None = None,
+        last_memcell_ts: dt.datetime | None = None,
+    ) -> None:
+        """Insert the row if absent, otherwise overwrite the given timestamps.
+
+        On insert both timestamp columns take the passed values, ``None``
+        included. On update a ``None`` argument means "leave that column
+        untouched".
+
+        NOTE(review): this is a read-then-write, so two concurrent
+        first-time callers could both take the insert branch. Whether
+        that raises depends on the table's unique constraints, which are
+        not visible from this module.
+        """
+        lookup = select(ConversationStatus).where(
+            ConversationStatus.app_id == app_id,
+            ConversationStatus.project_id == project_id,
+            ConversationStatus.session_id == session_id,
+            ConversationStatus.track == track,
+        )
+        async with session_scope(self._factory) as session:
+            current = (await session.execute(lookup)).scalars().first()
+            if current is not None:
+                if last_message_ts is not None:
+                    current.last_message_ts = last_message_ts
+                if last_memcell_ts is not None:
+                    current.last_memcell_ts = last_memcell_ts
+            else:
+                fresh = ConversationStatus(
+                    app_id=app_id,
+                    project_id=project_id,
+                    session_id=session_id,
+                    track=track,
+                    last_message_ts=last_message_ts,
+                    last_memcell_ts=last_memcell_ts,
+                )
+                session.add(fresh)
+            await session.commit()
+
+
+# Module-level singleton instance of the private repository class.
+conversation_status_repo = _ConversationStatusRepo()
--- a/src/everos/infra/persistence/sqlite/repos/md_change_state.py
+++ b/src/everos/infra/persistence/sqlite/repos/md_change_state.py
@ -0,0 +1,434 @@
+"""Repository for ``md_change_state`` — cascade work queue.
+
+Sole writer of the table. The worker, watcher, scanner, and CLI all
+go through this repo so the state-machine invariants (``processing``
+claim semantics, retryable flag lifecycle) live in one place.
+
+LSN ordering is **best-effort**, not strictly monotonic across
+concurrent writers: :meth:`upsert` derives ``lsn = MAX(lsn) + 1``
+which is a classic read-modify-write that two parallel writers could
+race on (BEGIN DEFERRED leaves the SELECT half unprotected; cross-
+process this is even more visible). The table schema does **not**
+declare ``lsn UNIQUE`` and no caller depends on strict monotonicity —
+the worker uses ``ORDER BY lsn LIMIT N`` for fairness only, and a
+collision merely reorders two rows by a few ms; both rows are still
+processed and the next upsert bumps the counter past the duplicate.
+If a future feature needs strict monotonicity (e.g. CDC / audit log),
+revisit by giving ``upsert`` its own ``BEGIN IMMEDIATE`` transaction.
+
+Status values:
+
+- ``pending`` — visible to the worker.
+- ``processing`` — internal claim state (one worker is on it).
+- ``done`` — handler succeeded.
+- ``failed`` — handler exhausted retries or hit unrecoverable error
+  (see ``retryable`` for the eligibility flag).
+"""
+
+from __future__ import annotations
+
+import dataclasses
+
+from sqlalchemy import func, select, update
+from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from everos.component.utils.datetime import get_utc_now
+from everos.core.persistence.sqlite import RepoBase, session_scope
+
+from ..sqlite_manager import get_session_factory
+from ..tables import MdChangeState
+
+
+@dataclasses.dataclass(frozen=True)
+class QueueSummary:
+    """Aggregate counts for ``cascade status`` CLI output.
+
+    Immutable (frozen dataclass) result of the repository's
+    ``queue_summary`` query over ``md_change_state``. ``pending``
+    includes the internal ``processing`` rows so the public state machine
+    (12 doc §6) stays three-valued: pending, done, failed.
+    """
+
+    pending: int
+    """Rows the worker hasn't completed yet (includes ``processing``)."""
+
+    done: int
+    """Rows landed successfully."""
+
+    failed_retryable: int
+    """``status='failed' AND retryable=TRUE`` — eligible for
+    ``cascade fix --apply`` re-enqueue."""
+
+    failed_permanent: int
+    """``status='failed' AND retryable=FALSE`` — requires the user to
+    edit the md and re-save."""
+
+    max_lsn: int
+    """Largest ``lsn`` ever assigned; 0 if the table is empty."""
+
+    last_processed_lsn: int
+    """Largest ``lsn`` whose row has reached a terminal state
+    (``done`` or ``failed``); 0 if no terminal rows yet."""
+
+
+class _MdChangeStateRepo(RepoBase[MdChangeState]):
+    model = MdChangeState
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Return the session factory obtained from ``get_session_factory()``."""
+        return get_session_factory()
+
+    # ── Writers: watcher / scanner / CLI sync ──────────────────────────────
+
+    async def upsert(
+        self,
+        md_path: str,
+        *,
+        kind: str,
+        change_type: str,
+        mtime: float,
+    ) -> int:
+        """Enqueue or re-enqueue ``md_path`` and return the LSN it was given.
+
+        - **New row**: inserted with ``status='pending'``, ``first_seen_at``
+          and ``last_changed_at`` set to now, and ``lsn = MAX(lsn) + 1``.
+        - **Existing row** (conflict on ``md_path``): ``first_seen_at`` is
+          kept, and everything else is refreshed. ``kind`` /
+          ``change_type`` / ``mtime`` take the new values,
+          ``last_changed_at`` is bumped, status returns to ``pending``,
+          ``retry_count`` / ``error`` / ``retryable`` /
+          ``last_attempt_at`` are cleared, and a fresh ``MAX(lsn) + 1``
+          is assigned.
+
+        The fresh LSN on re-enqueue means the worker, which drains with
+        ``ORDER BY lsn``, re-processes this path *after* anything queued
+        in between. The ``MAX(lsn) + 1`` derivation is best-effort under
+        concurrent writers; see the module docstring for the trade-off.
+        """
+        stamp = get_utc_now()
+        async with session_scope(self._factory) as session:
+            assigned_lsn = await _next_lsn(session)
+            # Columns written identically by the insert path and by the
+            # ON CONFLICT update path.
+            refreshed = {
+                "kind": kind,
+                "change_type": change_type,
+                "mtime": mtime,
+                "last_changed_at": stamp,
+                "lsn": assigned_lsn,
+                "status": "pending",
+                "retryable": None,
+                "last_attempt_at": None,
+                "retry_count": 0,
+                "error": None,
+            }
+            enqueue = (
+                sqlite_insert(MdChangeState)
+                # ``md_path`` / ``first_seen_at`` are insert-only.
+                .values(md_path=md_path, first_seen_at=stamp, **refreshed)
+                .on_conflict_do_update(
+                    index_elements=["md_path"],
+                    set_=refreshed,
+                )
+            )
+            await session.execute(enqueue)
+            await session.commit()
+            return assigned_lsn
+
+    async def force_enqueue(self, md_path: str, kind: str) -> int:
+        """`cascade sync --path` entry: re-enqueue regardless of status.
+
+        Delegates to :meth:`upsert` with ``change_type='modified'`` and
+        ``mtime=0.0``. It has its own name because the CLI flow has no
+        watcher / scanner event to attribute the row to.
+
+        Returns:
+            The LSN assigned by :meth:`upsert`.
+        """
+        assigned_lsn = await self.upsert(
+            md_path, kind=kind, change_type="modified", mtime=0.0
+        )
+        return assigned_lsn
+
+    # ── Worker claim ───────────────────────────────────────────────────────
+
+    async def claim_one(self, md_path: str) -> MdChangeState | None:
+        """Try to move one row ``pending → processing`` and return it.
+
+        The claim is a single conditional ``UPDATE``: it matches only
+        while the row is still ``pending``, and stamps
+        ``last_attempt_at``. The caller owns the row only when exactly
+        one row was updated. Otherwise (row missing, or not pending any
+        more) the result is ``None`` and no exception is raised, because
+        claim contention is not an error.
+        """
+        stamp = get_utc_now()
+        claim = (
+            update(MdChangeState)
+            .where(
+                MdChangeState.md_path == md_path,
+                MdChangeState.status == "pending",
+            )
+            .values(status="processing", last_attempt_at=stamp)
+        )
+        async with session_scope(self._factory) as session:
+            outcome = await session.execute(claim)
+            await session.commit()
+            if outcome.rowcount == 1:
+                # Re-read after the commit so the caller gets the row in
+                # its claimed state.
+                return await session.get(MdChangeState, md_path)
+            return None
+
+    async def claim_pending_batch(self, limit: int = 100) -> list[MdChangeState]:
+        """Claim up to ``limit`` pending rows in LSN order.
+
+        Three steps on one session:
+
+        1. Select the ``md_path`` of the first ``limit`` pending rows.
+        2. Flip those that are *still* pending to ``processing`` and
+           collect, via ``RETURNING``, the keys this call actually
+           updated.
+        3. After the commit, load the full rows for exactly those keys.
+
+        Step 3 is restricted to the keys returned by step 2. Sibling
+        workers / processes may race on the same prefix; a candidate that
+        a sibling claimed between steps 1 and 2 is also ``processing``,
+        but it is not in this call's ``RETURNING`` set, so it cannot leak
+        into this batch. The ``status='processing'`` filter in step 3
+        additionally drops rows that a concurrent :meth:`upsert` reset to
+        ``pending`` right after the claim.
+
+        ``UPDATE ... RETURNING`` needs SQLite 3.35 or newer.
+
+        Args:
+            limit: Maximum number of rows to claim; ``<= 0`` claims
+                nothing.
+
+        Returns:
+            The claimed rows (now ``status='processing'``) ordered by
+            ``lsn``; an empty list if nothing could be claimed.
+        """
+        if limit <= 0:
+            return []
+        now = get_utc_now()
+        async with session_scope(self._factory) as s:
+            picks = (
+                (
+                    await s.execute(
+                        select(MdChangeState.md_path)
+                        .where(MdChangeState.status == "pending")
+                        .order_by(MdChangeState.lsn)
+                        .limit(limit)
+                    )
+                )
+                .scalars()
+                .all()
+            )
+            if not picks:
+                return []
+            # RETURNING yields only the rows this statement updated, i.e.
+            # the rows this call won. It must be consumed before the commit.
+            claimed = (
+                (
+                    await s.execute(
+                        update(MdChangeState)
+                        .where(MdChangeState.md_path.in_(picks))
+                        .where(MdChangeState.status == "pending")
+                        .values(status="processing", last_attempt_at=now)
+                        .returning(MdChangeState.md_path)
+                    )
+                )
+                .scalars()
+                .all()
+            )
+            await s.commit()
+            if not claimed:
+                return []
+            rows = (
+                (
+                    await s.execute(
+                        select(MdChangeState)
+                        .where(MdChangeState.md_path.in_(claimed))
+                        .where(MdChangeState.status == "processing")
+                        .order_by(MdChangeState.lsn)
+                    )
+                )
+                .scalars()
+                .all()
+            )
+            return list(rows)
+
+    # ── Worker result reporting ────────────────────────────────────────────
+
+    async def mark_done(self, md_path: str) -> None:
+        """Record a successful handler run: ``processing → done``.
+
+        The update only matches while the row is still ``processing``.
+        If a concurrent :meth:`upsert` (watcher / scanner re-enqueue) has
+        already flipped the row back to ``pending``, this call changes
+        nothing and the next :meth:`claim_pending_batch` drain re-runs
+        the handler against the latest md state. Losing the stale
+        ``done`` write rather than the new ``pending`` is the intended
+        trade. On a match, ``error`` and ``retryable`` are cleared and
+        ``last_attempt_at`` is stamped.
+        """
+        finished_at = get_utc_now()
+        transition = (
+            update(MdChangeState)
+            .where(
+                MdChangeState.md_path == md_path,
+                MdChangeState.status == "processing",
+            )
+            .values(
+                status="done",
+                last_attempt_at=finished_at,
+                error=None,
+                retryable=None,
+            )
+        )
+        async with session_scope(self._factory) as session:
+            await session.execute(transition)
+            await session.commit()
+
+    async def mark_failed(
+        self,
+        md_path: str,
+        *,
+        retryable: bool,
+        error: str,
+        new_retry_count: int,
+    ) -> None:
+        """Record a failed handler run: ``processing → failed``.
+
+        Args:
+            md_path: The row's primary key.
+            retryable: ``True`` for transient failures (HTTP 5xx,
+                connection reset, 429), which ``cascade fix --apply``
+                will re-enqueue. ``False`` for unrecoverable failures
+                (YAML parse, schema mismatch), which need a user edit.
+            error: Truncated failure message for ``cascade fix`` output.
+            new_retry_count: The retry count *after* this attempt; the
+                caller knows whether it was a retry or the final attempt.
+
+        Like :meth:`mark_done`, the update only matches while the row is
+        still ``processing``. If a concurrent watcher / scanner upsert
+        has reset the row to ``pending`` (the file changed during
+        processing), the failure verdict is stale: nothing is written and
+        the next drain re-attempts against the new md state.
+        """
+        failed_at = get_utc_now()
+        transition = (
+            update(MdChangeState)
+            .where(
+                MdChangeState.md_path == md_path,
+                MdChangeState.status == "processing",
+            )
+            .values(
+                status="failed",
+                retryable=retryable,
+                last_attempt_at=failed_at,
+                error=error,
+                retry_count=new_retry_count,
+            )
+        )
+        async with session_scope(self._factory) as session:
+            await session.execute(transition)
+            await session.commit()
+
+    # ── Startup recovery ───────────────────────────────────────────────────
+
+    async def recover_orphan_processing(self) -> int:
+        """Put every ``processing`` row back to ``pending``; return the count.
+
+        Cascade runs single-process today, so a row found in
+        ``processing`` when the orchestrator boots is leftover from a
+        prior crash: the worker died between :meth:`claim_pending_batch`
+        and ``mark_done`` / ``mark_failed``. ``last_attempt_at`` is
+        cleared on the recovered rows. Idempotent: with no ``processing``
+        rows this is a no-op returning 0.
+        """
+        requeue = (
+            update(MdChangeState)
+            .where(MdChangeState.status == "processing")
+            .values(status="pending", last_attempt_at=None)
+        )
+        async with session_scope(self._factory) as session:
+            outcome = await session.execute(requeue)
+            await session.commit()
+            return int(outcome.rowcount or 0)
+
+    # ── CLI fix / status ───────────────────────────────────────────────────
+
+    async def list_failed(self) -> list[MdChangeState]:
+        """Return all ``status='failed'`` rows ordered by ascending LSN.
+
+        Drives the ``cascade fix`` (no ``--apply``) preview table; the
+        CLI splits the result by ``retryable`` into two sections.
+        """
+        failed_query = (
+            select(MdChangeState)
+            .where(MdChangeState.status == "failed")
+            .order_by(MdChangeState.lsn)
+        )
+        async with session_scope(self._factory) as session:
+            result = await session.execute(failed_query)
+            return list(result.scalars().all())
+
+    async def reset_retryable_to_pending(self) -> int:
+        """`cascade fix --apply` engine: re-enqueue every retryable row.
+
+        Only rows with ``status='failed' AND retryable=TRUE`` are
+        touched. They go back to ``pending`` with ``retryable`` /
+        ``error`` cleared, ``retry_count`` zeroed and ``last_changed_at``
+        bumped. Unlike :meth:`upsert`, the existing ``lsn`` is kept. Rows
+        with ``retryable=FALSE`` are left alone; they need the user to
+        edit the md and re-save, after which the scanner / watcher
+        re-enqueues them.
+
+        Returns:
+            The number of rows transitioned.
+        """
+        stamp = get_utc_now()
+        requeue = (
+            update(MdChangeState)
+            .where(
+                MdChangeState.status == "failed",
+                MdChangeState.retryable.is_(True),
+            )
+            .values(
+                status="pending",
+                retryable=None,
+                retry_count=0,
+                error=None,
+                last_changed_at=stamp,
+            )
+        )
+        async with session_scope(self._factory) as session:
+            outcome = await session.execute(requeue)
+            await session.commit()
+            return int(outcome.rowcount or 0)
+
+    async def queue_summary(self) -> QueueSummary:
+        """Compute the :class:`QueueSummary` shown by ``cascade status``.
+
+        Runs six aggregate queries on one session: four status counts and
+        two ``MAX(lsn)`` lookups, each coalesced to 0 when no row
+        matches. A ``failed`` row whose ``retryable`` is NULL is counted
+        in neither failed bucket.
+        """
+        is_failed = MdChangeState.status == "failed"
+        highest_lsn = func.coalesce(func.max(MdChangeState.lsn), 0)
+        async with session_scope(self._factory) as session:
+            # ``processing`` is folded into the public ``pending`` count.
+            pending = await _count_where(
+                session, MdChangeState.status.in_(["pending", "processing"])
+            )
+            done = await _count_where(session, MdChangeState.status == "done")
+            failed_retryable = await _count_where(
+                session, is_failed & MdChangeState.retryable.is_(True)
+            )
+            failed_permanent = await _count_where(
+                session, is_failed & MdChangeState.retryable.is_(False)
+            )
+            overall = await session.execute(select(highest_lsn))
+            max_lsn = int(overall.scalar_one())
+            terminal = await session.execute(
+                select(highest_lsn).where(
+                    MdChangeState.status.in_(["done", "failed"])
+                )
+            )
+            last_processed_lsn = int(terminal.scalar_one())
+        return QueueSummary(
+            pending=pending,
+            done=done,
+            failed_retryable=failed_retryable,
+            failed_permanent=failed_permanent,
+            max_lsn=max_lsn,
+            last_processed_lsn=last_processed_lsn,
+        )
+
+
+async def _next_lsn(session: AsyncSession) -> int:
+    """Return the next LSN candidate: ``MAX(lsn) + 1``, or 1 for an empty table.
+
+    Runs on the caller's session, so the read shares a transaction with
+    the UPSERT that consumes the value. It is still a read-then-write:
+    as the module docstring explains, concurrent writers may compute the
+    same value, so uniqueness and strict monotonicity are NOT guaranteed.
+    """
+    result = await session.execute(
+        select(func.coalesce(func.max(MdChangeState.lsn), 0))
+    )
+    return int(result.scalar_one()) + 1
+
+
+async def _count_where(session: AsyncSession, predicate: object) -> int:
+    """Count ``md_change_state`` rows matching ``predicate``, as a Python int.
+
+    ``predicate`` is passed straight to ``.where()``, so it is expected to
+    be a SQLAlchemy boolean expression over ``MdChangeState`` columns.
+    """
+    counting = select(func.count()).select_from(MdChangeState)
+    counting = counting.where(predicate)  # type: ignore[arg-type]
+    matched = (await session.execute(counting)).scalar_one()
+    return int(matched)
+
+
+# Module-level singleton instance of the private repository class.
+md_change_state_repo = _MdChangeStateRepo()
--- a/src/everos/infra/persistence/sqlite/repos/memcell.py
+++ b/src/everos/infra/persistence/sqlite/repos/memcell.py
@ -0,0 +1,52 @@
+"""Repository for ``memcell`` table — singleton bound to ``sqlite_manager``.
+
+Pure persistence: callers build the SQLModel ``Memcell`` rows (including
+``message_ids_json`` / ``sender_ids_json``) and hand them in. The pipeline
+is responsible for mapping algo-side messages back to everos
+``message_id`` because algo's ``Message`` does not carry per-message
+identifiers.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from everos.core.persistence.sqlite import RepoBase, session_scope
+
+from ..sqlite_manager import get_session_factory
+from ..tables import Memcell
+
+
+class _MemcellRepo(RepoBase[Memcell]):
+    """Persistence helpers for the ``memcell`` table.
+
+    Rows are built by the caller; this class only writes them and reads
+    them back by primary key.
+    """
+
+    model = Memcell
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Return the session factory obtained from ``get_session_factory()``."""
+        return get_session_factory()
+
+    async def insert_many(self, rows: list[Memcell]) -> list[Memcell]:
+        """Insert caller-built MemCell rows in one transaction.
+
+        Every row is refreshed from the database after the commit, and
+        the same list object that was passed in is returned.
+        """
+        async with session_scope(self._factory) as session:
+            session.add_all(rows)
+            await session.commit()
+            for inserted in rows:
+                await session.refresh(inserted)
+        return rows
+
+    async def find_by_ids(self, memcell_ids: list[str]) -> list[Memcell]:
+        """Fetch rows by primary key, returned in the caller's id order.
+
+        Used by offline strategies that pull every memcell in a cluster
+        (membership lives in :class:`ClusterMember` and is supplied to
+        the strategy via :class:`everalgo.clustering.Cluster.members`).
+        Ids without a matching row are skipped, and an id listed more
+        than once yields its row once per occurrence. An empty id list
+        returns ``[]`` without touching the database.
+        """
+        if not memcell_ids:
+            return []
+        lookup = select(Memcell).where(Memcell.memcell_id.in_(memcell_ids))
+        async with session_scope(self._factory) as session:
+            fetched = list((await session.execute(lookup)).scalars().all())
+        # The IN query gives no ordering guarantee; re-order by the input.
+        found = {cell.memcell_id: cell for cell in fetched}
+        return [found[wanted] for wanted in memcell_ids if wanted in found]
+
+
+# Module-level singleton instance of the private repository class.
+memcell_repo = _MemcellRepo()
--- a/src/everos/infra/persistence/sqlite/repos/unprocessed_buffer.py
+++ b/src/everos/infra/persistence/sqlite/repos/unprocessed_buffer.py
@ -0,0 +1,83 @@
+"""Repository for ``unprocessed_buffer`` — chat message accumulator.
+
+Singleton bound to the process-wide ``sqlite_manager`` session factory.
+
+Pure SQLModel persistence: row ↔ domain conversion lives in
+``everos.memory.extract.pipeline`` (the only caller that needs it).
+
+Exposes:
+
+- :meth:`list_for_track` — load all rows of (app_id, project_id, session_id,
+  track), ordered by timestamp ascending.
+- :meth:`replace` — atomically swap all rows of (app_id, project_id,
+  session_id, track) for a freshly-built list of :class:`UnprocessedBuffer`
+  rows.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import delete, select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from everos.core.persistence.sqlite import RepoBase, session_scope
+
+from ..sqlite_manager import get_session_factory
+from ..tables import UnprocessedBuffer
+
+
+class _UnprocessedBufferRepo(RepoBase[UnprocessedBuffer]):
+    model = UnprocessedBuffer
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        return get_session_factory()
+
+    async def list_for_track(
+        self,
+        session_id: str,
+        track: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> list[UnprocessedBuffer]:
+        """Return all rows of (app, project, session, track), ts asc."""
+        async with session_scope(self._factory) as s:
+            stmt = (
+                select(UnprocessedBuffer)
+                .where(
+                    UnprocessedBuffer.app_id == app_id,
+                    UnprocessedBuffer.project_id == project_id,
+                    UnprocessedBuffer.session_id == session_id,
+                    UnprocessedBuffer.track == track,
+                )
+                .order_by(UnprocessedBuffer.timestamp.asc())  # type: ignore[union-attr]
+            )
+            return list((await s.execute(stmt)).scalars().all())
+
+    async def replace(
+        self,
+        session_id: str,
+        track: str,
+        rows: list[UnprocessedBuffer],
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> None:
+        """Atomically rewrite all rows of (app, project, session, track).
+
+        Delete-then-insert in one transaction. Empty ``rows`` clears the slice.
+        The delete is scoped to the same (app, project) as the incoming rows so
+        one space's buffer never wipes another's.
+        """
+        async with session_scope(self._factory) as s:
+            await s.execute(
+                delete(UnprocessedBuffer).where(
+                    UnprocessedBuffer.app_id == app_id,
+                    UnprocessedBuffer.project_id == project_id,
+                    UnprocessedBuffer.session_id == session_id,
+                    UnprocessedBuffer.track == track,
+                )
+            )
+            if rows:
+                s.add_all(rows)
+            await s.commit()
+
+
+# Module-level repository instance; its session factory is obtained from
+# ``get_session_factory()`` through ``_UnprocessedBufferRepo._factory_lookup``.
+unprocessed_buffer_repo = _UnprocessedBufferRepo()
--- a/src/everos/infra/persistence/sqlite/sqlite_manager.py
+++ b/src/everos/infra/persistence/sqlite/sqlite_manager.py
@ -0,0 +1,63 @@
+"""SQLite engine + session-factory singletons (lazy + process-wide).
+
+The single place that owns the SQLite **runtime state**: the async
+SQLAlchemy engine and the session factory bound to it. Built lazily on
+first :func:`get_engine` / :func:`get_session_factory` call from
+:func:`everos.config.load_settings` + :meth:`MemoryRoot.default`. The
+:class:`SqliteLifespanProvider` calls :func:`dispose_engine` on shutdown
+to drain the connection pool; in scripts you can call it manually.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
+
+from everos.config import load_settings
+from everos.core.observability.logging import get_logger
+from everos.core.persistence import (
+    MemoryRoot,
+    create_session_factory,
+    create_system_engine,
+)
+
+logger = get_logger(__name__)
+
+# Lazily-populated module state: both stay ``None`` until the first
+# ``get_engine()`` / ``get_session_factory()`` call and are reset to ``None``
+# by ``dispose_engine()``.
+_engine: AsyncEngine | None = None
+_session_factory: async_sessionmaker[AsyncSession] | None = None
+
+
+def get_engine() -> AsyncEngine:
+    """Return the process-wide async SQLAlchemy engine.
+
+    Built on first call from ``MemoryRoot.default()`` and ``Settings.sqlite``.
+    Subsequent calls return the same instance.
+    """
+    global _engine
+    if _engine is None:
+        settings = load_settings()
+        memory_root = MemoryRoot.default()
+        memory_root.ensure()
+        _engine = create_system_engine(memory_root.system_db, settings.sqlite)
+        logger.info(
+            "sqlite_engine_built",
+            db_path=str(memory_root.system_db),
+        )
+    return _engine
+
+
+def get_session_factory() -> async_sessionmaker[AsyncSession]:
+    """Return the process-wide async session factory."""
+    global _session_factory
+    if _session_factory is None:
+        _session_factory = create_session_factory(get_engine())
+    return _session_factory
+
+
+async def dispose_engine() -> None:
+    """Dispose the engine + connection pool. Idempotent."""
+    global _engine, _session_factory
+    if _engine is not None:
+        await _engine.dispose()
+        logger.info("sqlite_engine_disposed")
+    _engine = None
+    _session_factory = None
--- a/src/everos/infra/persistence/sqlite/tables/init.py
+++ b/src/everos/infra/persistence/sqlite/tables/init.py
@ -0,0 +1,24 @@
+"""Business SQLModel table schemas.
+
+Each business table lives in its own module here (e.g. ``memcell.py``,
+``unprocessed_buffer.py``). The package ``__init__`` re-exports them so
+``SQLModel.metadata.create_all`` (run by
+:class:`everos.core.lifespan.SqliteLifespanProvider` at startup) sees
+every registered table.
+"""
+
+from .cluster import Cluster as Cluster
+from .cluster import ClusterMember as ClusterMember
+from .conversation_status import ConversationStatus as ConversationStatus
+from .md_change_state import MdChangeState as MdChangeState
+from .memcell import Memcell as Memcell
+from .unprocessed_buffer import UnprocessedBuffer as UnprocessedBuffer
+
+# Public names of this package — one entry per table class re-exported above.
+__all__ = [
+    "Cluster",
+    "ClusterMember",
+    "ConversationStatus",
+    "MdChangeState",
+    "Memcell",
+    "UnprocessedBuffer",
+]
--- a/src/everos/infra/persistence/sqlite/tables/cluster.py
+++ b/src/everos/infra/persistence/sqlite/tables/cluster.py
@ -0,0 +1,99 @@
+"""``cluster`` — persisted snapshot of one ``everalgo.clustering.Cluster``.
+
+Mirrors the algo-side frozen value object (centroid + count + last_ts +
+preview) plus everos engineering metadata (``owner_id`` / ``owner_type``
+/ ``kind``) so a single SQLite table can hold both the user-memory cluster
+track (episode embeddings) and the agent-case cluster track (task_intent
+embeddings). The ``members`` field on the algo type is persisted in the
+sibling :class:`ClusterMember` table to keep the relation queryable from
+both directions (forward by ``cluster_id``, reverse by ``(member_type,
+member_id)``).
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Index, LargeBinary
+
+from everos.component.utils.datetime import UtcDatetime
+from everos.core.persistence.sqlite import BaseTable, Field
+from everos.core.persistence.sqlite.base import UtcDateTimeColumn
+
+
+class Cluster(BaseTable, table=True):
+    """One row per cluster. PK ``cluster_id`` (``cl_<12hex>``).
+
+    Columns fall into two groups: scope / ownership metadata (``app_id``,
+    ``project_id``, ``owner_id``, ``owner_type``, ``kind``) and the
+    persisted cluster snapshot (``centroid_blob``, ``count``,
+    ``last_ts_ms``, ``preview_json``). Membership rows live in the
+    ``cluster_member`` table.
+    """
+
+    __tablename__ = "cluster"  # type: ignore[assignment]
+    __table_args__ = (
+        # List all clusters for one (app, project, owner, kind) on each strategy
+        # invocation; scope-first composite so clustering never mixes spaces.
+        Index("ix_cluster_owner_kind", "app_id", "project_id", "owner_id", "kind"),
+    )
+
+    cluster_id: str = Field(primary_key=True)
+    """Caller-minted opaque id (algo type carries it through verbatim).
+    Format: ``cl_<12 hex chars>`` to mirror :func:`memcell._mint_memcell_id`."""
+
+    app_id: str = Field(default="default")
+    project_id: str = Field(default="default")
+    """App / project scope segments. The aggregation key is
+    ``(app_id, project_id, owner_id, kind)`` so a cluster set never spans
+    two spaces."""
+
+    # Indexed on its own (index=True) in addition to being the third column
+    # of the scope-first composite index declared in __table_args__.
+    owner_id: str = Field(index=True)
+    """``user_id`` (kind=``user_memory``) or ``agent_id`` (kind=``agent_case``)."""
+
+    owner_type: str
+    """``"user"`` or ``"agent"`` — redundant with ``kind`` today but kept
+    explicit so future kinds (e.g. tenant-level) can plug in without a
+    schema change."""
+
+    kind: str
+    """``"user_memory"`` (episode-vector cluster, drives profile extraction)
+    or ``"agent_case"`` (task_intent-vector cluster, drives skill extraction)."""
+
+    centroid_blob: bytes = Field(sa_type=LargeBinary)
+    """``np.float32`` centroid serialised via ``ndarray.tobytes()``. The
+    repo round-trips bytes ↔ ``np.ndarray`` so callers see the algo type."""
+
+    count: int
+    """Number of members merged into this cluster (algo-maintained)."""
+
+    last_ts_ms: int
+    """Most recent member's timestamp as Unix epoch milliseconds — matches
+    :attr:`everalgo.clustering.Cluster.last_ts` exactly so no lossy
+    datetime ↔ int conversion is needed across the storage boundary."""
+
+    preview_json: str
+    """JSON-encoded ``list[str]`` — short text samples used by
+    :func:`cluster_by_llm` ranking. Repo round-trips JSON ↔ list."""
+
+
+class ClusterMember(BaseTable, table=True):
+    """One row per (cluster, entity) link.
+
+    The primary key is the composite ``(cluster_id, member_id)``.
+
+    Forward lookup (``cluster_id → list[member_id]``) is the algo-side
+    ``Cluster.members`` view. Reverse lookup (``(member_type, member_id)
+    → cluster_id``) is served by the composite index below — needed when
+    a downstream consumer holds an entity id and wants its cluster.
+
+    ``member_type`` is informational on the row (the parent ``Cluster.kind``
+    already disambiguates), but kept explicit so the reverse index can be
+    a single composite (member_type, member_id) without joining back.
+    """
+
+    __tablename__ = "cluster_member"  # type: ignore[assignment]
+    # Reverse-lookup index: (member_type, member_id) → row.
+    __table_args__ = (Index("ix_cluster_member_reverse", "member_type", "member_id"),)
+
+    # First half of the composite primary key; references cluster.cluster_id.
+    cluster_id: str = Field(primary_key=True, foreign_key="cluster.cluster_id")
+    """Parent cluster id."""
+
+    # Second half of the composite primary key.
+    member_id: str = Field(primary_key=True)
+    """``memcell_id`` (member_type=``memcell``) or md entry_id
+    (member_type=``case``) — the entity grouped into this cluster."""
+
+    member_type: str
+    """``"memcell"`` or ``"case"``. Echoes the parent cluster's ``kind``
+    domain but kept on the row so the reverse index is self-contained."""
+
+    added_ts: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
+    """When this entity was first attached to the cluster."""
--- a/src/everos/infra/persistence/sqlite/tables/conversation_status.py
+++ b/src/everos/infra/persistence/sqlite/tables/conversation_status.py
@ -0,0 +1,38 @@
+"""``conversation_status`` — window pointer per (app, project, session, track).
+
+The window pointer is scoped by ``app_id`` / ``project_id`` so the same
+``session_id`` may recur in different spaces without colliding; those two
+segments lead the composite ``UniqueConstraint``.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import UniqueConstraint
+
+from everos.component.utils.datetime import UtcDatetime
+from everos.core.persistence.sqlite import BaseTable, Field
+from everos.core.persistence.sqlite.base import UtcDateTimeColumn
+
+
+class ConversationStatus(BaseTable, table=True):
+    """One row per (app, project, session, track). Tracks latest msg / memcell ts.
+
+    Uniqueness of the 4-tuple is enforced by the
+    ``uq_conversation_status_session_track`` constraint; the integer ``id``
+    is only a surrogate primary key.
+    """
+
+    __tablename__ = "conversation_status"  # type: ignore[assignment]
+    __table_args__ = (
+        # Scope segments (app_id, project_id) lead the constraint so the same
+        # session_id can recur in different spaces.
+        UniqueConstraint(
+            "app_id",
+            "project_id",
+            "session_id",
+            "track",
+            name="uq_conversation_status_session_track",
+        ),
+    )
+
+    # Surrogate key; defaults to None on new objects — presumably filled in
+    # by SQLite on insert (TODO confirm against BaseTable).
+    id: int | None = Field(default=None, primary_key=True)
+    app_id: str = Field(default="default")
+    project_id: str = Field(default="default")
+    """App / project scope segments (default ``"default"``)."""
+    # Also indexed on its own, independent of the unique constraint above.
+    session_id: str = Field(index=True)
+    track: str
+    # Both timestamps are nullable and default to None, i.e. "nothing
+    # recorded yet" for a freshly created row.
+    last_message_ts: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
+    last_memcell_ts: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
--- a/src/everos/infra/persistence/sqlite/tables/md_change_state.py
+++ b/src/everos/infra/persistence/sqlite/tables/md_change_state.py
@ -0,0 +1,119 @@
+"""``md_change_state`` — cascade work queue.
+
+One row per markdown path. Both watcher (real-time fsevents) and
+scanner (periodic sweep) UPSERT into this table; the worker consumes
+``pending`` rows in ``lsn`` order, transitions them through an
+internal ``processing`` claim state, and lands them in ``done`` or
+``failed`` (with a ``retryable`` flag).
+
+Schema sourced from ``12_cascade_design.md`` §4.1 + decisions DD-3 …
+DD-12; the four indexes below are required by ``13_cascade_design.md``
+§7 status / fix queries.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Index, text
+
+from everos.component.utils.datetime import UtcDatetime, get_utc_now
+from everos.core.persistence.sqlite import BaseTable, Field
+from everos.core.persistence.sqlite.base import UtcDateTimeColumn
+
+
+class MdChangeState(BaseTable, table=True):
+    """One row per markdown path; UPSERT-driven work queue for cascade.
+
+    The public state machine is the 3-tuple ``pending`` / ``done`` /
+    ``failed`` (12 doc §6). ``processing`` is an internal claim state
+    used by :meth:`MdChangeStateRepo.claim_one` and rolled back into
+    ``pending`` for CLI / status output (16 doc §4.2 — DD-12 keeps the
+    public surface clean).
+    """
+
+    __tablename__ = "md_change_state"  # type: ignore[assignment]
+    __table_args__ = (
+        # Worker scans pending rows in lsn order — partial index drops
+        # done/failed rows from the b-tree and keeps it tight.
+        Index(
+            "idx_md_change_pending",
+            "status",
+            "lsn",
+            sqlite_where=text("status = 'pending'"),
+        ),
+        # `cascade fix --apply` only ever touches failed + retryable=TRUE
+        # rows — partial index makes that pass essentially O(retryable).
+        Index(
+            "idx_md_change_retryable",
+            "status",
+            "retryable",
+            sqlite_where=text("status = 'failed' AND retryable = 1"),
+        ),
+        # Scanner reverse-reconcile (disk → state) compares mtime.
+        Index("idx_md_change_mtime", "mtime"),
+        # `cascade status` aggregates by kind.
+        Index("idx_md_change_kind", "kind"),
+    )
+
+    md_path: str = Field(primary_key=True)
+    """Path relative to the memory-root (e.g. ``users/u_jason/
+    episodes/episode-2026-05-12.md``). Every reverse-link anchors here."""
+
+    kind: str = Field(nullable=False, index=True)
+    """Kind registry name (e.g. ``"episode"``); worker dispatches the
+    matching handler."""
+
+    change_type: str = Field(nullable=False)
+    """``"added"`` | ``"modified"`` | ``"deleted"``. A hint for the
+    worker — handler re-derives truth from the actual file state."""
+
+    mtime: float = Field(default=0.0, nullable=False)
+    """File mtime captured when the row was last UPSERTed. Scanner
+    compares this against the on-disk mtime to identify dirty paths."""
+
+    first_seen_at: UtcDatetime = Field(
+        default_factory=get_utc_now, sa_type=UtcDateTimeColumn
+    )
+    """When the path was first enqueued."""
+
+    last_changed_at: UtcDatetime = Field(
+        default_factory=get_utc_now, sa_type=UtcDateTimeColumn
+    )
+    """Most recent UPSERT timestamp (re-stamped on every re-enqueue)."""
+
+    lsn: int = Field(nullable=False, index=True)
+    """Global monotonic sequence (``MAX(lsn) + 1`` per UPSERT). Worker
+    processes pending rows in ascending lsn order; the gap between
+    ``MAX(lsn)`` and the last processed lsn is the queue lag."""
+
+    status: str = Field(default="pending", nullable=False, index=True)
+    """Lifecycle:
+
+    - ``"pending"`` — waiting for the worker.
+    - ``"processing"`` — claimed by a worker (internal; CLI rolls into
+      pending for display).
+    - ``"done"`` — handler completed successfully.
+    - ``"failed"`` — handler exhausted retries or hit an
+      unrecoverable error (see :attr:`retryable`).
+    """
+
+    retryable: bool | None = Field(default=None)
+    """Meaningful only when ``status='failed'``.
+
+    - ``TRUE`` — RecoverableError exhausted MAX_RETRY; ``cascade fix
+      --apply`` will re-enqueue this row (pending, retry_count reset).
+    - ``FALSE`` — UnrecoverableError (malformed YAML, schema error
+      etc.); requires editing the md and re-saving.
+    - ``NULL`` — not a failed row (pending / processing / done).
+    """
+
+    last_attempt_at: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
+    """Timestamp of the most recent worker attempt (success or
+    failure)."""
+
+    retry_count: int = Field(default=0, nullable=False)
+    """Number of retries the worker has *actually issued* (the first
+    attempt does not count). Reaches MAX_RETRY (default 3) before the
+    row transitions to ``failed`` with ``retryable=TRUE``."""
+
+    error: str | None = Field(default=None)
+    """Most recent failure message (truncated upstream if needed)."""
--- a/src/everos/infra/persistence/sqlite/tables/memcell.py
+++ b/src/everos/infra/persistence/sqlite/tables/memcell.py
@ -0,0 +1,55 @@
+"""``memcell`` — metadata + payload archive for boundary-detected MemCells.
+
+Holds ``message_ids_json`` / ``sender_ids_json`` (JSON arrays of audit
+ids) plus ``payload_json`` — the full :class:`everalgo.types.MemCell`
+serialised via ``model_dump_json``. The payload is what
+``unprocessed_buffer`` cannot keep (boundary's delete-then-insert clears
+the staging slice once messages fold into a cell): downstream offline
+strategies that need the raw chat messages (e.g. profile extraction)
+deserialise the payload back into an algo ``MemCell``. Episode markdown
+still carries the LLM-synthesised narrative; ``payload_json`` is the
+chat-stream archive that narrative was distilled from.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Index
+
+from everos.component.utils.datetime import UtcDatetime
+from everos.core.persistence.sqlite import BaseTable, Field
+from everos.core.persistence.sqlite.base import UtcDateTimeColumn
+
+
+class Memcell(BaseTable, table=True):
+    """One row per MemCell. PK ``memcell_id`` (uuid4).
+
+    Besides scope / session columns, each row stores three JSON text
+    columns: ``message_ids_json``, ``sender_ids_json`` and ``payload_json``.
+    """
+
+    __tablename__ = "memcell"  # type: ignore[assignment]
+    __table_args__ = (
+        # Scope-first composite: app/project partition the lookup before the
+        # session window so cross-(app, project) rows never share an index slot.
+        Index(
+            "ix_memcell_session",
+            "app_id",
+            "project_id",
+            "session_id",
+            "track",
+            "timestamp",
+        ),
+    )
+
+    memcell_id: str = Field(primary_key=True)
+    app_id: str = Field(default="default")
+    project_id: str = Field(default="default")
+    """App / project scope segments. Default to ``"default"`` so the column is
+    always populated; callers in a non-default space pass real ids."""
+    # Also indexed on its own, in addition to the composite index above.
+    session_id: str = Field(index=True)
+    track: str
+    raw_type: str
+    # JSON arrays of audit ids (per the module docstring); built by the
+    # caller before the row is handed to the repository.
+    message_ids_json: str
+    sender_ids_json: str
+    payload_json: str
+    """``MemCell.model_dump_json()`` — the full algo-side MemCell (items =
+    chat messages / tool calls) serialised at boundary time so offline
+    strategies can deserialise it back into an algo MemCell long after
+    ``unprocessed_buffer`` has dropped the staging rows."""
+    # Last column of the ix_memcell_session composite index.
+    timestamp: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
--- a/src/everos/infra/persistence/sqlite/tables/unprocessed_buffer.py
+++ b/src/everos/infra/persistence/sqlite/tables/unprocessed_buffer.py
@ -0,0 +1,52 @@
+"""``unprocessed_buffer`` — chat-stream messages waiting on boundary detection.
+
+Schema property: presence in the table = pending; absence = consumed.
+There is no ``consumed`` column. Pipeline uses ``replace(session, track,
+remaining)`` to atomically rewrite the (session, track) slice each turn.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Index
+
+from everos.component.utils.datetime import UtcDatetime
+from everos.core.persistence.sqlite import BaseTable, Field
+from everos.core.persistence.sqlite.base import UtcDateTimeColumn
+
+
+class UnprocessedBuffer(BaseTable, table=True):
+    """One row per unprocessed message. PK ``message_id``.
+
+    A row's presence means the message is still pending; there is no
+    ``consumed`` flag (see the module docstring).
+    """
+
+    __tablename__ = "unprocessed_buffer"  # type: ignore[assignment]
+    __table_args__ = (
+        # Scope-first composite: app/project partition the (session, track)
+        # staging slice so different spaces never share a buffer window.
+        Index(
+            "ix_unprocessed_buffer_lookup",
+            "app_id",
+            "project_id",
+            "session_id",
+            "track",
+            "timestamp",
+        ),
+    )
+
+    message_id: str = Field(primary_key=True)
+    app_id: str = Field(default="default")
+    project_id: str = Field(default="default")
+    """App / project scope segments (default ``"default"``)."""
+    # session_id and track each carry their own single-column index in
+    # addition to the composite lookup index above.
+    session_id: str = Field(index=True)
+    track: str = Field(index=True)
+    sender_id: str
+    # Optional display name; defaults to None.
+    sender_name: str | None = None
+    role: str
+    # Ordering column: the repository lists a slice by timestamp ascending.
+    timestamp: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
+    # JSON-serialised raw ContentItem list (mirrors src_old
+    # RawMessage.content_items). Keeps the original multimodal payload
+    # available so a future parser can reach back to image / audio / etc.
+    content_items_json: str
+    # Derived plain-text concatenation of ``type=text`` entries — what
+    # downstream LLM-facing extractors and md writer consume today.
+    text: str
+    # Optional tool-call fields, None by default. Presumably JSON-encoded
+    # tool calls and the id of the call a tool message answers — TODO
+    # confirm against the pipeline's row builder.
+    tool_calls_json: str | None = None
+    tool_call_id: str | None = None
				`@ -0,0 +1 @@`
				`"""Internal: background loops (idle scan / config reload / crash recovery)."""`
				`@ -0,0 +1 @@`
				`"""Internal: event dispatch core (registry / dispatcher / runner)."""`
				`@ -0,0 +1 @@`
				`"""Internal: SQLite-backed state stores (counter / idle / run_record)."""`