chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/src/everos/core/init.py
+++ b/src/everos/core/init.py
--- a/src/everos/core/context/init.py
+++ b/src/everos/core/context/init.py
--- a/src/everos/core/errors.py
+++ b/src/everos/core/errors.py
@ -0,0 +1,33 @@
+"""Cross-cutting domain errors surfaced to API callers.
+
+These live in ``core`` so the ``memory`` layer can raise them and the
+``entrypoints`` layer can catch them without crossing the layered import
+boundary — ``any -> core`` is the only edge both share (entrypoints must
+not import ``memory`` directly).
+"""
+
+from __future__ import annotations
+
+
+class MultimodalError(Exception):
+    """Base for multimodal-parsing errors meant to reach the caller.
+
+    The API layer maps any ``MultimodalError`` to an aligned
+    ``{error: {code, message}}`` envelope (HTTP 415).
+    """
+
+
+class UnsupportedModalityError(MultimodalError):
+    """everalgo cannot handle this modality (e.g. video stub, unknown type).
+
+    Wraps everalgo's ``NotImplementedError`` / dispatch ``ValueError`` so the
+    caller gets a stable, aligned error instead of a raw 500.
+    """
+
+
+class MultimodalNotEnabledError(MultimodalError):
+    """Multimodal capability is not ready.
+
+    Raised when the ``everos[multimodal]`` extra is not installed, or when a
+    required system dependency (LibreOffice for Office documents) is absent.
+    """
--- a/src/everos/core/lifespan/init.py
+++ b/src/everos/core/lifespan/init.py
@ -0,0 +1,27 @@
+"""Application lifespan composition (chassis only).
+
+This subpackage holds the *generic* lifespan machinery — the
+:class:`LifespanProvider` ABC, :func:`build_lifespan` factory, and
+chassis-level providers that are independent of any storage backend
+(observability metrics, etc.). Concrete storage-backend providers
+(SQLite / LanceDB) live next to the entrypoint that composes them
+(see :mod:`everos.entrypoints.api.lifespans`) so ``core`` stays free
+of concrete-backend imports.
+
+External usage:
+    from everos.core.lifespan import (
+        LifespanProvider,
+        MetricsLifespanProvider,
+        build_lifespan,
+    )
+"""
+
+from .base import LifespanProvider as LifespanProvider
+from .factory import build_lifespan as build_lifespan
+from .metrics_lifespan import MetricsLifespanProvider as MetricsLifespanProvider
+
+__all__ = [
+    "LifespanProvider",
+    "MetricsLifespanProvider",
+    "build_lifespan",
+]
--- a/src/everos/core/lifespan/base.py
+++ b/src/everos/core/lifespan/base.py
@ -0,0 +1,30 @@
+"""Lifespan provider abstract base.
+
+A LifespanProvider is one unit of startup / shutdown work invoked by the
+FastAPI lifespan factory. Providers are registered explicitly (no DI
+auto-discovery) and executed in ``order`` ascending on startup, reverse
+on shutdown.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from fastapi import FastAPI
+
+
+class LifespanProvider(ABC):
+    """One unit of startup / shutdown work."""
+
+    def __init__(self, name: str, order: int = 0) -> None:
+        self.name = name
+        self.order = order
+
+    @abstractmethod
+    async def startup(self, app: FastAPI) -> Any:
+        """Startup hook; return value is stored on ``app.state.lifespan_data[name]``."""
+
+    @abstractmethod
+    async def shutdown(self, app: FastAPI) -> None:
+        """Shutdown hook; called in reverse order during application teardown."""
--- a/src/everos/core/lifespan/factory.py
+++ b/src/everos/core/lifespan/factory.py
@ -0,0 +1,57 @@
+"""Lifespan composition factory.
+
+Builds a FastAPI lifespan context manager from an explicit list of
+LifespanProvider instances.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator, Callable, Sequence
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+
+from everos.core.observability.logging import get_logger
+
+from .base import LifespanProvider
+
+logger = get_logger(__name__)
+
+
+def build_lifespan(
+    providers: Sequence[LifespanProvider],
+) -> Callable[[FastAPI], AsyncIterator[None]]:
+    """Compose providers into a FastAPI lifespan context manager.
+
+    Providers are run in ``order`` ascending on startup and reverse on
+    shutdown. A non-None return value from ``startup`` is stored under
+    ``app.state.lifespan_data[provider.name]``.
+    """
+    sorted_providers = sorted(providers, key=lambda p: p.order)
+
+    @asynccontextmanager
+    async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
+        lifespan_data: dict[str, object] = {}
+        try:
+            for provider in sorted_providers:
+                logger.info(
+                    "lifespan_provider_startup",
+                    name=provider.name,
+                    order=provider.order,
+                )
+                result = await provider.startup(app)
+                if result is not None:
+                    lifespan_data[provider.name] = result
+            app.state.lifespan_data = lifespan_data
+            yield
+        finally:
+            for provider in reversed(sorted_providers):
+                try:
+                    logger.info("lifespan_provider_shutdown", name=provider.name)
+                    await provider.shutdown(app)
+                except Exception:
+                    logger.exception(
+                        "lifespan_provider_shutdown_failed", name=provider.name
+                    )
+
+    return _lifespan
--- a/src/everos/core/lifespan/metrics_lifespan.py
+++ b/src/everos/core/lifespan/metrics_lifespan.py
@ -0,0 +1,36 @@
+"""Metrics lifespan provider.
+
+Confirms the metrics registry is ready and logs that the ``/metrics`` HTTP
+endpoint is mounted on the main API. Kept as a placeholder to demonstrate
+the lifespan pattern; replace or extend with a standalone metrics server
+(e.g. ``prometheus_client.start_http_server`` on a separate port) if you
+need to expose metrics on a dedicated socket.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from everos.core.observability.logging import get_logger
+from everos.core.observability.metrics import get_metrics_registry
+
+from .base import LifespanProvider
+
+logger = get_logger(__name__)
+
+
+class MetricsLifespanProvider(LifespanProvider):
+    """No-op startup that warms the metrics registry and logs readiness."""
+
+    def __init__(self, order: int = 5) -> None:
+        super().__init__(name="metrics", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        registry = get_metrics_registry()
+        logger.info("metrics_registry_ready", endpoint="/metrics")
+        return registry
+
+    async def shutdown(self, app: FastAPI) -> None:
+        logger.info("metrics_lifespan_shutdown")
--- a/src/everos/core/middleware/init.py
+++ b/src/everos/core/middleware/init.py
@ -0,0 +1,31 @@
+"""Cross-cutting HTTP middleware components.
+
+External usage:
+    from everos.core.middleware import (
+        DEFAULT_CORS_ALLOW_CREDENTIALS,
+        DEFAULT_CORS_ALLOW_HEADERS,
+        DEFAULT_CORS_ALLOW_METHODS,
+        DEFAULT_CORS_ORIGINS,
+        ProfileMiddleware,
+        PrometheusMiddleware,
+        global_exception_handler,
+    )
+"""
+
+from .cors import DEFAULT_CORS_ALLOW_CREDENTIALS as DEFAULT_CORS_ALLOW_CREDENTIALS
+from .cors import DEFAULT_CORS_ALLOW_HEADERS as DEFAULT_CORS_ALLOW_HEADERS
+from .cors import DEFAULT_CORS_ALLOW_METHODS as DEFAULT_CORS_ALLOW_METHODS
+from .cors import DEFAULT_CORS_ORIGINS as DEFAULT_CORS_ORIGINS
+from .global_exception import global_exception_handler as global_exception_handler
+from .profile import ProfileMiddleware as ProfileMiddleware
+from .prometheus import PrometheusMiddleware as PrometheusMiddleware
+
+__all__ = [
+    "DEFAULT_CORS_ALLOW_CREDENTIALS",
+    "DEFAULT_CORS_ALLOW_HEADERS",
+    "DEFAULT_CORS_ALLOW_METHODS",
+    "DEFAULT_CORS_ORIGINS",
+    "ProfileMiddleware",
+    "PrometheusMiddleware",
+    "global_exception_handler",
+]
--- a/src/everos/core/middleware/cors.py
+++ b/src/everos/core/middleware/cors.py
@ -0,0 +1,12 @@
+"""CORS configuration defaults.
+
+The CORS middleware itself is FastAPI's stock ``CORSMiddleware``; this module
+centralises the default policy values used by the application factory.
+"""
+
+from __future__ import annotations
+
+# Default CORS policy values used by the application factory when it
+# configures FastAPI's stock ``CORSMiddleware``. Every value is permissive.
+# NOTE(review): credentials are enabled together with wildcard origins —
+# confirm that deployments narrow ``DEFAULT_CORS_ORIGINS`` before relying
+# on browser-sent cookies / Authorization headers.
+DEFAULT_CORS_ALLOW_CREDENTIALS: bool = True
+DEFAULT_CORS_ALLOW_HEADERS: list[str] = ["*"]
+DEFAULT_CORS_ALLOW_METHODS: list[str] = ["*"]
+DEFAULT_CORS_ORIGINS: list[str] = ["*"]
--- a/src/everos/core/middleware/global_exception.py
+++ b/src/everos/core/middleware/global_exception.py
@ -0,0 +1,143 @@
+"""Global exception handler — uniform error envelope per v1 API brief §1.
+
+Envelope shape (matches the v1 API brief §1 — ``request_id`` at the top
+level alongside ``error``; the ``error`` object carries ``code`` /
+``message`` plus ops-friendly ``timestamp`` / ``path`` for debugging)::
+
+    {
+      "request_id": "<32 lowercase hex chars — W3C trace_id format>",
+      "error": {
+        "code": "HTTP_ERROR" | "SYSTEM_ERROR",
+        "message": "<reason>",
+        "timestamp": "<ISO 8601 with tz>",
+        "path": "<request path>"
+      }
+    }
+
+Rules:
+- 4xx (DTO / business validation / HTTPException) → ``code="HTTP_ERROR"``
+  with the human-readable reason in ``message``.
+- 5xx (unhandled exception) → ``code="SYSTEM_ERROR"`` with a fixed
+  ``message="Internal server error"`` — internal exception details are
+  logged but never leak to the client.
+- ``request_id`` is sourced from ``request.state.request_id`` (set by
+  upstream middleware); falls back to a freshly minted id when absent.
+"""
+
+from __future__ import annotations
+
+from fastapi import HTTPException, Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from starlette.status import (
+    HTTP_422_UNPROCESSABLE_ENTITY,
+    HTTP_500_INTERNAL_SERVER_ERROR,
+)
+
+from everos.component.utils.datetime import (
+    get_now_with_timezone,
+    to_iso_format,
+)
+from everos.core.observability.logging import get_logger
+from everos.core.observability.tracing import gen_request_id
+
+logger = get_logger(__name__)
+
+_INTERNAL_ERROR_MESSAGE = "Internal server error"
+
+
+def _request_id(request: Request) -> str:
+    """Return the request_id set by middleware, or mint a fresh fallback."""
+    rid = getattr(request.state, "request_id", None)
+    if rid:
+        return str(rid)
+    return gen_request_id()
+
+
+def _envelope(
+    *,
+    code: str,
+    message: str,
+    request: Request,
+) -> dict[str, object]:
+    """Build the canonical error envelope (wiki §1 shape — nested ``error``).
+
+    ``request_id`` at the top level, ``error`` object carries the
+    contract fields (``code`` / ``message``) plus ops-friendly
+    ``timestamp`` / ``path``.
+    """
+    return {
+        "request_id": _request_id(request),
+        "error": {
+            "code": code,
+            "message": message,
+            "timestamp": to_iso_format(get_now_with_timezone()),
+            "path": str(request.url.path),
+        },
+    }
+
+
+async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
+    """Convert any exception into a uniform JSON error response."""
+    path = str(request.url.path)
+    method = request.method
+
+    if isinstance(exc, RequestValidationError):
+        errors = exc.errors()
+        if errors:
+            first = errors[0]
+            loc = ".".join(str(p) for p in first.get("loc", []) if p != "body")
+            msg = first.get("msg", "Validation error")
+            message = f"{msg}: {loc}" if loc else msg
+        else:
+            message = "Request validation error"
+
+        logger.warning("validation_error", method=method, path=path, message=message)
+        return JSONResponse(
+            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
+            content=_envelope(code="HTTP_ERROR", message=message, request=request),
+        )
+
+    if isinstance(exc, HTTPException):
+        logger.warning(
+            "http_exception",
+            method=method,
+            path=path,
+            status_code=exc.status_code,
+            detail=exc.detail,
+        )
+        # 5xx routed through HTTPException is rare but valid; still honour
+        # the SYSTEM_ERROR code so the envelope is consistent.
+        if exc.status_code >= 500:
+            return JSONResponse(
+                status_code=exc.status_code,
+                content=_envelope(
+                    code="SYSTEM_ERROR",
+                    message=_INTERNAL_ERROR_MESSAGE,
+                    request=request,
+                ),
+            )
+        return JSONResponse(
+            status_code=exc.status_code,
+            content=_envelope(
+                code="HTTP_ERROR",
+                message=str(exc.detail),
+                request=request,
+            ),
+        )
+
+    logger.error(
+        "unhandled_exception",
+        method=method,
+        path=path,
+        exception_type=type(exc).__name__,
+        exc_info=True,
+    )
+    return JSONResponse(
+        status_code=HTTP_500_INTERNAL_SERVER_ERROR,
+        content=_envelope(
+            code="SYSTEM_ERROR",
+            message=_INTERNAL_ERROR_MESSAGE,
+            request=request,
+        ),
+    )
--- a/src/everos/core/middleware/profile.py
+++ b/src/everos/core/middleware/profile.py
@ -0,0 +1,69 @@
+"""Performance profiling middleware (HTML report via pyinstrument).
+
+Triggered with ``?profile=true`` query parameter when ``PROFILING_ENABLED=true``
+is set. Gracefully no-ops if pyinstrument is not installed.
+"""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Awaitable, Callable
+
+from fastapi import Request
+from fastapi.responses import HTMLResponse
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import Response
+from starlette.types import ASGIApp
+
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+_TRUTHY = frozenset({"1", "true", "yes"})
+
+
+def _profiling_enabled() -> bool:
+    """Read ``PROFILING_ENABLED`` env var (1 / true / yes => enabled)."""
+    raw = os.getenv("PROFILING_ENABLED", os.getenv("PROFILING", "false")).lower()
+    return raw in _TRUTHY
+
+
+class ProfileMiddleware(BaseHTTPMiddleware):
+    """Returns a pyinstrument HTML report when ``?profile=true`` is set."""
+
+    def __init__(self, app: ASGIApp) -> None:
+        super().__init__(app)
+        self._enabled = _profiling_enabled()
+        self._available = False
+        if self._enabled:
+            try:
+                import pyinstrument  # noqa: F401
+
+                self._available = True
+                logger.info("profiling_middleware_enabled")
+            except ImportError:
+                logger.warning("profiling_requested_but_pyinstrument_missing")
+                self._enabled = False
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        if not self._enabled or not self._available:
+            return await call_next(request)
+
+        if request.query_params.get("profile", "").lower() not in _TRUTHY:
+            return await call_next(request)
+
+        from pyinstrument import Profiler
+
+        profiler = Profiler()
+        profiler.start()
+        logger.info("profile_started", method=request.method, path=request.url.path)
+        try:
+            await call_next(request)
+        except Exception:
+            logger.exception("profile_request_failed")
+        profiler.stop()
+        return HTMLResponse(content=profiler.output_html(), status_code=200)
--- a/src/everos/core/middleware/prometheus.py
+++ b/src/everos/core/middleware/prometheus.py
@ -0,0 +1,84 @@
+"""Prometheus HTTP metrics middleware.
+
+Auto-instruments incoming HTTP requests with a request counter and a
+duration histogram. Mounted via ``app.add_middleware(PrometheusMiddleware)``.
+
+Skips internal endpoints (``/metrics``, ``/health``, etc.) so they do not
+inflate cardinality or pollute their own statistics.
+"""
+
+from __future__ import annotations
+
+import time
+from collections.abc import Awaitable, Callable
+
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+from everos.core.observability.logging import get_logger
+from everos.core.observability.metrics import Counter, Histogram, HistogramBuckets
+
+logger = get_logger(__name__)
+
+
+# Request counter, labelled by HTTP method, normalised path and status.
+_http_requests_total = Counter(
+    name="http_requests_total",
+    description="Total number of HTTP requests handled.",
+    labelnames=("method", "path", "status"),
+    namespace="everos",
+)
+
+# Request-duration histogram (seconds), labelled by method and normalised
+# path only — status is intentionally not a label here.
+_http_request_duration_seconds = Histogram(
+    name="http_request_duration_seconds",
+    description="HTTP request duration in seconds.",
+    labelnames=("method", "path"),
+    namespace="everos",
+    buckets=HistogramBuckets.DEFAULT,
+)
+
+
+# Exact request paths that bypass instrumentation entirely.
+_SKIP_PATHS = frozenset({"/metrics", "/health", "/healthz", "/favicon.ico"})
+
+
+def _normalize_path(request: Request) -> str:
+    """Resolve the route template (e.g. ``/users/{user_id}``) for stable labels."""
+    scope = getattr(request, "scope", {})
+    route = scope.get("route") if isinstance(scope, dict) else None
+    if route is not None and hasattr(route, "path"):
+        return route.path
+    if request.path_params:
+        path = request.url.path
+        for name, value in request.path_params.items():
+            if str(value) in path:
+                path = path.replace(str(value), f"{{{name}}}")
+        return path
+    return "{unmatched}"
+
+
+class PrometheusMiddleware(BaseHTTPMiddleware):
+    """Records ``http_requests_total`` and ``http_request_duration_seconds``."""
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        if request.url.path in _SKIP_PATHS:
+            return await call_next(request)
+
+        method = request.method
+        start = time.perf_counter()
+        status = "500"
+        response: Response | None = None
+        try:
+            response = await call_next(request)
+            status = str(response.status_code)
+            return response
+        finally:
+            duration = time.perf_counter() - start
+            path = _normalize_path(request)
+            _http_requests_total.labels(method=method, path=path, status=status).inc()
+            _http_request_duration_seconds.labels(method=method, path=path).observe(
+                duration
+            )
--- a/src/everos/core/observability/init.py
+++ b/src/everos/core/observability/init.py
--- a/src/everos/core/observability/logging/init.py
+++ b/src/everos/core/observability/logging/init.py
@ -0,0 +1,13 @@
+"""structlog-based logging factory.
+
+External usage:
+    from everos.core.observability.logging import get_logger, configure_logging
+
+    logger = get_logger(__name__)
+    logger.info("event_name", key=value)
+"""
+
+from .factory import configure_logging as configure_logging
+from .factory import get_logger as get_logger
+
+__all__ = ["configure_logging", "get_logger"]
--- a/src/everos/core/observability/logging/factory.py
+++ b/src/everos/core/observability/logging/factory.py
@ -0,0 +1,117 @@
+"""structlog logger factory.
+
+Provides ``get_logger(__name__)`` for module-level logger acquisition.
+``configure_logging()`` is called once at process startup (run.py / lifespan)
+to set up the structlog processor chain and route stdlib logging through
+the same formatter so output stays uniform regardless of the caller.
+
+The configuration follows structlog's official "Foreign Log Integration"
+recipe: a single ``ProcessorFormatter`` renders both everos's own
+``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
+made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
+That way all three of the previously divergent prefixes — ``INFO:``,
+``[warning  ]``, plus the unconfigured no-prefix output — collapse to
+one ``[level] event key=value`` shape.
+
+Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
+crate and emit straight to stderr without going through Python; this
+module cannot reach them. Control their level with ``RUST_LOG`` env.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from typing import Any
+
+import structlog
+
+
+def get_logger(name: str) -> Any:
+    """Return a structlog logger bound to the given module name."""
+    return structlog.get_logger(name)
+
+
+def configure_logging(level: str = "INFO") -> None:
+    """Configure structlog and stdlib logging once at process startup.
+
+    After this call:
+
+    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
+      message flows through the same ``ProcessorFormatter``, so output
+      format is identical regardless of which logging API the caller used.
+    * Root-logger handlers are replaced with a single ``StreamHandler``
+      pointing at ``sys.stdout``; any previously installed handler
+      (uvicorn's default ``LOGGING_CONFIG``, libraries that call
+      ``logging.basicConfig``, etc.) is removed.
+
+    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
+    on the server entry point — without it, uvicorn re-installs its own
+    handlers on every startup and overrides what we set here.
+
+    Args:
+        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``).
+            Unknown names silently fall back to ``INFO`` via
+            ``getattr(logging, ..., INFO)``.
+    """
+    log_level = getattr(logging, level.upper(), logging.INFO)
+
+    shared_processors: list[Any] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.processors.StackInfoRenderer(),
+    ]
+
+    # structlog's own loggers feed into stdlib's logging, so the root
+    # logger handler decides where output lands and how it's rendered.
+    structlog.configure(
+        processors=[
+            *shared_processors,
+            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(log_level),
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    # The single formatter shared by both pipelines:
+    # * structlog events arrive already wrapped via ``wrap_for_formatter``;
+    # * foreign records (stdlib LogRecord) get pushed through
+    #   ``foreign_pre_chain`` so they pick up the same level / timestamp
+    #   fields before hitting ``ConsoleRenderer``.
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            structlog.dev.ConsoleRenderer(),
+        ],
+    )
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+
+    # Drop any handler we installed on a previous ``configure_logging``
+    # call (identified by formatter type) so repeated invocations don't
+    # produce duplicate output, but keep handlers other parties have
+    # attached — pytest's caplog handler in particular has to survive,
+    # otherwise tests using the ``caplog`` fixture can't see records
+    # that flow through structlog.
+    root = logging.getLogger()
+    root.handlers = [
+        h
+        for h in root.handlers
+        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
+    ]
+    root.addHandler(handler)
+    root.setLevel(log_level)
+
+    # Third-party HTTP clients log every successful request at INFO level —
+    # `httpx` is the worst offender (one line per call, called once per
+    # LLM / embedding / rerank request). A single LoCoMo conv run easily
+    # produces a thousand such lines, drowning everos's own events. They
+    # are useful for debugging API failures, but failures already surface
+    # via exceptions + status codes — so demote the success path to WARNING
+    # and let real errors still come through.
+    for noisy in ("httpx", "httpcore", "urllib3"):
+        logging.getLogger(noisy).setLevel(logging.WARNING)
--- a/src/everos/core/observability/metrics/init.py
+++ b/src/everos/core/observability/metrics/init.py
@ -0,0 +1,34 @@
+"""Prometheus-style metrics primitives + registry.
+
+External usage:
+    from everos.core.observability.metrics import (
+        Counter, Gauge, Histogram, HistogramBuckets,
+        get_metrics_registry, generate_metrics_response,
+    )
+"""
+
+from .counter import Counter as Counter
+from .counter import LabeledCounter as LabeledCounter
+from .gauge import Gauge as Gauge
+from .gauge import LabeledGauge as LabeledGauge
+from .histogram import Histogram as Histogram
+from .histogram import HistogramBuckets as HistogramBuckets
+from .histogram import LabeledHistogram as LabeledHistogram
+from .registry import generate_metrics_response as generate_metrics_response
+from .registry import get_metrics_registry as get_metrics_registry
+from .registry import reset_metrics_registry as reset_metrics_registry
+from .registry import set_metrics_registry as set_metrics_registry
+
+__all__ = [
+    "Counter",
+    "Gauge",
+    "Histogram",
+    "HistogramBuckets",
+    "LabeledCounter",
+    "LabeledGauge",
+    "LabeledHistogram",
+    "generate_metrics_response",
+    "get_metrics_registry",
+    "reset_metrics_registry",
+    "set_metrics_registry",
+]
--- a/src/everos/core/observability/metrics/counter.py
+++ b/src/everos/core/observability/metrics/counter.py
@ -0,0 +1,50 @@
+"""Counter wrapper around ``prometheus_client.Counter``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Counter as PromCounter
+
+from .registry import get_metrics_registry
+
+
+class Counter:
+    """Monotonically-increasing counter (totals, error counts)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        self._counter = PromCounter(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=get_metrics_registry(),
+        )
+        self._labelnames = tuple(labelnames)
+
+    def labels(self, **labels: str) -> LabeledCounter:
+        return LabeledCounter(self._counter.labels(**labels))
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._counter.inc(amount)
+
+
+class LabeledCounter:
+    """Counter slice with labels applied."""
+
+    def __init__(self, labeled: Any) -> None:
+        self._labeled = labeled
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._labeled.inc(amount)
--- a/src/everos/core/observability/metrics/gauge.py
+++ b/src/everos/core/observability/metrics/gauge.py
@ -0,0 +1,66 @@
+"""Gauge wrapper around ``prometheus_client.Gauge``.
+
+Async auto-refresh is intentionally not included in v0.1; subclass
+:class:`Gauge` and call :meth:`set` from your own scheduling logic when
+needed.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Gauge as PromGauge
+
+from .registry import get_metrics_registry
+
+
+class Gauge:
+    """Instantaneous numeric value (queue depth, cache size)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+    ) -> None:
+        self._gauge = PromGauge(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            registry=get_metrics_registry(),
+        )
+
+    def labels(self, **labels: str) -> LabeledGauge:
+        return LabeledGauge(self._gauge.labels(**labels))
+
+    def set(self, value: float) -> None:
+        self._gauge.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._gauge.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        self._gauge.dec(amount)
+
+
+class LabeledGauge:
+    """Gauge slice with labels applied."""
+
+    def __init__(self, labeled: Any) -> None:
+        self._labeled = labeled
+
+    def set(self, value: float) -> None:
+        self._labeled.set(value)
+
+    def inc(self, amount: float = 1.0) -> None:
+        self._labeled.inc(amount)
+
+    def dec(self, amount: float = 1.0) -> None:
+        self._labeled.dec(amount)
--- a/src/everos/core/observability/metrics/histogram.py
+++ b/src/everos/core/observability/metrics/histogram.py
@ -0,0 +1,102 @@
+"""Histogram wrapper around ``prometheus_client.Histogram``."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from prometheus_client import Histogram as PromHistogram
+
+from .registry import get_metrics_registry
+
+
+class HistogramBuckets:
+    """Predefined bucket configurations for common workloads.
+
+    Each preset is an ascending tuple of bucket upper bounds; pass one as
+    the ``buckets`` argument of ``Histogram``.
+    NOTE(review): values are presumably seconds (the HTTP duration
+    histogram uses ``DEFAULT``) — confirm for other callers.
+    """
+
+    # General-purpose preset: 0.005 up to 10.0.
+    DEFAULT: tuple[float, ...] = (
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+        10.0,
+    )
+    # Finest preset: 0.001 up to 0.5.
+    FAST: tuple[float, ...] = (0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5)
+    # Wider tail than DEFAULT: 0.01 up to 30.0.
+    API_CALL: tuple[float, ...] = (
+        0.01,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.0,
+        5.0,
+        10.0,
+        30.0,
+    )
+    # Coarsest preset: 0.1 up to 60.0.
+    BATCH: tuple[float, ...] = (0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)
+    # Same fine lower range as FAST, extended up to 5.0.
+    DATABASE: tuple[float, ...] = (
+        0.001,
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.1,
+        0.25,
+        0.5,
+        1.0,
+        2.5,
+        5.0,
+    )
+
+
+class Histogram:
+    """Distribution of observed values (latency, sizes)."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        labelnames: Sequence[str] = (),
+        namespace: str = "",
+        subsystem: str = "",
+        unit: str = "",
+        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
+    ) -> None:
+        self._histogram = PromHistogram(
+            name=name,
+            documentation=description,
+            labelnames=labelnames,
+            namespace=namespace,
+            subsystem=subsystem,
+            unit=unit,
+            buckets=tuple(buckets),
+            registry=get_metrics_registry(),
+        )
+
+    def labels(self, **labels: str) -> LabeledHistogram:
+        return LabeledHistogram(self._histogram.labels(**labels))
+
+    def observe(self, amount: float) -> None:
+        self._histogram.observe(amount)
+
+    def time(self) -> Any:
+        return self._histogram.time()
+
+
+class LabeledHistogram:
+    """Histogram slice with labels applied."""
+
+    def __init__(self, labeled: Any) -> None:
+        self._labeled = labeled
+
+    def observe(self, amount: float) -> None:
+        self._labeled.observe(amount)
+
+    def time(self) -> Any:
+        return self._labeled.time()
--- a/src/everos/core/observability/metrics/registry.py
+++ b/src/everos/core/observability/metrics/registry.py
@ -0,0 +1,35 @@
+"""Prometheus metrics registry singleton."""
+
+from __future__ import annotations
+
+from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
+
+_registry: CollectorRegistry | None = None
+
+
+def get_metrics_registry() -> CollectorRegistry:
+    """Return the global metrics registry.
+
+    Defaults to ``prometheus_client.REGISTRY``.
+    """
+    global _registry
+    if _registry is None:
+        _registry = REGISTRY
+    return _registry
+
+
+def set_metrics_registry(registry: CollectorRegistry) -> None:
+    """Override the global registry (mainly for tests).
+
+    Args:
+        registry: Registry that later ``get_metrics_registry()`` calls
+            return until it is replaced or reset.
+    """
+    global _registry
+    _registry = registry
+
+
+def generate_metrics_response() -> bytes:
+    """Render the current registry into Prometheus exposition format.
+
+    Returns:
+        The bytes produced by ``generate_latest`` for whichever registry
+        ``get_metrics_registry()`` currently returns.
+    """
+    return generate_latest(get_metrics_registry())
+
+
+def reset_metrics_registry() -> None:
+    """Reset the global registry override (mainly for tests).
+
+    Clears the module-level slot, so the next ``get_metrics_registry()``
+    call falls back to ``prometheus_client.REGISTRY`` again.
+    """
+    global _registry
+    _registry = None
--- a/src/everos/core/observability/tracing/init.py
+++ b/src/everos/core/observability/tracing/init.py
@ -0,0 +1,32 @@
+"""Tracing utilities — W3C-compatible request id generation.
+
+External usage::
+
+    from everos.core.observability.tracing import gen_request_id
+"""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+
+def gen_request_id() -> str:
+    """Generate a request id matching the W3C trace-context spec.
+
+    Returns 32 lowercase hex characters (128-bit, no prefix) — the same
+    format as a W3C ``trace_id`` / OpenTelemetry trace identifier. Routes
+    and services that mint a fresh request id (when one wasn't injected
+    by upstream middleware) should call this helper rather than rolling
+    their own uuid / prefix format, so the id layer stays compatible
+    with OpenTelemetry exporters and standard APM tooling.
+
+    Example::
+
+        >>> rid = gen_request_id()
+        >>> len(rid)
+        32
+    """
+    return uuid4().hex
+
+
+__all__ = ["gen_request_id"]
--- a/src/everos/core/persistence/init.py
+++ b/src/everos/core/persistence/init.py
@ -0,0 +1,106 @@
+"""Persistence primitives.
+
+Read/write toolkit for markdown files, async wrappers around the SQLite
+system DB and LanceDB index, plus a memory-root path manager. Higher
+layers (``memory``, ``infra``) layer business semantics on top of these
+building blocks; this subpackage knows nothing about Entry / MemCell /
+Episode or any other business model.
+
+External usage:
+    from everos.core.persistence import (
+        # Path manager + lock
+        MemoryRoot, memory_root_lock, LockError,
+        # Markdown IO toolkit
+        MarkdownReader, MarkdownWriter, ParsedMarkdown, Entry,
+        parse_frontmatter, dump_frontmatter, split_entries, find_entry,
+        # Frontmatter schema chassis
+        BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
+        DailyLogPathMixin, SkillPathMixin,
+        # Async SQLite (SQLModel / SA 2.0)
+        create_system_engine, create_session_factory, session_scope,
+        SQLModel, Field, Relationship, BaseTable, RepoBase,
+        # Async LanceDB
+        open_lancedb_connection, LanceModel, Vector, BaseLanceTable, touch,
+        LanceRepoBase,
+    )
+"""
+
+from .lancedb import BaseLanceTable as BaseLanceTable
+from .lancedb import LanceModel as LanceModel
+from .lancedb import LanceRepoBase as LanceRepoBase
+from .lancedb import Vector as Vector
+from .lancedb import open_lancedb_connection as open_lancedb_connection
+from .lancedb import touch as touch
+from .locking import LockError as LockError
+from .locking import memory_root_lock as memory_root_lock
+from .markdown import AgentScopedFrontmatter as AgentScopedFrontmatter
+from .markdown import BaseFrontmatter as BaseFrontmatter
+from .markdown import DailyLogPathMixin as DailyLogPathMixin
+from .markdown import Entry as Entry
+from .markdown import EntryId as EntryId
+from .markdown import MarkdownReader as MarkdownReader
+from .markdown import MarkdownWriter as MarkdownWriter
+from .markdown import ParsedMarkdown as ParsedMarkdown
+from .markdown import SkillPathMixin as SkillPathMixin
+from .markdown import StructuredEntry as StructuredEntry
+from .markdown import UserScopedFrontmatter as UserScopedFrontmatter
+from .markdown import dump_frontmatter as dump_frontmatter
+from .markdown import find_entry as find_entry
+from .markdown import parse_frontmatter as parse_frontmatter
+from .markdown import parse_structured_entry as parse_structured_entry
+from .markdown import render_structured_entry as render_structured_entry
+from .markdown import split_entries as split_entries
+from .memory_root import MemoryRoot as MemoryRoot
+from .memory_root import app_dir_name as app_dir_name
+from .memory_root import app_id_from_dir as app_id_from_dir
+from .memory_root import project_dir_name as project_dir_name
+from .memory_root import project_id_from_dir as project_id_from_dir
+from .sqlite import BaseTable as BaseTable
+from .sqlite import Field as Field
+from .sqlite import Relationship as Relationship
+from .sqlite import RepoBase as RepoBase
+from .sqlite import SQLModel as SQLModel
+from .sqlite import create_session_factory as create_session_factory
+from .sqlite import create_system_engine as create_system_engine
+from .sqlite import session_scope as session_scope
+
+__all__ = [
+    "AgentScopedFrontmatter",
+    "BaseFrontmatter",
+    "BaseLanceTable",
+    "BaseTable",
+    "DailyLogPathMixin",
+    "Entry",
+    "EntryId",
+    "Field",
+    "LanceModel",
+    "LanceRepoBase",
+    "LockError",
+    "MarkdownReader",
+    "MarkdownWriter",
+    "MemoryRoot",
+    "ParsedMarkdown",
+    "Relationship",
+    "RepoBase",
+    "SkillPathMixin",
+    "StructuredEntry",
+    "SQLModel",
+    "UserScopedFrontmatter",
+    "Vector",
+    "app_dir_name",
+    "app_id_from_dir",
+    "create_session_factory",
+    "create_system_engine",
+    "dump_frontmatter",
+    "find_entry",
+    "memory_root_lock",
+    "project_dir_name",
+    "project_id_from_dir",
+    "open_lancedb_connection",
+    "parse_frontmatter",
+    "parse_structured_entry",
+    "render_structured_entry",
+    "session_scope",
+    "split_entries",
+    "touch",
+]
--- a/src/everos/core/persistence/lancedb/init.py
+++ b/src/everos/core/persistence/lancedb/init.py
@ -0,0 +1,34 @@
+"""LanceDB async persistence.
+
+External usage (connection):
+    from everos.core.persistence.lancedb import open_lancedb_connection
+
+External usage (table-schema basics — ``LanceModel`` / ``Vector`` are
+re-exported from lancedb.pydantic; ``BaseLanceTable`` / ``touch`` are
+defined in this package):
+    from everos.core.persistence.lancedb import (
+        LanceModel, Vector, BaseLanceTable, touch,
+    )
+
+External usage (generic CRUD repository bases):
+    from everos.core.persistence.lancedb import (
+        LanceDailyLogRepoBase, LanceRepoBase,
+    )
+"""
+
+# Re-export the LanceDB-flavoured Pydantic primitives so business code has a
+# single canonical entry point for table schemas.
+from lancedb.pydantic import LanceModel as LanceModel
+from lancedb.pydantic import Vector as Vector
+
+from .base import BaseLanceTable as BaseLanceTable
+from .base import touch as touch
+from .connection import open_lancedb_connection as open_lancedb_connection
+from .repository import LanceDailyLogRepoBase as LanceDailyLogRepoBase
+from .repository import LanceRepoBase as LanceRepoBase
+
+__all__ = [
+    "BaseLanceTable",
+    "LanceDailyLogRepoBase",
+    "LanceModel",
+    "LanceRepoBase",
+    "Vector",
+    "open_lancedb_connection",
+    "touch",
+]
--- a/src/everos/core/persistence/lancedb/base.py
+++ b/src/everos/core/persistence/lancedb/base.py
@ -0,0 +1,158 @@
+"""Common LanceDB base for everos tables.
+
+:class:`BaseLanceTable` adds ``created_at`` / ``updated_at`` columns and
+the :attr:`BM25_FIELDS` declaration + :meth:`ensure_fts_indexes`
+classmethod so each schema owns *both* its column shape **and** its
+BM25 index spec — repos stay focused on queries.
+
+Note:
+    LanceDB has no SQL ``onupdate`` equivalent — the application must
+    explicitly set ``updated_at = get_utc_now()`` before calling
+    :meth:`AsyncTable.update` / :meth:`AsyncTable.merge_insert`. The
+    convenience :func:`touch` helper does this in one call.
+
+    **Every datetime column automatically carries ``tz=UTC`` in the
+    Arrow schema.** LanceDB's Pydantic→PyArrow converter does not
+    understand ``typing.Annotated`` metadata, so :data:`UtcDatetime`
+    cannot be used as the field type annotation. Instead,
+    :meth:`BaseLanceTable.to_arrow_schema` walks the inferred schema
+    and rewrites every ``timestamp[us]`` (naive) column to
+    ``timestamp[us, tz=UTC]``. PyArrow then auto-``astimezone(UTC)``
+    aware inputs on write **and** returns aware UTC datetimes on read
+    — no per-table configuration, no caller-side ``ensure_utc``.
+
+    Subclasses just declare ``datetime`` fields normally::
+
+        class Episode(BaseLanceTable):
+            timestamp: dt.datetime
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from typing import ClassVar
+
+import pyarrow as pa
+from lancedb import AsyncTable
+from lancedb.index import FTS
+from lancedb.pydantic import LanceModel
+from pydantic import Field
+
+from everos.component.utils.datetime import get_utc_now
+
+
+class BaseLanceTable(LanceModel):
+    """Pydantic / LanceDB base with ``created_at`` / ``updated_at`` and
+    schema-level LanceDB metadata (``TABLE_NAME`` / ``BM25_FIELDS``).
+
+    The schema is the single source of truth for everything LanceDB
+    needs to materialise the table: column shape, table name, vector
+    dim (declared per-subclass), and which columns carry an FTS index.
+    Repos read these ClassVars; they do not duplicate them.
+    """
+
+    TABLE_NAME: ClassVar[str] = ""
+    """LanceDB table name. Business schemas must override (e.g.
+    ``"episode"``). Left empty on chassis / test schemas that construct
+    their table inline."""
+
+    BM25_FIELDS: ClassVar[list[str]] = []
+    """Columns to build LanceDB FTS (BM25) indexes on.
+
+    Each declared column must already exist as a ``str`` (or
+    ``str | None``) field on the schema. Tokens are assumed to be
+    **app-layer pre-tokenised** (space-joined); the FTS index uses
+    ``base_tokenizer="whitespace"`` so segmentation is owned by the
+    app layer (:class:`JiebaTokenizer`). The same boundary owns stop-
+    word filtering (English + Chinese); FTS-side ``remove_stop_words``
+    is OFF. FTS *does* keep lightweight English-aware normalisation
+    (``lower_case`` / ``stem`` / ``ascii_folding``) as a belt-and-
+    braces layer on the same English tokens that survive jieba.
+    See ``17_lancedb_tables_design.md`` §2.4.1 and
+    :meth:`ensure_fts_indexes` below for the exact knobs."""
+
+    created_at: dt.datetime = Field(default_factory=get_utc_now)
+    updated_at: dt.datetime = Field(default_factory=get_utc_now)
+
+    @classmethod
+    def to_arrow_schema(cls) -> pa.Schema:
+        """Patch the default Arrow schema: force every timestamp to ``tz=UTC``.
+
+        The base ``LanceModel.to_arrow_schema()`` infers Arrow types from
+        Pydantic field annotations and emits naive ``timestamp[us]`` for
+        every :class:`datetime.datetime` column. We rewrite **every**
+        timestamp column to ``timestamp[us, tz=UTC]``:
+
+        * **on write** — PyArrow ``astimezone(UTC)``-s aware input
+          automatically before serialising the i64 epoch micros.
+        * **on read** — PyArrow returns aware UTC datetimes.
+
+        Zero per-table configuration. The rewrite also **overrides any
+        non-UTC tz** a subclass might have declared explicitly, because
+        project convention is: storage is always UTC. Mixed-tz columns
+        would violate the two-zone discipline (see
+        ``docs/datetime.md``); enforcing UTC at the schema level closes
+        that loophole.
+        """
+        base = super().to_arrow_schema()
+        return pa.schema(
+            [
+                pa.field(f.name, pa.timestamp("us", tz="UTC"), nullable=f.nullable)
+                if pa.types.is_timestamp(f.type)
+                else f
+                for f in base
+            ]
+        )
+
+    @classmethod
+    async def ensure_fts_indexes(cls, table: AsyncTable) -> None:
+        """Create FTS indexes on every column in :attr:`BM25_FIELDS`.
+
+        Idempotent: columns that already have an index are skipped, so
+        this is safe to call on every startup. The FTS config is fixed
+        to the app-layer pre-tokenisation + LanceDB normalisation
+        convention (designed for **multilingual mixed content**):
+
+        - ``base_tokenizer="whitespace"`` — split on the spaces our
+          app-layer tokenizer provider already inserted between tokens.
+        - ``lower_case=True`` — Unicode-aware case-fold (English A→a;
+          no-op on CJK characters).
+        - ``stem=True`` — Porter / Snowball English stemmer per
+          ``language="English"`` (tantivy default). CJK tokens have no
+          stemmer and pass through untouched.
+        - ``remove_stop_words=False`` — **stop-word removal is owned by
+          the app-layer** (:class:`JiebaTokenizer`), which already drops
+          both Chinese and English stop-words before tokens reach the
+          FTS index. Keeping FTS-side filtering off avoids double-
+          filtering and a divided source of truth.
+        - ``ascii_folding=True`` — strips diacritics (é→e) on Latin
+          characters; no-op on CJK.
+        - ``with_position=True`` — enables phrase queries.
+
+        Subclasses normally do not need to override this — declaring
+        :attr:`BM25_FIELDS` is enough.
+        """
+        if not cls.BM25_FIELDS:
+            return
+        indices = await table.list_indices()
+        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
+        for field in cls.BM25_FIELDS:
+            if field in indexed_cols:
+                continue
+            await table.create_index(
+                column=field,
+                config=FTS(
+                    with_position=True,
+                    base_tokenizer="whitespace",
+                    lower_case=True,
+                    stem=True,
+                    remove_stop_words=False,
+                    ascii_folding=True,
+                ),
+            )
+
+
+def touch(record: BaseLanceTable) -> BaseLanceTable:
+    """Set ``record.updated_at = now`` and return the record (chainable).
+
+    Args:
+        record: Row object to stamp; it is modified in place.
+
+    Returns:
+        The same ``record`` object, with ``updated_at`` set to the value
+        of ``get_utc_now()``.
+    """
+    record.updated_at = get_utc_now()
+    return record
--- a/src/everos/core/persistence/lancedb/connection.py
+++ b/src/everos/core/persistence/lancedb/connection.py
@ -0,0 +1,68 @@
+"""Async LanceDB connection factory.
+
+LanceDB does not live inside the SQLAlchemy ecosystem; it has its own
+``connect_async`` returning :class:`lancedb.AsyncConnection`. This module
+is a thin wrapper that:
+
+    1. ensures the lancedb root directory exists
+    2. converts ``LanceDBSettings.read_consistency_seconds`` into the
+       :class:`datetime.timedelta` value LanceDB expects
+    3. installs a capped :class:`lancedb.Session` so the global index
+       cache cannot grow unbounded and exhaust file descriptors
+       (see :attr:`LanceDBSettings.index_cache_size_bytes` for the
+       full rationale)
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from pathlib import Path
+
+import lancedb
+from lancedb import AsyncConnection
+
+from everos.config import LanceDBSettings
+
+
+async def open_lancedb_connection(
+    lancedb_dir: Path,
+    lancedb_settings: LanceDBSettings,
+) -> AsyncConnection:
+    """Open an async LanceDB connection rooted at ``lancedb_dir``.
+
+    Args:
+        lancedb_dir: Filesystem path to the LanceDB root (typically
+            ``MemoryRoot.lancedb_dir``). Created if missing.
+        lancedb_settings: Tunables; the ``read_consistency_seconds`` field
+            is converted to a :class:`~datetime.timedelta`, and
+            ``index_cache_size_bytes`` caps the global index cache.
+
+    Returns:
+        An :class:`AsyncConnection` ready for table operations.
+    """
+    # mkdir is a microsecond-fast syscall and only fires on first connect;
+    # not worth pulling in anyio.Path / aiofiles for it.
+    lancedb_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240
+
+    interval: dt.timedelta | None = None
+    if lancedb_settings.read_consistency_seconds is not None:
+        interval = dt.timedelta(seconds=lancedb_settings.read_consistency_seconds)
+
+    # Bound the index cache so its readers (each one holds the FDs of
+    # an opened ``_indices/<uuid>/...`` directory) get LRU-evicted
+    # rather than leaking. Without this, a long-running daemon's FD
+    # count grows monotonically until ``EMFILE``. The metadata cache
+    # is intentionally left at the lancedb default (unbounded): it
+    # holds parsed in-memory manifests with zero FD pressure, and a
+    # cap there would just thrash. See ``LanceDBSettings`` for the
+    # measurement that picked the default size.
+    session = lancedb.Session(
+        index_cache_size_bytes=lancedb_settings.index_cache_size_bytes,
+        metadata_cache_size_bytes=None,
+    )
+
+    return await lancedb.connect_async(
+        str(lancedb_dir),
+        read_consistency_interval=interval,
+        session=session,
+    )
--- a/src/everos/core/persistence/lancedb/repository.py
+++ b/src/everos/core/persistence/lancedb/repository.py
@ -0,0 +1,530 @@
+"""Generic CRUD repository for LanceDB-backed tables.
+
+``LanceRepoBase`` mirrors the SQLite ``RepoBase`` shape: a pure generic
+CRUD helper that knows nothing about a storage runtime. Concrete repos
+either pass an :class:`AsyncTable` explicitly (typical in tests) or
+override :meth:`_table_lookup` to pull the cached table from their
+storage manager (typical in
+:mod:`everos.infra.persistence.lancedb.repos`).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import datetime as dt
+from collections.abc import Sequence
+from typing import Any, ClassVar
+
+from lancedb import AsyncTable
+
+from everos.core.observability.logging import get_logger
+
+from .base import BaseLanceTable
+
+logger = get_logger(__name__)
+
+
+def _q(value: str) -> str:
+    """Escape single quotes for a LanceDB SQL-like ``where`` predicate.
+
+    LanceDB has no parameterised query API; predicates are strings.
+    Doubling the quote (``'`` → ``''``) is the SQL-standard way to keep
+    a literal single quote inside a single-quoted string. everos's PK
+    convention (``<owner_id>_<entry_id>``) never carries quotes — this
+    is defensive.
+    """
+    return value.replace("'", "''")
+
+
+class LanceRepoBase[T: BaseLanceTable]:
+    """Generic CRUD repository for one LanceDB table.
+
+    Subclass and bind to a schema. Two ways to provide the table:
+
+    1. **Explicit (tests / DI)** — pass it to ``__init__``::
+
+           repo = EpisodeRepo(table)
+
+    2. **Lazy hook (production singletons)** — override
+       :meth:`_table_lookup` so the repo can be instantiated as a
+       module-level singleton with no live connection yet::
+
+           class _EpisodeRepo(LanceRepoBase[Episode]):
+               schema = Episode
+
+               async def _table_lookup(self):
+                   from everos.infra.persistence.lancedb.lancedb_manager import (
+                       get_table,
+                   )
+                   return await get_table(self.schema.TABLE_NAME, self.schema)
+
+           episode_repo = _EpisodeRepo()
+           await episode_repo.add([Episode(text=..., vector=[...])])
+
+    The LanceDB table name lives on the schema (``BaseLanceTable.TABLE_NAME``)
+    so every LanceDB-side metadatum — column shape, table name,
+    vector dim, BM25 index spec — sits in one place. ``table_name``
+    here is a thin pass-through; subclasses normally do **not**
+    override it.
+
+    Write paths (``add`` / ``upsert`` / ``delete`` / ``delete_by_md_path``)
+    are serialised by a per-``table_name`` :class:`asyncio.Lock`. LanceDB's
+    ``merge_insert`` is a read-modify-write at the storage layer with no
+    application-visible OCC contract — two concurrent calls against the
+    same table can race on the version manifest and lose updates even
+    when the row sets are disjoint (observed: cascade worker
+    ``asyncio.gather`` over a batch of ``user_profile`` rows where one
+    write disappears). Serialising on the table name closes that window;
+    reads stay unlocked so search QPS is not impacted by writers.
+
+    Locks live in a class-level dict keyed by table name and are never
+    evicted (mirrors :mod:`everos.memory.strategies._partition_locks`
+    on bpo-28427 — a lock with pending waiters must outlive any dict
+    entry that points to it).
+    """
+
+    schema: type[T]
+
+    _table_locks: ClassVar[dict[str, asyncio.Lock]] = {}
+    """Per-table-name write lock pool (process-wide, lazily populated)."""
+
+    @property
+    def table_name(self) -> str:
+        """LanceDB table name, resolved from :attr:`schema.TABLE_NAME`.
+
+        The write paths also pass this value to :meth:`_write_lock` as
+        the lock key.
+        """
+        return self.schema.TABLE_NAME
+
+    @classmethod
+    def _write_lock(cls, table_name: str) -> asyncio.Lock:
+        """Return the write lock for ``table_name``; create on first use.
+
+        ``dict.setdefault`` is atomic under single-threaded asyncio (no
+        ``await`` between check and insert), so no meta-lock is needed.
+        """
+        return cls._table_locks.setdefault(table_name, asyncio.Lock())
+
+    @classmethod
+    def _reset_locks_for_tests(cls) -> None:
+        """Test-only: drop the write-lock pool.
+
+        ``asyncio.Lock`` binds to the current event loop on first
+        ``acquire()``; pytest-asyncio creates a fresh loop per test, so
+        a module-level lock surviving across tests fails with "bound to
+        a different event loop". The production cascade worker runs on
+        one loop forever and does not need this hook. Mirrors
+        :func:`everos.memory.strategies._partition_locks._reset_for_tests`.
+        """
+        # Empties the dict in place; the next ``_write_lock`` call for a
+        # table name creates a fresh lock.
+        cls._table_locks.clear()
+
+    def __init__(self, table: AsyncTable | None = None) -> None:
+        """Bind to a table directly; if ``None``, defer to ``_table_lookup``.
+
+        Args:
+            table: Table to use for every operation. When omitted,
+                :meth:`_table` calls :meth:`_table_lookup` on each access.
+        """
+        # Stored as-is; ``_table`` checks this before falling back to the hook.
+        self._table_override = table
+
+    async def _table_lookup(self) -> AsyncTable:
+        """Resolve the table on first use. Override in subclass.
+
+        ``LanceRepoBase`` itself has no idea where the runtime singleton
+        lives. The default raises so a missing override is loud rather
+        than silently broken.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__}: pass table= to __init__ "
+            "or override _table_lookup() to wire the storage manager."
+        )
+
+    async def _table(self) -> AsyncTable:
+        if self._table_override is not None:
+            return self._table_override
+        return await self._table_lookup()
+
+    # ── Create ─────────────────────────────────────────────────────────────
+
+    async def add(self, records: Sequence[T]) -> None:
+        """Insert one or more records.
+
+        Args:
+            records: Rows to append; copied into a ``list`` before being
+                handed to the table.
+        """
+        # The table is resolved before the lock is taken, so only the
+        # ``table.add`` call itself runs under the per-table write lock.
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await table.add(list(records))
+
+    # ── Upsert ─────────────────────────────────────────────────────────────
+
+    async def upsert(
+        self,
+        records: Sequence[T],
+        *,
+        by: str = "id",
+    ) -> None:
+        """Upsert records keyed by ``by`` (PK column, default ``"id"``).
+
+        Wraps LanceDB's ``merge_insert(on=...)`` fluent builder with the
+        equivalent of ``INSERT ... ON CONFLICT(by) DO UPDATE`` — matching
+        rows are replaced wholesale, non-matching rows inserted.
+
+        Cascade uses this when reconciling md → LanceDB: an entry seen
+        for the first time inserts; an entry that was edited in md
+        updates its existing row.
+        """
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await (
+                table.merge_insert(by)
+                .when_matched_update_all()
+                .when_not_matched_insert_all()
+                .execute(list(records))
+            )
+
+    # ── Maintenance ────────────────────────────────────────────────────────
+
+    async def optimize(self, *, cleanup_older_than: dt.timedelta | None = None) -> None:
+        """Compact fragments + merge new data into the FTS / vector indexes.
+
+        LanceDB's ``merge_insert`` writes new data into a fresh fragment.
+        The FTS (BM25) index built by :meth:`ensure_fts_indexes` only
+        covers fragments visible at index-build time, so rows written
+        after the initial build can become **invisible to BM25 queries**
+        until ``optimize()`` runs and merges those fragments into the
+        index segment that the query engine reads.
+
+        Symptom this guards against (verified on LoCoMo conv0): after
+        steady-state cascade ingest, ``nearest_to_text("any_common_word")``
+        returns 0 hits even though the column literally contains the
+        token in 100% of rows — the new fragments simply hadn't been
+        indexed.
+
+        Cascade triggers this through a per-kind throttle + trailing
+        edge scheduler (``CascadeWorker._schedule_optimize``): at most
+        one run per ~1s window per kind, decoupled from the drain
+        loop, with a 60s heartbeat sweep as a safety net. Cost is
+        O(N) data-rewrite per optimized fragment; the throttle is how
+        we cap it under sustained write pressure.
+
+        Args:
+            cleanup_older_than: When set, also prune (physically delete)
+                files belonging to dataset versions older than this
+                interval. ``None`` (default) compacts only — historical
+                manifests, replaced data fragments, and stale index
+                UUID files are kept on disk forever, which inflates the
+                file count (and FD usage at scan time) without bound.
+                Cascade passes a non-None value on a slower beat
+                (``CascadeWorker._optimize_prune_interval``) so the
+                hot drain path stays cheap. Note: this does *not*
+                shrink **active** index internals (FTS ``part_N`` count
+                or vector index UUID count) — those only collapse via
+                ``drop_index + create_index``, which is not done here.
+        """
+        # NOTE(review): unlike ``add`` / ``upsert`` / ``rebuild_indexes``,
+        # this call does not take the per-table write lock — confirm that
+        # running compaction concurrently with writers is intentional.
+        table = await self._table()
+        await table.optimize(cleanup_older_than=cleanup_older_than)
+
+    async def rebuild_indexes(self) -> None:
+        """Drop and re-create every index on this table.
+
+        **Why this exists** — workaround for an upstream Python API gap:
+
+        Lance's Rust ``OptimizeOptions`` has a ``num_indices_to_merge``
+        knob (default 1) that bounds the number of active index UUIDs
+        per column. With ``Some(1)``, every ``optimize_indices()`` call
+        merges its delta into the base — active UUID count stays at 1.
+
+        Two problems block us from using it from the application layer:
+
+        1. ``lancedb.AsyncTable.optimize()`` does **not expose** this
+           parameter (verified on lancedb main 2026-05-28). It forwards
+           only ``cleanup_since_ms`` and ``delete_unverified`` to Rust.
+        2. Even calling Lance directly via ``pylance``, the merge
+           behaviour itself is buggy on ``lance crate 4.0`` (what
+           lancedb 0.30.2 embeds) — ``num_indices_to_merge=1`` does
+           nothing. Fix landed in ``lance 7.x``, but ``pylance 7.x``
+           can not collapse indexes on a ``lance 4.0``-format dataset
+           (verified by experiment).
+
+        So in our current stack there is **no application-level path**
+        to bound active index UUID growth. ``optimize()`` keeps
+        accumulating one new UUID (vector) / one new ``part_N`` (FTS)
+        per call.
+
+        This method is the workaround: drop every existing index and
+        rebuild from the schema's ``ensure_fts_indexes`` contract. The
+        rebuild is **O(N) full retrain** but cheap in practice (~0.3s
+        for 50k rows × 2 FTS columns on local SSD), and during the
+        window LanceDB transparently falls back to brute-force scan so
+        queries and writes stay available.
+
+        **Cadence** — :class:`CascadeWorker` runs this on a slow loop
+        (default 12h per kind). Frequency is bounded by the rebuild
+        cost, not by correctness — even daily is fine functionally;
+        12h is a conservative pick to keep file/UUID counts well below
+        any FD ceiling under steady-state ingest.
+
+        **When to remove** — once lancedb exposes ``num_indices_to_merge``
+        on the async Python API **and** the embedded ``lance crate``
+        ships the working merge implementation, delete this method and
+        switch to ``optimize(num_indices_to_merge=1)`` in the regular
+        ``optimize()`` path. Tracking issues / context:
+
+        - https://github.com/lancedb/lancedb/issues/2193
+        - https://github.com/lancedb/lancedb/issues/3177
+        - https://github.com/lance-format/lance/pull/6711 (partial fix
+          in lance v7.0.0)
+        - https://docs.rs/lancedb/latest/lancedb/table/struct.OptimizeOptions.html
+        """
+        table = await self._table()
+        # Drop + re-create happens under the per-table write lock, so
+        # ``add`` / ``upsert`` on this table wait until the rebuild is done.
+        async with self._write_lock(self.table_name):
+            # NOTE(review): every index returned by ``list_indices()`` is
+            # dropped, but only whatever ``schema.ensure_fts_indexes``
+            # builds is re-created. If a table also carries a non-FTS
+            # index (e.g. vector / scalar), nothing here restores it —
+            # confirm such indexes do not exist or are rebuilt elsewhere.
+            for idx in await table.list_indices():
+                await table.drop_index(idx.name)
+            await self.schema.ensure_fts_indexes(table)
+
+    # ── Read ───────────────────────────────────────────────────────────────
+
+    async def count(self) -> int:
+        """Total row count.
+
+        Returns:
+            The result of the table's ``count_rows()`` with no filter.
+        """
+        table = await self._table()
+        return await table.count_rows()
+
+    async def get_by_id(
+        self,
+        id_value: str,
+        *,
+        id_field: str = "id",
+    ) -> T | None:
+        """Fetch one row by scalar PK; ``None`` if missing.
+
+        Uses LanceDB scalar filter ``<id_field> = '<id_value>'``. Single
+        quotes in ``id_value`` are doubled to avoid breaking the SQL-like
+        predicate; everos's PK convention is ``<owner_id>_<entry_id>``
+        which never contains quotes, so the escape is defensive.
+        """
+        table = await self._table()
+        rows = (
+            await table.query()
+            .where(f"{id_field} = '{_q(id_value)}'")
+            .limit(1)
+            .to_list()
+        )
+        if not rows:
+            return None
+        return self.schema.model_validate(rows[0])
+
+    async def find_where(
+        self,
+        where: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Scalar query returning *typed* schema instances.
+
+        Like :meth:`search` but returns ``list[T]`` rather than raw
+        LanceDB row dicts. No vector ANN; pure scalar filter only.
+        Use :meth:`search` when you need ``_distance`` or want to mix
+        ANN with filters.
+        """
+        table = await self._table()
+        rows = await table.query().where(where).limit(limit).to_list()
+        return [self.schema.model_validate(r) for r in rows]
+
+    async def find_one_where(self, where: str) -> T | None:
+        """Single-row variant of :meth:`find_where` (``None`` if no match)."""
+        rows = await self.find_where(where, limit=1)
+        return rows[0] if rows else None
+
+    async def find_where_paginated(
+        self,
+        where: str,
+        *,
+        sort_by: str,
+        descending: bool = True,
+        page: int = 1,
+        page_size: int = 20,
+        max_fetch: int = 20000,
+    ) -> tuple[list[T], int]:
+        """Paginated scalar query with in-memory sort.
+
+        LanceDB has no native ``ORDER BY``. The chassis fetches up to
+        ``max_fetch`` rows matching ``where``, sorts the resulting Arrow
+        table by ``sort_by``, then slices ``page`` × ``page_size``. The
+        *true* row count of the predicate is returned alongside the
+        page so callers can render pagination controls without a second
+        query.
+
+        Args:
+            where: SQL-like scalar predicate. Required (no implicit
+                full-table scan from ``find_where_paginated``).
+            sort_by: Column name to sort the result set by.
+            descending: ``True`` (default) → newest first; ``False`` →
+                ascending.
+            page: 1-indexed page number.
+            page_size: Rows per page.
+            max_fetch: Cap on rows pulled before the in-memory sort.
+                When the predicate matches more rows than this cap the
+                page is sorted over an *arbitrary* prefix and the page
+                contents are only approximately correct — the chassis
+                emits a warning so the caller learns about the
+                truncation.
+
+        Returns:
+            ``(rows, total)`` — ``rows`` is the typed page,
+            ``total`` is ``count_rows(filter=where)`` (the predicate's
+            true match count, regardless of ``max_fetch``).
+        """
+        table = await self._table()
+        total = await table.count_rows(filter=where)
+        if total > max_fetch:
+            logger.warning(
+                "find_where_paginated truncated",
+                extra={
+                    "table": self.table_name,
+                    "where": where,
+                    "total": total,
+                    "max_fetch": max_fetch,
+                },
+            )
+        arrow_tbl = await table.query().where(where).limit(max_fetch).to_arrow()
+        order = "descending" if descending else "ascending"
+        arrow_tbl = arrow_tbl.sort_by([(sort_by, order)])
+        offset = (page - 1) * page_size
+        page_rows = arrow_tbl.slice(offset, page_size)
+        return (
+            [self.schema.model_validate(r) for r in page_rows.to_pylist()],
+            total,
+        )
+
+    async def find_by_owner(
+        self,
+        owner_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Fetch rows by ``owner_id`` (5 business tables share this column)."""
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}'",
+            limit=limit,
+        )
+
+    async def find_by_md_path(self, md_path: str) -> T | None:
+        """Reverse-lookup from md path (cascade maps md edit → row)."""
+        return await self.find_one_where(f"md_path = '{_q(md_path)}'")
+
+    async def search(
+        self,
+        *,
+        vector: Sequence[float] | None = None,
+        where: str | None = None,
+        limit: int = 10,
+    ) -> list[dict[str, Any]]:
+        """Hybrid search: optional vector ANN + scalar SQL-like predicate.
+
+        Args:
+            vector: Embedding to find nearest rows for; ``None`` skips ANN.
+            where: SQL-like predicate (e.g. ``"tags = 'meeting'"``).
+            limit: Max rows.
+
+        Returns:
+            List of row dicts (LanceDB native shape — fields depend on
+            ``schema``; ``_distance`` added when ``vector`` is given).
+        """
+        table = await self._table()
+        q = table.query()
+        if vector is not None:
+            q = q.nearest_to(list(vector))
+        if where is not None:
+            q = q.where(where)
+        return await q.limit(limit).to_list()
+
+    # ── Delete ─────────────────────────────────────────────────────────────
+
+    async def delete(self, predicate: str) -> None:
+        """Delete rows matching a SQL-like predicate."""
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            await table.delete(predicate)
+
+    async def delete_by_md_path(self, md_path: str) -> int:
+        """Delete every row whose ``md_path`` matches; return rows deleted.
+
+        Cascade handler calls this when an md file is removed on disk
+        (or when reverse-reconcile discovers an orphaned LanceDB row).
+        Single quotes in ``md_path`` are doubled defensively.
+        """
+        table = await self._table()
+        async with self._write_lock(self.table_name):
+            result = await table.delete(f"md_path = '{_q(md_path)}'")
+        return int(result.num_deleted_rows)
+
+
+class LanceDailyLogRepoBase[T: BaseLanceTable](LanceRepoBase[T]):
+    """LanceRepoBase + queries unique to daily-log tables.
+
+    Daily-log tables (``episode`` / ``atomic_fact`` / ``foresight`` /
+    ``agent_case``) share a fixed schema slice: ``entry_id`` (md seq
+    id), ``session_id`` (conversation scope), and ``parent_type`` /
+    ``parent_id`` (record lineage). The queries below compose those
+    columns; ``agent_skill`` is *not* a daily-log (it is a named
+    single-file entity) and uses :class:`LanceRepoBase` directly.
+    """
+
+    async def find_by_owner_entry(
+        self,
+        owner_id: str,
+        entry_id: str,
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> T | None:
+        """Single point-query by ``(app, project, owner_id, entry_id)``.
+
+        ``entry_id`` is only unique within a (app, project, owner) scope —
+        the same ``ac_<date>_<seq>`` recurs in another space — so the
+        scope segments are part of the predicate to avoid a cross-space hit.
+        """
+        return await self.find_one_where(
+            f"owner_id = '{_q(owner_id)}' AND entry_id = '{_q(entry_id)}' "
+            f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'"
+        )
+
+    async def find_by_owner_entries(
+        self,
+        owner_id: str,
+        entry_ids: Sequence[str],
+        *,
+        app_id: str = "default",
+        project_id: str = "default",
+    ) -> list[T]:
+        """Bulk point-query by ``(app, project, owner_id, entry_id IN ...)``.
+
+        Empty ``entry_ids`` short-circuits to ``[]`` rather than emit a
+        ``WHERE entry_id IN ()`` predicate (LanceDB rejects empty
+        tuples). The query's ``limit`` is bound to ``len(entry_ids)``
+        because at most one row per id can exist under one (app, project,
+        owner) scope.
+        """
+        if not entry_ids:
+            return []
+        quoted = ", ".join(f"'{_q(eid)}'" for eid in entry_ids)
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}' AND entry_id IN ({quoted}) "
+            f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'",
+            limit=len(entry_ids),
+        )
+
+    async def find_by_session(
+        self,
+        owner_id: str,
+        session_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Every row in one conversation ``session_id`` under ``owner_id``."""
+        return await self.find_where(
+            f"owner_id = '{_q(owner_id)}' AND session_id = '{_q(session_id)}'",
+            limit=limit,
+        )
+
+    async def find_by_parent(
+        self,
+        parent_type: str,
+        parent_id: str,
+        *,
+        limit: int = 100,
+    ) -> list[T]:
+        """Every row whose parent matches ``(parent_type, parent_id)``."""
+        return await self.find_where(
+            f"parent_type = '{_q(parent_type)}' AND parent_id = '{_q(parent_id)}'",
+            limit=limit,
+        )
--- a/src/everos/core/persistence/locking.py
+++ b/src/everos/core/persistence/locking.py
@ -0,0 +1,76 @@
+"""Process-wide exclusive lock on a memory-root.
+
+Uses ``fcntl.flock`` (POSIX advisory locking, available on Linux + macOS;
+Windows is not supported — see project README on platform scope). The
+public surface is an :func:`contextlib.asynccontextmanager` so callers
+use ``async with memory_root_lock(mr):``; the underlying syscalls have
+no async equivalent so they run in a worker thread via
+:func:`anyio.to_thread.run_sync`.
+"""
+
+from __future__ import annotations
+
+import fcntl
+import os
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+import anyio
+
+from .memory_root import MemoryRoot
+
+
+class LockError(RuntimeError):
+    """Raised when the memory-root lock cannot be acquired in non-blocking mode.
+
+    :func:`memory_root_lock` raises it (chained from the underlying
+    ``BlockingIOError``) when called with ``blocking=False`` while the
+    lock is already held.
+    """
+
+
+@asynccontextmanager
+async def memory_root_lock(
+    memory_root: MemoryRoot,
+    *,
+    blocking: bool = True,
+) -> AsyncIterator[None]:
+    """Acquire an exclusive process lock on the memory-root.
+
+    Args:
+        memory_root: The memory-root to lock. The lock anchor file
+            (``<root>/.lock``) is created on first use.
+        blocking: If ``True`` (default), wait until the lock is free. If
+            ``False``, raise :class:`LockError` immediately when another
+            process holds it.
+
+    Raises:
+        LockError: When ``blocking=False`` and the lock is already held.
+    """
+    await anyio.Path(memory_root.root).mkdir(parents=True, exist_ok=True)
+    lock_path = memory_root.lock_file
+
+    # Open the anchor file (create on first use). The fd, not the path, is
+    # what fcntl operates on. ``os.open`` is microsecond-fast but offloaded
+    # for consistency with the rest of the lock acquisition flow.
+    fd = await anyio.to_thread.run_sync(
+        lambda: os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
+    )
+
+    flags = fcntl.LOCK_EX
+    if not blocking:
+        flags |= fcntl.LOCK_NB
+
+    try:
+        await anyio.to_thread.run_sync(fcntl.flock, fd, flags)
+    except BlockingIOError as exc:
+        await anyio.to_thread.run_sync(os.close, fd)
+        raise LockError(
+            f"another process already holds the memory-root lock at {lock_path}"
+        ) from exc
+
+    # Lock acquired — release + close strictly on exit. The BlockingIOError
+    # path above already cleaned up its fd, so it must NOT enter this
+    # finally block (otherwise we'd double-close).
+    try:
+        yield
+    finally:
+        try:
+            await anyio.to_thread.run_sync(fcntl.flock, fd, fcntl.LOCK_UN)
+        finally:
+            await anyio.to_thread.run_sync(os.close, fd)
--- a/src/everos/core/persistence/markdown/init.py
+++ b/src/everos/core/persistence/markdown/init.py
@ -0,0 +1,62 @@
+"""Markdown file IO toolkit.
+
+Atomic write + YAML frontmatter parse/dump + entry marker parse +
+audit-form structured-entry parsing. Knows nothing about business
+models (no MemCell / Episode); the :class:`Entry` here is a
+*marker-delimited* span within a markdown body, not a business record.
+
+External usage (IO + parse):
+    from everos.core.persistence.markdown import (
+        Entry, EntryId, StructuredEntry,
+        MarkdownReader, MarkdownWriter, ParsedMarkdown,
+        parse_frontmatter, dump_frontmatter,
+        split_entries, find_entry,
+        parse_structured_entry, render_structured_entry,
+    )
+
+External usage (frontmatter schema chassis):
+    from everos.core.persistence.markdown import (
+        BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
+        DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
+    )
+"""
+
+from .entries import Entry as Entry
+from .entries import EntryId as EntryId
+from .entries import StructuredEntry as StructuredEntry
+from .entries import find_entry as find_entry
+from .entries import parse_structured_entry as parse_structured_entry
+from .entries import render_structured_entry as render_structured_entry
+from .entries import split_entries as split_entries
+from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
+from .frontmatter import BaseFrontmatter as BaseFrontmatter
+from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
+from .frontmatter import ProfilePathMixin as ProfilePathMixin
+from .frontmatter import SkillPathMixin as SkillPathMixin
+from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
+from .frontmatter import dump_frontmatter as dump_frontmatter
+from .frontmatter import parse_frontmatter as parse_frontmatter
+from .parsed import ParsedMarkdown as ParsedMarkdown
+from .reader import MarkdownReader as MarkdownReader
+from .writer import MarkdownWriter as MarkdownWriter
+
+# Explicit public surface of the package: one entry per name re-exported
+# by the ``from .module import X as X`` lines above, kept in sorted order
+# (capitalised class names first, then the lowercase helper functions).
+__all__ = [
+    "AgentScopedFrontmatter",
+    "BaseFrontmatter",
+    "DailyLogPathMixin",
+    "Entry",
+    "EntryId",
+    "MarkdownReader",
+    "MarkdownWriter",
+    "ParsedMarkdown",
+    "ProfilePathMixin",
+    "SkillPathMixin",
+    "StructuredEntry",
+    "UserScopedFrontmatter",
+    "dump_frontmatter",
+    "find_entry",
+    "parse_frontmatter",
+    "parse_structured_entry",
+    "render_structured_entry",
+    "split_entries",
+]
--- a/src/everos/core/persistence/markdown/entries.py
+++ b/src/everos/core/persistence/markdown/entries.py
@ -0,0 +1,368 @@
+"""Markdown entries — id format, marker spans, and audit-form parsing.
+
+Three closely-related entry concepts live together here so a reader
+sees the whole entry surface in one file:
+
+1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
+   stamped into each daily-log entry's open / close markers. Carries
+   the prefix declared by the frontmatter schema, the date bucket, and
+   the in-file zero-padded sequence.
+
+2. :class:`Entry` — a marker-delimited span inside a markdown body::
+
+       <!-- entry:abc123 -->
+       ...content...
+       <!-- /entry:abc123 -->
+
+   :func:`split_entries` and :func:`find_entry` locate these spans
+   without interpreting the inner content. Higher layers (writers,
+   cascade) parse it per record type.
+
+3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
+   audit-form body fields (header / inline / sections). Built either
+   from a raw body string via :func:`parse_structured_entry` or from
+   an existing :class:`Entry` via :meth:`Entry.as_structured`.
+
+Audit-form layout::
+
+    ## <header>                ← optional H2 (usually entry id, for grep)
+
+    **key**: value             ← inline fields, one per line
+    **key2**: value2
+
+    ### Section Title          ← section fields: H3 + free-form text
+    body content...
+
+    ### Another Section
+    more content...
+
+The audit chassis is intentionally **type-agnostic** — every field
+round-trips as a string. Inline values are stringified on render
+(lists become ``[a, b, c]``, scalars use ``str()``); on parse
+everything is the raw text after the colon. Section titles are kept
+verbatim. This keeps parsing tolerant of stray fields, wrapped
+strings, and manually-typed timestamps; the strong-typed model lives
+in business writers + the SQLite/LanceDB indexes.
+
+Cross-user uniqueness is handled at the database layer via a composite
+``<user_id>_<entry_id>`` field; it is *not* encoded into the
+:class:`EntryId` string itself.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+import re
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Self
+
+# ── EntryId — structured id for marker stamping ─────────────────────────
+
+_DATE_FMT = "%Y%m%d"
+_SEQ_DIGITS = 8
+"""Minimum zero-padding for the in-file seq.
+
+8 digits keeps lexicographic order == numeric order up to 10**8
+entries per file (per user, per day). ``format()`` is "at least 8" —
+larger seqs emit more digits without truncation. ``parse`` is
+permissive: shorter (legacy 4-digit) and longer seq strings both
+parse cleanly; format normalises to >= 8 digits on round-trip.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class EntryId:
+    """Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
+
+    prefix: str
+    date: _dt.date
+    seq: int
+
+    def format(self) -> str:
+        """Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
+        return (
+            f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
+        )
+
+    def __str__(self) -> str:  # noqa: D401
+        return self.format()
+
+    @classmethod
+    def parse(cls, s: str) -> Self:
+        """Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
+
+        Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
+        possible) is preserved as-is.
+        """
+        parts = s.rsplit("_", 2)
+        if len(parts) != 3:
+            raise ValueError(f"invalid entry id format: {s!r}")
+        prefix, date_str, seq_str = parts
+        if not prefix:
+            raise ValueError(f"empty prefix in entry id: {s!r}")
+        try:
+            d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
+        except ValueError as exc:
+            raise ValueError(f"invalid date in entry id: {s!r}") from exc
+        try:
+            seq = int(seq_str)
+        except ValueError as exc:
+            raise ValueError(f"invalid seq in entry id: {s!r}") from exc
+        if seq < 0:
+            raise ValueError(f"negative seq in entry id: {s!r}")
+        return cls(prefix=prefix, date=d, seq=seq)
+
+    @classmethod
+    def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
+        """Build the id for the next entry given the file's current count.
+
+        ``current_count`` is the value of ``frontmatter.entry_count``
+        *before* this append. The new id gets ``seq = current_count + 1``.
+        """
+        if current_count < 0:
+            raise ValueError(f"current_count must be >= 0, got {current_count}")
+        return cls(prefix=prefix, date=date, seq=current_count + 1)
+
+
+# ── Entry — marker-delimited span inside a body ─────────────────────────
+
+# Filename / URL-safe id alphabet for the marker: ASCII letters, digits,
+# ``_`` and ``-``. ``_OPEN_RE`` matches an open marker
+# (``<!-- entry:<id> -->``) and captures the id as group 1.
+_ID_PATTERN = r"[A-Za-z0-9_-]+"
+_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
+
+
+@dataclass(frozen=True)
+class Entry:
+    """One marker-delimited entry within a markdown body.
+
+    Attributes:
+        id: Value between ``entry:`` and ``-->`` in the open marker.
+        body: Content between the open and close markers, with one leading
+            and one trailing newline removed (typical formatter output).
+        start: Offset of the opening ``<!-- entry:id -->`` in the source body.
+        end: Offset just past the closing ``<!-- /entry:id -->`` in the source.
+    """
+
+    id: str
+    body: str
+    start: int
+    end: int
+
+    def as_structured(self) -> StructuredEntry:
+        """Parse my body as audit-form and return a :class:`StructuredEntry`.
+
+        The id / body / start / end fields are preserved; the parsed
+        ``header`` / ``inline`` / ``sections`` are added on top.
+        """
+        return parse_structured_entry(self.body, _origin=self)
+
+
+def split_entries(body: str) -> list[Entry]:
+    """Scan ``body`` and return every entry in order.
+
+    Unmatched / unterminated open markers stop the scan at the first
+    such marker — partial entries are not returned. Callers needing
+    strict validation should layer a dedicated check on top.
+    """
+    entries: list[Entry] = []
+    pos = 0
+    while True:
+        open_match = _OPEN_RE.search(body, pos)
+        if open_match is None:
+            break
+        entry_id = open_match.group(1)
+        close_match = _close_re_for(entry_id).search(body, open_match.end())
+        if close_match is None:
+            # Unterminated entry — abort further scanning.
+            break
+        entries.append(
+            Entry(
+                id=entry_id,
+                body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+                start=open_match.start(),
+                end=close_match.end(),
+            )
+        )
+        pos = close_match.end()
+    return entries
+
+
+def find_entry(body: str, entry_id: str) -> Entry | None:
+    """Find the first entry with ``entry_id``, or ``None``."""
+    open_re = re.compile(rf"<!-- entry:{re.escape(entry_id)} -->")
+    open_match = open_re.search(body)
+    if open_match is None:
+        return None
+    close_match = _close_re_for(entry_id).search(body, open_match.end())
+    if close_match is None:
+        return None
+    return Entry(
+        id=entry_id,
+        body=_strip_one_newline(body[open_match.end() : close_match.start()]),
+        start=open_match.start(),
+        end=close_match.end(),
+    )
+
+
+def _close_re_for(entry_id: str) -> re.Pattern[str]:
+    """Build the close-marker regex for a specific id."""
+    return re.compile(rf"<!-- /entry:{re.escape(entry_id)} -->")
+
+
+def _strip_one_newline(text: str) -> str:
+    """Strip one leading and one trailing newline (typical formatter padding)."""
+    if text.startswith("\r\n"):
+        text = text[2:]
+    elif text.startswith("\n"):
+        text = text[1:]
+    if text.endswith("\r\n"):
+        text = text[:-2]
+    elif text.endswith("\n"):
+        text = text[:-1]
+    return text
+
+
+# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
+
+# H2 line: ``## <header>``.
+_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
+# Inline field: ``**key**: value``. Anchored to line start so a stray
+# ``**emphasis**`` mid-paragraph isn't mistaken for a field.
+_INLINE_RE = re.compile(
+    r"^\*\*(?P<key>[^*\n]+?)\*\*:\s*(?P<value>.*?)\s*$",
+    re.MULTILINE,
+)
+# H3 line: ``### Title``.
+_H3_RE = re.compile(r"^###\s+(.+?)\s*$", re.MULTILINE)
+
+
+@dataclass(frozen=True)
+class StructuredEntry(Entry):
+    """:class:`Entry` whose body has been parsed as audit-form data.
+
+    Inherits ``id`` / ``body`` / ``start`` / ``end`` from :class:`Entry`
+    and adds three parsed views of the body: the optional H2 header,
+    the inline ``**key**: value`` map, and the ``### Title`` sections.
+    When built from a raw body string with no marker context,
+    :func:`parse_structured_entry` fills the inherited fields with
+    ``id=""``, ``start=0`` and ``end=len(body)``.
+
+    Audit-form values are strings only; type coercion is the caller's
+    job (a strong-typed model lives in the writer / index).
+
+    Attributes:
+        header: Text of the H2 line, or ``None`` when the body has none.
+        inline: ``{key: value}`` taken from ``**key**: value`` lines.
+        sections: ``{title: body}`` taken from ``### Title`` blocks.
+    """
+
+    header: str | None = None
+    # Fresh dict per instance (a shared mutable default would be a bug).
+    inline: dict[str, str] = field(default_factory=dict)
+    sections: dict[str, str] = field(default_factory=dict)
+
+
+def render_structured_entry(
+    *,
+    header: str | None = None,
+    inline: Mapping[str, object] | None = None,
+    sections: Mapping[str, str] | None = None,
+) -> str:
+    """Render an audit-form entry body.
+
+    Args:
+        header: Optional H2 line at the top (typically the entry id —
+            redundant with the marker but useful for plain-text grep).
+        inline: ``{key: value}`` rendered as ``**key**: value``. Values
+            are stringified: ``list``/``tuple`` become ``[a, b, c]``;
+            ``None`` becomes the empty string; everything else uses
+            ``str()``.
+        sections: ``{title: body}`` rendered as ``### Title`` plus the
+            body text. Title is verbatim; body's trailing whitespace is
+            stripped.
+
+    Returns:
+        The rendered string, no trailing newline (the caller — typically
+        :meth:`MarkdownWriter.append_entry` — handles markers + newlines).
+    """
+    inline = inline or {}
+    sections = sections or {}
+    lines: list[str] = []
+
+    if header:
+        lines.append(f"## {header}")
+        lines.append("")
+
+    for key, value in inline.items():
+        lines.append(f"**{key}**: {_render_value(value)}")
+
+    for title, body in sections.items():
+        lines.append("")
+        lines.append(f"### {title}")
+        lines.append(body.rstrip())
+
+    return "\n".join(lines)
+
+
+def parse_structured_entry(
+    body: str, *, _origin: Entry | None = None
+) -> StructuredEntry:
+    """Parse an audit-form entry body. Strings only — no type coercion.
+
+    Tolerant of:
+
+    - missing H2 (``header`` will be ``None``)
+    - inline fields appearing before, between or after sections
+      (only matches before the first H3 are taken as the inline block)
+    - extra whitespace and stray lines (silently kept inside the
+      enclosing section's body)
+
+    When called via :meth:`Entry.as_structured`, the ``_origin`` Entry
+    contributes its ``id`` / ``start`` / ``end``; otherwise those fall
+    back to ``""`` / ``0`` / ``len(body)``.
+
+    Returns:
+        :class:`StructuredEntry` with everything as strings.
+    """
+    text = body.strip("\n")
+
+    # Split on H3 lines.
+    parts = _H3_RE.split(text)
+    head = parts[0]
+    sections_dict: dict[str, str] = {}
+    for i in range(1, len(parts), 2):
+        title = parts[i].strip()
+        content = parts[i + 1] if i + 1 < len(parts) else ""
+        sections_dict[title] = content.strip("\n").rstrip()
+
+    header: str | None = None
+    h2 = _H2_RE.search(head)
+    if h2:
+        header = h2.group(1).strip()
+
+    inline_dict: dict[str, str] = {
+        m.group("key").strip(): m.group("value").strip()
+        for m in _INLINE_RE.finditer(head)
+    }
+
+    if _origin is not None:
+        return StructuredEntry(
+            id=_origin.id,
+            body=_origin.body,
+            start=_origin.start,
+            end=_origin.end,
+            header=header,
+            inline=inline_dict,
+            sections=sections_dict,
+        )
+    return StructuredEntry(
+        id="",
+        body=body,
+        start=0,
+        end=len(body),
+        header=header,
+        inline=inline_dict,
+        sections=sections_dict,
+    )
+
+
+def _render_value(value: object) -> str:
+    """Stringify an inline value the audit-friendly way."""
+    if value is None:
+        return ""
+    if isinstance(value, list | tuple):
+        return "[" + ", ".join(str(item) for item in value) + "]"
+    return str(value)
--- a/src/everos/core/persistence/markdown/frontmatter.py
+++ b/src/everos/core/persistence/markdown/frontmatter.py
@ -0,0 +1,300 @@
+"""Frontmatter — YAML block parse / dump + L1 schema chassis.
+
+Frontmatter is the leading ``---``-delimited YAML block at the top of
+a markdown document::
+
+    ---
+    title: Hello
+    tags: [a, b]
+    ---
+    # Body starts here
+
+Two complementary surfaces live here:
+
+1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
+   YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
+   ``sort_keys=False`` so caller-controlled key order is preserved).
+
+2. The L1 chassis classes — :class:`BaseFrontmatter`,
+   :class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
+   which fix the *absolute-readonly* fields (``id`` / ``type`` /
+   ``schema_version``) plus scope (``user_id`` / ``agent_id`` +
+   ``track``). Every business frontmatter schema in
+   ``infra/persistence/markdown/mds/`` subclasses one of these.
+
+Concrete business schemas (``UserMemcellDailyFrontmatter``,
+``SkillFrontmatter``, …) live in ``infra``; they add per-record
+business fields plus the path-resolution metadata daily-log writers
+need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, ClassVar, Literal
+
+import yaml
+from pydantic import BaseModel, ConfigDict
+
+# ── YAML helpers ────────────────────────────────────────────────────────
+
+# Delimiter line that both opens and closes the frontmatter block.
+_DELIM = "---"
+
+
+def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
+    """Parse a leading ``---\\n...\\n---\\n`` YAML block.
+
+    Returns:
+        (meta, remainder): ``meta`` is the parsed YAML mapping (empty dict
+        if no frontmatter present, malformed, or non-mapping). ``remainder``
+        is everything after the closing delimiter line — including the body's
+        leading content as-is.
+
+    Notes:
+        - If the document does not start with ``---``, returns ``({}, text)``
+          unchanged.
+        - If a closing ``---`` line is not found, returns ``({}, text)``.
+        - If the YAML block is empty (``---\\n---\\n``), returns
+          ``({}, remainder)``.
+        - If the parsed YAML is not a mapping (e.g. a scalar list), returns
+          ``({}, text)`` — frontmatter must be a mapping.
+    """
+    if not text.startswith(_DELIM):
+        return {}, text
+
+    # Skip the opening "---" and the newline that must follow it.
+    rest = text[len(_DELIM) :]
+    if rest.startswith("\r\n"):
+        rest = rest[2:]
+    elif rest.startswith("\n"):
+        rest = rest[1:]
+    else:
+        # Opening "---" not followed by a newline → not a valid frontmatter.
+        return {}, text
+
+    closing_idx = _find_closing_delim(rest)
+    if closing_idx is None:
+        return {}, text
+
+    yaml_block = rest[:closing_idx]
+    remainder = rest[closing_idx + len(_DELIM) :]
+    # Drop the newline that follows the closing delimiter, if any.
+    if remainder.startswith("\r\n"):
+        remainder = remainder[2:]
+    elif remainder.startswith("\n"):
+        remainder = remainder[1:]
+
+    parsed: Any = yaml.safe_load(yaml_block) if yaml_block.strip() else {}
+    if parsed is None:
+        parsed = {}
+    if not isinstance(parsed, dict):
+        return {}, text
+    return parsed, remainder
+
+
+def dump_frontmatter(meta: Mapping[str, Any]) -> str:
+    """Render a mapping as a ``---\\n<yaml>\\n---\\n`` block.
+
+    An empty mapping yields the empty string (no delimiters). The YAML
+    payload preserves caller-supplied key order (``sort_keys=False``).
+    """
+    if not meta:
+        return ""
+    yaml_block = yaml.safe_dump(
+        dict(meta),
+        sort_keys=False,
+        allow_unicode=True,
+        default_flow_style=False,
+    )
+    return f"{_DELIM}\n{yaml_block}{_DELIM}\n"
+
+
+def _find_closing_delim(text: str) -> int | None:
+    """Find the offset of a line that is exactly ``---``.
+
+    A "line" is text between two newlines (or string boundaries).
+    Returns the offset of the first character of the matching line, or
+    ``None`` if no such line exists.
+    """
+    pos = 0
+    while pos < len(text):
+        nl = text.find("\n", pos)
+        line = text[pos:nl] if nl != -1 else text[pos:]
+        if line.rstrip("\r") == _DELIM:
+            return pos
+        if nl == -1:
+            return None
+        pos = nl + 1
+    return None
+
+
+# ── L1 schema chassis ───────────────────────────────────────────────────
+
+
+class BaseFrontmatter(BaseModel):
+    """L1 fields every markdown frontmatter must carry.
+
+    These match the *absolute-readonly* tier in the EverOS Markdown First
+    spec — they identify the record across markdown ↔ LanceDB and must
+    never be rewritten by a human edit.
+
+    Subclasses add scope (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``) plus per-record business fields.
+    """
+
+    SCOPE_DIR: ClassVar[str] = ""
+    """Top-level directory under the memory-root that holds this kind.
+
+    Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
+    schemas (rare) leave it empty; consumers that need to resolve a path
+    (writers, layout reverse-lookup) must reject schemas with empty
+    ``SCOPE_DIR``.
+    """
+
+    # Field declaration order is part of the model; keep ``id`` / ``type`` /
+    # ``schema_version`` in this order.
+    id: str
+    type: str
+    schema_version: int = 1
+
+    # Permit additional fields so L2 system-managed metadata
+    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can ride along on
+    # the same model without forcing every subclass to redeclare them.
+    model_config = ConfigDict(extra="allow")
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return an ``fnmatch``-style glob (relative to memory-root)
+        covering every markdown file this schema describes.
+
+        Used by the cascade kind registry — the scanner walks every kind's
+        ``path_glob()`` to enumerate eligible files without hard-coding
+        path patterns in cascade. The schema is the single source of truth
+        for both the writer's path resolution and the scanner's enumeration.
+
+        Subclasses must override — typically by mixing in
+        :class:`DailyLogPathMixin`, :class:`SkillPathMixin` or
+        :class:`ProfilePathMixin` *before* the scope mixin in the MRO so
+        this abstract version is shadowed.
+
+        Raises:
+            NotImplementedError: Always; this base implementation is only
+                reached when no path mixin (or direct override) shadows it.
+        """
+        # Name every path mixin this module ships so the hint is complete.
+        raise NotImplementedError(
+            f"{cls.__name__} must declare path_glob() "
+            "(mix in DailyLogPathMixin / SkillPathMixin / ProfilePathMixin, "
+            "or override directly)"
+        )
+
+
+class DailyLogPathMixin:
+    """Glob strategy for schemas stored as one markdown file per day.
+
+    Relative to the memory-root, each file sits at
+    ``<app>/<project>/<SCOPE_DIR>/<scope_id>/<DIR_NAME>/``
+    ``<FILE_PREFIX>-<YYYY-MM-DD>.md``. ``SCOPE_DIR`` is expected to come
+    from a scope mixin (``UserScopedFrontmatter`` /
+    ``AgentScopedFrontmatter``); ``DIR_NAME`` and ``FILE_PREFIX`` must be
+    declared by the concrete schema itself.
+
+    List **this mixin first** among the bases so the MRO picks up the
+    concrete ``path_glob()`` below instead of the ``NotImplementedError``
+    stub on :meth:`BaseFrontmatter.path_glob`::
+
+        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
+            DIR_NAME: ClassVar[str] = "episodes"
+            FILE_PREFIX: ClassVar[str] = "episode"
+            ...
+    """
+
+    # Declared (not assigned) here: the concrete schema / scope mixin
+    # supplies the values.
+    DIR_NAME: ClassVar[str]
+    FILE_PREFIX: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the root-relative glob matching every daily-log file."""
+        # The two leading wildcards stand for the <app>/<project> scope
+        # prefix that precedes every user-visible dir. The scanner's
+        # ``root.glob`` is anchored at root, so the prefix is mandatory
+        # (without it nothing matches), and the watcher's right-anchored
+        # ``PurePosixPath.match`` agrees on the same shape.
+        segments = (
+            "*",  # <app>
+            "*",  # <project>
+            cls.SCOPE_DIR,
+            "*",  # <scope_id>
+            cls.DIR_NAME,
+            f"{cls.FILE_PREFIX}-*.md",
+        )
+        return "/".join(segments)
+
+
+class SkillPathMixin:
+    """Glob strategy for schemas stored as one directory per skill.
+
+    Relative to the memory-root, a skill's main file sits at
+    ``<app>/<project>/<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/``
+    ``<SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``. Only that
+    main file is covered by the glob; sibling ``references/*.md`` and
+    ``scripts/*`` are excluded (they ride alongside the main file and the
+    cascade daemon rebuilds the index column by concatenation, see
+    :class:`AgentSkillFrontmatter`'s docstring).
+
+    List **this mixin first** among the bases so the MRO resolves
+    ``path_glob()`` here::
+
+        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
+            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
+            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
+            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
+            ...
+    """
+
+    # Declared (not assigned) here: the concrete schema / scope mixin
+    # supplies the values.
+    SKILLS_CONTAINER_NAME: ClassVar[str]
+    SKILL_DIR_PREFIX: ClassVar[str]
+    SKILL_MAIN_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the root-relative glob matching every skill's main file."""
+        segments = (
+            "*",  # <app>
+            "*",  # <project>
+            cls.SCOPE_DIR,
+            "*",  # <scope_id>
+            cls.SKILLS_CONTAINER_NAME,
+            f"{cls.SKILL_DIR_PREFIX}*",  # one directory per skill
+            cls.SKILL_MAIN_FILENAME,
+        )
+        return "/".join(segments)
+
+
+class ProfilePathMixin:
+    """Glob strategy for schemas stored as a single fixed-name profile file.
+
+    Relative to the memory-root, a profile sits at
+    ``<app>/<project>/<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>`` — one
+    file directly under the owner's directory, with no intermediate
+    ``<dir>/`` segment (unlike daily-logs) and no per-name subdir (unlike
+    skills). ``SCOPE_DIR`` is expected to come from a scope mixin
+    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``);
+    ``PROFILE_FILENAME`` must be declared by the concrete schema.
+
+    List **this mixin first** among the bases so the MRO resolves
+    ``path_glob()`` here::
+
+        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
+            PROFILE_FILENAME: ClassVar[str] = "user.md"
+            ...
+    """
+
+    # Declared (not assigned) here: the concrete schema / scope mixin
+    # supplies the values.
+    PROFILE_FILENAME: ClassVar[str]
+    SCOPE_DIR: ClassVar[str]
+
+    @classmethod
+    def path_glob(cls) -> str:
+        """Return the root-relative glob matching every profile file."""
+        segments = (
+            "*",  # <app>
+            "*",  # <project>
+            cls.SCOPE_DIR,
+            "*",  # <scope_id>
+            cls.PROFILE_FILENAME,
+        )
+        return "/".join(segments)
+
+
+class UserScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single user (track = ``user``).
+
+    The frontmatter only carries the *file-level* scope (``user_id``,
+    which the path itself already expresses); business attributes like
+    ``group_id`` live inside each entry's structured body — see
+    :class:`StructuredEntry` in :mod:`.entries`.
+    """
+
+    # Overrides the empty ``BaseFrontmatter.SCOPE_DIR``; the path mixins in
+    # this module interpolate it into ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "users"
+
+    # Owner of the file. Required: no default is declared.
+    user_id: str
+    # Discriminator pinned to the single literal ``"user"``.
+    track: Literal["user"] = "user"
+
+
+class AgentScopedFrontmatter(BaseFrontmatter):
+    """Records that belong to a single agent (track = ``agent``).
+
+    Same scope-vs-business split as :class:`UserScopedFrontmatter`:
+    ``agent_id`` is the file-level scope; ``group_id`` etc. ride on
+    each entry, not on the file frontmatter.
+    """
+
+    # Overrides the empty ``BaseFrontmatter.SCOPE_DIR``; the path mixins in
+    # this module interpolate it into ``path_glob()``.
+    SCOPE_DIR: ClassVar[str] = "agents"
+
+    # Owner of the file. Required: no default is declared.
+    agent_id: str
+    # Discriminator pinned to the single literal ``"agent"``.
+    track: Literal["agent"] = "agent"
--- a/src/everos/core/persistence/markdown/parsed.py
+++ b/src/everos/core/persistence/markdown/parsed.py
@ -0,0 +1,31 @@
+"""Parsed-markdown data type.
+
+The output shape of :class:`MarkdownReader` is held here, separate
+from the reader implementation: callers that only consume parse
+results don't need to import the reader machinery, and downstream
+modules (writer, business readers) can produce :class:`ParsedMarkdown`
+without going through ``MarkdownReader.read`` if they already hold
+the pieces.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from .entries import Entry
+
+
+@dataclass(frozen=True)
+class ParsedMarkdown:
+    """A markdown document after parsing.
+
+    ``frozen=True`` only blocks re-binding the three attributes; the
+    ``frontmatter`` dict and ``entries`` list themselves remain mutable
+    objects, so treat them as read-only by convention.
+
+    Attributes:
+        frontmatter: Parsed YAML mapping (empty dict if no frontmatter block).
+        body: Document text after the frontmatter block; not entry-stripped.
+        entries: Marker-delimited entries discovered inside ``body``.
+    """
+
+    frontmatter: dict[str, Any]
+    body: str
+    # ``default_factory`` gives every instance its own fresh list.
+    entries: list[Entry] = field(default_factory=list)
--- a/src/everos/core/persistence/markdown/reader.py
+++ b/src/everos/core/persistence/markdown/reader.py
@ -0,0 +1,42 @@
+"""Markdown file reader.
+
+Loads a markdown document and splits it into:
+
+    1. ``frontmatter`` — parsed YAML (empty dict if absent)
+    2. ``body`` — raw text after the closing ``---`` delimiter
+    3. ``entries`` — marker-delimited spans inside ``body``
+
+The reader is purely parsing; it does not validate frontmatter shape,
+entry content, or cross-references. Higher layers add business-aware
+checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
+
+``parse`` is sync (pure in-memory string processing). ``read`` is async
+and uses :class:`anyio.Path` so file I/O does not block the event loop.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import anyio
+
+from .entries import split_entries
+from .frontmatter import parse_frontmatter
+from .parsed import ParsedMarkdown
+
+
+class MarkdownReader:
+    """Stateless entry points that turn markdown into :class:`ParsedMarkdown`."""
+
+    @staticmethod
+    def parse(text: str) -> ParsedMarkdown:
+        """Split in-memory ``text`` into frontmatter, body and entries.
+
+        No file access happens here; the entries are discovered inside the
+        body that remains after the frontmatter has been split off.
+        """
+        frontmatter, remainder = parse_frontmatter(text)
+        return ParsedMarkdown(
+            frontmatter=frontmatter,
+            body=remainder,
+            entries=split_entries(remainder),
+        )
+
+    @staticmethod
+    async def read(path: Path) -> ParsedMarkdown:
+        """Load ``path`` as UTF-8 text and hand it to :meth:`parse`."""
+        raw = await anyio.Path(path).read_text(encoding="utf-8")
+        return MarkdownReader.parse(raw)
--- a/src/everos/core/persistence/markdown/writer.py
+++ b/src/everos/core/persistence/markdown/writer.py
@ -0,0 +1,269 @@
+"""Markdown file writer with atomic write semantics.
+
+Atomicity is provided by writing to a same-directory temp file
+(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
+the target. Keeping the temp file in the same directory guarantees the
+rename is on the same filesystem (POSIX rename is atomic only within a
+single fs).
+
+All public methods are async. File I/O (``read_text`` / ``write_text``
+/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
+a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
+in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
+
+In-process per-path locking
+---------------------------
+:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
+the whole file (load frontmatter+body, merge an entry block, atomic
+write the result). The atomic write itself is safe, but the read→write
+window crosses ``await`` points. Concurrent asyncio tasks targeting the
+same path would otherwise lose-update each other (both read N entries,
+both produce N+1, second write overwrites the first → 1 entry lost).
+
+To prevent this, an in-process per-path :class:`asyncio.Lock` is held
+across the entire read-modify-write sequence. Lock objects live on the
+writer instance (not class-level) so they bind to the event loop active
+when the writer was constructed — this avoids the
+"Lock bound to different loop" failure mode that surfaces when
+pytest-asyncio rebuilds the loop between tests but module-level writer
+singletons leak Lock objects across boundaries.
+
+Process-level coordination (multi-process writers against the same
+memory-root) remains the job of
+:func:`everos.core.persistence.locking.memory_root_lock`, which uses
+``fcntl.flock``. The two locks compose: per-path async lock serialises
+tasks within one process, ``memory_root_lock`` serialises processes
+against each other.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import os
+import uuid
+from collections.abc import Mapping, Sequence
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from ..memory_root import MemoryRoot
+from .entries import EntryId
+from .frontmatter import dump_frontmatter
+from .reader import MarkdownReader
+
+
+class MarkdownWriter:
+    """Atomic writer for markdown files inside a memory-root.
+
+    The ``memory_root`` reference is held to enable future enforcement that
+    targets stay within the configured root; current writes do not depend on
+    it for the rename itself (same-dir temp file).
+    """
+
+    def __init__(self, memory_root: MemoryRoot) -> None:
+        self._memory_root = memory_root
+        # Per-path async lock registry, filled lazily by ``lock_for``.
+        # ``dict.setdefault`` is GIL-atomic, so callers that race on the
+        # first use of a path all end up with the same Lock object.
+        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
+        # must outlive any task awaiting it; ref-counted GC would race with
+        # those waiters. See Python bpo-28427 for the WeakValueDictionary
+        # multithreading hazard that bites the weak-ref approach.
+        # Entries are never removed, so the registry holds one Lock per
+        # distinct path for the lifetime of the writer.
+        self._path_locks: dict[Path, asyncio.Lock] = {}
+
+    @property
+    def memory_root(self) -> MemoryRoot:
+        """The memory-root this writer was constructed with."""
+        return self._memory_root
+
+    def lock_for(self, path: Path) -> asyncio.Lock:
+        """Return the per-path lock; create on first use.
+
+        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
+        can serialise their own multi-step ``read → compute → write``
+        sequences against this writer's single-step ``append`` paths.
+        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
+        re-acquisition of the same lock from within an already-locked
+        critical section (``asyncio.Lock`` is *not* reentrant).
+
+        Args:
+            path: Target file; need not exist yet.
+
+        Returns:
+            The single :class:`asyncio.Lock` registered for ``path``.
+        """
+        # Resolve to an absolute canonical path so aliases (relative vs.
+        # absolute, symlinks) share the same lock object. ``resolve()`` is a
+        # synchronous filesystem call.
+        key = Path(path).resolve()
+        lock = self._path_locks.get(key)
+        if lock is None:
+            # ``setdefault`` (not plain assignment) so that, if another
+            # caller registered a lock for this key in the meantime, that
+            # lock wins and every caller shares the same object.
+            lock = self._path_locks.setdefault(key, asyncio.Lock())
+        return lock
+
+    async def write(self, path: Path, content: str) -> Path:
+        """Atomically write ``content`` to ``path``.
+
+        Steps:
+            1. ``mkdir -p`` the parent directory.
+            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
+            3. ``flush`` + ``fsync`` the temp file.
+            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).
+
+        If any step after the temp file name is chosen fails — including
+        cancellation of the calling task — the temp file is removed on a
+        best-effort basis and the original exception is re-raised.
+
+        Args:
+            path: Destination file.
+            content: Full text to store (written as UTF-8).
+
+        Returns:
+            ``Path(path)`` exactly as given (it is not resolved).
+        """
+        target = Path(path)
+        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
+        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
+        try:
+            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
+            await anyio.to_thread.run_sync(os.replace, tmp, target)
+        except BaseException:
+            # ``BaseException`` rather than ``Exception``: task cancellation
+            # (``asyncio.CancelledError``) is a ``BaseException`` and may
+            # arrive after the staging file was written but before the
+            # rename, which would otherwise leave the temp file behind.
+            # The shield lets the cleanup await run inside a cancelled scope.
+            with anyio.CancelScope(shield=True):
+                await _unlink_quiet(tmp)
+            raise
+        return target
+
+    async def write_markdown(
+        self,
+        path: Path,
+        *,
+        frontmatter: Mapping[str, Any] | None = None,
+        body: str = "",
+    ) -> Path:
+        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.
+
+        Args:
+            path: Destination file.
+            frontmatter: Mapping handed to ``dump_frontmatter``; ``None`` is
+                treated as an empty mapping.
+            body: Text placed directly after the dumped frontmatter.
+
+        Returns:
+            The value returned by :meth:`write`.
+        """
+        head = dump_frontmatter(frontmatter or {})
+        return await self.write(path, head + body)
+
+    async def append_entry(
+        self,
+        path: Path,
+        *,
+        entry_body: str,
+        entry_id: EntryId,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append a single entry block to a markdown file, merging frontmatter.
+
+        Convenience wrapper around :meth:`append_entries` for single-entry
+        callers. See that method for full semantics.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entry_body: Content between the open and close markers.
+                One leading and trailing newline are added automatically.
+            entry_id: The id to stamp on this entry. The caller normally
+                builds it with :meth:`EntryId.next_for`.
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter (later wins). ``None`` skips the merge.
+
+        Returns:
+            ``Path(path)`` as given (not resolved).
+        """
+        return await self.append_entries(
+            path,
+            [(entry_body, entry_id)],
+            frontmatter_updates=frontmatter_updates,
+        )
+
+    async def append_entries(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Append ``N`` entry blocks in a single locked read-modify-write cycle.
+
+        Compared with calling :meth:`append_entry` ``N`` times, this:
+
+        * Performs one file read + one file write instead of ``N`` of each
+          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
+          already holds many entries).
+        * Holds the per-path lock for one short critical section instead of
+          ``N`` separate acquisitions.
+        * Updates ``frontmatter`` once at the end (no intermediate
+          ``entry_count`` flapping).
+
+        The caller assigns and supplies all :class:`EntryId` values; this
+        method never allocates ids itself. The order in ``entries`` is the
+        order the blocks land in the file.
+
+        Args:
+            path: Target markdown file. Created if missing.
+            entries: ``(entry_body, entry_id)`` pairs to append, in order.
+                An empty sequence is allowed; the file is still rewritten
+                (with the frontmatter updates applied, if any are supplied).
+            frontmatter_updates: Mapping shallow-merged into existing
+                frontmatter (later wins). ``None`` or empty skips the merge.
+
+        Returns:
+            ``Path(path)`` as given (not resolved).
+        """
+        target = Path(path)
+        async with self.lock_for(target):
+            return await self._append_entries_unlocked(
+                target,
+                entries,
+                frontmatter_updates=frontmatter_updates,
+            )
+
+    async def _append_entries_unlocked(
+        self,
+        path: Path,
+        entries: Sequence[tuple[str, EntryId]],
+        *,
+        frontmatter_updates: Mapping[str, Any] | None = None,
+    ) -> Path:
+        """Same as :meth:`append_entries` but assumes the caller already
+        holds :meth:`lock_for` ``(path)``.
+
+        For use by higher-level writers that perform a multi-step
+        ``read → compute eid → write`` sequence and need to keep the lock
+        held across the read and the write. Public ``append_entries`` /
+        ``append_entry`` always wrap this with the lock.
+
+        This method does not acquire the lock itself (``asyncio.Lock`` is
+        not reentrant, so it could not do so from inside a caller's locked
+        section); calling it without holding the lock breaks the safety
+        contract.
+        """
+        target = Path(path)
+
+        # 1. Load existing markdown (or initialise empty).
+        if await anyio.Path(target).is_file():
+            parsed = await MarkdownReader.read(target)
+            # Copy so the merge below never mutates the parsed result.
+            meta: dict[str, Any] = dict(parsed.frontmatter)
+            body = parsed.body
+        else:
+            meta = {}
+            body = ""
+
+        # 2. Shallow-merge frontmatter updates.
+        if frontmatter_updates:
+            meta.update(frontmatter_updates)
+
+        # 3. Append all entry blocks in order.
+        if entries:
+            # Make sure the first new marker starts on its own line.
+            if body and not body.endswith("\n"):
+                body += "\n"
+            appended_blocks: list[str] = []
+            for entry_body, entry_id in entries:
+                eid_str = entry_id.format()
+                appended_blocks.append(
+                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
+                    f"<!-- /entry:{eid_str} -->\n"
+                )
+            body = body + "".join(appended_blocks)
+
+        # 4. Atomic write.
+        return await self.write_markdown(target, frontmatter=meta, body=body)
+
+
+def _write_and_fsync(tmp: Path, content: str) -> None:
+    """Write ``content`` to ``tmp`` as UTF-8 and fsync it before closing.
+
+    Synchronous on purpose: the writer runs it in a worker thread.
+    """
+    with open(tmp, "w", encoding="utf-8") as staging:
+        staging.write(content)
+        # Drain Python's buffer first, then ask the OS to persist the file.
+        staging.flush()
+        descriptor = staging.fileno()
+        os.fsync(descriptor)
+
+
+async def _unlink_quiet(tmp: Path) -> None:
+    """Remove ``tmp`` if present, ignoring any ``OSError``.
+
+    Used on the failure path of a write, where the exception that caused
+    the cleanup is the one that must reach the caller.
+    """
+    staging = anyio.Path(tmp)
+    with contextlib.suppress(OSError):
+        await staging.unlink(missing_ok=True)
--- a/src/everos/core/persistence/memory_root.py
+++ b/src/everos/core/persistence/memory_root.py
@ -0,0 +1,243 @@
+"""memory-root path manager.
+
+Single root directory holding all persisted memory:
+
+    User-visible (no dot prefix, edited by humans / agents; nested under
+    ``<root>/<app>/<project>/``):
+        agents/      per-agent records
+        users/       per-user records
+        knowledge/   global shared knowledge
+
+    System-managed (dotfile prefix, hidden by default in ls / Finder):
+        .index/             derived indexes (rebuildable from markdown)
+            sqlite/         system.db (+ WAL/SHM), ome.db, ome.aps.db
+            lancedb/        LanceDB tables
+        .tmp/               atomic-write staging directory
+        .lock               single-process lock anchor (created on demand by
+                            ``memory_root_lock``)
+
+    User-editable (at the root):
+        ome.toml            OME strategy overrides (hot-reloaded)
+
+The cascade queue, LSN watermark, and change audit all live in
+``system.db`` (table ``md_change_state``), not in separate dotfiles.
+
+The default location and tunables come from :class:`everos.config.Settings`
+(loaded from ``config/default.toml`` + ``EVEROS_*`` environment variables);
+:meth:`MemoryRoot.default` resolves the configured path.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+# ── app / project directory-name convention ──────────────────────────────────
+#
+# A memory root is partitioned by ``<app>/<project>`` *before* the user-visible
+# scope dirs (``agents`` / ``users`` / ``knowledge``), so memory for different
+# (app, project) pairs never shares a directory. The reserved id ``"default"``
+# materialises as ``default_app`` / ``default_project`` on disk (rather than a
+# bare ``default``) so a default space is visually distinct from a user-named
+# directory; every other id maps to itself.
+#
+# The mapping is symmetric: the cascade path parser reverses it (see
+# :func:`app_id_from_dir`) to recover the ids from an on-disk path. The write
+# side (here) and the read side (cascade) MUST stay in lockstep, or rebuilt
+# rows carry app/project that disagree with what was written. ``default_app`` /
+# ``default_project`` are therefore reserved directory names.
+#
+# NOTE(review): nothing in this module rejects an id that is literally
+# ``"default_app"`` / ``"default_project"``; such an id maps to itself on
+# write and is read back as ``"default"``. Presumably ids are validated
+# upstream — confirm.
+_DEFAULT_SCOPE_ID = "default"
+_DEFAULT_APP_DIR = "default_app"
+_DEFAULT_PROJECT_DIR = "default_project"
+
+# Path to the shipped OME override template; copied to ``<root>/ome.toml`` on
+# first ``ensure()`` so users have a real file to edit instead of having to
+# create one from scratch. ``parents[2]`` is the ``src/everos/`` package root
+# (memory_root.py sits at ``core/persistence/memory_root.py``).
+# Built from ``__file__`` at import time; the file is only read when
+# ``ensure()`` actually needs to materialise ``ome.toml``.
+_OME_TEMPLATE_PATH = Path(__file__).parents[2] / "config" / "default_ome.toml"
+
+
+def app_dir_name(app_id: str) -> str:
+    """Return the directory name used on disk for ``app_id``.
+
+    The reserved default id gets the dedicated default-app directory name;
+    every other id is used verbatim.
+    """
+    if app_id == _DEFAULT_SCOPE_ID:
+        return _DEFAULT_APP_DIR
+    return app_id
+
+
+def project_dir_name(project_id: str) -> str:
+    """Return the directory name used on disk for ``project_id``.
+
+    The reserved default id gets the dedicated default-project directory
+    name; every other id is used verbatim.
+    """
+    if project_id == _DEFAULT_SCOPE_ID:
+        return _DEFAULT_PROJECT_DIR
+    return project_id
+
+
+def app_id_from_dir(dir_name: str) -> str:
+    """Recover the ``app_id`` from a directory name.
+
+    Reverses :func:`app_dir_name`: the default-app directory name maps back
+    to the reserved default id; any other name is returned unchanged.
+    """
+    if dir_name == _DEFAULT_APP_DIR:
+        return _DEFAULT_SCOPE_ID
+    return dir_name
+
+
+def project_id_from_dir(dir_name: str) -> str:
+    """Recover the ``project_id`` from a directory name.
+
+    Reverses :func:`project_dir_name`: the default-project directory name
+    maps back to the reserved default id; any other name is returned
+    unchanged.
+    """
+    if dir_name == _DEFAULT_PROJECT_DIR:
+        return _DEFAULT_SCOPE_ID
+    return dir_name
+
+
+@dataclass(frozen=True, init=False)
+class MemoryRoot:
+    """Path manager for a memory-root directory.
+
+    Constructor accepts any path-like (``str`` or ``Path``); it is normalised
+    to an absolute, resolved ``Path`` so equality and hashing are stable
+    regardless of how the caller spells the path. ``init=False`` is paired
+    with a hand-written ``__init__`` so the input type (``Path | str``) is
+    decoupled from the stored field type (``Path``) — stdlib dataclass has
+    no converter slot, and Pyright would otherwise reject ``MemoryRoot(s)``
+    where ``s`` is a ``str``.
+    """
+
+    root: Path
+
+    def __init__(self, root: Path | str) -> None:
+        # ``frozen=True`` forbids attribute assignment, so go through
+        # ``object.__setattr__`` to install the normalised Path field.
+        resolved = Path(root).expanduser().resolve()
+        object.__setattr__(self, "root", resolved)
+
+    @classmethod
+    def default(cls) -> MemoryRoot:
+        """Return the memory-root from :class:`everos.config.Settings`.
+
+        The effective default lives in ``config/default.toml`` (``[memory]
+        root``); environment variable ``EVEROS_MEMORY__ROOT`` overrides it.
+        """
+        # Lazy import to keep this module dependency-free at import time.
+        from everos.config import load_settings
+
+        return cls(load_settings().memory.root)
+
+    # ── User-visible (partitioned by app / project) ──────────────────────────
+    #
+    # These take ``(app_id, project_id)`` because the scope dirs hang off the
+    # ``<root>/<app>/<project>/`` prefix; they are request-level inputs, never
+    # instance state. Both default to ``"default"`` so call sites that don't
+    # yet carry scope still resolve to the default space.
+    #
+    # NOTE(review): the ids are joined into the path verbatim. Presumably
+    # callers validate them to be single path segments (no separators, no
+    # ``..``, not absolute) — confirm, since nothing here enforces it.
+
+    def _scope_root(self, app_id: str, project_id: str) -> Path:
+        """``<root>/<app>/<project>/`` — shared prefix of the user-visible dirs."""
+        return self.root / app_dir_name(app_id) / project_dir_name(project_id)
+
+    def agents_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
+        """``<root>/<app>/<project>/agents/`` — per-agent records."""
+        return self._scope_root(app_id, project_id) / "agents"
+
+    def users_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
+        """``<root>/<app>/<project>/users/`` — per-user records."""
+        return self._scope_root(app_id, project_id) / "users"
+
+    def knowledge_dir(
+        self, app_id: str = "default", project_id: str = "default"
+    ) -> Path:
+        """``<root>/<app>/<project>/knowledge/`` — shared knowledge."""
+        return self._scope_root(app_id, project_id) / "knowledge"
+
+    # ── System-managed (dotfiles) ───────────────────────────────────────────
+
+    @property
+    def index_dir(self) -> Path:
+        """``<root>/.index/`` — derived index root."""
+        return self.root / ".index"
+
+    @property
+    def lancedb_dir(self) -> Path:
+        """``<root>/.index/lancedb/`` — LanceDB table root."""
+        return self.index_dir / "lancedb"
+
+    @property
+    def sqlite_dir(self) -> Path:
+        """``<root>/.index/sqlite/`` — SQLite system DB root.
+
+        Holds ``system.db`` plus its sidecars (``-wal`` / ``-shm`` in WAL
+        mode). Symmetric with :attr:`lancedb_dir`.
+        """
+        return self.index_dir / "sqlite"
+
+    @property
+    def system_db(self) -> Path:
+        """``<root>/.index/sqlite/system.db`` — SQLite DB for system
+        state, audit log, task queue, LSN watermark, and other metadata.
+        """
+        return self.sqlite_dir / "system.db"
+
+    @property
+    def ome_db(self) -> Path:
+        """``<root>/.index/sqlite/ome.db`` — SQLite DB backing the Offline
+        Memory Engine's own state: run records, counter store, idle store.
+        Symmetric with :attr:`system_db`.
+        """
+        return self.sqlite_dir / "ome.db"
+
+    @property
+    def ome_aps_db(self) -> Path:
+        """``<root>/.index/sqlite/ome.aps.db`` — SQLite DB holding the
+        APScheduler jobstore for the Offline Memory Engine. Split from
+        :attr:`ome_db` so APS's sync SQLAlchemy writer and OME's async
+        aiosqlite writer never contend for the same sqlite file lock.
+        """
+        return self.sqlite_dir / "ome.aps.db"
+
+    @property
+    def ome_config(self) -> Path:
+        """``<root>/ome.toml`` — user-editable OME strategy overrides.
+
+        Drop a file here to toggle strategies on/off or tweak per-strategy
+        knobs (max_retries, gate, cron …) without restarting the server.
+        The engine watches this file and hot-reloads changes within ~2 s.
+
+        Example to disable foresight and user-profile extraction::
+
+            [strategies.extract_foresight]
+            enabled = false
+
+            [strategies.extract_user_profile]
+            enabled = false
+        """
+        return self.root / "ome.toml"
+
+    @property
+    def lock_file(self) -> Path:
+        """``<root>/.lock`` — single-process exclusive lock anchor."""
+        return self.root / ".lock"
+
+    @property
+    def tmp_dir(self) -> Path:
+        """``<root>/.tmp/`` — staging directory for batch / multi-step writes.
+
+        Note:
+            ``MarkdownWriter`` does *not* use this for atomic single-file
+            writes; it uses a same-directory temp file to guarantee a
+            same-filesystem rename. This directory is reserved for callers
+            that need scratch space outside any single target directory.
+        """
+        return self.root / ".tmp"
+
+    # ── Operations ──────────────────────────────────────────────────────────
+
+    def ensure(self) -> None:
+        """Create the memory-root and the runtime-required dotfile dirs.
+
+        User-visible directories (``agents/`` / ``users/`` / ``knowledge/``)
+        are *not* pre-created — they appear on first write of their records.
+        Only directories the runtime infrastructure requires are made:
+
+            <root>/
+            <root>/.index/
+            <root>/.index/sqlite/
+            <root>/.index/lancedb/
+            <root>/.tmp/
+
+        ``<root>/ome.toml`` is also seeded from the shipped template when it
+        does not exist yet; an existing file is never overwritten.
+
+        Raises:
+            OSError: If a directory cannot be created, or if the template
+                cannot be read when ``ome.toml`` has to be seeded.
+        """
+        self.root.mkdir(parents=True, exist_ok=True)
+        self.index_dir.mkdir(parents=True, exist_ok=True)
+        self.sqlite_dir.mkdir(parents=True, exist_ok=True)
+        self.lancedb_dir.mkdir(parents=True, exist_ok=True)
+        self.tmp_dir.mkdir(parents=True, exist_ok=True)
+        # Materialize the OME override template on first run. The fast-path
+        # ``exists()`` check avoids touching the template at all once the
+        # user's file is in place.
+        config_path = self.ome_config
+        if config_path.exists():
+            return
+        template = _OME_TEMPLATE_PATH.read_bytes()
+        try:
+            # Exclusive create (``x``): if another process (or the user)
+            # created the file after the check above, keep theirs instead of
+            # truncating it the way a plain ``write_bytes`` would.
+            with open(config_path, "xb") as fh:
+                fh.write(template)
+        except FileExistsError:
+            pass
--- a/src/everos/core/persistence/sqlite/init.py
+++ b/src/everos/core/persistence/sqlite/init.py
@ -0,0 +1,42 @@
+"""SQLite async persistence (SQLModel + SQLAlchemy 2.0 + aiosqlite).
+
+External usage (engine + sessions):
+    from everos.core.persistence.sqlite import (
+        create_system_engine, create_session_factory, session_scope,
+    )
+
+External usage (ORM model basics — re-exported from sqlmodel):
+    from everos.core.persistence.sqlite import (
+        SQLModel, Field, Relationship, BaseTable,
+    )
+
+External usage (generic CRUD repository base):
+    from everos.core.persistence.sqlite import RepoBase
+
+The ``system_db`` is the everos
+``<memory_root>/.index/sqlite/system.db`` SQLite file holding system
+state, audit log, task queue, LSN watermark, and other metadata.
+"""
+
+# Re-export key sqlmodel symbols so business code has a single canonical
+# entry point (``everos.core.persistence.sqlite``) for ORM authoring.
+from sqlmodel import Field as Field
+from sqlmodel import Relationship as Relationship
+from sqlmodel import SQLModel as SQLModel
+
+from .base import BaseTable as BaseTable
+from .engine import create_system_engine as create_system_engine
+from .repository import RepoBase as RepoBase
+from .session import create_session_factory as create_session_factory
+from .session import session_scope as session_scope
+
+__all__ = [
+    "BaseTable",
+    "Field",
+    "Relationship",
+    "RepoBase",
+    "SQLModel",
+    "create_session_factory",
+    "create_system_engine",
+    "session_scope",
+]
--- a/src/everos/core/persistence/sqlite/base.py
+++ b/src/everos/core/persistence/sqlite/base.py
@ -0,0 +1,112 @@
+"""Common SQLModel base for everos tables.
+
+:class:`BaseTable` adds ``created_at`` / ``updated_at`` columns. The
+``updated_at`` column auto-refreshes on UPDATE through SA's ``onupdate``
+hook (no explicit assignment needed in business code).
+
+The **two-zone storage-UTC discipline** is enforced by a SQLAlchemy
+:class:`TypeDecorator` (:class:`UtcDateTimeColumn`) used as the SQL
+column type for every datetime field:
+
+* **on write** — ``process_bind_param`` converts every datetime to
+  aware UTC before SQLAlchemy emits the bound parameter. This covers
+  *every* SQLAlchemy write path uniformly:
+
+  - ORM ``session.add()`` / ``session.merge()`` (unit-of-work flush)
+  - Core ``session.execute(insert(...).values(...))``
+  - Core ``session.execute(update(...).values(...))``
+  - Bulk ``bulk_insert_mappings`` / ``bulk_save_objects``
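+  - Raw SQL, when its bound parameters are typed with this column type
+    (untyped ``text()`` binds carry no column type and bypass the hook)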
+
+  The column type is the only hook that SQLAlchemy routes *every* typed
+  write through. Mapper events (``before_insert`` / ``before_update``)
+  only fire on the ORM unit-of-work path and would silently miss Core
+  statements — which
+  :mod:`everos.infra.persistence.sqlite.repos.md_change_state` uses heavily.
+
+* **on read** — ``process_result_value`` re-attaches ``tzinfo=UTC`` to
+  every naive datetime returned from SQLite (which has no native tz
+  storage and always returns naive). Callers therefore never observe a
+  naive datetime regardless of which read API they use.
+
+Subclass with ``table=True`` to declare a real SQLite table::
+
+    from sqlmodel import Field
+
+    class Sender(BaseTable, table=True):
+        id: int | None = Field(default=None, primary_key=True)
+        name: str
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from typing import Any
+
+from sqlalchemy import DateTime
+from sqlalchemy import types as sa_types
+from sqlmodel import Field, SQLModel
+
+from everos.component.utils.datetime import UtcDatetime, ensure_utc, get_utc_now
+
+
+class UtcDateTimeColumn(sa_types.TypeDecorator[_dt.datetime]):
+    """Datetime column type that keeps values UTC across the storage boundary.
+
+    How it works:
+
+    * ``impl = DateTime`` — storage is delegated to the dialect's regular
+      ``DateTime`` SQL type.
+    * ``process_bind_param`` — write hook. ``datetime`` values are passed
+      through :func:`ensure_utc`; ``None`` and any non-``datetime`` value
+      are handed on untouched.
+    * ``process_result_value`` — read hook. A naive ``datetime`` gets
+      ``tzinfo=UTC`` attached; everything else (``None``, aware datetimes,
+      other types) is returned as-is.
+
+    ``cache_ok = True`` marks the type as safe for SQLAlchemy's compiled
+    statement cache (it carries no per-instance mutable state).
+    """
+
+    impl = DateTime
+    cache_ok = True
+
+    def process_bind_param(
+        self, value: _dt.datetime | None, _dialect: Any
+    ) -> _dt.datetime | None:
+        # ``None`` is not a ``datetime``, so it shares the pass-through path
+        # with any other unexpected value type.
+        if isinstance(value, _dt.datetime):
+            return ensure_utc(value)
+        return value
+
+    def process_result_value(
+        self, value: _dt.datetime | None, _dialect: Any
+    ) -> _dt.datetime | None:
+        # Only naive datetimes are rewritten; ``None`` and aware values fall
+        # through unchanged.
+        if isinstance(value, _dt.datetime) and value.tzinfo is None:
+            return value.replace(tzinfo=_dt.UTC)
+        return value
+
+
+class BaseTable(SQLModel):
+    """Mixin providing ``created_at`` / ``updated_at`` columns.
+
+    Both default to :func:`get_utc_now` on INSERT.
+    ``updated_at`` is auto-refreshed by SQLAlchemy on every UPDATE via the
+    ``onupdate`` hook — do not set it manually unless overriding intentionally.
+
+    Both columns use :class:`UtcDateTimeColumn` as the SQL column type
+    so storage-UTC is enforced **at the SQLAlchemy bind layer** on every
+    write path that binds through the column type (ORM, Core, bulk).
+
+    NOTE(review): raw ``text()`` SQL presumably only gets this conversion
+    when its bound parameters are explicitly typed with the column type —
+    confirm before relying on it for hand-written SQL.
+    """
+
+    # Set once when the model instance is constructed. The factory is called
+    # separately for each field, so ``created_at`` and ``updated_at`` of a
+    # fresh row may differ by a few microseconds.
+    created_at: UtcDatetime = Field(
+        default_factory=get_utc_now,
+        sa_type=UtcDateTimeColumn,
+    )
+    # Same insert default as ``created_at``; ``onupdate`` is forwarded to the
+    # SQLAlchemy column so the value is regenerated on UPDATE statements.
+    updated_at: UtcDatetime = Field(
+        default_factory=get_utc_now,
+        sa_type=UtcDateTimeColumn,
+        sa_column_kwargs={"onupdate": get_utc_now},
+    )
--- a/src/everos/core/persistence/sqlite/engine.py
+++ b/src/everos/core/persistence/sqlite/engine.py
@ -0,0 +1,74 @@
+"""Async SQLAlchemy engine factory + per-connection PRAGMA listener.
+
+The engine connects through ``aiosqlite`` (SA URL ``sqlite+aiosqlite://``).
+PRAGMAs are *per-connection* — they must be re-applied every time the
+SA pool opens a new connection. We attach a ``connect`` event listener on
+the engine's underlying sync engine for that purpose.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from sqlalchemy import event
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
+
+from everos.config import SqliteSettings
+
+
+def create_system_engine(
+    db_path: Path,
+    sqlite_settings: SqliteSettings,
+    *,
+    echo: bool = False,
+) -> AsyncEngine:
+    """Create an async SQLAlchemy engine for the everos system DB.
+
+    ``MemoryRoot.system_db`` is the conventional path; the DB holds system
+    state, audit log, task queue, LSN watermark, and other metadata.
+
+    Args:
+        db_path: Filesystem path to the system DB file. Parent directory is
+            created if missing. Relative and absolute paths are both
+            accepted and are passed to SQLite verbatim.
+        sqlite_settings: Tunables (journal_mode, synchronous, foreign_keys,
+            temp_store, busy_timeout, journal_size_limit, cache_size) that
+            are applied as PRAGMAs on every new connection.
+        echo: When ``True``, SQLAlchemy logs every statement (development).
+
+    Returns:
+        An :class:`AsyncEngine` ready for use with :class:`AsyncSession`.
+    """
+    # Function-scope import: only this factory needs ``URL``.
+    from sqlalchemy.engine import URL
+
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Build the URL structurally instead of interpolating the path into a
+    # string. With ``f"sqlite+aiosqlite:///{db_path}"`` a ``?`` in the path
+    # is parsed as the start of the URL query string, so the text after it
+    # is no longer part of the database filename. ``URL.create`` stores the
+    # path verbatim as the ``database`` component.
+    url = URL.create("sqlite+aiosqlite", database=str(db_path))
+    engine = create_async_engine(url, echo=echo, future=True)
+
+    _register_pragma_listener(engine, sqlite_settings)
+    return engine
+
+
+def _register_pragma_listener(
+    engine: AsyncEngine,
+    sqlite_settings: SqliteSettings,
+) -> None:
+    """Hook the engine's ``connect`` event so each new connection gets its PRAGMAs.
+
+    The listener is registered on ``engine.sync_engine`` and reads
+    ``sqlite_settings`` each time it runs, i.e. once per new DBAPI connection.
+    """
+
+    @event.listens_for(engine.sync_engine, "connect")
+    def _apply_pragmas(dbapi_connection, _connection_record) -> None:  # type: ignore[no-untyped-def]
+        foreign_keys = "ON" if sqlite_settings.foreign_keys else "OFF"
+        # Executed in this order; the first failure aborts the rest.
+        statements = (
+            f"PRAGMA journal_mode={sqlite_settings.journal_mode}",
+            f"PRAGMA synchronous={sqlite_settings.synchronous}",
+            f"PRAGMA foreign_keys={foreign_keys}",
+            f"PRAGMA temp_store={sqlite_settings.temp_store}",
+            f"PRAGMA busy_timeout={sqlite_settings.busy_timeout_ms}",
+            f"PRAGMA journal_size_limit={sqlite_settings.journal_size_limit_bytes}",
+            # cache_size: negative = KB, positive = pages.
+            f"PRAGMA cache_size=-{sqlite_settings.cache_size_kb}",
+        )
+        cursor = dbapi_connection.cursor()
+        try:
+            for statement in statements:
+                cursor.execute(statement)
+        finally:
+            cursor.close()
--- a/src/everos/core/persistence/sqlite/repository.py
+++ b/src/everos/core/persistence/sqlite/repository.py
@ -0,0 +1,166 @@
+"""Generic CRUD repository for SQLModel-backed tables.
+
+``RepoBase`` is a pure generic CRUD helper that sits alongside
+:class:`BaseTable`. It knows nothing about a storage runtime — concrete
+repos either pass ``session_factory`` explicitly (typical in tests) or
+override :meth:`_factory_lookup` to pull the singleton from their
+storage manager (typical in :mod:`everos.infra.persistence.sqlite.repos`).
+
+Each method opens its own ``session_scope`` (auto rollback on exception,
+session closed at end). For multi-step transactional work, use the
+session factory directly via :attr:`session_factory`.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from sqlalchemy import func
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+from sqlmodel import SQLModel, select
+
+from .session import session_scope
+
+
+class RepoBase[T: SQLModel]:
+    """CRUD helper bound to a single SQLModel table.
+
+    Concrete repos subclass this and set :attr:`model`. The session factory
+    can be supplied in two ways:
+
+    1. **Explicit (tests / DI)** — hand it to the constructor::
+
+           repo = SenderRepo(session_factory)
+
+    2. **Lazy hook (production singletons)** — override
+       :meth:`_factory_lookup`, which lets the repo exist as a module-level
+       singleton before any factory is available::
+
+           class _SenderRepo(RepoBase[Sender]):
+               model = Sender
+               def _factory_lookup(self):
+                   from everos.infra.persistence.sqlite.sqlite_manager import (
+                       get_session_factory,
+                   )
+                   return get_session_factory()
+
+           sender_repo = _SenderRepo()
+           await sender_repo.add(Sender(name="alice"))
+
+    Every CRUD method below runs inside its own ``session_scope``.
+    """
+
+    model: type[T]
+
+    def __init__(
+        self,
+        session_factory: async_sessionmaker[AsyncSession] | None = None,
+    ) -> None:
+        """Remember ``session_factory``; ``None`` means "use ``_factory_lookup``"."""
+        self._factory_override = session_factory
+
+    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
+        """Return the session factory when none was passed to ``__init__``.
+
+        Called on each factory access while no explicit factory is bound.
+        The base implementation always raises so that a subclass which
+        forgot to wire its storage manager fails loudly.
+
+        Raises:
+            NotImplementedError: Always, unless overridden by a subclass.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__}: pass session_factory= to __init__ "
+            "or override _factory_lookup() to wire the storage manager."
+        )
+
+    @property
+    def _factory(self) -> async_sessionmaker[AsyncSession]:
+        # An explicitly injected factory wins; otherwise defer to the hook.
+        explicit = self._factory_override
+        return explicit if explicit is not None else self._factory_lookup()
+
+    @property
+    def session_factory(self) -> async_sessionmaker[AsyncSession]:
+        """The resolved session factory, for callers running multi-step transactions."""
+        return self._factory
+
+    # ── Create ─────────────────────────────────────────────────────────────
+
+    async def add(self, instance: T) -> T:
+        """Add ``instance``, commit, refresh it from the DB and return it."""
+        async with session_scope(self._factory) as session:
+            session.add(instance)
+            await session.commit()
+            await session.refresh(instance)
+        return instance
+
+    async def add_many(self, instances: Sequence[T]) -> list[T]:
+        """Add all ``instances`` with a single commit, then refresh each one."""
+        rows = list(instances)
+        async with session_scope(self._factory) as session:
+            session.add_all(rows)
+            await session.commit()
+            for row in rows:
+                await session.refresh(row)
+        return rows
+
+    # ── Read ───────────────────────────────────────────────────────────────
+
+    async def get_by_id(self, id_value: Any) -> T | None:
+        """Look up one row by primary key; ``None`` when it does not exist."""
+        async with session_scope(self._factory) as session:
+            row = await session.get(self.model, id_value)
+            return row
+
+    async def list_all(self) -> list[T]:
+        """Fetch every row of the table, unfiltered and unordered."""
+        async with session_scope(self._factory) as session:
+            result = await session.execute(select(self.model))
+            return list(result.scalars().all())
+
+    async def find_where(self, **filters: Any) -> list[T]:
+        """Rows whose columns equal ``filters``, e.g. ``find_where(name="alice")``."""
+        query = select(self.model).filter_by(**filters)
+        async with session_scope(self._factory) as session:
+            result = await session.execute(query)
+            return list(result.scalars().all())
+
+    async def find_one(self, **filters: Any) -> T | None:
+        """First row (no ordering) whose columns equal ``filters``, else ``None``."""
+        query = select(self.model).filter_by(**filters).limit(1)
+        async with session_scope(self._factory) as session:
+            result = await session.execute(query)
+            return result.scalars().first()
+
+    async def count(self) -> int:
+        """Number of rows in the table (no filter)."""
+        query = select(func.count()).select_from(self.model)
+        async with session_scope(self._factory) as session:
+            result = await session.execute(query)
+            return int(result.scalar_one())
+
+    # ── Update ─────────────────────────────────────────────────────────────
+
+    async def update(self, instance: T) -> T:
+        """Merge ``instance`` into a session, commit, and return the merged row.
+
+        ``session.merge`` is used so detached or freshly constructed
+        instances can be passed in. The returned object is the session's
+        merged copy, refreshed after the commit. For ``BaseTable``
+        subclasses ``updated_at`` is bumped by the column's ``onupdate``.
+        """
+        async with session_scope(self._factory) as session:
+            persistent = await session.merge(instance)
+            await session.commit()
+            await session.refresh(persistent)
+        return persistent
+
+    # ── Delete ─────────────────────────────────────────────────────────────
+
+    async def delete(self, instance: T) -> None:
+        """Delete the row matching ``instance`` (its primary key must be set)."""
+        async with session_scope(self._factory) as session:
+            # Merge first so a detached instance becomes session-bound.
+            persistent = await session.merge(instance)
+            await session.delete(persistent)
+            await session.commit()
+
+    async def delete_by_id(self, id_value: Any) -> bool:
+        """Delete by primary key; ``True`` if a row existed and was removed."""
+        async with session_scope(self._factory) as session:
+            row = await session.get(self.model, id_value)
+            if row is not None:
+                await session.delete(row)
+                await session.commit()
+                return True
+            return False
--- a/src/everos/core/persistence/sqlite/session.py
+++ b/src/everos/core/persistence/sqlite/session.py
@ -0,0 +1,45 @@
+"""Async session factory + session scope context manager."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
+
+
+def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
+    """Return an :class:`async_sessionmaker` that produces sessions on ``engine``.
+
+    Sessions are created with ``expire_on_commit=False`` so that loaded
+    attributes stay readable after ``commit()`` without triggering a lazy
+    reload — the usual configuration for async SQLAlchemy.
+    """
+    factory: async_sessionmaker[AsyncSession] = async_sessionmaker(
+        bind=engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
+    )
+    return factory
+
+
+@asynccontextmanager
+async def session_scope(
+    session_factory: async_sessionmaker[AsyncSession],
+) -> AsyncIterator[AsyncSession]:
+    """Open a session from ``session_factory`` and yield it to the caller.
+
+    If the ``async with`` body raises an :class:`Exception`, the session is
+    rolled back and the exception re-raised. The session's own context
+    manager is exited in every case. Nothing is committed here — the caller
+    must ``await session.commit()`` on success.
+
+    Usage:
+        factory = create_session_factory(engine)
+        async with session_scope(factory) as session:
+            session.add(some_record)
+            await session.commit()
+    """
+    async with session_factory() as db_session:
+        try:
+            yield db_session
+        except Exception:
+            # Undo pending work before the error reaches the caller.
+            await db_session.rollback()
+            raise