chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:

- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and checks for OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/e2e/__init__.py
+++ b/tests/e2e/__init__.py
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@ -0,0 +1,286 @@
+"""Shared fixtures for ``tests/e2e/``.
+
+Provides:
+
+- ``core_pipeline_runtime``: tmp memory root + reset memorize singletons.
+  Uses the **real** LLM / embedding / rerank creds from ``.env`` per the
+  project test policy.
+- ``async_client``: ``httpx.AsyncClient`` wired into ``create_app()`` with
+  the full lifespan stack (SQLite + LanceDB + Cascade + OME).
+- ``cascade_done_poll``: wait until ``md_change_state`` queue is fully
+  drained (``pending`` rows == 0; includes the internal ``processing``).
+- ``pipeline_done_poll``: composite drain — waits until OME strategy runs AND
+  ``md_change_state`` queue both drain (use for tests that exercise the full
+  OME → md → cascade pipeline).
+- ``buffer_count`` / ``memcell_count``: raw counts for buffer-delta and
+  memcell-growth assertions.
+
+The ``long_conversation`` fixture (LoCoMo conv_0) lives in
+:mod:`tests.conftest` so both ``tests/e2e/`` and
+``tests/integration/search/`` can depend on it.
+
+Conventions:
+
+- ``.env`` is loaded at import time (before any everos module reads
+  settings) — overrides for ``EVEROS_MEMORY__ROOT`` happen per-test.
+- This file does **not** define ``cascade_runtime`` — that name belongs
+  to ``tests/integration/test_cascade_integration.py``'s local fixture.
+  The pipeline test uses ``core_pipeline_runtime`` to avoid name
+  collision.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import json
+from collections.abc import AsyncIterator, Awaitable, Callable
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+from sqlalchemy import text
+
+# Load real .env creds before any everos import touches load_settings().
+_PROJECT_ROOT = Path(__file__).resolve().parents[2]
+load_dotenv(_PROJECT_ROOT / ".env", override=False)
+
+_FIXTURE_DIR = _PROJECT_ROOT / "tests" / "fixtures"
+_SEARCH_SEED_DIR = _FIXTURE_DIR / "search_seed"
+
+# Memorize service module-level singletons that survive across tests; we
+# null them out so each test rebuilds against its own ``tmp_path``.
+_MEMORIZE_SINGLETONS: tuple[str, ...] = (
+    "_episode_writer",
+    "_prompt_loader",
+    "_user_pipeline",
+    "_agent_pipeline",
+    "_ome_engine",
+)
+
+# OME strategy modules carry module-level lazy singletons (``_writer`` /
+# ``_reader``) that capture ``MemoryRoot.default()`` at first call. They
+# survive across tests, so the second test writes its output to the
+# **first test's** tmp_path. Reset all of them per-test.
+_STRATEGY_SINGLETONS: tuple[tuple[str, tuple[str, ...]], ...] = (
+    ("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
+    ("everos.memory.strategies.extract_foresight", ("_writer",)),
+    ("everos.memory.strategies.extract_user_profile", ("_writer", "_reader")),
+    ("everos.memory.strategies.extract_agent_case", ("_writer",)),
+    ("everos.memory.strategies.extract_agent_skill", ("_writer",)),
+)
+
+
+def _reset_strategy_singletons(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Null the lazy ``_writer`` / ``_reader`` singletons of each strategy module.
+
+    Walks ``_STRATEGY_SINGLETONS`` (module path -> attribute names) and sets
+    every listed attribute to ``None`` so the next use rebuilds it against
+    the ``EVEROS_MEMORY__ROOT`` the calling fixture just set. Patching goes
+    through ``monkeypatch``, so the previous values come back at teardown;
+    ``raising=False`` tolerates a module that lacks one of the attributes.
+    """
+    targets = (
+        (importlib.import_module(module_path), attr_name)
+        for module_path, attr_names in _STRATEGY_SINGLETONS
+        for attr_name in attr_names
+    )
+    for module, attr_name in targets:
+        monkeypatch.setattr(module, attr_name, None, raising=False)
+
+
+# ---------------------------------------------------------------------------
+# Data fixture
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def search_seed() -> dict[str, list[dict]]:
+    """Load the search seed slice produced by ``_dump_search_seed.py``.
+
+    Returns a dict with four keys (``episode`` / ``atomic_fact`` /
+    ``foresight`` / ``user_profile``); each value is a list of raw row
+    dicts ready to be fed into ``Model.model_validate`` for LanceDB.
+
+    The fixture is session-scoped, so every test receives the *same*
+    objects. Tests pick the subset they need, but must copy a row before
+    changing its fields (e.g. to set distinct ``session_id`` values for
+    the filter DSL) — an in-place mutation would leak into later tests.
+    """
+    kinds = ("episode", "atomic_fact", "foresight", "user_profile")
+    return {
+        # Explicit UTF-8: JSON is UTF-8 by spec, and the default encoding
+        # of ``read_text`` depends on the host locale.
+        kind: json.loads(
+            (_SEARCH_SEED_DIR / f"{kind}.json").read_text(encoding="utf-8")
+        )
+        for kind in kinds
+    }
+
+
+# ---------------------------------------------------------------------------
+# Runtime fixture: tmp memory root + singleton reset (no app lifespan)
+# ---------------------------------------------------------------------------
+
+
+@pytest_asyncio.fixture
+async def core_pipeline_runtime(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> AsyncIterator[Path]:
+    """Point everos at a fresh memory root and drop cached singletons.
+
+    Only ``EVEROS_MEMORY__ROOT`` is overridden (to ``tmp_path``); the real
+    LLM / embedding settings from ``.env`` stay untouched — do NOT
+    overwrite ``EVEROS_LLM__*`` or ``EVEROS_EMBEDDING__*`` here.
+
+    Yields:
+        The per-test memory root (``tmp_path``).
+    """
+    # Env var first: everything reset below is meant to be rebuilt from it.
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+
+    from everos.config import load_settings
+
+    # Drop the cached settings so the next load re-reads the environment.
+    load_settings.cache_clear()
+
+    memorize_mod = importlib.import_module("everos.service.memorize")
+    llm_client_mod = importlib.import_module("everos.component.llm.client")
+
+    for singleton_name in _MEMORIZE_SINGLETONS:
+        monkeypatch.setattr(memorize_mod, singleton_name, None, raising=False)
+    monkeypatch.setattr(llm_client_mod, "_llm_client", None, raising=False)
+    _reset_strategy_singletons(monkeypatch)
+
+    yield tmp_path
+
+
+# ---------------------------------------------------------------------------
+# Async client fixture (full app lifespan)
+# ---------------------------------------------------------------------------
+
+
+@pytest_asyncio.fixture
+async def async_client(
+    core_pipeline_runtime: Path,
+) -> AsyncIterator[httpx.AsyncClient]:
+    """Bring up the full everos app with lifespan, return an httpx client.
+
+    The lifespan starts: SQLite engine, LanceDB connection + business
+    indexes, Cascade orchestrator (watcher + scanner + worker), OME
+    engine. Teardown stops everything in reverse.
+
+    ``core_pipeline_runtime`` is requested only for its side effects (tmp
+    memory root + singleton reset); its value is not used in the body.
+    Depending on it makes pytest run that setup before the app is created.
+
+    Yields:
+        An ``httpx.AsyncClient`` that talks to the app in-process through
+        ``httpx.ASGITransport`` with base URL ``http://test``.
+    """
+    # Function-scope import — presumably so everos modules are loaded only
+    # after the env override above is in place (TODO confirm).
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+
+    # Drive starlette's lifespan_context explicitly — httpx.ASGITransport
+    # does not run startup / shutdown on its own.
+    # The lifespan is entered first and therefore exited last, so the
+    # client is closed before the app shuts down.
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        yield client
+
+
+# ---------------------------------------------------------------------------
+# Poll helpers
+# ---------------------------------------------------------------------------
+
+
+async def _poll(
+    condition: Callable[[], Awaitable[bool]],
+    *,
+    deadline_seconds: float,
+    interval: float = 0.5,
+) -> None:
+    """Re-evaluate ``condition`` until it is truthy or the deadline passes.
+
+    Args:
+        condition: Async zero-argument predicate to await on each attempt.
+        deadline_seconds: Overall budget for the whole wait.
+        interval: Pause between two consecutive attempts, in seconds.
+
+    Raises:
+        TimeoutError: The deadline expired before ``condition`` was truthy.
+    """
+    async with asyncio.timeout(deadline_seconds):
+        while not await condition():
+            await asyncio.sleep(interval)
+
+
+@pytest.fixture
+def cascade_done_poll() -> Callable[..., Awaitable[None]]:
+    """Return an async waiter for an empty ``md_change_state`` queue.
+
+    The returned callable takes a keyword-only ``deadline_seconds``
+    (default 180.0) and polls until the queue summary reports zero
+    ``pending`` rows; it raises ``TimeoutError`` when the deadline passes.
+    """
+
+    async def _wait(*, deadline_seconds: float = 180.0) -> None:
+        from everos.infra.persistence.sqlite import md_change_state_repo
+
+        async def _queue_is_empty() -> bool:
+            # `pending` includes the internal `processing` rows (see QueueSummary).
+            return (await md_change_state_repo.queue_summary()).pending == 0
+
+        await _poll(_queue_is_empty, deadline_seconds=deadline_seconds)
+
+    return _wait
+
+
+@pytest.fixture
+def pipeline_done_poll() -> Callable[..., Awaitable[None]]:
+    """Return an async waiter for "OME idle AND ``md_change_state`` drained".
+
+    Composite drain. :func:`cascade_done_poll` on its own can return
+    immediately while a slow LLM-driven strategy is still in flight: the
+    strategy has not written md yet, so the cascade queue is momentarily
+    empty. Tests that exercise the full async chain
+    (OME -> md -> cascade -> LanceDB) must wait with this fixture instead
+    of ``cascade_done_poll``.
+
+    The returned callable takes a keyword-only ``deadline_seconds``
+    (default 180.0) and raises ``TimeoutError`` when the deadline passes.
+    """
+
+    async def _wait(*, deadline_seconds: float = 180.0) -> None:
+        from everos.infra.persistence.sqlite import md_change_state_repo
+        from everos.service.memorize import _get_engine
+
+        ome_engine = _get_engine()
+
+        async def _pipeline_is_quiet() -> bool:
+            # Check the OME side first: cascade can only fire after a
+            # strategy writes md, so looking at the queue while a run is
+            # still in flight would be premature.
+            if await ome_engine.wait_idle(timeout=0.5):
+                # `pending` includes the internal `processing` rows (see
+                # QueueSummary).
+                queue = await md_change_state_repo.queue_summary()
+                return queue.pending == 0
+            return False
+
+        await _poll(_pipeline_is_quiet, deadline_seconds=deadline_seconds)
+
+    return _wait
+
+
+# ---------------------------------------------------------------------------
+# Count helpers (used directly by tests for buffer-delta assertions)
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def buffer_count() -> Callable[[str], Awaitable[int]]:
+    """Return an async callable: ``await buffer_count(session_id) -> int``.
+
+    The callable counts the ``unprocessed_buffer`` rows that belong to the
+    given session, using a bound parameter for the session id.
+    """
+
+    async def _count(session_id: str) -> int:
+        from everos.infra.persistence.sqlite import get_engine
+
+        query = text("SELECT COUNT(*) FROM unprocessed_buffer WHERE session_id = :sid")
+        async with get_engine().connect() as conn:
+            rows = await conn.execute(query, {"sid": session_id})
+            # A missing scalar is reported as 0 rather than ``None``.
+            return int(rows.scalar() or 0)
+
+    return _count
+
+
+@pytest.fixture
+def memcell_count() -> Callable[..., Awaitable[int]]:
+    """Return an async callable: ``await memcell_count(session_id=None) -> int``.
+
+    Counts rows in the ``memcell`` table; pass ``session_id`` to count only
+    that session's rows, or omit it to count all rows.
+    """
+
+    async def _count(session_id: str | None = None) -> int:
+        from everos.infra.persistence.sqlite import get_engine
+
+        engine = get_engine()
+        async with engine.connect() as conn:
+            if session_id is None:
+                # No filter: total number of memcell rows.
+                result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
+            else:
+                # Session filter goes through a bound parameter.
+                result = await conn.execute(
+                    text("SELECT COUNT(*) FROM memcell WHERE session_id = :sid"),
+                    {"sid": session_id},
+                )
+            # A missing scalar is reported as 0 rather than ``None``.
+            return int(result.scalar() or 0)
+
+    return _count
--- a/tests/e2e/test_add_flush_agent_pipeline_e2e.py
+++ b/tests/e2e/test_add_flush_agent_pipeline_e2e.py
@ -0,0 +1,206 @@
+"""Agent pipeline e2e: 5 SWE-bench trajectories drive /add + /flush.
+
+Drives the full HTTP route through to storage, exercising the agent-track
+pipeline (boundary → memcell → extract_agent_case → trigger_skill_clustering
+→ extract_agent_skill) with real LLM and real embedder credentials.
+
+Mixed tenancy by design (sender_id alignment from fixture):
+
+    agent_pytest  (1 session, pytest-dev/pytest-7236)      ┐ independent
+    agent_sympy   (1 session, sympy/sympy-18763)           ┘ owners
+    agent_django  (3 sessions, django/django-{14311,16255,16263})  shared
+
+Concurrency strategy (workaround for the known
+``trigger_skill_clustering`` read-modify-write race on a shared owner_id):
+
+    Phase 1: pytest + sympy concurrent via asyncio.gather (disjoint owners)
+    Phase 2: 3 django sessions sequential (same owner, would race)
+
+Once the cluster race is fixed in production, Phase 2 can collapse into
+the same gather and the test will still pass — the assertions are
+race-free, only the driver is conservative.
+
+White-box assertions (audit trail of internal surfaces touched):
+    - sqlite ``memcell`` rows per session_id
+    - filesystem ``<root>/agents/<agent>/.cases/*.md`` presence
+    - LanceDB ``agent_case`` rows by ``owner_id`` (count + session_id set)
+    - LanceDB ``agent_skill`` rows by ``owner_id`` (soft — LLM-dependent)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from collections.abc import Awaitable, Callable
+from pathlib import Path
+
+import httpx
+import pytest
+
+from everos.infra.persistence.lancedb import agent_case_repo, agent_skill_repo
+from everos.infra.persistence.markdown import AgentCaseDailyFrontmatter
+
+_FIXTURE_DIR = Path(__file__).resolve().parents[1] / "fixtures" / "agent_trajectories"
+
+# Hand-picked trajectories (kept in-tree as fixtures; this selection is
+# the source of truth — the original converter is not in the repo).
+_PYTEST_SESSION = "session_pytest_7236"
+_SYMPY_SESSION = "session_sympy_18763"
+_DJANGO_SESSIONS = (
+    "session_django_14311",
+    "session_django_16255",
+    "session_django_16263",
+)
+
+_AGENT_PYTEST = "agent_pytest"
+_AGENT_SYMPY = "agent_sympy"
+_AGENT_DJANGO = "agent_django"
+
+# Phase 3 drain budget: OME chain (case → cluster → skill) writes md in
+# stages, each picked up by cascade. Multiple drain rounds with brief
+# sleeps let the chain quiesce without false-positive completion.
+_DRAIN_ROUNDS = 4
+_DRAIN_TIMEOUT_SECONDS = 300.0
+_DRAIN_INTER_ROUND_SLEEP_SECONDS = 5.0
+
+
+def _load_fixture(session_id: str) -> dict:
+    """Load the trajectory fixture ``<session_id>.json`` from ``_FIXTURE_DIR``.
+
+    The file is read as UTF-8 explicitly (JSON's interchange encoding) so
+    the result does not depend on the host's locale default.
+    """
+    fixture_path = _FIXTURE_DIR / f"{session_id}.json"
+    return json.loads(fixture_path.read_text(encoding="utf-8"))
+
+
+async def _drive_session(
+    client: httpx.AsyncClient, session_data: dict
+) -> tuple[str, str]:
+    """POST one trajectory to /add, then /flush the same session.
+
+    Both calls must answer HTTP 200; a failure message carries the
+    session id, the status code and the first 300 characters of the body.
+
+    Returns:
+        ``(session_id, status)`` where ``status`` is the ``data.status``
+        field of the /flush response.
+    """
+    sid = session_data["everos_session_id"]
+
+    # MessageItemDTO.max_length=500; our largest fixture has 324 messages,
+    # so the whole trajectory goes out in a single /add call.
+    add_resp = await client.post(
+        "/api/v1/memory/add",
+        json={"session_id": sid, "messages": session_data["messages"]},
+        timeout=600.0,
+    )
+    assert add_resp.status_code == 200, (
+        f"{sid}: /add returned {add_resp.status_code} — {add_resp.text[:300]}"
+    )
+
+    flush_resp = await client.post(
+        "/api/v1/memory/flush",
+        json={"session_id": sid},
+        timeout=600.0,
+    )
+    assert flush_resp.status_code == 200, (
+        f"{sid}: /flush returned {flush_resp.status_code} — {flush_resp.text[:300]}"
+    )
+    return sid, flush_resp.json()["data"]["status"]
+
+
+@pytest.mark.slow
+@pytest.mark.live_llm
+async def test_agent_pipeline_e2e_mixed_tenancy(
+    async_client: httpx.AsyncClient,
+    core_pipeline_runtime: Path,
+    pipeline_done_poll: Callable[..., Awaitable[None]],
+    memcell_count: Callable[..., Awaitable[int]],
+) -> None:
+    """5 SWE-bench trajectories → agent_case + agent_skill on three agents.
+
+    Flow: drive /add + /flush for every session (two independent owners
+    concurrently, the three django sessions sequentially), drain the
+    OME + cascade pipeline over several rounds, then assert on sqlite
+    memcells, on-disk case md files, LanceDB ``agent_case`` rows and strict
+    md ↔ LanceDB parity. ``agent_skill`` rows are queried but not enforced.
+    """
+    # The value of the runtime fixture is the tmp memory root for this test.
+    memory_root = core_pipeline_runtime
+
+    pytest_fx = _load_fixture(_PYTEST_SESSION)
+    sympy_fx = _load_fixture(_SYMPY_SESSION)
+    django_fxs = [_load_fixture(s) for s in _DJANGO_SESSIONS]
+
+    # ── Phase 1: independent owners concurrent ────────────────────────────
+    await asyncio.gather(
+        _drive_session(async_client, pytest_fx),
+        _drive_session(async_client, sympy_fx),
+    )
+
+    # ── Phase 2: shared owner_id, sequential to dodge cluster race ────────
+    for fx in django_fxs:
+        await _drive_session(async_client, fx)
+
+    # ── Phase 3: drain OME chain + cascade ────────────────────────────────
+    # NOTE(review): the loop also sleeps after the final round, so the
+    # assertions below start without a drain check following that last
+    # sleep — confirm this is intended.
+    for _ in range(_DRAIN_ROUNDS):
+        await pipeline_done_poll(deadline_seconds=_DRAIN_TIMEOUT_SECONDS)
+        await asyncio.sleep(_DRAIN_INTER_ROUND_SLEEP_SECONDS)
+
+    # ── Phase 4: assertions ───────────────────────────────────────────────
+
+    # 4.1 every session produced ≥1 memcell
+    all_sessions = (_PYTEST_SESSION, _SYMPY_SESSION, *_DJANGO_SESSIONS)
+    for sid in all_sessions:
+        n = await memcell_count(sid)
+        assert n >= 1, f"no memcell for session {sid!r} (got {n})"
+
+    # 4.2 each agent has a .cases dir with ≥1 .md file
+    agents_dir = memory_root / "default_app" / "default_project" / "agents"
+    case_dir_name = AgentCaseDailyFrontmatter.DIR_NAME
+    for agent_id in (_AGENT_PYTEST, _AGENT_SYMPY, _AGENT_DJANGO):
+        case_dir = agents_dir / agent_id / case_dir_name
+        assert case_dir.is_dir(), f"missing {case_dir!s} for agent={agent_id!r}"
+        md_files = list(case_dir.glob("*.md"))
+        assert md_files, f"no agent_case md under {case_dir!s}"
+
+    # 4.3 LanceDB agent_case rows per owner
+    # The filter strings interpolate module-level constants only, never
+    # external input.
+    pytest_cases = await agent_case_repo.find_where(f"owner_id = '{_AGENT_PYTEST}'")
+    sympy_cases = await agent_case_repo.find_where(f"owner_id = '{_AGENT_SYMPY}'")
+    django_cases = await agent_case_repo.find_where(f"owner_id = '{_AGENT_DJANGO}'")
+
+    assert len(pytest_cases) >= 1, (
+        f"no agent_pytest rows in LanceDB (got {len(pytest_cases)})"
+    )
+    assert len(sympy_cases) >= 1, (
+        f"no agent_sympy rows in LanceDB (got {len(sympy_cases)})"
+    )
+    # Each django session writes at least one cell → at least one case per
+    # session. Lower bound 3 covers the minimum; LLM may produce more.
+    assert len(django_cases) >= 3, (
+        f"agent_django expected ≥3 LanceDB cases (3 sessions), got {len(django_cases)}"
+    )
+
+    # 4.4 cross-owner isolation — each agent's cases trace back only to
+    # its own sessions
+    pytest_session_ids = {c.session_id for c in pytest_cases}
+    assert pytest_session_ids == {_PYTEST_SESSION}, (
+        f"agent_pytest cases leaked across sessions: {pytest_session_ids}"
+    )
+    sympy_session_ids = {c.session_id for c in sympy_cases}
+    assert sympy_session_ids == {_SYMPY_SESSION}, (
+        f"agent_sympy cases leaked across sessions: {sympy_session_ids}"
+    )
+    django_session_ids = {c.session_id for c in django_cases}
+    assert django_session_ids == set(_DJANGO_SESSIONS), (
+        f"agent_django session set mismatch — got {django_session_ids}, "
+        f"want {set(_DJANGO_SESSIONS)}"
+    )
+
+    # 4.5 agent_skill — soft: emission depends on LLM clustering quality
+    # gate (skip_quality_threshold + cluster size). pytest/sympy are
+    # single-case clusters and may legitimately yield 0 skills. django
+    # has 3 cases and should aggregate into ≥1 cluster of size ≥2,
+    # producing ≥1 skill — but we keep this informational (LLM-dependent)
+    # rather than a hard floor to avoid flaky CI signal.
+    pytest_skills = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_PYTEST}'")
+    sympy_skills = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_SYMPY}'")
+    django_skills = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_DJANGO}'")
+    # Hard sanity: counts non-negative (the repo isn't broken).
+    # NOTE(review): ``len(...) >= 0`` can never fail; the effective check
+    # is only that the three ``find_where`` calls above return without
+    # raising and yield sized results.
+    assert len(pytest_skills) >= 0
+    assert len(sympy_skills) >= 0
+    assert len(django_skills) >= 0
+
+    # 4.6 strict md ↔ LanceDB parity across every cascade kind
+    #
+    # The per-owner counts above are loose (LLM-emission-dependent); this
+    # check enforces byte-exact id-set + content_sha256 parity across
+    # every md the agent pipeline wrote.
+    #
+    # ``expect_at_least`` pins agent_case (every session writes ≥1 case)
+    # so an empty glob would fail loudly. agent_skill is NOT pinned —
+    # emission depends on the LLM clustering quality gate per 4.5; a
+    # legitimately empty agent_skill md set is still a passing run.
+    from tests._consistency_assertions import assert_md_lance_strict_consistent
+
+    await assert_md_lance_strict_consistent(
+        memory_root,
+        expect_at_least={"agent_case": 1},
+    )
--- a/tests/e2e/test_add_flush_user_pipeline_e2e.py
+++ b/tests/e2e/test_add_flush_user_pipeline_e2e.py
@ -0,0 +1,337 @@
+"""Add + Flush core pipeline smoke — long real-conversation drive.
+
+Goal: prove the user-side add/flush chain is end-to-end live. Feeds
+**419 real LoCoMo messages** through ``POST /api/v1/memory/add`` (in 19
+batches sharing one session_id) then a final ``POST /flush``, and
+verifies:
+
+1. Each /add returns a sane status and the unprocessed_buffer delta
+   matches what the service claims (accumulated → grew by batch size;
+   extracted → shrank or stayed flat).
+2. After /flush the buffer is empty and the memcell table has rows.
+3. After cascade drains, episode md files exist and LanceDB rows
+   reflect them with valid content_sha256 + vector.
+4. OME-driven async strategies have produced atomic_fact / foresight /
+   profile md files.
+
+Real LLM + real embedder (creds via ``.env``). Marked ``slow`` — run with
+``pytest -m slow tests/e2e/test_add_flush_user_pipeline_e2e.py``.
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+from collections.abc import Awaitable, Callable
+from pathlib import Path
+
+import httpx
+import pytest
+
+from everos.infra.persistence.markdown import (
+    AtomicFactDailyFrontmatter,
+    EpisodeDailyFrontmatter,
+    ForesightDailyFrontmatter,
+)
+
+# Directory names live on the frontmatter schemas (single source of truth);
+# atomic_facts / foresights are dotfile-hidden so users only see episodes.
+_EPISODE_DIR = EpisodeDailyFrontmatter.DIR_NAME
+_ATOMIC_FACT_DIR = AtomicFactDailyFrontmatter.DIR_NAME
+_FORESIGHT_DIR = ForesightDailyFrontmatter.DIR_NAME
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _to_add_messages(batch: dict) -> list[dict]:
+    """Project each message of ``batch`` onto the fields sent to /add.
+
+    Keeps ``sender_id`` / ``role`` / ``timestamp`` / ``content`` (what
+    MessageItemDTO accepts) and drops everything else, e.g. the
+    ``_audit_*`` fields. A message missing one of the four keys raises
+    ``KeyError``.
+    """
+    kept_fields = ("sender_id", "role", "timestamp", "content")
+    return [
+        {field: message[field] for field in kept_fields}
+        for message in batch["messages"]
+    ]
+
+
+def _list_md_files(memory_root: Path, subpath: str) -> list[Path]:
+    """Collect md files named by ``subpath`` for every user under the root.
+
+    For each child of ``<memory_root>/default_app/default_project/users/``:
+
+    - if ``<child>/<subpath>`` is a directory, every ``*.md`` below it
+      (recursively) is included;
+    - otherwise, if ``<child>/<subpath>`` with its suffix replaced by
+      ``.md`` exists, that single file is included.
+
+    Returns an empty list when the ``users`` directory does not exist.
+    """
+    users_root = memory_root / "default_app" / "default_project" / "users"
+    if not users_root.exists():
+        return []
+
+    found: list[Path] = []
+    for owner_dir in users_root.iterdir():
+        candidate = owner_dir / subpath
+        if candidate.is_dir():
+            found.extend(candidate.rglob("*.md"))
+            continue
+        flat_file = candidate.with_suffix(".md")
+        if flat_file.exists():
+            found.append(flat_file)
+    return found
+
+
+def _count_episode_entries(md_files: list[Path]) -> int:
+    """Count entry headings across all given episode md files.
+
+    An entry heading is any line that, once stripped, starts with ``## ``
+    and is not one of the subsection headers used inside an entry
+    (``## Subject`` / ``## Summary`` / ``## Content`` / ``## Fact`` /
+    ``## Foresight``). Files are read as UTF-8 explicitly so the count
+    does not depend on the host's locale default.
+    """
+    # Second-level headings that belong to the inside of an entry.
+    subsection_headers = (
+        "## Subject",
+        "## Summary",
+        "## Content",
+        "## Fact",
+        "## Foresight",
+    )
+    total = 0
+    for md_file in md_files:
+        for line in md_file.read_text(encoding="utf-8").splitlines():
+            heading = line.strip()
+            # Daily-log entries start with `## ` followed by an id token.
+            if heading.startswith("## ") and not heading.startswith(
+                subsection_headers
+            ):
+                total += 1
+    return total
+
+
+def _maybe_snapshot_memory_root(memory_root: Path) -> None:
+    """Mirror ``memory_root`` into ``$EVEROS_KEEP_CORPUS_TO`` when it is set.
+
+    Lets a green run be harvested as a known-good corpus (md + sqlite +
+    lancedb three-piece set) for later upload as the /search e2e fixture.
+    Does nothing when the variable is unset or empty. An existing
+    destination is removed first, so the snapshot replaces it.
+
+    Pure sync I/O — kept out of the async test body so ASYNC240 doesn't
+    complain about pathlib usage on the async path.
+    """
+    keep_to = os.environ.get("EVEROS_KEEP_CORPUS_TO")
+    if keep_to:
+        snapshot_dir = Path(keep_to).resolve()
+        if snapshot_dir.exists():
+            shutil.rmtree(snapshot_dir)
+        # copytree creates the destination itself; only the parent chain
+        # has to exist beforehand.
+        snapshot_dir.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copytree(memory_root, snapshot_dir)
+
+
+# ---------------------------------------------------------------------------
+# The test (slow — hits real LLM + embedder; opt in via `pytest -m slow`)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.slow
+@pytest.mark.live_llm
+# Retries cover transient real-LLM flakes: OME profile clustering
+# occasionally fails to emit user.md within the drain deadline (LLM
+# timeout or empty response), but is reliably stable on retry.
+# reruns_delay leaves the cascade workers idle between attempts so we
+# don't pile state on top of a prior run.
+@pytest.mark.flaky(reruns=2, reruns_delay=5)
+async def test_long_conversation_produces_all_memory_types(
+    long_conversation: dict,
+    async_client: httpx.AsyncClient,
+    core_pipeline_runtime: Path,
+    pipeline_done_poll: Callable[..., Awaitable[None]],
+    buffer_count: Callable[[str], Awaitable[int]],
+    memcell_count: Callable[..., Awaitable[int]],
+) -> None:
+    """One big seamless run: add 19 batches, flush, poll, assert everything.
+
+    Stages: baseline → per-batch /add with buffer/memcell delta checks →
+    /flush → wait for OME + cascade to drain → assert md artifacts and
+    LanceDB rows → strict md ↔ LanceDB parity → optional corpus snapshot.
+    """
+
+    session_id = long_conversation["everos_session_id"]
+    memory_root = core_pipeline_runtime
+
+    # ── Stage 0: baseline ─────────────────────────────────────────────────
+    assert await buffer_count(session_id) == 0
+    assert await memcell_count(session_id) == 0
+
+    # ── Stage 1: drip 19 batches into /add, asserting buffer delta ────────
+    last_status: str | None = None
+
+    for idx, batch in enumerate(long_conversation["batches"]):
+        msg_count = batch["message_count"]
+
+        buf_before = await buffer_count(session_id)
+        cells_before = await memcell_count(session_id)
+
+        resp = await async_client.post(
+            "/api/v1/memory/add",
+            json={"session_id": session_id, "messages": _to_add_messages(batch)},
+            timeout=600.0,  # boundary detection may call LLM
+        )
+        assert resp.status_code == 200, (
+            f"batch {idx} ({batch['locomo_session']}): {resp.status_code} {resp.text}"
+        )
+        body = resp.json()
+        status: str = body["data"]["status"]
+        returned_count: int = body["data"]["message_count"]
+        assert status in {"accumulated", "extracted"}, body
+        assert returned_count == msg_count, body
+        last_status = status
+
+        buf_after = await buffer_count(session_id)
+        cells_after = await memcell_count(session_id)
+
+        # Buffer-delta invariants:
+        if status == "accumulated":
+            # No boundary cut → entire batch piled into the buffer.
+            assert buf_after == buf_before + msg_count, (
+                f"batch {idx} accumulated: expected buf {buf_before + msg_count}, "
+                f"got {buf_after}"
+            )
+            assert cells_after == cells_before, (
+                f"batch {idx} accumulated: memcell should not change "
+                f"({cells_before} → {cells_after})"
+            )
+        else:  # "extracted"
+            # Boundary fired: some messages turned into memcell(s), tail
+            # (if any) stays in the buffer. We can't predict the exact tail
+            # length but two invariants must hold.
+            assert cells_after > cells_before, (
+                f"batch {idx} extracted: memcell should grow "
+                f"({cells_before} → {cells_after})"
+            )
+            assert buf_after >= 0
+            # Conservation: nothing should silently vanish — the union of
+            # (buffer carry-over + this batch) must equal (new buffer +
+            # messages carved into cells). We approximate by asserting the
+            # new buffer is at most the carry-over + this batch size.
+            assert buf_after <= buf_before + msg_count, (
+                f"batch {idx} extracted: buffer overflow "
+                f"({buf_before} + {msg_count} → {buf_after})"
+            )
+
+    # ── Stage 2: flush ────────────────────────────────────────────────────
+    cells_pre_flush = await memcell_count(session_id)
+    resp = await async_client.post(
+        "/api/v1/memory/flush",
+        json={"session_id": session_id},
+        timeout=600.0,
+    )
+    assert resp.status_code == 200, resp.text
+    flush_status = resp.json()["data"]["status"]
+    assert flush_status in {"extracted", "no_extraction"}, resp.json()
+
+    assert await buffer_count(session_id) == 0, "buffer must be drained after flush"
+
+    cells_after_flush = await memcell_count(session_id)
+    # If the last /add was already 'extracted' and emptied the buffer,
+    # flush returns 'no_extraction'. Otherwise flush must produce ≥ 1
+    # cell to satisfy the boundary semantics.
+    if flush_status == "extracted":
+        assert cells_after_flush > cells_pre_flush
+
+    # 419 LoCoMo messages produce ~19 memcells in practice (LLM boundary
+    # decides semantic cuts; daily-life chat carves coarsely). Threshold
+    # 15 leaves room for run-to-run variance from the boundary LLM.
+    assert cells_after_flush >= 15, (
+        f"expected ≥ 15 memcells from 419 messages, got {cells_after_flush}; "
+        f"last add status was {last_status!r}, flush was {flush_status!r}"
+    )
+
+    # ── Stage 3 + 4: wait for OME + cascade to drain ──────────────────────
+    # OME async strategies (atomic / foresight / profile) write md, which
+    # cascade then syncs into LanceDB. Waiting on the cascade queue alone
+    # can return while a slow strategy is still in flight (nothing written
+    # yet → queue momentarily empty), so use the composite poll that also
+    # waits for the OME engine to go idle before checking the queue.
+    await pipeline_done_poll(deadline_seconds=600.0)
+
+    # ── Stage 5: artifacts on disk + LanceDB ──────────────────────────────
+    # 5.1 episodes
+    episode_files = _list_md_files(memory_root, _EPISODE_DIR)
+    assert episode_files, "no episode md files written"
+    episode_entries = _count_episode_entries(episode_files)
+    # 19 memcells × 2 owners (caroline + melanie) ≈ 36 episode rows seen
+    # in practice; threshold 15 leaves variance room.
+    assert episode_entries >= 15, (
+        f"expected ≥ 15 episode entries across {len(episode_files)} files, "
+        f"got {episode_entries}"
+    )
+
+    # 5.2 episode → LanceDB
+    from everos.infra.persistence.lancedb import episode_repo
+
+    lance_episode_count = await episode_repo.count()
+    # Same loose floor as 5.1; exact md ↔ LanceDB parity is enforced in 5b.
+    assert lance_episode_count >= 15, (
+        f"expected ≥ 15 LanceDB episode rows, got {lance_episode_count} "
+        f"(md entries: {episode_entries})"
+    )
+
+    # 5.3 atomic_fact
+    af_files = _list_md_files(memory_root, _ATOMIC_FACT_DIR)
+    assert af_files, "no atomic_fact md files — extract_atomic_facts did not emit"
+
+    from everos.infra.persistence.lancedb import atomic_fact_repo
+
+    lance_af_count = await atomic_fact_repo.count()
+    assert lance_af_count >= 1, (
+        f"LanceDB atomic_fact rows = {lance_af_count}; expected ≥ 1"
+    )
+
+    # 5.4 foresight
+    # Foresight extractor is correctly invoked (log: ``foresights_extracted``
+    # per memcell) but daily-life chat about kids / work / hobbies rarely
+    # yields explicit future-intent statements, so count is usually 0.
+    # We assert the LanceDB table exists (count returns 0 cleanly) — not
+    # that any row was emitted.
+    from everos.infra.persistence.lancedb import foresight_repo
+
+    lance_fs_count = await foresight_repo.count()
+    assert lance_fs_count >= 0, f"foresight table broken: count={lance_fs_count}"
+
+    # 5.5 profile (md only — profile retrieval path is stub; we only assert
+    # the writer wrote something). Profile lives as a single file
+    # ``users/<user_id>/user.md`` (schema: ``UserProfileFrontmatter.PROFILE_FILENAME``).
+    from everos.infra.persistence.markdown import UserProfileFrontmatter
+
+    profile_filename = UserProfileFrontmatter.PROFILE_FILENAME
+    profile_files: list[Path] = []
+    users_root = memory_root / "default_app" / "default_project" / "users"
+    if users_root.is_dir():
+        for ud in users_root.iterdir():
+            candidate = ud / profile_filename
+            if candidate.exists():
+                profile_files.append(candidate)
+    assert profile_files, (
+        f"no {profile_filename} written — extract_user_profile / "
+        "trigger_profile_clustering did not emit"
+    )
+    # At least one profile file has non-trivial content.
+    assert any(f.read_text(encoding="utf-8").strip() for f in profile_files), (
+        f"all {profile_filename} files are empty"
+    )
+
+    # ── Stage 5b: strict md ↔ LanceDB parity (every cascade kind) ─────────
+    # Counts above are looser ``>=`` checks against LLM non-determinism;
+    # here we enforce byte-exact id-set + content_sha256 parity across
+    # every md the pipeline wrote. Catches: missing rows, orphan rows,
+    # content drift between md and the indexed projection.
+    #
+    # ``expect_at_least`` pins the kinds this pipeline MUST produce so an
+    # empty glob (kind not emitted at all) fails loudly — without this
+    # guard the parity check would silently pass on zero files. Foresight
+    # is NOT pinned because the LLM frequently yields 0 future-intent
+    # statements on daily-life chat (see commentary above stage 5.4).
+    from tests._consistency_assertions import assert_md_lance_strict_consistent
+
+    await assert_md_lance_strict_consistent(
+        memory_root,
+        expect_at_least={
+            "episode": 1,
+            "atomic_fact": 1,
+            "user_profile": 1,
+        },
+    )
+
+    # ── Stage 6: optional corpus snapshot ─────────────────────────────────
+    # When ``EVEROS_KEEP_CORPUS_TO=<dest>`` is set, copy the post-test
+    # ``memory_root`` to ``<dest>`` so it can be tarred + uploaded as a
+    # test corpus for the /search e2e suite. Skipped silently when the
+    # env var is absent (default test runs don't snapshot).
+    _maybe_snapshot_memory_root(memory_root)
+
+
+# ---------------------------------------------------------------------------
+# Diagnostic: lighter smoke that doesn't depend on the long fixture, used
+# to validate the conftest fixtures themselves are wired correctly.
+# ---------------------------------------------------------------------------
+
+
+async def test_async_client_starts_and_health_responds(
+    async_client: httpx.AsyncClient,
+) -> None:
+    """Fixture wiring check: the app comes up and ``GET /health`` answers 200."""
+    health = await async_client.get("/health")
+    assert health.status_code == 200, health.text
--- a/tests/e2e/test_full_pipeline_timezone_e2e.py
+++ b/tests/e2e/test_full_pipeline_timezone_e2e.py
@ -0,0 +1,219 @@
+"""Real full-pipeline timezone e2e — the gold-standard anti-drift test.
+
+Exercises the **complete stack** under a display-tz switch:
+
+    POST /add  →  unprocessed_buffer  →  POST /flush
+                                            ↓
+                                 boundary detection (memcell)
+                                            ↓
+                                  markdown writer (episode.md)
+                                            ↓
+                                 cascade scanner / worker
+                                            ↓
+                                   LanceDB index (episode row)
+
+then POST /search and POST /get under display tz = Shanghai,
+switch display tz to UTC, repeat /search + /get.
+
+Pin: the **UTC instant** of every returned ``timestamp`` field is
+identical across all four renders. Only the offset / wall-clock
+changes. This is the user-facing contract of the storage-UTC discipline.
+
+Real LLM (boundary detection + episode extraction) + real embedder
+(LanceDB vector + FTS) — marked ``@slow`` ``@live_llm``.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from collections.abc import Awaitable, Callable
+
+import httpx
+import pytest
+
+from everos.component.utils import datetime as dt_module
+from everos.component.utils.datetime import from_iso_format
+from everos.config import load_settings
+
+
+async def _switch_display_tz(monkeypatch: pytest.MonkeyPatch, tz: str) -> None:
+    """Hot-swap the display tz mid-test + drop both caches.
+
+    The ``_display_tz`` resolver and ``load_settings`` are
+    ``functools.cache``-d; missing either ``cache_clear`` would let the
+    new env var read silently no-op.
+    """
+    monkeypatch.setenv("EVEROS_MEMORY__TIMEZONE", tz)
+    load_settings.cache_clear()
+    dt_module._display_tz.cache_clear()
+
+
+@pytest.mark.slow
+@pytest.mark.live_llm
+async def test_full_pipeline_tz_switch_preserves_utc_instant(
+    async_client: httpx.AsyncClient,
+    pipeline_done_poll: Callable[..., Awaitable[None]],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Real /add → /flush → cascade → LanceDB → /search /get under tz switch.
+
+    Steps:
+
+    1. Configure ``EVEROS_MEMORY__TIMEZONE=Asia/Shanghai``.
+    2. POST /add a single message with a pinned epoch-ms timestamp.
+    3. POST /flush — forces boundary detection to carve a memcell out
+       of the single-message buffer.
+    4. Wait for cascade to drain (md → LanceDB indexed).
+    5. POST /search + POST /get: capture episode timestamp strings.
+    6. Switch ``EVEROS_MEMORY__TIMEZONE=UTC``.
+    7. POST /search + POST /get again: capture episode timestamp strings.
+    8. Parse all four timestamp strings back to UTC instants. They must
+       all be equal. The offsets and wall-clock numbers will differ
+       between Shanghai and UTC renders — that's expected; what must
+       NOT differ is the absolute UTC instant.
+
+    Anti-drift contract is end-to-end: writes under one display tz
+    must read back under another with zero data drift.
+    """
+    user_id = "alice_full_tz"
+    session_id = "sess_full_tz"
+    # 1748498400000 ms = 2026-05-29T06:00:00Z = 2026-05-29T14:00:00+08:00
+    pinned_ms = 1748498400000
+    expected_instant = dt.datetime.fromtimestamp(pinned_ms / 1000, tz=dt.UTC)
+
+    # ── Step 1+2: configure Shanghai + write via /add ──
+    await _switch_display_tz(monkeypatch, "Asia/Shanghai")
+    resp = await async_client.post(
+        "/api/v1/memory/add",
+        json={
+            "user_id": user_id,
+            "session_id": session_id,
+            "messages": [
+                {
+                    "sender_id": user_id,
+                    "role": "user",
+                    "timestamp": pinned_ms,
+                    "content": "I love climbing in Yosemite every spring.",
+                },
+            ],
+        },
+        timeout=60.0,
+    )
+    assert resp.status_code == 200, resp.text
+
+    # ── Step 3: /flush forces boundary detection on the single-message buffer ──
+    resp = await async_client.post(
+        "/api/v1/memory/flush",
+        json={"user_id": user_id, "session_id": session_id},
+        timeout=60.0,
+    )
+    assert resp.status_code == 200, resp.text
+
+    # ── Step 4: wait for OME strategies + cascade to fully drain ──
+    # 10-minute deadline: extract_episode + extract_atomic_facts run under
+    # real LLM and the cascade worker only fires after md lands. The
+    # `pipeline_done_poll` fixture covers both OME idle and cascade queue
+    # empty.
+    await pipeline_done_poll(deadline_seconds=600.0)
+
+    # ── Step 5: /search + /get under Shanghai display tz ──
+    resp_search_sh = await async_client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": user_id,
+            "query": "climbing",
+            "method": "keyword",  # no embedder cost; FTS index built by cascade
+            "filters": {"session_id": session_id},
+        },
+        timeout=60.0,
+    )
+    assert resp_search_sh.status_code == 200, resp_search_sh.text
+    eps_search_sh = resp_search_sh.json()["data"]["episodes"]
+    assert eps_search_sh, (
+        f"/search must return an episode after flush+cascade; got {eps_search_sh!r}"
+    )
+    ts_search_sh = eps_search_sh[0]["timestamp"]
+    assert ts_search_sh.endswith("+08:00"), (
+        f"Shanghai display tz should render offset +08:00; got {ts_search_sh!r}"
+    )
+
+    resp_get_sh = await async_client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": user_id,
+            "memory_type": "episode",
+            "page": 1,
+            "page_size": 20,
+        },
+        timeout=60.0,
+    )
+    assert resp_get_sh.status_code == 200, resp_get_sh.text
+    eps_get_sh = resp_get_sh.json()["data"]["episodes"]
+    assert eps_get_sh, "/get must return the same episode /search did"
+    ts_get_sh = eps_get_sh[0]["timestamp"]
+    assert ts_get_sh.endswith("+08:00"), ts_get_sh
+
+    # ── Step 6: switch to UTC display tz (drops caches) ──
+    await _switch_display_tz(monkeypatch, "UTC")
+
+    # ── Step 7: /search + /get again, same on-disk row, new render ──
+    resp_search_utc = await async_client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": user_id,
+            "query": "climbing",
+            "method": "keyword",
+            "filters": {"session_id": session_id},
+        },
+        timeout=60.0,
+    )
+    assert resp_search_utc.status_code == 200, resp_search_utc.text
+    eps_search_utc = resp_search_utc.json()["data"]["episodes"]
+    assert eps_search_utc
+    ts_search_utc = eps_search_utc[0]["timestamp"]
+    assert ts_search_utc.endswith("Z") or ts_search_utc.endswith("+00:00"), (
+        f"UTC display tz should render Z / +00:00; got {ts_search_utc!r}"
+    )
+
+    resp_get_utc = await async_client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": user_id,
+            "memory_type": "episode",
+            "page": 1,
+            "page_size": 20,
+        },
+        timeout=60.0,
+    )
+    assert resp_get_utc.status_code == 200, resp_get_utc.text
+    eps_get_utc = resp_get_utc.json()["data"]["episodes"]
+    ts_get_utc = eps_get_utc[0]["timestamp"]
+    assert ts_get_utc.endswith("Z") or ts_get_utc.endswith("+00:00"), ts_get_utc
+
+    # ── Step 8: anti-drift assertion — all four UTC instants identical ──
+    instants = {
+        "search/Shanghai": from_iso_format(ts_search_sh).astimezone(dt.UTC),
+        "get/Shanghai": from_iso_format(ts_get_sh).astimezone(dt.UTC),
+        "search/UTC": from_iso_format(ts_search_utc).astimezone(dt.UTC),
+        "get/UTC": from_iso_format(ts_get_utc).astimezone(dt.UTC),
+    }
+    distinct = set(instants.values())
+    assert len(distinct) == 1, (
+        f"display-tz switch must NOT drift the UTC instant. Got distinct "
+        f"instants across renders: {instants!r}"
+    )
+    actual_instant = next(iter(distinct))
+    # Episode timestamp inherits from the last message's epoch ms — the
+    # pinned input value must round-trip exactly.
+    assert actual_instant == expected_instant, (
+        f"episode UTC instant must equal the pinned input ms epoch; "
+        f"expected {expected_instant.isoformat()}, got {actual_instant.isoformat()}"
+    )
+
+    # ── Sanity: across the four renders, identical instant projects to the
+    # correct wall-clock under each display tz ──
+    # Shanghai: 14:00 wall clock; UTC: 06:00 wall clock.
+    assert "T14:00:00" in ts_search_sh, ts_search_sh
+    assert "T14:00:00" in ts_get_sh, ts_get_sh
+    assert "T06:00:00" in ts_search_utc, ts_search_utc
+    assert "T06:00:00" in ts_get_utc, ts_get_utc
--- a/tests/e2e/test_get_endpoint_e2e.py
+++ b/tests/e2e/test_get_endpoint_e2e.py
@ -0,0 +1,829 @@
+"""End-to-end integration tests for ``POST /api/v1/memory/get``.
+
+These tests spin up the FastAPI app with **no lifespan providers**
+against a tmp ``EVEROS_MEMORY__ROOT``, populate a real LanceDB
+``episode`` table directly via the repo singleton, and exercise the
+HTTP route. They cover the wiring that unit tests cannot: pydantic
+422s from the route, JSON envelope shape, and the full
+``request → service → manager → LanceDB`` path.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import datetime as _dt
+from collections.abc import AsyncIterator
+from importlib import import_module
+from pathlib import Path
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from everos.config import load_settings
+from everos.entrypoints.api.app import create_app
+from everos.infra.persistence.lancedb import (
+    AgentCase,
+    AgentSkill,
+    Episode,
+    UserProfile,
+    agent_case_repo,
+    agent_skill_repo,
+    episode_repo,
+    lancedb_manager,
+    user_profile_repo,
+)
+
+# ``everos.service.__init__`` re-exports the ``get`` function under the
+# same name as the submodule (``from .get import get as get``), so a
+# normal import yields the function rather than the module. Resolve the
+# real module object through importlib instead; the ``client`` fixture
+# needs it to reset the module-level ``_manager`` singleton.
+get_service_mod = import_module("everos.service.get")
+
+
+def _ts(day: int) -> _dt.datetime:
+    return _dt.datetime(2026, 1, day, tzinfo=_dt.UTC)
+
+
+def _episode(
+    entry: str,
+    *,
+    owner: str = "u1",
+    session: str = "sess_a",
+    parent_id: str = "mc_1",
+    sender_ids: list[str] | None = None,
+    day: int = 1,
+) -> Episode:
+    return Episode(
+        id=f"{owner}_{entry}",
+        entry_id=entry,
+        owner_id=owner,
+        owner_type="user",
+        session_id=session,
+        timestamp=_ts(day),
+        parent_type="memcell",
+        parent_id=parent_id,
+        sender_ids=sender_ids if sender_ids is not None else [owner, "assistant"],
+        subject=f"subj {entry}",
+        summary=f"summary {entry}",
+        episode=f"body of {entry}",
+        episode_tokens=f"body of {entry}",
+        md_path=f"users/{owner}/episodes/{entry}.md",
+        content_sha256="abc",
+        vector=[0.0] * 1024,
+    )
+
+
+def _agent_case(
+    entry: str,
+    *,
+    owner: str = "a1",
+    session: str = "sess_x",
+    day: int = 1,
+) -> AgentCase:
+    return AgentCase(
+        id=f"{owner}_{entry}",
+        entry_id=entry,
+        owner_id=owner,
+        owner_type="agent",
+        session_id=session,
+        timestamp=_ts(day),
+        parent_type="memcell",
+        parent_id="mc_99",
+        quality_score=0.8,
+        task_intent=f"intent {entry}",
+        task_intent_tokens=f"intent {entry}",
+        approach=f"approach {entry}",
+        approach_tokens=f"approach {entry}",
+        key_insight=None,
+        md_path=f"agents/{owner}/cases/{entry}.md",
+        content_sha256="abc",
+        vector=[0.0] * 1024,
+    )
+
+
+def _agent_skill(
+    name: str,
+    *,
+    owner: str = "a1",
+) -> AgentSkill:
+    return AgentSkill(
+        id=f"{owner}_{name}",
+        owner_id=owner,
+        owner_type="agent",
+        name=name,
+        description=f"desc {name}",
+        description_tokens=f"desc {name}",
+        content=f"content {name}",
+        content_tokens=f"content {name}",
+        confidence=0.9,
+        maturity_score=0.7,
+        source_case_ids=[f"{owner}_ac_1"],
+        md_path=f"agents/{owner}/skills/{name}/SKILL.md",
+        content_sha256="abc",
+        vector=[0.0] * 1024,
+    )
+
+
+@pytest.fixture
+async def client(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> AsyncIterator[AsyncClient]:
+    """Build the FastAPI app against a tmp memory root with no lifespan."""
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+    load_settings.cache_clear()
+
+    # Reset every module-level singleton the get-path touches.
+    lancedb_manager._conn = None
+    lancedb_manager._tables.clear()
+    get_service_mod._manager = None
+
+    app = create_app(lifespan_providers=[])
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as c:
+        yield c
+
+    await lancedb_manager.dispose_connection()
+    load_settings.cache_clear()
+
+
+# ── Happy path ──────────────────────────────────────────────────────────
+
+
+async def test_get_episodes_returns_page_and_total(
+    client: AsyncClient,
+) -> None:
+    """5 rows in, page_size=2 → 2 episodes back + total_count=5."""
+    await episode_repo.add(
+        [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "page": 1,
+            "page_size": 2,
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    rid = body["request_id"]
+    assert len(rid) == 32 and all(c in "0123456789abcdef" for c in rid)
+    data = body["data"]
+    assert data["total_count"] == 5
+    assert data["count"] == 2
+    assert len(data["episodes"]) == 2
+    # default sort = timestamp DESC → highest day first
+    assert data["episodes"][0]["id"] == "u1_ep_005"
+    assert data["episodes"][1]["id"] == "u1_ep_004"
+    # The non-requested kinds are empty arrays (envelope invariant).
+    assert data["profiles"] == []
+    assert data["agent_cases"] == []
+    assert data["agent_skills"] == []
+
+
+async def test_get_episodes_filtered_by_session_id(
+    client: AsyncClient,
+) -> None:
+    """Filter narrows results to the matching ``session_id`` only."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", session="sess_a"),
+            _episode("ep_002", session="sess_a"),
+            _episode("ep_003", session="sess_b"),
+        ],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"session_id": "sess_a"},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    assert body["data"]["count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_002"}
+
+
+async def test_get_empty_returns_zero_counts(client: AsyncClient) -> None:
+    """An owner with no rows yields total_count=0 + empty episodes list."""
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "ghost",
+            "memory_type": "episode",
+        },
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["total_count"] == 0
+    assert data["count"] == 0
+    assert data["episodes"] == []
+
+
+async def test_get_profile_miss_returns_empty(client: AsyncClient) -> None:
+    """Cold start (no profile row) → ``profiles=[]`` / ``total_count=0``."""
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "profile",
+        },
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["profiles"] == []
+    assert data["total_count"] == 0
+
+
+async def test_get_profile_returns_seeded_row(client: AsyncClient) -> None:
+    """A profile row in the ``user_profile`` table is returned + json-decoded.
+
+    Full-stack: seed the LanceDB ``user_profile`` table (as cascade would
+    from ``users/u1/user.md``), then read it back through the HTTP route.
+    White-box surface: ``user_profile_repo`` (the same table /search's
+    ``include_profile`` reads).
+    """
+    await user_profile_repo.add(
+        [
+            UserProfile(
+                id="u1",
+                owner_id="u1",
+                owner_type="user",
+                app_id="default",
+                project_id="default",
+                summary="u1 loves climbing in Yosemite",
+                explicit_info_json='[{"category": "Hobby", "description": "climbing"}]',
+                implicit_traits_json='[{"trait": "Outdoorsy"}]',
+                profile_timestamp_ms=1780304400000,
+                md_path="users/u1/user.md",
+                content_sha256="abc",
+            )
+        ]
+    )
+
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={"user_id": "u1", "memory_type": "profile"},
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["total_count"] == 1
+    assert data["count"] == 1
+    assert len(data["profiles"]) == 1
+    prof = data["profiles"][0]
+    assert prof["id"] == "u1"
+    assert prof["user_id"] == "u1"
+    assert prof["profile_data"]["summary"] == "u1 loves climbing in Yosemite"
+    assert prof["profile_data"]["explicit_info"] == [
+        {"category": "Hobby", "description": "climbing"}
+    ]
+    assert prof["profile_data"]["implicit_traits"] == [{"trait": "Outdoorsy"}]
+
+
+# ── Pagination + sort ───────────────────────────────────────────────────
+
+
+async def test_get_episodes_page_two_returns_correct_slice(
+    client: AsyncClient,
+) -> None:
+    """5 rows / page_size=2 / page=2 → middle slice (rows 3 + 4 by DESC ts)."""
+    await episode_repo.add(
+        [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "page": 2,
+            "page_size": 2,
+        },
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["total_count"] == 5
+    assert data["count"] == 2
+    # default sort = timestamp DESC; page 2 of 2-per-page over 5 rows →
+    # rows at offsets 2,3 → day=3, day=2 (1-indexed: ep_003, ep_002).
+    assert [ep["id"] for ep in data["episodes"]] == ["u1_ep_003", "u1_ep_002"]
+
+
+async def test_get_episodes_sort_order_asc(client: AsyncClient) -> None:
+    """``sort_order=asc`` flips the order (oldest first)."""
+    await episode_repo.add(
+        [_episode(f"ep_{i:03d}", day=i) for i in range(1, 4)],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "sort_order": "asc",
+        },
+    )
+    assert resp.status_code == 200
+    ids = [ep["id"] for ep in resp.json()["data"]["episodes"]]
+    assert ids == ["u1_ep_001", "u1_ep_002", "u1_ep_003"]
+
+
+# ── Agent-side kinds ────────────────────────────────────────────────────
+
+
+async def test_get_agent_cases_happy_path(client: AsyncClient) -> None:
+    """``agent_case`` listing returns shaped items, populates only that array."""
+    await agent_case_repo.add(
+        [_agent_case(f"ac_{i:03d}", day=i) for i in range(1, 4)],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "agent_id": "a1",
+            "memory_type": "agent_case",
+        },
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["total_count"] == 3
+    assert data["count"] == 3
+    assert [c["id"] for c in data["agent_cases"]] == [
+        "a1_ac_003",
+        "a1_ac_002",
+        "a1_ac_001",
+    ]
+    # Cross-kind envelope stays empty.
+    assert data["episodes"] == []
+    assert data["agent_skills"] == []
+    # AgentCase item shape — score absent (vs SearchAgentCaseItem),
+    # quality_score round-trips.
+    first = data["agent_cases"][0]
+    assert "score" not in first
+    assert first["quality_score"] == 0.8
+    assert first["agent_id"] == "a1"
+
+
+async def test_get_agent_cases_filtered_by_session(client: AsyncClient) -> None:
+    """Filter narrows ``agent_case`` rows to the session."""
+    await agent_case_repo.add(
+        [
+            _agent_case("ac_001", session="sess_x"),
+            _agent_case("ac_002", session="sess_x"),
+            _agent_case("ac_003", session="sess_y"),
+        ]
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "agent_id": "a1",
+            "memory_type": "agent_case",
+            "filters": {"session_id": "sess_x"},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {c["id"] for c in body["data"]["agent_cases"]}
+    assert ids == {"a1_ac_001", "a1_ac_002"}
+
+
+async def test_get_agent_skills_happy_path(client: AsyncClient) -> None:
+    """``agent_skill`` listing — sort silently uses ``updated_at``."""
+    await agent_skill_repo.add(
+        [_agent_skill(name) for name in ("planner", "summariser")],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "agent_id": "a1",
+            "memory_type": "agent_skill",
+        },
+    )
+    assert resp.status_code == 200
+    data = resp.json()["data"]
+    assert data["total_count"] == 2
+    names = {s["name"] for s in data["agent_skills"]}
+    assert names == {"planner", "summariser"}
+
+
+async def test_get_agent_skills_sort_by_timestamp_silently_downgraded(
+    client: AsyncClient,
+) -> None:
+    """Explicit ``sort_by=timestamp`` does not 500 — manager rewrites to
+    ``updated_at`` (the only temporal column on ``agent_skill``)."""
+    await agent_skill_repo.add([_agent_skill("planner")])
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "agent_id": "a1",
+            "memory_type": "agent_skill",
+            "sort_by": "timestamp",
+        },
+    )
+    assert resp.status_code == 200
+    assert resp.json()["data"]["total_count"] == 1
+
+
+# ── Filter coverage end-to-end ──────────────────────────────────────────
+
+
+async def test_get_episodes_filtered_by_ne_session(client: AsyncClient) -> None:
+    """``ne`` op on a str field excludes matching rows end-to-end."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", session="sess_a"),
+            _episode("ep_002", session="sess_internal"),
+            _episode("ep_003", session="sess_b"),
+        ]
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"session_id": {"ne": "sess_internal"}},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_003"}
+
+
+async def test_get_episodes_filtered_by_iso_timestamp(
+    client: AsyncClient,
+) -> None:
+    """ISO 8601 string timestamp literal is accepted alongside epoch ms."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", day=1),  # 2026-01-01
+            _episode("ep_002", day=5),  # 2026-01-05
+            _episode("ep_003", day=9),  # 2026-01-09
+        ]
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"timestamp": {"gte": "2026-01-04T00:00:00+00:00"}},
+        },
+    )
+    assert resp.status_code == 200
+    ids = {ep["id"] for ep in resp.json()["data"]["episodes"]}
+    assert ids == {"u1_ep_002", "u1_ep_003"}
+
+
+async def test_get_episodes_filtered_by_parent_id(client: AsyncClient) -> None:
+    """Core use case: every episode derived from one memcell."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", parent_id="mc_target"),
+            _episode("ep_002", parent_id="mc_target"),
+            _episode("ep_003", parent_id="mc_other"),
+        ]
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"parent_id": "mc_target"},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_002"}
+
+
+async def test_get_episodes_filtered_by_sender_id_in(
+    client: AsyncClient,
+) -> None:
+    """``sender_id: {"in": [...]}`` → ``array_has(sender_ids, ...) OR ...``."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", sender_ids=["alice", "assistant"]),
+            _episode("ep_002", sender_ids=["bob", "assistant"]),
+            _episode("ep_003", sender_ids=["carol", "assistant"]),
+        ]
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"sender_id": {"in": ["alice", "bob"]}},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_002"}
+
+
+async def test_get_episodes_nested_and_inside_or(client: AsyncClient) -> None:
+    """Nested ``AND`` inside ``OR`` — parity with /search combinator semantics."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", session="sess_a", parent_id="mc_target"),
+            _episode("ep_002", session="sess_a", parent_id="mc_other"),
+            _episode("ep_003", session="sess_b", parent_id="mc_target"),
+            _episode("ep_004", session="sess_c", parent_id="mc_other"),
+        ]
+    )
+    # (session=sess_a AND parent_id=mc_target)
+    #   OR (parent_id=mc_other AND session=sess_c)
+    # → ep_001 + ep_004
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {
+                "OR": [
+                    {
+                        "AND": [
+                            {"session_id": "sess_a"},
+                            {"parent_id": "mc_target"},
+                        ]
+                    },
+                    {
+                        "AND": [
+                            {"parent_id": "mc_other"},
+                            {"session_id": "sess_c"},
+                        ]
+                    },
+                ]
+            },
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_004"}
+
+
+# ── Filter combinators (200 — happy path) ──────────────────────────────
+# Pure 422 / validation cases moved to
+# tests/unit/test_entrypoints/test_api/test_routes/test_get_route_validation.py
+
+
+async def test_get_top_level_and_or_compiles_and_filters(
+    client: AsyncClient,
+) -> None:
+    """``AND`` / ``OR`` combinators are accepted (parity with /search)."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", session="sess_a"),
+            _episode("ep_002", session="sess_b"),
+            _episode("ep_003", session="sess_c"),
+        ],
+    )
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {"OR": [{"session_id": "sess_a"}, {"session_id": "sess_b"}]},
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_001", "u1_ep_002"}
+
+
+async def test_get_episodes_filtered_by_timestamp_range(
+    client: AsyncClient,
+) -> None:
+    """``timestamp: {gte, lt}`` — same-field double op compiles to implicit AND."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", day=1),  # 2026-01-01
+            _episode("ep_002", day=3),  # 2026-01-03
+            _episode("ep_003", day=5),  # 2026-01-05
+            _episode("ep_004", day=7),  # 2026-01-07
+            _episode("ep_005", day=9),  # 2026-01-09
+        ]
+    )
+    # Window [Jan 3, Jan 7) → ep_002 + ep_003 (Jan 7 excluded by `lt`).
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {
+                "timestamp": {
+                    "gte": "2026-01-03T00:00:00+00:00",
+                    "lt": "2026-01-07T00:00:00+00:00",
+                }
+            },
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 2
+    ids = {ep["id"] for ep in body["data"]["episodes"]}
+    assert ids == {"u1_ep_002", "u1_ep_003"}
+
+
+async def test_get_episodes_top_level_and_filter(client: AsyncClient) -> None:
+    """Explicit top-level ``AND`` — distinct from implicit multi-field AND."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", session="sess_a", parent_id="mc_target"),
+            _episode("ep_002", session="sess_a", parent_id="mc_other"),
+            _episode("ep_003", session="sess_b", parent_id="mc_target"),
+        ]
+    )
+    # session=sess_a AND parent_id=mc_target → ep_001 only
+    resp = await client.post(
+        "/api/v1/memory/get",
+        json={
+            "user_id": "u1",
+            "memory_type": "episode",
+            "filters": {
+                "AND": [
+                    {"session_id": "sess_a"},
+                    {"parent_id": "mc_target"},
+                ]
+            },
+        },
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["data"]["total_count"] == 1
+    assert body["data"]["episodes"][0]["id"] == "u1_ep_001"
+
+
+# ── max_fetch limit trigger ─────────────────────────────────────────────
+
+
+async def test_get_truncates_above_max_fetch(
+    client: AsyncClient,
+    monkeypatch: pytest.MonkeyPatch,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Filter matches > ``max_fetch`` rows → chassis emits warning + page
+    contents come from the truncated prefix; ``total_count`` is still the
+    *true* match count (``count_rows`` ignores ``max_fetch``).
+
+    Injects a low ``max_fetch=5`` by wrapping the bound method so the
+    end-to-end path runs through the truncation branch without populating
+    20k+ rows.
+    """
+    # The e2e ``client`` fixture builds the app without lifespan providers,
+    # so ``configure_logging`` (normally invoked by the CLI entry) never
+    # runs. Call it here so the structlog → stdlib logging bridge is
+    # wired up and ``caplog`` can observe the chassis warning.
+    from everos.core.observability.logging import configure_logging
+
+    configure_logging(level="WARNING")
+
+    await episode_repo.add(
+        [_episode(f"ep_{i:03d}", day=i) for i in range(1, 11)],
+    )
+    original = episode_repo.find_where_paginated
+
+    async def low_cap(*args: object, **kwargs: object) -> object:
+        kwargs["max_fetch"] = 5
+        return await original(*args, **kwargs)  # type: ignore[arg-type]
+
+    monkeypatch.setattr(episode_repo, "find_where_paginated", low_cap)
+
+    with caplog.at_level("WARNING"):
+        resp = await client.post(
+            "/api/v1/memory/get",
+            json={
+                "user_id": "u1",
+                "memory_type": "episode",
+                "page": 1,
+                "page_size": 3,
+            },
+        )
+    assert resp.status_code == 200
+    body = resp.json()
+    # True row count is still 10, even though only 5 made it into the sort.
+    assert body["data"]["total_count"] == 10
+    assert body["data"]["count"] == 3
+    # structlog now routes through stdlib's root logger (see
+    # ``core/observability/logging/factory.py``); the warning surfaces via
+    # the standard ``caplog`` fixture rather than direct stdout capture.
+    assert "find_where_paginated truncated" in caplog.text
+
+
+# ── Concurrency ─────────────────────────────────────────────────────────
+
+
+async def test_get_concurrent_owners_no_cross_contamination(
+    client: AsyncClient,
+) -> None:
+    """Concurrent /get requests against different ``owner_id`` partitions
+    return only their own rows. ``GetManager`` is a lazy singleton —
+    this also exercises first-request lazy-init under contention."""
+    await episode_repo.add(
+        [
+            _episode("ep_001", owner="u1"),
+            _episode("ep_002", owner="u1"),
+            _episode("ep_001", owner="u2"),
+            _episode("ep_001", owner="u3"),
+        ]
+    )
+
+    async def query(owner: str) -> dict[str, object]:
+        resp = await client.post(
+            "/api/v1/memory/get",
+            json={
+                "user_id": owner,
+                "memory_type": "episode",
+            },
+        )
+        assert resp.status_code == 200, f"{owner}: {resp.text}"
+        return resp.json()
+
+    bodies = await asyncio.gather(
+        query("u1"),
+        query("u2"),
+        query("u3"),
+    )
+    u1, u2, u3 = bodies
+    assert u1["data"]["total_count"] == 2  # type: ignore[index]
+    assert u2["data"]["total_count"] == 1  # type: ignore[index]
+    assert u3["data"]["total_count"] == 1  # type: ignore[index]
+    assert {ep["id"] for ep in u1["data"]["episodes"]} == {  # type: ignore[index]
+        "u1_ep_001",
+        "u1_ep_002",
+    }
+    assert {ep["id"] for ep in u2["data"]["episodes"]} == {"u2_ep_001"}  # type: ignore[index]
+    assert {ep["id"] for ep in u3["data"]["episodes"]} == {"u3_ep_001"}  # type: ignore[index]
+
+
+async def test_get_concurrent_different_memory_types(client: AsyncClient) -> None:
+    """Concurrent /get on different ``memory_type`` (episode + agent_case +
+    agent_skill) returns each kind in its own envelope slot, with no
+    cross-array bleed."""
+    await episode_repo.add([_episode("ep_001", owner="u1")])
+    await agent_case_repo.add([_agent_case("ac_001", owner="a1")])
+    await agent_skill_repo.add([_agent_skill("planner", owner="a1")])
+
+    async def query(payload: dict[str, object]) -> dict[str, object]:
+        resp = await client.post("/api/v1/memory/get", json=payload)
+        assert resp.status_code == 200, resp.text
+        return resp.json()
+
+    ep_body, case_body, skill_body = await asyncio.gather(
+        query({"user_id": "u1", "memory_type": "episode"}),
+        query(
+            {
+                "agent_id": "a1",
+                "memory_type": "agent_case",
+            }
+        ),
+        query(
+            {
+                "agent_id": "a1",
+                "memory_type": "agent_skill",
+            }
+        ),
+    )
+    # Episode envelope: only ``episodes`` populated.
+    assert len(ep_body["data"]["episodes"]) == 1  # type: ignore[index]
+    assert ep_body["data"]["agent_cases"] == []  # type: ignore[index]
+    assert ep_body["data"]["agent_skills"] == []  # type: ignore[index]
+    # Case envelope: only ``agent_cases`` populated.
+    assert len(case_body["data"]["agent_cases"]) == 1  # type: ignore[index]
+    assert case_body["data"]["episodes"] == []  # type: ignore[index]
+    # Skill envelope: only ``agent_skills`` populated.
+    assert len(skill_body["data"]["agent_skills"]) == 1  # type: ignore[index]
+    assert skill_body["data"]["episodes"] == []  # type: ignore[index]
+
+
+async def test_get_concurrent_lazy_init_builds_one_manager(
+    client: AsyncClient,
+) -> None:
+    """The lazy singleton survives first-request contention — N concurrent
+    requests against a virgin manager all succeed and leave one instance."""
+    # ``client`` fixture already reset _manager to None.
+    assert get_service_mod._manager is None
+    await episode_repo.add([_episode("ep_001")])
+
+    payload = {
+        "user_id": "u1",
+        "memory_type": "episode",
+    }
+    results = await asyncio.gather(
+        *(client.post("/api/v1/memory/get", json=payload) for _ in range(8))
+    )
+    assert all(r.status_code == 200 for r in results)
+    # After the storm, exactly one manager instance is cached.
+    assert get_service_mod._manager is not None
--- a/tests/e2e/test_multimodal_add_e2e.py
+++ b/tests/e2e/test_multimodal_add_e2e.py
@ -0,0 +1,140 @@
+"""E2E: multimodal /add parses HTML (base64) and http(s) uri end-to-end.
+
+Scope: full HTTP stack (``create_app()`` + ``AsyncClient``) → ingest →
+multimodal parse → unprocessed_buffer. Proves the three paths the unit
+tests can only mock:
+
+1. ``type="html"`` + base64 + ``ext="html"`` — the normal HTML-file call.
+2. ``type="html"`` + ``https`` uri — everalgo fetches the page and
+   dispatches by the response Content-Type.
+3. ``type="html"`` + ``file://`` uri — EverOS reads the file locally and
+   hands everalgo hydrated bytes (the library never touches the fs).
+
+Real multimodal LLM (creds via ``.env``) + real public internet, so the
+module is marked ``live_llm``. Skipped when the ``[multimodal]`` extra is
+absent.
+
+White-box surface: reads the ``text`` column of ``unprocessed_buffer``
+(the derived text the ingest stage produced from the parsed content) to
+assert the parsed payload actually flowed into the buffer.
+"""
+
+from __future__ import annotations
+
+import base64
+from pathlib import Path
+
+import httpx
+import pytest
+from sqlalchemy import text as sql_text
+
+pytest.importorskip("everalgo.parser")  # skip module if the parser is absent
+
+pytestmark = pytest.mark.live_llm  # marks every test in this module
+
+
+async def _buffer_text(session_id: str) -> str:
+    """Concatenated derived ``text`` of all buffer rows for a session."""
+    from everos.infra.persistence.sqlite import get_engine
+
+    async with get_engine().connect() as conn:
+        rows = (
+            await conn.execute(
+                sql_text("SELECT text FROM unprocessed_buffer WHERE session_id = :sid"),
+                {"sid": session_id},
+            )
+        ).all()
+    return "\n".join(str(r[0]) for r in rows)
+
+
+async def test_add_html_base64_parsed_into_buffer(
+    async_client: httpx.AsyncClient,
+) -> None:
+    """A base64 HTML file is parsed and its text lands in the buffer."""
+    html = (
+        b"<html><body><h1>Release</h1>"
+        b"<p>Version 9.9.9 ships Dark Mode.</p></body></html>"
+    )
+    sid = "e2e-mm-html-b64"
+    resp = await async_client.post(
+        "/api/v1/memory/add",
+        json={
+            "session_id": sid,
+            "messages": [
+                {
+                    "sender_id": "alice",
+                    "role": "user",
+                    "timestamp": 1780304400000,
+                    "content": [
+                        {
+                            "type": "html",
+                            "base64": base64.b64encode(html).decode(),
+                            "ext": "html",
+                            "name": "notes.html",
+                        }
+                    ],
+                }
+            ],
+        },
+    )
+    assert resp.status_code == 200, resp.text
+
+    buffered = await _buffer_text(sid)
+    assert "9.9.9" in buffered
+
+
+async def test_add_html_https_uri_parsed_into_buffer(
+    async_client: httpx.AsyncClient,
+) -> None:
+    """An https uri is fetched + parsed and its text lands in the buffer."""
+    sid = "e2e-mm-html-uri"
+    resp = await async_client.post(
+        "/api/v1/memory/add",
+        json={
+            "session_id": sid,
+            "messages": [
+                {
+                    "sender_id": "alice",
+                    "role": "user",
+                    "timestamp": 1780304400000,
+                    "content": [{"type": "html", "uri": "https://example.com"}],
+                }
+            ],
+        },
+    )
+    assert resp.status_code == 200, resp.text
+
+    buffered = await _buffer_text(sid)
+    assert "example domain" in buffered.lower()
+
+
+async def test_add_html_file_uri_parsed_into_buffer(
+    async_client: httpx.AsyncClient,
+    tmp_path: Path,
+) -> None:
+    """A file:// html asset is read locally (hydrated) + parsed into buffer.
+
+    Exercises EverOS-side file:// support: the parser receives bytes, never
+    the path. Default allowlist is empty (local-first) so the temp file reads.
+    """
+    doc = tmp_path / "release.html"
+    doc.write_text("<html><body><p>Version 9.9.9 ships Dark Mode.</p></body></html>")
+    sid = "e2e-mm-html-file"
+    resp = await async_client.post(
+        "/api/v1/memory/add",
+        json={
+            "session_id": sid,
+            "messages": [
+                {
+                    "sender_id": "alice",
+                    "role": "user",
+                    "timestamp": 1780304400000,
+                    "content": [{"type": "html", "uri": f"file://{doc}"}],
+                }
+            ],
+        },
+    )
+    assert resp.status_code == 200, resp.text
+
+    buffered = await _buffer_text(sid)
+    assert "9.9.9" in buffered
--- a/tests/e2e/test_openapi_endpoint_matches_docs.py
+++ b/tests/e2e/test_openapi_endpoint_matches_docs.py
@ -0,0 +1,87 @@
+"""Belt-and-braces gate: dev-mode ``GET /openapi.json`` ≡ ``docs/openapi.json``.
+
+The lint-time ``make check-openapi`` already diffs ``app.openapi()``
+against the committed ``docs/openapi.json``. This e2e test closes the
+remaining theoretical gap: if anyone ever adds a *lifespan-mutated*
+OpenAPI schema (e.g. ``app.openapi_schema = ...`` inside a startup
+handler), the in-memory ``app.openapi()`` and the runtime
+``GET /openapi.json`` response would diverge — the lint gate would
+miss it, but this test wouldn't.
+
+How:
+
+1. Force ``ENV=DEV`` so the ``openapi_url`` route is enabled.
+2. Construct the app via ``create_app(lifespan_providers=[])`` to skip
+   SQLite / LanceDB / OME (the schema is route-driven, not state-
+   driven) — but *do* run the lifespan context, so any startup hook
+   that mutates ``app.openapi_schema`` is exercised.
+3. ``GET /openapi.json`` through ``httpx.AsyncClient``.
+4. Compare against ``docs/openapi.json`` as parsed JSON (both sides are
+   decoded first, so key order and whitespace cannot cause false drift).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+_REPO_ROOT = Path(__file__).resolve().parents[2]  # tests/e2e/<file> -> repo root
+_COMMITTED_OPENAPI = _REPO_ROOT / "docs" / "openapi.json"  # committed snapshot
+
+
+async def test_dev_mode_openapi_endpoint_matches_committed_docs(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Runtime ``GET /openapi.json`` (dev mode) must equal ``docs/openapi.json``."""
+    # The gate's own committed snapshot must exist — otherwise the dev
+    # workflow ``make openapi`` has been skipped.
+    assert _COMMITTED_OPENAPI.is_file(), (
+        f"{_COMMITTED_OPENAPI} not found — run `make openapi`"
+    )
+
+    # Force dev-mode so ``openapi_url="/openapi.json"`` is registered.
+    monkeypatch.setenv("ENV", "DEV")
+
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app(lifespan_providers=[])
+    transport = httpx.ASGITransport(app=app)
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        resp = await client.get("/openapi.json")
+    assert resp.status_code == 200, resp.text
+    runtime_schema = resp.json()
+
+    committed_schema = json.loads(_COMMITTED_OPENAPI.read_text(encoding="utf-8"))
+
+    if runtime_schema != committed_schema:
+        # Emit a concise diff to help locate the drift cause.
+        import difflib
+
+        runtime_rendered = json.dumps(runtime_schema, indent=2, ensure_ascii=False)
+        committed_rendered = json.dumps(committed_schema, indent=2, ensure_ascii=False)
+        diff = "\n".join(
+            list(
+                difflib.unified_diff(
+                    committed_rendered.splitlines(),
+                    runtime_rendered.splitlines(),
+                    fromfile="docs/openapi.json (committed)",
+                    tofile="GET /openapi.json (runtime)",
+                    lineterm="",
+                )
+            )[:120]
+        )
+        raise AssertionError(
+            "runtime /openapi.json drifts from docs/openapi.json; "
+            "run `make openapi` and commit the result.\n\n" + diff
+        )
+
+
+# No-op reference so linters do not flag the currently unused ``os`` import.
+_ = os
--- a/tests/e2e/test_search_endpoint_e2e.py
+++ b/tests/e2e/test_search_endpoint_e2e.py