chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/integration/search/__init__.py
+++ b/tests/integration/search/__init__.py
--- a/tests/integration/search/_helpers.py
+++ b/tests/integration/search/_helpers.py
@@ -0,0 +1,269 @@
+"""Private helpers shared across the search e2e tests.
+
+* :func:`pick_query_seeds` — scans the session corpus's
+  ``.atomic_facts/`` md files and returns a list of
+  ``(owner_id, fact_text)`` tuples to use as deterministic search
+  queries. Bootstrapping queries off the corpus's own extraction
+  output gives us a closed-loop correctness signal — what was
+  written should be findable.
+
+* :func:`assert_recall` — the canonical "this search returned at
+  least one sensible hit for ``owner``" assertion bundle. Used by
+  the keyword / vector / hybrid recall tests so the assertion logic
+  is in one place.
+
+* :func:`flatten_hits` — collapses ``SearchData``'s four arrays into
+  one ``(owner_id, score, text)`` tuple list for relevance checks.
+
+The helpers do **not** hardcode topical keywords ("hiking" / "work")
+— they are derived from what the pipeline produced. This keeps the
+suite stable across LLM-driven boundary-cut variance.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+# Cap how many fact strings we sample per call — running every test
+# against every fact would blow the LLM rerank budget.
+_DEFAULT_SEED_LIMIT = 3
+
+# Tokenise on word characters; lowercase; drop short tokens that carry
+# no signal for the "content overlap" check.
+_TOKEN_RE = re.compile(r"\w+", re.UNICODE)
+_MIN_TOKEN_LEN = 3
+_STOPWORDS: frozenset[str] = frozenset(
+    {
+        "the",
+        "and",
+        "for",
+        "that",
+        "with",
+        "this",
+        "was",
+        "has",
+        "have",
+        "are",
+        "but",
+        "from",
+        "you",
+        "she",
+        "her",
+        "his",
+        "him",
+        "they",
+        "them",
+        "their",
+    }
+)
+
+
+# ── Query seed extraction ───────────────────────────────────────────────
+
+
+def pick_query_seeds(
+    memory_root: Path,
+    *,
+    limit: int = _DEFAULT_SEED_LIMIT,
+) -> list[tuple[str, str]]:
+    """Sample ``(owner_id, fact_text)`` tuples from atomic_facts md files.
+
+    Walks ``users/<owner>/.atomic_facts/atomic_fact-*.md`` and parses
+    the ``## Fact\\n<text>`` sections inside each daily-log entry.
+    Returns deterministic seeds (insertion order of ``rglob`` is
+    sort-stable thanks to the explicit ``sorted`` call) so a flaky
+    test surfaces a real regression, not query-rotation variance.
+
+    Raises:
+        AssertionError: if no facts were extracted — that's a fixture
+            failure, not a test failure, and should fail loudly.
+    """
+    seeds: list[tuple[str, str]] = []
+    users_dir = memory_root / "default_app" / "default_project" / "users"
+    if not users_dir.is_dir():
+        raise AssertionError(f"expected {users_dir} to exist after ingest")
+
+    for owner_dir in sorted(users_dir.iterdir()):
+        if not owner_dir.is_dir():
+            continue
+        facts_dir = owner_dir / ".atomic_facts"
+        if not facts_dir.is_dir():
+            continue
+        for md in sorted(facts_dir.rglob("*.md")):
+            for fact in _extract_fact_sections(md):
+                if fact:
+                    seeds.append((owner_dir.name, fact))
+                    if len(seeds) >= limit:
+                        return seeds
+    if not seeds:
+        raise AssertionError(
+            f"no atomic_fact md entries under {users_dir} — pipeline did "
+            "not produce any facts; cannot bootstrap search queries"
+        )
+    return seeds
+
+
+def _extract_fact_sections(md: Path) -> list[str]:
+    """Return every ``### Fact`` section body in a daily-log md file.
+
+    Daily-log entries are ``## <entry-id>`` blocks; the labelled body
+    sections inside an entry are h3 (``### Fact``, ``### Foresight``,
+    …). We scan linearly for ``### Fact`` and collect lines until the
+    next heading at any level or the end-of-entry marker.
+    """
+    body = md.read_text(encoding="utf-8")
+    sections: list[str] = []
+    in_fact = False
+    buf: list[str] = []
+    for line in body.splitlines():
+        stripped = line.lstrip()
+        if stripped.startswith("### Fact"):
+            if in_fact:
+                sections.append("\n".join(buf).strip())
+            in_fact = True
+            buf = []
+            continue
+        # Any subsequent heading or entry-end marker closes the section.
+        if in_fact and (stripped.startswith("#") or stripped.startswith("<!-- /entry")):
+            sections.append("\n".join(buf).strip())
+            in_fact = False
+            buf = []
+            continue
+        if in_fact:
+            buf.append(line)
+    if in_fact:
+        sections.append("\n".join(buf).strip())
+    return [s for s in sections if s]
+
+
+# ── Response flattening + assertions ────────────────────────────────────
+
+
+def flatten_hits(data: dict[str, Any]) -> list[tuple[str | None, float, str]]:
+    """Collapse ``SearchData``'s four arrays into ``(owner_id, score, text)``.
+
+    Stable shape across track-kinds so the recall / partition tests
+    don't have to branch. Episodes / profiles carry ``user_id`` on the
+    item; cases / skills carry ``agent_id`` — both project to the
+    generic ``owner`` slot here. ``owner`` may be ``None`` for profile
+    hits where the owner is implicit.
+    """
+    out: list[tuple[str | None, float, str]] = []
+    for ep in data.get("episodes", []):
+        out.append(
+            (
+                ep.get("user_id"),
+                float(ep.get("score") or 0.0),
+                ep.get("episode") or ep.get("summary") or ep.get("subject") or "",
+            )
+        )
+    for pf in data.get("profiles", []):
+        out.append(
+            (
+                pf.get("user_id"),
+                float(pf.get("score") or 0.0),
+                str(pf.get("profile_data") or ""),
+            )
+        )
+    for cs in data.get("agent_cases", []):
+        out.append(
+            (
+                cs.get("agent_id"),
+                float(cs.get("score") or 0.0),
+                cs.get("approach") or cs.get("task_intent") or "",
+            )
+        )
+    for sk in data.get("agent_skills", []):
+        out.append(
+            (
+                sk.get("agent_id"),
+                float(sk.get("score") or 0.0),
+                sk.get("content") or sk.get("description") or "",
+            )
+        )
+    return out
+
+
+async def assert_recall(
+    client: httpx.AsyncClient,
+    *,
+    owner_id: str,
+    query: str,
+    method: str,
+    min_score: float = 0.0,
+    top_k: int = 10,
+) -> dict[str, Any]:
+    """Hit ``/search`` and lock the four standard recall invariants.
+
+    1. **Status** 200 — the route compiled.
+    2. **Existence** — ``total >= 1`` across the four arrays.
+    3. **Owner partition** — every non-``None`` ``owner_id`` matches
+       the queried owner. Profile hits may carry ``None`` so they're
+       skipped from the check.
+    4. **Score sanity** — the top-scored hit clears ``min_score``.
+
+    Returns the parsed response body so the caller can layer
+    case-specific assertions on top.
+    """
+    resp = await client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": owner_id,
+            "query": query,
+            "method": method,
+            "top_k": top_k,
+        },
+        timeout=120.0,
+    )
+    assert resp.status_code == 200, resp.text
+    body = resp.json()
+    hits = flatten_hits(body["data"])
+    assert hits, (
+        f"no hits for owner={owner_id} query={query!r} method={method} — "
+        f"recall is broken"
+    )
+    for hit_owner, _score, _text in hits:
+        if hit_owner is not None:
+            assert hit_owner == owner_id, (
+                f"partition leak: got owner={hit_owner!r} when querying {owner_id!r}"
+            )
+    top_score = max(score for _o, score, _t in hits)
+    assert top_score >= min_score, (
+        f"top hit score {top_score:.3f} < min {min_score} for method={method}"
+    )
+    return body
+
+
+# ── Token utilities (for content-overlap checks) ────────────────────────
+
+
+def query_tokens(query: str) -> set[str]:
+    """Lowercase content tokens worth checking for overlap in hit text."""
+    return {
+        t.lower()
+        for t in _TOKEN_RE.findall(query)
+        if len(t) >= _MIN_TOKEN_LEN and t.lower() not in _STOPWORDS
+    }
+
+
+def content_tokens_in_order(query: str) -> list[str]:
+    """Content tokens in original document order, dedup'd by first occurrence.
+
+    Used by the keyword test: the project's BM25 tokenizer (jieba) is
+    Chinese-first and degrades to near-zero recall on single short
+    English tokens. Multi-token phrases recall well in practice, so
+    keyword queries are built by concatenating consecutive content
+    tokens from the source fact rather than sorting alphabetically.
+    """
+    seen: set[str] = set()
+    out: list[str] = []
+    for t in _TOKEN_RE.findall(query):
+        low = t.lower()
+        if len(t) >= _MIN_TOKEN_LEN and low not in _STOPWORDS and low not in seen:
+            seen.add(low)
+            out.append(low)
+    return out
--- a/tests/integration/search/_rerun_probes.py
+++ b/tests/integration/search/_rerun_probes.py
@@ -0,0 +1,83 @@
+"""Re-run probes against an existing corpus + regenerate the report.
+
+Reuses everything from :mod:`_run_full_report` except the ingest step —
+points at the already-populated ``~/.everos-report-corpus`` and only
+re-runs the search probes + report rendering. Useful when the corpus
+is already there from a previous run and you just want to refresh the
+retrieval section without paying for LLM ingestion again.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+from dotenv import load_dotenv
+
+_PROJECT_ROOT = Path(__file__).resolve().parents[3]
+load_dotenv(_PROJECT_ROOT / ".env", override=False)
+
+
+from _run_full_report import (  # noqa: E402
+    CONVERSATION,
+    CORPUS_ROOT,
+    REPORT_PATH,
+    inspect_artifacts,
+    render_report,
+    run_probes,
+)
+
+
+async def main() -> None:
+    if not (CORPUS_ROOT / "users").is_dir():
+        raise SystemExit(f"{CORPUS_ROOT} not populated — run _run_full_report.py first")
+    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)
+    from everos.config import load_settings
+
+    load_settings.cache_clear()
+
+    print(f"[1/3] using corpus at {CORPUS_ROOT}")
+
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        print("[2/3] inspecting artifacts + running probes ...")
+        artifacts = await inspect_artifacts(CORPUS_ROOT)
+        probes = await run_probes(client)
+
+    print("[3/3] re-rendering report ...")
+    md = render_report(
+        memory_root=CORPUS_ROOT,
+        ingest_summary={
+            "batches": [
+                {
+                    "idx": i,
+                    "msg_count": len(b),
+                    "status": "extracted (cached)",
+                    "returned_count": len(b),
+                }
+                for i, b in enumerate(CONVERSATION)
+            ],
+            "flush_status": "extracted (cached)",
+        },
+        cascade_summary={
+            "note": "cascade was force-completed via _rerun_probes.py "
+            "after initial run; counts below are post-completion."
+        },
+        artifacts=artifacts,
+        probes=probes,
+    )
+    REPORT_PATH.write_text(md, encoding="utf-8")
+    print(f"      → {REPORT_PATH}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/tests/integration/search/_run_full_report.py
+++ b/tests/integration/search/_run_full_report.py
@@ -0,0 +1,660 @@
+"""End-to-end report generator: fresh corpus → ingest → retrieve → markdown report.
+
+Run with::
+
+    PYTHONPATH=src python tests/integration/search/_run_full_report.py
+
+Writes a fresh ``~/.everos-report-corpus/`` memory_root, runs a small
+synthetic 16-message conversation between two new users (``u_diana`` +
+``u_ethan``) through ``/add`` + ``/flush``, waits for cascade drain, then
+runs a curated set of search probes and dumps a structured markdown
+report to ``tests/integration/search/SEARCH_REPORT.md``.
+
+Not a pytest test — pure investigative script, real LLM, real embedder.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import shutil
+from pathlib import Path
+
+import httpx
+from dotenv import load_dotenv
+
+# Load .env BEFORE any everos import so settings are correct.
+# ``parents[3]`` climbs search/ -> integration/ -> tests/ -> repo root.
+# ``override=False`` is passed so values from .env do not replace
+# variables already present in the process environment.
+_PROJECT_ROOT = Path(__file__).resolve().parents[3]
+load_dotenv(_PROJECT_ROOT / ".env", override=False)
+
+
+# ── Corpus location ────────────────────────────────────────────────────
+
+
+# Memory root for the report run; ``main`` deletes and recreates it.
+CORPUS_ROOT = Path.home() / ".everos-report-corpus"
+# Destination of the rendered markdown report (inside the repo tree).
+REPORT_PATH = _PROJECT_ROOT / "tests/integration/search/SEARCH_REPORT.md"
+# Session id sent with every /add and /flush request.
+SESSION_ID = "report_session_diana_ethan"
+
+
+# ── Synthetic conversation (16 msgs, 2 batches) ────────────────────────
+
+
+CONVERSATION = [
+    # Batch 1 — introducing hobbies
+    [
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778414400000,
+            "content": "Hey Ethan! Just got back from a 3-day hike in Yosemite. "
+            "My new Sony A7 camera is amazing for landscape shots.",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778407260000,
+            "content": "Wow that sounds intense! I'd never survive without my "
+            "espresso. How's the Rust programming learning going?",
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778407320000,
+            "content": "Slow but steady. Working through the official book. "
+            "The borrow checker still trips me up.",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778407380000,
+            "content": "I'm marathon training — up to 15 miles long runs now. "
+            "Plus I joined a jazz quartet on weekends.",
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778407440000,
+            "content": "That's awesome! Saxophone again?",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778407500000,
+            "content": "Yeah, alto sax. We're playing at the Blue Note next month.",
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778407560000,
+            "content": "I'll come watch! Speaking of trips, want to do "
+            "that Iceland thing this summer?",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778407620000,
+            "content": "100% yes. I've been researching ring road photography spots.",
+        },
+    ],
+    # Batch 2 — Iceland trip planning
+    [
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778410800000,
+            "content": "I want to see the Northern Lights and shoot some "
+            "volcanic landscapes.",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778410860000,
+            "content": "We should rent a 4x4. The F-roads are insane I hear.",
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778410920000,
+            "content": "And I want to try Icelandic lamb stew. You cook, right?",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778410980000,
+            "content": (
+                "Yeah, I'll bring my Dutch oven. Maybe a cast iron pan for fish."
+            ),
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778411040000,
+            "content": "Perfect. Mid-July works for me — I have a Rust conference "
+            "in late August.",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778411100000,
+            "content": "July it is. I have the Boston Marathon qualifier in October "
+            "so I can't go after.",
+        },
+        {
+            "sender_id": "u_diana",
+            "role": "user",
+            "timestamp": 1778411160000,
+            "content": "Let's book flights this weekend?",
+        },
+        {
+            "sender_id": "u_ethan",
+            "role": "user",
+            "timestamp": 1778411220000,
+            "content": "Deal. Also bringing my Olympus E-M1 for the landscapes.",
+        },
+    ],
+]
+
+
+# ── Probe set ───────────────────────────────────────────────────────────
+
+
+# Curated search probes; ``run_probes`` issues one request per entry.
+#   section          report heading the probe is grouped under
+#   owner            sent as ``owner_id`` in the search payload
+#   query / method   forwarded verbatim in the payload
+#   include_profile  optional; forwarded only when truthy
+#   expect           free-text expectation printed in the report.
+#                    ``_grade`` picks its verdict rule from substrings
+#                    of this text ("should not leak", "empty arrays",
+#                    "0 hits", "profile"), so reword with care.
+PROBES: list[dict] = [
+    # Owner-specific topical: should recall the right owner's episodes.
+    {
+        "section": "Owner-specific topical (diana)",
+        "owner": "u_diana",
+        "query": "hiking",
+        "method": "hybrid",
+        "expect": "diana's Yosemite episode",
+    },
+    {
+        "section": "Owner-specific topical (diana)",
+        "owner": "u_diana",
+        "query": "Rust programming",
+        "method": "hybrid",
+        "expect": "diana's Rust learning facts",
+    },
+    {
+        "section": "Owner-specific topical (diana)",
+        "owner": "u_diana",
+        "query": "photography",
+        "method": "hybrid",
+        "expect": "diana's camera (Sony A7) facts",
+    },
+    {
+        "section": "Owner-specific topical (ethan)",
+        "owner": "u_ethan",
+        "query": "jazz",
+        "method": "hybrid",
+        "expect": "ethan's jazz quartet / sax facts",
+    },
+    {
+        "section": "Owner-specific topical (ethan)",
+        "owner": "u_ethan",
+        "query": "marathon training",
+        "method": "hybrid",
+        "expect": "ethan's marathon facts",
+    },
+    {
+        "section": "Owner-specific topical (ethan)",
+        "owner": "u_ethan",
+        "query": "cooking",
+        "method": "hybrid",
+        "expect": "ethan's Dutch oven / lamb stew facts",
+    },
+    # Shared topic — both should recall their own perspective.
+    {
+        "section": "Shared topic (Iceland)",
+        "owner": "u_diana",
+        "query": "Iceland trip",
+        "method": "hybrid",
+        "expect": "diana's planning episode",
+    },
+    {
+        "section": "Shared topic (Iceland)",
+        "owner": "u_ethan",
+        "query": "Iceland trip",
+        "method": "hybrid",
+        "expect": "ethan's planning episode",
+    },
+    # Method comparison on the same query.
+    {
+        "section": "Method comparison (diana + 'Rust')",
+        "owner": "u_diana",
+        "query": "Rust",
+        "method": "keyword",
+        "expect": "BM25 single token",
+    },
+    {
+        "section": "Method comparison (diana + 'Rust')",
+        "owner": "u_diana",
+        "query": "Rust",
+        "method": "vector",
+        "expect": "cosine ANN",
+    },
+    {
+        "section": "Method comparison (diana + 'Rust')",
+        "owner": "u_diana",
+        "query": "Rust",
+        "method": "hybrid",
+        "expect": "fusion of BM25 + vector",
+    },
+    # Owner partition: diana searching for ethan's exclusive topic.
+    {
+        "section": "Owner partition",
+        "owner": "u_diana",
+        "query": "jazz quartet",
+        "method": "hybrid",
+        "expect": "should NOT leak ethan's content",
+    },
+    {
+        "section": "Owner partition",
+        "owner": "u_ethan",
+        "query": "Rust programming",
+        "method": "hybrid",
+        "expect": "should NOT leak diana's content",
+    },
+    # Phrase + bigram.
+    {
+        "section": "Phrase queries",
+        "owner": "u_diana",
+        "query": "Northern Lights",
+        "method": "keyword",
+        "expect": "diana's Iceland aurora plans",
+    },
+    {
+        "section": "Phrase queries",
+        "owner": "u_ethan",
+        "query": "Boston Marathon",
+        "method": "keyword",
+        "expect": "ethan's qualifier date",
+    },
+    # include_profile.
+    {
+        "section": "Profile attach",
+        "owner": "u_diana",
+        "query": "anything",
+        "method": "hybrid",
+        "include_profile": True,
+        "expect": "should return diana's profile object",
+    },
+    # Unknown owner.
+    {
+        "section": "Unknown owner",
+        "owner": "u_ghost_does_not_exist",
+        "query": "hiking",
+        "method": "hybrid",
+        "expect": "empty arrays, status 200",
+    },
+    # Non-existent term.
+    {
+        "section": "Non-existent term",
+        "owner": "u_diana",
+        "query": "quantum blockchain pizza",
+        "method": "keyword",
+        "expect": "0 hits, status 200",
+    },
+]
+
+
+# ── Pipeline runners ───────────────────────────────────────────────────
+
+
+async def ingest(client: httpx.AsyncClient) -> dict:
+    """POST /add for each batch, then /flush. Return summary."""
+    summary: dict = {"batches": [], "flush_status": None}
+    for i, batch in enumerate(CONVERSATION):
+        resp = await client.post(
+            "/api/v1/memory/add",
+            json={"session_id": SESSION_ID, "messages": batch},
+            timeout=600.0,
+        )
+        resp.raise_for_status()
+        data = resp.json()["data"]
+        summary["batches"].append(
+            {
+                "idx": i,
+                "msg_count": len(batch),
+                "status": data["status"],
+                "returned_count": data["message_count"],
+            }
+        )
+    resp = await client.post(
+        "/api/v1/memory/flush",
+        json={"session_id": SESSION_ID},
+        timeout=600.0,
+    )
+    resp.raise_for_status()
+    summary["flush_status"] = resp.json()["data"]["status"]
+    return summary
+
+
+async def wait_cascade(
+    *,
+    expected_md_paths: int = 8,
+    stable_checks: int = 5,
+    deadline_seconds: float = 600.0,
+) -> dict:
+    """Block until cascade is *stably* done across all expected md kinds.
+
+    A plain ``pending == 0`` check is racy: OME async strategies
+    (extract_foresight / extract_user_profile) emit md writes
+    asynchronously after ``/flush`` returns, and there's a window
+    where the cascade queue is momentarily empty before OME's writes
+    arrive. We require two stronger conditions:
+
+    1. At least ``expected_md_paths`` rows exist in ``md_change_state``
+       (one per expected (owner × kind) — episodes + atomic_facts +
+       foresights + user_profile, per owner). This guards against
+       returning before OME has emitted *anything*.
+    2. ``pending == 0`` stays true for ``stable_checks`` consecutive
+       polls (separated by 1s sleep). This guards against a transient
+       empty queue while a strategy is still mid-write.
+    """
+    from everos.infra.persistence.sqlite import md_change_state_repo
+
+    consecutive_zero = 0
+    async with asyncio.timeout(deadline_seconds):
+        while True:
+            sm = await md_change_state_repo.queue_summary()
+            total_rows = (
+                sm.pending + sm.done + sm.failed_retryable + sm.failed_permanent
+            )
+            if sm.pending == 0 and total_rows >= expected_md_paths:
+                consecutive_zero += 1
+                if consecutive_zero >= stable_checks:
+                    return {
+                        "done": sm.done,
+                        "failed_retryable": sm.failed_retryable,
+                        "failed_permanent": sm.failed_permanent,
+                        "max_lsn": sm.max_lsn,
+                        "last_processed_lsn": sm.last_processed_lsn,
+                    }
+            else:
+                consecutive_zero = 0
+            await asyncio.sleep(1.0)
+
+
+async def inspect_artifacts(memory_root: Path) -> dict:
+    """Read md files + LanceDB counts after cascade drain."""
+    from everos.infra.persistence.lancedb import (
+        atomic_fact_repo,
+        dispose_connection,
+        episode_repo,
+        foresight_repo,
+        get_connection,
+        user_profile_repo,
+        verify_business_schemas,
+    )
+
+    await get_connection()
+    await verify_business_schemas()
+    counts = {
+        "episode_rows": await episode_repo.count(),
+        "atomic_fact_rows": await atomic_fact_repo.count(),
+        "foresight_rows": await foresight_repo.count(),
+        "user_profile_rows": await user_profile_repo.count(),
+    }
+    await dispose_connection()
+
+    md_files: list[str] = []
+    users_dir = memory_root / "default_app" / "default_project" / "users"
+    if users_dir.is_dir():
+        for f in sorted(users_dir.rglob("*.md")):
+            md_files.append(str(f.relative_to(memory_root)))
+    counts["md_files"] = md_files
+    return counts
+
+
+async def run_probes(client: httpx.AsyncClient) -> list[dict]:
+    """Execute every probe in :data:`PROBES`; return captured rows."""
+    rows: list[dict] = []
+    for p in PROBES:
+        payload: dict = {
+            "owner_id": p["owner"],
+            "owner_type": "user",
+            "query": p["query"],
+            "method": p["method"],
+            "top_k": 5,
+        }
+        if p.get("include_profile"):
+            payload["include_profile"] = True
+        resp = await client.post("/api/v1/memory/search", json=payload, timeout=120.0)
+        body = resp.json()
+        data = body.get("data", {})
+        rows.append(
+            {
+                "section": p["section"],
+                "expect": p["expect"],
+                "request": payload,
+                "status": resp.status_code,
+                "episodes": [
+                    {
+                        "id": e["id"],
+                        "owner_id": e["owner_id"],
+                        "score": round(float(e["score"]), 3),
+                        "summary": (e.get("summary") or "")[:150],
+                        "atomic_facts_count": len(e.get("atomic_facts", [])),
+                    }
+                    for e in data.get("episodes", [])
+                ],
+                "profiles": [
+                    {
+                        "owner_id": p_.get("owner_id"),
+                        "score": p_.get("score"),
+                        "summary_excerpt": str(p_.get("profile_data", {}))[:200],
+                    }
+                    for p_ in data.get("profiles", [])
+                ],
+            }
+        )
+    return rows
+
+
+# ── Markdown report renderer ───────────────────────────────────────────
+
+
+def render_report(
+    *,
+    memory_root: Path,
+    ingest_summary: dict,
+    cascade_summary: dict,
+    artifacts: dict,
+    probes: list[dict],
+) -> str:
+    lines: list[str] = []
+    lines.append("# Search E2E Report — fresh corpus (u_diana + u_ethan)\n")
+    lines.append(
+        "Generated by [`_run_full_report.py`](_run_full_report.py). "
+        "Two synthetic users with distinct hobbies feed a 16-message "
+        "conversation through the full pipeline; the report below "
+        "captures ingest stats, cascade drain numbers, on-disk "
+        "artifacts, and the response of every curated search probe.\n"
+    )
+
+    # ── Section: Setup ────────────────────────────────────────────────
+    lines.append("## 1. Setup\n")
+    lines.append(f"- **Memory root**: `{memory_root}`\n")
+    lines.append(f"- **Session id**: `{SESSION_ID}`\n")
+    lines.append(
+        "- **Users**: `u_diana` (hiking / Rust / photography), "
+        "`u_ethan` (jazz / marathon / cooking)\n"
+    )
+    lines.append(
+        f"- **Batches**: {len(CONVERSATION)} "
+        f"({sum(len(b) for b in CONVERSATION)} messages total)\n"
+    )
+
+    # ── Section: Ingest stats ─────────────────────────────────────────
+    lines.append("\n## 2. Ingest (`/add` × N + `/flush`)\n")
+    lines.append("| batch | msg_count | status |\n")
+    lines.append("|---|---|---|\n")
+    for b in ingest_summary["batches"]:
+        lines.append(f"| {b['idx']} | {b['msg_count']} | `{b['status']}` |\n")
+    lines.append(f"\n**Flush status**: `{ingest_summary['flush_status']}`\n")
+
+    # ── Section: Cascade drain ────────────────────────────────────────
+    lines.append("\n## 3. Cascade drain (md → LanceDB sync)\n")
+    lines.append("```\n")
+    lines.append(json.dumps(cascade_summary, indent=2) + "\n")
+    lines.append("```\n")
+
+    # ── Section: Artifacts ────────────────────────────────────────────
+    lines.append("\n## 4. On-disk artifacts\n")
+    lines.append("### LanceDB row counts\n\n")
+    lines.append("| table | rows |\n")
+    lines.append("|---|---|\n")
+    for k in (
+        "episode_rows",
+        "atomic_fact_rows",
+        "foresight_rows",
+        "user_profile_rows",
+    ):
+        lines.append(f"| {k.replace('_rows', '')} | {artifacts[k]} |\n")
+    lines.append("\n### Markdown files\n\n")
+    for f in artifacts["md_files"]:
+        lines.append(f"- `{f}`\n")
+
+    # ── Section: Probes ───────────────────────────────────────────────
+    lines.append("\n## 5. Retrieval probes\n")
+    lines.append(
+        "Every row below is one POST to `/api/v1/memory/search`. "
+        "`expected` is what the test designer expects to see; "
+        "actual results are captured verbatim.\n"
+    )
+    current_section = None
+    for row in probes:
+        if row["section"] != current_section:
+            lines.append(f"\n### {row['section']}\n")
+            current_section = row["section"]
+        req = row["request"]
+        lines.append(
+            f"\n#### `{req['query']}`  (method=`{req['method']}`, "
+            f"owner=`{req['owner_id']}`)\n"
+        )
+        lines.append(f"\n- **Expected**: {row['expect']}\n")
+        lines.append(f"- **Status**: {row['status']}\n")
+        lines.append(f"- **Episodes returned**: {len(row['episodes'])}\n")
+        if row["episodes"]:
+            lines.append("\n| rank | score | owner | atomic_facts | summary |\n")
+            lines.append("|---|---|---|---|---|\n")
+            for i, ep in enumerate(row["episodes"], 1):
+                summary = ep["summary"].replace("|", "\\|")
+                lines.append(
+                    f"| {i} | {ep['score']} | `{ep['owner_id']}` | "
+                    f"{ep['atomic_facts_count']} | {summary} |\n"
+                )
+        else:
+            lines.append("\n_(no episodes)_\n")
+        if row["profiles"]:
+            lines.append(
+                "\n**Profile attached**: "
+                f"`{row['profiles'][0]['owner_id']}` "
+                f"(excerpt: {row['profiles'][0]['summary_excerpt']!r})\n"
+            )
+
+    # ── Section: Pass/Fail summary ────────────────────────────────────
+    lines.append("\n## 6. Pass / Fail summary\n")
+    pf = _grade(probes)
+    lines.append("| # | section | query | result |\n")
+    lines.append("|---|---|---|---|\n")
+    for r in pf:
+        lines.append(
+            f"| {r['idx']} | {r['section']} | `{r['query']}` | {r['verdict']} |\n"
+        )
+    passed = sum(1 for r in pf if r["verdict"].startswith("✅"))
+    lines.append(f"\n**Total: {passed}/{len(pf)} passed.**\n")
+
+    return "".join(lines)
+
+
+def _grade(probes: list[dict]) -> list[dict]:
+    """Apply soft heuristic pass/fail to each probe based on its 'expect'."""
+    graded: list[dict] = []
+    for i, row in enumerate(probes, 1):
+        req = row["request"]
+        expect = row["expect"].lower()
+        verdict = "—"
+        if "should not leak" in expect:
+            leaked = any(ep["owner_id"] != req["owner_id"] for ep in row["episodes"])
+            verdict = "❌ leaked" if leaked else "✅ no leak"
+        elif "empty arrays" in expect or "0 hits" in expect:
+            verdict = "✅" if not row["episodes"] else f"❌ got {len(row['episodes'])}"
+        elif "profile" in expect:
+            verdict = "✅" if row["profiles"] else "❌ no profile"
+        elif row["episodes"]:
+            top_owner = row["episodes"][0]["owner_id"]
+            verdict = (
+                "✅" if top_owner == req["owner_id"] else f"❌ wrong owner: {top_owner}"
+            )
+        else:
+            verdict = "❌ no hits"
+        graded.append(
+            {
+                "idx": i,
+                "section": row["section"],
+                "query": req["query"],
+                "verdict": verdict,
+            }
+        )
+    return graded
+
+
+# ── Main ────────────────────────────────────────────────────────────────
+
+
+async def main() -> None:
+    # Reset corpus to a known empty state.
+    if CORPUS_ROOT.exists():
+        shutil.rmtree(CORPUS_ROOT)
+    CORPUS_ROOT.mkdir(parents=True)
+    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)
+
+    # Reset cached singletons so they pick up the new env.
+    from everos.config import load_settings
+
+    load_settings.cache_clear()
+
+    print(f"[1/6] fresh corpus at {CORPUS_ROOT}")
+
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        print("[2/6] ingesting via /add + /flush ...")
+        ingest_summary = await ingest(client)
+        print(f"      batches={ingest_summary['batches']}")
+
+        print("[3/6] waiting for cascade drain ...")
+        cascade_summary = await wait_cascade()
+        print(f"      drained: {cascade_summary}")
+
+        print("[4/6] inspecting on-disk artifacts ...")
+        artifacts = await inspect_artifacts(CORPUS_ROOT)
+        print(
+            "      lancedb: {k: v for k,v in artifacts.items() if k.endswith('_rows')}"
+        )
+
+        print(f"[5/6] running {len(PROBES)} search probes ...")
+        probes = await run_probes(client)
+
+    print("[6/6] rendering report ...")
+    md = render_report(
+        memory_root=CORPUS_ROOT,
+        ingest_summary=ingest_summary,
+        cascade_summary=cascade_summary,
+        artifacts=artifacts,
+        probes=probes,
+    )
+    REPORT_PATH.write_text(md, encoding="utf-8")
+    print(f"      → {REPORT_PATH}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/tests/integration/search/conftest.py
+++ b/tests/integration/search/conftest.py
@ -0,0 +1,269 @@
+"""Session-scoped corpus fixture for ``tests/integration/search/``.
+
+The pipeline that produces the search corpus (`/add` × 19 + `/flush` +
+cascade drain) is the same one exercised by
+``tests/integration/test_add_flush_pipeline_e2e.py`` — and it costs
+~10 minutes against real LLMs. To keep the search test suite usable
+in CI we run that pipeline **once per session** here, persist the
+resulting memory_root to a session ``tmp_path``, and let every test
+re-attach a fresh FastAPI lifespan against the on-disk corpus.
+
+Layout::
+
+    _ingested_memory_root  (session-scoped)
+        └── ingests LoCoMo conv_0 via the HTTP API, then tears
+            lifespan down. Returns the memory_root path with md +
+            sqlite + lancedb populated on disk.
+
+    search_client  (function-scoped)
+        └── per-test ``httpx.AsyncClient`` wired to a freshly built
+            FastAPI app, ``EVEROS_MEMORY__ROOT`` pointed at the
+            session corpus. Singletons are reset so each test starts
+            with cold caches and the lifespan is the only thing
+            constructing them.
+
+This is intentionally separate from ``tests/integration/conftest.py``
+fixtures (which are function-scoped). Cross-suite isolation: tests
+under ``search/`` cannot poison or be poisoned by the ones above.
+
+All tests in this folder are marked ``slow`` via the module-level
+``pytestmark`` in ``test_search_e2e.py`` — a non-``-m slow`` run skips
+the whole suite cleanly without paying the ingest cost.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import os
+from collections.abc import AsyncIterator, Awaitable, Callable, Generator
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+from sqlalchemy import text
+
+# Set ``EVEROS_REUSE_CORPUS=<path>`` to skip ingest and reuse an existing,
+# fully-ingested memory_root (it must contain default_app/default_project/
+# users/). Search is a read-only path, so no copy is needed — the fixture
+# just sets ``EVEROS_MEMORY__ROOT`` to that directory.
+_REUSE_ENV = "EVEROS_REUSE_CORPUS"
+
+# Attribute names on ``everos.service.memorize`` holding lazy singletons; set
+# to ``None`` so stale clients / engines don't leak into per-test lifespans.
+_MEMORIZE_SINGLETONS: tuple[str, ...] = (
+    "_episode_writer",
+    "_prompt_loader",
+    "_user_pipeline",
+    "_agent_pipeline",
+    "_ome_engine",
+)
+
+
+# ── Session-scoped MonkeyPatch ─────────────────────────────────────────
+
+
+@pytest.fixture(scope="session")
+def _session_monkeypatch() -> Generator[pytest.MonkeyPatch, None, None]:
+    """A ``MonkeyPatch`` instance with session lifetime.
+
+    Pytest's default ``monkeypatch`` is function-scoped. The ingest
+    fixture below has to set env vars and null singletons before the
+    lifespan even starts — those changes have to live for the whole
+    session, so we open our own ``MonkeyPatch`` and undo it at session
+    end.
+    """
+    mp = pytest.MonkeyPatch()
+    yield mp
+    mp.undo()
+
+
+# ── Singleton reset helper ─────────────────────────────────────────────
+
+
+def _reset_memorize_singletons(mp: pytest.MonkeyPatch) -> None:
+    """Null out memorize/strategy/LLM-client lazy singletons.
+
+    Called once before ingest (so the freshly-set ``EVEROS_MEMORY__ROOT``
+    actually wins) and once per test (so the session corpus's lifespan
+    sees clean caches).
+    """
+    from everos.config import load_settings
+
+    load_settings.cache_clear()
+
+    svc = importlib.import_module("everos.service.memorize")
+    client_mod = importlib.import_module("everos.component.llm.client")
+    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
+    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
+
+    for attr in _MEMORIZE_SINGLETONS:
+        mp.setattr(svc, attr, None, raising=False)
+    mp.setattr(client_mod, "_llm_client", None, raising=False)
+    mp.setattr(af_mod, "_writer", None, raising=False)
+    mp.setattr(fs_mod, "_writer", None, raising=False)
+
+
+# ── Session corpus: ingest once ────────────────────────────────────────
+
+
+@pytest.fixture(scope="session")
+def _ingested_memory_root(
+    tmp_path_factory: pytest.TempPathFactory,
+    _session_monkeypatch: pytest.MonkeyPatch,
+    long_conversation: dict,
+) -> Path:
+    """Run /add × 19 + /flush + cascade drain once; return the memory_root.
+
+    All on-disk artifacts (md files + sqlite system.db + lancedb
+    tables) survive lifespan teardown, so per-test fixtures can
+    re-attach a fresh app against the populated root and exercise
+    only the read path.
+
+    Marked **slow** transitively via ``pytestmark`` in
+    ``test_search_e2e.py`` — without ``-m slow`` the test module is
+    deselected and this fixture is never instantiated.
+    """
+    reuse = os.environ.get(_REUSE_ENV)
+    if reuse:
+        memory_root = Path(reuse).expanduser().resolve()
+        users_dir = memory_root / "default_app" / "default_project" / "users"
+        if not users_dir.is_dir():
+            raise AssertionError(
+                f"{_REUSE_ENV}={memory_root} has no "
+                "default_app/default_project/users/ subdir — point it at a "
+                "fully-ingested memory_root or unset to rebuild from scratch"
+            )
+    else:
+        memory_root = tmp_path_factory.mktemp("search_corpus")
+
+    _session_monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(memory_root))
+    _reset_memorize_singletons(_session_monkeypatch)
+
+    if reuse:
+        # Search is read-only; the corpus is consumed in place, no copy.
+        return memory_root
+
+    # Drive the ingest in its own event loop. The lifespan inside
+    # ``_ingest`` properly closes LanceDB / SQLite handles on exit so
+    # the per-test lifespans can re-open them.
+    asyncio.run(_ingest(memory_root, long_conversation))
+    return memory_root
+
+
+async def _ingest(memory_root: Path, long_conversation: dict) -> None:
+    """Bring up the app once, push the LoCoMo fixture through /add+/flush."""
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        session_id = long_conversation["everos_session_id"]
+        for batch in long_conversation["batches"]:
+            messages = [
+                {
+                    "sender_id": m["sender_id"],
+                    "role": m["role"],
+                    "timestamp": m["timestamp"],
+                    "content": m["content"],
+                }
+                for m in batch["messages"]
+            ]
+            resp = await client.post(
+                "/api/v1/memory/add",
+                json={"session_id": session_id, "messages": messages},
+                timeout=600.0,
+            )
+            resp.raise_for_status()
+
+        resp = await client.post(
+            "/api/v1/memory/flush",
+            json={"session_id": session_id},
+            timeout=600.0,
+        )
+        resp.raise_for_status()
+
+        await _poll_cascade_drained(deadline_seconds=600.0)
+
+
+async def _poll_cascade_drained(*, deadline_seconds: float) -> None:
+    """Block until ``md_change_state.pending == 0`` or deadline."""
+    from everos.infra.persistence.sqlite import md_change_state_repo
+
+    async with asyncio.timeout(deadline_seconds):
+        while True:
+            summary = await md_change_state_repo.queue_summary()
+            if summary.pending == 0:
+                return
+            await asyncio.sleep(0.5)
+
+
+# ── Per-test client against the session corpus ─────────────────────────
+
+
+@pytest_asyncio.fixture
+async def search_client(
+    _ingested_memory_root: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> AsyncIterator[httpx.AsyncClient]:
+    """Per-test ``AsyncClient`` reading from the session corpus.
+
+    Singletons are reset before the lifespan starts so the search
+    manager builds a fresh embedding / rerank / LLM client per test —
+    we don't want cross-test client state to mask a regression.
+    """
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(_ingested_memory_root))
+    _reset_memorize_singletons(monkeypatch)
+
+    # The search service has its own module-level singletons; reset
+    # those too so re-attach is clean.
+    search_svc = importlib.import_module("everos.service.search")
+    for attr in (
+        "_manager",
+        "_embedding",
+        "_reranker",
+        "_llm_client",
+        "_embedding_resolved",
+        "_rerank_resolved",
+        "_llm_resolved",
+    ):
+        if hasattr(search_svc, attr):
+            monkeypatch.setattr(
+                search_svc,
+                attr,
+                None if not attr.endswith("_resolved") else False,
+                raising=False,
+            )
+
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        yield client
+
+
+# ── Diagnostic helpers (handy for tests that probe SQLite directly) ───
+
+
+@pytest.fixture
+def memcell_count() -> Callable[[], Awaitable[int]]:
+    """Return an async callable: ``await memcell_count() -> int``."""
+
+    async def _count() -> int:
+        from everos.infra.persistence.sqlite import get_engine
+
+        engine = get_engine()
+        async with engine.connect() as conn:
+            result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
+            return int(result.scalar() or 0)
+
+    return _count
--- a/tests/integration/search/test_search_e2e.py
+++ b/tests/integration/search/test_search_e2e.py
@ -0,0 +1,241 @@
+"""End-to-end ``/api/v1/memory/search`` tests over a real LoCoMo corpus.
+
+Six tests, each pinning one path through :class:`SearchManager`:
+
+============================================  =================================
+``test_keyword_recalls_atomic_fact_origin``   keyword (BM25 only)
+``test_vector_recalls_atomic_fact_origin``    vector (cosine only)
+``test_hybrid_with_profile_returns_profile``  hybrid + ``include_profile``
+``test_partition_respects_owner_id``          cross-owner isolation
+``test_unknown_owner_returns_empty_200``      empty response, no 500
+``test_filter_dsl_compiles_and_excludes``     filters DSL → LanceDB ``where``
+============================================  =================================
+
+The corpus is built once by :func:`_ingested_memory_root` (session-
+scoped fixture in ``conftest.py``) and shared across all tests. Each
+test re-attaches a fresh lifespan via :func:`search_client`, so the
+search-manager singletons rebuild from cold per-test — a regression
+in the lazy-init path can't hide behind warm state from a prior test.
+
+Bootstrapping: queries are derived from the corpus's own
+``atomic_facts`` md files via :func:`pick_query_seeds`, not
+hardcoded. Closed-loop correctness — what the pipeline extracted
+should be findable by the search side.
+
+Assertions follow the project's "守恒 + 下界 + 形状" ("conservation +
+lower bound + shape") convention (see :func:`_helpers.assert_recall`):
+no exact ranks, no exact scores, no exact ids. LLM-driven retrieval is
+non-deterministic across runs; brittle assertions cause CI noise, not
+signal.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import httpx
+import pytest
+
+from ._helpers import (
+    assert_recall,
+    flatten_hits,
+    pick_query_seeds,
+)
+
+# Whole module is opt-in: every test here needs ``_ingested_memory_root``,
+# which spends ~10 min running real LLM + embedder against LoCoMo conv_0.
+pytestmark = pytest.mark.slow
+
+
+# ── 1. Keyword recall ──────────────────────────────────────────────────
+
+
+async def test_keyword_recalls_atomic_fact_origin(
+    search_client: httpx.AsyncClient,
+    _ingested_memory_root: Path,
+) -> None:
+    """BM25 must recall *some* episode for *some* fact-derived bigram.
+
+    The project's tokenizer is jieba (CJK-first); single short
+    English tokens and proper nouns / all-caps acronyms recall
+    poorly, but ordinary lowercase content bigrams recall reliably
+    (verified empirically). So we walk through the first N atomic
+    facts, pull consecutive lowercase content tokens, and pass the
+    test as soon as one candidate bigram returns ≥ 1 hit. This
+    validates the BM25 plumbing without coupling to which specific
+    fact got sampled — vector + hybrid tests own the strict
+    closed-loop recall claim.
+    """
+    seeds = pick_query_seeds(_ingested_memory_root, limit=20)
+    last_query: str | None = None
+    for owner, fact in seeds:
+        for query in _candidate_bigrams(fact):
+            last_query = query
+            resp = await search_client.post(
+                "/api/v1/memory/search",
+                json={
+                    "user_id": owner,
+                    "query": query,
+                    "method": "keyword",
+                    "top_k": 5,
+                },
+                timeout=60.0,
+            )
+            assert resp.status_code == 200, resp.text
+            hits = flatten_hits(resp.json()["data"])
+            if hits:
+                # Partition still holds even on a successful keyword hit.
+                for hit_owner, _s, _t in hits:
+                    if hit_owner is not None:
+                        assert hit_owner == owner
+                return
+    raise AssertionError(
+        f"BM25 returned 0 hits across {len(seeds)} fact seeds; "
+        f"last tried query={last_query!r}"
+    )
+
+
+def _candidate_bigrams(fact: str) -> list[str]:
+    """Lowercase consecutive content-token bigrams from ``fact``.
+
+    Skip tokens that include uppercase letters in the original text
+    (proper nouns / acronyms — empirically poor BM25 recall under
+    jieba). Returns at most 5 candidates per fact, in source order.
+    """
+    import re as _re
+
+    out: list[str] = []
+    tokens: list[str] = []
+    for raw in _re.findall(r"\w+", fact):
+        if raw.lower() == raw and len(raw) >= 3:
+            tokens.append(raw)
+    for i in range(len(tokens) - 1):
+        out.append(f"{tokens[i]} {tokens[i + 1]}")
+        if len(out) >= 5:
+            break
+    return out
+
+
+# ── 2. Vector recall ───────────────────────────────────────────────────
+
+
+async def test_vector_recalls_atomic_fact_origin(
+    search_client: httpx.AsyncClient,
+    _ingested_memory_root: Path,
+) -> None:
+    """Same fact via cosine ANN — independent of BM25 tokenisation."""
+    owner, fact = pick_query_seeds(_ingested_memory_root, limit=1)[0]
+    await assert_recall(
+        search_client,
+        owner_id=owner,
+        query=fact,
+        method="vector",
+        # Cosine: identical text would score ~1.0; threshold loose
+        # because the LLM-summarised episode text isn't the verbatim fact.
+        min_score=0.1,
+    )
+
+
+# ── 3. Hybrid + include_profile ────────────────────────────────────────
+
+
+async def test_hybrid_with_profile_returns_profile(
+    search_client: httpx.AsyncClient,
+    _ingested_memory_root: Path,
+) -> None:
+    """``include_profile=true`` must populate the profiles array."""
+    owner, fact = pick_query_seeds(_ingested_memory_root, limit=1)[0]
+    resp = await search_client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": owner,
+            "query": fact,
+            "method": "hybrid",
+            "top_k": 5,
+            "include_profile": True,
+        },
+        timeout=120.0,
+    )
+    assert resp.status_code == 200, resp.text
+    data = resp.json()["data"]
+    assert data["profiles"], "include_profile=true but profiles[] empty"
+    assert data["profiles"][0]["user_id"] == owner
+
+
+# ── 4. Owner partition ─────────────────────────────────────────────────
+
+
+async def test_partition_respects_owner_id(
+    search_client: httpx.AsyncClient,
+    _ingested_memory_root: Path,
+) -> None:
+    """Querying owner=A must not leak owner=B's data, even on shared topics."""
+    seeds = pick_query_seeds(_ingested_memory_root, limit=2)
+    owners = {o for o, _ in seeds}
+    assert len(owners) >= 1, "need at least one owner in the corpus"
+    target_owner = next(iter(owners))
+    _, fact = next((o, f) for o, f in seeds if o == target_owner)
+
+    body = await assert_recall(
+        search_client,
+        owner_id=target_owner,
+        query=fact,
+        method="hybrid",
+    )
+    # Agent tracks must be empty for user owners.
+    assert body["data"]["agent_cases"] == []
+    assert body["data"]["agent_skills"] == []
+
+
+# ── 5. Unknown owner ───────────────────────────────────────────────────
+
+
+async def test_unknown_owner_returns_empty_200(
+    search_client: httpx.AsyncClient,
+) -> None:
+    """An owner that the corpus never saw → 200 with four empty arrays."""
+    resp = await search_client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": "ghost_user_does_not_exist",
+            "query": "anything",
+            "method": "hybrid",
+            "top_k": 5,
+        },
+        timeout=60.0,
+    )
+    assert resp.status_code == 200, resp.text
+    data = resp.json()["data"]
+    assert data["episodes"] == []
+    assert data["profiles"] == []
+    assert data["agent_cases"] == []
+    assert data["agent_skills"] == []
+
+
+# ── 6. Filter DSL ──────────────────────────────────────────────────────
+
+
+async def test_filter_dsl_compiles_and_excludes(
+    search_client: httpx.AsyncClient,
+    _ingested_memory_root: Path,
+) -> None:
+    """Add a ``session_id`` ne-filter, verify the returned hits respect it."""
+    owner, fact = pick_query_seeds(_ingested_memory_root, limit=1)[0]
+    bogus_session = "session_that_never_was"
+    resp = await search_client.post(
+        "/api/v1/memory/search",
+        json={
+            "user_id": owner,
+            "query": fact,
+            "method": "keyword",
+            "top_k": 10,
+            "filters": {"session_id": {"ne": bogus_session}},
+        },
+        timeout=120.0,
+    )
+    assert resp.status_code == 200, resp.text
+    data = resp.json()["data"]
+    # The filter is satisfied by every real episode (none have the
+    # bogus id), so the hit count should be ≥ 1 — the filter
+    # compiled and shipped to LanceDB without breaking recall.
+    for ep in data["episodes"]:
+        assert ep["session_id"] != bogus_session