"""Session-scoped corpus fixture for ``tests/integration/search/``.

The pipeline that produces the search corpus (`/add` × 19 + `/flush` +
cascade drain) is the same one exercised by
``tests/integration/test_add_flush_pipeline_e2e.py`` — and it costs
~10 minutes against real LLMs. To keep the search test suite usable in
CI we run that pipeline **once per session** here, persist the
resulting memory_root to a session ``tmp_path``, and let every test
re-attach a fresh FastAPI lifespan against the on-disk corpus.

Layout::

    _ingested_memory_root   (session-scoped)
        └── ingests LoCoMo conv_0 via the HTTP API, then tears lifespan
            down. Returns the memory_root path with md + sqlite +
            lancedb populated on disk.

    search_client           (function-scoped)
        └── per-test ``httpx.AsyncClient`` wired to a freshly built
            FastAPI app, ``EVEROS_MEMORY__ROOT`` pointed at the
            session corpus. Singletons are reset so each test starts
            with cold caches and the lifespan is the only thing
            constructing them.

This is intentionally separate from ``tests/integration/conftest.py``
fixtures (which are function-scoped). Cross-suite isolation: tests
under ``search/`` cannot poison or be poisoned by the ones above.

All tests in this folder are marked ``slow`` via the module-level
``pytestmark`` in ``test_search_e2e.py`` — a non-``-m slow`` run skips
the whole suite cleanly without paying the ingest cost.
"""

from __future__ import annotations

import asyncio
import importlib
import os
from collections.abc import AsyncIterator, Awaitable, Callable, Generator
from pathlib import Path

import httpx
import pytest
import pytest_asyncio
from sqlalchemy import text

# Set ``EVEROS_REUSE_CORPUS=/path/to/memory_root`` to skip ingest and
# point the session fixture at an existing memory_root (md + lancedb
# already populated). Search is a read-only path, so no copy is needed —
# the fixture just sets ``EVEROS_MEMORY__ROOT`` to that directory.
_REUSE_ENV = "EVEROS_REUSE_CORPUS"

# Memorize-service module-level lazy singletons; reset between phases so
# stale clients / engines don't leak from ingest into per-test lifespans.
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
    "_episode_writer",
    "_prompt_loader",
    "_user_pipeline",
    "_agent_pipeline",
    "_ome_engine",
)

# Search-service module-level singletons mapped to the value each one is
# reset to before a per-test lifespan starts: cached objects go back to
# ``None``, the ``*_resolved`` flags go back to ``False``.
_SEARCH_SINGLETON_RESETS: dict[str, bool | None] = {
    "_manager": None,
    "_embedding": None,
    "_reranker": None,
    "_llm_client": None,
    "_embedding_resolved": False,
    "_rerank_resolved": False,
    "_llm_resolved": False,
}

# Per-request HTTP timeout used while ingesting (each /add and /flush).
_INGEST_REQUEST_TIMEOUT_SECONDS = 600.0

# How long to wait for the cascade queue to drain after /flush.
_CASCADE_DRAIN_DEADLINE_SECONDS = 600.0


# ── Session-scoped MonkeyPatch ─────────────────────────────────────────


@pytest.fixture(scope="session")
def _session_monkeypatch() -> Generator[pytest.MonkeyPatch, None, None]:
    """Yield a ``MonkeyPatch`` instance with session lifetime.

    Pytest's default ``monkeypatch`` is function-scoped. The ingest
    fixture below has to set env vars and null singletons before the
    lifespan even starts — those changes have to live for the whole
    session, so we open our own ``MonkeyPatch`` and undo it at session
    end.
    """
    mp = pytest.MonkeyPatch()
    try:
        yield mp
    finally:
        # Always revert, even if the generator is closed abnormally.
        mp.undo()


# ── Singleton reset helper ─────────────────────────────────────────────


def _reset_memorize_singletons(mp: pytest.MonkeyPatch) -> None:
    """Null out memorize/strategy/LLM-client lazy singletons.

    Called once before ingest (so the freshly-set ``EVEROS_MEMORY__ROOT``
    actually wins) and once per test (so the session corpus's lifespan
    sees clean caches).

    Args:
        mp: The ``MonkeyPatch`` that owns the changes; its scope decides
            when the previous attribute values are restored.
    """
    from everos.config import load_settings

    # Drop the cached settings so the next load re-reads the environment.
    load_settings.cache_clear()

    svc = importlib.import_module("everos.service.memorize")
    client_mod = importlib.import_module("everos.component.llm.client")
    af_mod = importlib.import_module(
        "everos.memory.strategies.extract_atomic_facts"
    )
    fs_mod = importlib.import_module(
        "everos.memory.strategies.extract_foresight"
    )

    # ``raising=False`` keeps the reset tolerant of attributes that do
    # not exist on the module.
    for attr in _MEMORIZE_SINGLETONS:
        mp.setattr(svc, attr, None, raising=False)
    mp.setattr(client_mod, "_llm_client", None, raising=False)
    mp.setattr(af_mod, "_writer", None, raising=False)
    mp.setattr(fs_mod, "_writer", None, raising=False)


# ── Session corpus: ingest once ────────────────────────────────────────


@pytest.fixture(scope="session")
def _ingested_memory_root(
    tmp_path_factory: pytest.TempPathFactory,
    _session_monkeypatch: pytest.MonkeyPatch,
    long_conversation: dict,
) -> Path:
    """Run /add per batch + /flush + cascade drain once; return the root.

    All on-disk artifacts (md files + sqlite system.db + lancedb tables)
    survive lifespan teardown, so per-test fixtures can re-attach a fresh
    app against the populated root and exercise only the read path.

    Marked **slow** transitively via ``pytestmark`` in
    ``test_search_e2e.py`` — without ``-m slow`` the test module is
    deselected and this fixture is never instantiated.

    Returns:
        The memory_root directory: either the directory named by
        ``EVEROS_REUSE_CORPUS`` (used in place, no ingest) or a fresh
        session temp directory populated by ``_ingest``.

    Raises:
        AssertionError: If ``EVEROS_REUSE_CORPUS`` is set but the
            directory lacks ``default_app/default_project/users/``.
    """
    reuse = os.environ.get(_REUSE_ENV)
    if reuse:
        memory_root = Path(reuse).expanduser().resolve()
        users_dir = memory_root / "default_app" / "default_project" / "users"
        if not users_dir.is_dir():
            raise AssertionError(
                f"{_REUSE_ENV}={memory_root} has no "
                "default_app/default_project/users/ subdir — point it at a "
                "fully-ingested memory_root or unset to rebuild from scratch"
            )
    else:
        memory_root = tmp_path_factory.mktemp("search_corpus")

    # Env var first, then the reset: the reset clears the settings cache
    # so the new root is what gets loaded next.
    _session_monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(memory_root))
    _reset_memorize_singletons(_session_monkeypatch)

    if not reuse:
        # Drive the ingest in its own event loop. The lifespan inside
        # ``_ingest`` properly closes LanceDB / SQLite handles on exit so
        # the per-test lifespans can re-open them.
        asyncio.run(_ingest(memory_root, long_conversation))

    # With reuse, search is read-only; the corpus is consumed in place,
    # no copy.
    return memory_root


async def _ingest(memory_root: Path, long_conversation: dict) -> None:
    """Bring up the app once, push the LoCoMo fixture through /add+/flush.

    Args:
        memory_root: Target corpus directory. Not read in this body —
            presumably the app picks the root up from
            ``EVEROS_MEMORY__ROOT``, which the caller sets beforehand
            (TODO confirm).
        long_conversation: Fixture dict; this function reads
            ``"everos_session_id"`` and ``"batches"``, and from each
            batch its ``"messages"`` list.

    Raises:
        httpx.HTTPStatusError: If any /add or /flush call returns an
            error status.
        TimeoutError: If the cascade queue does not drain in time.
    """
    from everos.entrypoints.api.app import create_app

    # Only these message fields are forwarded to the API.
    message_fields = ("sender_id", "role", "timestamp", "content")

    app = create_app()
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        session_id = long_conversation["everos_session_id"]

        for batch in long_conversation["batches"]:
            messages = [
                {field: m[field] for field in message_fields}
                for m in batch["messages"]
            ]
            resp = await client.post(
                "/api/v1/memory/add",
                json={"session_id": session_id, "messages": messages},
                timeout=_INGEST_REQUEST_TIMEOUT_SECONDS,
            )
            resp.raise_for_status()

        resp = await client.post(
            "/api/v1/memory/flush",
            json={"session_id": session_id},
            timeout=_INGEST_REQUEST_TIMEOUT_SECONDS,
        )
        resp.raise_for_status()

        # Wait while the lifespan is still up, so whatever drains the
        # queue keeps running until it is empty.
        await _poll_cascade_drained(
            deadline_seconds=_CASCADE_DRAIN_DEADLINE_SECONDS
        )


async def _poll_cascade_drained(
    *,
    deadline_seconds: float,
    poll_interval_seconds: float = 0.5,
) -> None:
    """Block until ``md_change_state.pending == 0`` or deadline.

    Args:
        deadline_seconds: Maximum total time to wait.
        poll_interval_seconds: Sleep between queue checks.

    Raises:
        TimeoutError: If the queue still reports pending work when the
            deadline expires; the message carries the last observed
            pending count.
    """
    from everos.infra.persistence.sqlite import md_change_state_repo

    last_pending = None
    try:
        async with asyncio.timeout(deadline_seconds):
            while True:
                summary = await md_change_state_repo.queue_summary()
                last_pending = summary.pending
                if last_pending == 0:
                    return
                await asyncio.sleep(poll_interval_seconds)
    except TimeoutError as exc:
        # Same exception type as before, but with enough context to tell
        # a stuck queue from a slow one.
        raise TimeoutError(
            f"cascade queue not drained after {deadline_seconds}s "
            f"(last observed pending={last_pending!r})"
        ) from exc


# ── Per-test client against the session corpus ─────────────────────────


@pytest_asyncio.fixture
async def search_client(
    _ingested_memory_root: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[httpx.AsyncClient]:
    """Per-test ``AsyncClient`` reading from the session corpus.

    Singletons are reset before the lifespan starts so the search manager
    builds a fresh embedding / rerank / LLM client per test — we don't
    want cross-test client state to mask a regression.

    Yields:
        An ``httpx.AsyncClient`` bound to a newly created app over an
        in-process ASGI transport; the app lifespan stays open for the
        duration of the test.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(_ingested_memory_root))
    _reset_memorize_singletons(monkeypatch)

    # The search service has its own module-level singletons; reset
    # those too so re-attach is clean. Attributes the module does not
    # define are skipped rather than created.
    search_svc = importlib.import_module("everos.service.search")
    for attr, reset_value in _SEARCH_SINGLETON_RESETS.items():
        if hasattr(search_svc, attr):
            monkeypatch.setattr(search_svc, attr, reset_value)

    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        yield client


# ── Diagnostic helpers (handy for tests that probe SQLite directly) ───


@pytest.fixture
def memcell_count() -> Callable[[], Awaitable[int]]:
    """Return an async callable: ``await memcell_count() -> int``.

    The callable runs ``SELECT COUNT(*) FROM memcell`` on the engine
    returned by ``get_engine()`` and returns the count (0 when the scalar
    is empty).
    """

    async def _count() -> int:
        from everos.infra.persistence.sqlite import get_engine

        engine = get_engine()
        async with engine.connect() as conn:
            result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
            return int(result.scalar() or 0)

    return _count