# EverOS/tests/e2e/conftest.py

"""Shared fixtures for ``tests/e2e/``.

Provides:

- ``core_pipeline_runtime``: tmp memory root + reset memorize singletons.
  Uses the **real** LLM / embedding / rerank creds from ``.env`` per the
  project test policy.
- ``async_client``: ``httpx.AsyncClient`` wired into ``create_app()`` with
  the full lifespan stack (SQLite + LanceDB + Cascade + OME).
- ``cascade_done_poll``: wait until ``md_change_state`` queue is fully
  drained (``pending`` rows == 0; includes the internal ``processing``).
- ``pipeline_done_poll``: composite drain — waits until OME strategy runs AND
  ``md_change_state`` queue both drain (use for tests that exercise the full
  OME → md → cascade pipeline).
- ``buffer_count`` / ``memcell_count``: raw counts for buffer-delta and
  memcell-growth assertions.

The ``long_conversation`` fixture (LoCoMo conv_0) lives in
:mod:`tests.conftest` so both ``tests/e2e/`` and
``tests/integration/search/`` can depend on it.

Conventions:

- ``.env`` is loaded at import time (before any everos module reads
  settings) — overrides for ``EVEROS_MEMORY__ROOT`` happen per-test.
- This file does **not** define ``cascade_runtime`` — that name belongs
  to ``tests/integration/test_cascade_integration.py``'s local fixture.
  The pipeline test uses ``core_pipeline_runtime`` to avoid name
  collision.
"""

from __future__ import annotations

import asyncio
import importlib
import json
from collections.abc import AsyncIterator, Awaitable, Callable
from pathlib import Path

import httpx
import pytest
import pytest_asyncio
from dotenv import load_dotenv
from sqlalchemy import text

# Load real .env creds before any everos import touches load_settings().
# This runs at import time on purpose: every everos import in this file is
# deferred into a fixture body so it happens after this point.
# ``parents[2]`` walks up from ``tests/e2e/conftest.py`` to the project root.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
# ``override=False``: variables already set in the process environment win
# over values found in the ``.env`` file.
load_dotenv(_PROJECT_ROOT / ".env", override=False)

# On-disk test data. ``search_seed/`` holds the ``<name>.json`` files that
# the ``search_seed`` fixture reads.
_FIXTURE_DIR = _PROJECT_ROOT / "tests" / "fixtures"
_SEARCH_SEED_DIR = _FIXTURE_DIR / "search_seed"

# Memorize service module-level singletons that survive across tests; we
# null them out so each test rebuilds against its own ``tmp_path``.
# Entries are attribute names on ``everos.service.memorize``; they are
# patched to ``None`` by the ``core_pipeline_runtime`` fixture.
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
    "_episode_writer",
    "_prompt_loader",
    "_user_pipeline",
    "_agent_pipeline",
    "_ome_engine",
)

# OME strategy modules carry module-level lazy singletons (``_writer`` /
# ``_reader``) that capture ``MemoryRoot.default()`` at first call. They
# survive across tests, so the second test writes its output to the
# **first test's** tmp_path. Reset all of them per-test.
# Each entry is ``(dotted module path, attribute names to reset)`` and is
# consumed by ``_reset_strategy_singletons``.
_STRATEGY_SINGLETONS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
    ("everos.memory.strategies.extract_foresight", ("_writer",)),
    ("everos.memory.strategies.extract_user_profile", ("_writer", "_reader")),
    ("everos.memory.strategies.extract_agent_case", ("_writer",)),
    ("everos.memory.strategies.extract_agent_skill", ("_writer",)),
)


def _reset_strategy_singletons(monkeypatch: pytest.MonkeyPatch) -> None:
    """Patch every listed strategy singleton to ``None`` for this test.

    Walks ``_STRATEGY_SINGLETONS``, imports each strategy module and sets
    each named attribute (``_writer`` / ``_reader``) to ``None`` through
    ``monkeypatch``, so the patch is undone when the test finishes. The
    intent is that the next lazy construction picks up the
    ``EVEROS_MEMORY__ROOT`` value set by the calling fixture.
    """
    for module_path, singleton_names in _STRATEGY_SINGLETONS:
        strategy_module = importlib.import_module(module_path)
        for singleton_name in singleton_names:
            # raising=False: tolerate a module that does not define the
            # attribute instead of failing the fixture.
            monkeypatch.setattr(
                strategy_module, singleton_name, None, raising=False
            )


# ---------------------------------------------------------------------------
# Data fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="session")
def search_seed() -> dict[str, list[dict]]:
    """Load the search seed slice produced by ``_dump_search_seed.py``.

    Returns a dict with four keys (``episode`` / ``atomic_fact`` /
    ``foresight`` / ``user_profile``); each value is a list of raw row
    dicts ready to be fed into ``Model.model_validate`` for LanceDB.

    Each key is read from ``<name>.json`` under the ``search_seed``
    fixture directory. Files are decoded explicitly as UTF-8 so the
    result does not depend on the platform's locale encoding.

    Tests pick the subset they need and may mutate per-row fields
    (e.g. set distinct ``session_id`` values to exercise filter DSL)
    before instantiating the pydantic model.

    NOTE(review): the fixture is session-scoped, so every test receives
    the *same* list/dict objects. Copy a row (e.g. ``dict(row)``) before
    mutating it, otherwise the change is visible to later tests.
    """
    seed_names = ("episode", "atomic_fact", "foresight", "user_profile")
    return {
        name: json.loads(
            (_SEARCH_SEED_DIR / f"{name}.json").read_text(encoding="utf-8")
        )
        for name in seed_names
    }


# ---------------------------------------------------------------------------
# Runtime fixture: tmp memory root + singleton reset (no app lifespan)
# ---------------------------------------------------------------------------


@pytest_asyncio.fixture
async def core_pipeline_runtime(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Path]:
    """Give the test a fresh memory root and freshly-reset singletons.

    Only ``EVEROS_MEMORY__ROOT`` is overridden; the real LLM / embedding
    settings from ``.env`` are left alone (do NOT overwrite
    ``EVEROS_LLM__*`` or ``EVEROS_EMBEDDING__*``).

    Yields:
        The per-test ``tmp_path`` that now serves as the memory root.
    """
    # The env override must be in place before the settings cache is
    # dropped, so the next ``load_settings()`` call reads the new root.
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))

    from everos.config import load_settings

    load_settings.cache_clear()

    memorize_module = importlib.import_module("everos.service.memorize")
    llm_client_module = importlib.import_module("everos.component.llm.client")

    # (owner module, attribute) pairs to null out, in patch order.
    stale_singletons = [(memorize_module, name) for name in _MEMORIZE_SINGLETONS]
    stale_singletons.append((llm_client_module, "_llm_client"))
    for owner, attr_name in stale_singletons:
        monkeypatch.setattr(owner, attr_name, None, raising=False)

    _reset_strategy_singletons(monkeypatch)

    yield tmp_path


# ---------------------------------------------------------------------------
# Async client fixture (full app lifespan)
# ---------------------------------------------------------------------------


@pytest_asyncio.fixture
async def async_client(
    core_pipeline_runtime: Path,
) -> AsyncIterator[httpx.AsyncClient]:
    """Start the full everos app (with lifespan) and yield an httpx client.

    Per the app's lifespan, startup brings up the SQLite engine, the
    LanceDB connection + business indexes, the Cascade orchestrator
    (watcher + scanner + worker) and the OME engine; teardown stops them
    in reverse order.

    Depends on ``core_pipeline_runtime`` so the app is created against the
    per-test memory root.
    """
    from everos.entrypoints.api.app import create_app

    application = create_app()
    asgi_transport = httpx.ASGITransport(app=application)

    # httpx.ASGITransport does not run startup / shutdown on its own, so
    # starlette's lifespan_context is entered by hand around the client.
    async with application.router.lifespan_context(application):
        async with httpx.AsyncClient(
            transport=asgi_transport, base_url="http://test"
        ) as http_client:
            yield http_client


# ---------------------------------------------------------------------------
# Poll helpers
# ---------------------------------------------------------------------------


async def _poll(
    condition: Callable[[], Awaitable[bool]],
    *,
    deadline_seconds: float,
    interval: float = 0.5,
) -> None:
    """Poll an async predicate until truthy; ``TimeoutError`` on deadline."""
    async with asyncio.timeout(deadline_seconds):
        while True:
            if await condition():
                return
            await asyncio.sleep(interval)


@pytest.fixture
def cascade_done_poll() -> Callable[..., Awaitable[None]]:
    """Return an awaitable helper that blocks until ``md_change_state`` drains.

    Usage: ``await cascade_done_poll(deadline_seconds=...)``. The helper
    polls the queue summary and returns once ``pending`` is zero; it raises
    ``TimeoutError`` if that does not happen within ``deadline_seconds``
    (default 180).
    """

    async def _wait_for_drain(*, deadline_seconds: float = 180.0) -> None:
        # Imported lazily so everos is only touched once a test calls this.
        from everos.infra.persistence.sqlite import md_change_state_repo

        async def _queue_is_empty() -> bool:
            # `pending` includes the internal `processing` rows (see QueueSummary).
            return (await md_change_state_repo.queue_summary()).pending == 0

        await _poll(_queue_is_empty, deadline_seconds=deadline_seconds)

    return _wait_for_drain


@pytest.fixture
def pipeline_done_poll() -> Callable[..., Awaitable[None]]:
    """Return a helper that waits for OME to go idle AND the md queue to drain.

    This is the composite counterpart of :func:`cascade_done_poll`. Checking
    the cascade queue alone can return immediately while a slow LLM-driven
    strategy is still in flight: the strategy has not written md yet, so
    the queue is momentarily empty. Tests that exercise the whole async
    chain (OME -> md -> cascade -> LanceDB) must use this helper instead of
    ``cascade_done_poll``.

    Usage: ``await pipeline_done_poll(deadline_seconds=...)``; raises
    ``TimeoutError`` when the deadline (default 180 s) passes first.
    """

    async def _wait_for_pipeline(*, deadline_seconds: float = 180.0) -> None:
        from everos.infra.persistence.sqlite import md_change_state_repo
        from everos.service.memorize import _get_engine

        ome_engine = _get_engine()

        async def _pipeline_is_quiet() -> bool:
            # OME side first: cascade can only fire after a strategy writes
            # md, so while a run is still in flight the queue check would
            # be premature.
            ome_idle = await ome_engine.wait_idle(timeout=0.5)
            if ome_idle:
                # `pending` includes the internal `processing` rows (see
                # QueueSummary).
                queue_state = await md_change_state_repo.queue_summary()
                return queue_state.pending == 0
            return False

        await _poll(_pipeline_is_quiet, deadline_seconds=deadline_seconds)

    return _wait_for_pipeline


# ---------------------------------------------------------------------------
# Count helpers (used directly by tests for buffer-delta assertions)
# ---------------------------------------------------------------------------


@pytest.fixture
def buffer_count() -> Callable[[str], Awaitable[int]]:
    """Provide ``await buffer_count(session_id) -> int``.

    The returned coroutine function counts the rows of
    ``unprocessed_buffer`` whose ``session_id`` matches the argument.
    """

    async def _count_buffer_rows(session_id: str) -> int:
        from everos.infra.persistence.sqlite import get_engine

        count_query = text(
            "SELECT COUNT(*) FROM unprocessed_buffer WHERE session_id = :sid"
        )
        async with get_engine().connect() as conn:
            rows = await conn.execute(count_query, {"sid": session_id})
            total = rows.scalar()
            # A missing scalar is reported as zero rows.
            return int(total or 0)

    return _count_buffer_rows


@pytest.fixture
def memcell_count() -> Callable[..., Awaitable[int]]:
    """Return an async callable: ``await memcell_count(session_id=None) -> int``.

    Counts rows of the ``memcell`` table. Pass a ``session_id`` to count
    only that session's rows, or call with no argument (or ``None``) to
    count every row.

    The return annotation uses ``Callable[..., ...]`` because the argument
    is optional; a ``Callable[[str], ...]`` annotation would make type
    checkers reject the documented zero-argument call.
    """

    async def _count(session_id: str | None = None) -> int:
        from everos.infra.persistence.sqlite import get_engine

        # Pick the statement and bind parameters up front so the execute
        # call is written once.
        if session_id is None:
            statement = text("SELECT COUNT(*) FROM memcell")
            params: dict[str, str] | None = None
        else:
            statement = text("SELECT COUNT(*) FROM memcell WHERE session_id = :sid")
            params = {"sid": session_id}

        engine = get_engine()
        async with engine.connect() as conn:
            result = await conn.execute(statement, params)
            # A missing scalar is reported as zero rows.
            return int(result.scalar() or 0)

    return _count