Files
EverOS/tests/integration/search/conftest.py
Elliot Chen 518b8eca85 chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-06 07:33:17 +08:00

270 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Session-scoped corpus fixture for ``tests/integration/search/``.
The pipeline that produces the search corpus (`/add` × 19 + `/flush` +
cascade drain) is the same one exercised by
``tests/integration/test_add_flush_pipeline_e2e.py`` — and it costs
~10 minutes against real LLMs. To keep the search test suite usable
in CI we run that pipeline **once per session** here, persist the
resulting memory_root to a session ``tmp_path``, and let every test
re-attach a fresh FastAPI lifespan against the on-disk corpus.
Layout::
_ingested_memory_root (session-scoped)
└── ingests LoCoMo conv_0 via the HTTP API, then tears
lifespan down. Returns the memory_root path with md +
sqlite + lancedb populated on disk.
search_client (function-scoped)
└── per-test ``httpx.AsyncClient`` wired to a freshly built
FastAPI app, ``EVEROS_MEMORY__ROOT`` pointed at the
session corpus. Singletons are reset so each test starts
with cold caches and the lifespan is the only thing
constructing them.
This is intentionally separate from ``tests/integration/conftest.py``
fixtures (which are function-scoped). Cross-suite isolation: tests
under ``search/`` cannot poison or be poisoned by the ones above.
All tests in this folder are marked ``slow`` via the module-level
``pytestmark`` in ``test_search_e2e.py`` — a non-``-m slow`` run skips
the whole suite cleanly without paying the ingest cost.
"""
from __future__ import annotations
import asyncio
import importlib
import os
from collections.abc import AsyncIterator, Awaitable, Callable, Generator
from pathlib import Path
import httpx
import pytest
import pytest_asyncio
from sqlalchemy import text
# Set ``EVEROS_REUSE_CORPUS=<path>`` to skip ingest and point the
# session fixture at an existing memory_root (md + lancedb already
# populated). Search is a read-only path, so no copy is needed — the
# fixture just sets ``EVEROS_MEMORY__ROOT`` to that directory.
_REUSE_ENV = "EVEROS_REUSE_CORPUS"
# Memorize-service module-level lazy singletons; reset between phases so
# stale clients / engines don't leak from ingest into per-test lifespans.
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
"_episode_writer",
"_prompt_loader",
"_user_pipeline",
"_agent_pipeline",
"_ome_engine",
)
# ── Session-scoped MonkeyPatch ─────────────────────────────────────────
@pytest.fixture(scope="session")
def _session_monkeypatch() -> Generator[pytest.MonkeyPatch, None, None]:
"""A ``MonkeyPatch`` instance with session lifetime.
Pytest's default ``monkeypatch`` is function-scoped. The ingest
fixture below has to set env vars and null singletons before the
lifespan even starts — those changes have to live for the whole
session, so we open our own ``MonkeyPatch`` and undo it at session
end.
"""
mp = pytest.MonkeyPatch()
yield mp
mp.undo()
# ── Singleton reset helper ─────────────────────────────────────────────
def _reset_memorize_singletons(mp: pytest.MonkeyPatch) -> None:
"""Null out memorize/strategy/LLM-client lazy singletons.
Called once before ingest (so the freshly-set ``EVEROS_MEMORY__ROOT``
actually wins) and once per test (so the session corpus's lifespan
sees clean caches).
"""
from everos.config import load_settings
load_settings.cache_clear()
svc = importlib.import_module("everos.service.memorize")
client_mod = importlib.import_module("everos.component.llm.client")
af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
for attr in _MEMORIZE_SINGLETONS:
mp.setattr(svc, attr, None, raising=False)
mp.setattr(client_mod, "_llm_client", None, raising=False)
mp.setattr(af_mod, "_writer", None, raising=False)
mp.setattr(fs_mod, "_writer", None, raising=False)
# ── Session corpus: ingest once ────────────────────────────────────────
@pytest.fixture(scope="session")
def _ingested_memory_root(
tmp_path_factory: pytest.TempPathFactory,
_session_monkeypatch: pytest.MonkeyPatch,
long_conversation: dict,
) -> Path:
"""Run /add × 19 + /flush + cascade drain once; return the memory_root.
All on-disk artifacts (md files + sqlite system.db + lancedb
tables) survive lifespan teardown, so per-test fixtures can
re-attach a fresh app against the populated root and exercise
only the read path.
Marked **slow** transitively via ``pytestmark`` in
``test_search_e2e.py`` — without ``-m slow`` the test module is
deselected and this fixture is never instantiated.
"""
reuse = os.environ.get(_REUSE_ENV)
if reuse:
memory_root = Path(reuse).expanduser().resolve()
users_dir = memory_root / "default_app" / "default_project" / "users"
if not users_dir.is_dir():
raise AssertionError(
f"{_REUSE_ENV}={memory_root} has no "
"default_app/default_project/users/ subdir — point it at a "
"fully-ingested memory_root or unset to rebuild from scratch"
)
else:
memory_root = tmp_path_factory.mktemp("search_corpus")
_session_monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(memory_root))
_reset_memorize_singletons(_session_monkeypatch)
if reuse:
# Search is read-only; the corpus is consumed in place, no copy.
return memory_root
# Drive the ingest in its own event loop. The lifespan inside
# ``_ingest`` properly closes LanceDB / SQLite handles on exit so
# the per-test lifespans can re-open them.
asyncio.run(_ingest(memory_root, long_conversation))
return memory_root
async def _ingest(memory_root: Path, long_conversation: dict) -> None:
"""Bring up the app once, push the LoCoMo fixture through /add+/flush."""
from everos.entrypoints.api.app import create_app
app = create_app()
transport = httpx.ASGITransport(app=app)
async with (
app.router.lifespan_context(app),
httpx.AsyncClient(transport=transport, base_url="http://test") as client,
):
session_id = long_conversation["everos_session_id"]
for batch in long_conversation["batches"]:
messages = [
{
"sender_id": m["sender_id"],
"role": m["role"],
"timestamp": m["timestamp"],
"content": m["content"],
}
for m in batch["messages"]
]
resp = await client.post(
"/api/v1/memory/add",
json={"session_id": session_id, "messages": messages},
timeout=600.0,
)
resp.raise_for_status()
resp = await client.post(
"/api/v1/memory/flush",
json={"session_id": session_id},
timeout=600.0,
)
resp.raise_for_status()
await _poll_cascade_drained(deadline_seconds=600.0)
async def _poll_cascade_drained(*, deadline_seconds: float) -> None:
"""Block until ``md_change_state.pending == 0`` or deadline."""
from everos.infra.persistence.sqlite import md_change_state_repo
async with asyncio.timeout(deadline_seconds):
while True:
summary = await md_change_state_repo.queue_summary()
if summary.pending == 0:
return
await asyncio.sleep(0.5)
# ── Per-test client against the session corpus ─────────────────────────
@pytest_asyncio.fixture
async def search_client(
_ingested_memory_root: Path,
monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[httpx.AsyncClient]:
"""Per-test ``AsyncClient`` reading from the session corpus.
Singletons are reset before the lifespan starts so the search
manager builds a fresh embedding / rerank / LLM client per test —
we don't want cross-test client state to mask a regression.
"""
monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(_ingested_memory_root))
_reset_memorize_singletons(monkeypatch)
# The search service has its own module-level singletons; reset
# those too so re-attach is clean.
search_svc = importlib.import_module("everos.service.search")
for attr in (
"_manager",
"_embedding",
"_reranker",
"_llm_client",
"_embedding_resolved",
"_rerank_resolved",
"_llm_resolved",
):
if hasattr(search_svc, attr):
monkeypatch.setattr(
search_svc,
attr,
None if not attr.endswith("_resolved") else False,
raising=False,
)
from everos.entrypoints.api.app import create_app
app = create_app()
transport = httpx.ASGITransport(app=app)
async with (
app.router.lifespan_context(app),
httpx.AsyncClient(transport=transport, base_url="http://test") as client,
):
yield client
# ── Diagnostic helpers (handy for tests that probe SQLite directly) ───
@pytest.fixture
def memcell_count() -> Callable[[], Awaitable[int]]:
"""Return an async callable: ``await memcell_count() -> int``."""
async def _count() -> int:
from everos.infra.persistence.sqlite import get_engine
engine = get_engine()
async with engine.connect() as conn:
result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
return int(result.scalar() or 0)
return _count