Files
EverOS/tests/e2e/conftest.py
Elliot Chen 518b8eca85 chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-06 07:33:17 +08:00

287 lines
10 KiB
Python

"""Shared fixtures for ``tests/e2e/``.
Provides:
- ``core_pipeline_runtime``: tmp memory root + reset memorize singletons.
Uses the **real** LLM / embedding / rerank creds from ``.env`` per the
project test policy.
- ``async_client``: ``httpx.AsyncClient`` wired into ``create_app()`` with
the full lifespan stack (SQLite + LanceDB + Cascade + OME).
- ``cascade_done_poll``: wait until ``md_change_state`` queue is fully
drained (``pending`` rows == 0; includes the internal ``processing``).
- ``pipeline_done_poll``: composite drain — waits until OME strategy runs AND
``md_change_state`` queue both drain (use for tests that exercise the full
OME → md → cascade pipeline).
- ``buffer_count`` / ``memcell_count``: raw counts for buffer-delta and
memcell-growth assertions.
The ``long_conversation`` fixture (LoCoMo conv_0) lives in
:mod:`tests.conftest` so both ``tests/e2e/`` and
``tests/integration/search/`` can depend on it.
Conventions:
- ``.env`` is loaded at import time (before any everos module reads
settings) — overrides for ``EVEROS_MEMORY__ROOT`` happen per-test.
- This file does **not** define ``cascade_runtime`` — that name belongs
to ``tests/integration/test_cascade_integration.py``'s local fixture.
The pipeline test uses ``core_pipeline_runtime`` to avoid name
collision.
"""
from __future__ import annotations
import asyncio
import importlib
import json
from collections.abc import AsyncIterator, Awaitable, Callable
from pathlib import Path
import httpx
import pytest
import pytest_asyncio
from dotenv import load_dotenv
from sqlalchemy import text
# Load real .env creds before any everos import touches load_settings().
# This file lives at ``tests/e2e/conftest.py``, so ``parents[2]`` of its
# resolved path is the repository root. ``override=False`` asks
# python-dotenv to leave variables that are already present in the process
# environment untouched.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_PROJECT_ROOT / ".env", override=False)
# Static test-data directories; ``search_seed`` holds the ``<name>.json``
# files read by the ``search_seed`` fixture below.
_FIXTURE_DIR = _PROJECT_ROOT / "tests" / "fixtures"
_SEARCH_SEED_DIR = _FIXTURE_DIR / "search_seed"
# Memorize service module-level singletons that survive across tests; we
# null them out so each test rebuilds against its own ``tmp_path``.
# These are attribute names on ``everos.service.memorize`` (patched in
# ``core_pipeline_runtime``).
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
"_episode_writer",
"_prompt_loader",
"_user_pipeline",
"_agent_pipeline",
"_ome_engine",
)
# OME strategy modules carry module-level lazy singletons (``_writer`` /
# ``_reader``) that capture ``MemoryRoot.default()`` at first call. They
# survive across tests, so the second test writes its output to the
# **first test's** tmp_path. Reset all of them per-test.
# Each entry is ``(dotted module path, attribute names to reset)``.
_STRATEGY_SINGLETONS: tuple[tuple[str, tuple[str, ...]], ...] = (
("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
("everos.memory.strategies.extract_foresight", ("_writer",)),
("everos.memory.strategies.extract_user_profile", ("_writer", "_reader")),
("everos.memory.strategies.extract_agent_case", ("_writer",)),
("everos.memory.strategies.extract_agent_skill", ("_writer",)),
)
def _reset_strategy_singletons(monkeypatch: pytest.MonkeyPatch) -> None:
    """Patch every strategy-level lazy singleton back to ``None``.

    Walks ``_STRATEGY_SINGLETONS`` and, for each listed module, replaces the
    named attributes (``_writer`` / ``_reader``) with ``None`` through
    ``monkeypatch`` so the patch is reverted when the test finishes. The
    strategy is then expected to rebuild the object lazily against the
    ``EVEROS_MEMORY__ROOT`` value set by the calling fixture.

    Args:
        monkeypatch: The pytest ``MonkeyPatch`` instance of the calling
            fixture; it owns the lifetime of the patches.
    """
    for module_path, attr_names in _STRATEGY_SINGLETONS:
        strategy_module = importlib.import_module(module_path)
        for attr_name in attr_names:
            # raising=False: do not fail if a module lacks the attribute.
            monkeypatch.setattr(strategy_module, attr_name, None, raising=False)
# ---------------------------------------------------------------------------
# Data fixture
# ---------------------------------------------------------------------------
@pytest.fixture(scope="session")
def search_seed() -> dict[str, list[dict]]:
    """Load the search seed slice produced by ``_dump_search_seed.py``.

    Returns a dict with four keys (``episode`` / ``atomic_fact`` /
    ``foresight`` / ``user_profile``); each value is a list of raw row
    dicts ready to be fed into ``Model.model_validate`` for LanceDB.

    Tests pick the subset they need and may mutate per-row fields
    (e.g. set distinct ``session_id`` values to exercise filter DSL)
    before instantiating the pydantic model.

    The seed files are decoded explicitly as UTF-8 so the result does not
    depend on the locale's default text encoding.

    NOTE(review): the fixture is session-scoped, so every test receives the
    *same* dict and row objects. In-place mutation is therefore visible to
    later tests — copy the rows (e.g. ``copy.deepcopy``) before mutating if
    isolation matters.

    Raises:
        FileNotFoundError: if one of the four ``<name>.json`` files is
            missing from the seed directory.
        json.JSONDecodeError: if a seed file is not valid JSON.
    """
    seed_names = ("episode", "atomic_fact", "foresight", "user_profile")
    return {
        name: json.loads(
            (_SEARCH_SEED_DIR / f"{name}.json").read_text(encoding="utf-8")
        )
        for name in seed_names
    }
# ---------------------------------------------------------------------------
# Runtime fixture: tmp memory root + singleton reset (no app lifespan)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def core_pipeline_runtime(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Path]:
    """Prepare clean memory root + reset memorize singletons.

    Keeps real LLM / embedding settings from ``.env`` (do NOT overwrite
    ``EVEROS_LLM__*`` or ``EVEROS_EMBEDDING__*``).

    Setup:
        1. Point ``EVEROS_MEMORY__ROOT`` at this test's ``tmp_path``.
        2. Clear the ``load_settings`` cache so the next call re-reads the
           environment.
        3. Null the memorize-service singletons, the LLM client singleton
           and the per-strategy singletons so they are rebuilt lazily.

    Teardown:
        Clear the ``load_settings`` cache again. ``monkeypatch`` restores
        the environment variable and the patched attributes, but it cannot
        evict settings that were cached *during* the test; without this
        step a later test could keep reading settings that still point at
        this test's ``tmp_path``.

    Yields:
        The per-test memory root (``tmp_path``).
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))

    # Imported lazily: the env var must be in place before settings load.
    from everos.config import load_settings

    load_settings.cache_clear()

    svc = importlib.import_module("everos.service.memorize")
    client_mod = importlib.import_module("everos.component.llm.client")
    for attr in _MEMORIZE_SINGLETONS:
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    _reset_strategy_singletons(monkeypatch)

    yield tmp_path

    # Drop settings cached while EVEROS_MEMORY__ROOT pointed at tmp_path so
    # they do not leak into tests that run after this fixture is torn down.
    load_settings.cache_clear()
# ---------------------------------------------------------------------------
# Async client fixture (full app lifespan)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def async_client(
    core_pipeline_runtime: Path,
) -> AsyncIterator[httpx.AsyncClient]:
    """Yield an ``httpx.AsyncClient`` bound to a fully started everos app.

    The app is built with ``create_app()`` and its lifespan context is
    entered before the client is opened, so startup has completed by the
    time a test issues its first request; on exit the client is closed
    first and the lifespan shut down afterwards. Per the project notes the
    lifespan covers the SQLite engine, the LanceDB connection + business
    indexes, the Cascade orchestrator (watcher + scanner + worker) and the
    OME engine.

    Depends on ``core_pipeline_runtime`` so the memory root and singletons
    are reset before the app is created.
    """
    from everos.entrypoints.api.app import create_app

    application = create_app()
    asgi_transport = httpx.ASGITransport(app=application)

    # httpx.ASGITransport does not run startup / shutdown on its own, so
    # starlette's lifespan_context is driven by hand around the client.
    async with application.router.lifespan_context(application):
        async with httpx.AsyncClient(
            transport=asgi_transport,
            base_url="http://test",
        ) as client:
            yield client
# ---------------------------------------------------------------------------
# Poll helpers
# ---------------------------------------------------------------------------
async def _poll(
condition: Callable[[], Awaitable[bool]],
*,
deadline_seconds: float,
interval: float = 0.5,
) -> None:
"""Poll an async predicate until truthy; ``TimeoutError`` on deadline."""
async with asyncio.timeout(deadline_seconds):
while True:
if await condition():
return
await asyncio.sleep(interval)
@pytest.fixture
def cascade_done_poll() -> Callable[..., Awaitable[None]]:
    """Return an async waiter for an empty ``md_change_state`` queue.

    Usage: ``await cascade_done_poll(deadline_seconds=...)``. The waiter
    polls ``md_change_state_repo.queue_summary()`` until ``pending`` is 0
    and raises ``TimeoutError`` once the deadline (default 180 s) passes.
    """

    async def _wait_for_cascade(*, deadline_seconds: float = 180.0) -> None:
        from everos.infra.persistence.sqlite import md_change_state_repo

        async def _queue_is_empty() -> bool:
            # `pending` includes the internal `processing` rows (see
            # QueueSummary), so a single comparison covers both states.
            return (await md_change_state_repo.queue_summary()).pending == 0

        await _poll(_queue_is_empty, deadline_seconds=deadline_seconds)

    return _wait_for_cascade
@pytest.fixture
def pipeline_done_poll() -> Callable[..., Awaitable[None]]:
    """Return an async waiter for the whole OME -> md -> cascade chain.

    Composite drain: the waiter only succeeds once the OME engine reports
    idle AND the ``md_change_state`` queue has no pending rows. This avoids
    the trap where :func:`cascade_done_poll` alone returns immediately
    while a slow LLM-driven strategy is still in flight (the strategy has
    not written md yet, so the cascade queue is momentarily empty).
    Pipeline tests that touch the full async chain
    (OME -> md -> cascade -> LanceDB) must use this instead of
    ``cascade_done_poll``.

    Usage: ``await pipeline_done_poll(deadline_seconds=...)``; raises
    ``TimeoutError`` once the deadline (default 180 s) passes.
    """

    async def _wait_for_pipeline(*, deadline_seconds: float = 180.0) -> None:
        from everos.infra.persistence.sqlite import md_change_state_repo
        from everos.service.memorize import _get_engine

        # Resolved once per wait call and reused by every poll iteration.
        ome_engine = _get_engine()

        async def _pipeline_is_quiet() -> bool:
            # OME side first: cascade can only fire after a strategy
            # writes md, so an in-flight run means the queue check below
            # is premature.
            ome_idle = await ome_engine.wait_idle(timeout=0.5)
            if not ome_idle:
                return False
            # `pending` includes the internal `processing` rows (see
            # QueueSummary).
            queue_state = await md_change_state_repo.queue_summary()
            return queue_state.pending == 0

        await _poll(_pipeline_is_quiet, deadline_seconds=deadline_seconds)

    return _wait_for_pipeline
# ---------------------------------------------------------------------------
# Count helpers (used directly by tests for buffer-delta assertions)
# ---------------------------------------------------------------------------
@pytest.fixture
def buffer_count() -> Callable[[str], Awaitable[int]]:
    """Provide ``await buffer_count(session_id) -> int``.

    The returned coroutine function counts the ``unprocessed_buffer`` rows
    whose ``session_id`` matches, using a bound parameter for the value.
    """

    async def _count_buffer_rows(session_id: str) -> int:
        from everos.infra.persistence.sqlite import get_engine

        statement = text(
            "SELECT COUNT(*) FROM unprocessed_buffer WHERE session_id = :sid"
        )
        async with get_engine().connect() as conn:
            rows = await conn.execute(statement, {"sid": session_id})
            # A missing scalar is reported as zero rows.
            return int(rows.scalar() or 0)

    return _count_buffer_rows
@pytest.fixture
def memcell_count() -> Callable[..., Awaitable[int]]:
    """Return an async callable: ``await memcell_count(session_id=None) -> int``.

    Counts ``memcell`` rows. Pass a ``session_id`` to count only the rows of
    that session, or omit it (``None``) to count every row. Filtering is by
    ``session_id`` only; there is no per-user filter.

    The return annotation uses ``Callable[..., ...]`` because the argument
    is optional, matching the poll fixtures in this file.
    """

    async def _count(session_id: str | None = None) -> int:
        from everos.infra.persistence.sqlite import get_engine

        if session_id is None:
            statement = text("SELECT COUNT(*) FROM memcell")
            params = None
        else:
            statement = text("SELECT COUNT(*) FROM memcell WHERE session_id = :sid")
            params = {"sid": session_id}

        engine = get_engine()
        async with engine.connect() as conn:
            result = await conn.execute(statement, params)
            # A missing scalar is reported as zero rows.
            return int(result.scalar() or 0)

    return _count