chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@ -0,0 +1,286 @@
+"""Shared fixtures for ``tests/e2e/``.
+
+Provides:
+
+- ``core_pipeline_runtime``: tmp memory root + reset memorize singletons.
+  Uses the **real** LLM / embedding / rerank creds from ``.env`` per the
+  project test policy.
+- ``async_client``: ``httpx.AsyncClient`` wired into ``create_app()`` with
+  the full lifespan stack (SQLite + LanceDB + Cascade + OME).
+- ``cascade_done_poll``: wait until ``md_change_state`` queue is fully
+  drained (``pending`` rows == 0; includes the internal ``processing``).
+- ``pipeline_done_poll``: composite drain — waits until OME strategy runs AND
+  ``md_change_state`` queue both drain (use for tests that exercise the full
+  OME → md → cascade pipeline).
+- ``buffer_count`` / ``memcell_count``: raw counts for buffer-delta and
+  memcell-growth assertions.
+
+The ``long_conversation`` fixture (LoCoMo conv_0) lives in
+:mod:`tests.conftest` so both ``tests/e2e/`` and
+``tests/integration/search/`` can depend on it.
+
+Conventions:
+
+- ``.env`` is loaded at import time (before any everos module reads
+  settings) — overrides for ``EVEROS_MEMORY__ROOT`` happen per-test.
+- This file does **not** define ``cascade_runtime`` — that name belongs
+  to ``tests/integration/test_cascade_integration.py``'s local fixture.
+  The pipeline test uses ``core_pipeline_runtime`` to avoid name
+  collision.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import json
+from collections.abc import AsyncIterator, Awaitable, Callable
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+from sqlalchemy import text
+
+# Load real .env creds before any everos import touches load_settings().
+# This file lives at tests/e2e/conftest.py, so ``parents[2]`` is the
+# repository root.
+_PROJECT_ROOT = Path(__file__).resolve().parents[2]
+# ``override=False``: variables already present in the process
+# environment take precedence over the values in ``.env``.
+load_dotenv(_PROJECT_ROOT / ".env", override=False)
+
+# On-disk test data; ``search_seed/`` holds the JSON files read by the
+# ``search_seed`` fixture.
+_FIXTURE_DIR = _PROJECT_ROOT / "tests" / "fixtures"
+_SEARCH_SEED_DIR = _FIXTURE_DIR / "search_seed"
+
+# Memorize service module-level singletons that survive across tests; we
+# null them out so each test rebuilds against its own ``tmp_path``.
+# These are attribute names on ``everos.service.memorize``.
+_MEMORIZE_SINGLETONS: tuple[str, ...] = (
+    "_episode_writer",
+    "_prompt_loader",
+    "_user_pipeline",
+    "_agent_pipeline",
+    "_ome_engine",
+)
+
+# OME strategy modules carry module-level lazy singletons (``_writer`` /
+# ``_reader``) that capture ``MemoryRoot.default()`` at first call. They
+# survive across tests, so the second test writes its output to the
+# **first test's** tmp_path. Reset all of them per-test.
+# Each entry is ``(dotted module path, attribute names to null out)``.
+_STRATEGY_SINGLETONS: tuple[tuple[str, tuple[str, ...]], ...] = (
+    ("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
+    ("everos.memory.strategies.extract_foresight", ("_writer",)),
+    ("everos.memory.strategies.extract_user_profile", ("_writer", "_reader")),
+    ("everos.memory.strategies.extract_agent_case", ("_writer",)),
+    ("everos.memory.strategies.extract_agent_skill", ("_writer",)),
+)
+
+
+def _reset_strategy_singletons(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Null every strategy ``_writer`` / ``_reader`` so the next test
+    rebuilds against its own ``MemoryRoot.default()`` (driven by the
+    fresh ``EVEROS_MEMORY__ROOT`` env var set by the calling fixture).
+    """
+    for mod_name, attrs in _STRATEGY_SINGLETONS:
+        mod = importlib.import_module(mod_name)
+        for attr in attrs:
+            monkeypatch.setattr(mod, attr, None, raising=False)
+
+
+# ---------------------------------------------------------------------------
+# Data fixture
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def search_seed() -> dict[str, list[dict]]:
+    """Load the search seed slice produced by ``_dump_search_seed.py``.
+
+    Returns a dict with four keys (``episode`` / ``atomic_fact`` /
+    ``foresight`` / ``user_profile``); each value is a list of raw row
+    dicts ready to be fed into ``Model.model_validate`` for LanceDB.
+
+    Tests pick the subset they need and may mutate per-row fields
+    (e.g. set distinct ``session_id`` values to exercise filter DSL)
+    before instantiating the pydantic model.
+    """
+    return {
+        name: json.loads((_SEARCH_SEED_DIR / f"{name}.json").read_text())
+        for name in ("episode", "atomic_fact", "foresight", "user_profile")
+    }
+
+
+# ---------------------------------------------------------------------------
+# Runtime fixture: tmp memory root + singleton reset (no app lifespan)
+# ---------------------------------------------------------------------------
+
+
+@pytest_asyncio.fixture
+async def core_pipeline_runtime(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> AsyncIterator[Path]:
+    """Prepare clean memory root + reset memorize singletons.
+
+    Keeps real LLM / embedding settings from ``.env`` (do NOT overwrite
+    ``EVEROS_LLM__*`` or ``EVEROS_EMBEDDING__*``).
+    """
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+
+    from everos.config import load_settings
+
+    load_settings.cache_clear()
+
+    svc = importlib.import_module("everos.service.memorize")
+    client_mod = importlib.import_module("everos.component.llm.client")
+
+    for attr in _MEMORIZE_SINGLETONS:
+        monkeypatch.setattr(svc, attr, None, raising=False)
+    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
+    _reset_strategy_singletons(monkeypatch)
+
+    yield tmp_path
+
+
+# ---------------------------------------------------------------------------
+# Async client fixture (full app lifespan)
+# ---------------------------------------------------------------------------
+
+
+@pytest_asyncio.fixture
+async def async_client(
+    core_pipeline_runtime: Path,
+) -> AsyncIterator[httpx.AsyncClient]:
+    """Bring up the full everos app with lifespan, return an httpx client.
+
+    The lifespan starts: SQLite engine, LanceDB connection + business
+    indexes, Cascade orchestrator (watcher + scanner + worker), OME
+    engine. Teardown stops everything in reverse.
+    """
+    from everos.entrypoints.api.app import create_app
+
+    app = create_app()
+    transport = httpx.ASGITransport(app=app)
+
+    # Drive starlette's lifespan_context explicitly — httpx.ASGITransport
+    # does not run startup / shutdown on its own.
+    async with (
+        app.router.lifespan_context(app),
+        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
+    ):
+        yield client
+
+
+# ---------------------------------------------------------------------------
+# Poll helpers
+# ---------------------------------------------------------------------------
+
+
+async def _poll(
+    condition: Callable[[], Awaitable[bool]],
+    *,
+    deadline_seconds: float,
+    interval: float = 0.5,
+) -> None:
+    """Poll an async predicate until truthy; ``TimeoutError`` on deadline."""
+    async with asyncio.timeout(deadline_seconds):
+        while True:
+            if await condition():
+                return
+            await asyncio.sleep(interval)
+
+
+@pytest.fixture
+def cascade_done_poll() -> Callable[..., Awaitable[None]]:
+    """Wait until ``md_change_state`` queue is drained (no pending/processing)."""
+
+    async def _wait(*, deadline_seconds: float = 180.0) -> None:
+        from everos.infra.persistence.sqlite import md_change_state_repo
+
+        async def _drained() -> bool:
+            summary = await md_change_state_repo.queue_summary()
+            # `pending` includes the internal `processing` rows (see QueueSummary).
+            return summary.pending == 0
+
+        await _poll(_drained, deadline_seconds=deadline_seconds)
+
+    return _wait
+
+
+@pytest.fixture
+def pipeline_done_poll() -> Callable[..., Awaitable[None]]:
+    """Wait until OME strategy runs AND ``md_change_state`` queue both drain.
+
+    Composite drain — fixes the trap where :func:`cascade_done_poll`
+    alone returns immediately while a slow LLM-driven strategy is still
+    in flight (the strategy has not written md yet, so the cascade queue
+    is momentarily empty). Pipeline tests that touch the full async
+    chain (OME -> md -> cascade -> LanceDB) must use this instead of
+    ``cascade_done_poll``.
+    """
+
+    async def _wait(*, deadline_seconds: float = 180.0) -> None:
+        from everos.infra.persistence.sqlite import md_change_state_repo
+        from everos.service.memorize import _get_engine
+
+        engine = _get_engine()
+
+        async def _drained() -> bool:
+            # OME side first: cascade can only fire after a strategy
+            # writes md, so an in-flight run means the queue check below
+            # is premature.
+            if not await engine.wait_idle(timeout=0.5):
+                return False
+            # `pending` includes the internal `processing` rows (see
+            # QueueSummary).
+            summary = await md_change_state_repo.queue_summary()
+            return summary.pending == 0
+
+        await _poll(_drained, deadline_seconds=deadline_seconds)
+
+    return _wait
+
+
+# ---------------------------------------------------------------------------
+# Count helpers (used directly by tests for buffer-delta assertions)
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def buffer_count() -> Callable[[str], Awaitable[int]]:
+    """Return an async callable: ``await buffer_count(session_id) -> int``."""
+
+    async def _count(session_id: str) -> int:
+        from everos.infra.persistence.sqlite import get_engine
+
+        engine = get_engine()
+        async with engine.connect() as conn:
+            result = await conn.execute(
+                text("SELECT COUNT(*) FROM unprocessed_buffer WHERE session_id = :sid"),
+                {"sid": session_id},
+            )
+            return int(result.scalar() or 0)
+
+    return _count
+
+
+@pytest.fixture
+def memcell_count() -> Callable[[str], Awaitable[int]]:
+    """Return an async callable: ``await memcell_count(user_id_or_session) -> int``.
+
+    Counts memcell rows; pass session_id to count by session, or omit to
+    count all.
+    """
+
+    async def _count(session_id: str | None = None) -> int:
+        from everos.infra.persistence.sqlite import get_engine
+
+        engine = get_engine()
+        async with engine.connect() as conn:
+            if session_id is None:
+                result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
+            else:
+                result = await conn.execute(
+                    text("SELECT COUNT(*) FROM memcell WHERE session_id = :sid"),
+                    {"sid": session_id},
+                )
+            return int(result.scalar() or 0)
+
+    return _count