chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
0
tests/e2e/__init__.py
Normal file
0
tests/e2e/__init__.py
Normal file
286
tests/e2e/conftest.py
Normal file
286
tests/e2e/conftest.py
Normal file
@ -0,0 +1,286 @@
|
||||
"""Shared fixtures for ``tests/e2e/``.
|
||||
|
||||
Provides:
|
||||
|
||||
- ``core_pipeline_runtime``: tmp memory root + reset memorize singletons.
|
||||
Uses the **real** LLM / embedding / rerank creds from ``.env`` per the
|
||||
project test policy.
|
||||
- ``async_client``: ``httpx.AsyncClient`` wired into ``create_app()`` with
|
||||
the full lifespan stack (SQLite + LanceDB + Cascade + OME).
|
||||
- ``cascade_done_poll``: wait until ``md_change_state`` queue is fully
|
||||
drained (``pending`` rows == 0; includes the internal ``processing``).
|
||||
- ``pipeline_done_poll``: composite drain — waits until OME strategy runs AND
|
||||
``md_change_state`` queue both drain (use for tests that exercise the full
|
||||
OME → md → cascade pipeline).
|
||||
- ``buffer_count`` / ``memcell_count``: raw counts for buffer-delta and
|
||||
memcell-growth assertions.
|
||||
|
||||
The ``long_conversation`` fixture (LoCoMo conv_0) lives in
|
||||
:mod:`tests.conftest` so both ``tests/e2e/`` and
|
||||
``tests/integration/search/`` can depend on it.
|
||||
|
||||
Conventions:
|
||||
|
||||
- ``.env`` is loaded at import time (before any everos module reads
|
||||
settings) — overrides for ``EVEROS_MEMORY__ROOT`` happen per-test.
|
||||
- This file does **not** define ``cascade_runtime`` — that name belongs
|
||||
to ``tests/integration/test_cascade_integration.py``'s local fixture.
|
||||
The pipeline test uses ``core_pipeline_runtime`` to avoid name
|
||||
collision.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
from collections.abc import AsyncIterator, Awaitable, Callable
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from dotenv import load_dotenv
|
||||
from sqlalchemy import text
|
||||
|
||||
# The real ``.env`` credentials have to be in the process environment before
# any everos import reaches load_settings(), hence the import-time load.
# ``override=False`` is passed so load_dotenv does not replace existing values.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(_PROJECT_ROOT / ".env", override=False)

_FIXTURE_DIR = _PROJECT_ROOT.joinpath("tests", "fixtures")
_SEARCH_SEED_DIR = _FIXTURE_DIR.joinpath("search_seed")

# Attribute names of the module-level singletons held by the memorize
# service. They outlive a single test, so fixtures set them back to ``None``
# to force a rebuild against the current test's ``tmp_path``.
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
    "_episode_writer",
    "_prompt_loader",
    "_user_pipeline",
    "_agent_pipeline",
    "_ome_engine",
)

# (module path, attribute names) pairs for the lazy ``_writer`` / ``_reader``
# singletons inside the OME strategy modules. Each one captures
# ``MemoryRoot.default()`` on first use and would otherwise keep pointing at
# the **previous test's** tmp_path, so all of them are reset per test.
_STRATEGY_SINGLETONS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
    ("everos.memory.strategies.extract_foresight", ("_writer",)),
    ("everos.memory.strategies.extract_user_profile", ("_writer", "_reader")),
    ("everos.memory.strategies.extract_agent_case", ("_writer",)),
    ("everos.memory.strategies.extract_agent_skill", ("_writer",)),
)
|
||||
|
||||
|
||||
def _reset_strategy_singletons(monkeypatch: pytest.MonkeyPatch) -> None:
    """Patch every strategy-module singleton listed in ``_STRATEGY_SINGLETONS`` to ``None``.

    With the cached ``_writer`` / ``_reader`` gone, the next use rebuilds
    them from ``MemoryRoot.default()``, which follows the
    ``EVEROS_MEMORY__ROOT`` value the calling fixture has just set.
    ``raising=False`` keeps the reset tolerant of a module that does not
    define one of the listed attributes.
    """
    for module_path, attr_names in _STRATEGY_SINGLETONS:
        strategy_module = importlib.import_module(module_path)
        for attr_name in attr_names:
            monkeypatch.setattr(strategy_module, attr_name, None, raising=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def search_seed() -> dict[str, list[dict]]:
    """Load the search seed slice produced by ``_dump_search_seed.py``.

    Returns a dict with four keys (``episode`` / ``atomic_fact`` /
    ``foresight`` / ``user_profile``); each value is the parsed content of
    ``<search_seed dir>/<key>.json`` — a list of raw row dicts ready to be
    fed into ``Model.model_validate`` for LanceDB.

    Tests pick the subset they need and may adjust per-row fields
    (e.g. set distinct ``session_id`` values to exercise filter DSL)
    before instantiating the pydantic model.

    NOTE(review): the fixture is session-scoped, so every test receives the
    same dict object. Copy rows (``copy.deepcopy``) before mutating them,
    otherwise the change is visible to every later test in the run.
    """
    seed: dict[str, list[dict]] = {}
    for name in ("episode", "atomic_fact", "foresight", "user_profile"):
        seed_path = _SEARCH_SEED_DIR / f"{name}.json"
        # JSON is UTF-8 by specification; do not depend on the locale default.
        seed[name] = json.loads(seed_path.read_text(encoding="utf-8"))
    return seed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runtime fixture: tmp memory root + singleton reset (no app lifespan)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def core_pipeline_runtime(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Path]:
    """Prepare a clean memory root and reset the memorize singletons.

    Setup:
      * points ``EVEROS_MEMORY__ROOT`` at ``tmp_path``;
      * clears the ``load_settings`` cache so the new root is picked up;
      * nulls the memorize-service singletons, the shared ``_llm_client``
        and every strategy ``_writer`` / ``_reader``.

    Keeps real LLM / embedding settings from ``.env`` (do NOT overwrite
    ``EVEROS_LLM__*`` or ``EVEROS_EMBEDDING__*``).

    Yields:
        The per-test memory root (``tmp_path``).

    Teardown clears the ``load_settings`` cache again. ``monkeypatch``
    restores the env var and the patched attributes by itself, but the
    settings cache would otherwise keep an object built for this test's
    tmp root and hand it to later tests that do not use this fixture.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))

    from everos.config import load_settings

    load_settings.cache_clear()

    svc = importlib.import_module("everos.service.memorize")
    client_mod = importlib.import_module("everos.component.llm.client")

    for attr in _MEMORIZE_SINGLETONS:
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    _reset_strategy_singletons(monkeypatch)

    try:
        yield tmp_path
    finally:
        # Drop settings bound to this test's tmp root; the next caller
        # re-reads the environment.
        load_settings.cache_clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Async client fixture (full app lifespan)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def async_client(
    core_pipeline_runtime: Path,
) -> AsyncIterator[httpx.AsyncClient]:
    """Yield an ``httpx.AsyncClient`` bound to a fully started everos app.

    The app comes from ``create_app()`` and its lifespan is entered around
    the client, so (per the lifespan stack) the SQLite engine, the LanceDB
    connection + business indexes, the Cascade orchestrator (watcher +
    scanner + worker) and the OME engine are up while the test runs and are
    stopped in reverse once the test finishes.

    Depends on ``core_pipeline_runtime`` so the app boots against the
    per-test memory root.
    """
    from everos.entrypoints.api.app import create_app

    application = create_app()
    asgi_transport = httpx.ASGITransport(app=application)

    # httpx.ASGITransport never triggers startup / shutdown by itself, so
    # starlette's lifespan_context is entered by hand around the client.
    async with application.router.lifespan_context(application):
        async with httpx.AsyncClient(
            transport=asgi_transport, base_url="http://test"
        ) as http:
            yield http
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Poll helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _poll(
|
||||
condition: Callable[[], Awaitable[bool]],
|
||||
*,
|
||||
deadline_seconds: float,
|
||||
interval: float = 0.5,
|
||||
) -> None:
|
||||
"""Poll an async predicate until truthy; ``TimeoutError`` on deadline."""
|
||||
async with asyncio.timeout(deadline_seconds):
|
||||
while True:
|
||||
if await condition():
|
||||
return
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
@pytest.fixture
def cascade_done_poll() -> Callable[..., Awaitable[None]]:
    """Return an async waiter that blocks until the ``md_change_state`` queue is empty.

    Usage: ``await cascade_done_poll(deadline_seconds=...)`` (default 180 s).
    The waiter raises ``TimeoutError`` when the queue has not drained by
    the deadline.
    """

    async def _wait(*, deadline_seconds: float = 180.0) -> None:
        from everos.infra.persistence.sqlite import md_change_state_repo

        async def _queue_is_empty() -> bool:
            # `pending` also counts the internal `processing` rows
            # (see QueueSummary), so zero means nothing is in flight.
            queue = await md_change_state_repo.queue_summary()
            return queue.pending == 0

        await _poll(_queue_is_empty, deadline_seconds=deadline_seconds)

    return _wait
|
||||
|
||||
|
||||
@pytest.fixture
def pipeline_done_poll() -> Callable[..., Awaitable[None]]:
    """Return an async waiter for "OME engine idle AND ``md_change_state`` queue empty".

    This is the composite drain. :func:`cascade_done_poll` on its own can
    return too early: while a slow LLM-driven strategy is still running it
    has not written any md yet, so the cascade queue looks empty for a
    moment. Tests covering the whole async chain
    (OME -> md -> cascade -> LanceDB) must wait with this fixture rather
    than ``cascade_done_poll``.

    Usage: ``await pipeline_done_poll(deadline_seconds=...)`` (default
    180 s); ``TimeoutError`` is raised when the deadline passes first.
    """

    async def _wait(*, deadline_seconds: float = 180.0) -> None:
        from everos.infra.persistence.sqlite import md_change_state_repo
        from everos.service.memorize import _get_engine

        ome_engine = _get_engine()

        async def _all_quiet() -> bool:
            # Check the OME side first: cascade work only appears after a
            # strategy has written md, so looking at the queue while a run
            # is still in flight would be premature.
            ome_idle = await ome_engine.wait_idle(timeout=0.5)
            if ome_idle:
                # `pending` also counts the internal `processing` rows
                # (see QueueSummary).
                queue = await md_change_state_repo.queue_summary()
                return queue.pending == 0
            return False

        await _poll(_all_quiet, deadline_seconds=deadline_seconds)

    return _wait
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Count helpers (used directly by tests for buffer-delta assertions)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def buffer_count() -> Callable[[str], Awaitable[int]]:
    """Provide ``await buffer_count(session_id) -> int``.

    The callable counts the ``unprocessed_buffer`` rows stored for the
    given session via a raw SQL query on the SQLite engine.
    """

    async def _count(session_id: str) -> int:
        from everos.infra.persistence.sqlite import get_engine

        query = text("SELECT COUNT(*) FROM unprocessed_buffer WHERE session_id = :sid")
        async with get_engine().connect() as conn:
            row_total = (await conn.execute(query, {"sid": session_id})).scalar()
        # A missing scalar is reported as 0.
        return int(row_total or 0)

    return _count
|
||||
|
||||
|
||||
@pytest.fixture
def memcell_count() -> Callable[..., Awaitable[int]]:
    """Return an async callable: ``await memcell_count(session_id=None) -> int``.

    Counts ``memcell`` rows with raw SQL on the SQLite engine. Pass a
    ``session_id`` to count only that session's rows, or omit the argument
    (``None``) to count every row in the table. The only supported filter
    is ``session_id``.
    """

    async def _count(session_id: str | None = None) -> int:
        from everos.infra.persistence.sqlite import get_engine

        engine = get_engine()
        async with engine.connect() as conn:
            if session_id is None:
                result = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
            else:
                result = await conn.execute(
                    text("SELECT COUNT(*) FROM memcell WHERE session_id = :sid"),
                    {"sid": session_id},
                )
            # A missing scalar is reported as 0.
            return int(result.scalar() or 0)

    return _count
|
||||
206
tests/e2e/test_add_flush_agent_pipeline_e2e.py
Normal file
206
tests/e2e/test_add_flush_agent_pipeline_e2e.py
Normal file
@ -0,0 +1,206 @@
|
||||
"""Agent pipeline e2e: 5 SWE-bench trajectories drive /add + /flush.
|
||||
|
||||
Drives the full HTTP route through to storage, exercising the agent-track
|
||||
pipeline (boundary → memcell → extract_agent_case → trigger_skill_clustering
|
||||
→ extract_agent_skill) with real LLM and real embedder credentials.
|
||||
|
||||
Mixed tenancy by design (sender_id alignment from fixture):
|
||||
|
||||
agent_pytest (1 session, pytest-dev/pytest-7236) ┐ independent
|
||||
agent_sympy (1 session, sympy/sympy-18763) ┘ owners
|
||||
agent_django (3 sessions, django/django-{14311,16255,16263}) shared
|
||||
|
||||
Concurrency strategy (workaround for the known
|
||||
``trigger_skill_clustering`` read-modify-write race on a shared owner_id):
|
||||
|
||||
Phase 1: pytest + sympy concurrent via asyncio.gather (disjoint owners)
|
||||
Phase 2: 3 django sessions sequential (same owner, would race)
|
||||
|
||||
Once the cluster race is fixed in production, Phase 2 can collapse into
|
||||
the same gather and the test will still pass — the assertions are
|
||||
race-free, only the driver is conservative.
|
||||
|
||||
White-box assertions (audit trail of internal surfaces touched):
|
||||
- sqlite ``memcell`` rows per session_id
|
||||
- filesystem ``<root>/agents/<agent>/.cases/*.md`` presence
|
||||
- LanceDB ``agent_case`` rows by ``owner_id`` (count + session_id set)
|
||||
- LanceDB ``agent_skill`` rows by ``owner_id`` (soft — LLM-dependent)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from collections.abc import Awaitable, Callable
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from everos.infra.persistence.lancedb import agent_case_repo, agent_skill_repo
|
||||
from everos.infra.persistence.markdown import AgentCaseDailyFrontmatter
|
||||
|
||||
_FIXTURE_DIR = Path(__file__).resolve().parents[1] / "fixtures" / "agent_trajectories"
|
||||
|
||||
# Hand-picked trajectories (kept in-tree as fixtures; this selection is
|
||||
# the source of truth — the original converter is not in the repo).
|
||||
_PYTEST_SESSION = "session_pytest_7236"
|
||||
_SYMPY_SESSION = "session_sympy_18763"
|
||||
_DJANGO_SESSIONS = (
|
||||
"session_django_14311",
|
||||
"session_django_16255",
|
||||
"session_django_16263",
|
||||
)
|
||||
|
||||
_AGENT_PYTEST = "agent_pytest"
|
||||
_AGENT_SYMPY = "agent_sympy"
|
||||
_AGENT_DJANGO = "agent_django"
|
||||
|
||||
# Phase 3 drain budget: OME chain (case → cluster → skill) writes md in
|
||||
# stages, each picked up by cascade. Multiple drain rounds with brief
|
||||
# sleeps let the chain quiesce without false-positive completion.
|
||||
_DRAIN_ROUNDS = 4
|
||||
_DRAIN_TIMEOUT_SECONDS = 300.0
|
||||
_DRAIN_INTER_ROUND_SLEEP_SECONDS = 5.0
|
||||
|
||||
|
||||
def _load_fixture(session_id: str) -> dict:
    """Parse ``<agent_trajectories fixture dir>/<session_id>.json`` and return it.

    The file is decoded as UTF-8 explicitly (JSON's specified encoding)
    instead of relying on the platform's locale default.
    """
    fixture_path = _FIXTURE_DIR / f"{session_id}.json"
    return json.loads(fixture_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
async def _drive_session(
|
||||
client: httpx.AsyncClient, session_data: dict
|
||||
) -> tuple[str, str]:
|
||||
"""Run /add followed by /flush for one trajectory; return status."""
|
||||
sid = session_data["everos_session_id"]
|
||||
msgs = session_data["messages"]
|
||||
# MessageItemDTO.max_length=500; our largest fixture has 324 messages.
|
||||
r = await client.post(
|
||||
"/api/v1/memory/add",
|
||||
json={"session_id": sid, "messages": msgs},
|
||||
timeout=600.0,
|
||||
)
|
||||
assert r.status_code == 200, (
|
||||
f"{sid}: /add returned {r.status_code} — {r.text[:300]}"
|
||||
)
|
||||
r = await client.post(
|
||||
"/api/v1/memory/flush",
|
||||
json={"session_id": sid},
|
||||
timeout=600.0,
|
||||
)
|
||||
assert r.status_code == 200, (
|
||||
f"{sid}: /flush returned {r.status_code} — {r.text[:300]}"
|
||||
)
|
||||
return sid, r.json()["data"]["status"]
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.live_llm
async def test_agent_pipeline_e2e_mixed_tenancy(
    async_client: httpx.AsyncClient,
    core_pipeline_runtime: Path,
    pipeline_done_poll: Callable[..., Awaitable[None]],
    memcell_count: Callable[..., Awaitable[int]],
) -> None:
    """Five SWE-bench trajectories must yield agent_case (+ agent_skill) for three agents.

    Flow: drive the two single-session owners concurrently, drive the three
    django sessions one after another, drain the OME chain + cascade in
    several rounds, then assert on sqlite memcells, the ``.cases`` md
    directories, LanceDB ``agent_case`` / ``agent_skill`` rows and strict
    md ↔ LanceDB parity.
    """
    root = core_pipeline_runtime

    solo_pytest = _load_fixture(_PYTEST_SESSION)
    solo_sympy = _load_fixture(_SYMPY_SESSION)
    django_runs = [_load_fixture(name) for name in _DJANGO_SESSIONS]

    # ── Phase 1: the two independent owners run concurrently ──────────────
    await asyncio.gather(
        _drive_session(async_client, solo_pytest),
        _drive_session(async_client, solo_sympy),
    )

    # ── Phase 2: same owner_id → one at a time, sidestepping the cluster race
    for trajectory in django_runs:
        await _drive_session(async_client, trajectory)

    # ── Phase 3: let the OME chain and cascade quiesce ────────────────────
    for _round in range(_DRAIN_ROUNDS):
        await pipeline_done_poll(deadline_seconds=_DRAIN_TIMEOUT_SECONDS)
        await asyncio.sleep(_DRAIN_INTER_ROUND_SLEEP_SECONDS)

    # ── Phase 4: assertions ───────────────────────────────────────────────

    # 4.1 at least one memcell per session
    for session in (_PYTEST_SESSION, _SYMPY_SESSION, *_DJANGO_SESSIONS):
        cells = await memcell_count(session)
        assert cells >= 1, f"no memcell for session {session!r} (got {cells})"

    # 4.2 every agent owns a .cases directory holding at least one .md file
    agents_root = root / "default_app" / "default_project" / "agents"
    cases_subdir = AgentCaseDailyFrontmatter.DIR_NAME
    for agent in (_AGENT_PYTEST, _AGENT_SYMPY, _AGENT_DJANGO):
        agent_case_dir = agents_root / agent / cases_subdir
        assert agent_case_dir.is_dir(), (
            f"missing {agent_case_dir!s} for agent={agent!r}"
        )
        assert list(agent_case_dir.glob("*.md")), (
            f"no agent_case md under {agent_case_dir!s}"
        )

    # 4.3 LanceDB agent_case rows, looked up owner by owner
    cases_pytest = await agent_case_repo.find_where(f"owner_id = '{_AGENT_PYTEST}'")
    cases_sympy = await agent_case_repo.find_where(f"owner_id = '{_AGENT_SYMPY}'")
    cases_django = await agent_case_repo.find_where(f"owner_id = '{_AGENT_DJANGO}'")

    assert len(cases_pytest) >= 1, (
        f"no agent_pytest rows in LanceDB (got {len(cases_pytest)})"
    )
    assert len(cases_sympy) >= 1, (
        f"no agent_sympy rows in LanceDB (got {len(cases_sympy)})"
    )
    # Every django session writes at least one cell, hence at least one case
    # per session: 3 is the floor, the LLM is free to emit more.
    assert len(cases_django) >= 3, (
        f"agent_django expected ≥3 LanceDB cases (3 sessions), got {len(cases_django)}"
    )

    # 4.4 cross-owner isolation: an agent's cases may only reference the
    # sessions that were driven for that agent
    seen_pytest = {case.session_id for case in cases_pytest}
    assert seen_pytest == {_PYTEST_SESSION}, (
        f"agent_pytest cases leaked across sessions: {seen_pytest}"
    )
    seen_sympy = {case.session_id for case in cases_sympy}
    assert seen_sympy == {_SYMPY_SESSION}, (
        f"agent_sympy cases leaked across sessions: {seen_sympy}"
    )
    seen_django = {case.session_id for case in cases_django}
    assert seen_django == set(_DJANGO_SESSIONS), (
        f"agent_django session set mismatch — got {seen_django}, "
        f"want {set(_DJANGO_SESSIONS)}"
    )

    # 4.5 agent_skill is a soft check. Whether a skill is emitted depends on
    # the LLM clustering quality gate (skip_quality_threshold + cluster
    # size): pytest/sympy form single-case clusters and may legitimately
    # produce 0 skills; django's 3 cases should form ≥1 cluster of size ≥2
    # and thus ≥1 skill. Because that is LLM-dependent it stays
    # informational instead of a hard floor, to keep CI signal stable.
    skills_pytest = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_PYTEST}'")
    skills_sympy = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_SYMPY}'")
    skills_django = await agent_skill_repo.find_where(f"owner_id = '{_AGENT_DJANGO}'")
    # Hard sanity only: the queries above returned (the repo isn't broken).
    assert len(skills_pytest) >= 0
    assert len(skills_sympy) >= 0
    assert len(skills_django) >= 0

    # 4.6 strict md ↔ LanceDB parity across every cascade kind
    #
    # The per-owner counts above are loose (they depend on LLM emission);
    # this helper enforces byte-exact id-set + content_sha256 parity for
    # every md the agent pipeline wrote.
    #
    # ``expect_at_least`` pins agent_case (each session writes ≥1 case) so an
    # empty glob fails loudly. agent_skill stays unpinned: per 4.5 its
    # emission hinges on the clustering quality gate, and a legitimately
    # empty agent_skill md set is still a passing run.
    from tests._consistency_assertions import assert_md_lance_strict_consistent

    await assert_md_lance_strict_consistent(
        root,
        expect_at_least={"agent_case": 1},
    )
|
||||
337
tests/e2e/test_add_flush_user_pipeline_e2e.py
Normal file
337
tests/e2e/test_add_flush_user_pipeline_e2e.py
Normal file
@ -0,0 +1,337 @@
|
||||
"""Add + Flush core pipeline smoke — long real-conversation drive.
|
||||
|
||||
Goal: prove the user-side add/flush chain is end-to-end live. Feeds
|
||||
**419 real LoCoMo messages** through ``POST /api/v1/memory/add`` (in 19
|
||||
batches sharing one session_id) then a final ``POST /flush``, and
|
||||
verifies:
|
||||
|
||||
1. Each /add returns a sane status and the unprocessed_buffer delta
|
||||
matches what the service claims (accumulated → grew by batch size;
|
||||
extracted → shrank or stayed flat).
|
||||
2. After /flush the buffer is empty and the memcell table has rows.
|
||||
3. After cascade drains, episode md files exist and LanceDB rows
|
||||
reflect them with valid content_sha256 + vector.
|
||||
4. OME-driven async strategies have produced atomic_fact / foresight /
|
||||
profile md files.
|
||||
|
||||
Real LLM + real embedder (creds via ``.env``). Marked ``slow`` —
|
||||
``pytest -m slow tests/e2e/test_add_flush_user_pipeline_e2e.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from collections.abc import Awaitable, Callable
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from everos.infra.persistence.markdown import (
|
||||
AtomicFactDailyFrontmatter,
|
||||
EpisodeDailyFrontmatter,
|
||||
ForesightDailyFrontmatter,
|
||||
)
|
||||
|
||||
# The frontmatter schemas own the directory names (single source of truth),
# so they are read from there rather than hard-coded. atomic_facts and
# foresights are dotfile-hidden, which leaves episodes as the only
# user-visible directory.
_EPISODE_DIR, _ATOMIC_FACT_DIR, _FORESIGHT_DIR = (
    schema.DIR_NAME
    for schema in (
        EpisodeDailyFrontmatter,
        AtomicFactDailyFrontmatter,
        ForesightDailyFrontmatter,
    )
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _to_add_messages(batch: dict) -> list[dict]:
|
||||
"""Strip ``_audit_*`` fields; keep only what MessageItemDTO accepts."""
|
||||
return [
|
||||
{
|
||||
"sender_id": m["sender_id"],
|
||||
"role": m["role"],
|
||||
"timestamp": m["timestamp"],
|
||||
"content": m["content"],
|
||||
}
|
||||
for m in batch["messages"]
|
||||
]
|
||||
|
||||
|
||||
def _list_md_files(memory_root: Path, subpath: str) -> list[Path]:
|
||||
"""List .md files under
|
||||
``<memory_root>/default_app/default_project/users/<user>/<subpath>/``."""
|
||||
user_dir = memory_root / "default_app" / "default_project" / "users"
|
||||
if not user_dir.exists():
|
||||
return []
|
||||
out: list[Path] = []
|
||||
for user_dir_child in user_dir.iterdir():
|
||||
target = user_dir_child / subpath
|
||||
if target.is_dir():
|
||||
out.extend(target.rglob("*.md"))
|
||||
elif target.with_suffix(".md").exists():
|
||||
out.append(target.with_suffix(".md"))
|
||||
return out
|
||||
|
||||
|
||||
def _count_episode_entries(md_files: list[Path]) -> int:
|
||||
"""Count ``## entry-*`` blocks across all episode md files."""
|
||||
n = 0
|
||||
for f in md_files:
|
||||
for line in f.read_text().splitlines():
|
||||
stripped = line.strip()
|
||||
# Daily-log entries start with `## ` followed by an id token.
|
||||
# We count any second-level heading that isn't the standard
|
||||
# subsection headers used inside an entry.
|
||||
if stripped.startswith("## ") and not stripped.startswith(
|
||||
("## Subject", "## Summary", "## Content", "## Fact", "## Foresight")
|
||||
):
|
||||
n += 1
|
||||
return n
|
||||
|
||||
|
||||
def _maybe_snapshot_memory_root(memory_root: Path) -> None:
|
||||
"""Copy ``memory_root`` to ``$EVEROS_KEEP_CORPUS_TO`` when set.
|
||||
|
||||
Used to harvest a known-good corpus (md + sqlite + lancedb three-piece
|
||||
set) after a green test run, for later upload as the /search e2e
|
||||
fixture. Pure sync I/O — kept out of the async test body so ASYNC240
|
||||
doesn't complain about pathlib usage on the async path.
|
||||
"""
|
||||
keep_to = os.environ.get("EVEROS_KEEP_CORPUS_TO")
|
||||
if not keep_to:
|
||||
return
|
||||
dest = Path(keep_to).resolve()
|
||||
if dest.exists():
|
||||
shutil.rmtree(dest)
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copytree(memory_root, dest)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The test (slow — hits real LLM + embedder; opt in via `pytest -m slow`)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.live_llm
|
||||
# Retries cover transient real-LLM flakes: OME profile clustering
|
||||
# occasionally fails to emit user.md within the cascade-drain deadline
|
||||
# (LLM timeout, empty response, or async race), but is reliably stable
|
||||
# on retry. reruns_delay leaves the cascade workers idle between
|
||||
# attempts so we don't pile state on top of a prior run.
|
||||
@pytest.mark.flaky(reruns=2, reruns_delay=5)
|
||||
async def test_long_conversation_produces_all_memory_types(
|
||||
long_conversation: dict,
|
||||
async_client: httpx.AsyncClient,
|
||||
core_pipeline_runtime: Path,
|
||||
cascade_done_poll: Callable[..., Awaitable[None]],
|
||||
buffer_count: Callable[[str], Awaitable[int]],
|
||||
memcell_count: Callable[..., Awaitable[int]],
|
||||
) -> None:
|
||||
"""One big seamless run: add 19 batches, flush, poll, assert everything."""
|
||||
|
||||
session_id = long_conversation["everos_session_id"]
|
||||
memory_root = core_pipeline_runtime
|
||||
|
||||
# ── Stage 0: baseline ─────────────────────────────────────────────────
|
||||
assert await buffer_count(session_id) == 0
|
||||
assert await memcell_count(session_id) == 0
|
||||
|
||||
# ── Stage 1: drip 19 batches into /add, asserting buffer delta ────────
|
||||
last_status: str | None = None
|
||||
|
||||
for idx, batch in enumerate(long_conversation["batches"]):
|
||||
msg_count = batch["message_count"]
|
||||
|
||||
buf_before = await buffer_count(session_id)
|
||||
cells_before = await memcell_count(session_id)
|
||||
|
||||
resp = await async_client.post(
|
||||
"/api/v1/memory/add",
|
||||
json={"session_id": session_id, "messages": _to_add_messages(batch)},
|
||||
timeout=600.0, # boundary detection may call LLM
|
||||
)
|
||||
assert resp.status_code == 200, (
|
||||
f"batch {idx} ({batch['locomo_session']}): {resp.status_code} {resp.text}"
|
||||
)
|
||||
body = resp.json()
|
||||
status: str = body["data"]["status"]
|
||||
returned_count: int = body["data"]["message_count"]
|
||||
assert status in {"accumulated", "extracted"}, body
|
||||
assert returned_count == msg_count, body
|
||||
last_status = status
|
||||
|
||||
buf_after = await buffer_count(session_id)
|
||||
cells_after = await memcell_count(session_id)
|
||||
|
||||
# Buffer-delta invariants:
|
||||
if status == "accumulated":
|
||||
# No boundary cut → entire batch piled into the buffer.
|
||||
assert buf_after == buf_before + msg_count, (
|
||||
f"batch {idx} accumulated: expected buf {buf_before + msg_count}, "
|
||||
f"got {buf_after}"
|
||||
)
|
||||
assert cells_after == cells_before, (
|
||||
f"batch {idx} accumulated: memcell should not change "
|
||||
f"({cells_before} → {cells_after})"
|
||||
)
|
||||
else: # "extracted"
|
||||
# Boundary fired: some messages turned into memcell(s), tail
|
||||
# (if any) stays in the buffer. We can't predict the exact tail
|
||||
# length but two invariants must hold.
|
||||
assert cells_after > cells_before, (
|
||||
f"batch {idx} extracted: memcell should grow "
|
||||
f"({cells_before} → {cells_after})"
|
||||
)
|
||||
assert buf_after >= 0
|
||||
# Conservation: nothing should silently vanish — the union of
|
||||
# (buffer carry-over + this batch) must equal (new buffer +
|
||||
# messages carved into cells). We approximate by asserting the
|
||||
# new buffer is at most the carry-over + this batch size.
|
||||
assert buf_after <= buf_before + msg_count, (
|
||||
f"batch {idx} extracted: buffer overflow "
|
||||
f"({buf_before} + {msg_count} → {buf_after})"
|
||||
)
|
||||
|
||||
# ── Stage 2: flush ────────────────────────────────────────────────────
|
||||
cells_pre_flush = await memcell_count(session_id)
|
||||
resp = await async_client.post(
|
||||
"/api/v1/memory/flush",
|
||||
json={"session_id": session_id},
|
||||
timeout=600.0,
|
||||
)
|
||||
assert resp.status_code == 200, resp.text
|
||||
flush_status = resp.json()["data"]["status"]
|
||||
assert flush_status in {"extracted", "no_extraction"}, resp.json()
|
||||
|
||||
assert await buffer_count(session_id) == 0, "buffer must be drained after flush"
|
||||
|
||||
cells_after_flush = await memcell_count(session_id)
|
||||
# If the last /add was already 'extracted' and emptied the buffer,
|
||||
# flush returns 'no_extraction'. Otherwise flush must produce ≥ 1
|
||||
# cell to satisfy the boundary semantics.
|
||||
if flush_status == "extracted":
|
||||
assert cells_after_flush > cells_pre_flush
|
||||
|
||||
# 419 LoCoMo messages produce ~19 memcells in practice (LLM boundary
|
||||
# decides semantic cuts; daily-life chat carves coarsely). Threshold
|
||||
# 15 leaves room for run-to-run variance from the boundary LLM.
|
||||
assert cells_after_flush >= 15, (
|
||||
f"expected ≥ 15 memcells from 419 messages, got {cells_after_flush}; "
|
||||
f"last add status was {last_status!r}, flush was {flush_status!r}"
|
||||
)
|
||||
|
||||
# ── Stage 3 + 4: wait for cascade to drain ────────────────────────────
|
||||
# Cascade syncs md → LanceDB. OME async strategies (atomic / foresight /
|
||||
# profile) also write md, which then cascade picks up. So one wait on
|
||||
# cascade-drain effectively covers both pipelines, IF OME has already
|
||||
# emitted its strategies (which memorize.py does inline via engine.emit).
|
||||
await cascade_done_poll(deadline_seconds=600.0)
|
||||
|
||||
# ── Stage 5: artifacts on disk + LanceDB ──────────────────────────────
|
||||
# 5.1 episodes
|
||||
episode_files = _list_md_files(memory_root, _EPISODE_DIR)
|
||||
assert episode_files, "no episode md files written"
|
||||
episode_entries = _count_episode_entries(episode_files)
|
||||
# 19 memcells × 2 owners (caroline + melanie) ≈ 36 episode rows seen
|
||||
# in practice; threshold 15 leaves variance room.
|
||||
assert episode_entries >= 15, (
|
||||
f"expected ≥ 15 episode entries across {len(episode_files)} files, "
|
||||
f"got {episode_entries}"
|
||||
)
|
||||
|
||||
# 5.2 episode → LanceDB
|
||||
from everos.infra.persistence.lancedb import episode_repo
|
||||
|
||||
lance_episode_count = await episode_repo.count()
|
||||
assert lance_episode_count >= 15, (
|
||||
f"LanceDB episode rows ({lance_episode_count}) < md entries ({episode_entries})"
|
||||
)
|
||||
|
||||
# 5.3 atomic_fact
|
||||
af_files = _list_md_files(memory_root, _ATOMIC_FACT_DIR)
|
||||
assert af_files, "no atomic_fact md files — extract_atomic_facts did not emit"
|
||||
|
||||
from everos.infra.persistence.lancedb import atomic_fact_repo
|
||||
|
||||
lance_af_count = await atomic_fact_repo.count()
|
||||
assert lance_af_count >= 1, (
|
||||
f"LanceDB atomic_fact rows = {lance_af_count}; expected ≥ 1"
|
||||
)
|
||||
|
||||
# 5.4 foresight
|
||||
# Foresight extractor is correctly invoked (log: ``foresights_extracted``
|
||||
# per memcell) but daily-life chat about kids / work / hobbies rarely
|
||||
# yields explicit future-intent statements, so count is usually 0.
|
||||
# We assert the LanceDB table exists (count returns 0 cleanly) — not
|
||||
# that any row was emitted.
|
||||
from everos.infra.persistence.lancedb import foresight_repo
|
||||
|
||||
lance_fs_count = await foresight_repo.count()
|
||||
assert lance_fs_count >= 0, f"foresight table broken: count={lance_fs_count}"
|
||||
|
||||
# 5.5 profile (md only — profile retrieval path is stub; we only assert
|
||||
# the writer wrote something). Profile lives as a single file
|
||||
# ``users/<user_id>/user.md`` (schema: ``UserProfileFrontmatter.PROFILE_FILENAME``).
|
||||
from everos.infra.persistence.markdown import UserProfileFrontmatter
|
||||
|
||||
profile_filename = UserProfileFrontmatter.PROFILE_FILENAME
|
||||
profile_files: list[Path] = []
|
||||
users_root = memory_root / "default_app" / "default_project" / "users"
|
||||
if users_root.is_dir():
|
||||
for ud in users_root.iterdir():
|
||||
candidate = ud / profile_filename
|
||||
if candidate.exists():
|
||||
profile_files.append(candidate)
|
||||
assert profile_files, (
|
||||
f"no {profile_filename} written — extract_user_profile / "
|
||||
"trigger_profile_clustering did not emit"
|
||||
)
|
||||
# At least one profile file has non-trivial content.
|
||||
assert any(f.read_text().strip() for f in profile_files), (
|
||||
"all profile.md files are empty"
|
||||
)
|
||||
|
||||
# ── Stage 5b: strict md ↔ LanceDB parity (every cascade kind) ─────────
|
||||
# Counts above are looser ``>=`` checks against LLM non-determinism;
|
||||
# here we enforce byte-exact id-set + content_sha256 parity across
|
||||
# every md the pipeline wrote. Catches: missing rows, orphan rows,
|
||||
# content drift between md and the indexed projection.
|
||||
#
|
||||
# ``expect_at_least`` pins the kinds this pipeline MUST produce so an
|
||||
# empty glob (kind not emitted at all) fails loudly — without this
|
||||
# guard the parity check would silently pass on zero files. Foresight
|
||||
# is NOT pinned because the LLM frequently yields 0 future-intent
|
||||
# statements on daily-life chat (see commentary above stage 5.4).
|
||||
from tests._consistency_assertions import assert_md_lance_strict_consistent
|
||||
|
||||
await assert_md_lance_strict_consistent(
|
||||
memory_root,
|
||||
expect_at_least={
|
||||
"episode": 1,
|
||||
"atomic_fact": 1,
|
||||
"user_profile": 1,
|
||||
},
|
||||
)
|
||||
|
||||
# ── Stage 6: optional corpus snapshot ─────────────────────────────────
|
||||
# When ``EVEROS_KEEP_CORPUS_TO=<dest>`` is set, copy the post-test
|
||||
# ``memory_root`` to ``<dest>`` so it can be tarred + uploaded as a
|
||||
# test corpus for the /search e2e suite. Skipped silently when the
|
||||
# env var is absent (default test runs don't snapshot).
|
||||
_maybe_snapshot_memory_root(memory_root)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Diagnostic: lighter smoke that doesn't depend on the long fixture, used
|
||||
# to validate the conftest fixtures themselves are wired correctly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_async_client_starts_and_health_responds(
    async_client: httpx.AsyncClient,
) -> None:
    """Smoke check that the ``async_client`` fixture serves requests.

    A 200 from ``GET /health`` shows the app behind the client came up.
    """
    health = await async_client.get("/health")
    status = health.status_code
    assert status == 200, health.text
|
||||
219
tests/e2e/test_full_pipeline_timezone_e2e.py
Normal file
219
tests/e2e/test_full_pipeline_timezone_e2e.py
Normal file
@ -0,0 +1,219 @@
|
||||
"""Real full-pipeline timezone e2e — the gold-standard anti-drift test.
|
||||
|
||||
Exercises the **complete stack** under a display-tz switch:
|
||||
|
||||
POST /add → unprocessed_buffer → POST /flush
|
||||
↓
|
||||
boundary detection (memcell)
|
||||
↓
|
||||
markdown writer (episode.md)
|
||||
↓
|
||||
cascade scanner / worker
|
||||
↓
|
||||
LanceDB index (episode row)
|
||||
|
||||
then POST /search and POST /get under display tz = Shanghai,
|
||||
switch display tz to UTC, repeat /search + /get.
|
||||
|
||||
Pin: the **UTC instant** of every returned ``timestamp`` field is
|
||||
identical across all four renders. Only the offset / wall-clock
|
||||
changes. This is the user-facing contract of the storage-UTC discipline.
|
||||
|
||||
Real LLM (boundary detection + episode extraction) + real embedder
|
||||
(LanceDB vector + FTS) — marked ``@slow`` ``@live_llm``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from everos.component.utils import datetime as dt_module
|
||||
from everos.component.utils.datetime import from_iso_format
|
||||
from everos.config import load_settings
|
||||
|
||||
|
||||
async def _switch_display_tz(monkeypatch: pytest.MonkeyPatch, tz: str) -> None:
    """Re-point the display timezone at ``tz`` and invalidate cached readers.

    ``EVEROS_MEMORY__TIMEZONE`` is set through ``monkeypatch``; afterwards the
    caches on ``load_settings`` and ``dt_module._display_tz`` are cleared, in
    that order. Skipping either ``cache_clear`` would leave the previous
    value cached, so the new env var would never be observed.
    """
    monkeypatch.setenv("EVEROS_MEMORY__TIMEZONE", tz)
    for cached_reader in (load_settings, dt_module._display_tz):
        cached_reader.cache_clear()
|
||||
|
||||
|
||||
async def _first_episode_timestamp(
    client: httpx.AsyncClient,
    path: str,
    payload: dict[str, object],
) -> str:
    """POST ``payload`` to ``path`` and return the first episode's timestamp.

    Asserts a 200 response and a non-empty ``data.episodes`` list before
    indexing, so an empty result fails with a readable message instead of an
    ``IndexError``.
    """
    resp = await client.post(path, json=payload, timeout=60.0)
    assert resp.status_code == 200, resp.text
    episodes = resp.json()["data"]["episodes"]
    assert episodes, (
        f"{path} must return an episode after flush+cascade; got {episodes!r}"
    )
    return episodes[0]["timestamp"]


@pytest.mark.slow
@pytest.mark.live_llm
async def test_full_pipeline_tz_switch_preserves_utc_instant(
    async_client: httpx.AsyncClient,
    pipeline_done_poll: Callable[..., Awaitable[None]],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Real /add → /flush → cascade → LanceDB → /search /get under tz switch.

    Steps:

    1. Configure ``EVEROS_MEMORY__TIMEZONE=Asia/Shanghai``.
    2. POST /add a single message with a pinned epoch-ms timestamp.
    3. POST /flush — forces boundary detection to carve a memcell out
       of the single-message buffer.
    4. Wait for cascade to drain (md → LanceDB indexed).
    5. POST /search + POST /get: capture episode timestamp strings.
    6. Switch ``EVEROS_MEMORY__TIMEZONE=UTC``.
    7. POST /search + POST /get again: capture episode timestamp strings.
    8. Parse all four timestamp strings back to UTC instants. They must
       all be equal. The offsets and wall-clock numbers will differ
       between Shanghai and UTC renders — that's expected; what must
       NOT differ is the absolute UTC instant.

    Anti-drift contract is end-to-end: writes under one display tz
    must read back under another with zero data drift.
    """
    user_id = "alice_full_tz"
    session_id = "sess_full_tz"
    # 1748498400000 ms = 2025-05-29T06:00:00Z = 2025-05-29T14:00:00+08:00
    pinned_ms = 1748498400000
    expected_instant = dt.datetime.fromtimestamp(pinned_ms / 1000, tz=dt.UTC)

    search_path = "/api/v1/memory/search"
    get_path = "/api/v1/memory/get"
    # The same two request bodies are replayed under both display timezones.
    search_payload: dict[str, object] = {
        "user_id": user_id,
        "query": "climbing",
        "method": "keyword",  # no embedder cost; FTS index built by cascade
        "filters": {"session_id": session_id},
    }
    get_payload: dict[str, object] = {
        "user_id": user_id,
        "memory_type": "episode",
        "page": 1,
        "page_size": 20,
    }

    # ── Step 1+2: configure Shanghai + write via /add ──
    await _switch_display_tz(monkeypatch, "Asia/Shanghai")
    resp = await async_client.post(
        "/api/v1/memory/add",
        json={
            "user_id": user_id,
            "session_id": session_id,
            "messages": [
                {
                    "sender_id": user_id,
                    "role": "user",
                    "timestamp": pinned_ms,
                    "content": "I love climbing in Yosemite every spring.",
                },
            ],
        },
        timeout=60.0,
    )
    assert resp.status_code == 200, resp.text

    # ── Step 3: /flush forces boundary detection on the single-message buffer ──
    resp = await async_client.post(
        "/api/v1/memory/flush",
        json={"user_id": user_id, "session_id": session_id},
        timeout=60.0,
    )
    assert resp.status_code == 200, resp.text

    # ── Step 4: wait for OME strategies + cascade to fully drain ──
    # 10-minute deadline: extract_episode + extract_atomic_facts run under
    # real LLM and the cascade worker only fires after md lands. The
    # `pipeline_done_poll` fixture covers both OME idle and cascade queue
    # empty.
    await pipeline_done_poll(deadline_seconds=600.0)

    # ── Step 5: /search + /get under Shanghai display tz ──
    ts_search_sh = await _first_episode_timestamp(
        async_client, search_path, search_payload
    )
    assert ts_search_sh.endswith("+08:00"), (
        f"Shanghai display tz should render offset +08:00; got {ts_search_sh!r}"
    )
    ts_get_sh = await _first_episode_timestamp(async_client, get_path, get_payload)
    assert ts_get_sh.endswith("+08:00"), ts_get_sh

    # ── Step 6: switch to UTC display tz (drops caches) ──
    await _switch_display_tz(monkeypatch, "UTC")

    # ── Step 7: /search + /get again, same on-disk row, new render ──
    utc_suffixes = ("Z", "+00:00")
    ts_search_utc = await _first_episode_timestamp(
        async_client, search_path, search_payload
    )
    assert ts_search_utc.endswith(utc_suffixes), (
        f"UTC display tz should render Z / +00:00; got {ts_search_utc!r}"
    )
    ts_get_utc = await _first_episode_timestamp(async_client, get_path, get_payload)
    assert ts_get_utc.endswith(utc_suffixes), ts_get_utc

    # ── Step 8: anti-drift assertion — all four UTC instants identical ──
    instants = {
        "search/Shanghai": from_iso_format(ts_search_sh).astimezone(dt.UTC),
        "get/Shanghai": from_iso_format(ts_get_sh).astimezone(dt.UTC),
        "search/UTC": from_iso_format(ts_search_utc).astimezone(dt.UTC),
        "get/UTC": from_iso_format(ts_get_utc).astimezone(dt.UTC),
    }
    distinct = set(instants.values())
    assert len(distinct) == 1, (
        f"display-tz switch must NOT drift the UTC instant. Got distinct "
        f"instants across renders: {instants!r}"
    )
    actual_instant = next(iter(distinct))
    # Episode timestamp inherits from the last message's epoch ms — the
    # pinned input value must round-trip exactly.
    assert actual_instant == expected_instant, (
        f"episode UTC instant must equal the pinned input ms epoch; "
        f"expected {expected_instant.isoformat()}, got {actual_instant.isoformat()}"
    )

    # ── Sanity: across the four renders, identical instant projects to the
    #    correct wall-clock under each display tz ──
    # Shanghai: 14:00 wall clock; UTC: 06:00 wall clock.
    assert "T14:00:00" in ts_search_sh, ts_search_sh
    assert "T14:00:00" in ts_get_sh, ts_get_sh
    assert "T06:00:00" in ts_search_utc, ts_search_utc
    assert "T06:00:00" in ts_get_utc, ts_get_utc
|
||||
829
tests/e2e/test_get_endpoint_e2e.py
Normal file
829
tests/e2e/test_get_endpoint_e2e.py
Normal file
@ -0,0 +1,829 @@
|
||||
"""End-to-end integration tests for ``POST /api/v1/memory/get``.
|
||||
|
||||
These tests spin up the FastAPI app with **no lifespan providers**
|
||||
against a tmp ``EVEROS_MEMORY__ROOT``, populate a real LanceDB
|
||||
``episode`` table directly via the repo singleton, and exercise the
|
||||
HTTP route. They cover the wiring that unit tests cannot: pydantic
|
||||
422s from the route, JSON envelope shape, and the full
|
||||
``request → service → manager → LanceDB`` path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
from collections.abc import AsyncIterator
|
||||
from importlib import import_module
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.entrypoints.api.app import create_app
|
||||
from everos.infra.persistence.lancedb import (
|
||||
AgentCase,
|
||||
AgentSkill,
|
||||
Episode,
|
||||
UserProfile,
|
||||
agent_case_repo,
|
||||
agent_skill_repo,
|
||||
episode_repo,
|
||||
lancedb_manager,
|
||||
user_profile_repo,
|
||||
)
|
||||
|
||||
# ``everos.service.__init__`` re-exports the ``get`` function under the
# same name as the submodule (``from .get import get as get``), which
# shadows the submodule when imported normally. Pull the actual module
# via importlib so the test can poke at its ``_manager`` singleton
# (the ``client`` fixture in this file resets it to ``None`` per test).
get_service_mod = import_module("everos.service.get")
|
||||
|
||||
|
||||
def _ts(day: int) -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, day, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _episode(
    entry: str,
    *,
    owner: str = "u1",
    session: str = "sess_a",
    parent_id: str = "mc_1",
    sender_ids: list[str] | None = None,
    day: int = 1,
) -> Episode:
    """Build an ``Episode`` row for seeding, keyed by ``owner`` and ``entry``.

    The row id is ``"{owner}_{entry}"`` and the timestamp comes from
    ``_ts(day)``. When ``sender_ids`` is ``None`` the senders default to
    ``[owner, "assistant"]``. Text fields are derived from ``entry`` and the
    vector is 1024 zeros.
    """
    senders = [owner, "assistant"] if sender_ids is None else sender_ids
    body_text = f"body of {entry}"
    row_fields = {
        "id": f"{owner}_{entry}",
        "entry_id": entry,
        "owner_id": owner,
        "owner_type": "user",
        "session_id": session,
        "timestamp": _ts(day),
        "parent_type": "memcell",
        "parent_id": parent_id,
        "sender_ids": senders,
        "subject": f"subj {entry}",
        "summary": f"summary {entry}",
        "episode": body_text,
        "episode_tokens": body_text,
        "md_path": f"users/{owner}/episodes/{entry}.md",
        "content_sha256": "abc",
        "vector": [0.0] * 1024,
    }
    return Episode(**row_fields)
|
||||
|
||||
|
||||
def _agent_case(
    entry: str,
    *,
    owner: str = "a1",
    session: str = "sess_x",
    day: int = 1,
) -> AgentCase:
    """Build an ``AgentCase`` row for seeding, keyed by ``owner`` and ``entry``.

    The row id is ``"{owner}_{entry}"``, the timestamp comes from
    ``_ts(day)``, ``quality_score`` is fixed at 0.8 and ``key_insight`` is
    ``None``. Intent / approach text is derived from ``entry``; the vector
    is 1024 zeros.
    """
    intent_text = f"intent {entry}"
    approach_text = f"approach {entry}"
    row_fields = {
        "id": f"{owner}_{entry}",
        "entry_id": entry,
        "owner_id": owner,
        "owner_type": "agent",
        "session_id": session,
        "timestamp": _ts(day),
        "parent_type": "memcell",
        "parent_id": "mc_99",
        "quality_score": 0.8,
        "task_intent": intent_text,
        "task_intent_tokens": intent_text,
        "approach": approach_text,
        "approach_tokens": approach_text,
        "key_insight": None,
        "md_path": f"agents/{owner}/cases/{entry}.md",
        "content_sha256": "abc",
        "vector": [0.0] * 1024,
    }
    return AgentCase(**row_fields)
|
||||
|
||||
|
||||
def _agent_skill(
    name: str,
    *,
    owner: str = "a1",
) -> AgentSkill:
    """Build an ``AgentSkill`` row for seeding, keyed by ``owner`` and ``name``.

    The row id is ``"{owner}_{name}"``; description and content text are
    derived from ``name``. ``confidence`` is 0.9, ``maturity_score`` is 0.7,
    the single source case id is ``"{owner}_ac_1"`` and the vector is 1024
    zeros.
    """
    description_text = f"desc {name}"
    content_text = f"content {name}"
    row_fields = {
        "id": f"{owner}_{name}",
        "owner_id": owner,
        "owner_type": "agent",
        "name": name,
        "description": description_text,
        "description_tokens": description_text,
        "content": content_text,
        "content_tokens": content_text,
        "confidence": 0.9,
        "maturity_score": 0.7,
        "source_case_ids": [f"{owner}_ac_1"],
        "md_path": f"agents/{owner}/skills/{name}/SKILL.md",
        "content_sha256": "abc",
        "vector": [0.0] * 1024,
    }
    return AgentSkill(**row_fields)
|
||||
|
||||
|
||||
@pytest.fixture
async def client(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[AsyncClient]:
    """Yield an ``AsyncClient`` bound to the app, with no lifespan providers.

    Setup points ``EVEROS_MEMORY__ROOT`` at ``tmp_path``, clears the
    ``load_settings`` cache and resets the module-level singletons the
    get-path touches. Teardown disposes the LanceDB connection and clears
    the settings cache again; it runs in ``finally`` so it is not skipped
    when an exception propagates through the ``yield`` or the client's
    exit raises.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    load_settings.cache_clear()

    # Reset every module-level singleton the get-path touches.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    get_service_mod._manager = None

    app = create_app(lifespan_providers=[])
    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as c:
            yield c
    finally:
        await lancedb_manager.dispose_connection()
        load_settings.cache_clear()
|
||||
|
||||
|
||||
# ── Happy path ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_episodes_returns_page_and_total(
    client: AsyncClient,
) -> None:
    """Seed five episodes; page 1 at page_size=2 yields 2 items, total_count=5."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 1,
        "page_size": 2,
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    envelope = resp.json()
    request_id = envelope["request_id"]
    # request_id is expected to be 32 lowercase hex characters.
    assert len(request_id) == 32 and set(request_id) <= set("0123456789abcdef")

    data = envelope["data"]
    assert data["total_count"] == 5
    assert data["count"] == 2
    episodes = data["episodes"]
    assert len(episodes) == 2
    # default sort = timestamp DESC → highest day first
    assert episodes[0]["id"] == "u1_ep_005"
    assert episodes[1]["id"] == "u1_ep_004"
    # The non-requested kinds are empty arrays (envelope invariant).
    for other_kind in ("profiles", "agent_cases", "agent_skills"):
        assert data[other_kind] == []
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_session_id(
    client: AsyncClient,
) -> None:
    """A scalar ``session_id`` filter returns only rows from that session."""
    sessions = ("sess_a", "sess_a", "sess_b")
    seeded = [
        _episode(f"ep_{n:03d}", session=sess)
        for n, sess in enumerate(sessions, start=1)
    ]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"session_id": "sess_a"},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    assert data["count"] == 2
    returned_ids = {item["id"] for item in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
||||
|
||||
|
||||
async def test_get_empty_returns_zero_counts(client: AsyncClient) -> None:
    """With nothing seeded, an episode listing is empty with zero counts."""
    payload = {"user_id": "ghost", "memory_type": "episode"}
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    result = resp.json()["data"]
    assert result["total_count"] == 0
    assert result["count"] == 0
    assert result["episodes"] == []
|
||||
|
||||
|
||||
async def test_get_profile_miss_returns_empty(client: AsyncClient) -> None:
    """No profile row seeded → ``profiles=[]`` and ``total_count=0``."""
    payload = {"user_id": "u1", "memory_type": "profile"}
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    result = resp.json()["data"]
    assert result["profiles"] == []
    assert result["total_count"] == 0
|
||||
|
||||
|
||||
async def test_get_profile_returns_seeded_row(client: AsyncClient) -> None:
    """A seeded ``user_profile`` row comes back with its JSON fields decoded.

    The row is written straight into the LanceDB ``user_profile`` table via
    ``user_profile_repo`` (standing in for what cascade would index from
    ``users/u1/user.md``) and then read back through the HTTP route. The
    ``*_json`` string columns are expected to surface as decoded lists under
    ``profile_data``.
    """
    seeded_profile = UserProfile(
        id="u1",
        owner_id="u1",
        owner_type="user",
        app_id="default",
        project_id="default",
        summary="u1 loves climbing in Yosemite",
        explicit_info_json='[{"category": "Hobby", "description": "climbing"}]',
        implicit_traits_json='[{"trait": "Outdoorsy"}]',
        profile_timestamp_ms=1780304400000,
        md_path="users/u1/user.md",
        content_sha256="abc",
    )
    await user_profile_repo.add([seeded_profile])

    payload = {"user_id": "u1", "memory_type": "profile"}
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 1
    assert data["count"] == 1
    profiles = data["profiles"]
    assert len(profiles) == 1

    profile = profiles[0]
    assert profile["id"] == "u1"
    assert profile["user_id"] == "u1"
    profile_data = profile["profile_data"]
    assert profile_data["summary"] == "u1 loves climbing in Yosemite"
    assert profile_data["explicit_info"] == [
        {"category": "Hobby", "description": "climbing"}
    ]
    assert profile_data["implicit_traits"] == [{"trait": "Outdoorsy"}]
|
||||
|
||||
|
||||
# ── Pagination + sort ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_episodes_page_two_returns_correct_slice(
    client: AsyncClient,
) -> None:
    """Five rows, page_size=2, page=2 → the third and fourth rows by DESC ts."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 2,
        "page_size": 2,
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 5
    assert data["count"] == 2
    # Default sort is timestamp DESC (day 5 first), so page 2 covers
    # offsets 2 and 3 of that ordering: day=3 then day=2.
    page_ids = [item["id"] for item in data["episodes"]]
    assert page_ids == ["u1_ep_003", "u1_ep_002"]
|
||||
|
||||
|
||||
async def test_get_episodes_sort_order_asc(client: AsyncClient) -> None:
    """``sort_order=asc`` lists the oldest episode first."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 4)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "sort_order": "asc",
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    ordered_ids = [item["id"] for item in resp.json()["data"]["episodes"]]
    assert ordered_ids == ["u1_ep_001", "u1_ep_002", "u1_ep_003"]
|
||||
|
||||
|
||||
# ── Agent-side kinds ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_agent_cases_happy_path(client: AsyncClient) -> None:
    """Listing ``agent_case`` fills only ``agent_cases``, newest first."""
    seeded = [_agent_case(f"ac_{i:03d}", day=i) for i in range(1, 4)]
    await agent_case_repo.add(seeded)

    payload = {"agent_id": "a1", "memory_type": "agent_case"}
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 3
    assert data["count"] == 3
    cases = data["agent_cases"]
    case_ids = [case["id"] for case in cases]
    assert case_ids == ["a1_ac_003", "a1_ac_002", "a1_ac_001"]
    # Cross-kind envelope stays empty.
    assert data["episodes"] == []
    assert data["agent_skills"] == []
    # Item shape: no ``score`` key on /get items, and ``quality_score``
    # round-trips from the seeded row.
    newest = cases[0]
    assert "score" not in newest
    assert newest["quality_score"] == 0.8
    assert newest["agent_id"] == "a1"
|
||||
|
||||
|
||||
async def test_get_agent_cases_filtered_by_session(client: AsyncClient) -> None:
    """A ``session_id`` filter restricts ``agent_case`` rows to that session."""
    sessions = ("sess_x", "sess_x", "sess_y")
    seeded = [
        _agent_case(f"ac_{n:03d}", session=sess)
        for n, sess in enumerate(sessions, start=1)
    ]
    await agent_case_repo.add(seeded)

    payload = {
        "agent_id": "a1",
        "memory_type": "agent_case",
        "filters": {"session_id": "sess_x"},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {case["id"] for case in data["agent_cases"]}
    assert returned_ids == {"a1_ac_001", "a1_ac_002"}
|
||||
|
||||
|
||||
async def test_get_agent_skills_happy_path(client: AsyncClient) -> None:
    """Listing ``agent_skill`` with default sorting returns both seeded skills.

    Per the original note, sorting for this kind silently uses
    ``updated_at``; only the set of returned names is asserted here.
    """
    skill_names = ("planner", "summariser")
    await agent_skill_repo.add([_agent_skill(skill) for skill in skill_names])

    payload = {"agent_id": "a1", "memory_type": "agent_skill"}
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_names = {item["name"] for item in data["agent_skills"]}
    assert returned_names == {"planner", "summariser"}
|
||||
|
||||
|
||||
async def test_get_agent_skills_sort_by_timestamp_silently_downgraded(
    client: AsyncClient,
) -> None:
    """Explicit ``sort_by=timestamp`` on ``agent_skill`` still answers 200.

    The manager is expected to rewrite the sort column to ``updated_at``
    (the only temporal column on ``agent_skill``) instead of failing.
    """
    await agent_skill_repo.add([_agent_skill("planner")])

    payload = {
        "agent_id": "a1",
        "memory_type": "agent_skill",
        "sort_by": "timestamp",
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200
    assert resp.json()["data"]["total_count"] == 1
|
||||
|
||||
|
||||
# ── Filter coverage end-to-end ──────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_ne_session(client: AsyncClient) -> None:
    """The ``ne`` operator on a string field drops the matching rows."""
    sessions = ("sess_a", "sess_internal", "sess_b")
    seeded = [
        _episode(f"ep_{n:03d}", session=sess)
        for n, sess in enumerate(sessions, start=1)
    ]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"session_id": {"ne": "sess_internal"}},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {item["id"] for item in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_003"}
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_iso_timestamp(
    client: AsyncClient,
) -> None:
    """A ``timestamp`` filter accepts an ISO 8601 string literal."""
    # Seeded on 2026-01-01, 2026-01-05 and 2026-01-09.
    seeded = [
        _episode(f"ep_{n:03d}", day=day)
        for n, day in enumerate((1, 5, 9), start=1)
    ]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"timestamp": {"gte": "2026-01-04T00:00:00+00:00"}},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    returned_ids = {item["id"] for item in resp.json()["data"]["episodes"]}
    assert returned_ids == {"u1_ep_002", "u1_ep_003"}
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_parent_id(client: AsyncClient) -> None:
    """Filtering on ``parent_id`` returns every episode of one memcell."""
    parents = ("mc_target", "mc_target", "mc_other")
    seeded = [
        _episode(f"ep_{n:03d}", parent_id=parent)
        for n, parent in enumerate(parents, start=1)
    ]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"parent_id": "mc_target"},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {item["id"] for item in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_sender_id_in(
    client: AsyncClient,
) -> None:
    """``sender_id: {"in": [...]}`` matches rows whose senders include any value.

    Per the original note this compiles to
    ``array_has(sender_ids, ...) OR ...`` on the ``sender_ids`` column.
    """
    humans = ("alice", "bob", "carol")
    seeded = [
        _episode(f"ep_{n:03d}", sender_ids=[human, "assistant"])
        for n, human in enumerate(humans, start=1)
    ]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"sender_id": {"in": ["alice", "bob"]}},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {item["id"] for item in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
||||
|
||||
|
||||
async def test_get_episodes_nested_and_inside_or(client: AsyncClient) -> None:
    """``AND`` groups nested under ``OR`` filter as on /search."""
    combos = (
        ("sess_a", "mc_target"),
        ("sess_a", "mc_other"),
        ("sess_b", "mc_target"),
        ("sess_c", "mc_other"),
    )
    seeded = [
        _episode(f"ep_{n:03d}", session=sess, parent_id=parent)
        for n, (sess, parent) in enumerate(combos, start=1)
    ]
    await episode_repo.add(seeded)

    # (session=sess_a AND parent_id=mc_target)
    #   OR (parent_id=mc_other AND session=sess_c)
    # → ep_001 + ep_004
    first_branch = {
        "AND": [
            {"session_id": "sess_a"},
            {"parent_id": "mc_target"},
        ]
    }
    second_branch = {
        "AND": [
            {"parent_id": "mc_other"},
            {"session_id": "sess_c"},
        ]
    }
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"OR": [first_branch, second_branch]},
    }
    resp = await client.post("/api/v1/memory/get", json=payload)
    assert resp.status_code == 200

    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {item["id"] for item in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_004"}
|
||||
|
||||
|
||||
# ── Filter combinators (200 — happy path) ──────────────────────────────
|
||||
# Pure 422 / validation cases moved to
|
||||
# tests/unit/test_entrypoints/test_api/test_routes/test_get_route_validation.py
|
||||
|
||||
|
||||
async def test_get_top_level_and_or_compiles_and_filters(
    client: AsyncClient,
) -> None:
    """A top-level ``OR`` filter is accepted by /get (parity with /search).

    Three episodes in three sessions are seeded; the ``OR`` over two of the
    sessions must return exactly those two episodes.
    """
    seeded = [
        _episode("ep_001", session="sess_a"),
        _episode("ep_002", session="sess_b"),
        _episode("ep_003", session="sess_c"),
    ]
    await episode_repo.add(seeded)

    either_session = {"OR": [{"session_id": "sess_a"}, {"session_id": "sess_b"}]}
    request_body = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": either_session,
    }
    resp = await client.post("/api/v1/memory/get", json=request_body)

    assert resp.status_code == 200
    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
||||
|
||||
|
||||
async def test_get_episodes_filtered_by_timestamp_range(
    client: AsyncClient,
) -> None:
    """Two operators on ``timestamp`` (``gte`` + ``lt``) act as an implicit AND.

    Episodes are seeded on odd days 1..9 of January 2026; the half-open
    window [Jan 3, Jan 7) must match only the Jan 3 and Jan 5 rows.
    """
    # (episode suffix, day-of-month) — day N maps to 2026-01-N.
    seed_days = [("ep_001", 1), ("ep_002", 3), ("ep_003", 5), ("ep_004", 7), ("ep_005", 9)]
    await episode_repo.add([_episode(name, day=day) for name, day in seed_days])

    # Jan 7 itself is excluded because the upper bound uses ``lt``.
    window = {
        "gte": "2026-01-03T00:00:00+00:00",
        "lt": "2026-01-07T00:00:00+00:00",
    }
    request_body = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"timestamp": window},
    }
    resp = await client.post("/api/v1/memory/get", json=request_body)

    assert resp.status_code == 200
    data = resp.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_002", "u1_ep_003"}
|
||||
|
||||
|
||||
async def test_get_episodes_top_level_and_filter(client: AsyncClient) -> None:
    """An explicit top-level ``AND`` list narrows to rows matching every clause.

    This is the spelled-out combinator form, as opposed to the implicit AND
    obtained by listing several fields in one filter object.
    """
    seeded = [
        _episode("ep_001", session="sess_a", parent_id="mc_target"),
        _episode("ep_002", session="sess_a", parent_id="mc_other"),
        _episode("ep_003", session="sess_b", parent_id="mc_target"),
    ]
    await episode_repo.add(seeded)

    # Only ep_001 satisfies both session=sess_a and parent_id=mc_target.
    both_clauses = {"AND": [{"session_id": "sess_a"}, {"parent_id": "mc_target"}]}
    request_body = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": both_clauses,
    }
    resp = await client.post("/api/v1/memory/get", json=request_body)

    assert resp.status_code == 200
    data = resp.json()["data"]
    assert data["total_count"] == 1
    first_episode = data["episodes"][0]
    assert first_episode["id"] == "u1_ep_001"
|
||||
|
||||
|
||||
# ── max_fetch limit trigger ─────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_truncates_above_max_fetch(
    client: AsyncClient,
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """More matches than ``max_fetch`` → truncation warning, true total kept.

    Ten episodes are seeded and ``find_where_paginated`` is wrapped so every
    call runs with ``max_fetch=5``. The response must still report
    ``total_count == 10`` while serving a page of three, and the
    truncation warning must be visible through ``caplog``. Forcing a low cap
    avoids having to seed tens of thousands of rows to reach the real limit.
    """
    # The ``client`` fixture builds the app without lifespan providers, so
    # ``configure_logging`` (normally called by the CLI entry) never runs.
    # Invoke it here so structlog is bridged onto stdlib logging and
    # ``caplog`` can see the warning.
    from everos.core.observability.logging import configure_logging

    configure_logging(level="WARNING")

    ten_episodes = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 11)]
    await episode_repo.add(ten_episodes)

    unpatched = episode_repo.find_where_paginated

    async def capped_find(*args: object, **kwargs: object) -> object:
        # Override whatever cap the caller passed with a tiny one.
        kwargs["max_fetch"] = 5
        result = await unpatched(*args, **kwargs)  # type: ignore[arg-type]
        return result

    monkeypatch.setattr(episode_repo, "find_where_paginated", capped_find)

    request_body = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 1,
        "page_size": 3,
    }
    with caplog.at_level("WARNING"):
        resp = await client.post("/api/v1/memory/get", json=request_body)

    assert resp.status_code == 200
    data = resp.json()["data"]
    # The reported total is the real match count, not the capped fetch size.
    assert data["total_count"] == 10
    assert data["count"] == 3
    # structlog is routed through the stdlib root logger (see
    # ``core/observability/logging/factory.py``), so the warning is read
    # from ``caplog`` rather than captured stdout.
    assert "find_where_paginated truncated" in caplog.text
|
||||
|
||||
|
||||
# ── Concurrency ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_concurrent_owners_no_cross_contamination(
    client: AsyncClient,
) -> None:
    """Parallel /get calls for different owners each see only their own rows.

    ``GetManager`` is a lazy singleton, so firing the requests together also
    exercises first-request lazy initialisation under contention.
    """
    await episode_repo.add(
        [
            _episode("ep_001", owner="u1"),
            _episode("ep_002", owner="u1"),
            _episode("ep_001", owner="u2"),
            _episode("ep_001", owner="u3"),
        ]
    )

    async def query(owner: str) -> dict[str, object]:
        request_body = {"user_id": owner, "memory_type": "episode"}
        resp = await client.post("/api/v1/memory/get", json=request_body)
        assert resp.status_code == 200, f"{owner}: {resp.text}"
        return resp.json()

    # Expected episode ids per owner; insertion order drives request order.
    expected_ids = {
        "u1": {"u1_ep_001", "u1_ep_002"},
        "u2": {"u2_ep_001"},
        "u3": {"u3_ep_001"},
    }
    owners = list(expected_ids)
    bodies = await asyncio.gather(*(query(owner) for owner in owners))
    by_owner = dict(zip(owners, bodies))

    # Counts first, then identities — same assertion order as a flat listing.
    for owner in owners:
        total = by_owner[owner]["data"]["total_count"]  # type: ignore[index]
        assert total == len(expected_ids[owner])
    for owner in owners:
        episodes = by_owner[owner]["data"]["episodes"]  # type: ignore[index]
        assert {ep["id"] for ep in episodes} == expected_ids[owner]
|
||||
|
||||
|
||||
async def test_get_concurrent_different_memory_types(client: AsyncClient) -> None:
    """Concurrent /get calls for different ``memory_type`` values stay isolated.

    Seeds one episode (owner ``u1``) plus one agent case and one agent skill
    (owner ``a1``), issues the three reads concurrently, and checks that each
    response fills only the envelope slot matching its ``memory_type`` —
    both other slots must be empty for every envelope.
    """
    await episode_repo.add([_episode("ep_001", owner="u1")])
    await agent_case_repo.add([_agent_case("ac_001", owner="a1")])
    await agent_skill_repo.add([_agent_skill("planner", owner="a1")])

    async def query(payload: dict[str, object]) -> dict[str, object]:
        resp = await client.post("/api/v1/memory/get", json=payload)
        assert resp.status_code == 200, resp.text
        return resp.json()

    ep_body, case_body, skill_body = await asyncio.gather(
        query({"user_id": "u1", "memory_type": "episode"}),
        query({"agent_id": "a1", "memory_type": "agent_case"}),
        query({"agent_id": "a1", "memory_type": "agent_skill"}),
    )

    # Episode envelope: only ``episodes`` populated.
    assert len(ep_body["data"]["episodes"]) == 1  # type: ignore[index]
    assert ep_body["data"]["agent_cases"] == []  # type: ignore[index]
    assert ep_body["data"]["agent_skills"] == []  # type: ignore[index]
    # Case envelope: only ``agent_cases`` populated.
    assert len(case_body["data"]["agent_cases"]) == 1  # type: ignore[index]
    assert case_body["data"]["episodes"] == []  # type: ignore[index]
    assert case_body["data"]["agent_skills"] == []  # type: ignore[index]
    # Skill envelope: only ``agent_skills`` populated.
    assert len(skill_body["data"]["agent_skills"]) == 1  # type: ignore[index]
    assert skill_body["data"]["episodes"] == []  # type: ignore[index]
    assert skill_body["data"]["agent_cases"] == []  # type: ignore[index]
|
||||
|
||||
|
||||
async def test_get_concurrent_lazy_init_builds_one_manager(
    client: AsyncClient,
) -> None:
    """Lazy manager initialisation holds up under first-request contention.

    Starting from an unset module-level manager, eight simultaneous /get
    requests must all return 200 and the manager slot must be populated
    afterwards.
    """
    # Precondition: the ``client`` fixture has reset ``_manager`` to None.
    assert get_service_mod._manager is None
    await episode_repo.add([_episode("ep_001")])

    request_body = {"user_id": "u1", "memory_type": "episode"}
    in_flight = [
        client.post("/api/v1/memory/get", json=request_body) for _ in range(8)
    ]
    responses = await asyncio.gather(*in_flight)

    assert all(r.status_code == 200 for r in responses)
    # Once the burst has finished, a manager instance is cached on the module.
    assert get_service_mod._manager is not None
|
||||
140
tests/e2e/test_multimodal_add_e2e.py
Normal file
140
tests/e2e/test_multimodal_add_e2e.py
Normal file
@ -0,0 +1,140 @@
|
||||
"""E2E: multimodal /add parses HTML (base64) and http(s) uri end-to-end.
|
||||
|
||||
Scope: full HTTP stack (``create_app()`` + ``AsyncClient``) → ingest →
|
||||
multimodal parse → unprocessed_buffer. Proves the three paths the unit
|
||||
tests can only mock:
|
||||
|
||||
1. ``type="html"`` + base64 + ``ext="html"`` — the normal HTML-file call.
|
||||
2. ``type="html"`` + ``https`` uri — everalgo fetches the page and
|
||||
dispatches by the response Content-Type.
|
||||
3. ``type="html"`` + ``file://`` uri — EverOS reads the file locally and
|
||||
hands everalgo hydrated bytes (the library never touches the fs).
|
||||
|
||||
Real multimodal LLM (creds via ``.env``) + real public internet, so the
|
||||
module is marked ``live_llm``. Skipped when the ``[multimodal]`` extra is
|
||||
absent.
|
||||
|
||||
White-box surface: reads the ``text`` column of ``unprocessed_buffer``
|
||||
(the derived text the ingest stage produced from the parsed content) to
|
||||
assert the parsed payload actually flowed into the buffer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
pytest.importorskip("everalgo.parser")
|
||||
|
||||
pytestmark = pytest.mark.live_llm
|
||||
|
||||
|
||||
async def _buffer_text(session_id: str) -> str:
    """Return the ``text`` of every buffer row for *session_id*, newline-joined.

    Reads ``unprocessed_buffer`` directly through the SQLite engine; an
    empty string is returned when the session has no rows.
    """
    from everos.infra.persistence.sqlite import get_engine

    query = sql_text("SELECT text FROM unprocessed_buffer WHERE session_id = :sid")
    async with get_engine().connect() as conn:
        result = await conn.execute(query, {"sid": session_id})
        # Materialise while the connection is still open.
        texts = [str(row[0]) for row in result.all()]
    return "\n".join(texts)
|
||||
|
||||
|
||||
async def test_add_html_base64_parsed_into_buffer(
    async_client: httpx.AsyncClient,
) -> None:
    """HTML sent as a base64 payload is parsed and its text reaches the buffer."""
    sid = "e2e-mm-html-b64"
    html = (
        b"<html><body><h1>Release</h1>"
        b"<p>Version 9.9.9 ships Dark Mode.</p></body></html>"
    )
    html_part = {
        "type": "html",
        "base64": base64.b64encode(html).decode(),
        "ext": "html",
        "name": "notes.html",
    }
    message = {
        "sender_id": "alice",
        "role": "user",
        "timestamp": 1780304400000,
        "content": [html_part],
    }
    resp = await async_client.post(
        "/api/v1/memory/add",
        json={"session_id": sid, "messages": [message]},
    )
    assert resp.status_code == 200, resp.text

    # The version string only exists inside the HTML body, so finding it in
    # the buffer shows the parsed content was ingested.
    buffered = await _buffer_text(sid)
    assert "9.9.9" in buffered
|
||||
|
||||
|
||||
async def test_add_html_https_uri_parsed_into_buffer(
    async_client: httpx.AsyncClient,
) -> None:
    """Content referenced by an https uri is fetched, parsed and buffered."""
    sid = "e2e-mm-html-uri"
    message = {
        "sender_id": "alice",
        "role": "user",
        "timestamp": 1780304400000,
        "content": [{"type": "html", "uri": "https://example.com"}],
    }
    resp = await async_client.post(
        "/api/v1/memory/add",
        json={"session_id": sid, "messages": [message]},
    )
    assert resp.status_code == 200, resp.text

    # Case-insensitive match on the text the fetched page is expected to carry.
    buffered = await _buffer_text(sid)
    assert "example domain" in buffered.lower()
|
||||
|
||||
|
||||
async def test_add_html_file_uri_parsed_into_buffer(
    async_client: httpx.AsyncClient,
    tmp_path: Path,
) -> None:
    """A file:// html asset is read locally (hydrated) + parsed into buffer.

    Exercises EverOS-side file:// support: the parser receives bytes, never
    the path. Default allowlist is empty (local-first) so the temp file reads.
    """
    doc = tmp_path / "release.html"
    # Pin the encoding so the fixture bytes do not depend on the locale.
    doc.write_text(
        "<html><body><p>Version 9.9.9 ships Dark Mode.</p></body></html>",
        encoding="utf-8",
    )
    sid = "e2e-mm-html-file"
    resp = await async_client.post(
        "/api/v1/memory/add",
        json={
            "session_id": sid,
            "messages": [
                {
                    "sender_id": "alice",
                    "role": "user",
                    "timestamp": 1780304400000,
                    "content": [{"type": "html", "uri": f"file://{doc}"}],
                }
            ],
        },
    )
    assert resp.status_code == 200, resp.text

    # The version string lives only in the temp file, so its presence in the
    # buffer shows the local file was read and parsed.
    buffered = await _buffer_text(sid)
    assert "9.9.9" in buffered
|
||||
87
tests/e2e/test_openapi_endpoint_matches_docs.py
Normal file
87
tests/e2e/test_openapi_endpoint_matches_docs.py
Normal file
@ -0,0 +1,87 @@
|
||||
"""Belt-and-braces gate: dev-mode ``GET /openapi.json`` ≡ ``docs/openapi.json``.
|
||||
|
||||
The lint-time ``make check-openapi`` already diffs ``app.openapi()``
|
||||
against the committed ``docs/openapi.json``. This e2e test closes the
|
||||
remaining theoretical gap: if anyone ever adds a *lifespan-mutated*
|
||||
OpenAPI schema (e.g. ``app.openapi_schema = ...`` inside a startup
|
||||
handler), the in-memory ``app.openapi()`` and the runtime
|
||||
``GET /openapi.json`` response would diverge — the lint gate would
|
||||
miss it, but this test wouldn't.
|
||||
|
||||
How:
|
||||
|
||||
1. Force ``ENV=DEV`` so the ``openapi_url`` route is enabled.
|
||||
2. Construct the app via ``create_app(lifespan_providers=[])`` to skip
|
||||
SQLite / LanceDB / OME (the schema is route-driven, not state-
|
||||
driven) — but *do* run the lifespan context, so any startup hook
|
||||
that mutates ``app.openapi_schema`` is exercised.
|
||||
3. ``GET /openapi.json`` through ``httpx.AsyncClient``.
|
||||
4. Compare against ``docs/openapi.json`` as parsed JSON (structural
|
||||
equality, so key order and whitespace differences are ignored).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
_COMMITTED_OPENAPI = _REPO_ROOT / "docs" / "openapi.json"
|
||||
|
||||
|
||||
async def test_dev_mode_openapi_endpoint_matches_committed_docs(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Runtime ``GET /openapi.json`` (dev mode) must equal ``docs/openapi.json``.

    The app is built with no lifespan providers but its lifespan context is
    still entered, then the served schema is compared with the committed
    snapshot as parsed JSON. On mismatch the test fails with the first 120
    lines of a unified diff (committed → runtime).
    """
    # The gate's own committed snapshot must exist — otherwise the dev
    # workflow ``make openapi`` has been skipped.
    assert _COMMITTED_OPENAPI.is_file(), (
        f"{_COMMITTED_OPENAPI} not found — run `make openapi`"
    )

    # Force dev-mode so ``openapi_url="/openapi.json"`` is registered.
    monkeypatch.setenv("ENV", "DEV")

    from everos.entrypoints.api.app import create_app

    app = create_app(lifespan_providers=[])
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        resp = await client.get("/openapi.json")
        assert resp.status_code == 200, resp.text
        runtime_schema = resp.json()

    committed_schema = json.loads(_COMMITTED_OPENAPI.read_text(encoding="utf-8"))

    if runtime_schema != committed_schema:
        # Emit a concise diff to help locate the drift cause.
        import difflib
        from itertools import islice

        def _rendered_lines(schema: object) -> list[str]:
            # Same pretty-printing on both sides so the diff is line-aligned.
            return json.dumps(schema, indent=2, ensure_ascii=False).splitlines()

        diff_lines = difflib.unified_diff(
            _rendered_lines(committed_schema),
            _rendered_lines(runtime_schema),
            fromfile="docs/openapi.json (committed)",
            tofile="GET /openapi.json (runtime)",
            lineterm="",
        )
        # ``unified_diff`` is a generator: take the first 120 lines lazily
        # instead of materialising the whole diff and slicing it.
        diff = "\n".join(islice(diff_lines, 120))
        raise AssertionError(
            "runtime /openapi.json drifts from docs/openapi.json; "
            "run `make openapi` and commit the result.\n\n" + diff
        )
|
||||
|
||||
|
||||
# Keep ``os`` legit in case future scenarios need direct env reads.
|
||||
_ = os
|
||||
2102
tests/e2e/test_search_endpoint_e2e.py
Normal file
2102
tests/e2e/test_search_endpoint_e2e.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user