A Markdown-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: (1) coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr); (2) GitHub Actions CI runs make lint + test + integration, and pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg); (3) commit messages follow Conventional Commits, enforced by gitlint; (4) make lint also enforces datetime two-zone discipline and OpenAPI drift.
830 lines
28 KiB
Python
830 lines
28 KiB
Python
"""End-to-end integration tests for ``POST /api/v1/memory/get``.

These tests spin up the FastAPI app with **no lifespan providers**
against a tmp ``EVEROS_MEMORY__ROOT``, populate real LanceDB tables
(``episode``, ``user_profile``, ``agent_case``, ``agent_skill``)
directly via the repo singletons, and exercise the HTTP route. They
cover the wiring that unit tests cannot: the JSON envelope shape and
the full ``request → service → manager → LanceDB`` path.

Pure 422 / validation cases are not covered here; they live in
``tests/unit/test_entrypoints/test_api/test_routes/test_get_route_validation.py``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import datetime as _dt
|
|
from collections.abc import AsyncIterator
|
|
from importlib import import_module
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from httpx import ASGITransport, AsyncClient
|
|
|
|
from everos.config import load_settings
|
|
from everos.entrypoints.api.app import create_app
|
|
from everos.infra.persistence.lancedb import (
|
|
AgentCase,
|
|
AgentSkill,
|
|
Episode,
|
|
UserProfile,
|
|
agent_case_repo,
|
|
agent_skill_repo,
|
|
episode_repo,
|
|
lancedb_manager,
|
|
user_profile_repo,
|
|
)
|
|
|
|
# ``everos.service`` exposes a ``get`` *function* (its ``__init__`` does
# ``from .get import get as get``), and that name hides the ``get``
# submodule on a plain import. Resolve the submodule by dotted path
# instead, so the tests can reach its ``_manager`` singleton.
get_service_mod = import_module("everos.service.get")
|
|
|
|
|
|
def _ts(day: int) -> _dt.datetime:
|
|
return _dt.datetime(2026, 1, day, tzinfo=_dt.UTC)
|
|
|
|
|
|
def _episode(
    entry: str,
    *,
    owner: str = "u1",
    session: str = "sess_a",
    parent_id: str = "mc_1",
    sender_ids: list[str] | None = None,
    day: int = 1,
) -> Episode:
    """Build an ``Episode`` row for seeding the LanceDB ``episode`` table.

    Args:
        entry: Entry id; also used to derive the row id, subject, summary,
            body text and markdown path.
        owner: Owner id; the row id is ``"{owner}_{entry}"``.
        session: Value stored in ``session_id``.
        parent_id: Value stored in ``parent_id`` (``parent_type`` is always
            ``"memcell"``).
        sender_ids: Explicit sender list; when ``None`` it defaults to
            ``[owner, "assistant"]``.
        day: Day of January 2026 used for the row timestamp (via ``_ts``).

    Returns:
        An ``Episode`` with placeholder hash and an all-zero 1024-dim vector.
    """
    if sender_ids is None:
        sender_ids = [owner, "assistant"]
    # The same text feeds both the raw body and its tokenised twin.
    body_text = f"body of {entry}"
    return Episode(
        id=f"{owner}_{entry}",
        entry_id=entry,
        owner_id=owner,
        owner_type="user",
        session_id=session,
        timestamp=_ts(day),
        parent_type="memcell",
        parent_id=parent_id,
        sender_ids=sender_ids,
        subject=f"subj {entry}",
        summary=f"summary {entry}",
        episode=body_text,
        episode_tokens=body_text,
        md_path=f"users/{owner}/episodes/{entry}.md",
        content_sha256="abc",
        vector=[0.0] * 1024,
    )
|
|
|
|
|
|
def _agent_case(
    entry: str,
    *,
    owner: str = "a1",
    session: str = "sess_x",
    day: int = 1,
) -> AgentCase:
    """Build an ``AgentCase`` row for seeding the ``agent_case`` table.

    Args:
        entry: Entry id; also used to derive the row id, intent / approach
            text and markdown path.
        owner: Agent owner id; the row id is ``"{owner}_{entry}"``.
        session: Value stored in ``session_id``.
        day: Day of January 2026 used for the row timestamp (via ``_ts``).

    Returns:
        An ``AgentCase`` with ``quality_score=0.8``, no ``key_insight``, a
        placeholder hash and an all-zero 1024-dim vector.
    """
    # Each text field and its ``*_tokens`` twin carry identical content.
    intent_text = f"intent {entry}"
    approach_text = f"approach {entry}"
    return AgentCase(
        id=f"{owner}_{entry}",
        entry_id=entry,
        owner_id=owner,
        owner_type="agent",
        session_id=session,
        timestamp=_ts(day),
        parent_type="memcell",
        parent_id="mc_99",
        quality_score=0.8,
        task_intent=intent_text,
        task_intent_tokens=intent_text,
        approach=approach_text,
        approach_tokens=approach_text,
        key_insight=None,
        md_path=f"agents/{owner}/cases/{entry}.md",
        content_sha256="abc",
        vector=[0.0] * 1024,
    )
|
|
|
|
|
|
def _agent_skill(
    name: str,
    *,
    owner: str = "a1",
) -> AgentSkill:
    """Build an ``AgentSkill`` row for seeding the ``agent_skill`` table.

    Args:
        name: Skill name; also used to derive the row id, description,
            content and markdown path.
        owner: Agent owner id; the row id is ``"{owner}_{name}"``.

    Returns:
        An ``AgentSkill`` with fixed ``confidence`` / ``maturity_score``, a
        single source case id, a placeholder hash and an all-zero 1024-dim
        vector.
    """
    # Each text field and its ``*_tokens`` twin carry identical content.
    description_text = f"desc {name}"
    content_text = f"content {name}"
    return AgentSkill(
        id=f"{owner}_{name}",
        owner_id=owner,
        owner_type="agent",
        name=name,
        description=description_text,
        description_tokens=description_text,
        content=content_text,
        content_tokens=content_text,
        confidence=0.9,
        maturity_score=0.7,
        source_case_ids=[f"{owner}_ac_1"],
        md_path=f"agents/{owner}/skills/{name}/SKILL.md",
        content_sha256="abc",
        vector=[0.0] * 1024,
    )
|
|
|
|
|
|
@pytest.fixture
async def client(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a fresh app on a tmp memory root.

    Setup points ``EVEROS_MEMORY__ROOT`` at ``tmp_path``, drops the cached
    settings, and resets the module-level singletons the get-path touches
    (the LanceDB connection / table cache and the get-service manager).
    The app is created with ``lifespan_providers=[]`` and driven in-process
    through ``ASGITransport``.

    Teardown runs in a ``finally`` block so the LanceDB connection is
    disposed and the caches are cleared even if closing the client raises.
    The get-service manager is reset as well, so nothing built during this
    test stays cached once the fixture has been torn down.

    Yields:
        An ``AsyncClient`` whose requests are served by the in-process app.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    load_settings.cache_clear()

    # Reset every module-level singleton the get-path touches.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    get_service_mod._manager = None

    app = create_app(lifespan_providers=[])
    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as c:
            yield c
    finally:
        await lancedb_manager.dispose_connection()
        # Do not leave a manager created during this test cached for others.
        get_service_mod._manager = None
        load_settings.cache_clear()
|
|
|
|
|
|
# ── Happy path ──────────────────────────────────────────────────────────
|
|
|
|
|
|
async def test_get_episodes_returns_page_and_total(
    client: AsyncClient,
) -> None:
    """First page of a five-episode owner: two rows back, ``total_count`` 5."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 1,
        "page_size": 2,
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    envelope = response.json()
    # The request id must be 32 lowercase hex characters.
    request_id = envelope["request_id"]
    assert len(request_id) == 32
    assert all(ch in "0123456789abcdef" for ch in request_id)

    data = envelope["data"]
    assert data["total_count"] == 5
    assert data["count"] == 2
    assert len(data["episodes"]) == 2
    # Default sort is timestamp DESC, so the highest day leads the page.
    assert [ep["id"] for ep in data["episodes"]] == ["u1_ep_005", "u1_ep_004"]
    # Kinds that were not requested come back as empty arrays.
    for unrequested in ("profiles", "agent_cases", "agent_skills"):
        assert data[unrequested] == []
|
|
|
|
|
|
async def test_get_episodes_filtered_by_session_id(
    client: AsyncClient,
) -> None:
    """A bare ``session_id`` filter keeps only that session's episodes."""
    sessions_by_entry = (
        ("ep_001", "sess_a"),
        ("ep_002", "sess_a"),
        ("ep_003", "sess_b"),
    )
    await episode_repo.add(
        [_episode(entry, session=session) for entry, session in sessions_by_entry],
    )

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"session_id": "sess_a"},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    assert data["count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
|
|
|
|
|
async def test_get_empty_returns_zero_counts(client: AsyncClient) -> None:
    """Querying an owner with no seeded rows is a 200 with zero counts."""
    payload = {"user_id": "ghost", "memory_type": "episode"}
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    result = response.json()["data"]
    assert result["total_count"] == 0
    assert result["count"] == 0
    assert result["episodes"] == []
|
|
|
|
|
|
async def test_get_profile_miss_returns_empty(client: AsyncClient) -> None:
    """With no profile row seeded, a ``profile`` read is a 200 with no rows."""
    payload = {"user_id": "u1", "memory_type": "profile"}
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    result = response.json()["data"]
    assert result["profiles"] == []
    assert result["total_count"] == 0
|
|
|
|
|
|
async def test_get_profile_returns_seeded_row(client: AsyncClient) -> None:
    """A seeded ``user_profile`` row is returned with its JSON columns decoded.

    The row is written straight into the LanceDB ``user_profile`` table via
    ``user_profile_repo`` and then read back through the HTTP route. The
    response is expected to expose the ``*_json`` string columns as decoded
    lists under ``profile_data``.
    """
    seeded_profile = UserProfile(
        id="u1",
        owner_id="u1",
        owner_type="user",
        app_id="default",
        project_id="default",
        summary="u1 loves climbing in Yosemite",
        explicit_info_json='[{"category": "Hobby", "description": "climbing"}]',
        implicit_traits_json='[{"trait": "Outdoorsy"}]',
        profile_timestamp_ms=1780304400000,
        md_path="users/u1/user.md",
        content_sha256="abc",
    )
    await user_profile_repo.add([seeded_profile])

    response = await client.post(
        "/api/v1/memory/get",
        json={"user_id": "u1", "memory_type": "profile"},
    )
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 1
    assert data["count"] == 1
    assert len(data["profiles"]) == 1

    profile = data["profiles"][0]
    assert profile["id"] == "u1"
    assert profile["user_id"] == "u1"

    profile_data = profile["profile_data"]
    assert profile_data["summary"] == "u1 loves climbing in Yosemite"
    assert profile_data["explicit_info"] == [
        {"category": "Hobby", "description": "climbing"}
    ]
    assert profile_data["implicit_traits"] == [{"trait": "Outdoorsy"}]
|
|
|
|
|
|
# ── Pagination + sort ───────────────────────────────────────────────────
|
|
|
|
|
|
async def test_get_episodes_page_two_returns_correct_slice(
    client: AsyncClient,
) -> None:
    """Page 2 at ``page_size=2`` over five episodes yields the 3rd + 4th newest."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 6)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 2,
        "page_size": 2,
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 5
    assert data["count"] == 2
    # Default sort is timestamp DESC (days 5, 4, 3, 2, 1); the second page of
    # two therefore starts at offset 2 and holds day 3 then day 2.
    page_ids = [ep["id"] for ep in data["episodes"]]
    assert page_ids == ["u1_ep_003", "u1_ep_002"]
|
|
|
|
|
|
async def test_get_episodes_sort_order_asc(client: AsyncClient) -> None:
    """With ``sort_order=asc`` the oldest episode comes back first."""
    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 4)]
    await episode_repo.add(seeded)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "sort_order": "asc",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    episodes = response.json()["data"]["episodes"]
    assert [ep["id"] for ep in episodes] == ["u1_ep_001", "u1_ep_002", "u1_ep_003"]
|
|
|
|
|
|
# ── Agent-side kinds ────────────────────────────────────────────────────
|
|
|
|
|
|
async def test_get_agent_cases_happy_path(client: AsyncClient) -> None:
    """Listing ``agent_case`` fills only ``agent_cases``, newest case first."""
    seeded = [_agent_case(f"ac_{i:03d}", day=i) for i in range(1, 4)]
    await agent_case_repo.add(seeded)

    payload = {"agent_id": "a1", "memory_type": "agent_case"}
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 3
    assert data["count"] == 3
    case_ids = [case["id"] for case in data["agent_cases"]]
    assert case_ids == ["a1_ac_003", "a1_ac_002", "a1_ac_001"]

    # The other kinds in the envelope stay empty.
    assert data["episodes"] == []
    assert data["agent_skills"] == []

    # Item shape: no ``score`` key on a /get item, while ``quality_score``
    # and the agent id round-trip from the seeded row.
    newest = data["agent_cases"][0]
    assert "score" not in newest
    assert newest["quality_score"] == 0.8
    assert newest["agent_id"] == "a1"
|
|
|
|
|
|
async def test_get_agent_cases_filtered_by_session(client: AsyncClient) -> None:
    """A ``session_id`` filter keeps only the matching ``agent_case`` rows."""
    sessions_by_entry = (
        ("ac_001", "sess_x"),
        ("ac_002", "sess_x"),
        ("ac_003", "sess_y"),
    )
    await agent_case_repo.add(
        [_agent_case(entry, session=session) for entry, session in sessions_by_entry]
    )

    payload = {
        "agent_id": "a1",
        "memory_type": "agent_case",
        "filters": {"session_id": "sess_x"},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {case["id"] for case in data["agent_cases"]}
    assert returned_ids == {"a1_ac_001", "a1_ac_002"}
|
|
|
|
|
|
async def test_get_agent_skills_happy_path(client: AsyncClient) -> None:
    """Listing ``agent_skill`` for an agent returns every seeded skill.

    Only the set of returned names is checked; ordering is not asserted.
    """
    skill_names = ("planner", "summariser")
    await agent_skill_repo.add([_agent_skill(name) for name in skill_names])

    payload = {"agent_id": "a1", "memory_type": "agent_skill"}
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_names = {skill["name"] for skill in data["agent_skills"]}
    assert returned_names == {"planner", "summariser"}
|
|
|
|
|
|
async def test_get_agent_skills_sort_by_timestamp_silently_downgraded(
    client: AsyncClient,
) -> None:
    """``sort_by=timestamp`` on ``agent_skill`` still answers 200.

    The request must succeed and report the single seeded skill rather than
    failing on the sort column. (Per the original note, the manager is
    expected to fall back to ``updated_at``; that rewrite itself is not
    observable from this test.)
    """
    await agent_skill_repo.add([_agent_skill("planner")])

    payload = {
        "agent_id": "a1",
        "memory_type": "agent_skill",
        "sort_by": "timestamp",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200
    assert response.json()["data"]["total_count"] == 1
|
|
|
|
|
|
# ── Filter coverage end-to-end ──────────────────────────────────────────
|
|
|
|
|
|
async def test_get_episodes_filtered_by_ne_session(client: AsyncClient) -> None:
    """A ``ne`` filter on ``session_id`` drops the matching episode."""
    sessions_by_entry = (
        ("ep_001", "sess_a"),
        ("ep_002", "sess_internal"),
        ("ep_003", "sess_b"),
    )
    await episode_repo.add(
        [_episode(entry, session=session) for entry, session in sessions_by_entry]
    )

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"session_id": {"ne": "sess_internal"}},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_003"}
|
|
|
|
|
|
async def test_get_episodes_filtered_by_iso_timestamp(
    client: AsyncClient,
) -> None:
    """An ISO 8601 string is accepted as the operand of a ``timestamp`` filter."""
    # Seeded on 2026-01-01, 2026-01-05 and 2026-01-09 respectively.
    days_by_entry = (("ep_001", 1), ("ep_002", 5), ("ep_003", 9))
    await episode_repo.add(
        [_episode(entry, day=day) for entry, day in days_by_entry]
    )

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"timestamp": {"gte": "2026-01-04T00:00:00+00:00"}},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    # Only the rows dated on or after Jan 4 survive the ``gte`` bound.
    returned_ids = {ep["id"] for ep in response.json()["data"]["episodes"]}
    assert returned_ids == {"u1_ep_002", "u1_ep_003"}
|
|
|
|
|
|
async def test_get_episodes_filtered_by_parent_id(client: AsyncClient) -> None:
    """A ``parent_id`` filter returns every episode derived from one memcell."""
    parents_by_entry = (
        ("ep_001", "mc_target"),
        ("ep_002", "mc_target"),
        ("ep_003", "mc_other"),
    )
    await episode_repo.add(
        [_episode(entry, parent_id=parent) for entry, parent in parents_by_entry]
    )

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"parent_id": "mc_target"},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
|
|
|
|
|
async def test_get_episodes_filtered_by_sender_id_in(
    client: AsyncClient,
) -> None:
    """``sender_id: {"in": [...]}`` matches episodes sent by any listed id.

    Each seeded episode has a distinct human sender plus ``"assistant"``;
    filtering on two of the three humans must return exactly their rows.
    """
    senders_by_entry = (
        ("ep_001", "alice"),
        ("ep_002", "bob"),
        ("ep_003", "carol"),
    )
    await episode_repo.add(
        [
            _episode(entry, sender_ids=[sender, "assistant"])
            for entry, sender in senders_by_entry
        ]
    )

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"sender_id": {"in": ["alice", "bob"]}},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
|
|
|
|
|
async def test_get_episodes_nested_and_inside_or(client: AsyncClient) -> None:
    """Two ``AND`` groups nested under a top-level ``OR`` filter correctly."""
    seed_rows = (
        ("ep_001", "sess_a", "mc_target"),
        ("ep_002", "sess_a", "mc_other"),
        ("ep_003", "sess_b", "mc_target"),
        ("ep_004", "sess_c", "mc_other"),
    )
    await episode_repo.add(
        [
            _episode(entry, session=session, parent_id=parent)
            for entry, session, parent in seed_rows
        ]
    )

    # (session=sess_a AND parent_id=mc_target)
    #   OR (parent_id=mc_other AND session=sess_c)
    # → only ep_001 and ep_004 satisfy one of the two groups.
    first_group = {"AND": [{"session_id": "sess_a"}, {"parent_id": "mc_target"}]}
    second_group = {"AND": [{"parent_id": "mc_other"}, {"session_id": "sess_c"}]}
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"OR": [first_group, second_group]},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_004"}
|
|
|
|
|
|
# ── Filter combinators (200 — happy path) ──────────────────────────────
|
|
# Pure 422 / validation cases moved to
|
|
# tests/unit/test_entrypoints/test_api/test_routes/test_get_route_validation.py
|
|
|
|
|
|
async def test_get_top_level_and_or_compiles_and_filters(
    client: AsyncClient,
) -> None:
    """A top-level ``OR`` of two ``session_id`` clauses is accepted and applied."""
    sessions_by_entry = (
        ("ep_001", "sess_a"),
        ("ep_002", "sess_b"),
        ("ep_003", "sess_c"),
    )
    await episode_repo.add(
        [_episode(entry, session=session) for entry, session in sessions_by_entry],
    )

    either_session = {"OR": [{"session_id": "sess_a"}, {"session_id": "sess_b"}]}
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": either_session,
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_001", "u1_ep_002"}
|
|
|
|
|
|
async def test_get_episodes_filtered_by_timestamp_range(
    client: AsyncClient,
) -> None:
    """``gte`` + ``lt`` on the same ``timestamp`` field act as one window."""
    # One episode every other day: 2026-01-01, -03, -05, -07 and -09.
    days_by_entry = (
        ("ep_001", 1),
        ("ep_002", 3),
        ("ep_003", 5),
        ("ep_004", 7),
        ("ep_005", 9),
    )
    await episode_repo.add(
        [_episode(entry, day=day) for entry, day in days_by_entry]
    )

    # Half-open window [Jan 3, Jan 7): Jan 7 itself is excluded by ``lt``.
    window = {
        "gte": "2026-01-03T00:00:00+00:00",
        "lt": "2026-01-07T00:00:00+00:00",
    }
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"timestamp": window},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 2
    returned_ids = {ep["id"] for ep in data["episodes"]}
    assert returned_ids == {"u1_ep_002", "u1_ep_003"}
|
|
|
|
|
|
async def test_get_episodes_top_level_and_filter(client: AsyncClient) -> None:
    """An explicit top-level ``AND`` list requires every clause to match."""
    seed_rows = (
        ("ep_001", "sess_a", "mc_target"),
        ("ep_002", "sess_a", "mc_other"),
        ("ep_003", "sess_b", "mc_target"),
    )
    await episode_repo.add(
        [
            _episode(entry, session=session, parent_id=parent)
            for entry, session, parent in seed_rows
        ]
    )

    # session=sess_a AND parent_id=mc_target → only ep_001 matches both.
    both_clauses = {"AND": [{"session_id": "sess_a"}, {"parent_id": "mc_target"}]}
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": both_clauses,
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 200

    data = response.json()["data"]
    assert data["total_count"] == 1
    assert data["episodes"][0]["id"] == "u1_ep_001"
|
|
|
|
|
|
# ── max_fetch limit trigger ─────────────────────────────────────────────
|
|
|
|
|
|
async def test_get_truncates_above_max_fetch(
    client: AsyncClient,
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """More matches than ``max_fetch`` → warning logged, true ``total_count``.

    Ten episodes are seeded and ``episode_repo.find_where_paginated`` is
    wrapped so that every call runs with ``max_fetch=5``. The response must
    still report ``total_count == 10`` and return the three requested rows,
    and a ``find_where_paginated truncated`` warning must be captured.
    Forcing a tiny cap avoids seeding enough rows to reach the real limit.

    NOTE(review): which rows land on the page after truncation is not
    asserted here; only the counts and the warning are.
    """
    # The ``client`` fixture builds the app with no lifespan providers, so
    # logging is configured explicitly here; without it ``caplog`` would not
    # see the warning emitted through the structlog → stdlib bridge.
    from everos.core.observability.logging import configure_logging

    configure_logging(level="WARNING")

    seeded = [_episode(f"ep_{i:03d}", day=i) for i in range(1, 11)]
    await episode_repo.add(seeded)

    real_find = episode_repo.find_where_paginated

    async def capped_find(*args: object, **kwargs: object) -> object:
        # Override whatever cap the caller asked for with a tiny one.
        kwargs["max_fetch"] = 5
        return await real_find(*args, **kwargs)  # type: ignore[arg-type]

    monkeypatch.setattr(episode_repo, "find_where_paginated", capped_find)

    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page": 1,
        "page_size": 3,
    }
    with caplog.at_level("WARNING"):
        response = await client.post("/api/v1/memory/get", json=payload)

    assert response.status_code == 200
    data = response.json()["data"]
    # The reported total is the full match count, not the capped fetch size.
    assert data["total_count"] == 10
    assert data["count"] == 3
    # The warning is read from the standard ``caplog`` fixture.
    assert "find_where_paginated truncated" in caplog.text
|
|
|
|
|
|
# ── Concurrency ─────────────────────────────────────────────────────────
|
|
|
|
|
|
async def test_get_concurrent_owners_no_cross_contamination(
    client: AsyncClient,
) -> None:
    """Concurrent /get calls for different owners each see only their rows.

    Three owners are seeded (two rows for ``u1``, one each for ``u2`` and
    ``u3``) and queried in a single ``asyncio.gather``. Because the fixture
    resets the get-service manager, these are also the first requests to
    reach it.
    """
    expected_ids: dict[str, set[str]] = {
        "u1": {"u1_ep_001", "u1_ep_002"},
        "u2": {"u2_ep_001"},
        "u3": {"u3_ep_001"},
    }
    await episode_repo.add(
        [
            _episode("ep_001", owner="u1"),
            _episode("ep_002", owner="u1"),
            _episode("ep_001", owner="u2"),
            _episode("ep_001", owner="u3"),
        ]
    )

    async def fetch(owner: str) -> dict[str, object]:
        response = await client.post(
            "/api/v1/memory/get",
            json={"user_id": owner, "memory_type": "episode"},
        )
        assert response.status_code == 200, f"{owner}: {response.text}"
        return response.json()

    owners = list(expected_ids)
    bodies = await asyncio.gather(*(fetch(owner) for owner in owners))

    for owner, body in zip(owners, bodies):
        wanted = expected_ids[owner]
        assert body["data"]["total_count"] == len(wanted)  # type: ignore[index]
        got = {ep["id"] for ep in body["data"]["episodes"]}  # type: ignore[index]
        assert got == wanted
|
|
|
|
|
|
async def test_get_concurrent_different_memory_types(client: AsyncClient) -> None:
    """Concurrent /get calls for different kinds each fill only their own slot.

    One episode, one agent case and one agent skill are seeded, then the
    three kinds are requested in a single ``asyncio.gather``. Each response
    must carry its own kind and leave the other checked arrays empty.
    """
    await episode_repo.add([_episode("ep_001", owner="u1")])
    await agent_case_repo.add([_agent_case("ac_001", owner="a1")])
    await agent_skill_repo.add([_agent_skill("planner", owner="a1")])

    async def fetch(payload: dict[str, object]) -> dict[str, object]:
        response = await client.post("/api/v1/memory/get", json=payload)
        assert response.status_code == 200, response.text
        return response.json()

    payloads: list[dict[str, object]] = [
        {"user_id": "u1", "memory_type": "episode"},
        {"agent_id": "a1", "memory_type": "agent_case"},
        {"agent_id": "a1", "memory_type": "agent_skill"},
    ]
    ep_body, case_body, skill_body = await asyncio.gather(
        *(fetch(payload) for payload in payloads)
    )

    # Episode response: ``episodes`` populated, agent arrays empty.
    assert len(ep_body["data"]["episodes"]) == 1  # type: ignore[index]
    assert ep_body["data"]["agent_cases"] == []  # type: ignore[index]
    assert ep_body["data"]["agent_skills"] == []  # type: ignore[index]
    # Agent-case response: ``agent_cases`` populated, no episodes.
    assert len(case_body["data"]["agent_cases"]) == 1  # type: ignore[index]
    assert case_body["data"]["episodes"] == []  # type: ignore[index]
    # Agent-skill response: ``agent_skills`` populated, no episodes.
    assert len(skill_body["data"]["agent_skills"]) == 1  # type: ignore[index]
    assert skill_body["data"]["episodes"] == []  # type: ignore[index]
|
|
|
|
|
|
async def test_get_concurrent_lazy_init_builds_one_manager(
    client: AsyncClient,
) -> None:
    """Eight simultaneous first requests all succeed and leave a cached manager.

    NOTE(review): the final assertion proves that *a* manager is cached after
    the burst. It does not prove that exactly one instance was constructed
    while the requests were racing.
    """
    # Precondition: the ``client`` fixture cleared the singleton.
    assert get_service_mod._manager is None
    await episode_repo.add([_episode("ep_001")])

    request_body = {"user_id": "u1", "memory_type": "episode"}
    burst = [
        client.post("/api/v1/memory/get", json=request_body) for _ in range(8)
    ]
    responses = await asyncio.gather(*burst)

    ok_flags = [response.status_code == 200 for response in responses]
    assert all(ok_flags)
    # Once the burst has finished, the module-level slot holds a manager.
    assert get_service_mod._manager is not None
|