chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
0
tests/integration/__init__.py
Normal file
0
tests/integration/__init__.py
Normal file
0
tests/integration/search/__init__.py
Normal file
0
tests/integration/search/__init__.py
Normal file
269
tests/integration/search/_helpers.py
Normal file
269
tests/integration/search/_helpers.py
Normal file
@ -0,0 +1,269 @@
|
||||
"""Private helpers shared across the search e2e tests.
|
||||
|
||||
* :func:`pick_query_seeds` — scans the session corpus's
|
||||
``.atomic_facts/`` md files and returns a list of
|
||||
``(owner_id, fact_text)`` tuples to use as deterministic search
|
||||
queries. Bootstrapping queries off the corpus's own extraction
|
||||
output gives us a closed-loop correctness signal — what was
|
||||
written should be findable.
|
||||
|
||||
* :func:`assert_recall` — the canonical "this search returned at
|
||||
least one sensible hit for ``owner``" assertion bundle. Used by
|
||||
the keyword / vector / hybrid recall tests so the assertion logic
|
||||
is in one place.
|
||||
|
||||
* :func:`flatten_hits` — collapses ``SearchData``'s four arrays into
|
||||
one ``(owner_id, score, text)`` tuple list for relevance checks.
|
||||
|
||||
The helpers do **not** hardcode topical keywords ("hiking" / "work")
|
||||
— they are derived from what the pipeline produced. This keeps the
|
||||
suite stable across LLM-driven boundary-cut variance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
# Upper bound on the number of seeds returned per call. Each seed turns
# into a search request, and running every test against every fact
# would blow the LLM rerank budget.
_DEFAULT_SEED_LIMIT = 3

# Content-overlap tokenisation: tokens are runs of word characters,
# compared in lowercase; tokens shorter than the minimum length carry
# no signal and are dropped.
_MIN_TOKEN_LEN = 3
_TOKEN_RE = re.compile(r"\w+", flags=re.UNICODE)

# Function words ignored by the content-overlap check.
_STOPWORDS: frozenset[str] = frozenset(
    (
        # articles, conjunctions, prepositions, determiners
        "the", "and", "but", "for", "from", "with", "that", "this",
        # auxiliary verbs
        "was", "has", "have", "are",
        # pronouns
        "you", "she", "her", "his", "him", "they", "them", "their",
    )
)
|
||||
|
||||
|
||||
# ── Query seed extraction ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def pick_query_seeds(
    memory_root: Path,
    *,
    limit: int = _DEFAULT_SEED_LIMIT,
) -> list[tuple[str, str]]:
    """Collect up to ``limit`` ``(owner_id, fact_text)`` query seeds.

    Looks under ``<memory_root>/default_app/default_project/users``.
    For every owner directory that has a ``.atomic_facts/`` folder,
    each ``*.md`` file below it (recursively) is scanned for
    ``### Fact`` section bodies via :func:`_extract_fact_sections`.

    Owner directories and md paths are both visited in ``sorted``
    order, so the returned seeds are deterministic for a given corpus
    — a flaky test then points at a real regression rather than at
    query-rotation variance.

    Args:
        memory_root: Root of the memory tree written by the pipeline.
        limit: Collection stops as soon as this many seeds are held.

    Returns:
        ``(owner directory name, fact text)`` tuples in discovery order.

    Raises:
        AssertionError: if the users directory is missing or no fact
            could be extracted. Both are fixture failures and are
            meant to fail loudly.
    """
    users_dir = memory_root / "default_app" / "default_project" / "users"
    if not users_dir.is_dir():
        raise AssertionError(f"expected {users_dir} to exist after ingest")

    seeds: list[tuple[str, str]] = []
    owner_dirs = (entry for entry in sorted(users_dir.iterdir()) if entry.is_dir())
    for owner_dir in owner_dirs:
        facts_dir = owner_dir / ".atomic_facts"
        if not facts_dir.is_dir():
            continue
        for md_path in sorted(facts_dir.rglob("*.md")):
            for fact_text in _extract_fact_sections(md_path):
                if not fact_text:
                    continue
                seeds.append((owner_dir.name, fact_text))
                # Stop early: callers only need a handful of queries.
                if len(seeds) >= limit:
                    return seeds

    if seeds:
        return seeds
    raise AssertionError(
        f"no atomic_fact md entries under {users_dir} — pipeline did "
        "not produce any facts; cannot bootstrap search queries"
    )
|
||||
|
||||
|
||||
def _extract_fact_sections(md: Path) -> list[str]:
    """Collect the body text of every ``### Fact`` section in ``md``.

    The file is read as UTF-8 and scanned line by line. A line whose
    left-stripped text starts with ``### Fact`` opens a section; the
    section runs until the next line that (left-stripped) starts with
    ``#`` or with ``<!-- /entry``, or until end of file. Each body is
    whitespace-stripped and empty bodies are discarded.

    Args:
        md: Path of the markdown file to scan.

    Returns:
        Non-empty section bodies in file order.
    """
    bodies: list[str] = []
    # ``None`` while outside a Fact section, otherwise the lines gathered so far.
    pending: list[str] | None = None

    for raw in md.read_text(encoding="utf-8").splitlines():
        head = raw.lstrip()
        opens_fact = head.startswith("### Fact")
        # Any heading (including a new ``### Fact``) or the entry-end
        # marker terminates the section currently being collected.
        closes_fact = head.startswith(("#", "<!-- /entry"))

        if pending is not None and (opens_fact or closes_fact):
            bodies.append("\n".join(pending).strip())
            pending = None

        if opens_fact:
            pending = []
        elif pending is not None:
            pending.append(raw)

    # A section still open at end of file is flushed as well.
    if pending is not None:
        bodies.append("\n".join(pending).strip())

    return [text for text in bodies if text]
|
||||
|
||||
|
||||
# ── Response flattening + assertions ────────────────────────────────────
|
||||
|
||||
|
||||
def flatten_hits(data: dict[str, Any]) -> list[tuple[str | None, float, str]]:
    """Collapse ``SearchData``'s four arrays into ``(owner_id, score, text)``.

    Gives the recall / partition tests one stable shape regardless of
    track kind. Arrays are read in the order ``episodes``,
    ``profiles``, ``agent_cases``, ``agent_skills``.

    * **owner** — ``user_id`` for episodes / profiles, ``agent_id`` for
      cases / skills. When that key is absent (or null) the generic
      ``owner_id`` key is used instead. ``None`` is returned only when
      neither key is present, e.g. profile hits whose owner is implicit.
    * **score** — ``float(score)``; a missing or null score becomes ``0.0``.
    * **text** — the first non-empty candidate field for the kind
      (``episode`` / ``summary`` / ``subject``; ``profile_data``;
      ``approach`` / ``task_intent``; ``content`` / ``description``),
      or ``""``. Non-string values such as a ``profile_data`` dict are
      stringified with ``str``.

    A missing array and an explicit JSON ``null`` array are both
    treated as empty.

    Args:
        data: The ``data`` object of a ``/search`` response body.

    Returns:
        One tuple per hit, in array order.
    """
    # (response array, kind-specific owner key, text keys by priority)
    layout = (
        ("episodes", "user_id", ("episode", "summary", "subject")),
        ("profiles", "user_id", ("profile_data",)),
        ("agent_cases", "agent_id", ("approach", "task_intent")),
        ("agent_skills", "agent_id", ("content", "description")),
    )

    out: list[tuple[str | None, float, str]] = []
    for array_key, owner_key, text_keys in layout:
        # ``or []`` covers both "key missing" and "key present but null".
        for item in data.get(array_key) or []:
            owner = item.get(owner_key)
            if owner is None:
                # NOTE(review): the report script reads ``owner_id`` from
                # the same route's items. Accept it as a fallback so
                # callers' owner checks are not silently vacuous —
                # confirm the canonical key against the API schema.
                owner = item.get("owner_id")
            text = next((item[key] for key in text_keys if item.get(key)), "")
            if not isinstance(text, str):
                text = str(text)
            out.append((owner, float(item.get("score") or 0.0), text))
    return out
|
||||
|
||||
|
||||
async def assert_recall(
    client: httpx.AsyncClient,
    *,
    owner_id: str,
    query: str,
    method: str,
    min_score: float = 0.0,
    top_k: int = 10,
) -> dict[str, Any]:
    """POST to ``/api/v1/memory/search`` and assert the standard recall invariants.

    Checks, in order:

    1. **Status** — the response is HTTP 200 (the response text is the
       assertion message otherwise).
    2. **Existence** — :func:`flatten_hits` yields at least one hit.
    3. **Owner partition** — every hit whose owner is not ``None``
       belongs to ``owner_id``; hits without an owner are skipped.
    4. **Score sanity** — the highest score is ``>= min_score``.

    NOTE(review): this helper sends the owner as ``user_id``, while the
    report script posts ``owner_id`` + ``owner_type`` to the same route
    — confirm which request shape the API expects.

    Args:
        client: Async HTTP client bound to the app under test.
        owner_id: Owner whose memories are searched.
        query: Search query text.
        method: Retrieval method name forwarded to the API.
        min_score: Floor the best hit's score must reach.
        top_k: Maximum number of hits requested.

    Returns:
        The parsed response body, so callers can layer case-specific
        assertions on top.
    """
    payload = {
        "user_id": owner_id,
        "query": query,
        "method": method,
        "top_k": top_k,
    }
    resp = await client.post("/api/v1/memory/search", json=payload, timeout=120.0)
    assert resp.status_code == 200, resp.text

    body = resp.json()
    hits = flatten_hits(body["data"])
    assert hits, (
        f"no hits for owner={owner_id} query={query!r} method={method} — "
        f"recall is broken"
    )

    # First hit that names a different owner, if any; ownerless hits are exempt.
    stray_owner = next(
        (owner for owner, _score, _text in hits if owner is not None and owner != owner_id),
        None,
    )
    assert stray_owner is None, (
        f"partition leak: got owner={stray_owner!r} when querying {owner_id!r}"
    )

    top_score = max(hits, key=lambda hit: hit[1])[1]
    assert top_score >= min_score, (
        f"top hit score {top_score:.3f} < min {min_score} for method={method}"
    )
    return body
|
||||
|
||||
|
||||
# ── Token utilities (for content-overlap checks) ────────────────────────
|
||||
|
||||
|
||||
def query_tokens(query: str) -> set[str]:
    """Return the distinct lowercase content tokens of ``query``.

    A token is a ``_TOKEN_RE`` match of at least ``_MIN_TOKEN_LEN``
    characters whose lowercase form is not in ``_STOPWORDS``. Used to
    check for overlap between a query and hit text.
    """
    kept: set[str] = set()
    for raw in _TOKEN_RE.findall(query):
        if len(raw) < _MIN_TOKEN_LEN:
            continue
        folded = raw.lower()
        if folded not in _STOPWORDS:
            kept.add(folded)
    return kept
|
||||
|
||||
|
||||
def content_tokens_in_order(query: str) -> list[str]:
    """Return lowercase content tokens in source order, first occurrence only.

    Applies the same filter as the set-based token helper (minimum
    length, stopword removal) but keeps document order.

    The keyword test relies on this: the project's BM25 tokenizer
    (jieba) is Chinese-first and degrades to near-zero recall on single
    short English tokens, whereas multi-token phrases recall well in
    practice. Keyword queries are therefore built from consecutive
    content tokens of the source fact instead of an alphabetical sort.
    """
    long_enough = (
        raw.lower() for raw in _TOKEN_RE.findall(query) if len(raw) >= _MIN_TOKEN_LEN
    )
    # dict keys preserve insertion order, giving an ordered de-duplication.
    return list(dict.fromkeys(tok for tok in long_enough if tok not in _STOPWORDS))
|
||||
83
tests/integration/search/_rerun_probes.py
Normal file
83
tests/integration/search/_rerun_probes.py
Normal file
@ -0,0 +1,83 @@
|
||||
"""Re-run probes against an existing corpus + regenerate the report.
|
||||
|
||||
Reuses everything from :mod:`_run_full_report` except the ingest step —
|
||||
points at the already-populated ``~/.everos-report-corpus`` and only
|
||||
re-runs the search probes + report rendering. Useful when the corpus
|
||||
is already there from a previous run and you just want to refresh the
|
||||
retrieval section without paying for LLM ingestion again.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from dotenv import load_dotenv
|
||||
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
load_dotenv(_PROJECT_ROOT / ".env", override=False)
|
||||
|
||||
|
||||
from _run_full_report import ( # noqa: E402
|
||||
CONVERSATION,
|
||||
CORPUS_ROOT,
|
||||
REPORT_PATH,
|
||||
inspect_artifacts,
|
||||
render_report,
|
||||
run_probes,
|
||||
)
|
||||
|
||||
|
||||
async def main() -> None:
    """Re-run the search probes against an existing corpus and rewrite the report.

    Steps:

    1. Verify the corpus at ``CORPUS_ROOT`` is populated and point
       ``EVEROS_MEMORY__ROOT`` at it (clearing the settings cache so
       the new value is picked up).
    2. Start the app in-process, inspect the on-disk artifacts and run
       every probe.
    3. Render the report with placeholder ingest / cascade summaries
       (nothing is ingested here) and write it to ``REPORT_PATH``.

    Raises:
        SystemExit: if no users directory exists under ``CORPUS_ROOT``.
    """
    # The artifact inspector reads user data from
    # <root>/default_app/default_project/users, so that is where a
    # populated corpus is expected. The bare <root>/users layout the
    # original guard looked for is still accepted.
    user_dirs = (
        CORPUS_ROOT / "default_app" / "default_project" / "users",
        CORPUS_ROOT / "users",
    )
    if not any(candidate.is_dir() for candidate in user_dirs):
        raise SystemExit(f"{CORPUS_ROOT} not populated — run _run_full_report.py first")
    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)

    # Imported late so the environment override above is in place first.
    from everos.config import load_settings

    load_settings.cache_clear()

    print(f"[1/3] using corpus at {CORPUS_ROOT}")

    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)

    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        print("[2/3] inspecting artifacts + running probes ...")
        artifacts = await inspect_artifacts(CORPUS_ROOT)
        probes = await run_probes(client)

    print("[3/3] re-rendering report ...")
    md = render_report(
        memory_root=CORPUS_ROOT,
        # No ingest happens in this script; synthesize one row per
        # batch so the report's ingest table keeps its shape.
        ingest_summary={
            "batches": [
                {
                    "idx": i,
                    "msg_count": len(b),
                    "status": "extracted (cached)",
                    "returned_count": len(b),
                }
                for i, b in enumerate(CONVERSATION)
            ],
            "flush_status": "extracted (cached)",
        },
        cascade_summary={
            "note": "cascade was force-completed via _rerun_probes.py "
            "after initial run; counts below are post-completion."
        },
        artifacts=artifacts,
        probes=probes,
    )
    REPORT_PATH.write_text(md, encoding="utf-8")
    print(f" → {REPORT_PATH}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
660
tests/integration/search/_run_full_report.py
Normal file
660
tests/integration/search/_run_full_report.py
Normal file
@ -0,0 +1,660 @@
|
||||
"""End-to-end report generator: fresh corpus → ingest → retrieve → markdown report.
|
||||
|
||||
Run with::
|
||||
|
||||
PYTHONPATH=src python tests/integration/search/_run_full_report.py
|
||||
|
||||
Writes a fresh ``~/.everos-report-corpus/`` memory_root, runs a small
|
||||
synthetic 16-message conversation between two new users (``u_diana`` +
|
||||
``u_ethan``) through ``/add`` + ``/flush``, waits for cascade drain, then
|
||||
runs a curated set of search probes and dumps a structured markdown
|
||||
report to ``tests/integration/search/SEARCH_REPORT.md``.
|
||||
|
||||
Not a pytest test — pure investigative script, real LLM, real embedder.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load .env BEFORE any everos import so settings are correct.
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
load_dotenv(_PROJECT_ROOT / ".env", override=False)
|
||||
|
||||
|
||||
# ── Corpus location ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
# Memory root used for the report corpus (under the user's home directory).
CORPUS_ROOT = Path.home().joinpath(".everos-report-corpus")
# Destination of the rendered markdown report, relative to the project root.
REPORT_PATH = _PROJECT_ROOT.joinpath("tests", "integration", "search", "SEARCH_REPORT.md")
# Session id sent with every /add and /flush request.
SESSION_ID = "report_session_diana_ethan"
|
||||
|
||||
|
||||
# ── Synthetic conversation (16 msgs, 2 batches) ────────────────────────
|
||||
|
||||
|
||||
# Two batches of eight messages each. Timestamps are epoch milliseconds
# and strictly increasing: messages within a batch are one minute apart
# and batch 2 starts one hour after batch 1. The opening message used to
# carry 1778414400000 — two hours *after* the reply that follows it and
# later than all of batch 2 — which put the conversation out of order.
CONVERSATION = [
    # Batch 1 — introducing hobbies
    [
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407200000,
            "content": "Hey Ethan! Just got back from a 3-day hike in Yosemite. "
            "My new Sony A7 camera is amazing for landscape shots.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407260000,
            "content": "Wow that sounds intense! I'd never survive without my "
            "espresso. How's the Rust programming learning going?",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407320000,
            "content": "Slow but steady. Working through the official book. "
            "The borrow checker still trips me up.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407380000,
            "content": "I'm marathon training — up to 15 miles long runs now. "
            "Plus I joined a jazz quartet on weekends.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407440000,
            "content": "That's awesome! Saxophone again?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407500000,
            "content": "Yeah, alto sax. We're playing at the Blue Note next month.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407560000,
            "content": "I'll come watch! Speaking of trips, want to do "
            "that Iceland thing this summer?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407620000,
            "content": "100% yes. I've been researching ring road photography spots.",
        },
    ],
    # Batch 2 — Iceland trip planning
    [
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778410800000,
            "content": "I want to see the Northern Lights and shoot some "
            "volcanic landscapes.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778410860000,
            "content": "We should rent a 4x4. The F-roads are insane I hear.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778410920000,
            "content": "And I want to try Icelandic lamb stew. You cook, right?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778410980000,
            "content": (
                "Yeah, I'll bring my Dutch oven. Maybe a cast iron pan for fish."
            ),
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778411040000,
            "content": "Perfect. Mid-July works for me — I have a Rust conference "
            "in late August.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778411100000,
            "content": "July it is. I have the Boston Marathon qualifier in October "
            "so I can't go after.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778411160000,
            "content": "Let's book flights this weekend?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778411220000,
            "content": "Deal. Also bringing my Olympus E-M1 for the landscapes.",
        },
    ],
]
|
||||
|
||||
|
||||
# ── Probe set ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _probe(
    section: str,
    owner: str,
    query: str,
    method: str,
    expect: str,
    **extra: object,
) -> dict:
    """Build one probe spec; ``extra`` carries optional request flags."""
    return {
        "section": section,
        "owner": owner,
        "query": query,
        "method": method,
        **extra,
        "expect": expect,
    }


_TOPICAL_DIANA = "Owner-specific topical (diana)"
_TOPICAL_ETHAN = "Owner-specific topical (ethan)"
_SHARED_ICELAND = "Shared topic (Iceland)"
_METHOD_CMP = "Method comparison (diana + 'Rust')"

# Curated search probes. ``expect`` is free text; the grader keys its
# heuristics off phrases inside it.
PROBES: list[dict] = [
    # Owner-specific topical: should recall the right owner's episodes.
    _probe(_TOPICAL_DIANA, "u_diana", "hiking", "hybrid", "diana's Yosemite episode"),
    _probe(_TOPICAL_DIANA, "u_diana", "Rust programming", "hybrid", "diana's Rust learning facts"),
    _probe(_TOPICAL_DIANA, "u_diana", "photography", "hybrid", "diana's camera (Sony A7) facts"),
    _probe(_TOPICAL_ETHAN, "u_ethan", "jazz", "hybrid", "ethan's jazz quartet / sax facts"),
    _probe(_TOPICAL_ETHAN, "u_ethan", "marathon training", "hybrid", "ethan's marathon facts"),
    _probe(_TOPICAL_ETHAN, "u_ethan", "cooking", "hybrid", "ethan's Dutch oven / lamb stew facts"),
    # Shared topic — both should recall their own perspective.
    _probe(_SHARED_ICELAND, "u_diana", "Iceland trip", "hybrid", "diana's planning episode"),
    _probe(_SHARED_ICELAND, "u_ethan", "Iceland trip", "hybrid", "ethan's planning episode"),
    # Method comparison on the same query.
    _probe(_METHOD_CMP, "u_diana", "Rust", "keyword", "BM25 single token"),
    _probe(_METHOD_CMP, "u_diana", "Rust", "vector", "cosine ANN"),
    _probe(_METHOD_CMP, "u_diana", "Rust", "hybrid", "fusion of BM25 + vector"),
    # Owner partition: each user searching for the other's exclusive topic.
    _probe("Owner partition", "u_diana", "jazz quartet", "hybrid", "should NOT leak ethan's content"),
    _probe("Owner partition", "u_ethan", "Rust programming", "hybrid", "should NOT leak diana's content"),
    # Phrase + bigram.
    _probe("Phrase queries", "u_diana", "Northern Lights", "keyword", "diana's Iceland aurora plans"),
    _probe("Phrase queries", "u_ethan", "Boston Marathon", "keyword", "ethan's qualifier date"),
    # include_profile.
    _probe(
        "Profile attach",
        "u_diana",
        "anything",
        "hybrid",
        "should return diana's profile object",
        include_profile=True,
    ),
    # Unknown owner.
    _probe("Unknown owner", "u_ghost_does_not_exist", "hiking", "hybrid", "empty arrays, status 200"),
    # Non-existent term.
    _probe("Non-existent term", "u_diana", "quantum blockchain pizza", "keyword", "0 hits, status 200"),
]
|
||||
|
||||
|
||||
# ── Pipeline runners ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def ingest(client: httpx.AsyncClient) -> dict:
    """Send every batch of ``CONVERSATION`` to ``/add``, then call ``/flush``.

    Each request carries ``SESSION_ID`` and a 600-second timeout, and
    ``raise_for_status()`` is called on every response, so an error
    status aborts the run.

    Args:
        client: Async HTTP client bound to the app.

    Returns:
        ``{"batches": [...], "flush_status": ...}`` where each batch row
        holds ``idx``, ``msg_count``, the ``status`` reported by
        ``/add`` and its ``message_count`` (as ``returned_count``).
    """
    batch_rows: list[dict] = []
    for idx, messages in enumerate(CONVERSATION):
        add_resp = await client.post(
            "/api/v1/memory/add",
            json={"session_id": SESSION_ID, "messages": messages},
            timeout=600.0,
        )
        add_resp.raise_for_status()
        added = add_resp.json()["data"]
        batch_rows.append(
            {
                "idx": idx,
                "msg_count": len(messages),
                "status": added["status"],
                "returned_count": added["message_count"],
            }
        )

    flush_resp = await client.post(
        "/api/v1/memory/flush",
        json={"session_id": SESSION_ID},
        timeout=600.0,
    )
    flush_resp.raise_for_status()
    return {
        "batches": batch_rows,
        "flush_status": flush_resp.json()["data"]["status"],
    }
|
||||
|
||||
|
||||
async def wait_cascade(
    *,
    expected_md_paths: int = 8,
    stable_checks: int = 5,
    deadline_seconds: float = 600.0,
) -> dict:
    """Poll the md-change queue until the cascade has *stably* drained.

    Checking ``pending == 0`` once is racy: the OME async strategies
    (extract_foresight / extract_user_profile) write their md files
    after ``/flush`` has returned, so the queue can look empty for a
    moment before those writes arrive. A poll therefore counts as
    "drained" only when both hold:

    1. The queue has seen at least ``expected_md_paths`` rows in total
       (pending + done + failed). The default of 8 is one row per
       (owner × kind): episodes, atomic_facts, foresights and
       user_profile for each of the two owners. This rules out
       returning before anything was emitted.
    2. ``pending == 0``.

    The function returns once ``stable_checks`` drained polls occur in
    a row; any non-drained poll resets the streak. Polls are one
    second apart.

    Args:
        expected_md_paths: Minimum total row count required.
        stable_checks: Consecutive drained polls required.
        deadline_seconds: Overall time budget, enforced with
            ``asyncio.timeout``.

    Returns:
        The final ``done`` / ``failed_retryable`` / ``failed_permanent``
        counts plus ``max_lsn`` and ``last_processed_lsn``.

    Raises:
        TimeoutError: if the deadline passes before the queue is stable.
    """
    from everos.infra.persistence.sqlite import md_change_state_repo

    drained_streak = 0
    async with asyncio.timeout(deadline_seconds):
        while True:
            snapshot = await md_change_state_repo.queue_summary()
            rows_seen = (
                snapshot.pending
                + snapshot.done
                + snapshot.failed_retryable
                + snapshot.failed_permanent
            )
            drained = snapshot.pending == 0 and rows_seen >= expected_md_paths
            drained_streak = drained_streak + 1 if drained else 0

            # Only a drained poll may end the wait.
            if drained and drained_streak >= stable_checks:
                return {
                    "done": snapshot.done,
                    "failed_retryable": snapshot.failed_retryable,
                    "failed_permanent": snapshot.failed_permanent,
                    "max_lsn": snapshot.max_lsn,
                    "last_processed_lsn": snapshot.last_processed_lsn,
                }
            await asyncio.sleep(1.0)
|
||||
|
||||
|
||||
async def inspect_artifacts(memory_root: Path) -> dict:
    """Collect LanceDB row counts and the list of md files on disk.

    Opens the LanceDB connection, verifies the business schemas, counts
    the rows of the episode / atomic_fact / foresight / user_profile
    tables and disposes the connection again. The dispose step runs in
    a ``finally`` so the connection is released even when verification
    or a count raises.

    Args:
        memory_root: Root of the memory tree; md files are looked up
            under ``default_app/default_project/users`` below it.

    Returns:
        A dict with ``episode_rows``, ``atomic_fact_rows``,
        ``foresight_rows``, ``user_profile_rows`` and ``md_files`` —
        the sorted md paths relative to ``memory_root`` (empty when the
        users directory does not exist).
    """
    from everos.infra.persistence.lancedb import (
        atomic_fact_repo,
        dispose_connection,
        episode_repo,
        foresight_repo,
        get_connection,
        user_profile_repo,
        verify_business_schemas,
    )

    await get_connection()
    try:
        await verify_business_schemas()
        counts: dict = {
            "episode_rows": await episode_repo.count(),
            "atomic_fact_rows": await atomic_fact_repo.count(),
            "foresight_rows": await foresight_repo.count(),
            "user_profile_rows": await user_profile_repo.count(),
        }
    finally:
        await dispose_connection()

    users_dir = memory_root / "default_app" / "default_project" / "users"
    md_files: list[str] = []
    if users_dir.is_dir():
        md_files = [
            str(md_path.relative_to(memory_root))
            for md_path in sorted(users_dir.rglob("*.md"))
        ]
    counts["md_files"] = md_files
    return counts
|
||||
|
||||
|
||||
async def run_probes(client: httpx.AsyncClient) -> list[dict]:
    """Execute every probe in :data:`PROBES` and capture one row per probe.

    Each probe is one POST to ``/api/v1/memory/search`` with
    ``top_k=5``. The response status is recorded as-is. A single
    unusable response does not abort the whole report: a body that is
    not JSON, a body without ``data`` (or with ``"data": null``), null
    arrays and null scores are all treated as empty / zero.

    Args:
        client: Async HTTP client bound to the app.

    Returns:
        One dict per probe with ``section``, ``expect``, the
        ``request`` payload, the HTTP ``status``, and trimmed
        ``episodes`` / ``profiles`` lists (summary capped at 150
        characters, profile excerpt at 200).
    """
    rows: list[dict] = []
    for probe in PROBES:
        payload: dict = {
            "owner_id": probe["owner"],
            "owner_type": "user",
            "query": probe["query"],
            "method": probe["method"],
            "top_k": 5,
        }
        if probe.get("include_profile"):
            payload["include_profile"] = True

        resp = await client.post("/api/v1/memory/search", json=payload, timeout=120.0)
        try:
            body = resp.json()
        except ValueError:
            # Non-JSON body (e.g. a plain-text error page): keep the
            # status and report the probe as returning nothing.
            body = {}
        # ``or {}`` / ``or []`` also cover explicit JSON nulls.
        data = (body.get("data") if isinstance(body, dict) else None) or {}

        rows.append(
            {
                "section": probe["section"],
                "expect": probe["expect"],
                "request": payload,
                "status": resp.status_code,
                "episodes": [
                    {
                        "id": e["id"],
                        "owner_id": e["owner_id"],
                        "score": round(float(e.get("score") or 0.0), 3),
                        "summary": (e.get("summary") or "")[:150],
                        "atomic_facts_count": len(e.get("atomic_facts") or []),
                    }
                    for e in data.get("episodes") or []
                ],
                "profiles": [
                    {
                        "owner_id": prof.get("owner_id"),
                        "score": prof.get("score"),
                        "summary_excerpt": str(prof.get("profile_data", {}))[:200],
                    }
                    for prof in data.get("profiles") or []
                ],
            }
        )
    return rows
|
||||
|
||||
|
||||
# ── Markdown report renderer ───────────────────────────────────────────
|
||||
|
||||
|
||||
def _table_cell(text: str) -> str:
    """Make ``text`` safe for one Markdown table cell (single line, ``|`` escaped)."""
    # A table row must stay on one line, so line breaks become spaces.
    return " ".join(text.splitlines()).replace("|", "\\|")


def render_report(
    *,
    memory_root: Path,
    ingest_summary: dict,
    cascade_summary: dict,
    artifacts: dict,
    probes: list[dict],
) -> str:
    """Render the end-to-end search report as a Markdown string.

    Sections, in order: setup, ingest stats, cascade drain, on-disk
    artifacts, per-probe retrieval results, and a pass/fail summary
    produced by ``_grade``.

    Args:
        memory_root: Corpus root shown in the setup section.
        ingest_summary: Dict with a ``batches`` list (rows holding
            ``idx``, ``msg_count``, ``status``) and ``flush_status``.
        cascade_summary: JSON-serialisable dict dumped verbatim.
        artifacts: Dict with the four ``*_rows`` counts and ``md_files``.
        probes: Probe rows carrying ``section``, ``expect``,
            ``request``, ``status``, ``episodes`` and ``profiles``.

    Returns:
        The complete report text.
    """
    lines: list[str] = []
    lines.append("# Search E2E Report — fresh corpus (u_diana + u_ethan)\n")
    lines.append(
        "Generated by [`_run_full_report.py`](_run_full_report.py). "
        "Two synthetic users with distinct hobbies feed a 16-message "
        "conversation through the full pipeline; the report below "
        "captures ingest stats, cascade drain numbers, on-disk "
        "artifacts, and the response of every curated search probe.\n"
    )

    # ── Section: Setup ────────────────────────────────────────────────
    lines.append("## 1. Setup\n")
    lines.append(f"- **Memory root**: `{memory_root}`\n")
    lines.append(f"- **Session id**: `{SESSION_ID}`\n")
    lines.append(
        "- **Users**: `u_diana` (hiking / Rust / photography), "
        "`u_ethan` (jazz / marathon / cooking)\n"
    )
    lines.append(
        f"- **Batches**: {len(CONVERSATION)} "
        f"({sum(len(b) for b in CONVERSATION)} messages total)\n"
    )

    # ── Section: Ingest stats ─────────────────────────────────────────
    lines.append("\n## 2. Ingest (`/add` × N + `/flush`)\n")
    lines.append("| batch | msg_count | status |\n")
    lines.append("|---|---|---|\n")
    for b in ingest_summary["batches"]:
        lines.append(f"| {b['idx']} | {b['msg_count']} | `{b['status']}` |\n")
    lines.append(f"\n**Flush status**: `{ingest_summary['flush_status']}`\n")

    # ── Section: Cascade drain ────────────────────────────────────────
    lines.append("\n## 3. Cascade drain (md → LanceDB sync)\n")
    lines.append("```\n")
    lines.append(json.dumps(cascade_summary, indent=2) + "\n")
    lines.append("```\n")

    # ── Section: Artifacts ────────────────────────────────────────────
    lines.append("\n## 4. On-disk artifacts\n")
    lines.append("### LanceDB row counts\n\n")
    lines.append("| table | rows |\n")
    lines.append("|---|---|\n")
    for k in (
        "episode_rows",
        "atomic_fact_rows",
        "foresight_rows",
        "user_profile_rows",
    ):
        lines.append(f"| {k.replace('_rows', '')} | {artifacts[k]} |\n")
    lines.append("\n### Markdown files\n\n")
    for f in artifacts["md_files"]:
        lines.append(f"- `{f}`\n")

    # ── Section: Probes ───────────────────────────────────────────────
    lines.append("\n## 5. Retrieval probes\n")
    lines.append(
        "Every row below is one POST to `/api/v1/memory/search`. "
        "`expected` is what the test designer expects to see; "
        "actual results are captured verbatim.\n"
    )
    current_section = None
    for row in probes:
        # Emit a section heading only when the section changes.
        if row["section"] != current_section:
            lines.append(f"\n### {row['section']}\n")
            current_section = row["section"]
        req = row["request"]
        lines.append(
            f"\n#### `{req['query']}` (method=`{req['method']}`, "
            f"owner=`{req['owner_id']}`)\n"
        )
        lines.append(f"\n- **Expected**: {row['expect']}\n")
        lines.append(f"- **Status**: {row['status']}\n")
        lines.append(f"- **Episodes returned**: {len(row['episodes'])}\n")
        if row["episodes"]:
            lines.append("\n| rank | score | owner | atomic_facts | summary |\n")
            lines.append("|---|---|---|---|---|\n")
            for i, ep in enumerate(row["episodes"], 1):
                # Summaries are model-written free text: they may hold
                # pipes or line breaks, either of which would break the row.
                summary = _table_cell(ep["summary"])
                lines.append(
                    f"| {i} | {ep['score']} | `{ep['owner_id']}` | "
                    f"{ep['atomic_facts_count']} | {summary} |\n"
                )
        else:
            lines.append("\n_(no episodes)_\n")
        if row["profiles"]:
            lines.append(
                "\n**Profile attached**: "
                f"`{row['profiles'][0]['owner_id']}` "
                f"(excerpt: {row['profiles'][0]['summary_excerpt']!r})\n"
            )

    # ── Section: Pass/Fail summary ────────────────────────────────────
    lines.append("\n## 6. Pass / Fail summary\n")
    pf = _grade(probes)
    lines.append("| # | section | query | result |\n")
    lines.append("|---|---|---|---|\n")
    for r in pf:
        lines.append(
            f"| {r['idx']} | {r['section']} | `{r['query']}` | {r['verdict']} |\n"
        )
    passed = sum(1 for r in pf if r["verdict"].startswith("✅"))
    lines.append(f"\n**Total: {passed}/{len(pf)} passed.**\n")

    return "".join(lines)
|
||||
|
||||
|
||||
def _grade(probes: list[dict]) -> list[dict]:
|
||||
"""Apply soft heuristic pass/fail to each probe based on its 'expect'."""
|
||||
graded: list[dict] = []
|
||||
for i, row in enumerate(probes, 1):
|
||||
req = row["request"]
|
||||
expect = row["expect"].lower()
|
||||
verdict = "—"
|
||||
if "should not leak" in expect:
|
||||
leaked = any(ep["owner_id"] != req["owner_id"] for ep in row["episodes"])
|
||||
verdict = "❌ leaked" if leaked else "✅ no leak"
|
||||
elif "empty arrays" in expect or "0 hits" in expect:
|
||||
verdict = "✅" if not row["episodes"] else f"❌ got {len(row['episodes'])}"
|
||||
elif "profile" in expect:
|
||||
verdict = "✅" if row["profiles"] else "❌ no profile"
|
||||
elif row["episodes"]:
|
||||
top_owner = row["episodes"][0]["owner_id"]
|
||||
verdict = (
|
||||
"✅" if top_owner == req["owner_id"] else f"❌ wrong owner: {top_owner}"
|
||||
)
|
||||
else:
|
||||
verdict = "❌ no hits"
|
||||
graded.append(
|
||||
{
|
||||
"idx": i,
|
||||
"section": row["section"],
|
||||
"query": req["query"],
|
||||
"verdict": verdict,
|
||||
}
|
||||
)
|
||||
return graded
|
||||
|
||||
|
||||
# ── Main ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the whole report pipeline end to end and write the markdown report.

    Steps, mirrored by the ``[n/6]`` progress lines:

    1. wipe and recreate ``CORPUS_ROOT`` and point ``EVEROS_MEMORY__ROOT`` at it;
    2. ingest through the HTTP API (``ingest``);
    3. wait for the cascade to drain (``wait_cascade``);
    4. inspect on-disk artifacts (``inspect_artifacts``);
    5. run the search probes (``run_probes``);
    6. render the report and write it to ``REPORT_PATH``.

    The app is driven in-process through ``httpx.ASGITransport``; its
    lifespan is held open for the duration of the run.
    """
    # Reset corpus to a known empty state.
    if CORPUS_ROOT.exists():
        shutil.rmtree(CORPUS_ROOT)
    CORPUS_ROOT.mkdir(parents=True)
    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)

    # Imported late and cache-cleared so settings are re-read with the
    # environment variable set above.
    from everos.config import load_settings

    load_settings.cache_clear()

    print(f"[1/6] fresh corpus at {CORPUS_ROOT}")

    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)

    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        print("[2/6] ingesting via /add + /flush ...")
        ingest_summary = await ingest(client)
        print(f" batches={ingest_summary['batches']}")

        print("[3/6] waiting for cascade drain ...")
        cascade_summary = await wait_cascade()
        print(f" drained: {cascade_summary}")

        print("[4/6] inspecting on-disk artifacts ...")
        artifacts = await inspect_artifacts(CORPUS_ROOT)
        # Show only the ``*_rows`` entries. The dict is built outside the
        # literal and interpolated, so the actual counts are printed rather
        # than the comprehension source text.
        row_counts = {k: v for k, v in artifacts.items() if k.endswith("_rows")}
        print(f" lancedb: {row_counts}")

        print(f"[5/6] running {len(PROBES)} search probes ...")
        probes = await run_probes(client)

        print("[6/6] rendering report ...")
        md = render_report(
            memory_root=CORPUS_ROOT,
            ingest_summary=ingest_summary,
            cascade_summary=cascade_summary,
            artifacts=artifacts,
            probes=probes,
        )
        REPORT_PATH.write_text(md, encoding="utf-8")
        print(f" → {REPORT_PATH}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
269
tests/integration/search/conftest.py
Normal file
269
tests/integration/search/conftest.py
Normal file
@ -0,0 +1,269 @@
|
||||
"""Session-scoped corpus fixture for ``tests/integration/search/``.
|
||||
|
||||
The pipeline that produces the search corpus (`/add` × 19 + `/flush` +
|
||||
cascade drain) is the same one exercised by
|
||||
``tests/integration/test_add_flush_pipeline_e2e.py`` — and it costs
|
||||
~10 minutes against real LLMs. To keep the search test suite usable
|
||||
in CI we run that pipeline **once per session** here, persist the
|
||||
resulting memory_root to a session ``tmp_path``, and let every test
|
||||
re-attach a fresh FastAPI lifespan against the on-disk corpus.
|
||||
|
||||
Layout::
|
||||
|
||||
_ingested_memory_root (session-scoped)
|
||||
└── ingests LoCoMo conv_0 via the HTTP API, then tears
|
||||
lifespan down. Returns the memory_root path with md +
|
||||
sqlite + lancedb populated on disk.
|
||||
|
||||
search_client (function-scoped)
|
||||
└── per-test ``httpx.AsyncClient`` wired to a freshly built
|
||||
FastAPI app, ``EVEROS_MEMORY__ROOT`` pointed at the
|
||||
session corpus. Singletons are reset so each test starts
|
||||
with cold caches and the lifespan is the only thing
|
||||
constructing them.
|
||||
|
||||
This is intentionally separate from ``tests/integration/conftest.py``
|
||||
fixtures (which are function-scoped). Cross-suite isolation: tests
|
||||
under ``search/`` cannot poison or be poisoned by the ones above.
|
||||
|
||||
All tests in this folder are marked ``slow`` via the module-level
|
||||
``pytestmark`` in ``test_search_e2e.py`` — a non-``-m slow`` run skips
|
||||
the whole suite cleanly without paying the ingest cost.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
from collections.abc import AsyncIterator, Awaitable, Callable, Generator
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import text
|
||||
|
||||
# Set ``EVEROS_REUSE_CORPUS=<path>`` to skip ingest and point the
|
||||
# session fixture at an existing memory_root (md + lancedb already
|
||||
# populated). Search is a read-only path, so no copy is needed — the
|
||||
# fixture just sets ``EVEROS_MEMORY__ROOT`` to that directory.
|
||||
_REUSE_ENV = "EVEROS_REUSE_CORPUS"
|
||||
|
||||
# Memorize-service module-level lazy singletons; reset between phases so
|
||||
# stale clients / engines don't leak from ingest into per-test lifespans.
|
||||
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
|
||||
"_episode_writer",
|
||||
"_prompt_loader",
|
||||
"_user_pipeline",
|
||||
"_agent_pipeline",
|
||||
"_ome_engine",
|
||||
)
|
||||
|
||||
|
||||
# ── Session-scoped MonkeyPatch ─────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def _session_monkeypatch() -> Generator[pytest.MonkeyPatch, None, None]:
    """Provide one ``MonkeyPatch`` that stays active for the whole session.

    The built-in ``monkeypatch`` fixture is function-scoped, so it cannot
    be requested by the session-scoped ingest fixture. This fixture
    creates its own ``MonkeyPatch`` object, hands it out, and reverts
    every recorded change once the session is over.
    """
    session_patcher = pytest.MonkeyPatch()
    yield session_patcher
    # Teardown: roll back every env var / attribute change made through it.
    session_patcher.undo()
|
||||
|
||||
|
||||
# ── Singleton reset helper ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def _reset_memorize_singletons(mp: pytest.MonkeyPatch) -> None:
    """Clear the settings cache and set the known lazy singletons to ``None``.

    Covers the attributes named in ``_MEMORIZE_SINGLETONS`` on
    ``everos.service.memorize``, ``_llm_client`` on the LLM client module,
    and ``_writer`` on the atomic-fact and foresight strategy modules.
    All writes go through ``mp`` so they are reverted when ``mp`` is
    undone; ``raising=False`` tolerates attributes that do not exist.

    Used before ingest (so the new ``EVEROS_MEMORY__ROOT`` takes effect)
    and again by the per-test client fixture.
    """
    from everos.config import load_settings

    load_settings.cache_clear()

    reset_plan: tuple[tuple[str, tuple[str, ...]], ...] = (
        ("everos.service.memorize", _MEMORIZE_SINGLETONS),
        ("everos.component.llm.client", ("_llm_client",)),
        ("everos.memory.strategies.extract_atomic_facts", ("_writer",)),
        ("everos.memory.strategies.extract_foresight", ("_writer",)),
    )
    # Resolve every module first so an import failure happens before any
    # attribute has been touched.
    resolved = [
        (importlib.import_module(dotted_path), attr_names)
        for dotted_path, attr_names in reset_plan
    ]
    for module, attr_names in resolved:
        for attr_name in attr_names:
            mp.setattr(module, attr_name, None, raising=False)
|
||||
|
||||
|
||||
# ── Session corpus: ingest once ────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def _ingested_memory_root(
    tmp_path_factory: pytest.TempPathFactory,
    _session_monkeypatch: pytest.MonkeyPatch,
    long_conversation: dict,
) -> Path:
    """Return a populated memory_root, building it at most once per session.

    Two modes:

    * ``EVEROS_REUSE_CORPUS`` set — the path is resolved and used in
      place after checking that ``default_app/default_project/users/``
      exists; no ingest is run.
    * otherwise — a fresh ``search_corpus`` temp directory is created and
      filled by running ``_ingest`` in its own event loop.

    In both modes ``EVEROS_MEMORY__ROOT`` is set for the session and the
    lazy singletons are reset before anything else happens.

    Raises:
        AssertionError: the reuse path lacks the expected ``users/`` subdir.
    """
    reuse_path = os.environ.get(_REUSE_ENV)
    reusing = bool(reuse_path)

    if reusing:
        memory_root = Path(reuse_path).expanduser().resolve()
        expected_users = memory_root / "default_app" / "default_project" / "users"
        if not expected_users.is_dir():
            raise AssertionError(
                f"{_REUSE_ENV}={memory_root} has no "
                "default_app/default_project/users/ subdir — point it at a "
                "fully-ingested memory_root or unset to rebuild from scratch"
            )
    else:
        memory_root = tmp_path_factory.mktemp("search_corpus")

    _session_monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(memory_root))
    _reset_memorize_singletons(_session_monkeypatch)

    if not reusing:
        # Ingest runs in a dedicated event loop; the lifespan opened inside
        # ``_ingest`` is exited before this returns, so per-test lifespans
        # can attach to the same directory afterwards.
        asyncio.run(_ingest(memory_root, long_conversation))

    # In reuse mode the corpus is consumed in place — no copy is made.
    return memory_root
|
||||
|
||||
|
||||
async def _ingest(memory_root: Path, long_conversation: dict) -> None:
    """Start the app once and feed the conversation fixture through the API.

    Each batch in ``long_conversation["batches"]`` becomes one POST to
    ``/api/v1/memory/add``; a single ``/api/v1/memory/flush`` follows, and
    the function then waits (up to 600 s) for the cascade queue to report
    zero pending rows. Any non-2xx response raises via
    ``raise_for_status``.

    ``memory_root`` is not read here — the app is expected to pick the
    root up from the environment configured by the caller.
    """
    from everos.entrypoints.api.app import create_app

    # Only these message fields are forwarded to the API.
    wire_fields = ("sender_id", "role", "timestamp", "content")

    app = create_app()
    transport = httpx.ASGITransport(app=app)

    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        session_id = long_conversation["everos_session_id"]

        for batch in long_conversation["batches"]:
            payload = {
                "session_id": session_id,
                "messages": [
                    {field: message[field] for field in wire_fields}
                    for message in batch["messages"]
                ],
            }
            add_resp = await client.post(
                "/api/v1/memory/add",
                json=payload,
                timeout=600.0,
            )
            add_resp.raise_for_status()

        flush_resp = await client.post(
            "/api/v1/memory/flush",
            json={"session_id": session_id},
            timeout=600.0,
        )
        flush_resp.raise_for_status()

        await _poll_cascade_drained(deadline_seconds=600.0)
|
||||
|
||||
|
||||
async def _poll_cascade_drained(*, deadline_seconds: float) -> None:
    """Poll the change-state queue until it reports no pending rows.

    ``md_change_state_repo.queue_summary()`` is checked every 0.5 s. The
    whole wait is wrapped in ``asyncio.timeout(deadline_seconds)``, so
    ``TimeoutError`` is raised if the queue has not drained in time.
    """
    from everos.infra.persistence.sqlite import md_change_state_repo

    async with asyncio.timeout(deadline_seconds):
        while (await md_change_state_repo.queue_summary()).pending != 0:
            await asyncio.sleep(0.5)
|
||||
|
||||
|
||||
# ── Per-test client against the session corpus ─────────────────────────
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def search_client(
    _ingested_memory_root: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[httpx.AsyncClient]:
    """Yield an ``AsyncClient`` bound to a fresh app over the session corpus.

    Before the app is created, ``EVEROS_MEMORY__ROOT`` is pointed at the
    session corpus and both the memorize-side and search-side module
    singletons are reset, so nothing cached by an earlier test (or by the
    ingest phase) is reused. The app's lifespan stays open while the test
    body runs and is closed when the fixture finalizes.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(_ingested_memory_root))
    _reset_memorize_singletons(monkeypatch)

    # Search-service module state: object slots go back to ``None``,
    # the ``*_resolved`` flags go back to ``False``.
    search_svc = importlib.import_module("everos.service.search")
    object_slots = ("_manager", "_embedding", "_reranker", "_llm_client")
    flag_slots = ("_embedding_resolved", "_rerank_resolved", "_llm_resolved")
    cold_values: dict[str, object] = {
        **dict.fromkeys(object_slots),
        **dict.fromkeys(flag_slots, False),
    }
    for attr, cold in cold_values.items():
        # Only attributes the module actually defines are patched.
        if hasattr(search_svc, attr):
            monkeypatch.setattr(search_svc, attr, cold, raising=False)

    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        yield client
|
||||
|
||||
|
||||
# ── Diagnostic helpers (handy for tests that probe SQLite directly) ───
|
||||
|
||||
|
||||
@pytest.fixture
def memcell_count() -> Callable[[], Awaitable[int]]:
    """Provide an async helper that counts rows in the ``memcell`` table.

    Usage inside a test: ``n = await memcell_count()``. The helper opens
    a connection on the engine returned by ``get_engine()`` each time it
    is awaited and returns ``0`` when the query yields no value.
    """

    async def _count() -> int:
        from everos.infra.persistence.sqlite import get_engine

        async with get_engine().connect() as conn:
            rows = await conn.execute(text("SELECT COUNT(*) FROM memcell"))
            total = rows.scalar()
        return int(total) if total else 0

    return _count
|
||||
241
tests/integration/search/test_search_e2e.py
Normal file
241
tests/integration/search/test_search_e2e.py
Normal file
@ -0,0 +1,241 @@
|
||||
"""End-to-end ``/api/v1/memory/search`` tests over a real LoCoMo corpus.
|
||||
|
||||
Six tests, each pinning one path through :class:`SearchManager`:
|
||||
|
||||
============================================ =================================
|
||||
``test_keyword_recalls_atomic_fact_origin`` keyword (BM25 only)
|
||||
``test_vector_recalls_atomic_fact_origin`` vector (cosine only)
|
||||
``test_hybrid_with_profile_returns_profile`` hybrid + ``include_profile``
|
||||
``test_partition_respects_owner_id`` cross-owner isolation
|
||||
``test_unknown_owner_returns_empty_200`` empty response, no 500
|
||||
``test_filter_dsl_compiles_and_excludes`` filters DSL → LanceDB ``where``
|
||||
============================================ =================================
|
||||
|
||||
The corpus is built once by :func:`_ingested_memory_root` (session-
|
||||
scoped fixture in ``conftest.py``) and shared across all tests. Each
|
||||
test re-attaches a fresh lifespan via :func:`search_client`, so the
|
||||
search-manager singletons rebuild from cold per-test — a regression
|
||||
in the lazy-init path can't hide behind warm state from a prior test.
|
||||
|
||||
Bootstrapping: queries are derived from the corpus's own
|
||||
``atomic_facts`` md files via :func:`pick_query_seeds`, not
|
||||
hardcoded. Closed-loop correctness — what the pipeline extracted
|
||||
should be findable by the search side.
|
||||
|
||||
Assertions follow the project's "conservation + lower bound + shape" ("守恒 + 下界 + 形状") convention
|
||||
(see :func:`_helpers.assert_recall`): no exact ranks, no exact
|
||||
scores, no exact ids. LLM-driven retrieval is non-deterministic
|
||||
across runs; brittle assertions cause CI noise, not signal.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from ._helpers import (
|
||||
assert_recall,
|
||||
flatten_hits,
|
||||
pick_query_seeds,
|
||||
)
|
||||
|
||||
# Whole module is opt-in — it depends on ``_ingested_memory_root`` which
|
||||
# spends ~10 min running real LLM + embedder against LoCoMo conv_0.
|
||||
pytestmark = pytest.mark.slow
|
||||
|
||||
|
||||
# ── 1. Keyword recall ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_keyword_recalls_atomic_fact_origin(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """Keyword search must return a hit for at least one fact-derived bigram.

    Up to 20 seed facts are taken from the corpus; for each, the
    candidate bigrams from ``_candidate_bigrams`` are tried in order
    against ``method="keyword"``. The test passes on the first query
    that returns any hit, after checking that every hit carrying an
    owner belongs to the queried owner. It fails only when no candidate
    across all seeds produces a hit.

    This checks that the keyword path works end to end without depending
    on which particular fact was sampled; the stricter closed-loop
    recall claim is left to the vector and hybrid tests.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=20)
    last_query: str | None = None

    # Lazily flatten (owner, bigram) pairs so the search loop stays flat.
    attempts = (
        (owner, bigram)
        for owner, fact in seeds
        for bigram in _candidate_bigrams(fact)
    )
    for owner, query in attempts:
        last_query = query
        payload = {
            "user_id": owner,
            "query": query,
            "method": "keyword",
            "top_k": 5,
        }
        resp = await search_client.post(
            "/api/v1/memory/search",
            json=payload,
            timeout=60.0,
        )
        assert resp.status_code == 200, resp.text

        hits = flatten_hits(resp.json()["data"])
        if not hits:
            continue

        # Partition still holds even on a successful keyword hit.
        for hit_owner, _score, _text in hits:
            assert hit_owner is None or hit_owner == owner
        return

    raise AssertionError(
        f"BM25 returned 0 hits across {len(seeds)} fact seeds; "
        f"last tried query={last_query!r}"
    )
|
||||
|
||||
|
||||
def _candidate_bigrams(fact: str) -> list[str]:
|
||||
"""Lowercase consecutive content-token bigrams from ``fact``.
|
||||
|
||||
Skip tokens that include uppercase letters in the original text
|
||||
(proper nouns / acronyms — empirically poor BM25 recall under
|
||||
jieba). Returns at most 5 candidates per fact, in source order.
|
||||
"""
|
||||
import re as _re
|
||||
|
||||
out: list[str] = []
|
||||
tokens: list[str] = []
|
||||
for raw in _re.findall(r"\w+", fact):
|
||||
if raw.lower() == raw and len(raw) >= 3:
|
||||
tokens.append(raw)
|
||||
for i in range(len(tokens) - 1):
|
||||
out.append(f"{tokens[i]} {tokens[i + 1]}")
|
||||
if len(out) >= 5:
|
||||
break
|
||||
return out
|
||||
|
||||
|
||||
# ── 2. Vector recall ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_vector_recalls_atomic_fact_origin(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """The first seed fact must be recalled through ``method="vector"``.

    The whole fact text is used as the query, so the result does not
    depend on keyword tokenisation.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=1)
    owner, fact = seeds[0]

    # Cosine: identical text would score ~1.0; the threshold is loose
    # because the LLM-summarised episode text isn't the verbatim fact.
    loose_threshold = 0.1

    await assert_recall(
        search_client,
        owner_id=owner,
        query=fact,
        method="vector",
        min_score=loose_threshold,
    )
|
||||
|
||||
|
||||
# ── 3. Hybrid + include_profile ────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_hybrid_with_profile_returns_profile(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """A hybrid search with ``include_profile`` must return the owner's profile.

    Asserts a 200 response, a non-empty ``profiles`` array, and that the
    first profile's ``user_id`` equals the queried owner.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=1)
    owner, fact = seeds[0]

    payload = {
        "user_id": owner,
        "query": fact,
        "method": "hybrid",
        "top_k": 5,
        "include_profile": True,
    }
    resp = await search_client.post(
        "/api/v1/memory/search",
        json=payload,
        timeout=120.0,
    )
    assert resp.status_code == 200, resp.text

    profiles = resp.json()["data"]["profiles"]
    assert profiles, "include_profile=true but profiles[] empty"
    assert profiles[0]["user_id"] == owner
|
||||
|
||||
|
||||
# ── 4. Owner partition ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_partition_respects_owner_id(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """A hybrid query for one owner must stay inside that owner's partition.

    One owner is picked from the first two seeds and one of its facts is
    searched with ``method="hybrid"``. The recall checks themselves live
    in ``assert_recall``; this test additionally requires the
    ``agent_cases`` and ``agent_skills`` arrays to be empty.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=2)
    owners = {seed_owner for seed_owner, _ in seeds}
    assert owners, "need at least one owner in the corpus"

    target_owner = next(iter(owners))
    # First seed fact that belongs to the chosen owner.
    fact = next(
        seed_fact for seed_owner, seed_fact in seeds if seed_owner == target_owner
    )

    body = await assert_recall(
        search_client,
        owner_id=target_owner,
        query=fact,
        method="hybrid",
    )

    # Agent tracks must be empty for user owners.
    data = body["data"]
    assert data["agent_cases"] == []
    assert data["agent_skills"] == []
|
||||
|
||||
|
||||
# ── 5. Unknown owner ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_unknown_owner_returns_empty_200(
    search_client: httpx.AsyncClient,
) -> None:
    """Searching for an owner absent from the corpus yields 200 and no results.

    All four result arrays (``episodes``, ``profiles``, ``agent_cases``,
    ``agent_skills``) must be empty lists.
    """
    payload = {
        "user_id": "ghost_user_does_not_exist",
        "query": "anything",
        "method": "hybrid",
        "top_k": 5,
    }
    resp = await search_client.post(
        "/api/v1/memory/search",
        json=payload,
        timeout=60.0,
    )
    assert resp.status_code == 200, resp.text

    data = resp.json()["data"]
    for array_name in ("episodes", "profiles", "agent_cases", "agent_skills"):
        assert data[array_name] == []
|
||||
|
||||
|
||||
# ── 6. Filter DSL ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_filter_dsl_compiles_and_excludes(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """A ``session_id`` ``ne`` filter must be accepted and honoured.

    The request must succeed with status 200, and no returned episode
    may carry the excluded session id.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=1)
    owner, fact = seeds[0]
    bogus_session = "session_that_never_was"

    payload = {
        "user_id": owner,
        "query": fact,
        "method": "keyword",
        "top_k": 10,
        "filters": {"session_id": {"ne": bogus_session}},
    }
    resp = await search_client.post(
        "/api/v1/memory/search",
        json=payload,
        timeout=120.0,
    )
    assert resp.status_code == 200, resp.text

    # No real episode has the bogus id, so the filter should not remove
    # anything; the 200 above shows the filter was accepted.
    # NOTE(review): no lower bound on the hit count is asserted here, so
    # the loop below passes vacuously when ``episodes`` is empty.
    episodes = resp.json()["data"]["episodes"]
    for episode in episodes:
        assert episode["session_id"] != bogus_session
|
||||
316
tests/integration/test_cascade_all_kinds_consistency.py
Normal file
316
tests/integration/test_cascade_all_kinds_consistency.py
Normal file
@ -0,0 +1,316 @@
|
||||
"""Strict md <-> lancedb consistency across all 4 daily-log kinds.
|
||||
|
||||
For each registered daily-log kind, seed N entries via the kind's
|
||||
writer, wait for the cascade to drain, then assert exact equality
|
||||
between md state and LanceDB state:
|
||||
|
||||
* ``frontmatter.entry_count == N``
|
||||
* number of ``<!-- entry:... -->`` blocks == N
|
||||
* ``lance_repo.count_rows(md_path=...) == N``
|
||||
* lance ``entry_id`` set == md ``entry_id`` set
|
||||
|
||||
This is the strict counterpart to the loose ``>=`` assertions in
|
||||
:mod:`test_add_flush_user_pipeline_e2e` (which can't be exact because
|
||||
LLM output is non-deterministic).
|
||||
|
||||
Skill / profile are single-file (not daily-log) kinds and are covered
|
||||
by the e2e pipeline tests where the OME drives real LLM emissions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import datetime as _dt
|
||||
from collections.abc import AsyncIterator, Callable, Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.embedding import EmbeddingProvider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.core.persistence import MarkdownReader, MemoryRoot
|
||||
from everos.infra.persistence.lancedb import (
|
||||
agent_case_repo,
|
||||
atomic_fact_repo,
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
episode_repo,
|
||||
foresight_repo,
|
||||
)
|
||||
from everos.infra.persistence.lancedb.lancedb_manager import get_table
|
||||
from everos.infra.persistence.lancedb.tables.agent_case import AgentCase
|
||||
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
|
||||
from everos.infra.persistence.lancedb.tables.episode import Episode
|
||||
from everos.infra.persistence.lancedb.tables.foresight import Foresight
|
||||
from everos.infra.persistence.markdown import (
|
||||
AgentCaseWriter,
|
||||
AtomicFactWriter,
|
||||
EpisodeWriter,
|
||||
ForesightWriter,
|
||||
)
|
||||
from everos.infra.persistence.sqlite import (
|
||||
dispose_engine,
|
||||
get_engine,
|
||||
md_change_state_repo,
|
||||
)
|
||||
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
|
||||
from everos.memory.cascade.registry import KIND_REGISTRY
|
||||
from tests._consistency_assertions import _daily_log_sha_for_entry
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Reset ``LanceRepoBase``'s lock state before every test in this module.

    Runs automatically (``autouse``). The rationale is documented in
    ``test_repository.py``.
    """
    from everos.core.persistence.lancedb import repository as _lance_repository

    _lance_repository.LanceRepoBase._reset_locks_for_tests()
|
||||
|
||||
|
||||
class _StubEmbedder(EmbeddingProvider):
    """Embedding provider that returns all-zero vectors without any I/O."""

    # Length of every vector this stub produces.
    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector of length ``dim``; ``text`` is ignored."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one independent zero vector per item in ``texts``."""
        vectors = []
        for _ in texts:
            vectors.append([0.0 for _ in range(self.dim)])
        return vectors
|
||||
|
||||
|
||||
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Yield a ``MemoryRoot`` backed by a fresh temp dir with storage initialised.

    Setup: point the memory root and the embedding settings at stub
    values, dispose any existing LanceDB connection and SQLite engine,
    create the SQLModel tables on a new engine, and ensure the LanceDB
    business indexes exist. Teardown disposes both handles again.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for variable, value in stub_env.items():
        monkeypatch.setenv(variable, value)

    # Drop handles left over from a previous test before opening new ones.
    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class _DailyLogKindCase:
|
||||
"""A single registered daily-log kind, packaged for parametrization."""
|
||||
|
||||
name: str
|
||||
scope: str # "users" | "agents"
|
||||
dir_name: str
|
||||
file_prefix: str
|
||||
writer_factory: Callable[[MemoryRoot], Any]
|
||||
repo: Any
|
||||
table_cls: type
|
||||
build_item: Callable[[str, int], tuple[Mapping[str, object], Mapping[str, str]]]
|
||||
|
||||
|
||||
def _af_item(scope_id: str, j: int):
|
||||
return (
|
||||
{
|
||||
"owner_id": scope_id,
|
||||
"session_id": f"s_{j}",
|
||||
"timestamp": "2026-05-19T07:04:26+00:00",
|
||||
"parent_id": f"mc_{j}",
|
||||
"sender_ids": [scope_id],
|
||||
},
|
||||
{"Fact": f"af fact body {j}"},
|
||||
)
|
||||
|
||||
|
||||
def _ep_item(scope_id: str, j: int):
|
||||
return (
|
||||
{
|
||||
"owner_id": scope_id,
|
||||
"session_id": f"s_{j}",
|
||||
"timestamp": "2026-05-19T07:04:26+00:00",
|
||||
"parent_id": f"mc_{j}",
|
||||
"sender_ids": [scope_id],
|
||||
},
|
||||
{"Subject": f"subj {j}", "Summary": f"sum {j}", "Content": f"content {j}"},
|
||||
)
|
||||
|
||||
|
||||
def _fs_item(scope_id: str, j: int):
|
||||
return (
|
||||
{
|
||||
"owner_id": scope_id,
|
||||
"session_id": f"s_{j}",
|
||||
"timestamp": "2026-05-19T07:04:26+00:00",
|
||||
"parent_id": f"mc_{j}",
|
||||
"sender_ids": [scope_id],
|
||||
},
|
||||
{"Foresight": f"foresight body {j}"},
|
||||
)
|
||||
|
||||
|
||||
def _ac_item(scope_id: str, j: int):
|
||||
return (
|
||||
{
|
||||
"owner_id": scope_id,
|
||||
"session_id": f"s_{j}",
|
||||
"timestamp": "2026-05-19T07:04:26+00:00",
|
||||
"parent_id": f"mc_{j}",
|
||||
"quality_score": 0.9,
|
||||
},
|
||||
{
|
||||
"TaskIntent": f"task intent {j}",
|
||||
"Approach": f"approach {j}",
|
||||
"KeyInsight": f"insight {j}",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_KIND_CASES: list[_DailyLogKindCase] = [
|
||||
_DailyLogKindCase(
|
||||
name="atomic_fact",
|
||||
scope="users",
|
||||
dir_name=".atomic_facts",
|
||||
file_prefix="atomic_fact",
|
||||
writer_factory=AtomicFactWriter,
|
||||
repo=atomic_fact_repo,
|
||||
table_cls=AtomicFact,
|
||||
build_item=_af_item,
|
||||
),
|
||||
_DailyLogKindCase(
|
||||
name="episode",
|
||||
scope="users",
|
||||
dir_name="episodes",
|
||||
file_prefix="episode",
|
||||
writer_factory=EpisodeWriter,
|
||||
repo=episode_repo,
|
||||
table_cls=Episode,
|
||||
build_item=_ep_item,
|
||||
),
|
||||
_DailyLogKindCase(
|
||||
name="foresight",
|
||||
scope="users",
|
||||
dir_name=".foresights",
|
||||
file_prefix="foresight",
|
||||
writer_factory=ForesightWriter,
|
||||
repo=foresight_repo,
|
||||
table_cls=Foresight,
|
||||
build_item=_fs_item,
|
||||
),
|
||||
_DailyLogKindCase(
|
||||
name="agent_case",
|
||||
scope="agents",
|
||||
dir_name=".cases",
|
||||
file_prefix="agent_case",
|
||||
writer_factory=AgentCaseWriter,
|
||||
repo=agent_case_repo,
|
||||
table_cls=AgentCase,
|
||||
build_item=_ac_item,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
async def _wait_path_done(md_path: str, *, deadline: float = 15.0) -> None:
    """Wait until the change-state row for ``md_path`` is ``done`` or ``failed``.

    Polls ``md_change_state_repo.get_by_id`` every 0.05 s in two phases:
    first until a row exists at all, then until its status is terminal.
    Both phases share one ``asyncio.timeout(deadline)``, so
    ``TimeoutError`` is raised if the row does not settle in time. A
    short extra sleep follows before returning.
    """
    terminal_statuses = ("done", "failed")

    async with asyncio.timeout(deadline):
        # Phase 1: the row has to show up.
        while (  # noqa: ASYNC110 - polling cascade state
            await md_change_state_repo.get_by_id(md_path)
        ) is None:
            await asyncio.sleep(0.05)

        # Phase 2: the row has to reach a terminal status.
        while True:  # noqa: ASYNC110 - polling cascade state
            state = await md_change_state_repo.get_by_id(md_path)
            if state is not None and state.status in terminal_statuses:
                break
            await asyncio.sleep(0.05)

    # NOTE(review): settle pause; assumed to sit outside the timeout
    # scope — confirm against the original indentation.
    await asyncio.sleep(0.1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", _KIND_CASES, ids=lambda c: c.name)
async def test_md_lance_strict_consistency_per_kind(
    cascade_runtime: MemoryRoot,
    case: _DailyLogKindCase,
) -> None:
    """Seed N entries for one kind and require md and LanceDB to agree exactly.

    After the cascade has processed the file, all of these must hold:

    1. ``frontmatter.entry_count == N``;
    2. the md file parses to N entries;
    3. the table holds N rows for this ``md_path``;
    4. the set of row ``entry_id`` values equals the set of md entry ids;
    4b. each row's ``content_sha256`` equals the hash recomputed from md;
    5. the change-state row for the file is ``done``.

    The orchestrator is always stopped, even when an assertion fails.
    """
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)

    try:
        writer = case.writer_factory(root=memory_root)
        scope_id = f"sid_{case.name}"
        bucket = _dt.date(2026, 5, 19)
        n = 5

        items = [case.build_item(scope_id, index) for index in range(n)]
        eids = await writer.append_entries(scope_id, items, date=bucket)
        assert len(eids) == n, f"writer returned {len(eids)} eids, expected {n}"

        md_path = (
            f"default_app/default_project/{case.scope}/{scope_id}/{case.dir_name}/"
            f"{case.file_prefix}-{bucket.isoformat()}.md"
        )
        # Same predicate is used for both the row count and the row fetch.
        by_md_path = f"md_path = '{md_path}'"
        await _wait_path_done(md_path)

        parsed = await MarkdownReader.read(memory_root.root / md_path)

        # 1) frontmatter.entry_count == N
        declared_count = parsed.frontmatter.get("entry_count")
        assert declared_count == n, (
            f"{case.name}: frontmatter.entry_count="
            f"{declared_count}, expected {n}"
        )

        # 2) md entry blocks == N
        block_count = len(parsed.entries)
        assert block_count == n, (
            f"{case.name}: md has {block_count} entry blocks, expected {n}"
        )

        # 3) lance count_rows(md_path) == N (strict equality)
        table = await get_table(case.table_cls.TABLE_NAME, case.table_cls)
        lance_count = await table.count_rows(filter=by_md_path)
        assert lance_count == n, (
            f"{case.name}: md={n} lance={lance_count} for {md_path}"
        )

        # 4) lance entry_id set == md entry_id set
        lance_rows = await case.repo.find_where(by_md_path, limit=100)
        lance_eids = {row.entry_id for row in lance_rows}
        md_eids = {entry.id for entry in parsed.entries}
        assert lance_eids == md_eids, (
            f"{case.name}: lance eids {lance_eids} != md eids {md_eids}"
        )

        # 4b) lance content_sha256 per entry == md-recomputed content_sha256.
        # Catches "id present but content mismatched" — orthogonal to (4).
        handler_cls = next(
            spec.handler_factory for spec in KIND_REGISTRY if spec.name == case.name
        )
        md_sha_by_id = {}
        for entry in parsed.entries:
            md_sha_by_id[entry.id] = _daily_log_sha_for_entry(
                handler_cls, entry.as_structured()
            )
        lance_sha_by_id = {row.entry_id: row.content_sha256 for row in lance_rows}
        assert md_sha_by_id == lance_sha_by_id, (
            f"{case.name}: per-entry content_sha256 mismatch "
            f"@ {md_path}: md={md_sha_by_id} lance={lance_sha_by_id}"
        )

        # 5) state row is terminally done (not failed)
        state_row = await md_change_state_repo.get_by_id(md_path)
        assert state_row is not None and state_row.status == "done", (
            f"{case.name}: state row status={state_row.status if state_row else 'NONE'}"
        )
    finally:
        await orchestrator.stop()
|
||||
196
tests/integration/test_cascade_cli_integration.py
Normal file
196
tests/integration/test_cascade_cli_integration.py
Normal file
@ -0,0 +1,196 @@
|
||||
"""Integration test for ``everos cascade`` CLI commands.
|
||||
|
||||
Drives the actual Typer commands against a real sqlite + lancedb under a
|
||||
tmp memory root. Validates the in-process orchestration that
|
||||
``test_cascade_command`` (unit) cannot reach: ``_runtime()`` context,
|
||||
queue summary formatting, fix (no-rows path), and a full
|
||||
``cascade sync <path>`` round-trip with a stub embedder.
|
||||
|
||||
The CLI commands call ``asyncio.run(_run())`` internally, so this test
|
||||
is **synchronous** — pytest-asyncio's auto mode would otherwise wrap it
|
||||
in an event loop, which collides with the CLI's own loop.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from everos.component.embedding import EmbeddingProvider
|
||||
from everos.config import load_settings
|
||||
from everos.entrypoints.cli.commands import cascade as cascade_mod
|
||||
from everos.infra.persistence.lancedb import dispose_connection
|
||||
from everos.infra.persistence.sqlite import dispose_engine
|
||||
|
||||
|
||||
class _StubEmbedder(EmbeddingProvider):
    """Offline embedding stand-in that always yields all-zero vectors."""

    # Length of every vector this stub returns.
    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector of length ``dim``; ``text`` is ignored."""
        return [0.0] * self.dim

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one fresh zero vector per element of ``texts``."""
        return [await self.embed(item) for item in texts]
|
||||
|
||||
|
||||
@pytest.fixture
def cli_runtime(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Iterator[Path]:
    """Tmp memory root + clean singletons; CLI bootstraps the schema itself.

    Points the ``EVEROS_*`` environment at ``tmp_path`` with stub embedding
    settings, clears the ``load_settings`` cache so the new environment is
    picked up, and disposes the lancedb / sqlite singletons before and
    after the test.

    Yields:
        The ``tmp_path`` used as the memory root.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    monkeypatch.setenv("EVEROS_EMBEDDING__MODEL", "stub-model")
    monkeypatch.setenv("EVEROS_EMBEDDING__BASE_URL", "http://stub.invalid/v1")
    monkeypatch.setenv("EVEROS_EMBEDDING__API_KEY", "stub-key")
    load_settings.cache_clear()

    # Strip any singleton state from a neighbouring test.
    asyncio.run(_dispose_all())
    yield tmp_path
    asyncio.run(_dispose_all())
    # Settings cached during the test point at this test's tmp root; drop
    # them so the next test does not inherit a stale (soon deleted) root.
    load_settings.cache_clear()
|
||||
|
||||
|
||||
async def _dispose_all() -> None:
    """Dispose the lancedb connection, then the sqlite engine, in that order."""
    for dispose in (dispose_connection, dispose_engine):
        await dispose()
|
||||
|
||||
|
||||
def test_status_on_empty_queue(cli_runtime: Path) -> None:
    """``cascade status`` boots the runtime + reports an empty queue for a fresh DB."""
    result = CliRunner().invoke(cascade_mod.app, ["status"])
    assert result.exit_code == 0, result.stdout
    assert "queue:" in result.stdout
    assert "lsn:" in result.stdout
    # Fresh DB: nothing is pending. A bare ``"0" in stdout`` would be
    # satisfied by any stray zero digit in the output, so tie the zero to
    # the ``pending:`` label (tolerating alignment whitespace).
    assert re.search(r"pending:\s*0\b", result.stdout), result.stdout
|
||||
|
||||
|
||||
def test_fix_with_no_failed_rows(cli_runtime: Path) -> None:
    """Dry-run ``cascade fix`` on a fresh DB exits 0 with the empty-state message."""
    runner = CliRunner()
    outcome = runner.invoke(cascade_mod.app, ["fix"])
    stdout = outcome.stdout
    assert outcome.exit_code == 0, stdout
    assert "no failed rows" in stdout
|
||||
|
||||
|
||||
def test_fix_apply_with_no_failed_rows(cli_runtime: Path) -> None:
    """``cascade fix --apply`` on a fresh DB exits 0 with the empty-state message."""
    runner = CliRunner()
    outcome = runner.invoke(cascade_mod.app, ["fix", "--apply"])
    stdout = outcome.stdout
    assert outcome.exit_code == 0, stdout
    assert "no failed rows" in stdout
|
||||
|
||||
|
||||
def test_sync_on_empty_queue_with_stub_embedder(
    cli_runtime: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``cascade sync`` on an empty queue completes and reports zero rows."""
    # The CLI normally obtains its embedder from build_embedding_provider(),
    # which would try to connect; swap the module's orchestrator builder
    # for one that wires in the stub embedder instead.
    from everos.component.tokenizer import build_tokenizer
    from everos.core.persistence import MemoryRoot
    from everos.memory.cascade import CascadeOrchestrator

    def _stub_orchestrator() -> CascadeOrchestrator:
        memory_root = MemoryRoot.default()
        memory_root.ensure()
        return CascadeOrchestrator(
            memory_root=memory_root,
            embedder=_StubEmbedder(),
            tokenizer=build_tokenizer(),
        )

    monkeypatch.setattr(cascade_mod, "_build_orchestrator", _stub_orchestrator)

    outcome = CliRunner().invoke(cascade_mod.app, ["sync"])
    stdout = outcome.stdout
    assert outcome.exit_code == 0, stdout
    assert "sync complete" in stdout
    assert "processed 0 row(s)" in stdout
|
||||
|
||||
|
||||
def test_sync_with_path_outside_root_errors(
    cli_runtime: Path, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """``cascade sync <path>`` rejects paths outside the memory root."""
    other = tmp_path_factory.mktemp("other") / "x.md"
    other.write_text("# unrelated\n")
    result = CliRunner().invoke(cascade_mod.app, ["sync", str(other)])
    assert result.exit_code != 0
    # Typer.BadParameter surfaces on stderr. ``result.output`` is used
    # rather than ``stdout + stderr`` because it carries the error text
    # whether the runner mixes the streams or captures them separately,
    # whereas ``result.stderr`` is not readable on every Click version.
    #
    # The rich error box wraps the message at terminal width and pads each
    # line with ``│`` (U+2502 box-drawing); so ``not under`` and
    # ``memory root`` end up separated by spaces *plus* box characters
    # *plus* a newline. ``\s`` doesn't match ``│``, so widen to
    # ``[^\w]+`` (anything that isn't an alnum / underscore) — that
    # tolerates the rich frame without falsely matching real text
    # between the two tokens.
    output = result.output
    assert re.search(r"not under[^\w]+memory root", output), output
|
||||
|
||||
|
||||
def test_sync_with_unmatched_path(
    cli_runtime: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A path under the root but matching no cascade kind exits 1 with a hint."""
    from everos.component.tokenizer import build_tokenizer
    from everos.core.persistence import MemoryRoot
    from everos.memory.cascade import CascadeOrchestrator

    def fake_build_orchestrator() -> CascadeOrchestrator:
        # Same wiring as the CLI builder, but with the offline stub embedder.
        return CascadeOrchestrator(
            memory_root=MemoryRoot.default(),
            embedder=_StubEmbedder(),
            tokenizer=build_tokenizer(),
        )

    monkeypatch.setattr(cascade_mod, "_build_orchestrator", fake_build_orchestrator)

    # File under the root but in an unregistered subdirectory.
    unregistered = cli_runtime / "stuff" / "random.md"
    unregistered.parent.mkdir(parents=True, exist_ok=True)
    unregistered.write_text("# random\n")
    result = CliRunner().invoke(cascade_mod.app, ["sync", str(unregistered)])
    assert result.exit_code == 1, result.output
    # The hint is emitted on stderr. ``result.output`` contains it whether
    # the runner mixes stderr into stdout or captures the two separately;
    # reading ``result.stderr`` directly is not safe on every Click version.
    assert "does not match any registered cascade kind" in result.output
|
||||
|
||||
|
||||
# Keep a baseline so future regressions show as a hard failure.
|
||||
def test_status_handles_pending_rows(cli_runtime: Path) -> None:
    """``cascade status`` reports one pending row after a row is force-enqueued."""

    async def _enqueue_one() -> None:
        # Enter the same runtime context the CLI uses, enqueue a single
        # row through the repo, and leave the context again before the
        # command itself is invoked.
        async with cascade_mod._runtime():
            from everos.infra.persistence.sqlite import md_change_state_repo

            await md_change_state_repo.force_enqueue(
                "users/u1/episodes/episode-2026-01-01.md", "episode"
            )

    asyncio.run(_enqueue_one())

    outcome = CliRunner().invoke(cascade_mod.app, ["status"])
    assert outcome.exit_code == 0, outcome.stdout
    # Exactly one row was seeded, so the pending counter must read 1.
    assert "pending: 1" in outcome.stdout
|
||||
|
||||
|
||||
# Reduce false negatives on date drift.
|
||||
def test_resolve_relative_via_command_arg(cli_runtime: Path) -> None:
    """``cascade sync --help`` renders cleanly with an episode md under the root.

    NOTE(review): despite its name, this test never passes a path to
    ``cascade sync``; it is a smoke test that the ``sync`` subcommand can
    be constructed and print its help while an episode file exists under
    the memory root. Real path resolution is not exercised here.
    """
    # Pin the bucket date so the file name and body always agree and the
    # test never depends on the wall clock.
    bucket = _dt.date(2026, 5, 25).isoformat()
    md_file = cli_runtime / "users" / "u1" / "episodes" / f"episode-{bucket}.md"
    md_file.parent.mkdir(parents=True, exist_ok=True)
    md_file.write_text(f"# {bucket}\n")

    # ``--help`` short-circuits before any orchestrator is built, so no
    # stub embedder is needed.
    result = CliRunner().invoke(cascade_mod.app, ["sync", "--help"])
    assert result.exit_code == 0, result.stdout
|
||||
193
tests/integration/test_cascade_fsevents_repro.py
Normal file
193
tests/integration/test_cascade_fsevents_repro.py
Normal file
@ -0,0 +1,193 @@
|
||||
"""Repro: high-frequency atomic-replace bursts vs. cascade drain.
|
||||
|
||||
Drives N successive ``AtomicFactWriter.append_entries`` calls against the
|
||||
same daily-log md, simulating multiple OME memcells landing in the same
|
||||
owner+day bucket within a few ms of each other.
|
||||
|
||||
Before the watcher.on_deleted stat-guard, macOS FSEvents emits a paired
|
||||
(moved, deleted) per ``os.replace`` and the synthetic deletion can
|
||||
become the final ``change_type`` of the row — driving the worker into
|
||||
``handle_deleted`` and wiping LanceDB while md is intact. Repeat the
|
||||
test ~20x to surface the race if it ever resurfaces.
|
||||
|
||||
Scanner interval is held at 60s so the watcher path is the only thing
|
||||
exercised (a scanner sweep would mask a watcher bug).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
from collections.abc import AsyncIterator
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.embedding import EmbeddingProvider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.core.persistence import MarkdownReader, MemoryRoot
|
||||
from everos.infra.persistence.lancedb import (
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
)
|
||||
from everos.infra.persistence.lancedb.lancedb_manager import get_table
|
||||
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
|
||||
from everos.infra.persistence.markdown import AtomicFactWriter
|
||||
from everos.infra.persistence.sqlite import (
|
||||
dispose_engine,
|
||||
get_engine,
|
||||
md_change_state_repo,
|
||||
)
|
||||
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Clear the per-table write-lock pool before every test in this module.

    LanceRepoBase keeps its locks in a ClassVar dict; if they are not
    dropped, the second test in this module fails with "Lock bound to a
    different event loop". Mirrors the unit-test fixture in
    test_repository.py.
    """
    from everos.core.persistence.lancedb import repository as lance_repository

    lance_repository.LanceRepoBase._reset_locks_for_tests()
|
||||
|
||||
|
||||
class _StubEmbedder(EmbeddingProvider):
    """Offline embedding stand-in that always yields all-zero vectors."""

    # Length of every vector this stub returns.
    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector of length ``dim``; ``text`` is ignored."""
        return [0.0] * self.dim

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one fresh zero vector per element of ``texts``."""
        return [await self.embed(item) for item in texts]
|
||||
|
||||
|
||||
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Boot sqlite + lancedb against ``tmp_path`` and yield the default root.

    Sets the ``EVEROS_*`` environment to the tmp root and stub embedding
    values, disposes any existing singletons, creates the SQLModel tables
    and business indexes, yields ``MemoryRoot.default()``, then disposes
    the singletons again on teardown.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for name, value in stub_env.items():
        monkeypatch.setenv(name, value)

    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
|
||||
|
||||
|
||||
async def _wait_drain(deadline: float = 15.0) -> None:
    """Poll the queue summary every 50 ms until ``pending`` reads zero.

    Returns as soon as a summary with ``pending == 0`` is observed. The
    whole loop runs under ``asyncio.timeout(deadline)``, so a queue that
    never empties surfaces as ``TimeoutError``.
    """
    async with asyncio.timeout(deadline):
        while True:
            if (await md_change_state_repo.queue_summary()).pending == 0:
                return
            await asyncio.sleep(0.05)
|
||||
|
||||
|
||||
async def _count_lance_rows(md_path: str) -> int:
    """Count AtomicFact table rows whose ``md_path`` column equals ``md_path``."""
    predicate = f"md_path = '{md_path}'"
    facts_table = await get_table(AtomicFact.TABLE_NAME, AtomicFact)
    return await facts_table.count_rows(filter=predicate)
|
||||
|
||||
|
||||
async def _count_md_entries(absolute: Path) -> int:
    """Return the number of parsed entries in the md at ``absolute``.

    A path that is not an existing regular file counts as zero entries.
    """
    if await anyio.Path(absolute).is_file():
        document = await MarkdownReader.read(absolute)
        return len(document.entries)
    return 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "n_calls,items_per_call,inter_call_sleep_ms",
    [
        (20, 1, 0.0),
        (20, 1, 1.0),
        (20, 3, 0.0),
        (10, 3, 5.0),
    ],
)
async def test_high_freq_atomic_fact_append_no_loss(
    cascade_runtime: MemoryRoot,
    n_calls: int,
    items_per_call: int,
    inter_call_sleep_ms: float,
) -> None:
    """Burst-append to one daily-log md and require md/LanceDB row parity.

    Issues ``n_calls`` ``append_entries`` calls of ``items_per_call``
    items each against the same owner + date bucket, optionally pausing
    ``inter_call_sleep_ms`` between calls, then waits for the queue to
    drain and checks that the md entry count equals the number written
    and that LanceDB holds the same number of rows for that md path.
    """
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)

    try:
        writer = AtomicFactWriter(root=memory_root)
        bucket = _dt.date(2026, 5, 19)
        owner_id = "bob"
        pause_seconds = inter_call_sleep_ms / 1000.0
        written = 0
        for call_idx in range(n_calls):
            batch = [
                (
                    {
                        "owner_id": owner_id,
                        "session_id": f"s_{call_idx}_{item_idx}",
                        "timestamp": "2026-05-19T07:04:26+00:00",
                        "parent_id": f"mc_{call_idx}",
                        "sender_ids": [owner_id],
                    },
                    {"Fact": f"fact body call={call_idx} item={item_idx}"},
                )
                for item_idx in range(items_per_call)
            ]
            await writer.append_entries(owner_id, batch, date=bucket)
            written += items_per_call
            if inter_call_sleep_ms > 0:
                await asyncio.sleep(pause_seconds)

        await _wait_drain(deadline=15.0)
        # FSEvents has ~30-100ms kernel-to-userspace delivery latency, so
        # watcher callbacks for the LAST few os.replace() bursts may land
        # AFTER sqlite first reports `pending == 0`. Absorb that tail:
        # settle 500ms, then drain again until truly quiescent.
        await asyncio.sleep(0.5)
        await _wait_drain(deadline=15.0)

        md_path = (
            f"default_app/default_project/users/{owner_id}/.atomic_facts/"
            f"atomic_fact-{bucket.isoformat()}.md"
        )
        md_entries = await _count_md_entries(memory_root.root / md_path)
        lance_rows = await _count_lance_rows(md_path)
        state_row = await md_change_state_repo.get_by_id(md_path)

        # First confirm the writer itself persisted everything ...
        assert md_entries == written, (
            f"writer self-check failed: total={written} md={md_entries}"
        )
        # ... then that the cascade mirrored every md entry into LanceDB.
        assert lance_rows == md_entries, (
            f"CASCADE LOSS: md={md_entries} lance={lance_rows} "
            f"state={state_row.status if state_row else 'NONE'} "
            f"lsn={state_row.lsn if state_row else None}"
        )
    finally:
        await orchestrator.stop()
|
||||
242
tests/integration/test_cascade_integration.py
Normal file
242
tests/integration/test_cascade_integration.py
Normal file
@ -0,0 +1,242 @@
|
||||
"""End-to-end cascade flow.
|
||||
|
||||
Drives the full pipeline once with real components except the embedder
|
||||
(stubbed so the test never hits an external API):
|
||||
|
||||
EpisodeWriter.append_entry ─▶ md file on disk
|
||||
watchdog FSEvents thread ─▶ CascadeWatcher._enqueue_async
|
||||
md_change_state.upsert ─▶ pending row
|
||||
CascadeWorker.drain_once ─▶ EpisodeHandler.handle_added_or_modified
|
||||
episode_repo.upsert ─▶ LanceDB row
|
||||
|
||||
Asserts the row landed with the right shape (md_path, content_sha256,
|
||||
episode tokens, vector dim). Validates that the three loops actually
|
||||
talk to each other — no unit test covers the cross-loop wiring.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
from collections.abc import AsyncIterator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.embedding import EmbeddingProvider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.infra.persistence.lancedb import (
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
episode_repo,
|
||||
)
|
||||
from everos.infra.persistence.markdown import EpisodeWriter
|
||||
from everos.infra.persistence.sqlite import (
|
||||
dispose_engine,
|
||||
get_engine,
|
||||
md_change_state_repo,
|
||||
)
|
||||
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
|
||||
|
||||
|
||||
class _StubEmbedder(EmbeddingProvider):
    """Zero-vector embedder (1024-dim) that counts how often it is asked to embed."""

    # Length of every vector this stub returns.
    dim = 1024

    def __init__(self) -> None:
        # Incremented once per embedded text, including texts in a batch.
        self.calls = 0

    async def embed(self, text: str) -> list[float]:
        """Record one call and return a zero vector of length ``dim``."""
        self.calls = self.calls + 1
        return [0.0] * self.dim

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Embed each text via :meth:`embed`, so every item bumps ``calls``."""
        return [await self.embed(item) for item in texts]
|
||||
|
||||
|
||||
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Boot sqlite + lancedb against a tmp memory_root; dispose at teardown.

    Cascade relies on module-level singletons, so they are disposed up
    front (no state leaking in from neighbouring tests) and again on the
    way out (the next test starts from a clean slate).
    """
    # The embedding entries are required by the lifespan factory: the stub
    # embedder never touches the network, but the orchestrator still
    # expects the environment to look valid.
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for name, value in stub_env.items():
        monkeypatch.setenv(name, value)

    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
|
||||
|
||||
|
||||
async def _poll(condition, *, deadline_seconds: float = 10.0, interval: float = 0.05): # type: ignore[no-untyped-def]
|
||||
"""Poll ``condition()`` (async) until truthy, or :class:`TimeoutError`.
|
||||
|
||||
Wraps the loop in :func:`asyncio.timeout` so the test surfaces a
|
||||
clean ``TimeoutError`` instead of silently spinning. The polling
|
||||
interval is a low-cost sleep; the deadline is the hard cap.
|
||||
"""
|
||||
async with asyncio.timeout(deadline_seconds):
|
||||
while True:
|
||||
result = await condition()
|
||||
if result:
|
||||
return result
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
async def test_append_to_md_propagates_to_lancedb(
    cascade_runtime: MemoryRoot,
) -> None:
    """Happy path: writer append → watcher → state row → worker → LanceDB."""
    memory_root = cascade_runtime
    embedder = _StubEmbedder()
    # Tight worker poll so the test wraps in seconds, not minutes. The
    # scanner interval stays long so the watcher path is the one actually
    # exercised (the scanner would mask a watcher bug).
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=10,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=embedder,
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    # Give the watchdog Observer thread a beat to actually subscribe; this
    # is the watchdog API gap (start() returns before the kqueue /
    # FSEvents subscription is live on macOS).
    await asyncio.sleep(0.3)

    try:
        owner_id = "u_integration"
        writer = EpisodeWriter(memory_root)
        eid = await writer.append_entry(
            owner_id,
            inline={
                "owner_id": owner_id,
                "session_id": "s_int",
                "timestamp": "2026-05-14T10:00:00+00:00",
                "parent_id": "mc_integration_parent",
                "sender_ids": [owner_id],
            },
            sections={
                "Subject": "Test",
                "Summary": "Stub",
                "Content": "the user mentioned dark mode preference",
            },
            date=_dt.date(2026, 5, 14),
        )
        md_path = (
            "default_app/default_project/users/u_integration/episodes/"
            "episode-2026-05-14.md"
        )

        # 1. Watcher enqueues the path.
        async def _row_exists():  # type: ignore[no-untyped-def]
            return await md_change_state_repo.get_by_id(md_path)

        queued = await _poll(_row_exists, deadline_seconds=5.0)
        assert queued.kind == "episode"

        # 2. Worker drives it to done.
        async def _row_done():  # type: ignore[no-untyped-def]
            current = await md_change_state_repo.get_by_id(md_path)
            if current is not None and current.status == "done":
                return current
            return None

        finished = await _poll(_row_done, deadline_seconds=10.0)
        assert finished.error is None

        # 3. LanceDB carries the typed episode row.
        stored = await episode_repo.get_by_id(f"u_integration_{eid.format()}")
        assert stored is not None
        assert stored.episode == "the user mentioned dark mode preference"
        assert stored.episode_tokens  # tokenizer ran
        assert stored.md_path == md_path
        assert stored.parent_id == "mc_integration_parent"
        assert stored.content_sha256
        assert len(stored.vector) == 1024
        assert embedder.calls >= 1
    finally:
        await orchestrator.stop()
|
||||
|
||||
|
||||
async def test_delete_md_wipes_lancedb_row(
    cascade_runtime: MemoryRoot,
) -> None:
    """Index one episode, ``unlink`` its md, and wait for the LanceDB row to go."""
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=10,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)

    try:
        owner_id = "u_del"
        writer = EpisodeWriter(memory_root)
        eid = await writer.append_entry(
            owner_id,
            inline={
                "owner_id": owner_id,
                "session_id": "s",
                "timestamp": "2026-05-14T10:00:00+00:00",
                "parent_id": "mc_del_parent",
                "sender_ids": [owner_id],
            },
            sections={"Content": "to be removed"},
            date=_dt.date(2026, 5, 14),
        )
        md_path = (
            "default_app/default_project/users/u_del/episodes/episode-2026-05-14.md"
        )
        md_file = memory_root.root / md_path

        async def _row_indexed():  # type: ignore[no-untyped-def]
            return await episode_repo.get_by_id(f"u_del_{eid.format()}")

        # Wait until the cascade has indexed the freshly written entry.
        await _poll(_row_indexed, deadline_seconds=10.0)

        # Now remove the file; the watcher's on_deleted should fire.
        md_file.unlink()

        async def _row_removed():  # type: ignore[no-untyped-def]
            return (await episode_repo.get_by_id(f"u_del_{eid.format()}")) is None

        assert await _poll(_row_removed, deadline_seconds=10.0)
    finally:
        await orchestrator.stop()
|
||||
701
tests/integration/test_cascade_scenarios.py
Normal file
701
tests/integration/test_cascade_scenarios.py
Normal file
@ -0,0 +1,701 @@
|
||||
"""End-to-end cascade scenarios beyond the happy-path append.
|
||||
|
||||
Each test boots the full cascade (writer → watchdog → md_change_state →
|
||||
worker → LanceDB) against a tmp memory_root and asserts md/LanceDB
|
||||
convergence after a specific perturbation. Scanner interval is held
|
||||
at 60s here so the watcher path is the one being exercised — the
|
||||
scanner-fallback variants live in :mod:`test_cascade_scanner_fallback`.
|
||||
|
||||
Coverage targets
|
||||
----------------
|
||||
* Rename: in-bucket / out-of-glob / cross-owner ``mv`` of a real md
|
||||
file (not the atomic-replace one — that one's covered by
|
||||
:mod:`test_cascade_fsevents_repro`).
|
||||
* Content edits: re-writing an existing entry's body must flip
|
||||
``content_sha256`` and trigger LanceDB re-upsert (not skip).
|
||||
* Isolation: concurrent writes to N different owners must not bleed
|
||||
across each other's md_paths in LanceDB.
|
||||
* Lap race: ``writer.append`` calls overlapping a worker's
|
||||
in-flight handler must all converge once drained, no entries lost.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
import shutil
|
||||
from collections.abc import AsyncIterator
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.embedding import EmbeddingProvider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.core.persistence import MarkdownReader, MarkdownWriter, MemoryRoot
|
||||
from everos.infra.persistence.lancedb import (
|
||||
atomic_fact_repo,
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
)
|
||||
from everos.infra.persistence.lancedb.lancedb_manager import get_table
|
||||
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
|
||||
from everos.infra.persistence.markdown import AtomicFactWriter
|
||||
from everos.infra.persistence.sqlite import (
|
||||
dispose_engine,
|
||||
get_engine,
|
||||
md_change_state_repo,
|
||||
)
|
||||
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Clear the per-table write-lock pool before every test in this module.

    ``LanceRepoBase`` keeps ``asyncio.Lock`` objects in a ClassVar dict
    keyed by table name. Left alone, a lock outlives pytest-asyncio's
    function-scoped loop and the next test fails with "Lock bound to a
    different event loop". Mirrors the unit-test fixture in
    test_repository.py.
    """
    from everos.core.persistence.lancedb import repository as lance_repository

    lance_repository.LanceRepoBase._reset_locks_for_tests()
|
||||
|
||||
|
||||
class _StubEmbedder(EmbeddingProvider):
    """Offline embedding stand-in that always yields all-zero vectors."""

    # Length of every vector this stub returns.
    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector of length ``dim``; ``text`` is ignored."""
        return [0.0] * self.dim

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one fresh zero vector per element of ``texts``."""
        return [await self.embed(item) for item in texts]
|
||||
|
||||
|
||||
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Boot sqlite + lancedb against ``tmp_path`` and yield the default root.

    Sets the ``EVEROS_*`` environment to the tmp root and stub embedding
    values, disposes any existing singletons, creates the SQLModel tables
    and business indexes, yields ``MemoryRoot.default()``, then disposes
    the singletons again on teardown.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for name, value in stub_env.items():
        monkeypatch.setenv(name, value)

    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
|
||||
|
||||
|
||||
def _build_orchestrator(
    memory_root: MemoryRoot, *, scan_interval: float = 60.0
) -> CascadeOrchestrator:
    """Build an orchestrator wired to the stub embedder for ``memory_root``.

    Only the scanner interval is tunable; the worker settings are fixed
    (batch 20, one retry, 50 ms poll, no retry backoff).
    """
    cascade_config = CascadeConfig(
        scan_interval_seconds=scan_interval,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    return CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
|
||||
|
||||
|
||||
async def _wait_path_done(md_path: str, *, deadline: float = 15.0) -> None:
    """Wait until ``md_path`` has a state row that has reached a terminal status.

    "Terminal" means ``status`` is ``"done"`` *or* ``"failed"``; this
    helper does not tell the two apart, so callers that need success must
    check the outcome themselves.

    Bare ``_wait_drain`` returns immediately when the queue is empty,
    which is exactly the situation right after a single ``append_entries``
    when the watcher has not enqueued anything yet. So this helper works
    in three phases, all under one ``asyncio.timeout(deadline)``:

    1. poll until a state row for ``md_path`` exists (watcher noticed);
    2. poll until that row's status is terminal;
    3. settle for 100 ms and assert the row is still terminal, to absorb
       a last-second re-enqueue (e.g. atomic-replace echo).

    Raises ``TimeoutError`` when the deadline elapses and
    ``AssertionError`` when the row left the terminal state during the
    settle window.
    """
    terminal_statuses = ("done", "failed")
    async with asyncio.timeout(deadline):
        while True:  # noqa: ASYNC110 - polling cascade state
            if await md_change_state_repo.get_by_id(md_path) is not None:
                break
            await asyncio.sleep(0.05)
        while True:  # noqa: ASYNC110 - polling cascade state
            current = await md_change_state_repo.get_by_id(md_path)
            if current is not None and current.status in terminal_statuses:
                break
            await asyncio.sleep(0.05)
        await asyncio.sleep(0.1)
        settled = await md_change_state_repo.get_by_id(md_path)
        assert settled is not None and settled.status in terminal_statuses, (
            f"path {md_path} flipped back to {settled.status if settled else 'NONE'} "
            f"after reaching done"
        )
|
||||
|
||||
|
||||
async def _wait_paths_done(*md_paths: str, deadline: float = 15.0) -> None:
    """Run ``_wait_path_done`` for every path concurrently, same deadline each."""
    waiters = [_wait_path_done(path, deadline=deadline) for path in md_paths]
    await asyncio.gather(*waiters)
|
||||
|
||||
|
||||
async def _wait_drain(deadline: float = 15.0) -> None:
    """Wait for the *whole* queue to settle (``pending == 0``).

    Because an empty queue returns immediately, only use this once at
    least one path is known to be in flight (via ``_wait_path_done``
    first). Polls every 50 ms under ``asyncio.timeout(deadline)``.
    """
    async with asyncio.timeout(deadline):
        while True:
            if (await md_change_state_repo.queue_summary()).pending == 0:
                return
            await asyncio.sleep(0.05)
|
||||
|
||||
|
||||
async def _count_lance_rows_md(md_path: str) -> int:
    """Count AtomicFact table rows whose ``md_path`` column equals ``md_path``."""
    predicate = f"md_path = '{md_path}'"
    facts_table = await get_table(AtomicFact.TABLE_NAME, AtomicFact)
    return await facts_table.count_rows(filter=predicate)
|
||||
|
||||
|
||||
async def _count_md_entries(absolute: Path) -> int:
    """Return the number of parsed entries in the md at ``absolute``.

    A path that is not an existing regular file counts as zero entries.
    """
    if await anyio.Path(absolute).is_file():
        document = await MarkdownReader.read(absolute)
        return len(document.entries)
    return 0
|
||||
|
||||
|
||||
def _atomic_fact_md_path(owner_id: str, bucket: _dt.date) -> str:
|
||||
return (
|
||||
f"default_app/default_project/users/{owner_id}/.atomic_facts/"
|
||||
f"atomic_fact-{bucket.isoformat()}.md"
|
||||
)
|
||||
|
||||
|
||||
async def _seed_atomic_facts(
    writer: AtomicFactWriter,
    *,
    owner_id: str,
    bucket: _dt.date,
    n_items: int,
    text_prefix: str = "seed fact",
) -> None:
    """Append ``n_items`` synthetic entries for ``owner_id`` via ``writer``.

    Entry ``j`` gets session ``s_<j>``, parent ``mc_<j>``, a fixed
    timestamp and the fact text ``"<text_prefix> <j>"``. All entries go
    out in a single ``append_entries`` call dated ``bucket``.
    """
    items = []
    for j in range(n_items):
        attrs = {
            "owner_id": owner_id,
            "session_id": f"s_{j}",
            "timestamp": "2026-05-19T07:04:26+00:00",
            "parent_id": f"mc_{j}",
            "sender_ids": [owner_id],
        }
        fields = {"Fact": f"{text_prefix} {j}"}
        items.append((attrs, fields))
    await writer.append_entries(owner_id, items, date=bucket)
|
||||
|
||||
|
||||
# ===== A. Rename scenarios =====
|
||||
|
||||
|
||||
async def test_rename_same_owner_kind_in_bucket(
    cascade_runtime: MemoryRoot,
) -> None:
    """Rename ``atomic_fact-D1.md`` to ``atomic_fact-D2.md`` for one owner.

    Source and destination both match the kind glob. After the move the
    LanceDB rows for the source ``md_path`` must be gone and the
    destination ``md_path`` must hold the same number of rows.
    """
    root = cascade_runtime
    cascade = _build_orchestrator(root)
    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        seeded = 5
        owner = "u_rename_a"
        day_src, day_dest = _dt.date(2026, 5, 18), _dt.date(2026, 5, 20)
        await _seed_atomic_facts(
            AtomicFactWriter(root=root),
            owner_id=owner,
            bucket=day_src,
            n_items=seeded,
        )
        old_md_path = _atomic_fact_md_path(owner, day_src)
        new_md_path = _atomic_fact_md_path(owner, day_dest)
        old_file = root.root / old_md_path
        new_file = root.root / new_md_path

        await _wait_path_done(old_md_path)

        # Baseline: only the source path is indexed so far.
        assert await _count_lance_rows_md(old_md_path) == seeded
        assert await _count_lance_rows_md(new_md_path) == 0

        # A plain filesystem move, run off the event loop.
        await anyio.to_thread.run_sync(shutil.move, str(old_file), str(new_file))
        await _wait_paths_done(old_md_path, new_md_path)

        assert await _count_lance_rows_md(old_md_path) == 0, "src not cleared"
        assert await _count_lance_rows_md(new_md_path) == seeded, (
            "dest not reindexed"
        )

        # Both sides must end up with a settled md_change_state row.
        for settled_path in (old_md_path, new_md_path):
            state = await md_change_state_repo.get_by_id(settled_path)
            assert state is not None and state.status == "done"
    finally:
        await cascade.stop()
|
||||
|
||||
|
||||
async def test_rename_out_of_kind_glob_degrades_to_delete(
    cascade_runtime: MemoryRoot,
) -> None:
    """``mv`` from inside the kind glob to a path outside it.

    Expected: src lancedb cleared (treated as deletion); dest path is
    silently ignored because ``match_kind`` rejects it, so it never gets
    an ``md_change_state`` row of its own.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)

    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_rename_oob"
        bucket = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=4)
        src_md_path = _atomic_fact_md_path(owner_id, bucket)
        src_absolute = memory_root.root / src_md_path
        # An obviously-out-of-glob target: hide it under a plain dir
        # that no kind spec registers. Kept in root-relative form too so
        # the state table can be queried for it below.
        dest_md_path = "out_of_scope/random.md"
        dest_absolute = memory_root.root / dest_md_path
        await anyio.Path(dest_absolute.parent).mkdir(parents=True, exist_ok=True)

        await _wait_path_done(src_md_path)
        assert await _count_lance_rows_md(src_md_path) == 4

        await anyio.to_thread.run_sync(
            shutil.move, str(src_absolute), str(dest_absolute)
        )
        # The dest path is outside the glob, so there is no dest row to
        # wait on. Give the watcher a moment to enqueue the src deletion,
        # then wait for the whole queue to drain.
        await asyncio.sleep(0.5)
        await _wait_drain()

        # The src side must have been processed as a deletion.
        assert await _count_lance_rows_md(src_md_path) == 0
        src_row = await md_change_state_repo.get_by_id(src_md_path)
        assert src_row is not None and src_row.status == "done"

        # The dest path was never registered with any kind spec, so no
        # md_change_state row should exist for it.
        dest_row = await md_change_state_repo.get_by_id(dest_md_path)
        assert dest_row is None, (
            f"out-of-glob path {dest_md_path} unexpectedly entered md_change_state"
        )

        # Nothing may be left pending once the rename has been absorbed.
        summary = await md_change_state_repo.queue_summary()
        assert summary.pending == 0
    finally:
        await orchestrator.stop()
|
||||
|
||||
|
||||
async def test_rename_cross_owner_keeps_frontmatter_owner(
    cascade_runtime: MemoryRoot,
) -> None:
    """Move ``users/u_a/.atomic_facts/X.md`` to ``users/u_b/.atomic_facts/X.md``.

    The move does not rewrite the file, so its frontmatter still names
    ``u_a``. The rows indexed under the destination ``md_path`` are
    therefore expected to keep ``owner_id='u_a'`` even though the path
    sits under ``users/u_b/``. This pins the current design (frontmatter
    is the source of truth) as a regression anchor.
    """
    root = cascade_runtime
    cascade = _build_orchestrator(root)
    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        seeded = 3
        day = _dt.date(2026, 5, 18)
        owner_a, owner_b = "u_a", "u_b"
        await _seed_atomic_facts(
            AtomicFactWriter(root=root), owner_id=owner_a, bucket=day, n_items=seeded
        )
        src_md_path = _atomic_fact_md_path(owner_a, day)
        dest_md_path = _atomic_fact_md_path(owner_b, day)
        src_file = root.root / src_md_path
        dest_file = root.root / dest_md_path
        # The destination owner's directory does not exist yet.
        await anyio.Path(dest_file.parent).mkdir(parents=True, exist_ok=True)

        await _wait_path_done(src_md_path)
        assert await _count_lance_rows_md(src_md_path) == seeded

        await anyio.to_thread.run_sync(shutil.move, str(src_file), str(dest_file))
        await _wait_paths_done(src_md_path, dest_md_path)

        assert await _count_lance_rows_md(src_md_path) == 0
        assert await _count_lance_rows_md(dest_md_path) == seeded

        # Rows under the destination path must still belong to u_a
        # (current design: frontmatter wins over md_path for owner_id).
        moved = await atomic_fact_repo.find_where(
            f"md_path = '{dest_md_path}'", limit=10
        )
        assert moved, "dest md_path has no rows"
        seen_owners = [row.owner_id for row in moved]
        assert all(seen == owner_a for seen in seen_owners), (
            f"expected owner_id={owner_a} from frontmatter, "
            f"got {seen_owners}"
        )
    finally:
        await cascade.stop()
|
||||
|
||||
|
||||
# ===== B. Write-pattern scenarios =====
|
||||
|
||||
|
||||
async def test_modify_existing_entry_content_reindexes(
    cascade_runtime: MemoryRoot,
) -> None:
    """Editing entry bodies in place must re-upsert them, not skip them.

    The entry ids stay the same while the text changes, so every row's
    ``content_sha256`` and ``fact`` must differ once the queue has drained.
    """
    root = cascade_runtime
    cascade = _build_orchestrator(root)
    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        owner = "u_modify"
        day = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(
            AtomicFactWriter(root=root),
            owner_id=owner,
            bucket=day,
            n_items=3,
            text_prefix="ORIGINAL",
        )
        md_path = _atomic_fact_md_path(owner, day)
        md_file = root.root / md_path
        by_md_path = f"md_path = '{md_path}'"
        await _wait_path_done(md_path)

        before = await atomic_fact_repo.find_where(by_md_path, limit=10)
        assert len(before) == 3
        sha_before = {row.entry_id: row.content_sha256 for row in before}
        fact_before = {row.entry_id: row.fact for row in before}

        # Swap the marker word in the file text and write it back via
        # MarkdownWriter.write().
        old_text = await anyio.Path(md_file).read_text(encoding="utf-8")
        edited_text = old_text.replace("ORIGINAL", "EDITED")
        assert edited_text != old_text
        await MarkdownWriter(root).write(md_file, edited_text)
        # Same md_path is reused: pause briefly so the change can be
        # enqueued, then wait for the queue to drain.
        await asyncio.sleep(0.3)
        await _wait_drain()

        after = await atomic_fact_repo.find_where(by_md_path, limit=10)
        assert len(after) == 3
        sha_after = {row.entry_id: row.content_sha256 for row in after}
        fact_after = {row.entry_id: row.fact for row in after}

        # Same entry ids on both sides; every hash changed; every fact
        # now carries the edited marker instead of the original one.
        assert set(sha_after) == set(sha_before)
        for entry_id, new_sha in sha_after.items():
            assert new_sha != sha_before[entry_id], (
                f"content_sha256 did not change for {entry_id}: stayed {new_sha}"
            )
            new_fact = fact_after[entry_id]
            assert "EDITED" in new_fact, (
                f"fact text not updated for {entry_id}: {new_fact!r}"
            )
            assert "ORIGINAL" not in new_fact
            assert "ORIGINAL" in fact_before[entry_id]
    finally:
        await cascade.stop()
|
||||
|
||||
|
||||
async def test_concurrent_writes_different_owners_no_bleed(
    cascade_runtime: MemoryRoot,
) -> None:
    """Several owners seeding in parallel must stay isolated per md_path.

    After all paths settle, each owner's md_path must contain exactly that
    owner's entries and nothing written by another owner.
    """
    root = cascade_runtime
    cascade = _build_orchestrator(root)
    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        writer = AtomicFactWriter(root=root)
        day = _dt.date(2026, 5, 18)
        per_owner = 4
        owners = [f"u_concur_{i}" for i in range(5)]

        seed_jobs = [
            _seed_atomic_facts(
                writer,
                owner_id=owner,
                bucket=day,
                n_items=per_owner,
                text_prefix=f"by-{owner}",
            )
            for owner in owners
        ]
        await asyncio.gather(*seed_jobs)

        md_paths = [_atomic_fact_md_path(owner, day) for owner in owners]
        await _wait_paths_done(*md_paths)

        for owner, md_path in zip(owners, md_paths):
            rows = await atomic_fact_repo.find_where(
                f"md_path = '{md_path}'", limit=10
            )
            assert len(rows) == per_owner, (
                f"{owner}: expected {per_owner} rows, got {len(rows)}"
            )
            # No row under this md_path may come from another owner's
            # concurrent write.
            assert all(row.owner_id == owner for row in rows)
            assert all(f"by-{owner}" in row.fact for row in rows)
    finally:
        await cascade.stop()
|
||||
|
||||
|
||||
async def test_lap_append_during_handler_no_loss(
    cascade_runtime: MemoryRoot,
) -> None:
    """Appends that land while a handler is running must not be lost.

    The embedder is slowed down so that a handler invocation overlaps
    with later appends. Once the path settles, the LanceDB row count must
    equal the number of md entries — the lap is meant to be absorbed by
    the worker's status='processing' guard + re-claim.
    """
    root = cascade_runtime

    class _SlowEmbedder(_StubEmbedder):
        async def embed(self, text: str) -> list[float]:
            # Each embed costs ~50ms, so a handler takes ~0.05 * N entries.
            await asyncio.sleep(0.05)
            return [0.0] * self.dim

    slow_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    cascade = CascadeOrchestrator(
        memory_root=root,
        embedder=_SlowEmbedder(),
        tokenizer=build_tokenizer(),
        config=slow_config,
    )
    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        writer = AtomicFactWriter(root=root)
        owner = "u_lap"
        day = _dt.date(2026, 5, 18)
        total = 30
        for i in range(total):
            attrs = {
                "owner_id": owner,
                "session_id": f"s_{i}",
                "timestamp": "2026-05-19T07:04:26+00:00",
                "parent_id": f"mc_{i}",
                "sender_ids": [owner],
            }
            single_entry = [(attrs, {"Fact": f"fact body {i}"})]
            await writer.append_entries(owner, single_entry, date=day)
            # 20ms between appends: slow enough for some writes to land
            # during a ~50ms embed, fast enough for several to pile up
            # within one handler run.
            await asyncio.sleep(0.02)

        md_path = _atomic_fact_md_path(owner, day)
        await _wait_path_done(md_path, deadline=30.0)

        md_entries = await _count_md_entries(root.root / md_path)
        lance_rows = await _count_lance_rows_md(md_path)
        assert md_entries == total, (
            f"writer self-check: expected {total} md entries, got {md_entries}"
        )
        assert lance_rows == md_entries, (
            f"LAP LOSS: md={md_entries} lance={lance_rows}"
        )
    finally:
        await cascade.stop()
|
||||
|
||||
|
||||
# ===== C. Scanner fallback scenarios =====
|
||||
|
||||
|
||||
def _build_orchestrator_fast_scanner(memory_root: MemoryRoot) -> CascadeOrchestrator:
    """Build a ``CascadeOrchestrator`` whose scanner runs every 2 seconds.

    Intended as the short-scan-interval counterpart of
    :func:`_build_orchestrator`, so scanner-fallback tests do not have to
    wait out a long scan interval.
    """
    embedder = _StubEmbedder()
    tokenizer = build_tokenizer()
    fast_scan_config = CascadeConfig(
        scan_interval_seconds=2.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    return CascadeOrchestrator(
        memory_root=memory_root,
        embedder=embedder,
        tokenizer=tokenizer,
        config=fast_scan_config,
    )
|
||||
|
||||
|
||||
def _silence_handler_method(monkeypatch: pytest.MonkeyPatch, name: str) -> None:
    """Swap ``watcher._Handler.<name>`` for a no-op via ``monkeypatch``.

    The patch lasts as long as the ``monkeypatch`` fixture does. It is
    meant to simulate the filesystem notifier (e.g. fseventsd) missing
    that event class entirely.
    """
    from everos.memory.cascade import watcher as watcher_module

    def _ignore_event(self, event):
        return None

    monkeypatch.setattr(watcher_module._Handler, name, _ignore_event)
|
||||
|
||||
|
||||
async def test_scanner_recovers_missed_delete(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Watcher's ``on_deleted`` is silenced → unlink no longer enqueues
    via the watcher. The scanner sweep should still notice the path
    missing on disk and enqueue a 'deleted' on its own."""
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator_fast_scanner(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)

    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_scan_del"
        bucket = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=3)
        md_path = _atomic_fact_md_path(owner_id, bucket)
        absolute = memory_root.root / md_path
        await _wait_path_done(md_path)
        assert await _count_lance_rows_md(md_path) == 3

        # From here on, watcher ignores deletions.
        _silence_handler_method(monkeypatch, "on_deleted")

        # Delete through anyio so the event loop is not blocked, matching
        # the other filesystem calls in this module.
        await anyio.Path(absolute).unlink()
        # Watcher won't enqueue; the scanner (2s interval) is the only
        # remaining way for the deletion to be noticed and enqueued.
        await asyncio.sleep(0.2)

        async def _lance_cleared() -> bool:
            return await _count_lance_rows_md(md_path) == 0

        # Wait for the index rows to disappear...
        async with asyncio.timeout(10.0):
            while not await _lance_cleared():  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)

        # ...then for the state row to settle, and check how it was classified.
        async with asyncio.timeout(5.0):
            while True:  # noqa: ASYNC110 - polling cascade state
                row = await md_change_state_repo.get_by_id(md_path)
                if row is not None and row.status == "done":
                    break
                await asyncio.sleep(0.1)
        assert row.change_type == "deleted"
    finally:
        await orchestrator.stop()
|
||||
|
||||
|
||||
async def test_scanner_indexes_preexisting_md(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An md file written BEFORE cascade starts (or by an editor while
    cascade is offline). watchdog ignores files that exist at schedule
    time — only the scanner can pick it up. Simulate by writing the file
    before orchestrator.start() and silencing every watcher handler
    (``on_created`` / ``on_modified`` / ``on_moved`` / ``on_deleted``)."""
    memory_root = cascade_runtime

    # Pre-seed: write the md directly to disk before any cascade is up.
    owner_id = "u_scan_pre"
    bucket = _dt.date(2026, 5, 18)
    writer = AtomicFactWriter(root=memory_root)
    await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=2)
    md_path = _atomic_fact_md_path(owner_id, bucket)
    # Checked through anyio so the event loop is not blocked, matching
    # the other filesystem calls in this module.
    assert await anyio.Path(memory_root.root / md_path).is_file()

    # Now start cascade with the file already on disk. Belt-and-
    # suspenders: silence all watcher events so the only path to
    # discovery is the scanner.
    orchestrator = _build_orchestrator_fast_scanner(memory_root)
    for name in ("on_created", "on_modified", "on_moved", "on_deleted"):
        _silence_handler_method(monkeypatch, name)
    await orchestrator.start()

    try:

        async def _lance_filled() -> bool:
            return await _count_lance_rows_md(md_path) == 2

        # Both seeded entries must show up in the index within 10 seconds.
        async with asyncio.timeout(10.0):
            while not await _lance_filled():  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)
    finally:
        await orchestrator.stop()
|
||||
|
||||
|
||||
async def test_scanner_recovers_missed_modify(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With create/modify/move watcher events silenced, the scanner alone
    must index a newly written file.

    The writer's append is expected to surface only as those (now dropped)
    event classes, so the rows can only appear via the scanner sweep.
    """
    root = cascade_runtime
    cascade = _build_orchestrator_fast_scanner(root)

    # Patch the handlers before start() so not even the initial schedule
    # can deliver an add/create event.
    for handler_name in ("on_created", "on_modified", "on_moved"):
        _silence_handler_method(monkeypatch, handler_name)

    await cascade.start()
    await asyncio.sleep(0.3)

    try:
        seeded = 3
        owner = "u_scan_mod"
        day = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(
            AtomicFactWriter(root=root), owner_id=owner, bucket=day, n_items=seeded
        )
        md_path = _atomic_fact_md_path(owner, day)

        async with asyncio.timeout(10.0):
            while await _count_lance_rows_md(md_path) != seeded:  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)

        state = await md_change_state_repo.get_by_id(md_path)
        assert state is not None and state.status == "done"
    finally:
        await cascade.stop()
|
||||
268
tests/integration/test_memorize_agent_mode.py
Normal file
268
tests/integration/test_memorize_agent_mode.py
Normal file
@ -0,0 +1,268 @@
|
||||
"""Agent-mode memorize integration tests.
|
||||
|
||||
Covers the agent branches that ``test_memorize_integration.py`` skips:
|
||||
|
||||
- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
|
||||
pipelines)
|
||||
- :mod:`service._boundary` agent-mode detection via
|
||||
:class:`everalgo.agent_memory.AgentBoundaryDetector`
|
||||
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end
|
||||
|
||||
Self-contained: the chat-baseline file keeps its fixture local, so we
|
||||
copy the minimum scaffolding rather than refactor it into a shared
|
||||
conftest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import sqlite3
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
||||
from everalgo.llm.types import ChatResponse
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.service.memorize import MemorizeResult, memorize
|
||||
|
||||
|
||||
def _boundary_response(boundaries: list[int]) -> str:
    """Serialize a boundary-detector reply carrying ``boundaries`` as JSON.

    ``reasoning`` is always ``"test"`` and ``should_wait`` always false.
    """
    reply = {
        "reasoning": "test",
        "boundaries": boundaries,
        "should_wait": False,
    }
    return json.dumps(reply)
|
||||
|
||||
|
||||
def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` with scripted boundary replies.

    A prompt whose first message mentions "boundaries" or "memcell"
    (case-insensitive) is answered with the next entry of
    ``boundary_responses``, or an empty boundary list once the script is
    exhausted. Any other prompt gets a fixed title/content JSON reply.
    """
    # Iterate over a copy so the caller's list is never consumed.
    scripted = iter(list(boundary_responses or []))

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        if "boundaries" in lowered or "memcell" in lowered:
            body = _boundary_response(next(scripted, []))
        else:
            body = json.dumps({"title": "T", "content": "B"})
        return ChatResponse(content=body, model="fake")

    return FakeLLMClient(handler=handler)
|
||||
|
||||
|
||||
def _msg(
    role: str,
    content: str,
    *,
    sender_id: str = "u_alice",
    timestamp: int = 1_700_000_000_000,
    tool_calls: list[dict] | None = None,
    tool_call_id: str | None = None,
) -> dict[str, Any]:
    """Build one message dict for a memorize payload.

    ``tool_calls`` and ``tool_call_id`` are included only when they are
    not ``None``.
    """
    message: dict[str, Any] = {
        "sender_id": sender_id,
        "role": role,
        "content": content,
        "timestamp": timestamp,
    }
    optional_fields = {"tool_calls": tool_calls, "tool_call_id": tool_call_id}
    message.update(
        {key: value for key, value in optional_fields.items() if value is not None}
    )
    return message
|
||||
|
||||
|
||||
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Build a ``role="user"`` message sent by ``sender`` at timestamp ``ts``."""
    return _msg(role="user", content=content, sender_id=sender, timestamp=ts)
|
||||
|
||||
|
||||
def _assistant(content: str, ts: int) -> dict[str, Any]:
    """Build a ``role="assistant"`` message (sender ``"assistant"``) at ``ts``."""
    return _msg(
        role="assistant", content=content, sender_id="assistant", timestamp=ts
    )
|
||||
|
||||
|
||||
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
    """Read all ``memcell`` rows, ordered by timestamp, from the system db.

    The database is looked up at ``<tmp_path>/.index/sqlite/system.db``;
    an empty list is returned when that file does not exist.
    """
    db_file = tmp_path.joinpath(".index", "sqlite", "system.db")
    if db_file.is_file():
        connection = sqlite3.connect(db_file)
        try:
            # Rows are returned as sqlite3.Row so columns are addressable by name.
            connection.row_factory = sqlite3.Row
            cursor = connection.execute("SELECT * FROM memcell ORDER BY timestamp")
            return cursor.fetchall()
        finally:
            connection.close()
    return []
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``.

    Yields an async ``_setup(*, mode, fake_llm)`` callable that the test
    must await before calling ``memorize()``. It points ``MemoryRoot`` at
    ``tmp_path``, resets the memorize / LLM-client / strategy module
    singletons, installs ``fake_llm``, creates the sqlite schema, stubs
    the strategy extractors and starts the service engine.

    Teardown stops the engine, disposes the sqlite engine even if the stop
    fails, and clears the ``load_settings`` cache so settings loaded under
    the patched environment do not leak into later tests.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset module-level singletons so each test builds them afresh.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Filled in by _setup so teardown knows what needs releasing.
    started: dict[str, Any] = {"engine": None}

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)
        started["dispose"] = dispose_engine

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    try:
        if started.get("engine") is not None:
            await started["engine"].stop()
    finally:
        # Release the sqlite engine even when stopping the service engine
        # raised, so the database handle is not left open.
        if started.get("dispose") is not None:
            await started["dispose"]()

        from everos.config import load_settings

        # The cache may hold settings built from the patched env vars;
        # clear it so later tests reload from their own environment.
        load_settings.cache_clear()
|
||||
|
||||
|
||||
# ── Tests ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent-mode happy path: one user/assistant exchange yields one memcell."""
    await memorize_env(
        mode="agent", fake_llm=_make_fake_llm(boundary_responses=[[]])
    )

    exchange = [
        _user("hello", 1_700_000_000_000),
        _assistant("hi there", 1_700_000_001_000),
    ]
    request = {"session_id": "test_agent_basic", "messages": exchange}
    result = await memorize(request, is_final=True)
    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"

    # Exactly one cell is stored, and it is typed as an agent trajectory.
    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    assert cells[0]["raw_type"] == "AgentTrajectory"
|
||||
|
||||
|
||||
async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps the tool-call and ``role=tool`` messages in the cell.

    All four messages are expected in the cell's message id list (chat
    mode would keep only two).
    """
    await memorize_env(
        mode="agent", fake_llm=_make_fake_llm(boundary_responses=[[]])
    )

    tool_call = {
        "id": "c1",
        "type": "function",
        "function": {"name": "x", "arguments": "{}"},
    }
    messages = [
        _user("debug this", 1_700_000_000_000),
        _msg(
            "assistant",
            "calling tool",
            timestamp=1_700_000_001_000,
            tool_calls=[tool_call],
        ),
        _msg(
            "tool",
            "result",
            sender_id="tool",
            timestamp=1_700_000_002_000,
            tool_call_id="c1",
        ),
        _assistant("here's the answer", 1_700_000_003_000),
    ]
    payload = {"session_id": "test_agent_tools", "messages": messages}
    result = await memorize(payload, is_final=True)
    assert result.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    # All four messages must survive in agent mode.
    message_ids = json.loads(cells[0]["message_ids_json"])
    assert len(message_ids) == 4
|
||||
|
||||
|
||||
async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Running both pipelines must still leave exactly one memcell row."""
    await memorize_env(
        mode="agent", fake_llm=_make_fake_llm(boundary_responses=[[]])
    )

    turns = [
        _user("u1", 1_700_000_000_000),
        _assistant("a1", 1_700_000_001_000),
        _user("u2", 1_700_000_002_000),
        _assistant("a2", 1_700_000_003_000),
    ]
    await memorize(
        {"session_id": "test_agent_dispatch", "messages": turns},
        is_final=True,
    )

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1  # boundary stage owns the ledger
    stored = json.loads(cells[0]["payload_json"])
    assert len(stored["items"]) == 4
|
||||
300
tests/integration/test_memorize_concurrent_session_lock.py
Normal file
300
tests/integration/test_memorize_concurrent_session_lock.py
Normal file
@ -0,0 +1,300 @@
|
||||
"""Concurrent /add on one session must not lose messages (regression).
|
||||
|
||||
White-box integration test for the per-session lock added in
|
||||
``everos.service._session_lock``.
|
||||
|
||||
Bug class
|
||||
---------
|
||||
Without the lock, two concurrent ``memorize()`` calls on the same
|
||||
``session_id`` race on ``unprocessed_buffer``:
|
||||
|
||||
1. Both read the same pre-existing buffer rows.
|
||||
2. Each boundary call sees only its own newly-arrived messages plus
|
||||
the shared pre-existing buffer (neither sees the other's messages).
|
||||
3. Both call ``_replace_buffer(session_id, tail)`` — the later write
|
||||
silently overwrites the earlier write's tail; the earlier task's
|
||||
tail messages are lost forever.
|
||||
|
||||
Invariant under test
|
||||
--------------------
|
||||
After N concurrent ``memorize()`` calls on one session, every input
|
||||
message_id is **either** in some memcell's ``message_ids_json`` **or**
|
||||
in the surviving ``unprocessed_buffer`` rows. Nothing silently vanishes.
|
||||
|
||||
This is a white-box integration test (not e2e): it bypasses HTTP, calls
|
||||
``memorize()`` directly, but inspects sqlite tables to assert internal
|
||||
state. Uses ``FakeLLMClient`` to avoid real LLM latency and to control
|
||||
boundary decisions deterministically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
||||
from everalgo.llm.types import ChatResponse
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from sqlalchemy import text
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.service.memorize import memorize
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake LLM that splits each call into one memcell + 0-tail (force extract)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _boundary_response(boundaries: list[int]) -> str:
    """Serialize a boundary-detector reply carrying ``boundaries`` as JSON.

    ``reasoning`` is always ``"test"`` and ``should_wait`` always false.
    """
    reply = {
        "reasoning": "test",
        "boundaries": boundaries,
        "should_wait": False,
    }
    return json.dumps(reply)
|
||||
|
||||
|
||||
def _episode_response(title: str = "T", content: str = "B") -> str:
    """Serialize an episode reply with the given title and content as JSON."""
    episode = {"title": title, "content": content}
    return json.dumps(episode)
|
||||
|
||||
|
||||
def _make_extract_all_llm() -> FakeLLMClient:
    """Build a fake LLM whose boundary replies always cut at index 999.

    Prompts whose first message mentions "boundaries" or "memcell"
    (case-insensitive) get the boundary reply; every other prompt gets the
    default episode reply.
    """

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        is_boundary_prompt = "boundaries" in lowered or "memcell" in lowered
        if is_boundary_prompt:
            # Boundary indices are relative to the merged input. An empty
            # list would mean "no cut, hold"; a single [N] means "cut after
            # index N". 999 is a sentinel far beyond any batch used here,
            # intended to make the boundary stage take everything as one
            # cell with no tail.
            body = _boundary_response([999])
        else:
            body = _episode_response()
        return ChatResponse(content=body, model="fake")

    return FakeLLMClient(handler=handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture — mirrors test_memorize_integration's pattern with the OME strategy extractors stubbed out
|
||||
# (the lock bug lives at the boundary stage; downstream strategies are
|
||||
# irrelevant to this race).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def memorize_env_locked(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., AsyncMock]]:
    """Yield a builder that primes an isolated memorize environment.

    Before yielding, the fixture redirects ``MemoryRoot.default()`` to
    ``tmp_path``, clears the memorize / LLM-client / strategy-writer module
    singletons, and resets the session-lock registry. The yielded
    ``_setup(fake_llm=...)`` coroutine then sets the env vars, installs the
    fake LLM, creates the sqlite schema, stubs both OME extractors, and
    starts the engine returned by ``svc._get_engine()``.

    Teardown stops that engine (when ``_setup`` started one) and always
    disposes the sqlite engine, even if stopping raises.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")
    lock_mod = importlib.import_module("everos.service._session_lock")

    # Reset memorize singletons + session lock registry.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
    lock_mod._reset_for_tests()

    # Filled in by ``_setup`` so teardown knows whether an engine was started.
    started: dict[str, Any] = {"engine": None}

    async def _setup(*, fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", "chat")
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategy extractors (we only care about the boundary +
        # memcell + buffer cycle; downstream strategies are a separate story).
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    from everos.infra.persistence.sqlite import dispose_engine

    # try/finally: a failing ``stop()`` must not leave the sqlite engine
    # (bound to this test's tmp_path) alive for the next test.
    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        await dispose_engine()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _msg(idx: int, sender: str, ts: int) -> dict[str, Any]:
    """Build one ``role="user"`` message dict for a memorize payload.

    ``idx`` and ``sender`` are only used to make the content unique;
    ``ts`` is stored as the message timestamp.
    """
    message: dict[str, Any] = dict(sender_id=sender, role="user", timestamp=ts)
    message["content"] = f"msg-{idx} from {sender}"
    return message
|
||||
|
||||
|
||||
async def _collect_buffer_message_ids(session_id: str) -> set[str]:
    """Return the ``message_id`` of every ``unprocessed_buffer`` row of a session."""
    from everos.infra.persistence.sqlite import get_engine

    query = text("SELECT message_id FROM unprocessed_buffer WHERE session_id = :s")
    async with get_engine().connect() as conn:
        rows = (await conn.execute(query, {"s": session_id})).fetchall()
        return {row[0] for row in rows}
|
||||
|
||||
|
||||
async def _collect_memcell_message_ids(session_id: str) -> set[str]:
    """Return the union of ids stored in ``message_ids_json`` for a session.

    Each ``memcell`` row holds a JSON-encoded collection of message ids;
    the rows are decoded and merged into a single set.
    """
    from everos.infra.persistence.sqlite import get_engine

    query = text("SELECT message_ids_json FROM memcell WHERE session_id = :s")
    async with get_engine().connect() as conn:
        rows = (await conn.execute(query, {"s": session_id})).fetchall()
        return set().union(*(json.loads(raw) for (raw,) in rows))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_concurrent_adds_same_session_no_message_loss(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Two concurrent adds on one session must not lose any message.

    Every input message id has to surface either in some memcell's
    ``message_ids_json`` or in the surviving ``unprocessed_buffer`` rows.
    """
    await memorize_env_locked(fake_llm=_make_extract_all_llm())

    session_id = "s_concurrent"
    alice_base_ts = 1_700_000_000_000
    bob_base_ts = 1_700_000_100_000
    batch_a = [_msg(n, "alice", alice_base_ts + n * 1000) for n in range(4)]
    batch_b = [_msg(n + 100, "bob", bob_base_ts + n * 1000) for n in range(4)]

    # Launch both adds together so they contend for the same session.
    pending = [
        memorize({"session_id": session_id, "messages": batch})
        for batch in (batch_a, batch_b)
    ]
    await asyncio.gather(*pending)

    in_buffer = await _collect_buffer_message_ids(session_id)
    in_memcells = await _collect_memcell_message_ids(session_id)
    accounted_for = in_buffer | in_memcells

    # Message ids are generated internally; instead of re-deriving them
    # here, only check that 8 distinct ids are accounted for.
    assert len(accounted_for) == 8, (
        f"expected 8 distinct message ids covered, got {len(accounted_for)}: "
        f"buffer={len(in_buffer)}, memcell={len(in_memcells)}"
    )

    # Sanity: a consumed message is expected to be removed from the buffer,
    # so no id may show up in both places.
    in_both = in_buffer & in_memcells
    assert not in_both, f"messages in both buffer and memcell: {in_both}"
|
||||
|
||||
|
||||
async def test_concurrent_adds_serial_when_locked(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Stress variant: four concurrent batches on one session lose nothing."""
    await memorize_env_locked(fake_llm=_make_extract_all_llm())

    session_id = "s_stress"
    n_batches = 4
    batch_size = 3

    # Batch b holds messages numbered b*10 .. b*10 + batch_size - 1, each
    # stamped one second after the previous sequence number.
    batches: list[list[dict[str, Any]]] = []
    for b in range(n_batches):
        batch = []
        for i in range(batch_size):
            seq = b * 10 + i
            batch.append(_msg(seq, f"u{b}", 1_700_000_000_000 + seq * 1000))
        batches.append(batch)

    pending = [
        memorize({"session_id": session_id, "messages": batch}) for batch in batches
    ]
    await asyncio.gather(*pending)

    in_buffer = await _collect_buffer_message_ids(session_id)
    in_memcells = await _collect_memcell_message_ids(session_id)
    accounted_for = in_buffer | in_memcells

    expected = n_batches * batch_size
    assert len(accounted_for) == expected, (
        f"expected {expected} message ids covered, got {len(accounted_for)}: "
        f"buffer={len(in_buffer)}, memcell={len(in_memcells)}"
    )
    assert not (in_buffer & in_memcells)
|
||||
|
||||
|
||||
async def test_different_sessions_run_in_parallel(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Concurrent adds on three distinct sessions each keep all their messages.

    NOTE(review): despite the name, nothing here measures timing or
    overlap, so this does not prove that cross-session calls avoid
    serialising — it only checks that no session loses or double-books a
    message when the calls are launched together.
    """
    await memorize_env_locked(fake_llm=_make_extract_all_llm())

    def _msgs(sid: str) -> list[dict[str, Any]]:
        # The session id doubles as the sender id, purely for readability.
        return [_msg(i, sid, 1_700_000_000_000 + i * 1000) for i in range(3)]

    session_ids = ("s_a", "s_b", "s_c")
    await asyncio.gather(
        *(
            memorize({"session_id": sid, "messages": _msgs(sid)})
            for sid in session_ids
        )
    )

    for sid in session_ids:
        buffered = await _collect_buffer_message_ids(sid)
        in_cells = await _collect_memcell_message_ids(sid)
        covered = buffered | in_cells
        assert len(covered) == 3, (
            f"session {sid}: got {len(covered)}, want 3 "
            f"(buffer={len(buffered)}, memcell={len(in_cells)})"
        )
        # Same invariant the same-session tests assert: an id must not be
        # both buffered and already inside a memcell.
        overlap = buffered & in_cells
        assert not overlap, (
            f"session {sid}: messages in both buffer and memcell: {overlap}"
        )
|
||||
690
tests/integration/test_memorize_integration.py
Normal file
690
tests/integration/test_memorize_integration.py
Normal file
@ -0,0 +1,690 @@
|
||||
"""End-to-end memorize integration tests.
|
||||
|
||||
Drives ``service.memorize.memorize()`` with a ``FakeLLMClient`` so the
|
||||
full chain (ingest → boundary → user / agent pipeline → md + OME emit)
|
||||
runs without real LLM calls. Each test isolates state by:
|
||||
|
||||
- redirecting ``MemoryRoot.default()`` to a ``tmp_path``
|
||||
- resetting service-layer lazy singletons
|
||||
- starting / stopping a per-test ``OfflineEngine``
|
||||
- patching ``get_llm_client`` (boundary + strategies) onto a fake
|
||||
|
||||
OME strategies (atomic / foresight) are silenced by stubbing ``aextract`` so
|
||||
this test focuses on the synchronous boundary + pipeline + md path —
|
||||
strategy dispatch correctness already has its own coverage in
|
||||
``test_ome_strategies_integration.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import sqlite3
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
||||
from everalgo.llm.types import ChatResponse
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.service.memorize import MemorizeResult, memorize
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Canned LLM responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _boundary_response(boundaries: list[int]) -> str:
    """Return the JSON text of a canned ``detect_boundaries`` reply (algo schema).

    ``reasoning`` is fixed to ``"test"`` and ``should_wait`` to ``False``.
    """
    return json.dumps(
        {"reasoning": "test", "boundaries": boundaries, "should_wait": False}
    )
|
||||
|
||||
|
||||
def _episode_response(title: str = "Test Subject", content: str = "Test body") -> str:
    """Return the JSON text of a canned ``EpisodeExtractor`` reply (algo schema)."""
    episode = dict(title=title, content=content)
    return json.dumps(episode)
|
||||
|
||||
|
||||
def _make_fake_llm(
    boundary_responses: list[list[int]] | None = None,
    *,
    episode_title: str = "Test Subject",
    episode_content: str = "Test body",
) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` that answers according to the prompt text.

    A prompt whose first message mentions "boundaries" or "memcell"
    (case-insensitive) consumes the next entry of ``boundary_responses``;
    once the queue is exhausted it answers with an empty boundary list.
    Any other prompt receives the same canned ``{title, content}`` episode.
    """
    # Copy so the caller's list is never mutated by the pops below.
    pending_cuts: list[list[int]] = list(boundary_responses or [])

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        if "boundaries" not in lowered and "memcell" not in lowered:
            # Episode reply; atomic/foresight prompts would also land here,
            # but their extractors are stubbed out by the fixture.
            body = _episode_response(episode_title, episode_content)
        elif pending_cuts:
            body = _boundary_response(pending_cuts.pop(0))
        else:
            body = _boundary_response([])
        return ChatResponse(content=body, model="fake")

    return FakeLLMClient(handler=handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared setup fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., AsyncMock]]:
    """Yield a builder that configures a clean memorize environment.

    Usage::

        async def test_x(memorize_env):
            await memorize_env(mode="chat", fake_llm=_make_fake_llm([...]))
            outcome = await memorize({"session_id": "s", "messages": [...]})

    The builder must be called exactly once per test (it primes singletons
    and starts the OME engine). Teardown stops that engine and disposes the
    sqlite engine; the dispose step runs even if stopping raises, and also
    when the builder failed after the sqlite engine was created.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset singletons.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Filled in by ``_setup`` so teardown knows what needs cleaning up.
    started: dict[str, Any] = {"engine": None, "sqlite_engine": None}

    async def _setup(
        *,
        mode: str = "chat",
        fake_llm: FakeLLMClient,
        hard_token_limit: int = 65536,
        hard_msg_limit: int = 500,
    ) -> None:
        # Provide a non-None API key + base_url so get_llm_client doesn't
        # raise; we replace the cached singleton with our fake right after.
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        monkeypatch.setenv(
            "EVEROS_BOUNDARY_DETECTION__HARD_TOKEN_LIMIT", str(hard_token_limit)
        )
        monkeypatch.setenv(
            "EVEROS_BOUNDARY_DETECTION__HARD_MSG_LIMIT", str(hard_msg_limit)
        )
        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()

        # Replace the cached client singleton with our fake so get_llm_client
        # returns the fake on subsequent calls.
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        # Build sqlite schema.
        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Record the dispose handle before touching the schema so teardown
        # still disposes the engine if ``create_all`` fails.
        started["sqlite_engine"] = (get_engine, dispose_engine)
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Mock the OME extractors so the async strategy chain is a no-op
        # (the strategy itself still runs; it just sees no facts/foresights).
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    # try/finally: a failing ``stop()`` must not leave the sqlite engine
    # (bound to this test's tmp_path) alive for the next test.
    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        if started["sqlite_engine"] is not None:
            _, dispose = started["sqlite_engine"]
            await dispose()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _msg(
    role: str,
    content: str,
    *,
    sender_id: str = "u_alice",
    timestamp: int = 1_700_000_000_000,
    tool_calls: list[dict] | None = None,
    tool_call_id: str | None = None,
) -> dict[str, Any]:
    """Build one message dict for a memorize payload.

    ``tool_calls`` and ``tool_call_id`` are added to the dict only when
    they are not ``None`` (an empty list is still included).
    """
    message: dict[str, Any] = dict(
        sender_id=sender_id, role=role, content=content, timestamp=timestamp
    )
    optional = {"tool_calls": tool_calls, "tool_call_id": tool_call_id}
    message.update({key: value for key, value in optional.items() if value is not None})
    return message
|
||||
|
||||
|
||||
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Shorthand for a ``role="user"`` message stamped at ``ts``."""
    return _msg(role="user", content=content, timestamp=ts, sender_id=sender)
|
||||
|
||||
|
||||
def _assistant(content: str, ts: int, *, sender: str = "assistant") -> dict[str, Any]:
    """Shorthand for a ``role="assistant"`` message stamped at ``ts``."""
    return _msg(role="assistant", content=content, timestamp=ts, sender_id=sender)
|
||||
|
||||
|
||||
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
    """Read every ``memcell`` row, ordered by ``timestamp``.

    Opens ``<tmp_path>/.index/sqlite/system.db`` directly with ``sqlite3``.
    Returns an empty list when that file does not exist.
    """
    db_file = tmp_path.joinpath(".index", "sqlite", "system.db")
    if db_file.is_file():
        connection = sqlite3.connect(db_file)
        connection.row_factory = sqlite3.Row
        try:
            cursor = connection.execute("SELECT * FROM memcell ORDER BY timestamp")
            return cursor.fetchall()
        finally:
            connection.close()
    return []
|
||||
|
||||
|
||||
def _buffer_count(tmp_path: Path) -> int:
    """Count ``unprocessed_buffer`` rows whose ``track`` is ``'memorize'``.

    Opens ``<tmp_path>/.index/sqlite/system.db`` directly with ``sqlite3``.
    Returns ``0`` when that file does not exist.
    """
    db_file = tmp_path.joinpath(".index", "sqlite", "system.db")
    if not db_file.is_file():
        return 0
    connection = sqlite3.connect(db_file)
    try:
        cursor = connection.execute(
            "SELECT COUNT(*) FROM unprocessed_buffer WHERE track='memorize'"
        )
        row = cursor.fetchone()
        return row[0]
    finally:
        connection.close()
|
||||
|
||||
|
||||
def _episode_paths(tmp_path: Path) -> list[Path]:
    """List ``episode-*.md`` files under the default users tree, sorted.

    The search is recursive below
    ``<tmp_path>/default_app/default_project/users``.
    """
    users_root = tmp_path.joinpath("default_app", "default_project", "users")
    matches = list(users_root.rglob("episode-*.md"))
    matches.sort()
    return matches
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy path baseline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_chat_baseline_two_msgs_one_cell(
    tmp_path: Path,
    memorize_env: Callable[..., Any],
) -> None:
    """Happy path: a final two-message batch gives 1 memcell row + 1 Episode md."""
    # One empty boundary reply: the fake LLM proposes no internal cuts.
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    conversation = [
        _user("hello", 1_700_000_000_000),
        _assistant("hi there", 1_700_000_001_000),
    ]
    result = await memorize(
        {"session_id": "test_chat_1", "messages": conversation}, is_final=True
    )

    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"
    assert result.message_count == 2

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    cell = cells[0]
    assert cell["track"] == "memorize"
    assert cell["raw_type"] == "Conversation"
    # MemCell has no single owner — sender_ids carries the participants.
    participants = json.loads(cell["sender_ids_json"])
    assert "u_alice" in participants

    assert _buffer_count(tmp_path) == 0

    episodes = _episode_paths(tmp_path)
    assert len(episodes) == 1
    rendered = episodes[0].read_text()
    assert "Test Subject" in rendered
    assert "Test body" in rendered
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Input-shape boundary cases (6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_empty_batch_non_final_is_skipped(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """``messages=[]`` with ``is_final=False`` leaves no cells and no md files."""
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    empty_payload = {"session_id": "test_empty_nonfinal", "messages": []}
    result = await memorize(empty_payload, is_final=False)

    assert result.status == "accumulated"
    assert result.message_count == 0
    assert _memcell_rows(tmp_path) == []
    assert _episode_paths(tmp_path) == []
|
||||
|
||||
|
||||
async def test_empty_batch_final_drains_empty_buffer(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """``messages=[]`` with ``is_final=True`` on a fresh session: no cells, no md."""
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    empty_payload = {"session_id": "test_empty_final", "messages": []}
    result = await memorize(empty_payload, is_final=True)

    assert result.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    assert _episode_paths(tmp_path) == []
|
||||
|
||||
|
||||
async def test_assistant_only_batch_accumulates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A batch without any ``role=user`` message is parked in the buffer."""
    # Empty boundary queue: no boundary LLM call is expected for this input.
    fake_llm = _make_fake_llm(boundary_responses=[])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    assistant_only = [
        _assistant("hi", 1_700_000_000_000),
        _assistant("anyone here?", 1_700_000_001_000),
    ]
    result = await memorize(
        {"session_id": "test_asst_only", "messages": assistant_only},
        is_final=False,
    )

    assert result.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    # Both messages stay parked in the buffer.
    assert _buffer_count(tmp_path) == 2
|
||||
|
||||
|
||||
async def test_single_user_message_accumulates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A lone user message produces no cell and is kept in the buffer."""
    # One empty boundary reply: boundary may be called, but returns no cuts.
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    lone_message = [_user("hello?", 1_700_000_000_000)]
    result = await memorize(
        {"session_id": "test_single", "messages": lone_message},
        is_final=False,
    )

    assert result.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    assert _buffer_count(tmp_path) == 1
|
||||
|
||||
|
||||
async def test_chat_mode_filters_tool_messages(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Chat mode drops ``role=tool`` and assistant-with-``tool_calls`` messages."""
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    question = _user("debug this", 1_700_000_000_000)
    tool_request = _msg(
        "assistant",
        "calling tool",
        timestamp=1_700_000_001_000,
        tool_calls=[
            {
                "id": "c1",
                "type": "function",
                "function": {"name": "x", "arguments": "{}"},
            }
        ],
    )
    tool_reply = _msg(
        "tool",
        "result",
        sender_id="tool",
        timestamp=1_700_000_002_000,
        tool_call_id="c1",
    )
    answer = _assistant("here's the answer", 1_700_000_003_000)

    result = await memorize(
        {
            "session_id": "test_chat_filter",
            "messages": [question, tool_request, tool_reply, answer],
        },
        is_final=True,
    )

    # After the filter: 1 user + 1 assistant text = 2 msgs → 1 cell on flush.
    assert result.status == "extracted"
    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    kept_ids = json.loads(cells[0]["message_ids_json"])
    # The tool reply and the assistant tool-call request were dropped.
    assert len(kept_ids) == 2
|
||||
|
||||
|
||||
async def test_duplicate_message_id_dedup_across_adds(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Replaying the same payload in a second add is deduped by message_id."""
    # Two boundary calls are budgeted, both answering "no cuts".
    fake_llm = _make_fake_llm(boundary_responses=[[], []])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    # message_id is derived from (session_id, ts_ms, idx); sending the same
    # payload twice yields the same ids, so the replay should insert nothing.
    sid = "test_dedup"
    payload = {
        "session_id": sid,
        "messages": [
            _user("hi", 1_700_000_000_000),
            _assistant("hi back", 1_700_000_001_000),
        ],
    }
    for _ in range(2):  # first add, then the replay
        await memorize(payload, is_final=False)
    await memorize({"session_id": "test_dedup", "messages": []}, is_final=True)

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    stored_ids = json.loads(cells[0]["message_ids_json"])
    assert len(stored_ids) == 2  # not 4 — dedup worked
    assert len(set(stored_ids)) == 2  # unique
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hard-limit cases (2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_hard_msg_limit_force_split(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Going over ``hard_msg_limit`` is expected to force a split."""
    # One canned boundary reply for the LLM call after the force-split.
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    # hard_msg_limit=3 with a batch of 5 msgs should force ~1 split before
    # the LLM is consulted.
    await memorize_env(
        mode="chat", fake_llm=fake_llm, hard_msg_limit=3, hard_token_limit=10_000
    )

    # Alternate user / assistant messages, one second apart.
    msgs = []
    for i in range(5):
        ts = 1_700_000_000_000 + i * 1000
        if i % 2 == 0:
            msgs.append(_user(f"u{i}", ts, sender="u_alice"))
        else:
            msgs.append(_assistant(f"a{i}", ts))

    result = await memorize(
        {"session_id": "test_hardmsg", "messages": msgs}, is_final=True
    )

    assert result.status == "extracted"
    cells = _memcell_rows(tmp_path)
    # Force-split + LLM final → at least 2 cells (forced + remaining).
    assert len(cells) >= 2
|
||||
|
||||
|
||||
async def test_hard_token_limit_force_split(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Going over ``hard_token_limit`` is expected to force a token-based split."""
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    # A tiny token budget, so even short content should trip the limit.
    await memorize_env(
        mode="chat", fake_llm=fake_llm, hard_msg_limit=500, hard_token_limit=20
    )

    # Four 200-character messages, alternating user / assistant.
    msgs = []
    for offset, letter in enumerate("abcd"):
        ts = 1_700_000_000_000 + offset * 1000
        body = letter * 200
        if offset % 2 == 0:
            msgs.append(_user(body, ts, sender="u_alice"))
        else:
            msgs.append(_assistant(body, ts))

    result = await memorize(
        {"session_id": "test_hardtok", "messages": msgs}, is_final=True
    )

    assert result.status == "extracted"
    assert len(_memcell_rows(tmp_path)) >= 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Flush state-machine cases (4)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_flush_on_virgin_session_is_noop(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Flushing a session that never received an add must not crash."""
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    flush_payload = {"session_id": "test_virgin_flush", "messages": []}
    result = await memorize(flush_payload, is_final=True)

    assert result.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
|
||||
|
||||
|
||||
async def test_add_then_flush_then_add(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A follow-up add still works after a flush has drained the buffer."""
    fake_llm = _make_fake_llm(boundary_responses=[[], []])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    sid = "test_add_flush_add"

    # First turn: add without flushing, then flush with an empty batch.
    first_turn = [
        _user("first", 1_700_000_000_000),
        _assistant("ack", 1_700_000_001_000),
    ]
    await memorize({"session_id": sid, "messages": first_turn}, is_final=False)
    await memorize({"session_id": sid, "messages": []}, is_final=True)

    cells_after_first_flush = len(_memcell_rows(tmp_path))
    assert cells_after_first_flush == 1

    # Second turn after the flush, added and flushed in one call.
    second_turn = [
        _user("second turn", 1_700_000_010_000),
        _assistant("ok", 1_700_000_011_000),
    ]
    await memorize({"session_id": sid, "messages": second_turn}, is_final=True)

    # The memcell table is cumulative across both turns.
    assert len(_memcell_rows(tmp_path)) == 2
|
||||
|
||||
|
||||
async def test_consecutive_flushes_second_is_noop(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Two flushes in a row: the second finds an empty buffer and does nothing."""
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    sid = "test_double_flush"
    turn = [
        _user("hi", 1_700_000_000_000),
        _assistant("ok", 1_700_000_001_000),
    ]
    await memorize({"session_id": sid, "messages": turn}, is_final=False)

    first_flush = await memorize({"session_id": sid, "messages": []}, is_final=True)
    second_flush = await memorize({"session_id": sid, "messages": []}, is_final=True)

    assert first_flush.status == "extracted"
    # Nothing was left for the second flush to extract.
    assert second_flush.status == "accumulated"
    assert len(_memcell_rows(tmp_path)) == 1
|
||||
|
||||
|
||||
async def test_flush_drains_assistant_only_buffer(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Assistant-only buffered messages are drained by a later final add."""
    fake_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=fake_llm)

    sid = "test_asst_then_flush"

    # Two assistant-only adds → both are expected to park in the buffer.
    assistant_turns = (
        ("a1", 1_700_000_000_000),
        ("a2", 1_700_000_001_000),
    )
    for body, ts in assistant_turns:
        await memorize(
            {"session_id": sid, "messages": [_assistant(body, ts)]},
            is_final=False,
        )
    assert _buffer_count(tmp_path) == 2

    # Add a user message and flush in the same call — boundary should now run.
    result = await memorize(
        {
            "session_id": sid,
            "messages": [_user("anyone there?", 1_700_000_002_000)],
        },
        is_final=True,
    )

    assert result.status == "extracted"
    assert _buffer_count(tmp_path) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multi-session cases (2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_two_sessions_are_isolated(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Two sessions flushed on one engine produce one separate cell each."""
    # One scripted boundary response per session.
    llm = _make_fake_llm(boundary_responses=[[], []])
    await memorize_env(mode="chat", fake_llm=llm)

    payload_a = {
        "session_id": "sess_A",
        "messages": [
            _user("hi from A", 1_700_000_000_000, sender="u_alice"),
            _assistant("ack A", 1_700_000_001_000),
        ],
    }
    await memorize(payload_a, is_final=True)

    payload_b = {
        "session_id": "sess_B",
        "messages": [
            _user("hi from B", 1_700_000_010_000, sender="u_bob"),
            _assistant("ack B", 1_700_000_011_000),
        ],
    }
    await memorize(payload_b, is_final=True)

    cell_rows = _memcell_rows(tmp_path)
    assert len(cell_rows) == 2
    assert sorted(row["session_id"] for row in cell_rows) == ["sess_A", "sess_B"]

    # A MemCell row has no single owner column; the participants are read
    # from the JSON-encoded sender id list instead.
    senders_by_session = {
        row["session_id"]: json.loads(row["sender_ids_json"]) for row in cell_rows
    }
    assert "u_alice" in senders_by_session["sess_A"]
    assert "u_bob" in senders_by_session["sess_B"]
|
||||
|
||||
|
||||
async def test_same_session_multi_add_concatenates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Repeated non-final adds pile up in one buffer; the flush folds them all."""
    llm = _make_fake_llm(boundary_responses=[[] for _ in range(3)])
    await memorize_env(mode="chat", fake_llm=llm)

    session_id = "test_multi_add"
    for round_no in range(3):
        shift = round_no * 2000
        exchange = [
            _user(f"u{round_no}", 1_700_000_000_000 + shift),
            _assistant(f"a{round_no}", 1_700_000_001_000 + shift),
        ]
        await memorize(
            {"session_id": session_id, "messages": exchange},
            is_final=False,
        )

    # 3 rounds x 2 messages, with no boundary cut scripted in between.
    assert _buffer_count(tmp_path) == 6

    flushed = await memorize({"session_id": session_id, "messages": []}, is_final=True)
    assert flushed.status == "extracted"

    cell_rows = _memcell_rows(tmp_path)
    assert len(cell_rows) == 1  # the flush yields exactly one cell
    folded_ids = json.loads(cell_rows[0]["message_ids_json"])
    assert len(folded_ids) == 6  # every buffered message is part of it
|
||||
433
tests/integration/test_memorize_window_segmentation.py
Normal file
433
tests/integration/test_memorize_window_segmentation.py
Normal file
@ -0,0 +1,433 @@
|
||||
"""Window-segmentation white-box integration tests for boundary stage.
|
||||
|
||||
Verifies the **read-merge-boundary-write** semantics of one ``memorize()``
|
||||
invocation, especially the buffer-as-tail invariant and the **buffer
|
||||
replacement** behaviour on successive calls:
|
||||
|
||||
Invariants under test
|
||||
---------------------
|
||||
I1. After one ``add`` with ``boundaries=[k]``:
|
||||
- memcell rows: prefix of merged input (first k messages)
|
||||
- buffer rows: tail (the remaining messages)
|
||||
- every input message_id lands in exactly one of {memcell, buffer}
|
||||
(covered ∧ disjoint)
|
||||
|
||||
I2. Tail ordering: every buffer row's timestamp ≥ every memcell row's
|
||||
timestamp (the tail is the **last** part of the time-ordered slice).
|
||||
|
||||
I3. Successive ``add`` consumes prior buffer:
|
||||
- Round 2's boundary sees ``prior_buffer + new_batch`` merged.
|
||||
- The prior tail (m3 say) ends up in **Round 2's memcell** if the
|
||||
boundary cuts past it, NOT in any buffer row.
|
||||
- The new buffer is the **fresh** tail, with the old buffer rows
|
||||
replaced entirely (semantics of ``_replace_buffer``).
|
||||
|
||||
I4. ``flush`` with ``is_final=True`` drains the buffer entirely — every
|
||||
remaining message ends up in some memcell.
|
||||
|
||||
This is **single-threaded sequential** (the concurrent race is covered
|
||||
separately in test_memorize_concurrent_session_lock.py). FakeLLM scripts
|
||||
boundary decisions deterministically so we own exact slicing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import json
|
||||
from collections.abc import AsyncIterator, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from everalgo.llm.types import ChatMessage as LLMChatMessage
|
||||
from everalgo.llm.types import ChatResponse
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from sqlalchemy import text
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.service.memorize import memorize
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FakeLLM with scripted boundary responses (FIFO queue, one pop per call)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _boundary_response(boundaries: list[int]) -> str:
    """Serialise a scripted boundary-detection reply as a JSON string.

    Args:
        boundaries: Cut indices to place under the ``"boundaries"`` key.

    Returns:
        JSON text with a fixed ``"reasoning"`` of ``"test"`` and
        ``"should_wait"`` set to ``False``.
    """
    payload = {
        "reasoning": "test",
        "boundaries": boundaries,
        "should_wait": False,
    }
    return json.dumps(payload)
|
||||
|
||||
|
||||
def _episode_response(title: str = "T", content: str = "B") -> str:
    """Serialise a canned episode reply (``title`` + ``content``) as JSON text."""
    payload = dict(title=title, content=content)
    return json.dumps(payload)
|
||||
|
||||
|
||||
def _make_scripted_llm(
    boundary_responses: list[list[int]],
) -> FakeLLMClient:
    """Build a fake LLM whose boundary answers are consumed first-in, first-out.

    A call counts as a boundary call when the first message's content
    contains ``"boundaries"`` or ``"memcell"`` (case-insensitive); it is
    answered with the next scripted cut list, or ``[]`` once the script is
    exhausted. Every other call gets the canned episode response.

    Args:
        boundary_responses: Cut lists to hand out, one per boundary call.
            The list is copied, so the caller's object is not mutated.
    """
    pending: list[list[int]] = list(boundary_responses)

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        if "boundaries" not in lowered and "memcell" not in lowered:
            # Not a boundary prompt: answer with the canned episode payload.
            return ChatResponse(content=_episode_response(), model="fake")
        cuts = pending.pop(0) if pending else []
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture — mirrors the locked-env fixture in the concurrent test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def memorize_env_scripted(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., AsyncMock]]:
    """Yield an async ``_setup(fake_llm=...)`` callable that boots memorize.

    Before yielding, the fixture points ``MemoryRoot.default`` at
    ``tmp_path``, creates ``.index/sqlite`` under it, resets a set of
    module-level attributes to ``None`` and resets the session-lock module.
    The test then awaits the yielded callable to install its fake LLM,
    create the SQLModel schema and start the engine. After the test, the
    engine is stopped (if it was started) and the sqlite engine is disposed.

    NOTE(review): the annotated yield type is ``Callable[..., AsyncMock]``,
    but ``_setup`` is a coroutine function returning ``None`` — confirm
    whether the annotation should be adjusted.
    """
    # Route every MemoryRoot.default() call to this test's temp directory.
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")
    lock_mod = importlib.import_module("everos.service._session_lock")

    # Clear module-level cached objects (presumably lazily-built singletons)
    # so they are rebuilt for this test; raising=False tolerates a missing
    # attribute.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
    lock_mod._reset_for_tests()

    # Mutable holder so the teardown below can see the engine _setup started.
    started: dict[str, Any] = {"engine": None}

    async def _setup(*, fake_llm: FakeLLMClient) -> None:
        """Configure env vars, install ``fake_llm``, create schema, start engine."""
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", "chat")
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        from everos.config import load_settings

        # Drop the cached settings so the env vars set above are picked up.
        load_settings.cache_clear()

        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies — orthogonal to boundary segmentation.
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        # Each extractor class is replaced by a factory returning a throwaway
        # object whose ``aextract`` is the AsyncMock above (always ``[]``).
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    # Teardown: stop the engine only if _setup got as far as starting it.
    if started["engine"] is not None:
        await started["engine"].stop()
    from everos.infra.persistence.sqlite import dispose_engine

    await dispose_engine()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers — message factory + state inspectors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Base timestamp (epoch milliseconds) that synthetic messages are offset from.
_BASE_TS = 1_700_000_000_000  # 2023-11-14, plenty of headroom
|
||||
|
||||
|
||||
def _msg(idx: int, sender: str = "alice") -> dict[str, Any]:
    """Create one user-role message dict whose timestamp grows with ``idx``.

    Args:
        idx: Message ordinal; also embedded in the content as ``msg-<idx>``.
        sender: Value stored under ``"sender_id"``.

    Returns:
        A dict with ``sender_id``, ``role``, ``timestamp`` and ``content``;
        the timestamp is ``_BASE_TS`` plus 1000 per ``idx`` step.
    """
    timestamp = _BASE_TS + idx * 1000
    return dict(
        sender_id=sender,
        role="user",
        timestamp=timestamp,
        content=f"msg-{idx}",
    )
|
||||
|
||||
|
||||
async def _buffer_rows(session_id: str) -> list[tuple[str, int]]:
    """Fetch the session's buffer rows as ``(message_id, timestamp_ms)`` pairs.

    Rows come back ordered by the ``timestamp`` column of
    ``unprocessed_buffer``.
    """
    from everos.component.utils.datetime import from_iso_format, to_timestamp_ms
    from everos.infra.persistence.sqlite import get_engine

    query = text(
        "SELECT message_id, timestamp FROM unprocessed_buffer "
        "WHERE session_id = :s ORDER BY timestamp"
    )
    async with get_engine().connect() as conn:
        fetched = (await conn.execute(query, {"s": session_id})).fetchall()
        # The raw column value is an ISO 8601 string (SQLAlchemy DateTime on
        # sqlite), so it is parsed before being converted to milliseconds.
        return [
            (message_id, to_timestamp_ms(from_iso_format(raw_ts)))
            for message_id, raw_ts in fetched
        ]
|
||||
|
||||
|
||||
async def _memcell_rows(session_id: str) -> list[tuple[str, list[str]]]:
    """Fetch the session's memcells as ``(memcell_id, message_ids)`` pairs.

    Rows are ordered by ``created_at``; ``message_ids_json`` is decoded
    from JSON for each row.
    """
    from everos.infra.persistence.sqlite import get_engine

    query = text(
        "SELECT memcell_id, message_ids_json FROM memcell "
        "WHERE session_id = :s ORDER BY created_at"
    )
    async with get_engine().connect() as conn:
        fetched = (await conn.execute(query, {"s": session_id})).fetchall()
        return [(cell_id, json.loads(encoded)) for cell_id, encoded in fetched]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# I1 + I2: single add with boundaries=[k] — prefix→memcell, suffix→buffer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_single_add_no_cut_accumulates_full_batch_in_buffer(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """With boundaries=[] nothing is carved: all three inputs stay buffered."""
    scripted = _make_scripted_llm([[]])
    await memorize_env_scripted(fake_llm=scripted)

    session = "s_no_cut"
    await memorize({"session_id": session, "messages": list(map(_msg, range(3)))})

    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)

    assert cells == [], f"expected no memcell, got {cells}"
    assert len(buffer) == 3, f"expected 3 buffer rows, got {len(buffer)}"

    # The buffered rows must come back in non-decreasing timestamp order.
    stamps = [row[1] for row in buffer]
    assert all(earlier <= later for earlier, later in zip(stamps, stamps[1:]))
|
||||
|
||||
|
||||
async def test_single_add_with_cut_splits_prefix_to_cell_suffix_to_buffer(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """A scripted cut of [2] over 3 messages: 2 go to a cell, 1 stays buffered."""
    scripted = _make_scripted_llm([[2]])
    await memorize_env_scripted(fake_llm=scripted)

    session = "s_cut"
    batch = [_msg(i) for i in range(3)]
    await memorize({"session_id": session, "messages": batch})

    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)

    # One memcell holding two distinct message ids.
    assert len(cells) == 1, cells
    _, carved_ids = cells[0]
    cell_msg_ids = set(carved_ids)
    assert len(cell_msg_ids) == 2

    # One leftover row in the buffer.
    assert len(buffer) == 1
    buf_msg_id, buf_ts = buffer[0]

    # Disjointness: the buffered message must not also be in the cell.
    assert buf_msg_id not in cell_msg_ids, (
        "buffer row leaked into memcell — buffer should be the tail only"
    )

    # I2: the tail is not older than the newest prefix message (m1, i.e.
    # the larger of the timestamps generated for indices 0 and 1).
    cell_max_ts = _BASE_TS + 1 * 1000
    assert buf_ts >= cell_max_ts, (
        f"tail ts ({buf_ts}) must be >= max cell ts ({cell_max_ts})"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# I3: successive add — prior buffer feeds into next memcell, then is REPLACED
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_second_add_consumes_prior_buffer_and_replaces_tail(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """The tail left by round 1 must move into round 2's cell, not linger."""
    # Scripted cuts:
    #   round 1 -> [2]: 3 inputs, expect cell=[m0,m1], buffer=[m2]
    #   round 2 -> [3]: merged [m2,m3,m4,m5], expect cell=[m2,m3,m4], buffer=[m5]
    scripted = _make_scripted_llm([[2], [3]])
    await memorize_env_scripted(fake_llm=scripted)

    session = "s_replace"

    # Round 1: messages m0..m2.
    first_batch = [_msg(i) for i in range(3)]
    await memorize({"session_id": session, "messages": first_batch})

    r1_cells = await _memcell_rows(session)
    r1_buffer = await _buffer_rows(session)
    assert len(r1_cells) == 1
    assert len(r1_buffer) == 1
    prior_tail_msg_id, _ = r1_buffer[0]

    # Round 2: fresh messages m3..m5.
    second_batch = [_msg(i) for i in range(3, 6)]
    await memorize({"session_id": session, "messages": second_batch})

    r2_cells = await _memcell_rows(session)
    r2_buffer = await _buffer_rows(session)

    # One cell per round, returned in created_at order.
    assert len(r2_cells) == 2, r2_cells
    round1_cell_msgs, round2_cell_msgs = (set(ids) for _, ids in r2_cells)

    # Key check 1: the previously buffered message was consumed by round 2.
    assert prior_tail_msg_id in round2_cell_msgs, (
        f"prior buffer msg {prior_tail_msg_id} should have been consumed "
        f"into round 2's memcell, but it's missing from {round2_cell_msgs}"
    )
    # Prior tail plus the first two new messages.
    assert len(round2_cell_msgs) == 3

    # The round 1 cell still has its two messages and never held the tail.
    assert len(round1_cell_msgs) == 2
    assert prior_tail_msg_id not in round1_cell_msgs

    # The buffer now holds a single row: the new tail.
    assert len(r2_buffer) == 1
    new_tail_id, _ = r2_buffer[0]

    # Key check 2: the old buffer entry was replaced rather than kept.
    assert new_tail_id != prior_tail_msg_id, (
        "old buffer entry survived into round 2's buffer — "
        "_replace_buffer is supposed to wipe + reinsert, not append"
    )

    # The buffer shares nothing with any memcell.
    all_cell_msgs = round1_cell_msgs | round2_cell_msgs
    assert new_tail_id not in all_cell_msgs

    # Conservation: cells plus buffer account for 6 distinct ids. Only the
    # count is checked, so the test does not depend on the id format.
    covered = all_cell_msgs | {new_tail_id}
    assert len(covered) == 6, (
        f"expected 6 distinct ids covered, got {len(covered)}: {covered}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# I4: flush drains buffer entirely (is_final=True path)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_flush_after_accumulation_drains_buffer_into_memcell(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """An uncut add buffers everything; the final flush moves it into one cell."""
    # Both scripted boundary answers are empty: one for the add, one
    # available for the flush call.
    scripted = _make_scripted_llm([[], []])
    await memorize_env_scripted(fake_llm=scripted)

    session = "s_flush"
    batch = [_msg(i) for i in range(3)]
    await memorize({"session_id": session, "messages": batch})

    # After the add: no memcell yet, three buffered rows.
    cells_before = await _memcell_rows(session)
    buffer_before = await _buffer_rows(session)
    assert cells_before == []
    assert len(buffer_before) == 3

    # Flush with an empty message list and is_final=True.
    await memorize({"session_id": session, "messages": []}, is_final=True)

    cells_after = await _memcell_rows(session)
    buffer_after = await _buffer_rows(session)

    assert len(cells_after) == 1, cells_after
    _, drained_ids = cells_after[0]
    assert len(drained_ids) == 3
    assert buffer_after == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sanity: mixed (empty and non-empty) boundaries across sequential adds keep conservation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_three_sequential_adds_conservation_no_loss(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """Across three adds with mixed cuts, each input id is stored exactly once."""
    # Scripted plan:
    #   add 1 -> []  : buffer=[m0,m1,m2]
    #   add 2 -> [4] : merged [m0..m5], cell=[m0..m3], buffer=[m4,m5]
    #   add 3 -> [3] : merged [m4..m8], cell=[m4,m5,m6], buffer=[m7,m8]
    scripted = _make_scripted_llm([[], [4], [3]])
    await memorize_env_scripted(fake_llm=scripted)

    session = "s_seq"
    total_inputs = 0
    for first_idx in range(0, 9, 3):
        batch = [_msg(i) for i in range(first_idx, first_idx + 3)]
        await memorize({"session_id": session, "messages": batch})
        total_inputs += len(batch)

    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)

    in_cells: set[str] = {mid for _, msg_ids in cells for mid in msg_ids}
    in_buffer = {mid for mid, _ in buffer}

    # Coverage: the union has as many distinct ids as messages were sent.
    covered = in_cells | in_buffer
    assert len(covered) == total_inputs, (
        f"expected {total_inputs} ids covered, got {len(covered)}"
    )
    # Disjointness: no id is both in a cell and in the buffer.
    assert in_cells.isdisjoint(in_buffer)
|
||||
614
tests/integration/test_ome_strategies_integration.py
Normal file
614
tests/integration/test_ome_strategies_integration.py
Normal file
@ -0,0 +1,614 @@
|
||||
"""End-to-end: emit pipeline event → strategies dispatch → SUCCESS + log lines."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as _dt
|
||||
import hashlib
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from everalgo.types import AgentCase, AtomicFact, ChatMessage, Foresight, MemCell
|
||||
from structlog.testing import capture_logs
|
||||
|
||||
from everos.memory.events import (
|
||||
AgentCaseExtracted,
|
||||
AgentPipelineStarted,
|
||||
EpisodeExtracted,
|
||||
UserPipelineStarted,
|
||||
)
|
||||
|
||||
|
||||
class _DeterministicHashEmbedder:
    """Test embedder that derives a unit vector from a hash of the text.

    The first 8 bytes of the text's SHA-256 digest seed
    ``numpy.random.default_rng``; a standard-normal float32 vector of
    ``dim`` entries is drawn from it and L2-normalised. The same text
    therefore always maps to the same vector, while different texts seed
    different random streams. The vectors carry no semantic meaning; they
    only give similarity-based clustering code non-constant inputs.
    """

    # Length of every returned embedding.
    dim: int = 1024

    async def embed(self, text: str) -> list[float]:
        """Return the deterministic unit vector for ``text``."""
        seed_bytes = hashlib.sha256(text.encode("utf-8")).digest()[:8]
        generator = np.random.default_rng(int.from_bytes(seed_bytes, "little"))
        vec = generator.standard_normal(self.dim).astype(np.float32)
        length = float(np.linalg.norm(vec))
        if not length:
            # Guard against dividing by zero for an all-zero draw.
            length = 1.0
        vec /= length
        return vec.tolist()

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed each text in order, one ``embed`` call at a time."""
        vectors: list[list[float]] = []
        for item in texts:
            vectors.append(await self.embed(item))
        return vectors
|
||||
|
||||
|
||||
def _sample_memcell() -> MemCell:
    """Build a three-message MemCell: two user senders plus one assistant.

    Messages are one second apart, and the cell's own timestamp equals the
    timestamp of the last message.
    """
    base_ts = 1_700_000_000_000
    specs = (
        ("m1", "user", "alice likes hiking", "u_alice"),
        ("m2", "user", "bob plans a trip", "u_bob"),
        ("m3", "assistant", "sounds good", "agent"),
    )
    messages = [
        ChatMessage(
            id=message_id,
            role=role,
            content=body,
            timestamp=base_ts + position * 1000,
            sender_id=sender,
        )
        for position, (message_id, role, body, sender) in enumerate(specs)
    ]
    return MemCell(items=messages, timestamp=base_ts + 2000)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_emit_dispatches_both_strategies_to_success(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Real OfflineEngine + APScheduler runtime; extractors + LLM mocked.

    Verifies the full chain: emit(event) → dispatcher (3 gates) → APS one-shot
    job → Runner.run → strategy body → mark_success.

    The sqlite engine built by ``_setup_system_db_schema`` is disposed in an
    outer ``finally`` so it is released even when creating, starting or
    stopping the OME engine raises.
    """
    import importlib

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")

    # Redirect MemoryRoot.default() to tmp_path so _get_engine() writes ome.db
    # under the test's isolated temp directory instead of the real ~/.everos.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    # Reset singletons so they rebuild against the patched MemoryRoot.
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    _af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    _fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    monkeypatch.setattr(_af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(_fs_mod, "_writer", None, raising=False)

    fake_fact = AtomicFact(
        owner_id="u_alice", content="hi", timestamp=1_700_000_000_000
    )
    fake_foresight = Foresight(
        owner_id="u_alice",
        foresight="x",
        evidence="y",
        timestamp=1_700_000_000_000,
    )

    with (
        patch(
            "everos.memory.strategies.extract_atomic_facts.AtomicFactExtractor"
        ) as mock_af,
        patch(
            "everos.memory.strategies.extract_foresight.ForesightExtractor"
        ) as mock_fs,
        patch(
            "everos.memory.strategies.extract_atomic_facts.get_llm_client",
            return_value=object(),
        ),
        patch(
            "everos.memory.strategies.extract_foresight.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as logs,
    ):
        mock_af.return_value.aextract = AsyncMock(return_value=[fake_fact])
        mock_fs.return_value.aextract = AsyncMock(return_value=[fake_foresight])

        # Ensure the sqlite dir exists before the engine creates ome.db.
        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

        # Outer try: whatever happens from schema setup onwards, the sqlite
        # engine is disposed. The teardown helper is a no-op when no engine
        # exists, so it is safe even if setup itself failed.
        try:
            await _setup_system_db_schema(monkeypatch)

            engine = svc._get_engine()
            await engine.start()
            # Inner try: stop the OME engine only once it has been started.
            try:
                await engine.emit(
                    UserPipelineStarted(
                        memcell_id="mc_a",
                        session_id="s1",
                        memcell=_sample_memcell(),
                    )
                )

                # Poll until both strategies reach SUCCESS (max 5 s).
                af_rows: list = []
                fs_rows: list = []
                for _ in range(50):
                    await asyncio.sleep(0.1)
                    af_rows = await engine.list_runs(
                        "extract_atomic_facts", status=RunStatus.SUCCESS
                    )
                    fs_rows = await engine.list_runs(
                        "extract_foresight", status=RunStatus.SUCCESS
                    )
                    if af_rows and fs_rows:
                        break

                assert af_rows, "expected SUCCESS RunRecord for extract_atomic_facts"
                assert fs_rows, "expected SUCCESS RunRecord for extract_foresight"
                assert af_rows[0].strategy_name == "extract_atomic_facts"
                assert fs_rows[0].strategy_name == "extract_foresight"
            finally:
                await engine.stop()
        finally:
            await _teardown_system_db_schema()

    af_logs = [r for r in logs if r.get("event") == "atomic_facts_extracted"]
    fs_logs = [r for r in logs if r.get("event") == "foresights_extracted"]
    assert af_logs, "expected atomic_facts_extracted log line"
    assert fs_logs, "expected foresights_extracted log line"
    # The sample MemCell has 2 user senders (u_alice, u_bob), so each
    # strategy gathers one result per sender and flattens them:
    #   extract_atomic_facts: 2 senders × 1 fake_fact each = 2
    #   extract_foresight:    2 senders × 1 fake_foresight each = 2
    assert af_logs[0]["count"] == 2
    assert fs_logs[0]["count"] == 2
|
||||
|
||||
|
||||
async def _setup_system_db_schema(monkeypatch: pytest.MonkeyPatch) -> None:
    """Recreate the sqlite engine and its schema for the current test.

    Any engine already held by ``sqlite_manager`` is disposed, the cached
    ``_engine`` / ``_session_factory`` attributes are reset to ``None`` via
    ``monkeypatch``, and a fresh engine is obtained on which
    ``SQLModel.metadata.create_all`` is run.

    Call :func:`_teardown_system_db_schema` from the test's ``finally``
    block afterwards so the engine created here is disposed explicitly.
    """
    from sqlmodel import SQLModel

    from everos.infra.persistence.sqlite import sqlite_manager

    leftover = sqlite_manager._engine  # noqa: SLF001
    if leftover is not None:
        await sqlite_manager.dispose_engine()

    # Clear the cached objects so get_engine() below builds new ones.
    for cached_attr in ("_engine", "_session_factory"):
        monkeypatch.setattr(sqlite_manager, cached_attr, None, raising=False)

    fresh_engine = sqlite_manager.get_engine()
    async with fresh_engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
|
||||
|
||||
|
||||
async def _teardown_system_db_schema() -> None:
    """Dispose the sqlite engine held by ``sqlite_manager``, if there is one.

    Counterpart of :func:`_setup_system_db_schema`; does nothing when no
    engine is currently set.
    """
    from everos.infra.persistence.sqlite import sqlite_manager

    if sqlite_manager._engine is None:  # noqa: SLF001
        return
    await sqlite_manager.dispose_engine()
|
||||
|
||||
|
||||
def _agent_memcell() -> MemCell:
    """Build a two-message MemCell: a user request and an assistant reply.

    The assistant message is sent by ``agent_42``; the cell's timestamp
    equals the timestamp of that last message.
    """
    base_ts = 1_700_000_000_000
    request = ChatMessage(
        id="m1",
        role="user",
        content="please summarise",
        timestamp=base_ts,
        sender_id="u_alice",
    )
    reply = ChatMessage(
        id="m2",
        role="assistant",
        content="here's the summary",
        timestamp=base_ts + 1000,
        sender_id="agent_42",
    )
    return MemCell(items=[request, reply], timestamp=base_ts + 1000)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_emit_dispatches_agent_case_strategy_to_success(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Mirror of the user-side e2e for the agent track.

    Verifies the full agent chain: AgentPipelineStarted emit → dispatcher
    (3 gates) → APS one-shot job → Runner.run → extract_agent_case body →
    mark_success. Catches breakage in event class wiring, trigger matching,
    engine registration, and the agent-side mock plumbing that unit tests
    bypass by calling the strategy function directly.

    The sqlite engine built by ``_setup_system_db_schema`` is disposed in an
    outer ``finally`` so it is released even when creating, starting or
    stopping the OME engine raises.
    """
    import importlib

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")

    # Point MemoryRoot.default() at the test's temp directory and clear the
    # cached engine / writer so they are rebuilt for this test.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    _ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    monkeypatch.setattr(_ac_mod, "_writer", None, raising=False)

    fake_case = AgentCase(
        id=uuid.uuid4().hex,
        timestamp=1_700_000_001_000,
        task_intent="summarise the doc",
        approach="read + condense",
        quality_score=0.8,
        key_insight="",
    )

    with (
        patch(
            "everos.memory.strategies.extract_agent_case.AgentCaseExtractor"
        ) as mock_ac,
        patch(
            "everos.memory.strategies.extract_agent_case.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as logs,
    ):
        mock_ac.return_value.aextract = AsyncMock(return_value=[fake_case])

        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

        # Outer try: whatever happens from schema setup onwards, the sqlite
        # engine is disposed. The teardown helper is a no-op when no engine
        # exists, so it is safe even if setup itself failed.
        try:
            await _setup_system_db_schema(monkeypatch)

            engine = svc._get_engine()
            await engine.start()
            # Inner try: stop the OME engine only once it has been started.
            try:
                await engine.emit(
                    AgentPipelineStarted(
                        memcell_id="mc_a",
                        session_id="s1",
                        memcell=_agent_memcell(),
                    )
                )

                # Poll for the SUCCESS run record: 50 tries, 0.1 s apart.
                ac_rows: list = []
                for _ in range(50):
                    await asyncio.sleep(0.1)
                    ac_rows = await engine.list_runs(
                        "extract_agent_case", status=RunStatus.SUCCESS
                    )
                    if ac_rows:
                        break

                assert ac_rows, "expected SUCCESS RunRecord for extract_agent_case"
                assert ac_rows[0].strategy_name == "extract_agent_case"
            finally:
                await engine.stop()
        finally:
            await _teardown_system_db_schema()

    ac_logs = [r for r in logs if r.get("event") == "agent_case_extracted"]
    assert ac_logs, "expected agent_case_extracted log line"
    assert ac_logs[0]["owner_ids"] == ["agent_42"]
    assert ac_logs[0]["fanout"] == 1
    assert ac_logs[0]["quality_score"] == 0.8
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_chain_e2e(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Chain: AgentCaseExtracted → trigger_skill_clustering (sqlite) →
    SkillClusterUpdated → extract_agent_skill → SUCCESS.

    Real ``cluster_by_llm`` algorithm path: a hash-based deterministic
    embedder feeds the top-K nearest-neighbor stage, and a ``FakeLLMClient``
    is primed with ``{"idx": "new"}`` (the "brand-new cluster" answer) —
    the recall + skip-threshold + prompt-render + JSON-parse pipeline is
    all real. Only mocked: LanceDB reads (case + skill),
    ``AgentSkillExtractor`` (downstream extractor; out of scope), and
    the markdown writer.

    The test passes when both strategies record a SUCCESS run, both
    structured log events are captured, and the frontmatter handed to the
    writer carries the cluster id reported by ``skill_cluster_updated``.
    """
    import importlib
    from unittest.mock import MagicMock

    from everalgo.testing.fake_llm import FakeLLMClient
    from everalgo.types import AgentSkill as AlgoAgentSkill

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")
    skill_mod = importlib.import_module("everos.memory.strategies.extract_agent_skill")

    # Point the default memory root at the per-test tmp dir and reset the
    # module-level singletons so nothing cached by another test is reused.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    monkeypatch.setattr(skill_mod, "_writer", None, raising=False)

    embedder = _DeterministicHashEmbedder()
    # FakeLLMClient: cluster_by_llm only invokes it when top-K similarity
    # falls below llm_skip_threshold (default 0.85). With a single new
    # cluster in an empty owner set, the recall stage returns no candidates
    # at all — so the LLM is never asked. Provide a "{idx: new}" response
    # anyway as belt-and-suspenders for future scenarios with seeded clusters.
    fake_llm = FakeLLMClient(responses=['{"idx": "new"}'])

    # Stand-in for the LanceDB row of the case that triggers the chain.
    target_lance = MagicMock()
    target_lance.entry_id = "ac_20260517_0001"
    target_lance.timestamp = _dt.datetime(2026, 5, 17, tzinfo=_dt.UTC)
    target_lance.task_intent = "summarise the doc"
    target_lance.approach = "read + condense"
    target_lance.quality_score = 0.8
    target_lance.key_insight = ""

    # Skill returned by the mocked extractor. ``cluster_id`` is left empty
    # here; the final assertions check that the frontmatter handed to the
    # writer carries the real cluster id.
    emitted_skill = AlgoAgentSkill(
        id=uuid.uuid4().hex,
        cluster_id="",
        name="summarise_doc",
        description="how to summarise docs",
        content="step 1: read; step 2: condense",
        confidence=0.7,
        maturity_score=0.5,
        source_case_ids=["ac_20260517_0001"],
    )

    with (
        patch(
            "everos.memory.strategies.trigger_skill_clustering.get_embedder",
            return_value=embedder,
        ),
        patch(
            "everos.memory.strategies.trigger_skill_clustering.get_llm_client",
            return_value=fake_llm,
        ),
        patch(
            "everos.memory.strategies.extract_agent_skill.agent_case_repo"
        ) as mock_case_repo,
        patch(
            "everos.memory.strategies.extract_agent_skill.agent_skill_repo"
        ) as mock_skill_repo,
        patch(
            "everos.memory.strategies.extract_agent_skill.get_llm_client",
            return_value=object(),
        ),
        patch(
            "everos.memory.strategies.extract_agent_skill.AgentSkillExtractor"
        ) as mock_extractor_cls,
        patch(
            "everos.memory.strategies.extract_agent_skill.AgentSkillWriter"
        ) as mock_writer_cls,
        capture_logs() as logs,
    ):
        mock_case_repo.find_by_owner_entry = AsyncMock(return_value=target_lance)
        mock_case_repo.find_by_owner_entries = AsyncMock(return_value=[])
        # Empty cluster (no prior skills) → small-cluster scalar path.
        mock_skill_repo.count_in_cluster = AsyncMock(return_value=0)
        mock_skill_repo.find_in_cluster = AsyncMock(return_value=[])
        mock_extractor_cls.return_value.aextract = AsyncMock(
            return_value=[emitted_skill]
        )
        mock_writer_cls.return_value.write_main = AsyncMock(return_value=None)

        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)

        engine = svc._get_engine()
        await engine.start()
        try:
            await engine.emit(
                AgentCaseExtracted(
                    memcell_id="mc_a",
                    case_entry_id="ac_20260517_0001",
                    task_intent="summarise the doc",
                    quality_score=0.8,
                    case_timestamp_ms=1_700_000_001_000,
                    agent_id="agent_42",
                )
            )

            # Poll (up to 50 × 0.1 s) until both strategies report SUCCESS.
            clu_rows: list = []
            skill_rows: list = []
            for _ in range(50):
                await asyncio.sleep(0.1)
                clu_rows = await engine.list_runs(
                    "trigger_skill_clustering", status=RunStatus.SUCCESS
                )
                skill_rows = await engine.list_runs(
                    "extract_agent_skill", status=RunStatus.SUCCESS
                )
                if clu_rows and skill_rows:
                    break

            assert clu_rows, "expected SUCCESS for trigger_skill_clustering"
            assert skill_rows, "expected SUCCESS for extract_agent_skill"
        finally:
            # Always stop the engine and drop the schema, even on failure.
            await engine.stop()
            await _teardown_system_db_schema()

    cluster_logs = [r for r in logs if r.get("event") == "skill_cluster_updated"]
    skill_logs = [r for r in logs if r.get("event") == "agent_skills_extracted"]
    assert cluster_logs, "expected skill_cluster_updated log line"
    assert skill_logs, "expected agent_skills_extracted log line"

    # Inspect the most recent SKILL.md write. Guard first: ``call_args`` is
    # ``None`` when the writer was never invoked, which would otherwise
    # surface as an opaque AttributeError on ``.kwargs`` below.
    write_args = mock_writer_cls.return_value.write_main.call_args
    assert write_args is not None, "expected AgentSkillWriter.write_main call"
    fm = write_args.kwargs["frontmatter"]
    # The writer must receive the cluster id produced by the clustering step.
    assert fm.cluster_id == cluster_logs[0]["cluster_id"]
    assert fm.name == "summarise_doc"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_profile_chain_e2e(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Drive the profile chain end to end from a single emitted event.

    Flow under test: ``EpisodeExtracted`` → ``trigger_profile_clustering``
    (sqlite) → ``ProfileClusterUpdated`` → ``extract_user_profile`` →
    SUCCESS.

    ``cluster_by_geometry`` (cosine + time-window) runs for real, fed by a
    hash-based deterministic embedder so the geometry stage sees
    well-spread unit vectors; ``cluster_repo`` is the real sqlite one.
    ``memcell_repo`` stays mocked because a genuine memcell row would
    need the boundary stage to run first, which is out of scope for a
    chain-emit test. ``ProfileExtractor`` and the md reader/writer are
    mocked as the algo + IO seams.
    """
    import importlib
    from unittest.mock import MagicMock

    from everalgo.types import Profile as AlgoProfile

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    # Values shared by the fake memcell, the emitted event and the asserts.
    owner = "u_alice"
    memcell_id = "mc_aaaaaaaaaaa1"
    event_ts_ms = 1_700_000_001_000
    utterance = "alice likes hiking"

    memorize_svc = importlib.import_module("everos.service.memorize")
    profile_mod = importlib.import_module(
        "everos.memory.strategies.extract_user_profile"
    )

    # Redirect the default memory root into tmp_path and clear the cached
    # module-level singletons before the engine is built.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    for module, attr in (
        (memorize_svc, "_ome_engine"),
        (profile_mod, "_writer"),
        (profile_mod, "_reader"),
    ):
        monkeypatch.setattr(module, attr, None, raising=False)

    hash_embedder = _DeterministicHashEmbedder()

    # Fake memcell row: only the two attributes configured here are set.
    memcell_payload = MemCell(
        items=[
            ChatMessage(
                id="m1",
                role="user",
                content=utterance,
                timestamp=event_ts_ms,
                sender_id=owner,
            ),
        ],
        timestamp=event_ts_ms,
    )
    memcell_row = MagicMock(
        memcell_id=memcell_id,
        payload_json=memcell_payload.model_dump_json(),
    )

    # Profile the mocked extractor hands back.
    extracted_profile = AlgoProfile.model_validate(
        {
            "owner_id": owner,
            "summary": "Alice is a hiker.",
            "timestamp": event_ts_ms,
            "explicit_info": ["lives in tokyo"],
            "implicit_traits": [],
        }
    )

    with (
        patch(
            "everos.memory.strategies.trigger_profile_clustering.get_embedder",
            return_value=hash_embedder,
        ),
        patch(
            "everos.memory.strategies.extract_user_profile.memcell_repo"
        ) as memcell_repo_mock,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileReader"
        ) as reader_cls_mock,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileWriter"
        ) as writer_cls_mock,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileExtractor"
        ) as extractor_cls_mock,
        patch(
            "everos.memory.strategies.extract_user_profile.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as captured,
    ):
        memcell_repo_mock.find_by_ids = AsyncMock(return_value=[memcell_row])
        # The reader yields ``None``; the final assertion expects mode "INIT".
        reader_cls_mock.return_value.read = AsyncMock(return_value=None)
        writer_cls_mock.return_value.write = AsyncMock(return_value=None)
        extractor_cls_mock.return_value.aextract = AsyncMock(
            return_value=extracted_profile
        )

        sqlite_dir = tmp_path / ".index" / "sqlite"
        sqlite_dir.mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)

        engine = memorize_svc._get_engine()
        await engine.start()
        try:
            trigger = EpisodeExtracted(
                memcell_id=memcell_id,
                episode_entry_id="ep_20260517_0001",
                episode_text=utterance,
                episode_timestamp_ms=event_ts_ms,
                owner_id=owner,
            )
            await engine.emit(trigger)

            # Poll at 0.1 s intervals, at most 50 times, until both
            # strategies have a SUCCESS run recorded.
            cluster_runs: list = []
            profile_runs: list = []
            polls_left = 50
            while polls_left and not (cluster_runs and profile_runs):
                polls_left -= 1
                await asyncio.sleep(0.1)
                cluster_runs = await engine.list_runs(
                    "trigger_profile_clustering", status=RunStatus.SUCCESS
                )
                profile_runs = await engine.list_runs(
                    "extract_user_profile", status=RunStatus.SUCCESS
                )

            assert cluster_runs, "expected SUCCESS for trigger_profile_clustering"
            assert profile_runs, "expected SUCCESS for extract_user_profile"
        finally:
            # Tear down in the same order regardless of the test outcome.
            await engine.stop()
            await _teardown_system_db_schema()

    cluster_events = []
    profile_events = []
    for record in captured:
        event_name = record.get("event")
        if event_name == "profile_cluster_updated":
            cluster_events.append(record)
        elif event_name == "user_profile_extracted":
            profile_events.append(record)

    assert cluster_events, "expected profile_cluster_updated log line"
    assert profile_events, "expected user_profile_extracted log line"
    first_profile_event = profile_events[0]
    assert first_profile_event["owner_id"] == "u_alice"
    assert first_profile_event["mode"] == "INIT"
|
||||
Reference in New Issue
Block a user