chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

View File

View File

@ -0,0 +1,269 @@
"""Private helpers shared across the search e2e tests.
* :func:`pick_query_seeds` — scans the session corpus's
``.atomic_facts/`` md files and returns a list of
``(owner_id, fact_text)`` tuples to use as deterministic search
queries. Bootstrapping queries off the corpus's own extraction
output gives us a closed-loop correctness signal — what was
written should be findable.
* :func:`assert_recall` — the canonical "this search returned at
least one sensible hit for ``owner``" assertion bundle. Used by
the keyword / vector / hybrid recall tests so the assertion logic
is in one place.
* :func:`flatten_hits` — collapses ``SearchData``'s four arrays into
one ``(owner_id, score, text)`` tuple list for relevance checks.
The helpers do **not** hardcode topical keywords ("hiking" / "work")
— they are derived from what the pipeline produced. This keeps the
suite stable across LLM-driven boundary-cut variance.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Any
import httpx
# Upper bound on the fact strings sampled per call — running every test
# against every fact would blow the LLM rerank budget.
_DEFAULT_SEED_LIMIT = 3

# Tokens are runs of word characters. The token helpers lowercase them and
# discard anything shorter than ``_MIN_TOKEN_LEN`` or listed in
# ``_STOPWORDS``: such tokens carry no signal for the "content overlap"
# check.
_TOKEN_RE = re.compile(r"\w+", re.UNICODE)
_MIN_TOKEN_LEN = 3
_STOPWORDS: frozenset[str] = frozenset(
    (
        "the and for that with this was has have are but from "
        "you she her his him they them their"
    ).split()
)
# ── Query seed extraction ───────────────────────────────────────────────
def pick_query_seeds(
    memory_root: Path,
    *,
    limit: int = _DEFAULT_SEED_LIMIT,
) -> list[tuple[str, str]]:
    """Collect up to ``limit`` ``(owner_id, fact_text)`` seeds from the corpus.

    Looks under ``<memory_root>/default_app/default_project/users/`` and,
    for every owner directory that has an ``.atomic_facts`` subdirectory,
    reads each ``*.md`` file below it (recursively) and takes the body of
    every ``### Fact`` section. Owner directories and md files are both
    visited in sorted order, so the returned seeds are deterministic for
    a given corpus.

    Args:
        memory_root: Root of the ingested memory tree.
        limit: Stop as soon as this many seeds have been collected.

    Returns:
        The collected ``(owner directory name, fact text)`` tuples.

    Raises:
        AssertionError: if the users directory is missing or no fact was
            found — that's a fixture failure, not a test failure, and
            should fail loudly.
    """
    users_dir = memory_root / "default_app" / "default_project" / "users"
    if not users_dir.is_dir():
        raise AssertionError(f"expected {users_dir} to exist after ingest")

    def _iter_candidates():
        # Lazily walk owners -> md files -> fact sections so that no file
        # is read once the caller has stopped consuming.
        for owner_dir in sorted(users_dir.iterdir()):
            facts_dir = owner_dir / ".atomic_facts"
            if not (owner_dir.is_dir() and facts_dir.is_dir()):
                continue
            for md_path in sorted(facts_dir.rglob("*.md")):
                for fact_text in _extract_fact_sections(md_path):
                    if fact_text:
                        yield owner_dir.name, fact_text

    picked: list[tuple[str, str]] = []
    for candidate in _iter_candidates():
        picked.append(candidate)
        if len(picked) >= limit:
            break

    if not picked:
        raise AssertionError(
            f"no atomic_fact md entries under {users_dir} — pipeline did "
            "not produce any facts; cannot bootstrap search queries"
        )
    return picked
def _extract_fact_sections(md: Path) -> list[str]:
    """Collect the body of each ``### Fact`` section in a daily-log md file.

    The file is scanned line by line. A line whose left-stripped text
    starts with ``### Fact`` opens a section; the section runs until the
    next line that starts with ``#`` (a heading at any level, including
    another ``### Fact``) or with ``<!-- /entry``, or until end of file.

    Args:
        md: Path of the markdown file; read as UTF-8.

    Returns:
        The stripped section bodies in file order, with empty bodies
        dropped.
    """
    bodies: list[str] = []
    # ``None`` while outside a Fact section, else the lines gathered so far.
    open_lines: list[str] | None = None

    for raw in md.read_text(encoding="utf-8").splitlines():
        head = raw.lstrip()
        opens = head.startswith("### Fact")
        closes = head.startswith(("#", "<!-- /entry"))

        # ``### Fact`` itself starts with ``#``, so it also closes any
        # section that is still open before starting the next one.
        if open_lines is not None and (opens or closes):
            text = "\n".join(open_lines).strip()
            if text:
                bodies.append(text)
            open_lines = None

        if opens:
            open_lines = []
        elif open_lines is not None:
            open_lines.append(raw)

    # A section still open at end of file is closed implicitly.
    if open_lines is not None:
        text = "\n".join(open_lines).strip()
        if text:
            bodies.append(text)
    return bodies
# ── Response flattening + assertions ────────────────────────────────────
def flatten_hits(data: dict[str, Any]) -> list[tuple[str | None, float, str]]:
    """Project the four ``SearchData`` arrays onto ``(owner, score, text)``.

    Arrays are visited in the order ``episodes``, ``profiles``,
    ``agent_cases``, ``agent_skills``; a missing array counts as empty.
    For each item:

    * ``owner`` is ``user_id`` for episodes / profiles and ``agent_id``
      for cases / skills (``None`` when the key is absent).
    * ``score`` is ``float(score)``, with a missing or falsy score read
      as ``0.0``.
    * ``text`` is the first truthy field among the kind's text fields,
      or ``""`` when none is set. Profile text is passed through
      ``str()``.

    Returns:
        One flat list so recall / partition tests need not branch on the
        hit kind.
    """
    # (array key, owner field, text fields in fallback order, stringify?)
    layout = (
        ("episodes", "user_id", ("episode", "summary", "subject"), False),
        ("profiles", "user_id", ("profile_data",), True),
        ("agent_cases", "agent_id", ("approach", "task_intent"), False),
        ("agent_skills", "agent_id", ("content", "description"), False),
    )

    def _first_truthy(item: dict[str, Any], keys: tuple[str, ...]) -> Any:
        for key in keys:
            value = item.get(key)
            if value:
                return value
        return ""

    flat: list[tuple[str | None, float, str]] = []
    for array_key, owner_key, text_keys, stringify in layout:
        for item in data.get(array_key, []):
            owner = item.get(owner_key)
            score = float(item.get("score") or 0.0)
            text = _first_truthy(item, text_keys)
            flat.append((owner, score, str(text) if stringify else text))
    return flat
async def assert_recall(
    client: httpx.AsyncClient,
    *,
    owner_id: str,
    query: str,
    method: str,
    min_score: float = 0.0,
    top_k: int = 10,
) -> dict[str, Any]:
    """Hit ``/search`` and lock the four standard recall invariants.

    1. **Status** 200 — the route compiled.
    2. **Existence** — at least one hit across the four arrays.
    3. **Owner partition** — every non-``None`` hit owner matches the
       queried owner. Profile hits may carry ``None`` so they're
       skipped from the check.
    4. **Score sanity** — the top-scored hit clears ``min_score``.

    Args:
        client: Async HTTP client wired to the app under test.
        owner_id: Owner sent as ``user_id`` and expected on every hit.
        query: Search query text.
        method: Search method name passed through to the route.
        min_score: Lower bound the best hit's score must reach.
        top_k: Maximum number of hits requested.

    Returns:
        The parsed response body so the caller can layer case-specific
        assertions on top.

    Raises:
        AssertionError: when any of the four invariants is violated.
    """
    resp = await client.post(
        "/api/v1/memory/search",
        json={
            "user_id": owner_id,
            "query": query,
            "method": method,
            "top_k": top_k,
        },
        timeout=120.0,
    )
    assert resp.status_code == 200, resp.text
    body = resp.json()
    hits = flatten_hits(body["data"])
    # The two message fragments are joined by implicit concatenation, so the
    # separator has to live inside the first one.
    assert hits, (
        f"no hits for owner={owner_id} query={query!r} method={method} — "
        "recall is broken"
    )
    for hit_owner, _score, _text in hits:
        if hit_owner is not None:
            assert hit_owner == owner_id, (
                f"partition leak: got owner={hit_owner!r} when querying {owner_id!r}"
            )
    top_score = max(score for _o, score, _t in hits)
    assert top_score >= min_score, (
        f"top hit score {top_score:.3f} < min {min_score} for method={method}"
    )
    return body
# ── Token utilities (for content-overlap checks) ────────────────────────
def query_tokens(query: str) -> set[str]:
    """Return the distinct lowercase content tokens of ``query``.

    A token is kept when it has at least ``_MIN_TOKEN_LEN`` characters and
    its lowercase form is not a stopword.
    """
    kept: set[str] = set()
    for token in _TOKEN_RE.findall(query):
        if len(token) < _MIN_TOKEN_LEN:
            continue
        lowered = token.lower()
        if lowered in _STOPWORDS:
            continue
        kept.add(lowered)
    return kept
def content_tokens_in_order(query: str) -> list[str]:
    """Return lowercase content tokens in first-occurrence order, no repeats.

    Used by the keyword test: the project's BM25 tokenizer (jieba) is
    Chinese-first and degrades to near-zero recall on single short
    English tokens. Multi-token phrases recall well in practice, so
    keyword queries are built from consecutive content tokens of the
    source fact rather than from an alphabetically sorted set.
    """
    long_enough = (
        token.lower()
        for token in _TOKEN_RE.findall(query)
        if len(token) >= _MIN_TOKEN_LEN
    )
    # ``dict.fromkeys`` keeps insertion order, so it dedups by first hit.
    return list(
        dict.fromkeys(low for low in long_enough if low not in _STOPWORDS)
    )

View File

@ -0,0 +1,83 @@
"""Re-run probes against an existing corpus + regenerate the report.
Reuses everything from :mod:`_run_full_report` except the ingest step —
points at the already-populated ``~/.everos-report-corpus`` and only
re-runs the search probes + report rendering. Useful when the corpus
is already there from a previous run and you just want to refresh the
retrieval section without paying for LLM ingestion again.
"""
from __future__ import annotations
import asyncio
import os
from pathlib import Path
import httpx
from dotenv import load_dotenv
# Resolve the directory three levels above this file's folder and load its
# ``.env`` before the ``_run_full_report`` import below. ``override=False``
# leaves variables that are already set in the process environment alone.
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
load_dotenv(_PROJECT_ROOT / ".env", override=False)
from _run_full_report import ( # noqa: E402
CONVERSATION,
CORPUS_ROOT,
REPORT_PATH,
inspect_artifacts,
render_report,
run_probes,
)
async def main() -> None:
    """Re-run the search probes against the existing corpus and rewrite the report.

    Skips ingestion entirely: the ingest section of the report is filled
    with placeholder rows derived from ``CONVERSATION`` and marked
    ``extracted (cached)``.

    Raises:
        SystemExit: if the corpus has no users directory, i.e.
            ``_run_full_report.py`` has not populated it yet.
    """
    # The populated tree lives under default_app/default_project/users —
    # the same path ``inspect_artifacts`` scans — not directly under the
    # corpus root.
    users_dir = CORPUS_ROOT / "default_app" / "default_project" / "users"
    if not users_dir.is_dir():
        raise SystemExit(f"{CORPUS_ROOT} not populated — run _run_full_report.py first")
    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)
    # Imported late so the env var above is set first; the settings cache
    # is cleared so a previously cached value cannot win.
    from everos.config import load_settings

    load_settings.cache_clear()
    print(f"[1/3] using corpus at {CORPUS_ROOT}")
    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        print("[2/3] inspecting artifacts + running probes ...")
        artifacts = await inspect_artifacts(CORPUS_ROOT)
        probes = await run_probes(client)
        print("[3/3] re-rendering report ...")
        md = render_report(
            memory_root=CORPUS_ROOT,
            ingest_summary={
                "batches": [
                    {
                        "idx": i,
                        "msg_count": len(b),
                        "status": "extracted (cached)",
                        "returned_count": len(b),
                    }
                    for i, b in enumerate(CONVERSATION)
                ],
                "flush_status": "extracted (cached)",
            },
            cascade_summary={
                "note": "cascade was force-completed via _rerun_probes.py "
                "after initial run; counts below are post-completion."
            },
            artifacts=artifacts,
            probes=probes,
        )
    REPORT_PATH.write_text(md, encoding="utf-8")
    print(f"{REPORT_PATH}")


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,660 @@
"""End-to-end report generator: fresh corpus → ingest → retrieve → markdown report.
Run with::
PYTHONPATH=src python tests/integration/search/_run_full_report.py
Writes a fresh ``~/.everos-report-corpus/`` memory_root, runs a small
synthetic 16-message conversation between two new users (``u_diana`` +
``u_ethan``) through ``/add`` + ``/flush``, waits for cascade drain, then
runs a curated set of search probes and dumps a structured markdown
report to ``tests/integration/search/SEARCH_REPORT.md``.
Not a pytest test — pure investigative script, real LLM, real embedder.
"""
from __future__ import annotations
import asyncio
import json
import os
import shutil
from pathlib import Path
import httpx
from dotenv import load_dotenv
# Load .env BEFORE any everos import so settings are correct.
# ``parents[3]`` is three levels above this file's folder; with the script
# at tests/integration/search/ that is the repository root.
# ``override=False`` keeps variables already present in the environment.
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
load_dotenv(_PROJECT_ROOT / ".env", override=False)
# ── Corpus location ────────────────────────────────────────────────────
# Memory root used by this script; ``main`` deletes and recreates it.
CORPUS_ROOT = Path.home() / ".everos-report-corpus"
# Destination of the rendered markdown report.
REPORT_PATH = _PROJECT_ROOT / "tests/integration/search/SEARCH_REPORT.md"
# Session id sent with every ``/add`` and ``/flush`` request.
SESSION_ID = "report_session_diana_ethan"
# ── Synthetic conversation (16 msgs, 2 batches) ────────────────────────
# Two batches of eight messages each. ``timestamp`` is epoch milliseconds;
# messages are 60 s apart within a batch and batch 2 starts one hour after
# batch 1, so the whole conversation is strictly chronological.
CONVERSATION = [
    # Batch 1 — introducing hobbies
    [
        {
            "sender_id": "u_diana",
            "role": "user",
            # Was 1778414400000, which placed the opening message two hours
            # after its own reply and after all of batch 2.
            "timestamp": 1778407200000,
            "content": "Hey Ethan! Just got back from a 3-day hike in Yosemite. "
            "My new Sony A7 camera is amazing for landscape shots.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407260000,
            "content": "Wow that sounds intense! I'd never survive without my "
            "espresso. How's the Rust programming learning going?",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407320000,
            "content": "Slow but steady. Working through the official book. "
            "The borrow checker still trips me up.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407380000,
            "content": "I'm marathon training — up to 15 miles long runs now. "
            "Plus I joined a jazz quartet on weekends.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407440000,
            "content": "That's awesome! Saxophone again?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407500000,
            "content": "Yeah, alto sax. We're playing at the Blue Note next month.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778407560000,
            "content": "I'll come watch! Speaking of trips, want to do "
            "that Iceland thing this summer?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778407620000,
            "content": "100% yes. I've been researching ring road photography spots.",
        },
    ],
    # Batch 2 — Iceland trip planning
    [
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778410800000,
            "content": "I want to see the Northern Lights and shoot some "
            "volcanic landscapes.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778410860000,
            "content": "We should rent a 4x4. The F-roads are insane I hear.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778410920000,
            "content": "And I want to try Icelandic lamb stew. You cook, right?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778410980000,
            "content": (
                "Yeah, I'll bring my Dutch oven. Maybe a cast iron pan for fish."
            ),
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778411040000,
            "content": "Perfect. Mid-July works for me — I have a Rust conference "
            "in late August.",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778411100000,
            "content": "July it is. I have the Boston Marathon qualifier in October "
            "so I can't go after.",
        },
        {
            "sender_id": "u_diana",
            "role": "user",
            "timestamp": 1778411160000,
            "content": "Let's book flights this weekend?",
        },
        {
            "sender_id": "u_ethan",
            "role": "user",
            "timestamp": 1778411220000,
            "content": "Deal. Also bringing my Olympus E-M1 for the landscapes.",
        },
    ],
]
# ── Probe set ───────────────────────────────────────────────────────────
# Each probe is one search request plus a human-readable expectation:
#   section          heading the probe is grouped under in the report
#   owner            sent as ``owner_id`` by ``run_probes``
#   query / method   forwarded unchanged in the search payload
#   include_profile  optional; forwarded only when truthy
#   expect           free text shown in the report. ``_grade`` also matches
#                    it case-insensitively for the substrings
#                    "should not leak", "empty arrays", "0 hits" and
#                    "profile" to pick its pass/fail rule.
PROBES: list[dict] = [
    # Owner-specific topical: should recall the right owner's episodes.
    {
        "section": "Owner-specific topical (diana)",
        "owner": "u_diana",
        "query": "hiking",
        "method": "hybrid",
        "expect": "diana's Yosemite episode",
    },
    {
        "section": "Owner-specific topical (diana)",
        "owner": "u_diana",
        "query": "Rust programming",
        "method": "hybrid",
        "expect": "diana's Rust learning facts",
    },
    {
        "section": "Owner-specific topical (diana)",
        "owner": "u_diana",
        "query": "photography",
        "method": "hybrid",
        "expect": "diana's camera (Sony A7) facts",
    },
    {
        "section": "Owner-specific topical (ethan)",
        "owner": "u_ethan",
        "query": "jazz",
        "method": "hybrid",
        "expect": "ethan's jazz quartet / sax facts",
    },
    {
        "section": "Owner-specific topical (ethan)",
        "owner": "u_ethan",
        "query": "marathon training",
        "method": "hybrid",
        "expect": "ethan's marathon facts",
    },
    {
        "section": "Owner-specific topical (ethan)",
        "owner": "u_ethan",
        "query": "cooking",
        "method": "hybrid",
        "expect": "ethan's Dutch oven / lamb stew facts",
    },
    # Shared topic — both should recall their own perspective.
    {
        "section": "Shared topic (Iceland)",
        "owner": "u_diana",
        "query": "Iceland trip",
        "method": "hybrid",
        "expect": "diana's planning episode",
    },
    {
        "section": "Shared topic (Iceland)",
        "owner": "u_ethan",
        "query": "Iceland trip",
        "method": "hybrid",
        "expect": "ethan's planning episode",
    },
    # Method comparison on the same query.
    {
        "section": "Method comparison (diana + 'Rust')",
        "owner": "u_diana",
        "query": "Rust",
        "method": "keyword",
        "expect": "BM25 single token",
    },
    {
        "section": "Method comparison (diana + 'Rust')",
        "owner": "u_diana",
        "query": "Rust",
        "method": "vector",
        "expect": "cosine ANN",
    },
    {
        "section": "Method comparison (diana + 'Rust')",
        "owner": "u_diana",
        "query": "Rust",
        "method": "hybrid",
        "expect": "fusion of BM25 + vector",
    },
    # Owner partition: diana searching for ethan's exclusive topic.
    {
        "section": "Owner partition",
        "owner": "u_diana",
        "query": "jazz quartet",
        "method": "hybrid",
        "expect": "should NOT leak ethan's content",
    },
    {
        "section": "Owner partition",
        "owner": "u_ethan",
        "query": "Rust programming",
        "method": "hybrid",
        "expect": "should NOT leak diana's content",
    },
    # Phrase + bigram.
    {
        "section": "Phrase queries",
        "owner": "u_diana",
        "query": "Northern Lights",
        "method": "keyword",
        "expect": "diana's Iceland aurora plans",
    },
    {
        "section": "Phrase queries",
        "owner": "u_ethan",
        "query": "Boston Marathon",
        "method": "keyword",
        "expect": "ethan's qualifier date",
    },
    # include_profile.
    {
        "section": "Profile attach",
        "owner": "u_diana",
        "query": "anything",
        "method": "hybrid",
        "include_profile": True,
        "expect": "should return diana's profile object",
    },
    # Unknown owner.
    {
        "section": "Unknown owner",
        "owner": "u_ghost_does_not_exist",
        "query": "hiking",
        "method": "hybrid",
        "expect": "empty arrays, status 200",
    },
    # Non-existent term.
    {
        "section": "Non-existent term",
        "owner": "u_diana",
        "query": "quantum blockchain pizza",
        "method": "keyword",
        "expect": "0 hits, status 200",
    },
]
# ── Pipeline runners ───────────────────────────────────────────────────
async def ingest(client: httpx.AsyncClient) -> dict:
    """Push every conversation batch through ``/add``, then ``/flush`` once.

    Any non-success HTTP status aborts via ``raise_for_status``.

    Returns:
        ``{"batches": [...], "flush_status": ...}`` where each batch row
        records its index, the number of messages sent, and the
        ``status`` / ``message_count`` reported back by ``/add``.
    """
    batch_rows: list[dict] = []
    for position, messages in enumerate(CONVERSATION):
        add_resp = await client.post(
            "/api/v1/memory/add",
            json={"session_id": SESSION_ID, "messages": messages},
            timeout=600.0,
        )
        add_resp.raise_for_status()
        added = add_resp.json()["data"]
        row = {
            "idx": position,
            "msg_count": len(messages),
            "status": added["status"],
            "returned_count": added["message_count"],
        }
        batch_rows.append(row)

    flush_resp = await client.post(
        "/api/v1/memory/flush",
        json={"session_id": SESSION_ID},
        timeout=600.0,
    )
    flush_resp.raise_for_status()
    flush_status = flush_resp.json()["data"]["status"]
    return {"batches": batch_rows, "flush_status": flush_status}
async def wait_cascade(
    *,
    expected_md_paths: int = 8,
    stable_checks: int = 5,
    deadline_seconds: float = 600.0,
) -> dict:
    """Poll the md-change queue until it has been *stably* drained.

    A single ``pending == 0`` reading is racy: OME async strategies
    (extract_foresight / extract_user_profile) emit md writes after
    ``/flush`` returns, so the queue can look empty for a moment before
    those writes arrive. A poll therefore only counts as "drained" when

    * ``pending == 0``, and
    * the queue holds at least ``expected_md_paths`` rows in total
      (pending + done + failed_retryable + failed_permanent),

    and the function returns only after ``stable_checks`` drained polls
    in a row. Polls are one second apart; any non-drained poll resets
    the streak.

    Returns:
        The ``done`` / ``failed_retryable`` / ``failed_permanent`` /
        ``max_lsn`` / ``last_processed_lsn`` values of the final poll.

    Raises:
        TimeoutError: if ``deadline_seconds`` elapse first.
    """
    from everos.infra.persistence.sqlite import md_change_state_repo

    drained_streak = 0
    async with asyncio.timeout(deadline_seconds):
        while True:
            snapshot = await md_change_state_repo.queue_summary()
            row_total = sum(
                (
                    snapshot.pending,
                    snapshot.done,
                    snapshot.failed_retryable,
                    snapshot.failed_permanent,
                )
            )
            drained = snapshot.pending == 0 and row_total >= expected_md_paths
            drained_streak = drained_streak + 1 if drained else 0
            if drained and drained_streak >= stable_checks:
                return {
                    "done": snapshot.done,
                    "failed_retryable": snapshot.failed_retryable,
                    "failed_permanent": snapshot.failed_permanent,
                    "max_lsn": snapshot.max_lsn,
                    "last_processed_lsn": snapshot.last_processed_lsn,
                }
            await asyncio.sleep(1.0)
async def inspect_artifacts(memory_root: Path) -> dict:
    """Collect LanceDB row counts and the list of md files on disk.

    Args:
        memory_root: Root of the memory tree to inspect.

    Returns:
        A dict with ``episode_rows``, ``atomic_fact_rows``,
        ``foresight_rows`` and ``user_profile_rows`` counts, plus
        ``md_files``: the sorted paths (relative to ``memory_root``) of
        every ``*.md`` file under ``default_app/default_project/users``.
        ``md_files`` is empty when that directory does not exist.
    """
    from everos.infra.persistence.lancedb import (
        atomic_fact_repo,
        dispose_connection,
        episode_repo,
        foresight_repo,
        get_connection,
        user_profile_repo,
        verify_business_schemas,
    )

    await get_connection()
    try:
        await verify_business_schemas()
        counts: dict = {
            "episode_rows": await episode_repo.count(),
            "atomic_fact_rows": await atomic_fact_repo.count(),
            "foresight_rows": await foresight_repo.count(),
            "user_profile_rows": await user_profile_repo.count(),
        }
    finally:
        # Dispose even when verification or a count raises, so a failed
        # inspection does not leave the connection behind.
        await dispose_connection()

    users_dir = memory_root / "default_app" / "default_project" / "users"
    md_files: list[str] = []
    if users_dir.is_dir():
        md_files = [
            str(path.relative_to(memory_root))
            for path in sorted(users_dir.rglob("*.md"))
        ]
    counts["md_files"] = md_files
    return counts
async def run_probes(client: httpx.AsyncClient) -> list[dict]:
    """Execute every probe in :data:`PROBES`; return captured rows.

    The HTTP status is recorded rather than asserted, so a failing probe
    shows up in the report instead of aborting the run.

    Returns:
        One row per probe with its ``section``, ``expect``, the request
        payload, the response ``status`` and trimmed ``episodes`` /
        ``profiles`` summaries (episode summary capped at 150 chars,
        profile excerpt at 200).
    """
    rows: list[dict] = []
    for probe in PROBES:
        payload: dict = {
            "owner_id": probe["owner"],
            "owner_type": "user",
            "query": probe["query"],
            "method": probe["method"],
            "top_k": 5,
        }
        if probe.get("include_profile"):
            payload["include_profile"] = True
        resp = await client.post("/api/v1/memory/search", json=payload, timeout=120.0)
        body = resp.json()
        # ``or {}`` / ``or []`` also cover an explicit null, which a plain
        # ``.get(key, default)`` would pass through and then crash on.
        data = body.get("data") or {}
        rows.append(
            {
                "section": probe["section"],
                "expect": probe["expect"],
                "request": payload,
                "status": resp.status_code,
                "episodes": [
                    {
                        "id": episode["id"],
                        "owner_id": episode["owner_id"],
                        "score": round(float(episode["score"]), 3),
                        "summary": (episode.get("summary") or "")[:150],
                        "atomic_facts_count": len(episode.get("atomic_facts", [])),
                    }
                    for episode in data.get("episodes") or []
                ],
                "profiles": [
                    {
                        "owner_id": profile.get("owner_id"),
                        "score": profile.get("score"),
                        "summary_excerpt": str(profile.get("profile_data", {}))[:200],
                    }
                    for profile in data.get("profiles") or []
                ],
            }
        )
    return rows
# ── Markdown report renderer ───────────────────────────────────────────
def render_report(
    *,
    memory_root: Path,
    ingest_summary: dict,
    cascade_summary: dict,
    artifacts: dict,
    probes: list[dict],
) -> str:
    """Render the whole search report as one markdown string.

    Args:
        memory_root: Corpus root, shown in the Setup section.
        ingest_summary: Needs ``batches`` (rows with ``idx``,
            ``msg_count``, ``status``) and ``flush_status``.
        cascade_summary: Any JSON-serialisable dict; dumped verbatim.
        artifacts: Needs the four ``*_rows`` counts and ``md_files``.
        probes: Rows in the shape produced by ``run_probes``.

    Returns:
        The markdown text with six numbered sections: setup, ingest,
        cascade drain, on-disk artifacts, retrieval probes, pass/fail.
    """
    lines: list[str] = []
    lines.append("# Search E2E Report — fresh corpus (u_diana + u_ethan)\n")
    lines.append(
        "Generated by [`_run_full_report.py`](_run_full_report.py). "
        "Two synthetic users with distinct hobbies feed a 16-message "
        "conversation through the full pipeline; the report below "
        "captures ingest stats, cascade drain numbers, on-disk "
        "artifacts, and the response of every curated search probe.\n"
    )
    # ── Section: Setup ────────────────────────────────────────────────
    lines.append("## 1. Setup\n")
    lines.append(f"- **Memory root**: `{memory_root}`\n")
    lines.append(f"- **Session id**: `{SESSION_ID}`\n")
    lines.append(
        "- **Users**: `u_diana` (hiking / Rust / photography), "
        "`u_ethan` (jazz / marathon / cooking)\n"
    )
    lines.append(
        f"- **Batches**: {len(CONVERSATION)} "
        f"({sum(len(b) for b in CONVERSATION)} messages total)\n"
    )
    # ── Section: Ingest stats ─────────────────────────────────────────
    lines.append("\n## 2. Ingest (`/add` × N + `/flush`)\n")
    lines.append("| batch | msg_count | status |\n")
    lines.append("|---|---|---|\n")
    for b in ingest_summary["batches"]:
        lines.append(f"| {b['idx']} | {b['msg_count']} | `{b['status']}` |\n")
    lines.append(f"\n**Flush status**: `{ingest_summary['flush_status']}`\n")
    # ── Section: Cascade drain ────────────────────────────────────────
    lines.append("\n## 3. Cascade drain (md → LanceDB sync)\n")
    lines.append("```\n")
    lines.append(json.dumps(cascade_summary, indent=2) + "\n")
    lines.append("```\n")
    # ── Section: Artifacts ────────────────────────────────────────────
    lines.append("\n## 4. On-disk artifacts\n")
    lines.append("### LanceDB row counts\n\n")
    lines.append("| table | rows |\n")
    lines.append("|---|---|\n")
    for k in (
        "episode_rows",
        "atomic_fact_rows",
        "foresight_rows",
        "user_profile_rows",
    ):
        lines.append(f"| {k.replace('_rows', '')} | {artifacts[k]} |\n")
    lines.append("\n### Markdown files\n\n")
    for f in artifacts["md_files"]:
        lines.append(f"- `{f}`\n")
    # ── Section: Probes ───────────────────────────────────────────────
    lines.append("\n## 5. Retrieval probes\n")
    lines.append(
        "Every row below is one POST to `/api/v1/memory/search`. "
        "`expected` is what the test designer expects to see; "
        "actual results are captured verbatim.\n"
    )
    current_section = None
    for row in probes:
        # Emit a sub-heading only when the section changes, so
        # consecutive probes of one section share a heading.
        if row["section"] != current_section:
            lines.append(f"\n### {row['section']}\n")
            current_section = row["section"]
        req = row["request"]
        lines.append(
            f"\n#### `{req['query']}` (method=`{req['method']}`, "
            f"owner=`{req['owner_id']}`)\n"
        )
        lines.append(f"\n- **Expected**: {row['expect']}\n")
        lines.append(f"- **Status**: {row['status']}\n")
        lines.append(f"- **Episodes returned**: {len(row['episodes'])}\n")
        if row["episodes"]:
            lines.append("\n| rank | score | owner | atomic_facts | summary |\n")
            lines.append("|---|---|---|---|---|\n")
            for i, ep in enumerate(row["episodes"], 1):
                # A literal pipe would split the markdown table cell.
                summary = ep["summary"].replace("|", "\\|")
                lines.append(
                    f"| {i} | {ep['score']} | `{ep['owner_id']}` | "
                    f"{ep['atomic_facts_count']} | {summary} |\n"
                )
        else:
            lines.append("\n_(no episodes)_\n")
        if row["profiles"]:
            lines.append(
                "\n**Profile attached**: "
                f"`{row['profiles'][0]['owner_id']}` "
                f"(excerpt: {row['profiles'][0]['summary_excerpt']!r})\n"
            )
    # ── Section: Pass/Fail summary ────────────────────────────────────
    lines.append("\n## 6. Pass / Fail summary\n")
    pf = _grade(probes)
    lines.append("| # | section | query | result |\n")
    lines.append("|---|---|---|---|\n")
    for r in pf:
        lines.append(
            f"| {r['idx']} | {r['section']} | `{r['query']}` | {r['verdict']} |\n"
        )
    # Every failing verdict from ``_grade`` starts with "❌". Testing for
    # that marker (rather than ``startswith("")``, which is always true)
    # keeps failed probes out of the pass count.
    passed = sum(1 for r in pf if not r["verdict"].startswith("❌"))
    lines.append(f"\n**Total: {passed}/{len(pf)} passed.**\n")
    return "".join(lines)
def _grade(probes: list[dict]) -> list[dict]:
    """Apply a soft heuristic pass/fail to each probe based on its 'expect'.

    The lowercase ``expect`` text selects the rule; the first match wins:

    * contains "should not leak" — pass when no returned episode belongs
      to a different owner than the one queried;
    * contains "empty arrays" or "0 hits" — pass when no episodes came
      back;
    * contains "profile" — pass when at least one profile came back;
    * otherwise — pass when the top episode belongs to the queried
      owner; no episodes at all is a failure.

    Returns:
        One dict per probe with ``idx`` (1-based), ``section``, ``query``
        and ``verdict``. Passing verdicts start with "✅", failing ones
        with "❌".
    """
    graded: list[dict] = []
    for i, row in enumerate(probes, 1):
        req = row["request"]
        expect = row["expect"].lower()
        episodes = row["episodes"]
        if "should not leak" in expect:
            leaked = any(ep["owner_id"] != req["owner_id"] for ep in episodes)
            verdict = "❌ leaked" if leaked else "✅ no leak"
        elif "empty arrays" in expect or "0 hits" in expect:
            verdict = "✅" if not episodes else f"❌ got {len(episodes)}"
        elif "profile" in expect:
            verdict = "✅" if row["profiles"] else "❌ no profile"
        elif episodes:
            top_owner = episodes[0]["owner_id"]
            verdict = (
                "✅" if top_owner == req["owner_id"] else f"❌ wrong owner: {top_owner}"
            )
        else:
            verdict = "❌ no hits"
        graded.append(
            {
                "idx": i,
                "section": row["section"],
                "query": req["query"],
                "verdict": verdict,
            }
        )
    return graded
# ── Main ────────────────────────────────────────────────────────────────
async def main() -> None:
    """Build a fresh corpus, ingest, probe, and write the markdown report.

    Steps: wipe and recreate ``CORPUS_ROOT``; point
    ``EVEROS_MEMORY__ROOT`` at it; start the app in-process; ingest the
    synthetic conversation; wait for the cascade to drain; inspect the
    on-disk artifacts; run the search probes; render the report and write
    it to ``REPORT_PATH``.
    """
    # Reset corpus to a known empty state.
    if CORPUS_ROOT.exists():
        shutil.rmtree(CORPUS_ROOT)
    CORPUS_ROOT.mkdir(parents=True)
    os.environ["EVEROS_MEMORY__ROOT"] = str(CORPUS_ROOT)
    # Reset cached singletons so they pick up the new env.
    from everos.config import load_settings

    load_settings.cache_clear()
    print(f"[1/6] fresh corpus at {CORPUS_ROOT}")
    from everos.entrypoints.api.app import create_app

    app = create_app()
    transport = httpx.ASGITransport(app=app)
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(transport=transport, base_url="http://test") as client,
    ):
        print("[2/6] ingesting via /add + /flush ...")
        ingest_summary = await ingest(client)
        print(f" batches={ingest_summary['batches']}")
        print("[3/6] waiting for cascade drain ...")
        cascade_summary = await wait_cascade()
        print(f" drained: {cascade_summary}")
        print("[4/6] inspecting on-disk artifacts ...")
        artifacts = await inspect_artifacts(CORPUS_ROOT)
        # Build the dict first: without the ``f`` prefix the comprehension
        # was printed as literal text, and a dict comprehension cannot be
        # written directly inside f-string braces anyway.
        row_counts = {k: v for k, v in artifacts.items() if k.endswith("_rows")}
        print(f" lancedb: {row_counts}")
        print(f"[5/6] running {len(PROBES)} search probes ...")
        probes = await run_probes(client)
        print("[6/6] rendering report ...")
        md = render_report(
            memory_root=CORPUS_ROOT,
            ingest_summary=ingest_summary,
            cascade_summary=cascade_summary,
            artifacts=artifacts,
            probes=probes,
        )
    REPORT_PATH.write_text(md, encoding="utf-8")
    print(f"{REPORT_PATH}")


if __name__ == "__main__":
    asyncio.run(main())

View File

@ -0,0 +1,269 @@
"""Session-scoped corpus fixture for ``tests/integration/search/``.
The pipeline that produces the search corpus (`/add` × 19 + `/flush` +
cascade drain) is the same one exercised by
``tests/integration/test_add_flush_pipeline_e2e.py`` — and it costs
~10 minutes against real LLMs. To keep the search test suite usable
in CI we run that pipeline **once per session** here, persist the
resulting memory_root to a session ``tmp_path``, and let every test
re-attach a fresh FastAPI lifespan against the on-disk corpus.
Layout::
_ingested_memory_root (session-scoped)
└── ingests LoCoMo conv_0 via the HTTP API, then tears
lifespan down. Returns the memory_root path with md +
sqlite + lancedb populated on disk.
search_client (function-scoped)
└── per-test ``httpx.AsyncClient`` wired to a freshly built
FastAPI app, ``EVEROS_MEMORY__ROOT`` pointed at the
session corpus. Singletons are reset so each test starts
with cold caches and the lifespan is the only thing
constructing them.
This is intentionally separate from ``tests/integration/conftest.py``
fixtures (which are function-scoped). Cross-suite isolation: tests
under ``search/`` cannot poison or be poisoned by the ones above.
All tests in this folder are marked ``slow`` via the module-level
``pytestmark`` in ``test_search_e2e.py`` — a non-``-m slow`` run skips
the whole suite cleanly without paying the ingest cost.
"""
from __future__ import annotations
import asyncio
import importlib
import os
from collections.abc import AsyncIterator, Awaitable, Callable, Generator
from pathlib import Path
import httpx
import pytest
import pytest_asyncio
from sqlalchemy import text
# Set ``EVEROS_REUSE_CORPUS=<path>`` to skip ingest and point the
# session fixture at an existing memory_root (md + lancedb already
# populated). Search is a read-only path, so no copy is needed — the
# fixture just sets ``EVEROS_MEMORY__ROOT`` to that directory.
# The path must contain ``default_app/default_project/users/``; the
# session fixture raises ``AssertionError`` otherwise.
_REUSE_ENV = "EVEROS_REUSE_CORPUS"
# Memorize-service module-level lazy singletons; reset between phases so
# stale clients / engines don't leak from ingest into per-test lifespans.
# These are attribute names on ``everos.service.memorize``; the reset
# helper sets each one to ``None``.
_MEMORIZE_SINGLETONS: tuple[str, ...] = (
    "_episode_writer",
    "_prompt_loader",
    "_user_pipeline",
    "_agent_pipeline",
    "_ome_engine",
)
# ── Session-scoped MonkeyPatch ─────────────────────────────────────────
@pytest.fixture(scope="session")
def _session_monkeypatch() -> Generator[pytest.MonkeyPatch, None, None]:
    """Yield a ``MonkeyPatch`` whose patches last for the whole session.

    Pytest's built-in ``monkeypatch`` fixture is function-scoped, but the
    ingest fixture has to set env vars and null singletons before the
    lifespan even starts, and those changes must outlive any single
    test. Everything patched through this instance is undone when the
    session ends.
    """
    with pytest.MonkeyPatch.context() as session_patch:
        yield session_patch
# ── Singleton reset helper ─────────────────────────────────────────────
def _reset_memorize_singletons(mp: pytest.MonkeyPatch) -> None:
    """Clear the settings cache and null the lazy module-level singletons.

    Called once before ingest (so the freshly-set ``EVEROS_MEMORY__ROOT``
    actually wins) and once per test (so the session corpus's lifespan
    sees clean caches).

    Args:
        mp: The ``MonkeyPatch`` used to set each attribute to ``None``.
            Attributes that do not exist are created rather than raising
            (``raising=False``).
    """
    from everos.config import load_settings

    load_settings.cache_clear()

    # Module path -> attribute names to null, in the order they are applied.
    reset_plan: dict[str, tuple[str, ...]] = {
        "everos.service.memorize": _MEMORIZE_SINGLETONS,
        "everos.component.llm.client": ("_llm_client",),
        "everos.memory.strategies.extract_atomic_facts": ("_writer",),
        "everos.memory.strategies.extract_foresight": ("_writer",),
    }
    # Import every module before patching anything, so an import failure
    # leaves all singletons untouched.
    loaded = {path: importlib.import_module(path) for path in reset_plan}
    for path, attr_names in reset_plan.items():
        for attr_name in attr_names:
            mp.setattr(loaded[path], attr_name, None, raising=False)
# ── Session corpus: ingest once ────────────────────────────────────────
@pytest.fixture(scope="session")
def _ingested_memory_root(
    tmp_path_factory: pytest.TempPathFactory,
    _session_monkeypatch: pytest.MonkeyPatch,
    long_conversation: dict,
) -> Path:
    """Provide a populated memory_root, ingesting at most once per session.

    Two modes, selected by the ``EVEROS_REUSE_CORPUS`` env var:

    * **unset / empty** — create a session temp dir and run the full
      ingest pipeline into it in a dedicated event loop. The lifespan
      inside ``_ingest`` properly closes LanceDB / SQLite handles on exit
      so the per-test lifespans can re-open them.
    * **set** — use that directory in place (search is read-only, so no
      copy is made) after checking it contains
      ``default_app/default_project/users/``.

    In both modes ``EVEROS_MEMORY__ROOT`` is pointed at the chosen root
    and the memorize singletons are reset before anything else runs.

    Marked **slow** transitively via ``pytestmark`` in
    ``test_search_e2e.py`` — without ``-m slow`` the test module is
    deselected and this fixture is never instantiated.

    Raises:
        AssertionError: if the reuse directory lacks the users subdir.
    """
    reuse = os.environ.get(_REUSE_ENV)
    if not reuse:
        memory_root = tmp_path_factory.mktemp("search_corpus")
    else:
        memory_root = Path(reuse).expanduser().resolve()
        users_dir = memory_root / "default_app" / "default_project" / "users"
        if not users_dir.is_dir():
            raise AssertionError(
                f"{_REUSE_ENV}={memory_root} has no "
                "default_app/default_project/users/ subdir — point it at a "
                "fully-ingested memory_root or unset to rebuild from scratch"
            )

    _session_monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(memory_root))
    _reset_memorize_singletons(_session_monkeypatch)

    if not reuse:
        asyncio.run(_ingest(memory_root, long_conversation))
    return memory_root
async def _ingest(memory_root: Path, long_conversation: dict) -> None:
    """Start the app once and push the LoCoMo fixture through /add + /flush.

    Every batch is posted to ``/api/v1/memory/add``; a single
    ``/api/v1/memory/flush`` follows, and the coroutine then waits for
    the cascade queue to drain while the lifespan is still open.

    Args:
        memory_root: Target memory root (not read here; the app picks it
            up from ``EVEROS_MEMORY__ROOT``).
        long_conversation: Fixture dict with ``everos_session_id`` and
            ``batches`` (each holding a ``messages`` list).
    """
    from everos.entrypoints.api.app import create_app

    message_fields = ("sender_id", "role", "timestamp", "content")
    app = create_app()
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(
            transport=httpx.ASGITransport(app=app), base_url="http://test"
        ) as client,
    ):
        session_id = long_conversation["everos_session_id"]
        for batch in long_conversation["batches"]:
            # Forward only the four fields the /add payload carries.
            payload = {
                "session_id": session_id,
                "messages": [
                    {field: msg[field] for field in message_fields}
                    for msg in batch["messages"]
                ],
            }
            add_resp = await client.post(
                "/api/v1/memory/add", json=payload, timeout=600.0
            )
            add_resp.raise_for_status()

        flush_resp = await client.post(
            "/api/v1/memory/flush",
            json={"session_id": session_id},
            timeout=600.0,
        )
        flush_resp.raise_for_status()
        await _poll_cascade_drained(deadline_seconds=600.0)
async def _poll_cascade_drained(*, deadline_seconds: float) -> None:
    """Wait until the cascade queue reports zero pending rows.

    Polls ``md_change_state_repo.queue_summary()`` every 0.5 s.

    Args:
        deadline_seconds: Overall budget; ``asyncio.timeout`` raises
            ``TimeoutError`` once it is exceeded.
    """
    from everos.infra.persistence.sqlite import md_change_state_repo

    async with asyncio.timeout(deadline_seconds):
        while (await md_change_state_repo.queue_summary()).pending != 0:
            await asyncio.sleep(0.5)
# ── Per-test client against the session corpus ─────────────────────────
@pytest_asyncio.fixture
async def search_client(
    _ingested_memory_root: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[httpx.AsyncClient]:
    """Yield a per-test ``AsyncClient`` bound to the session corpus.

    All memorize- and search-side singletons are reset before the
    lifespan starts, so the search manager builds fresh embedding /
    rerank / LLM clients for every test and cross-test client state
    cannot mask a regression.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(_ingested_memory_root))
    _reset_memorize_singletons(monkeypatch)

    # The search service keeps its own module-level singletons; clear
    # those as well so the re-attach is clean. ``*_resolved`` flags go
    # back to ``False``, everything else to ``None``.
    search_svc = importlib.import_module("everos.service.search")
    search_singletons = (
        "_manager",
        "_embedding",
        "_reranker",
        "_llm_client",
        "_embedding_resolved",
        "_rerank_resolved",
        "_llm_resolved",
    )
    for attr in search_singletons:
        if not hasattr(search_svc, attr):
            continue
        cleared = False if attr.endswith("_resolved") else None
        monkeypatch.setattr(search_svc, attr, cleared, raising=False)

    from everos.entrypoints.api.app import create_app

    app = create_app()
    async with (
        app.router.lifespan_context(app),
        httpx.AsyncClient(
            transport=httpx.ASGITransport(app=app), base_url="http://test"
        ) as client,
    ):
        yield client
# ── Diagnostic helpers (handy for tests that probe SQLite directly) ───
@pytest.fixture
def memcell_count() -> Callable[[], Awaitable[int]]:
    """Provide an async counter: ``await memcell_count() -> int``.

    The returned coroutine function runs ``SELECT COUNT(*) FROM memcell``
    against the current SQLite engine and returns ``0`` when the query
    yields no scalar.
    """

    async def _count() -> int:
        from everos.infra.persistence.sqlite import get_engine

        async with get_engine().connect() as conn:
            scalar = (
                await conn.execute(text("SELECT COUNT(*) FROM memcell"))
            ).scalar()
            return int(scalar or 0)

    return _count

View File

@ -0,0 +1,241 @@
"""End-to-end ``/api/v1/memory/search`` tests over a real LoCoMo corpus.
Six tests, each pinning one path through :class:`SearchManager`:
============================================ =================================
``test_keyword_recalls_atomic_fact_origin`` keyword (BM25 only)
``test_vector_recalls_atomic_fact_origin`` vector (cosine only)
``test_hybrid_with_profile_returns_profile`` hybrid + ``include_profile``
``test_partition_respects_owner_id`` cross-owner isolation
``test_unknown_owner_returns_empty_200`` empty response, no 500
``test_filter_dsl_compiles_and_excludes`` filters DSL → LanceDB ``where``
============================================ =================================
The corpus is built once by :func:`_ingested_memory_root` (session-
scoped fixture in ``conftest.py``) and shared across all tests. Each
test re-attaches a fresh lifespan via :func:`search_client`, so the
search-manager singletons rebuild from cold per-test — a regression
in the lazy-init path can't hide behind warm state from a prior test.
Bootstrapping: queries are derived from the corpus's own
``atomic_facts`` md files via :func:`pick_query_seeds`, not
hardcoded. Closed-loop correctness — what the pipeline extracted
should be findable by the search side.
Assertions follow the project's "conservation + lower bound + shape" ("守恒 + 下界 + 形状") convention
(see :func:`_helpers.assert_recall`): no exact ranks, no exact
scores, no exact ids. LLM-driven retrieval is non-deterministic
across runs; brittle assertions cause CI noise, not signal.
"""
from __future__ import annotations
from pathlib import Path
import httpx
import pytest
from ._helpers import (
assert_recall,
flatten_hits,
pick_query_seeds,
)
# Whole module is opt-in — it depends on ``_ingested_memory_root`` which
# spends ~10 min running real LLM + embedder against LoCoMo conv_0.
pytestmark = pytest.mark.slow
# ── 1. Keyword recall ──────────────────────────────────────────────────
async def test_keyword_recalls_atomic_fact_origin(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """BM25 must recall *some* episode for *some* fact-derived bigram.

    The project's tokenizer is jieba (CJK-first): single short English
    tokens, proper nouns and all-caps acronyms recall poorly, while
    ordinary lowercase content bigrams recall reliably (verified
    empirically). The test therefore walks the first N atomic facts,
    derives lowercase bigrams from each, and passes on the first bigram
    that returns at least one hit. That validates the BM25 plumbing
    without depending on which fact was sampled; the strict closed-loop
    recall claim belongs to the vector and hybrid tests.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=20)
    last_query: str | None = None
    for owner, fact in seeds:
        for query in _candidate_bigrams(fact):
            last_query = query
            payload = {
                "user_id": owner,
                "query": query,
                "method": "keyword",
                "top_k": 5,
            }
            resp = await search_client.post(
                "/api/v1/memory/search", json=payload, timeout=60.0
            )
            assert resp.status_code == 200, resp.text
            hits = flatten_hits(resp.json()["data"])
            if not hits:
                continue
            # The owner partition must hold on a successful keyword hit too.
            assert all(
                hit_owner == owner
                for hit_owner, _s, _t in hits
                if hit_owner is not None
            )
            return
    raise AssertionError(
        f"BM25 returned 0 hits across {len(seeds)} fact seeds; "
        f"last tried query={last_query!r}"
    )
def _candidate_bigrams(fact: str) -> list[str]:
"""Lowercase consecutive content-token bigrams from ``fact``.
Skip tokens that include uppercase letters in the original text
(proper nouns / acronyms — empirically poor BM25 recall under
jieba). Returns at most 5 candidates per fact, in source order.
"""
import re as _re
out: list[str] = []
tokens: list[str] = []
for raw in _re.findall(r"\w+", fact):
if raw.lower() == raw and len(raw) >= 3:
tokens.append(raw)
for i in range(len(tokens) - 1):
out.append(f"{tokens[i]} {tokens[i + 1]}")
if len(out) >= 5:
break
return out
# ── 2. Vector recall ───────────────────────────────────────────────────
async def test_vector_recalls_atomic_fact_origin(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """Recall the first seed fact via cosine ANN, independent of BM25 tokenisation."""
    first_seed = pick_query_seeds(_ingested_memory_root, limit=1)[0]
    owner, fact = first_seed
    # Identical text would score ~1.0 under cosine; the floor is kept
    # loose because the LLM-summarised episode text is not the verbatim
    # fact.
    await assert_recall(
        search_client,
        owner_id=owner,
        query=fact,
        method="vector",
        min_score=0.1,
    )
# ── 3. Hybrid + include_profile ────────────────────────────────────────
async def test_hybrid_with_profile_returns_profile(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """A hybrid search with ``include_profile=true`` must return the owner's profile."""
    owner, fact = pick_query_seeds(_ingested_memory_root, limit=1)[0]
    payload = {
        "user_id": owner,
        "query": fact,
        "method": "hybrid",
        "top_k": 5,
        "include_profile": True,
    }
    resp = await search_client.post(
        "/api/v1/memory/search", json=payload, timeout=120.0
    )
    assert resp.status_code == 200, resp.text
    profiles = resp.json()["data"]["profiles"]
    assert profiles, "include_profile=true but profiles[] empty"
    assert profiles[0]["user_id"] == owner
# ── 4. Owner partition ─────────────────────────────────────────────────
async def test_partition_respects_owner_id(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """Querying owner=A must not leak owner=B's data, even on shared topics.

    The target is always the first seed, so the queried owner is stable
    from run to run. Picking it out of a ``set`` of owner ids would
    depend on string hash randomisation and could switch owners between
    runs of the same corpus.
    """
    seeds = pick_query_seeds(_ingested_memory_root, limit=2)
    assert seeds, "need at least one owner in the corpus"
    target_owner, fact = seeds[0]
    body = await assert_recall(
        search_client,
        owner_id=target_owner,
        query=fact,
        method="hybrid",
    )
    # Agent tracks must be empty for user owners.
    assert body["data"]["agent_cases"] == []
    assert body["data"]["agent_skills"] == []
# ── 5. Unknown owner ───────────────────────────────────────────────────
async def test_unknown_owner_returns_empty_200(
    search_client: httpx.AsyncClient,
) -> None:
    """An owner the corpus never saw yields HTTP 200 with four empty arrays."""
    payload = {
        "user_id": "ghost_user_does_not_exist",
        "query": "anything",
        "method": "hybrid",
        "top_k": 5,
    }
    resp = await search_client.post(
        "/api/v1/memory/search", json=payload, timeout=60.0
    )
    assert resp.status_code == 200, resp.text
    data = resp.json()["data"]
    for track in ("episodes", "profiles", "agent_cases", "agent_skills"):
        assert data[track] == [], track
# ── 6. Filter DSL ──────────────────────────────────────────────────────
async def test_filter_dsl_compiles_and_excludes(
    search_client: httpx.AsyncClient,
    _ingested_memory_root: Path,
) -> None:
    """Send a ``session_id`` ne-filter and check every returned episode obeys it."""
    owner, fact = pick_query_seeds(_ingested_memory_root, limit=1)[0]
    bogus_session = "session_that_never_was"
    payload = {
        "user_id": owner,
        "query": fact,
        "method": "keyword",
        "top_k": 10,
        "filters": {"session_id": {"ne": bogus_session}},
    }
    resp = await search_client.post(
        "/api/v1/memory/search", json=payload, timeout=120.0
    )
    # A 200 shows the filter compiled and shipped to the backend without
    # an error.
    assert resp.status_code == 200, resp.text
    episodes = resp.json()["data"]["episodes"]
    # No real episode carries the bogus id, so the filter should not
    # remove any hit.
    # NOTE(review): the hit count is not asserted here — an empty
    # ``episodes`` list passes this check vacuously.
    assert all(ep["session_id"] != bogus_session for ep in episodes)

View File

@ -0,0 +1,316 @@
"""Strict md <-> lancedb consistency across all 4 daily-log kinds.
For each registered daily-log kind, seed N entries via the kind's
writer, wait for the cascade to drain, then assert exact equality
between md state and LanceDB state:
* ``frontmatter.entry_count == N``
* number of ``<!-- entry:... -->`` blocks == N
* ``lance_repo.count_rows(md_path=...) == N``
* lance ``entry_id`` set == md ``entry_id`` set
This is the strict counterpart to the loose ``>=`` assertions in
:mod:`test_add_flush_user_pipeline_e2e` (which can't be exact because
LLM output is non-deterministic).
Skill / profile are single-file (not daily-log) kinds and are covered
by the e2e pipeline tests where the OME drives real LLM emissions.
"""
from __future__ import annotations
import asyncio
import dataclasses
import datetime as _dt
from collections.abc import AsyncIterator, Callable, Mapping
from pathlib import Path
from typing import Any
import pytest
from sqlmodel import SQLModel
from everos.component.embedding import EmbeddingProvider
from everos.component.tokenizer import build_tokenizer
from everos.core.persistence import MarkdownReader, MemoryRoot
from everos.infra.persistence.lancedb import (
agent_case_repo,
atomic_fact_repo,
dispose_connection,
ensure_business_indexes,
episode_repo,
foresight_repo,
)
from everos.infra.persistence.lancedb.lancedb_manager import get_table
from everos.infra.persistence.lancedb.tables.agent_case import AgentCase
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
from everos.infra.persistence.lancedb.tables.episode import Episode
from everos.infra.persistence.lancedb.tables.foresight import Foresight
from everos.infra.persistence.markdown import (
AgentCaseWriter,
AtomicFactWriter,
EpisodeWriter,
ForesightWriter,
)
from everos.infra.persistence.sqlite import (
dispose_engine,
get_engine,
md_change_state_repo,
)
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
from everos.memory.cascade.registry import KIND_REGISTRY
from tests._consistency_assertions import _daily_log_sha_for_entry
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Reset the ClassVar write-lock pool before each test.

    See ``test_repository.py`` for the rationale.
    """
    from everos.core.persistence.lancedb.repository import (
        LanceRepoBase as _LanceRepoBase,
    )

    _LanceRepoBase._reset_locks_for_tests()
class _StubEmbedder(EmbeddingProvider):
    """Offline embedder that returns an all-zero vector of length ``dim``."""

    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector for ``text`` (the content is ignored)."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one independent zero vector per element of ``texts``."""
        width = self.dim
        return [[0.0] * width for _ in texts]
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Yield a ``MemoryRoot`` over a fresh tmp dir with schema and indexes ready.

    Points the memory root and the embedding settings at stub values,
    disposes any LanceDB / SQLite handles left over from earlier tests,
    creates the SQLModel tables and business indexes, and disposes both
    handles again on teardown.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for env_name, env_value in stub_env.items():
        monkeypatch.setenv(env_name, env_value)

    await dispose_connection()
    await dispose_engine()
    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
@dataclasses.dataclass(frozen=True)
class _DailyLogKindCase:
    """A single registered daily-log kind, packaged for parametrization."""

    # Kind name: used as the pytest id and matched against the ``name`` of
    # the specs in ``KIND_REGISTRY``.
    name: str
    # Path segment under ``default_app/default_project/``.
    scope: str  # "users" | "agents"
    # Directory (under the scope id) that holds this kind's daily-log files.
    dir_name: str
    # File-name prefix; files are named ``<file_prefix>-<YYYY-MM-DD>.md``.
    file_prefix: str
    # Called as ``writer_factory(root=memory_root)`` to build the md writer.
    writer_factory: Callable[[MemoryRoot], Any]
    # LanceDB repo queried via ``find_where`` for the row-level comparison.
    repo: Any
    # LanceDB table class; supplies ``TABLE_NAME`` for ``get_table``.
    table_cls: type
    # Called as ``build_item(scope_id, j)`` to build the j-th entry to append.
    build_item: Callable[[str, int], tuple[Mapping[str, object], Mapping[str, str]]]
def _af_item(scope_id: str, j: int):
return (
{
"owner_id": scope_id,
"session_id": f"s_{j}",
"timestamp": "2026-05-19T07:04:26+00:00",
"parent_id": f"mc_{j}",
"sender_ids": [scope_id],
},
{"Fact": f"af fact body {j}"},
)
def _ep_item(scope_id: str, j: int):
return (
{
"owner_id": scope_id,
"session_id": f"s_{j}",
"timestamp": "2026-05-19T07:04:26+00:00",
"parent_id": f"mc_{j}",
"sender_ids": [scope_id],
},
{"Subject": f"subj {j}", "Summary": f"sum {j}", "Content": f"content {j}"},
)
def _fs_item(scope_id: str, j: int):
return (
{
"owner_id": scope_id,
"session_id": f"s_{j}",
"timestamp": "2026-05-19T07:04:26+00:00",
"parent_id": f"mc_{j}",
"sender_ids": [scope_id],
},
{"Foresight": f"foresight body {j}"},
)
def _ac_item(scope_id: str, j: int):
return (
{
"owner_id": scope_id,
"session_id": f"s_{j}",
"timestamp": "2026-05-19T07:04:26+00:00",
"parent_id": f"mc_{j}",
"quality_score": 0.9,
},
{
"TaskIntent": f"task intent {j}",
"Approach": f"approach {j}",
"KeyInsight": f"insight {j}",
},
)
# One case per daily-log kind exercised by the strict-consistency test:
# three user-scoped kinds and one agent-scoped kind. ``dir_name`` and
# ``file_prefix`` are combined into the md path the test asserts on.
_KIND_CASES: list[_DailyLogKindCase] = [
    _DailyLogKindCase(
        name="atomic_fact",
        scope="users",
        dir_name=".atomic_facts",
        file_prefix="atomic_fact",
        writer_factory=AtomicFactWriter,
        repo=atomic_fact_repo,
        table_cls=AtomicFact,
        build_item=_af_item,
    ),
    _DailyLogKindCase(
        name="episode",
        scope="users",
        dir_name="episodes",
        file_prefix="episode",
        writer_factory=EpisodeWriter,
        repo=episode_repo,
        table_cls=Episode,
        build_item=_ep_item,
    ),
    _DailyLogKindCase(
        name="foresight",
        scope="users",
        dir_name=".foresights",
        file_prefix="foresight",
        writer_factory=ForesightWriter,
        repo=foresight_repo,
        table_cls=Foresight,
        build_item=_fs_item,
    ),
    _DailyLogKindCase(
        name="agent_case",
        scope="agents",
        dir_name=".cases",
        file_prefix="agent_case",
        writer_factory=AgentCaseWriter,
        repo=agent_case_repo,
        table_cls=AgentCase,
        build_item=_ac_item,
    ),
]
async def _wait_path_done(md_path: str, *, deadline: float = 15.0) -> None:
    """Block until the cascade state row for ``md_path`` is terminal.

    Polls ``md_change_state_repo.get_by_id`` every 50 ms until a row
    exists *and* its status is ``"done"`` or ``"failed"``. A separate
    "row exists" wait is unnecessary: a missing row simply fails the
    terminal check and is polled again.

    Args:
        md_path: State-row id (the md path relative to the memory root).
        deadline: Seconds allowed for the polling phase;
            ``asyncio.timeout`` raises ``TimeoutError`` when exceeded.
    """
    async with asyncio.timeout(deadline):
        while True:  # noqa: ASYNC110 - polling cascade state
            row = await md_change_state_repo.get_by_id(md_path)
            if row is not None and row.status in ("done", "failed"):
                break
            await asyncio.sleep(0.05)
    # Short settle pause after the terminal status is observed.
    # NOTE(review): presumably lets trailing index writes land before the
    # caller reads LanceDB — confirm.
    await asyncio.sleep(0.1)
@pytest.mark.parametrize("case", _KIND_CASES, ids=lambda c: c.name)
async def test_md_lance_strict_consistency_per_kind(
    cascade_runtime: MemoryRoot,
    case: _DailyLogKindCase,
) -> None:
    """Per-kind strict equality: md entries / frontmatter / lance rows all == N.

    Appends N entries through the kind's writer while a live cascade
    orchestrator is running, waits for the md path to reach a terminal
    state, then compares the markdown file with LanceDB: counts, entry-id
    sets, per-entry content hashes, and the final state-row status.
    """
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        n = 5
        scope_id = f"sid_{case.name}"
        bucket = _dt.date(2026, 5, 19)
        writer = case.writer_factory(root=memory_root)
        items = [case.build_item(scope_id, j) for j in range(n)]
        eids = await writer.append_entries(scope_id, items, date=bucket)
        assert len(eids) == n, f"writer returned {len(eids)} eids, expected {n}"

        md_path = (
            f"default_app/default_project/{case.scope}/{scope_id}/{case.dir_name}/"
            f"{case.file_prefix}-{bucket.isoformat()}.md"
        )
        # Both LanceDB lookups below filter on this same predicate.
        where_md_path = f"md_path = '{md_path}'"
        await _wait_path_done(md_path)

        # 1) frontmatter.entry_count == N
        parsed = await MarkdownReader.read(memory_root.root / md_path)
        assert parsed.frontmatter.get("entry_count") == n, (
            f"{case.name}: frontmatter.entry_count="
            f"{parsed.frontmatter.get('entry_count')}, expected {n}"
        )

        # 2) md entry blocks == N
        assert len(parsed.entries) == n, (
            f"{case.name}: md has {len(parsed.entries)} entry blocks, expected {n}"
        )

        # 3) lance count_rows(md_path) == N (strict equality)
        table = await get_table(case.table_cls.TABLE_NAME, case.table_cls)
        lance_count = await table.count_rows(filter=where_md_path)
        assert lance_count == n, (
            f"{case.name}: md={n} lance={lance_count} for {md_path}"
        )

        # 4) lance entry_id set == md entry_id set
        lance_rows = await case.repo.find_where(where_md_path, limit=100)
        lance_eids = {row.entry_id for row in lance_rows}
        md_eids = {entry.id for entry in parsed.entries}
        assert lance_eids == md_eids, (
            f"{case.name}: lance eids {lance_eids} != md eids {md_eids}"
        )

        # 4b) lance content_sha256 per entry == md-recomputed content_sha256.
        # Catches "id present but content mismatched" — orthogonal to (4).
        handler_cls = next(
            spec.handler_factory for spec in KIND_REGISTRY if spec.name == case.name
        )
        md_sha_by_id = {
            entry.id: _daily_log_sha_for_entry(handler_cls, entry.as_structured())
            for entry in parsed.entries
        }
        lance_sha_by_id = {row.entry_id: row.content_sha256 for row in lance_rows}
        assert md_sha_by_id == lance_sha_by_id, (
            f"{case.name}: per-entry content_sha256 mismatch "
            f"@ {md_path}: md={md_sha_by_id} lance={lance_sha_by_id}"
        )

        # 5) the state row is terminally done (not failed)
        state_row = await md_change_state_repo.get_by_id(md_path)
        assert state_row is not None and state_row.status == "done", (
            f"{case.name}: state row status={state_row.status if state_row else 'NONE'}"
        )
    finally:
        await orchestrator.stop()

View File

@ -0,0 +1,196 @@
"""Integration test for ``everos cascade`` CLI commands.
Drives the actual Typer commands against a real sqlite + lancedb under a
tmp memory root. Validates the in-process orchestration that
``test_cascade_command`` (unit) cannot reach: ``_runtime()`` context,
queue summary formatting, fix (no-rows path), and a full
``cascade sync <path>`` round-trip with a stub embedder.
The CLI commands call ``asyncio.run(_run())`` internally, so this test
is **synchronous** — pytest-asyncio's auto mode would otherwise wrap it
in an event loop, which collides with the CLI's own loop.
"""
from __future__ import annotations
import asyncio
import datetime as _dt
import re
from collections.abc import Iterator
from pathlib import Path
import pytest
from typer.testing import CliRunner
from everos.component.embedding import EmbeddingProvider
from everos.config import load_settings
from everos.entrypoints.cli.commands import cascade as cascade_mod
from everos.infra.persistence.lancedb import dispose_connection
from everos.infra.persistence.sqlite import dispose_engine
class _StubEmbedder(EmbeddingProvider):
    """Offline embedder that returns an all-zero vector of length ``dim``."""

    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector for ``text`` (the content is ignored)."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one independent zero vector per element of ``texts``."""
        width = self.dim
        return [[0.0] * width for _ in texts]
@pytest.fixture
def cli_runtime(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Iterator[Path]:
    """Yield a tmp memory root with clean singletons.

    The CLI bootstraps the schema itself, so this fixture only points the
    settings at ``tmp_path`` and stub embedding values, clears the
    settings cache, and disposes the LanceDB / SQLite handles before and
    after the test.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for env_name, env_value in stub_env.items():
        monkeypatch.setenv(env_name, env_value)
    load_settings.cache_clear()

    # Drop any singleton state left behind by a neighbouring test.
    asyncio.run(_dispose_all())
    yield tmp_path
    asyncio.run(_dispose_all())
async def _dispose_all() -> None:
    """Dispose the LanceDB connection first, then the SQLite engine."""
    for dispose in (dispose_connection, dispose_engine):
        await dispose()
def test_status_on_empty_queue(cli_runtime: Path) -> None:
    """``cascade status`` boots the runtime and prints zeros for a fresh DB."""
    result = CliRunner().invoke(cascade_mod.app, ["status"])
    assert result.exit_code == 0, result.stdout
    printed = result.stdout
    # On a fresh DB every counter is zero; the "0" check is a loose
    # substring match, not a per-counter assertion.
    for marker in ("queue:", "pending:", "0", "lsn:"):
        assert marker in printed
def test_fix_with_no_failed_rows(cli_runtime: Path) -> None:
    """Dry-run ``cascade fix`` (no ``--apply``) reports the empty state and exits 0."""
    outcome = CliRunner().invoke(cascade_mod.app, ["fix"])
    printed = outcome.stdout
    assert outcome.exit_code == 0, printed
    assert "no failed rows" in printed
def test_fix_apply_with_no_failed_rows(cli_runtime: Path) -> None:
    """``cascade fix --apply`` is a no-op when no rows have failed."""
    outcome = CliRunner().invoke(cascade_mod.app, ["fix", "--apply"])
    printed = outcome.stdout
    assert outcome.exit_code == 0, printed
    assert "no failed rows" in printed
def test_sync_on_empty_queue_with_stub_embedder(
    cli_runtime: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``cascade sync`` completes and reports zero rows on an empty queue."""
    from everos.component.tokenizer import build_tokenizer
    from everos.core.persistence import MemoryRoot
    from everos.memory.cascade import CascadeOrchestrator

    # The CLI normally builds its embedder via build_embedding_provider(),
    # which would try to connect; swap the orchestrator builder for one
    # wired to the stub embedder instead.
    def _stub_orchestrator() -> CascadeOrchestrator:
        memory_root = MemoryRoot.default()
        memory_root.ensure()
        return CascadeOrchestrator(
            memory_root=memory_root,
            embedder=_StubEmbedder(),
            tokenizer=build_tokenizer(),
        )

    monkeypatch.setattr(cascade_mod, "_build_orchestrator", _stub_orchestrator)
    result = CliRunner().invoke(cascade_mod.app, ["sync"])
    assert result.exit_code == 0, result.stdout
    for expected in ("sync complete", "processed 0 row(s)"):
        assert expected in result.stdout
def test_sync_with_path_outside_root_errors(
    cli_runtime: Path, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """``cascade sync <path>`` rejects paths outside the memory root."""
    other = tmp_path_factory.mktemp("other") / "x.md"
    other.write_text("# unrelated\n")
    result = CliRunner().invoke(cascade_mod.app, ["sync", str(other)])
    assert result.exit_code != 0
    # Typer.BadParameter is reported on stderr. ``result.output`` is used
    # rather than ``result.stdout + result.stderr`` because it holds the
    # combined terminal output on every Click version, whereas
    # ``result.stderr`` raises ``ValueError`` on Click < 8.2 when stderr
    # is not captured separately.
    #
    # The rich error box wraps the message at terminal width and pads
    # each line with ``│`` (U+2502 box-drawing), so ``not under`` and
    # ``memory root`` can end up separated by spaces *plus* box
    # characters *plus* a newline. ``\s`` doesn't match ``│``, so the
    # pattern uses ``[^\w]+`` (anything that isn't an alnum /
    # underscore) — that tolerates the rich frame without falsely
    # matching real text between the two tokens.
    output = result.output
    assert re.search(r"not under[^\w]+memory root", output), output
def test_sync_with_unmatched_path(
    cli_runtime: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A path under the root but matching no cascade kind exits 1 with a hint."""
    from everos.component.tokenizer import build_tokenizer
    from everos.core.persistence import MemoryRoot
    from everos.memory.cascade import CascadeOrchestrator

    def fake_build_orchestrator() -> CascadeOrchestrator:
        return CascadeOrchestrator(
            memory_root=MemoryRoot.default(),
            embedder=_StubEmbedder(),
            tokenizer=build_tokenizer(),
        )

    monkeypatch.setattr(cascade_mod, "_build_orchestrator", fake_build_orchestrator)
    # File under the root but in an unregistered subdirectory.
    unregistered = cli_runtime / "stuff" / "random.md"
    unregistered.parent.mkdir(parents=True, exist_ok=True)
    unregistered.write_text("# random\n")
    result = CliRunner().invoke(cascade_mod.app, ["sync", str(unregistered)])
    assert result.exit_code == 1
    # The hint is emitted with ``err=True``. ``result.output`` carries the
    # combined stdout + stderr on every Click version, whereas
    # ``result.stderr`` raises ``ValueError`` on Click < 8.2 when stderr
    # is not captured separately.
    output = result.output
    assert "does not match any registered cascade kind" in output
# Keep a baseline so future regressions show as a hard failure.
def test_status_handles_pending_rows(cli_runtime: Path) -> None:
    """``cascade status`` reports one pending row after a row is force-enqueued."""

    async def _seed_pending_row() -> None:
        # Bring the runtime up the way the CLI does, enqueue a single
        # row, and let the context manager dispose it again.
        async with cascade_mod._runtime():
            from everos.infra.persistence.sqlite import md_change_state_repo

            await md_change_state_repo.force_enqueue(
                "users/u1/episodes/episode-2026-01-01.md", "episode"
            )

    asyncio.run(_seed_pending_row())
    status = CliRunner().invoke(cascade_mod.app, ["status"])
    assert status.exit_code == 0, status.stdout
    # Exactly one row is pending. The LSN is not asserted here.
    assert "pending: 1" in status.stdout
# Reduce false negatives on date drift.
def test_resolve_relative_via_command_arg(cli_runtime: Path) -> None:
    """``cascade sync --help`` exits 0 with an episode md present under the root.

    NOTE(review): despite its name, this test never passes the md path to
    ``cascade sync``; it only verifies that the ``sync`` subcommand can
    be constructed and print its help.
    """
    episode_md = cli_runtime / "users" / "u1" / "episodes" / "episode-2026-05-25.md"
    episode_md.parent.mkdir(parents=True, exist_ok=True)
    # The heading's date is arbitrary; the file only needs to exist.
    heading_date = _dt.date.today().isoformat()
    episode_md.write_text(f"# {heading_date}\n")
    # The orchestrator does not need to drain anything: invoking ``--help``
    # on the sync subcommand is enough to show it constructs cleanly.
    help_result = CliRunner().invoke(cascade_mod.app, ["sync", "--help"])
    assert help_result.exit_code == 0

View File

@ -0,0 +1,193 @@
"""Repro: high-frequency atomic-replace bursts vs. cascade drain.
Drives N successive ``AtomicFactWriter.append_entries`` calls against the
same daily-log md, simulating multiple OME memcells landing in the same
owner+day bucket within a few ms of each other.
Before the watcher.on_deleted stat-guard, macOS FSEvents emits a paired
(moved, deleted) per ``os.replace`` and the synthetic deletion can
become the final ``change_type`` of the row — driving the worker into
``handle_deleted`` and wiping LanceDB while md is intact. Repeat the
test ~20x to surface the race if it ever resurfaces.
Scanner interval is held at 60s so the watcher path is the only thing
exercised (a scanner sweep would mask a watcher bug).
"""
from __future__ import annotations
import asyncio
import datetime as _dt
from collections.abc import AsyncIterator
from pathlib import Path
import anyio
import pytest
from sqlmodel import SQLModel
from everos.component.embedding import EmbeddingProvider
from everos.component.tokenizer import build_tokenizer
from everos.core.persistence import MarkdownReader, MemoryRoot
from everos.infra.persistence.lancedb import (
dispose_connection,
ensure_business_indexes,
)
from everos.infra.persistence.lancedb.lancedb_manager import get_table
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
from everos.infra.persistence.markdown import AtomicFactWriter
from everos.infra.persistence.sqlite import (
dispose_engine,
get_engine,
md_change_state_repo,
)
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Drop the per-table write-lock pool between tests.

    Mirrors the unit-test fixture in ``test_repository.py``.
    ``LanceRepoBase`` keeps its locks in a ClassVar dict, so without this
    reset the second test in the module fails with "Lock bound to a
    different event loop".
    """
    from everos.core.persistence.lancedb.repository import (
        LanceRepoBase as _LanceRepoBase,
    )

    _LanceRepoBase._reset_locks_for_tests()
class _StubEmbedder(EmbeddingProvider):
    """Offline embedder that returns an all-zero vector of length ``dim``."""

    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector for ``text`` (the content is ignored)."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one independent zero vector per element of ``texts``."""
        width = self.dim
        return [[0.0] * width for _ in texts]
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Yield a ``MemoryRoot`` over a fresh tmp dir with schema and indexes ready.

    Points the memory root and the embedding settings at stub values,
    disposes any LanceDB / SQLite handles left over from earlier tests,
    creates the SQLModel tables and business indexes, and disposes both
    handles again on teardown.
    """
    stub_env = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for env_name, env_value in stub_env.items():
        monkeypatch.setenv(env_name, env_value)

    await dispose_connection()
    await dispose_engine()
    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
async def _wait_drain(deadline: float = 15.0) -> None:
    """Wait until the cascade queue reports zero pending rows.

    Polls ``md_change_state_repo.queue_summary()`` every 50 ms;
    ``asyncio.timeout`` raises ``TimeoutError`` after ``deadline``
    seconds.
    """
    async with asyncio.timeout(deadline):
        while (await md_change_state_repo.queue_summary()).pending != 0:
            await asyncio.sleep(0.05)
async def _count_lance_rows(md_path: str) -> int:
    """Count the ``AtomicFact`` table rows whose ``md_path`` equals ``md_path``."""
    where_md_path = f"md_path = '{md_path}'"
    atomic_facts = await get_table(AtomicFact.TABLE_NAME, AtomicFact)
    return await atomic_facts.count_rows(filter=where_md_path)
async def _count_md_entries(absolute: Path) -> int:
    """Return the number of entries parsed from ``absolute``, or 0 if it is not a file."""
    if await anyio.Path(absolute).is_file():
        return len((await MarkdownReader.read(absolute)).entries)
    return 0
@pytest.mark.parametrize(
    "n_calls,items_per_call,inter_call_sleep_ms",
    [
        (20, 1, 0.0),
        (20, 1, 1.0),
        (20, 3, 0.0),
        (10, 3, 5.0),
    ],
)
async def test_high_freq_atomic_fact_append_no_loss(
    cascade_runtime: MemoryRoot,
    n_calls: int,
    items_per_call: int,
    inter_call_sleep_ms: float,
) -> None:
    """Burst-append atomic facts and verify LanceDB ends up with every entry.

    Performs ``n_calls`` ``append_entries`` calls of ``items_per_call`` items
    each into one owner/date bucket (sleeping ``inter_call_sleep_ms`` between
    calls when positive), drains the cascade queue, then compares the md
    entry count with the LanceDB row count for that ``md_path``.
    """
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        owner_id = "bob"
        bucket = _dt.date(2026, 5, 19)
        writer = AtomicFactWriter(root=memory_root)
        pause_seconds = inter_call_sleep_ms / 1000.0

        total = 0
        for i in range(n_calls):
            batch = []
            for j in range(items_per_call):
                inline_fields = {
                    "owner_id": owner_id,
                    "session_id": f"s_{i}_{j}",
                    "timestamp": "2026-05-19T07:04:26+00:00",
                    "parent_id": f"mc_{i}",
                    "sender_ids": [owner_id],
                }
                batch.append((inline_fields, {"Fact": f"fact body call={i} item={j}"}))
            await writer.append_entries(owner_id, batch, date=bucket)
            total += items_per_call
            if inter_call_sleep_ms > 0:
                await asyncio.sleep(pause_seconds)

        await _wait_drain(deadline=15.0)
        # FSEvents delivers kernel-to-userspace with ~30-100ms latency, so the
        # watcher callbacks for the last few os.replace() bursts can arrive
        # after sqlite has already reported `pending == 0` once. Give that
        # tail 500ms to land, then drain a second time.
        await asyncio.sleep(0.5)
        await _wait_drain(deadline=15.0)

        md_path = (
            f"default_app/default_project/users/{owner_id}/.atomic_facts/"
            f"atomic_fact-{bucket.isoformat()}.md"
        )
        md_entries = await _count_md_entries(memory_root.root / md_path)
        lance_rows = await _count_lance_rows(md_path)
        state_row = await md_change_state_repo.get_by_id(md_path)

        # First make sure the writer itself did not drop anything ...
        assert md_entries == total, (
            f"writer self-check failed: total={total} md={md_entries}"
        )
        # ... then that the cascade mirrored every md entry into LanceDB.
        assert lance_rows == md_entries, (
            f"CASCADE LOSS: md={md_entries} lance={lance_rows} "
            f"state={state_row.status if state_row else 'NONE'} "
            f"lsn={state_row.lsn if state_row else None}"
        )
    finally:
        await orchestrator.stop()

View File

@ -0,0 +1,242 @@
"""End-to-end cascade flow.
Drives the full pipeline once with real components except the embedder
(stubbed so the test never hits an external API):
EpisodeWriter.append_entry ─▶ md file on disk
watchdog FSEvents thread ─▶ CascadeWatcher._enqueue_async
md_change_state.upsert ─▶ pending row
CascadeWorker.drain_once ─▶ EpisodeHandler.handle_added_or_modified
episode_repo.upsert ─▶ LanceDB row
Asserts the row landed with the right shape (md_path, content_sha256,
episode tokens, vector dim). Validates that the three loops actually
talk to each other — no unit test covers the cross-loop wiring.
"""
from __future__ import annotations
import asyncio
import datetime as _dt
from collections.abc import AsyncIterator
from pathlib import Path
import pytest
from sqlmodel import SQLModel
from everos.component.embedding import EmbeddingProvider
from everos.component.tokenizer import build_tokenizer
from everos.core.persistence import MemoryRoot
from everos.infra.persistence.lancedb import (
dispose_connection,
ensure_business_indexes,
episode_repo,
)
from everos.infra.persistence.markdown import EpisodeWriter
from everos.infra.persistence.sqlite import (
dispose_engine,
get_engine,
md_change_state_repo,
)
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
class _StubEmbedder(EmbeddingProvider):
    """Zero-vector embedder (1024 dims) that counts ``embed`` calls.

    ``embed_batch`` goes through ``embed`` for each text, so ``calls`` also
    reflects batched requests.
    """

    dim = 1024

    def __init__(self) -> None:
        self.calls = 0

    async def embed(self, text: str) -> list[float]:
        """Record the call and return a zero vector of length ``dim``."""
        self.calls += 1
        return [0.0] * self.dim

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Embed each text in order via :meth:`embed`."""
        vectors = []
        for text in texts:
            vectors.append(await self.embed(text))
        return vectors
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Boot sqlite + lancedb against a tmp memory_root; dispose at teardown.

    Cascade relies on module-level singletons. They are disposed before
    setup so nothing leaks in from neighbouring tests, and disposed again
    afterwards so the next test starts from a clean slate.
    """
    # The lifespan factory requires embedding settings; the stub never
    # touches the network, but the orchestrator still expects the env to
    # look valid.
    env_overrides = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
async def _poll(condition, *, deadline_seconds: float = 10.0, interval: float = 0.05): # type: ignore[no-untyped-def]
"""Poll ``condition()`` (async) until truthy, or :class:`TimeoutError`.
Wraps the loop in :func:`asyncio.timeout` so the test surfaces a
clean ``TimeoutError`` instead of silently spinning. The polling
interval is a low-cost sleep; the deadline is the hard cap.
"""
async with asyncio.timeout(deadline_seconds):
while True:
result = await condition()
if result:
return result
await asyncio.sleep(interval)
async def test_append_to_md_propagates_to_lancedb(
    cascade_runtime: MemoryRoot,
) -> None:
    """Happy path: writer append → watcher → state row → worker → LanceDB."""
    memory_root = cascade_runtime
    embedder = _StubEmbedder()
    # The worker polls fast so the test finishes in seconds. The scanner
    # interval stays long so the watcher path is the one exercised here
    # (a quick scanner would mask a watcher bug).
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=10,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=embedder,
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    # watchdog API gap: start() returns before the kqueue / FSEvents
    # subscription is live on macOS, so give the Observer thread a beat.
    await asyncio.sleep(0.3)
    try:
        owner = "u_integration"
        parent = "mc_integration_parent"
        body = "the user mentioned dark mode preference"

        eid = await EpisodeWriter(memory_root).append_entry(
            owner,
            inline={
                "owner_id": owner,
                "session_id": "s_int",
                "timestamp": "2026-05-14T10:00:00+00:00",
                "parent_id": parent,
                "sender_ids": [owner],
            },
            sections={
                "Subject": "Test",
                "Summary": "Stub",
                "Content": body,
            },
            date=_dt.date(2026, 5, 14),
        )
        md_path = (
            "default_app/default_project/users/u_integration/episodes/"
            "episode-2026-05-14.md"
        )

        # 1. The watcher enqueues the path.
        queued = await _poll(
            lambda: md_change_state_repo.get_by_id(md_path), deadline_seconds=5.0
        )
        assert queued.kind == "episode"

        # 2. The worker drives it to done.
        async def _finished_row():  # type: ignore[no-untyped-def]
            state = await md_change_state_repo.get_by_id(md_path)
            if state is None or state.status != "done":
                return None
            return state

        finished = await _poll(_finished_row, deadline_seconds=10.0)
        assert finished.error is None

        # 3. LanceDB carries the typed episode row.
        ep_row = await episode_repo.get_by_id(f"{owner}_{eid.format()}")
        assert ep_row is not None
        assert ep_row.episode == body
        assert ep_row.episode_tokens  # tokenizer ran
        assert ep_row.md_path == md_path
        assert ep_row.parent_id == parent
        assert ep_row.content_sha256
        assert len(ep_row.vector) == 1024
        assert embedder.calls >= 1
    finally:
        await orchestrator.stop()
async def test_delete_md_wipes_lancedb_row(
    cascade_runtime: MemoryRoot,
) -> None:
    """Append + index one episode, ``unlink`` its md file, expect the row to vanish."""
    memory_root = cascade_runtime
    cascade_config = CascadeConfig(
        scan_interval_seconds=60.0,
        worker_batch_size=10,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        owner = "u_del"
        eid = await EpisodeWriter(memory_root).append_entry(
            owner,
            inline={
                "owner_id": owner,
                "session_id": "s",
                "timestamp": "2026-05-14T10:00:00+00:00",
                "parent_id": "mc_del_parent",
                "sender_ids": [owner],
            },
            sections={"Content": "to be removed"},
            date=_dt.date(2026, 5, 14),
        )
        md_path = (
            "default_app/default_project/users/u_del/episodes/episode-2026-05-14.md"
        )
        episode_id = f"u_del_{eid.format()}"

        # Wait for the episode to be indexed before deleting its source.
        await _poll(lambda: episode_repo.get_by_id(episode_id), deadline_seconds=10.0)

        # Remove the file; the watcher's on_deleted should fire.
        (memory_root.root / md_path).unlink()

        async def _row_missing():  # type: ignore[no-untyped-def]
            return (await episode_repo.get_by_id(episode_id)) is None

        assert await _poll(_row_missing, deadline_seconds=10.0)
    finally:
        await orchestrator.stop()

View File

@ -0,0 +1,701 @@
"""End-to-end cascade scenarios beyond the happy-path append.
Each test boots the full cascade (writer → watchdog → md_change_state →
worker → LanceDB) against a tmp memory_root and asserts md/LanceDB
convergence after a specific perturbation. Scanner interval is held
at 60s here so the watcher path is the one being exercised — the
scanner-fallback variants live in :mod:`test_cascade_scanner_fallback`.
Coverage targets
----------------
* Rename: in-bucket / out-of-glob / cross-owner ``mv`` of a real md
file (not the atomic-replace one — that one's covered by
:mod:`test_cascade_fsevents_repro`).
* Content edits: re-writing an existing entry's body must flip
``content_sha256`` and trigger LanceDB re-upsert (not skip).
* Isolation: concurrent writes to N different owners must not bleed
across each other's md_paths in LanceDB.
* Lap race: ``writer.append`` calls overlapping a worker's
in-flight handler must all converge once drained, no entries lost.
"""
from __future__ import annotations
import asyncio
import datetime as _dt
import shutil
from collections.abc import AsyncIterator
from pathlib import Path
import anyio
import pytest
from sqlmodel import SQLModel
from everos.component.embedding import EmbeddingProvider
from everos.component.tokenizer import build_tokenizer
from everos.core.persistence import MarkdownReader, MarkdownWriter, MemoryRoot
from everos.infra.persistence.lancedb import (
atomic_fact_repo,
dispose_connection,
ensure_business_indexes,
)
from everos.infra.persistence.lancedb.lancedb_manager import get_table
from everos.infra.persistence.lancedb.tables.atomic_fact import AtomicFact
from everos.infra.persistence.markdown import AtomicFactWriter
from everos.infra.persistence.sqlite import (
dispose_engine,
get_engine,
md_change_state_repo,
)
from everos.memory.cascade import CascadeConfig, CascadeOrchestrator
@pytest.fixture(autouse=True)
def _reset_lancedb_write_locks() -> None:
    """Clear the LanceDB per-table write-lock pool for every test.

    ``LanceRepoBase`` stores ``asyncio.Lock`` objects in a ClassVar dict
    keyed by table name. Without a reset the lock outlives pytest-asyncio's
    function-scoped loop and the next test fails with "Lock bound to a
    different event loop". Mirrors the unit-test fixture in
    test_repository.py.
    """
    # Imported lazily so the private test hook is only touched by this fixture.
    from everos.core.persistence.lancedb.repository import (
        LanceRepoBase as lance_repo_base,
    )

    lance_repo_base._reset_locks_for_tests()
class _StubEmbedder(EmbeddingProvider):
    """Embedding stub that returns all-zero vectors of length ``dim``."""

    dim = 1024

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector; ``text`` is ignored."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
        """Return one independent zero vector per element of ``texts``.

        Note: this does not route through :meth:`embed`.
        """
        zero_vector = [0.0] * self.dim
        return [list(zero_vector) for _ in texts]
@pytest.fixture
async def cascade_runtime(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[MemoryRoot]:
    """Yield a ``MemoryRoot`` backed by freshly initialised storage under ``tmp_path``.

    Points the EVEROS_* environment at the tmp directory and stub embedding
    settings, disposes any existing LanceDB connection / SQLite engine,
    creates the SQLModel tables and the business indexes, and disposes both
    again once the test is finished.
    """
    env_overrides = {
        "EVEROS_MEMORY__ROOT": str(tmp_path),
        "EVEROS_EMBEDDING__MODEL": "stub-model",
        "EVEROS_EMBEDDING__BASE_URL": "http://stub.invalid/v1",
        "EVEROS_EMBEDDING__API_KEY": "stub-key",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    # Dispose first so nothing opened by an earlier test is reused.
    await dispose_connection()
    await dispose_engine()

    async with get_engine().begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    await ensure_business_indexes()

    yield MemoryRoot.default()

    await dispose_connection()
    await dispose_engine()
def _build_orchestrator(
    memory_root: MemoryRoot, *, scan_interval: float = 60.0
) -> CascadeOrchestrator:
    """Build a cascade orchestrator wired with the stub embedder.

    The worker polls every 50 ms with a batch size of 20, one retry and no
    retry backoff. ``scan_interval`` sets the scanner period in seconds.
    """
    cascade_config = CascadeConfig(
        scan_interval_seconds=scan_interval,
        worker_batch_size=20,
        worker_max_retry=1,
        worker_poll_interval_seconds=0.05,
        worker_retry_backoff_seconds=0.0,
    )
    return CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_StubEmbedder(),
        tokenizer=build_tokenizer(),
        config=cascade_config,
    )
async def _wait_path_done(md_path: str, *, deadline: float = 15.0) -> None:
    """Wait until ``md_path`` has a state row in a terminal status.

    A bare ``_wait_drain`` returns immediately when the queue is empty,
    which is exactly the situation right after an ``append_entries`` call
    whose watcher event has not been enqueued yet. This helper therefore:

    1. polls until a row for ``md_path`` exists (the watcher noticed);
    2. polls until that row's status is ``"done"`` or ``"failed"``;
    3. sleeps 100 ms and asserts the row is still terminal, so a
       last-second re-enqueue (e.g. an atomic-replace echo) fails loudly
       instead of going unnoticed.

    Steps 1-2 are bounded by ``deadline`` seconds (``TimeoutError``).
    """
    terminal_statuses = ("done", "failed")
    poll_interval = 0.05

    async with asyncio.timeout(deadline):
        while await md_change_state_repo.get_by_id(md_path) is None:  # noqa: ASYNC110 - polling cascade state
            await asyncio.sleep(poll_interval)
        while True:  # noqa: ASYNC110 - polling cascade state
            current = await md_change_state_repo.get_by_id(md_path)
            if current is not None and current.status in terminal_statuses:
                break
            await asyncio.sleep(poll_interval)

    await asyncio.sleep(0.1)
    row = await md_change_state_repo.get_by_id(md_path)
    still_terminal = row is not None and row.status in terminal_statuses
    assert still_terminal, (
        f"path {md_path} flipped back to {row.status if row else 'NONE'} "
        f"after reaching done"
    )
async def _wait_paths_done(*md_paths: str, deadline: float = 15.0) -> None:
    """Run :func:`_wait_path_done` for every path concurrently, each with ``deadline``."""
    waiters = (_wait_path_done(path, deadline=deadline) for path in md_paths)
    await asyncio.gather(*waiters)
async def _wait_drain(deadline: float = 15.0) -> None:
    """Wait for the *whole* queue to report zero pending rows.

    Only meaningful once at least one path is known to be in flight (e.g.
    after ``_wait_path_done``): on an empty queue this returns immediately.
    Raises ``TimeoutError`` after ``deadline`` seconds.
    """
    async with asyncio.timeout(deadline):
        while (await md_change_state_repo.queue_summary()).pending != 0:
            await asyncio.sleep(0.05)
async def _count_lance_rows_md(md_path: str) -> int:
    """Return how many AtomicFact rows in LanceDB carry this ``md_path``."""
    # md_path is interpolated verbatim; callers pass test-controlled paths.
    predicate = f"md_path = '{md_path}'"
    fact_table = await get_table(AtomicFact.TABLE_NAME, AtomicFact)
    return await fact_table.count_rows(filter=predicate)
async def _count_md_entries(absolute: Path) -> int:
    """Return the number of entries parsed from ``absolute`` (0 if it is not a file)."""
    if await anyio.Path(absolute).is_file():
        document = await MarkdownReader.read(absolute)
        return len(document.entries)
    return 0
def _atomic_fact_md_path(owner_id: str, bucket: _dt.date) -> str:
    """Return the memory-root-relative md path of ``owner_id``'s atomic facts for ``bucket``."""
    owner_dir = f"default_app/default_project/users/{owner_id}/.atomic_facts/"
    file_name = f"atomic_fact-{bucket.isoformat()}.md"
    return owner_dir + file_name
async def _seed_atomic_facts(
    writer: AtomicFactWriter,
    *,
    owner_id: str,
    bucket: _dt.date,
    n_items: int,
    text_prefix: str = "seed fact",
) -> None:
    """Append ``n_items`` synthetic atomic facts for ``owner_id`` in a single call.

    Item ``j`` gets session ``s_{j}``, parent ``mc_{j}`` and the fact text
    ``"{text_prefix} {j}"``; all items share one fixed timestamp and go to
    the ``bucket`` date file.
    """
    seeded = []
    for idx in range(n_items):
        inline_fields = {
            "owner_id": owner_id,
            "session_id": f"s_{idx}",
            "timestamp": "2026-05-19T07:04:26+00:00",
            "parent_id": f"mc_{idx}",
            "sender_ids": [owner_id],
        }
        seeded.append((inline_fields, {"Fact": f"{text_prefix} {idx}"}))
    await writer.append_entries(owner_id, seeded, date=bucket)
# ===== A. Rename scenarios =====
async def test_rename_same_owner_kind_in_bucket(
    cascade_runtime: MemoryRoot,
) -> None:
    """``mv atomic_fact-D1.md atomic_fact-D2.md`` inside the same owner+kind.

    Source and destination both match the kind glob. Expected outcome: the
    LanceDB rows under the src md_path are cleared and the same number of
    rows reappears under the dest md_path.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        owner_id = "u_rename_a"
        bucket_src = _dt.date(2026, 5, 18)
        bucket_dest = _dt.date(2026, 5, 20)
        src_md_path = _atomic_fact_md_path(owner_id, bucket_src)
        dest_md_path = _atomic_fact_md_path(owner_id, bucket_dest)

        writer = AtomicFactWriter(root=memory_root)
        await _seed_atomic_facts(
            writer, owner_id=owner_id, bucket=bucket_src, n_items=5
        )
        await _wait_path_done(src_md_path)

        # Sanity: the seed is indexed under src only.
        assert await _count_lance_rows_md(src_md_path) == 5
        assert await _count_lance_rows_md(dest_md_path) == 0

        # A genuine rename, with no tmp file / atomic-replace involved.
        await anyio.to_thread.run_sync(
            shutil.move,
            str(memory_root.root / src_md_path),
            str(memory_root.root / dest_md_path),
        )
        await _wait_paths_done(src_md_path, dest_md_path)

        assert await _count_lance_rows_md(src_md_path) == 0, "src not cleared"
        assert await _count_lance_rows_md(dest_md_path) == 5, "dest not reindexed"

        # Both sides must have settled in md_change_state as well.
        for settled_path in (src_md_path, dest_md_path):
            state = await md_change_state_repo.get_by_id(settled_path)
            assert state is not None and state.status == "done"
    finally:
        await orchestrator.stop()
async def test_rename_out_of_kind_glob_degrades_to_delete(
    cascade_runtime: MemoryRoot,
) -> None:
    """``mv`` from inside the kind glob to a path outside it.

    Expected: src lancedb cleared (treated as deletion); dest path is
    silently ignored because ``match_kind`` rejects it, so no
    ``md_change_state`` row is ever created for it.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_rename_oob"
        bucket = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=4)
        src_md_path = _atomic_fact_md_path(owner_id, bucket)
        src_absolute = memory_root.root / src_md_path
        # An obviously-out-of-glob target: a plain dir that no kind spec
        # registers. Kept as a root-relative string too, in the same form
        # the other tests use as the md_change_state key.
        dest_md_path = "out_of_scope/random.md"
        dest_absolute = memory_root.root / dest_md_path
        await anyio.Path(dest_absolute.parent).mkdir(parents=True, exist_ok=True)
        await _wait_path_done(src_md_path)
        assert await _count_lance_rows_md(src_md_path) == 4

        await anyio.to_thread.run_sync(
            shutil.move, str(src_absolute), str(dest_absolute)
        )

        # The dest path never enters md_change_state, so it cannot be waited
        # on, and the src row is already "done" from the seed. Poll the
        # observable effect instead of sleeping a fixed amount: first the
        # LanceDB rows disappear, then the src row settles on "done" again.
        async with asyncio.timeout(10.0):
            while await _count_lance_rows_md(src_md_path) != 0:  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.05)
        async with asyncio.timeout(5.0):
            while True:  # noqa: ASYNC110 - polling cascade state
                src_row = await md_change_state_repo.get_by_id(src_md_path)
                if src_row is not None and src_row.status == "done":
                    break
                await asyncio.sleep(0.05)
        # Something is known to have been in flight, so a drain is meaningful.
        await _wait_drain()

        assert await _count_lance_rows_md(src_md_path) == 0
        assert src_row is not None and src_row.status == "done"

        # The dest path was never registered with any kind spec, so no
        # md_change_state row should exist for it.
        dest_row = await md_change_state_repo.get_by_id(dest_md_path)
        assert dest_row is None, f"out-of-glob path was enqueued: {dest_row!r}"

        # Nothing may be left pending once the rename has settled.
        summary = await md_change_state_repo.queue_summary()
        assert summary.pending == 0
    finally:
        await orchestrator.stop()
async def test_rename_cross_owner_keeps_frontmatter_owner(
    cascade_runtime: MemoryRoot,
) -> None:
    """``mv users/u_a/.atomic_facts/X.md users/u_b/.atomic_facts/X.md``.

    A rename does not rewrite the file, so its frontmatter ``user_id``
    remains ``u_a``. resolve_owner takes owner_id from frontmatter, hence
    the rows indexed under the dest md_path keep ``owner_id='u_a'`` even
    though the path sits under ``users/u_b/``. That is the current design
    (frontmatter is the truth source); this test pins it as a regression
    anchor.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        owner_a, owner_b = "u_a", "u_b"
        bucket = _dt.date(2026, 5, 18)
        src_md_path = _atomic_fact_md_path(owner_a, bucket)
        dest_md_path = _atomic_fact_md_path(owner_b, bucket)
        dest_absolute = memory_root.root / dest_md_path

        writer = AtomicFactWriter(root=memory_root)
        await _seed_atomic_facts(writer, owner_id=owner_a, bucket=bucket, n_items=3)
        await anyio.Path(dest_absolute.parent).mkdir(parents=True, exist_ok=True)
        await _wait_path_done(src_md_path)
        assert await _count_lance_rows_md(src_md_path) == 3

        await anyio.to_thread.run_sync(
            shutil.move, str(memory_root.root / src_md_path), str(dest_absolute)
        )
        await _wait_paths_done(src_md_path, dest_md_path)

        assert await _count_lance_rows_md(src_md_path) == 0
        assert await _count_lance_rows_md(dest_md_path) == 3

        # Current design: frontmatter wins over md_path for owner_id, so the
        # moved rows must still report u_a.
        rows = await atomic_fact_repo.find_where(
            f"md_path = '{dest_md_path}'", limit=10
        )
        assert rows, "dest md_path has no rows"
        owners_seen = [r.owner_id for r in rows]
        assert all(seen == owner_a for seen in owners_seen), (
            f"expected owner_id={owner_a} from frontmatter, "
            f"got {[r.owner_id for r in rows]}"
        )
    finally:
        await orchestrator.stop()
# ===== B. Write-pattern scenarios =====
async def test_modify_existing_entry_content_reindexes(
    cascade_runtime: MemoryRoot,
) -> None:
    """Rewriting an entry's body (same entry_id, new text) must flip
    content_sha256 and trigger re-upsert (not skip)."""
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_modify"
        bucket = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(
            writer,
            owner_id=owner_id,
            bucket=bucket,
            n_items=3,
            text_prefix="ORIGINAL",
        )
        md_path = _atomic_fact_md_path(owner_id, bucket)
        absolute = memory_root.root / md_path
        await _wait_path_done(md_path)
        rows_before = await atomic_fact_repo.find_where(
            f"md_path = '{md_path}'", limit=10
        )
        assert len(rows_before) == 3
        sha_before = {r.entry_id: r.content_sha256 for r in rows_before}
        fact_before = {r.entry_id: r.fact for r in rows_before}

        # Read, replace body text, atomic-write back through writer.write()
        text = await anyio.Path(absolute).read_text(encoding="utf-8")
        new_text = text.replace("ORIGINAL", "EDITED")
        assert new_text != text
        mw = MarkdownWriter(memory_root)
        await mw.write(absolute, new_text)

        # The edit reuses md_path, whose state row is already "done", so a
        # bare _wait_drain could return before the watcher has enqueued the
        # change. Poll until every row's content_sha256 has actually changed.
        rows_after = rows_before
        try:
            async with asyncio.timeout(15.0):
                while True:  # noqa: ASYNC110 - polling cascade state
                    rows_after = await atomic_fact_repo.find_where(
                        f"md_path = '{md_path}'", limit=10
                    )
                    if len(rows_after) == len(rows_before) and all(
                        r.content_sha256 != sha_before.get(r.entry_id)
                        for r in rows_after
                    ):
                        break
                    await asyncio.sleep(0.05)
        except TimeoutError:
            # Deliberate fall-through: the assertions below report exactly
            # which entry is stale, which is more useful than a bare timeout.
            pass
        # Let the state row settle before the orchestrator is stopped.
        await _wait_drain()

        assert len(rows_after) == 3
        sha_after = {r.entry_id: r.content_sha256 for r in rows_after}
        fact_after = {r.entry_id: r.fact for r in rows_after}
        # Every entry_id present in both, every content_sha256 changed,
        # every fact text now reflects EDITED.
        assert set(sha_after) == set(sha_before)
        for eid, sha in sha_after.items():
            assert sha != sha_before[eid], (
                f"content_sha256 did not change for {eid}: stayed {sha}"
            )
            assert "EDITED" in fact_after[eid], (
                f"fact text not updated for {eid}: {fact_after[eid]!r}"
            )
            assert "ORIGINAL" not in fact_after[eid]
            assert "ORIGINAL" in fact_before[eid]
    finally:
        await orchestrator.stop()
async def test_concurrent_writes_different_owners_no_bleed(
    cascade_runtime: MemoryRoot,
) -> None:
    """Parallel writes from several owners must stay isolated per md_path.

    Five owners each seed four facts concurrently; afterwards every owner's
    md_path must hold exactly that owner's rows and nobody else's.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        per_owner = 4
        bucket = _dt.date(2026, 5, 18)
        owners = [f"u_concur_{i}" for i in range(5)]
        writer = AtomicFactWriter(root=memory_root)

        seed_jobs = [
            _seed_atomic_facts(
                writer,
                owner_id=oid,
                bucket=bucket,
                n_items=per_owner,
                text_prefix=f"by-{oid}",
            )
            for oid in owners
        ]
        await asyncio.gather(*seed_jobs)

        paths_by_owner = {oid: _atomic_fact_md_path(oid, bucket) for oid in owners}
        await _wait_paths_done(*paths_by_owner.values())

        for oid, md_path in paths_by_owner.items():
            rows = await atomic_fact_repo.find_where(f"md_path = '{md_path}'", limit=10)
            assert len(rows) == per_owner, (
                f"{oid}: expected {per_owner} rows, got {len(rows)}"
            )
            # No row under this md_path may come from another owner's
            # concurrent write, neither by owner_id nor by fact text.
            assert not any(r.owner_id != oid for r in rows)
            assert not any(f"by-{oid}" not in r.fact for r in rows)
    finally:
        await orchestrator.stop()
async def test_lap_append_during_handler_no_loss(
    cascade_runtime: MemoryRoot,
) -> None:
    """Writer keeps appending while worker is mid-handler.

    Slow the embedder so a handler invocation overlaps later appends.
    On drain, lance_rows must equal md entries — the lap is absorbed
    by the worker's status='processing' guard + re-claim.
    """
    memory_root = cascade_runtime

    class _SlowEmbedder(_StubEmbedder):
        """Stub embedder that takes ~50 ms per embedded text."""

        async def embed(self, text: str) -> list[float]:
            await asyncio.sleep(0.05)  # handler takes ~0.05*N entries
            return [0.0] * self.dim

        async def embed_batch(self, texts):  # type: ignore[no-untyped-def]
            # The inherited embed_batch returns zeros without calling
            # embed(), which would skip the delay whenever the handler uses
            # the batch API. Route through embed() so the slowdown applies
            # whichever method is called.
            return [await self.embed(text) for text in texts]

    orchestrator = CascadeOrchestrator(
        memory_root=memory_root,
        embedder=_SlowEmbedder(),
        tokenizer=build_tokenizer(),
        config=CascadeConfig(
            scan_interval_seconds=60.0,
            worker_batch_size=20,
            worker_max_retry=1,
            worker_poll_interval_seconds=0.05,
            worker_retry_backoff_seconds=0.0,
        ),
    )
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_lap"
        bucket = _dt.date(2026, 5, 18)
        total = 30
        for i in range(total):
            await writer.append_entries(
                owner_id,
                [
                    (
                        {
                            "owner_id": owner_id,
                            "session_id": f"s_{i}",
                            "timestamp": "2026-05-19T07:04:26+00:00",
                            "parent_id": f"mc_{i}",
                            "sender_ids": [owner_id],
                        },
                        {"Fact": f"fact body {i}"},
                    )
                ],
                date=bucket,
            )
            # Pace just slow enough that some writes land during a
            # handler invocation (~50ms per embed), but fast enough
            # that multiple writes accumulate during one handler.
            await asyncio.sleep(0.02)

        md_path = _atomic_fact_md_path(owner_id, bucket)
        absolute = memory_root.root / md_path
        await _wait_path_done(md_path, deadline=30.0)
        md_entries = await _count_md_entries(absolute)
        lance_rows = await _count_lance_rows_md(md_path)
        assert md_entries == total, (
            f"writer self-check: expected {total} md entries, got {md_entries}"
        )
        assert lance_rows == md_entries, f"LAP LOSS: md={md_entries} lance={lance_rows}"
    finally:
        await orchestrator.stop()
# ===== C. Scanner fallback scenarios =====
def _build_orchestrator_fast_scanner(memory_root: MemoryRoot) -> CascadeOrchestrator:
    """Same as :func:`_build_orchestrator` but with a 2s scanner interval.

    Scanner-fallback tests use this so they do not have to wait for the 60s
    default interval before the scanner sweep runs. Delegating keeps the
    worker settings identical to the watcher-path tests by construction.
    """
    return _build_orchestrator(memory_root, scan_interval=2.0)
def _silence_handler_method(monkeypatch: pytest.MonkeyPatch, name: str) -> None:
    """Swap ``watcher._Handler.<name>`` for a no-op until the test ends.

    Simulates fseventsd missing that event class entirely. ``monkeypatch``
    restores the original method at teardown.
    """
    from everos.memory.cascade import watcher as watcher_module

    def _ignore_event(self: object, event: object) -> None:
        return None

    monkeypatch.setattr(watcher_module._Handler, name, _ignore_event)
async def test_scanner_recovers_missed_delete(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A deletion the watcher never reports must still be picked up by the scanner.

    ``on_deleted`` is silenced before the md file is unlinked, so only the
    scanner sweep can notice that the path is gone from disk and enqueue a
    'deleted' change for it.
    """
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator_fast_scanner(memory_root)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        owner_id = "u_scan_del"
        bucket = _dt.date(2026, 5, 18)
        md_path = _atomic_fact_md_path(owner_id, bucket)
        absolute = memory_root.root / md_path

        writer = AtomicFactWriter(root=memory_root)
        await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=3)
        await _wait_path_done(md_path)
        assert await _count_lance_rows_md(md_path) == 3

        # From this point on the watcher ignores deletions.
        _silence_handler_method(monkeypatch, "on_deleted")
        absolute.unlink()

        # The watcher will not enqueue anything; the scanner sweeps every 2s
        # and should spot the mtime/existence inconsistency, then enqueue
        # 'deleted'.
        await asyncio.sleep(0.2)

        async with asyncio.timeout(10.0):
            while await _count_lance_rows_md(md_path) != 0:  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)

        async with asyncio.timeout(5.0):
            row = await md_change_state_repo.get_by_id(md_path)
            while row is None or row.status != "done":  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)
                row = await md_change_state_repo.get_by_id(md_path)
        assert row.change_type == "deleted"
    finally:
        await orchestrator.stop()
async def test_scanner_indexes_preexisting_md(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An md file that exists before cascade starts must be indexed by the scanner.

    This covers files written before startup (or by an editor while cascade
    is offline): watchdog ignores files that already exist at schedule
    time, so only the scanner can find them. The file is seeded before
    ``orchestrator.start()`` and every watcher callback is silenced.
    """
    memory_root = cascade_runtime

    # Pre-seed: put the md on disk before any cascade component is running.
    owner_id = "u_scan_pre"
    bucket = _dt.date(2026, 5, 18)
    md_path = _atomic_fact_md_path(owner_id, bucket)
    await _seed_atomic_facts(
        AtomicFactWriter(root=memory_root),
        owner_id=owner_id,
        bucket=bucket,
        n_items=2,
    )
    assert (memory_root.root / md_path).is_file()

    # Start cascade with the file already present. Belt-and-suspenders:
    # silence every watcher event so the scanner is the only discovery path.
    orchestrator = _build_orchestrator_fast_scanner(memory_root)
    for handler_name in ("on_created", "on_modified", "on_moved", "on_deleted"):
        _silence_handler_method(monkeypatch, handler_name)
    await orchestrator.start()
    try:
        async with asyncio.timeout(10.0):
            while await _count_lance_rows_md(md_path) != 2:  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)
    finally:
        await orchestrator.stop()
async def test_scanner_recovers_missed_modify(
    cascade_runtime: MemoryRoot,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """All non-deletion watcher events silenced. writer.append produces
    an atomic-replace whose events are all dropped by the watcher.
    Scanner should still notice the new file and enqueue 'added'."""
    memory_root = cascade_runtime
    orchestrator = _build_orchestrator_fast_scanner(memory_root)
    # Silence the non-deletion handlers BEFORE start() so the initial
    # schedule doesn't see any add/create events either.
    for name in ("on_created", "on_modified", "on_moved"):
        _silence_handler_method(monkeypatch, name)
    await orchestrator.start()
    await asyncio.sleep(0.3)
    try:
        writer = AtomicFactWriter(root=memory_root)
        owner_id = "u_scan_mod"
        bucket = _dt.date(2026, 5, 18)
        await _seed_atomic_facts(writer, owner_id=owner_id, bucket=bucket, n_items=3)
        md_path = _atomic_fact_md_path(owner_id, bucket)

        async def _lance_filled() -> bool:
            return await _count_lance_rows_md(md_path) == 3

        async with asyncio.timeout(10.0):
            while not await _lance_filled():  # noqa: ASYNC110 - polling cascade state
                await asyncio.sleep(0.1)

        # The LanceDB rows can become visible before the state row has been
        # flipped to "done", so poll for the terminal status (as the
        # missed-delete test does) instead of asserting it immediately.
        async with asyncio.timeout(5.0):
            while True:  # noqa: ASYNC110 - polling cascade state
                row = await md_change_state_repo.get_by_id(md_path)
                if row is not None and row.status == "done":
                    break
                await asyncio.sleep(0.1)
        assert row is not None and row.status == "done"
    finally:
        await orchestrator.stop()

View File

@ -0,0 +1,268 @@
"""Agent-mode memorize integration tests.
Covers the agent branches that ``test_memorize_integration.py`` skips:
- :mod:`service.memorize` agent dispatch (asyncio.gather of user + agent
pipelines)
- :mod:`service._boundary` agent-mode detection via
:class:`everalgo.agent_memory.AgentBoundaryDetector`
- :mod:`memory.extract.pipeline.agent_memory.AgentMemoryPipeline` end-to-end
Self-contained: the chat-baseline file keeps its fixture local, so we
copy the minimum scaffolding rather than refactor it into a shared
conftest.
"""
from __future__ import annotations
import importlib
import json
import sqlite3
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock
import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlmodel import SQLModel
from everos.core.persistence import MemoryRoot
from everos.service.memorize import MemorizeResult, memorize
def _boundary_response(boundaries: list[int]) -> str:
return json.dumps(
{"reasoning": "test", "boundaries": boundaries, "should_wait": False}
)
def _make_fake_llm(boundary_responses: list[list[int]] | None = None) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` whose reply depends on the prompt text.

    A prompt whose first message mentions ``boundaries`` or ``memcell``
    (case-insensitive) gets the next queued boundary list, or ``[]`` once
    the queue is exhausted. Any other prompt gets a fixed
    ``{"title": "T", "content": "B"}`` reply.
    """
    # Copy so the caller's list is never mutated by the pops below.
    pending: list[list[int]] = list(boundary_responses or [])

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        is_boundary_prompt = "boundaries" in lowered or "memcell" in lowered
        if not is_boundary_prompt:
            body = json.dumps({"title": "T", "content": "B"})
            return ChatResponse(content=body, model="fake")
        cuts = pending.pop(0) if pending else []
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
def _msg(
role: str,
content: str,
*,
sender_id: str = "u_alice",
timestamp: int = 1_700_000_000_000,
tool_calls: list[dict] | None = None,
tool_call_id: str | None = None,
) -> dict[str, Any]:
out: dict[str, Any] = {
"sender_id": sender_id,
"role": role,
"content": content,
"timestamp": timestamp,
}
if tool_calls is not None:
out["tool_calls"] = tool_calls
if tool_call_id is not None:
out["tool_call_id"] = tool_call_id
return out
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Build a ``role="user"`` message sent by ``sender`` at ``ts``."""
    message = _msg("user", content, sender_id=sender, timestamp=ts)
    return message
def _assistant(content: str, ts: int) -> dict[str, Any]:
    """Build a ``role="assistant"`` message with ``sender_id="assistant"``."""
    message = _msg("assistant", content, sender_id="assistant", timestamp=ts)
    return message
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
db = tmp_path / ".index" / "sqlite" / "system.db"
if not db.is_file():
return []
conn = sqlite3.connect(db)
conn.row_factory = sqlite3.Row
try:
return list(conn.execute("SELECT * FROM memcell ORDER BY timestamp"))
finally:
conn.close()
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> AsyncIterator[Callable[..., Any]]:
    """Yield an async builder that prepares an isolated memorize environment.

    Same shape as the chat-baseline fixture; ``mode`` defaults to ``agent``.

    Before yielding, the fixture redirects ``MemoryRoot.default`` to
    ``tmp_path``, creates the sqlite index directory and resets the
    module-level singletons it knows about. Calling the yielded builder
    (``await memorize_env(mode=..., fake_llm=...)``) sets the environment
    variables, installs the fake LLM client, creates the sqlite schema,
    stubs the strategy extractors and starts the engine.

    Teardown stops the engine and then disposes the sqlite engine; the
    dispose step runs even when stopping the engine raises.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset lazily-built singletons so state cannot leak in from another test.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Populated by ``_setup``; read by the teardown below.
    started: dict[str, Any] = {"engine": None, "dispose": None}

    async def _setup(*, mode: str = "agent", fake_llm: FakeLLMClient) -> None:
        """Configure settings, install ``fake_llm`` and start the engine."""
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Record the disposer before anything else can fail, so teardown
        # still releases the sqlite engine after a partial setup.
        started["dispose"] = dispose_engine
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies so agent_case / atomic / foresight don't
        # try real extraction logic during these tests.
        noop = AsyncMock(return_value=[])
        for mod in (af_mod, fs_mod, ac_mod):
            # First module attribute whose name ends with "Extractor".
            extractor_attr = next(
                (n for n in dir(mod) if n.endswith("Extractor")), None
            )
            if extractor_attr:
                monkeypatch.setattr(
                    mod,
                    extractor_attr,
                    lambda *a, **k: type("M", (), {"aextract": noop})(),
                )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        # Always release the sqlite engine, even if stopping the engine failed.
        if started["dispose"] is not None:
            await started["dispose"]()
# ── Tests ────────────────────────────────────────────────────────────
async def test_agent_mode_two_user_assistant_msgs(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent-mode baseline: one user/assistant pair flushed with ``is_final``.

    Expects an ``extracted`` result and exactly one memcell row whose
    ``raw_type`` is ``AgentTrajectory``.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    turns = [
        _user("hello", 1_700_000_000_000),
        _assistant("hi there", 1_700_000_001_000),
    ]
    request = {"session_id": "test_agent_basic", "messages": turns}
    outcome = await memorize(request, is_final=True)

    assert isinstance(outcome, MemorizeResult)
    assert outcome.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    assert cells[0]["raw_type"] == "AgentTrajectory"
async def test_agent_mode_preserves_tool_items(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent mode keeps tool-related messages inside the resulting cell.

    A four-message exchange (user, assistant with ``tool_calls``, ``tool``
    result, assistant answer) must produce one memcell listing all four
    message ids.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    tool_call = {
        "id": "c1",
        "type": "function",
        "function": {"name": "x", "arguments": "{}"},
    }
    conversation = [
        _user("debug this", 1_700_000_000_000),
        _msg(
            "assistant",
            "calling tool",
            timestamp=1_700_000_001_000,
            tool_calls=[tool_call],
        ),
        _msg(
            "tool",
            "result",
            sender_id="tool",
            timestamp=1_700_000_002_000,
            tool_call_id="c1",
        ),
        _assistant("here's the answer", 1_700_000_003_000),
    ]
    payload = {"session_id": "test_agent_tools", "messages": conversation}

    outcome = await memorize(payload, is_final=True)
    assert outcome.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    # All four preserved in agent mode (chat mode would have 2).
    message_ids = json.loads(cells[0]["message_ids_json"])
    assert len(message_ids) == 4
async def test_agent_mode_dispatch_no_double_insert(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Agent-mode dispatch must leave exactly one memcell row.

    Four alternating user/assistant messages are flushed in one call; the
    single row's ``payload_json`` must list all four items.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="agent", fake_llm=llm)

    turns = [
        _user("u1", 1_700_000_000_000),
        _assistant("a1", 1_700_000_001_000),
        _user("u2", 1_700_000_002_000),
        _assistant("a2", 1_700_000_003_000),
    ]
    request = {"session_id": "test_agent_dispatch", "messages": turns}
    await memorize(request, is_final=True)

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1  # boundary stage owns the ledger
    stored = json.loads(cells[0]["payload_json"])
    assert len(stored["items"]) == 4

View File

@ -0,0 +1,300 @@
"""Concurrent /add on one session must not lose messages (regression).
White-box integration test for the per-session lock added in
``everos.service._session_lock``.
Bug class
---------
Without the lock, two concurrent ``memorize()`` calls on the same
``session_id`` race on ``unprocessed_buffer``:
1. Both read the same pre-existing buffer rows.
2. Each boundary call sees only its own newly-arrived messages plus
the shared pre-existing buffer (neither sees the other's messages).
3. Both call ``_replace_buffer(session_id, tail)`` — the later write
silently overwrites the earlier write's tail; the earlier task's
tail messages are lost forever.
Invariant under test
--------------------
After N concurrent ``memorize()`` calls on one session, every input
message_id is **either** in some memcell's ``message_ids_json`` **or**
in the surviving ``unprocessed_buffer`` rows. Nothing silently vanishes.
This is a white-box integration test (not e2e): it bypasses HTTP, calls
``memorize()`` directly, but inspects sqlite tables to assert internal
state. Uses ``FakeLLMClient`` to avoid real LLM latency and to control
boundary decisions deterministically.
"""
from __future__ import annotations
import asyncio
import importlib
import json
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock
import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlalchemy import text
from sqlmodel import SQLModel
from everos.core.persistence import MemoryRoot
from everos.service.memorize import memorize
# ---------------------------------------------------------------------------
# Fake LLM that splits each call into one memcell + 0-tail (force extract)
# ---------------------------------------------------------------------------
def _boundary_response(boundaries: list[int]) -> str:
return json.dumps(
{"reasoning": "test", "boundaries": boundaries, "should_wait": False}
)
def _episode_response(title: str = "T", content: str = "B") -> str:
return json.dumps({"title": title, "content": content})
def _make_extract_all_llm() -> FakeLLMClient:
    """Build a fake LLM that answers every boundary prompt with ``[999]``.

    Prompts whose first message mentions ``boundaries`` or ``memcell``
    (case-insensitive) get the boundary reply; anything else gets the
    default canned episode reply.
    """

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        if not ("boundaries" in lowered or "memcell" in lowered):
            return ChatResponse(content=_episode_response(), model="fake")
        # Always cut: boundary indices are relative to the merged input.
        # An empty list would mean "no cut, hold"; a single [N] means "cut
        # after index N". The large sentinel index is meant to force the
        # boundary stage to take everything into one cell.
        return ChatResponse(content=_boundary_response([999]), model="fake")

    return FakeLLMClient(handler=handler)
# ---------------------------------------------------------------------------
# Fixture — mirrors test_memorize_integration's pattern, with the OME strategy
# extractors stubbed out (the lock bug lives at the boundary stage; downstream
# strategies are irrelevant to this race).
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def memorize_env_locked(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., Any]]:
    """Yield an async builder for a clean chat-mode memorize environment.

    Before yielding, the fixture redirects ``MemoryRoot.default`` to
    ``tmp_path``, resets the memorize singletons and resets the
    per-session lock registry. Calling the yielded builder
    (``await memorize_env_locked(fake_llm=...)``) sets the environment
    variables, installs the fake LLM client, creates the sqlite schema,
    stubs the strategy extractors and starts the engine.

    Teardown stops the engine (if one was started) and always disposes the
    sqlite engine, even when stopping the engine raises.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")
    lock_mod = importlib.import_module("everos.service._session_lock")

    # Reset memorize singletons + session lock registry.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
    lock_mod._reset_for_tests()

    # Populated by ``_setup``; read by the teardown below.
    started: dict[str, Any] = {"engine": None}

    async def _setup(*, fake_llm: FakeLLMClient) -> None:
        """Configure chat-mode settings, install ``fake_llm``, start the engine."""
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", "chat")
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategy extractors (we only care about the boundary +
        # memcell + buffer cycle; downstream strategies are a separate story).
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    from everos.infra.persistence.sqlite import dispose_engine

    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        # Always release the sqlite engine, even if stopping the engine failed.
        await dispose_engine()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _msg(idx: int, sender: str, ts: int) -> dict[str, Any]:
return {
"sender_id": sender,
"role": "user",
"timestamp": ts,
"content": f"msg-{idx} from {sender}",
}
async def _collect_buffer_message_ids(session_id: str) -> set[str]:
    """Return the ``message_id`` values in ``unprocessed_buffer`` for a session."""
    from everos.infra.persistence.sqlite import get_engine

    query = text("SELECT message_id FROM unprocessed_buffer WHERE session_id = :s")
    async with get_engine().connect() as conn:
        cursor = await conn.execute(query, {"s": session_id})
        records = cursor.fetchall()
    ids: set[str] = set()
    for record in records:
        ids.add(record[0])
    return ids
async def _collect_memcell_message_ids(session_id: str) -> set[str]:
    """Return the union of all ``message_ids_json`` lists stored for a session.

    Each ``memcell`` row's ``message_ids_json`` column is decoded with
    ``json.loads`` and merged into one set.
    """
    from everos.infra.persistence.sqlite import get_engine

    query = text("SELECT message_ids_json FROM memcell WHERE session_id = :s")
    async with get_engine().connect() as conn:
        cursor = await conn.execute(query, {"s": session_id})
        records = cursor.fetchall()
    return set().union(*(json.loads(raw) for (raw,) in records))
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
async def test_concurrent_adds_same_session_no_message_loss(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Two concurrent ``memorize()`` calls on one session lose no message.

    Every one of the 8 input messages must end up either in a memcell's
    ``message_ids_json`` or in the surviving buffer, and never in both.
    """
    await memorize_env_locked(fake_llm=_make_extract_all_llm())
    session_id = "s_concurrent"

    batch_a = []
    batch_b = []
    for i in range(4):
        batch_a.append(_msg(i, "alice", 1_700_000_000_000 + i * 1000))
        batch_b.append(_msg(i + 100, "bob", 1_700_000_100_000 + i * 1000))

    # Fire both concurrently against the same session.
    calls = [
        memorize({"session_id": session_id, "messages": batch_a}),
        memorize({"session_id": session_id, "messages": batch_b}),
    ]
    await asyncio.gather(*calls)

    buffered = await _collect_buffer_message_ids(session_id)
    in_cells = await _collect_memcell_message_ids(session_id)
    covered = buffered | in_cells

    # Asserting on the *count* of distinct ids avoids depending on the
    # internal message-id generator.
    assert len(covered) == 8, (
        f"expected 8 distinct message ids covered, got {len(covered)}: "
        f"buffer={len(buffered)}, memcell={len(in_cells)}"
    )

    # Sanity: a consumed message must have been removed from the buffer.
    overlap = buffered & in_cells
    assert not overlap, f"messages in both buffer and memcell: {overlap}"
async def test_concurrent_adds_serial_when_locked(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Stress variant: 4 concurrent batches of 3 messages on one session.

    All 12 message ids must be covered by buffer ∪ memcells, with no id
    present in both.
    """
    await memorize_env_locked(fake_llm=_make_extract_all_llm())
    session_id = "s_stress"
    n_batches = 4
    batch_size = 3

    batches = []
    for b in range(n_batches):
        batch = []
        for i in range(batch_size):
            idx = b * 10 + i
            batch.append(_msg(idx, f"u{b}", 1_700_000_000_000 + idx * 1000))
        batches.append(batch)

    calls = [
        memorize({"session_id": session_id, "messages": batch}) for batch in batches
    ]
    await asyncio.gather(*calls)

    buffered = await _collect_buffer_message_ids(session_id)
    in_cells = await _collect_memcell_message_ids(session_id)
    covered = buffered | in_cells
    expected = n_batches * batch_size
    assert len(covered) == expected, (
        f"expected {expected} message ids covered, got {len(covered)}: "
        f"buffer={len(buffered)}, memcell={len(in_cells)}"
    )
    assert not (buffered & in_cells)
async def test_different_sessions_run_in_parallel(
    memorize_env_locked: Callable[..., AsyncMock],
) -> None:
    """Concurrent calls on three distinct sessions each keep their 3 messages.

    The test checks per-session coverage (buffer ∪ memcells) only; it does
    not measure whether the calls actually overlapped in time.
    """
    await memorize_env_locked(fake_llm=_make_extract_all_llm())

    session_ids = ("s_a", "s_b", "s_c")
    requests = [
        {
            "session_id": sid,
            "messages": [
                _msg(i, sid, 1_700_000_000_000 + i * 1000) for i in range(3)
            ],
        }
        for sid in session_ids
    ]
    await asyncio.gather(*[memorize(request) for request in requests])

    for sid in session_ids:
        buffered = await _collect_buffer_message_ids(sid)
        in_cells = await _collect_memcell_message_ids(sid)
        covered = buffered | in_cells
        assert len(covered) == 3, f"session {sid}: got {len(covered)}, want 3"

View File

@ -0,0 +1,690 @@
"""End-to-end memorize integration tests.
Drives ``service.memorize.memorize()`` with a ``FakeLLMClient`` so the
full chain (ingest → boundary → user / agent pipeline → md + OME emit)
runs without real LLM calls. Each test isolates state by:
- redirecting ``MemoryRoot.default()`` to a ``tmp_path``
- resetting service-layer lazy singletons
- starting / stopping a per-test ``OfflineEngine``
- patching ``get_llm_client`` (boundary + strategies) onto a fake
OME strategies (atomic / foresight) are silenced by replacing their extractor
classes with stubs whose ``aextract`` is an ``AsyncMock`` returning ``[]``, so
this test focuses on the synchronous boundary + pipeline + md path —
strategy dispatch correctness already has its own coverage in
``test_ome_strategies_integration.py``.
"""
from __future__ import annotations
import importlib
import json
import sqlite3
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock
import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlmodel import SQLModel
from everos.core.persistence import MemoryRoot
from everos.service.memorize import MemorizeResult, memorize
# ---------------------------------------------------------------------------
# Canned LLM responses
# ---------------------------------------------------------------------------
def _boundary_response(boundaries: list[int]) -> str:
"""Build a ``detect_boundaries`` JSON response (algo schema)."""
payload = {
"reasoning": "test",
"boundaries": boundaries,
"should_wait": False,
}
return json.dumps(payload)
def _episode_response(title: str = "Test Subject", content: str = "Test body") -> str:
"""Build an ``EpisodeExtractor`` JSON response (algo schema)."""
return json.dumps({"title": title, "content": content})
def _make_fake_llm(
    boundary_responses: list[list[int]] | None = None,
    *,
    episode_title: str = "Test Subject",
    episode_content: str = "Test body",
) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` that dispatches by prompt fingerprint.

    Each boundary prompt (first message mentions ``boundaries`` or
    ``memcell``, case-insensitive) consumes the next entry of
    ``boundary_responses``, falling back to ``[]`` once they run out.
    Every other prompt receives the same canned ``{title, content}`` reply
    built from ``episode_title`` / ``episode_content``.
    """
    # Copy so the caller's list is never mutated by the pops below.
    pending: list[list[int]] = list(boundary_responses or [])

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        if "boundaries" in lowered or "memcell" in lowered:
            cuts = pending.pop(0) if pending else []
            body = _boundary_response(cuts)
        else:
            # Episode reply; this branch also serves any atomic/foresight
            # prompt that reaches the fake client.
            body = _episode_response(episode_title, episode_content)
        return ChatResponse(content=body, model="fake")

    return FakeLLMClient(handler=handler)
# ---------------------------------------------------------------------------
# Shared setup fixture
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def memorize_env(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., Any]]:
    """Yield a builder that configures a clean memorize environment.

    Usage::

        async def test_x(memorize_env):
            await memorize_env(mode="chat", fake_llm=_make_fake_llm([...]))
            outcome = await memorize({"session_id": "s", "messages": [...]})

    The builder is intended to be called exactly once per test (it primes
    singletons + starts the OME engine). Teardown stops the engine and
    disposes the sqlite engine; the dispose step runs even when stopping
    the engine raises.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")

    # Reset singletons.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)

    # Populated by ``_setup``; read by the teardown below.
    started: dict[str, Any] = {"engine": None, "dispose": None}

    async def _setup(
        *,
        mode: str = "chat",
        fake_llm: FakeLLMClient,
        hard_token_limit: int = 65536,
        hard_msg_limit: int = 500,
    ) -> None:
        """Configure settings, install ``fake_llm`` and start the engine.

        ``hard_token_limit`` / ``hard_msg_limit`` are exported through the
        ``EVEROS_BOUNDARY_DETECTION__*`` environment variables.
        """
        # Provide a non-None API key + base_url so get_llm_client doesn't
        # raise; we replace the cached singleton with our fake right after.
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", mode)
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")
        monkeypatch.setenv(
            "EVEROS_BOUNDARY_DETECTION__HARD_TOKEN_LIMIT", str(hard_token_limit)
        )
        monkeypatch.setenv(
            "EVEROS_BOUNDARY_DETECTION__HARD_MSG_LIMIT", str(hard_msg_limit)
        )
        from everos.config import load_settings

        # Drop cached settings so the env vars set above take effect.
        load_settings.cache_clear()

        # Replace the cached client singleton with our fake so get_llm_client
        # returns the fake on subsequent calls.
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        # Build sqlite schema.
        from everos.infra.persistence.sqlite import dispose_engine, get_engine

        db_engine = get_engine()
        # Record the disposer before anything else can fail, so teardown
        # still releases the sqlite engine after a partial setup.
        started["dispose"] = dispose_engine
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Mock the OME extractors so the async strategy chain is a no-op
        # (the strategy itself still runs; it just sees no facts/foresights).
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        # Always release the sqlite engine, even if stopping the engine failed.
        if started["dispose"] is not None:
            await started["dispose"]()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _msg(
role: str,
content: str,
*,
sender_id: str = "u_alice",
timestamp: int = 1_700_000_000_000,
tool_calls: list[dict] | None = None,
tool_call_id: str | None = None,
) -> dict[str, Any]:
out: dict[str, Any] = {
"sender_id": sender_id,
"role": role,
"content": content,
"timestamp": timestamp,
}
if tool_calls is not None:
out["tool_calls"] = tool_calls
if tool_call_id is not None:
out["tool_call_id"] = tool_call_id
return out
def _user(content: str, ts: int, *, sender: str = "u_alice") -> dict[str, Any]:
    """Build a ``role="user"`` message sent by ``sender`` at ``ts``."""
    message = _msg("user", content, sender_id=sender, timestamp=ts)
    return message
def _assistant(content: str, ts: int, *, sender: str = "assistant") -> dict[str, Any]:
    """Build a ``role="assistant"`` message sent by ``sender`` at ``ts``."""
    message = _msg("assistant", content, sender_id=sender, timestamp=ts)
    return message
def _memcell_rows(tmp_path: Path) -> list[sqlite3.Row]:
db = tmp_path / ".index" / "sqlite" / "system.db"
if not db.is_file():
return []
conn = sqlite3.connect(db)
conn.row_factory = sqlite3.Row
try:
return list(conn.execute("SELECT * FROM memcell ORDER BY timestamp"))
finally:
conn.close()
def _buffer_count(tmp_path: Path) -> int:
db = tmp_path / ".index" / "sqlite" / "system.db"
if not db.is_file():
return 0
conn = sqlite3.connect(db)
try:
return conn.execute(
"SELECT COUNT(*) FROM unprocessed_buffer WHERE track='memorize'"
).fetchone()[0]
finally:
conn.close()
def _episode_paths(tmp_path: Path) -> list[Path]:
base = tmp_path / "default_app" / "default_project" / "users"
return sorted(base.rglob("episode-*.md"))
# ---------------------------------------------------------------------------
# Happy path baseline
# ---------------------------------------------------------------------------
async def test_chat_baseline_two_msgs_one_cell(
    tmp_path: Path,
    memorize_env: Callable[..., Any],
) -> None:
    """2 messages → flush forces them into 1 cell + 1 Episode + 1 memcell row.

    Also checks that the buffer is empty afterwards and that the single
    episode markdown file contains the canned title and body.
    """
    fake = _make_fake_llm(boundary_responses=[[]])  # no internal cuts
    await memorize_env(mode="chat", fake_llm=fake)

    payload = {
        "session_id": "test_chat_1",
        "messages": [
            _user("hello", 1_700_000_000_000),
            _assistant("hi there", 1_700_000_001_000),
        ],
    }
    result = await memorize(payload, is_final=True)

    assert isinstance(result, MemorizeResult)
    assert result.status == "extracted"
    assert result.message_count == 2

    rows = _memcell_rows(tmp_path)
    assert len(rows) == 1
    assert rows[0]["track"] == "memorize"
    assert rows[0]["raw_type"] == "Conversation"
    # MemCell has no single owner — sender_ids carries the participants.
    assert "u_alice" in json.loads(rows[0]["sender_ids_json"])

    assert _buffer_count(tmp_path) == 0

    md_files = _episode_paths(tmp_path)
    assert len(md_files) == 1
    # Decode explicitly so the check does not depend on the locale's
    # default encoding. NOTE(review): assumes the episode writer emits
    # UTF-8 — confirm against the writer.
    body = md_files[0].read_text(encoding="utf-8")
    assert "Test Subject" in body
    assert "Test body" in body
# ---------------------------------------------------------------------------
# Input-shape boundary cases (6)
# ---------------------------------------------------------------------------
async def test_empty_batch_non_final_is_skipped(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """``messages=[]`` + ``is_final=False`` has no side effects.

    The call reports ``accumulated`` with ``message_count == 0`` and leaves
    neither memcell rows nor episode files behind.
    """
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    request = {"session_id": "test_empty_nonfinal", "messages": []}
    outcome = await memorize(request, is_final=False)

    assert outcome.status == "accumulated"
    assert outcome.message_count == 0
    assert _memcell_rows(tmp_path) == []
    assert _episode_paths(tmp_path) == []
async def test_empty_batch_final_drains_empty_buffer(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """``messages=[]`` + ``is_final=True`` on a fresh session yields nothing.

    The call reports ``accumulated`` and produces no cells and no md files.
    """
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    request = {"session_id": "test_empty_final", "messages": []}
    outcome = await memorize(request, is_final=True)

    assert outcome.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    assert _episode_paths(tmp_path) == []
async def test_assistant_only_batch_accumulates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A non-final batch with no ``role=user`` message is parked in the buffer.

    Expects ``accumulated``, no memcell rows and two buffered messages.
    """
    llm = _make_fake_llm(boundary_responses=[])  # no LLM call expected
    await memorize_env(mode="chat", fake_llm=llm)

    assistant_turns = [
        _assistant("hi", 1_700_000_000_000),
        _assistant("anyone here?", 1_700_000_001_000),
    ]
    request = {"session_id": "test_asst_only", "messages": assistant_turns}
    outcome = await memorize(request, is_final=False)

    assert outcome.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    assert _buffer_count(tmp_path) == 2  # parked in buffer
async def test_single_user_message_accumulates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A lone non-final user message yields no cell and stays buffered.

    Expects ``accumulated``, no memcell rows and one buffered message.
    """
    llm = _make_fake_llm(boundary_responses=[[]])  # boundary called, no cuts
    await memorize_env(mode="chat", fake_llm=llm)

    request = {
        "session_id": "test_single",
        "messages": [_user("hello?", 1_700_000_000_000)],
    }
    outcome = await memorize(request, is_final=False)

    assert outcome.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
    assert _buffer_count(tmp_path) == 1
async def test_chat_mode_filters_tool_messages(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Chat mode keeps only the plain user/assistant text messages.

    Of four inputs (user, assistant with ``tool_calls``, ``tool`` result,
    assistant answer) the single resulting memcell must list two ids.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=llm)

    tool_call = {
        "id": "c1",
        "type": "function",
        "function": {"name": "x", "arguments": "{}"},
    }
    conversation = [
        _user("debug this", 1_700_000_000_000),
        _msg(
            "assistant",
            "calling tool",
            timestamp=1_700_000_001_000,
            tool_calls=[tool_call],
        ),
        _msg(
            "tool",
            "result",
            sender_id="tool",
            timestamp=1_700_000_002_000,
            tool_call_id="c1",
        ),
        _assistant("here's the answer", 1_700_000_003_000),
    ]
    request = {"session_id": "test_chat_filter", "messages": conversation}
    outcome = await memorize(request, is_final=True)

    # After filter: 1 user + 1 assistant text = 2 msgs → 1 cell on flush.
    assert outcome.status == "extracted"
    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    message_ids = json.loads(cells[0]["message_ids_json"])
    assert len(message_ids) == 2  # tool + assistant-with-tool_calls dropped
async def test_duplicate_message_id_dedup_across_adds(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Replaying the same payload across two adds does not duplicate messages.

    After two identical non-final adds and a final flush, the single
    memcell must list exactly two distinct message ids.
    """
    # 2 boundary calls, both empty
    llm = _make_fake_llm(boundary_responses=[[], []])
    await memorize_env(mode="chat", fake_llm=llm)

    # The same payload sent twice is expected to map to the same message
    # ids, so the second add should be a no-op insert.
    session = "test_dedup"
    payload = {
        "session_id": session,
        "messages": [
            _user("hi", 1_700_000_000_000),
            _assistant("hi back", 1_700_000_001_000),
        ],
    }
    for _ in range(2):  # original add + replay
        await memorize(payload, is_final=False)
    await memorize({"session_id": "test_dedup", "messages": []}, is_final=True)

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1
    message_ids = json.loads(cells[0]["message_ids_json"])
    assert len(message_ids) == 2  # not 4 — dedup worked
    assert len(set(message_ids)) == 2  # unique
# ---------------------------------------------------------------------------
# Hard-limit cases (2)
# ---------------------------------------------------------------------------
async def test_hard_msg_limit_force_split(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A batch larger than ``hard_msg_limit`` ends up in at least two cells.

    With ``hard_msg_limit=3`` and five alternating user/assistant messages,
    the flush must report ``extracted`` and leave two or more memcell rows.
    """
    llm = _make_fake_llm(boundary_responses=[[]])  # LLM call after force-split
    # hard_msg_limit=3 → batch of 5 msgs forces ~1 split before LLM.
    await memorize_env(
        mode="chat", fake_llm=llm, hard_msg_limit=3, hard_token_limit=10_000
    )

    msgs = []
    for i in range(5):
        ts = 1_700_000_000_000 + i * 1000
        if i % 2 == 0:
            msgs.append(_user(f"u{i}", ts, sender="u_alice"))
        else:
            msgs.append(_assistant(f"a{i}", ts))

    request = {"session_id": "test_hardmsg", "messages": msgs}
    outcome = await memorize(request, is_final=True)

    assert outcome.status == "extracted"
    cells = _memcell_rows(tmp_path)
    # Force-split + LLM final → at least 2 cells (force + remaining).
    assert len(cells) >= 2
async def test_hard_token_limit_force_split(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A tiny ``hard_token_limit`` makes a four-message batch split.

    With ``hard_token_limit=20`` and four 200-character messages, the flush
    must report ``extracted`` and leave two or more memcell rows.
    """
    llm = _make_fake_llm(boundary_responses=[[]])
    # Very small token budget → even tiny content triggers force-split.
    await memorize_env(
        mode="chat", fake_llm=llm, hard_msg_limit=500, hard_token_limit=20
    )

    base_ts = 1_700_000_000_000
    msgs = [
        _user("a" * 200, base_ts, sender="u_alice"),
        _assistant("b" * 200, base_ts + 1_000),
        _user("c" * 200, base_ts + 2_000, sender="u_alice"),
        _assistant("d" * 200, base_ts + 3_000),
    ]
    request = {"session_id": "test_hardtok", "messages": msgs}
    outcome = await memorize(request, is_final=True)

    assert outcome.status == "extracted"
    assert len(_memcell_rows(tmp_path)) >= 2
# ---------------------------------------------------------------------------
# Flush state-machine cases (4)
# ---------------------------------------------------------------------------
async def test_flush_on_virgin_session_is_noop(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Flushing a session that never received messages must not crash.

    Expects an ``accumulated`` result and no memcell rows.
    """
    await memorize_env(mode="chat", fake_llm=_make_fake_llm())

    request = {"session_id": "test_virgin_flush", "messages": []}
    outcome = await memorize(request, is_final=True)

    assert outcome.status == "accumulated"
    assert _memcell_rows(tmp_path) == []
async def test_add_then_flush_then_add(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """A session stays usable after a flush has emptied its buffer.

    Sequence: add one turn (not final) → flush with an empty batch → add
    a second turn as final.  The memcell count is expected to be 1 after
    the first flush and 2 (cumulative) after the second turn.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[], []])
    await memorize_env(mode="chat", fake_llm=scripted_llm)
    sid = "test_add_flush_add"

    opening_turn = [
        _user("first", 1_700_000_000_000),
        _assistant("ack", 1_700_000_001_000),
    ]
    await memorize({"session_id": sid, "messages": opening_turn}, is_final=False)
    await memorize({"session_id": sid, "messages": []}, is_final=True)
    cells_after_first_flush = len(_memcell_rows(tmp_path))
    assert cells_after_first_flush == 1

    # Ten seconds later the same session receives another turn.
    follow_up_turn = [
        _user("second turn", 1_700_000_010_000),
        _assistant("ok", 1_700_000_011_000),
    ]
    await memorize({"session_id": sid, "messages": follow_up_turn}, is_final=True)
    assert len(_memcell_rows(tmp_path)) == 2  # cumulative
async def test_consecutive_flushes_second_is_noop(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Two back-to-back flushes: only the first one has anything to extract.

    After a single non-final add, the first flush is expected to report
    ``extracted`` and the second ``accumulated``, with exactly one memcell
    row overall.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=scripted_llm)
    sid = "test_double_flush"

    def _flush_request() -> dict[str, Any]:
        # A fresh payload per call, exactly as if written out twice.
        return {"session_id": sid, "messages": []}

    turn = [
        _user("hi", 1_700_000_000_000),
        _assistant("ok", 1_700_000_001_000),
    ]
    await memorize({"session_id": sid, "messages": turn}, is_final=False)

    first_flush = await memorize(_flush_request(), is_final=True)
    second_flush = await memorize(_flush_request(), is_final=True)

    assert first_flush.status == "extracted"
    assert second_flush.status == "accumulated"  # nothing left
    assert len(_memcell_rows(tmp_path)) == 1
async def test_flush_drains_assistant_only_buffer(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Assistant-only adds park in the buffer until a final add drains it.

    Two non-final adds each carry a single assistant message and must
    leave two rows in the buffer.  A final add with one user message is
    then expected to report ``extracted`` and leave the buffer empty.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[]])
    await memorize_env(mode="chat", fake_llm=scripted_llm)
    sid = "test_asst_then_flush"

    # Assistant-only batches, one second apart.
    for offset, body in enumerate(("a1", "a2")):
        assistant_only = [_assistant(body, 1_700_000_000_000 + offset * 1000)]
        await memorize(
            {"session_id": sid, "messages": assistant_only}, is_final=False
        )
    assert _buffer_count(tmp_path) == 2

    # A user message arrives together with the flush.
    closing_batch = [_user("anyone there?", 1_700_000_002_000)]
    outcome = await memorize(
        {"session_id": sid, "messages": closing_batch}, is_final=True
    )

    assert outcome.status == "extracted"
    assert _buffer_count(tmp_path) == 0
# ---------------------------------------------------------------------------
# Multi-session cases (2)
# ---------------------------------------------------------------------------
async def test_two_sessions_are_isolated(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Cells produced for two different ``session_id`` values do not mix.

    Each session gets one final add with its own sender.  The test expects
    exactly two memcell rows, one per session, and checks that each row's
    ``sender_ids_json`` contains the sender used for that session.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[], []])  # 1 per session
    await memorize_env(mode="chat", fake_llm=scripted_llm)

    # (session_id, user text, assistant text, first timestamp, user sender)
    scenarios = (
        ("sess_A", "hi from A", "ack A", 1_700_000_000_000, "u_alice"),
        ("sess_B", "hi from B", "ack B", 1_700_000_010_000, "u_bob"),
    )
    for session_id, greeting, reply, start_ts, sender in scenarios:
        exchange = [
            _user(greeting, start_ts, sender=sender),
            _assistant(reply, start_ts + 1000),
        ]
        await memorize(
            {"session_id": session_id, "messages": exchange}, is_final=True
        )

    rows = _memcell_rows(tmp_path)
    assert len(rows) == 2
    seen_sessions = sorted(row["session_id"] for row in rows)
    assert seen_sessions == ["sess_A", "sess_B"]

    # MemCell has no single owner — sender_ids carries who participated.
    senders_by_session: dict[str, Any] = {}
    for row in rows:
        senders_by_session[row["session_id"]] = json.loads(row["sender_ids_json"])
    assert "u_alice" in senders_by_session["sess_A"]
    assert "u_bob" in senders_by_session["sess_B"]
async def test_same_session_multi_add_concatenates(
    tmp_path: Path, memorize_env: Callable[..., Any]
) -> None:
    """Repeated non-final adds pile up in one buffer until the flush.

    Three adds of two messages each must leave six buffer rows.  The flush
    is then expected to produce a single memcell whose
    ``message_ids_json`` lists all six messages.
    """
    scripted_llm = _make_fake_llm(boundary_responses=[[], [], []])
    await memorize_env(mode="chat", fake_llm=scripted_llm)
    sid = "test_multi_add"

    for turn in range(3):
        # Each turn starts two seconds after the previous one.
        turn_ts = 1_700_000_000_000 + turn * 2000
        exchange = [
            _user(f"u{turn}", turn_ts),
            _assistant(f"a{turn}", turn_ts + 1000),
        ]
        await memorize({"session_id": sid, "messages": exchange}, is_final=False)

    # Buffer should have 6 messages now (no boundary cuts).
    assert _buffer_count(tmp_path) == 6

    outcome = await memorize({"session_id": sid, "messages": []}, is_final=True)
    assert outcome.status == "extracted"

    cells = _memcell_rows(tmp_path)
    assert len(cells) == 1  # one cell from the flush
    folded_ids = json.loads(cells[0]["message_ids_json"])
    assert len(folded_ids) == 6  # all 6 messages folded in

View File

@ -0,0 +1,433 @@
"""Window-segmentation white-box integration tests for boundary stage.
Verifies the **read-merge-boundary-write** semantics of one ``memorize()``
invocation, especially the buffer-as-tail invariant and the **buffer
replacement** behaviour on successive calls:
Invariants under test
---------------------
I1. After one ``add`` with ``boundaries=[k]``:
- memcell rows: prefix of merged input (first k messages)
- buffer rows: tail (the remaining messages)
- every input message_id lands in exactly one of {memcell, buffer}
(covered ∧ disjoint)
I2. Tail ordering: every buffer row's timestamp ≥ every memcell row's
timestamp (the tail is the **last** part of the time-ordered slice).
I3. Successive ``add`` consumes prior buffer:
- Round 2's boundary sees ``prior_buffer + new_batch`` merged.
- The prior tail (m3 say) ends up in **Round 2's memcell** if the
boundary cuts past it, NOT in any buffer row.
- The new buffer is the **fresh** tail, with the old buffer rows
replaced entirely (semantics of ``_replace_buffer``).
I4. ``flush`` with ``is_final=True`` drains the buffer entirely — every
remaining message ends up in some memcell.
This is **single-threaded sequential** (the concurrent race is covered
separately in test_memorize_concurrent_session_lock.py). FakeLLM scripts
boundary decisions deterministically so we own exact slicing.
"""
from __future__ import annotations
import importlib
import json
from collections.abc import AsyncIterator, Callable
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock
import pytest
import pytest_asyncio
from everalgo.llm.types import ChatMessage as LLMChatMessage
from everalgo.llm.types import ChatResponse
from everalgo.testing.fake_llm import FakeLLMClient
from sqlalchemy import text
from sqlmodel import SQLModel
from everos.core.persistence import MemoryRoot
from everos.service.memorize import memorize
# ---------------------------------------------------------------------------
# FakeLLM with scripted boundary responses (FIFO queue, one pop per call)
# ---------------------------------------------------------------------------
def _boundary_response(boundaries: list[int]) -> str:
    """Serialise a canned boundary-detection reply carrying ``boundaries``.

    The JSON object has three keys: a fixed ``reasoning`` string, the given
    ``boundaries`` list, and ``should_wait`` set to ``false``.
    """
    payload = {"reasoning": "test", "boundaries": boundaries, "should_wait": False}
    return json.dumps(payload)
def _episode_response(title: str = "T", content: str = "B") -> str:
    """Serialise a canned episode reply with ``title`` and ``content`` keys."""
    payload = dict(title=title, content=content)
    return json.dumps(payload)
def _make_scripted_llm(
    boundary_responses: list[list[int]],
) -> FakeLLMClient:
    """Build a ``FakeLLMClient`` that replays ``boundary_responses`` in order.

    A call counts as a boundary call when the first message's content
    contains ``boundaries`` or ``memcell`` (case-insensitive).  Each such
    call consumes the next scripted cut list; once the script runs out the
    reply carries an empty list.  Every other call gets the canned episode
    reply.
    """
    # Private copy so the caller's list is never mutated.
    pending: list[list[int]] = list(boundary_responses)

    def handler(messages: list[LLMChatMessage], **_: Any) -> ChatResponse:
        lowered = messages[0].content.lower()
        is_boundary_call = "boundaries" in lowered or "memcell" in lowered
        if not is_boundary_call:
            return ChatResponse(content=_episode_response(), model="fake")
        cuts = pending.pop(0) if pending else []
        return ChatResponse(content=_boundary_response(cuts), model="fake")

    return FakeLLMClient(handler=handler)
# ---------------------------------------------------------------------------
# Fixture — mirrors the locked-env fixture in the concurrent test
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def memorize_env_scripted(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[Callable[..., AsyncMock]]:
    """Yield an async ``_setup(fake_llm=...)`` that boots memorize in ``tmp_path``.

    Before yielding, the fixture points ``MemoryRoot.default`` at
    ``tmp_path``, creates ``.index/sqlite`` there, sets the module-level
    cached objects listed below to ``None`` and resets the session-lock
    module.  The test then awaits the yielded ``_setup`` with its scripted
    ``FakeLLMClient``; that call sets the ``EVEROS_*`` environment
    variables, creates the SQLModel tables, replaces both OME extractors
    with stubs whose ``aextract`` returns ``[]``, and starts the engine.

    Teardown stops the engine (only if ``_setup`` got as far as starting
    it) and always disposes the sqlite engine: disposal sits in a
    ``finally`` so a failing ``engine.stop()`` cannot skip it.

    NOTE(review): ``_setup`` returns ``None``, so ``Callable[..., AsyncMock]``
    in the return annotation looks inaccurate; it is left unchanged because
    the tests annotate the fixture argument the same way.
    """
    monkeypatch.setattr(
        MemoryRoot, "default", classmethod(lambda cls: MemoryRoot(root=tmp_path))
    )
    (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)

    svc = importlib.import_module("everos.service.memorize")
    af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    client_mod = importlib.import_module("everos.component.llm.client")
    lock_mod = importlib.import_module("everos.service._session_lock")

    # Clear module-level cached objects; ``raising=False`` tolerates a
    # module that does not define one of these names.
    for attr in (
        "_episode_writer",
        "_prompt_loader",
        "_user_pipeline",
        "_agent_pipeline",
        "_ome_engine",
    ):
        monkeypatch.setattr(svc, attr, None, raising=False)
    monkeypatch.setattr(client_mod, "_llm_client", None, raising=False)
    monkeypatch.setattr(af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(fs_mod, "_writer", None, raising=False)
    lock_mod._reset_for_tests()

    # Filled in by ``_setup`` so teardown knows whether an engine was started.
    started: dict[str, Any] = {"engine": None}

    async def _setup(*, fake_llm: FakeLLMClient) -> None:
        monkeypatch.setenv("EVEROS_MEMORIZE__MODE", "chat")
        monkeypatch.setenv("EVEROS_LLM__API_KEY", "fake-key")
        monkeypatch.setenv("EVEROS_LLM__BASE_URL", "https://fake.example.com")

        from everos.config import load_settings

        # Settings are cached; clear so the env vars above are re-read.
        load_settings.cache_clear()
        monkeypatch.setattr(client_mod, "_llm_client", fake_llm)

        from everos.infra.persistence.sqlite import get_engine

        db_engine = get_engine()
        async with db_engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Silence OME strategies — orthogonal to boundary segmentation.
        mock_af = AsyncMock(return_value=[])
        mock_fs = AsyncMock(return_value=[])
        monkeypatch.setattr(
            af_mod,
            "AtomicFactExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_af})(),
        )
        monkeypatch.setattr(
            fs_mod,
            "ForesightExtractor",
            lambda *a, **k: type("M", (), {"aextract": mock_fs})(),
        )

        engine = svc._get_engine()
        await engine.start()
        started["engine"] = engine

    yield _setup

    from everos.infra.persistence.sqlite import dispose_engine

    try:
        if started["engine"] is not None:
            await started["engine"].stop()
    finally:
        # Always release the sqlite engine, even when stop() raised.
        await dispose_engine()
# ---------------------------------------------------------------------------
# Helpers — message factory + state inspectors
# ---------------------------------------------------------------------------
_BASE_TS = 1_700_000_000_000  # 2023-11-14, plenty of headroom


def _msg(idx: int, sender: str = "alice") -> dict[str, Any]:
    """Return the ``idx``-th canonical /add message as a plain dict.

    Messages are user-role, spaced one second apart starting at
    ``_BASE_TS``, and their content is ``msg-<idx>``.
    """
    offset_ms = idx * 1000
    return dict(
        sender_id=sender,
        role="user",
        timestamp=_BASE_TS + offset_ms,
        content=f"msg-{idx}",
    )
async def _buffer_rows(session_id: str) -> list[tuple[str, int]]:
    """Fetch the session's ``unprocessed_buffer`` rows, oldest first.

    Each entry is ``(message_id, timestamp_ms)``.  The stored timestamp is
    passed through ``from_iso_format`` and then ``to_timestamp_ms`` to get
    the integer value.
    """
    from everos.component.utils.datetime import from_iso_format, to_timestamp_ms
    from everos.infra.persistence.sqlite import get_engine

    query = text(
        "SELECT message_id, timestamp FROM unprocessed_buffer "
        "WHERE session_id = :s ORDER BY timestamp"
    )
    async with get_engine().connect() as conn:
        fetched = (await conn.execute(query, {"s": session_id})).fetchall()

    # sqlite stores DateTime as ISO 8601 string via SQLAlchemy.
    return [
        (message_id, to_timestamp_ms(from_iso_format(raw_ts)))
        for message_id, raw_ts in fetched
    ]
async def _memcell_rows(session_id: str) -> list[tuple[str, list[str]]]:
    """Fetch the session's memcells as ``(memcell_id, message_ids)`` pairs.

    Rows come back ordered by ``created_at``; ``message_ids_json`` is
    decoded with ``json.loads`` before being returned.
    """
    from everos.infra.persistence.sqlite import get_engine

    query = text(
        "SELECT memcell_id, message_ids_json FROM memcell "
        "WHERE session_id = :s ORDER BY created_at"
    )
    async with get_engine().connect() as conn:
        fetched = (await conn.execute(query, {"s": session_id})).fetchall()

    decoded: list[tuple[str, list[str]]] = []
    for memcell_id, raw_ids in fetched:
        decoded.append((memcell_id, json.loads(raw_ids)))
    return decoded
# ---------------------------------------------------------------------------
# I1 + I2: single add with boundaries=[k] — prefix→memcell, suffix→buffer
# ---------------------------------------------------------------------------
async def test_single_add_no_cut_accumulates_full_batch_in_buffer(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """With ``boundaries=[]`` nothing is carved: the whole batch is buffered."""
    await memorize_env_scripted(fake_llm=_make_scripted_llm([[]]))
    session = "s_no_cut"

    await memorize(
        {"session_id": session, "messages": [_msg(i) for i in range(3)]}
    )

    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)
    assert cells == [], f"expected no memcell, got {cells}"
    assert len(buffer) == 3, f"expected 3 buffer rows, got {len(buffer)}"

    # Buffer timestamps must be non-decreasing.  ``_buffer_rows`` already
    # orders by timestamp, so this guards the decoded values, not the
    # insertion order.
    stamps = [row[1] for row in buffer]
    assert all(earlier <= later for earlier, later in zip(stamps, stamps[1:]))
async def test_single_add_with_cut_splits_prefix_to_cell_suffix_to_buffer(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """A cut at 2 in a 3-message batch: two messages in the cell, one buffered."""
    await memorize_env_scripted(fake_llm=_make_scripted_llm([[2]]))
    session = "s_cut"

    await memorize(
        {"session_id": session, "messages": [_msg(i) for i in range(3)]}
    )
    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)

    # One memcell holding two distinct message ids.
    assert len(cells) == 1, cells
    _, carved_ids = cells[0]
    cell_msg_ids = set(carved_ids)
    assert len(cell_msg_ids) == 2

    # One leftover message in the buffer.
    assert len(buffer) == 1
    buf_msg_id, buf_ts = buffer[0]

    # Disjoint: the buffered message must not also be in the memcell.
    assert buf_msg_id not in cell_msg_ids, (
        "buffer row leaked into memcell — buffer should be the tail only"
    )

    # I2 — the tail is not older than the newest message of the prefix
    # (m1, sent one second after ``_BASE_TS``).
    cell_max_ts = _BASE_TS + 1 * 1000
    assert buf_ts >= cell_max_ts, (
        f"tail ts ({buf_ts}) must be >= max cell ts ({cell_max_ts})"
    )
# ---------------------------------------------------------------------------
# I3: successive add — prior buffer feeds into next memcell, then is REPLACED
# ---------------------------------------------------------------------------
async def test_second_add_consumes_prior_buffer_and_replaces_tail(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """The tail left by round 1 must be carved into round 2's memcell.

    Scripted cuts:

    * round 1 — cut after 2 of 3 → cell=[m0,m1], buffer=[m2]
    * round 2 — merged input [m2,m3,m4,m5], cut after 3 →
      cell=[m2,m3,m4], buffer=[m5]
    """
    await memorize_env_scripted(
        fake_llm=_make_scripted_llm([[2], [3]]),
    )
    session = "s_replace"

    # ---- Round 1 ----
    await memorize(
        {"session_id": session, "messages": [_msg(i) for i in range(3)]}
    )
    cells_after_r1 = await _memcell_rows(session)
    buffer_after_r1 = await _buffer_rows(session)
    assert len(cells_after_r1) == 1
    assert len(buffer_after_r1) == 1
    prior_tail_msg_id, _ = buffer_after_r1[0]

    # ---- Round 2: fresh messages m3, m4, m5 ----
    await memorize(
        {"session_id": session, "messages": [_msg(i) for i in range(3, 6)]}
    )
    r2_cells = await _memcell_rows(session)
    r2_buffer = await _buffer_rows(session)

    # One memcell per round.
    assert len(r2_cells) == 2, r2_cells
    round1_cell_msgs, round2_cell_msgs = (set(ids) for _, ids in r2_cells)

    # KEY ASSERTION — the previously buffered message was consumed by round 2.
    assert prior_tail_msg_id in round2_cell_msgs, (
        f"prior buffer msg {prior_tail_msg_id} should have been consumed "
        f"into round 2's memcell, but it's missing from {round2_cell_msgs}"
    )
    # Prior tail plus the first two new messages.
    assert len(round2_cell_msgs) == 3

    # Round 1's cell is untouched.
    assert len(round1_cell_msgs) == 2
    assert prior_tail_msg_id not in round1_cell_msgs

    # The buffer now holds exactly one row: the new tail.
    assert len(r2_buffer) == 1
    new_tail_id, _ = r2_buffer[0]

    # KEY ASSERTION — the old buffer entry was replaced, not appended to.
    assert new_tail_id != prior_tail_msg_id, (
        "old buffer entry survived into round 2's buffer — "
        "_replace_buffer is supposed to wipe + reinsert, not append"
    )

    # Buffer and memcells share no message.
    all_cell_msgs = round1_cell_msgs | round2_cell_msgs
    assert new_tail_id not in all_cell_msgs

    # Conservation: 6 distinct message ids covered across cells + buffer.
    # (We avoid hard-coding id format here — gen_message_id encodes the
    # per-batch index, not a global one.)
    covered = all_cell_msgs | {new_tail_id}
    assert len(covered) == 6, (
        f"expected 6 distinct ids covered, got {len(covered)}: {covered}"
    )
# ---------------------------------------------------------------------------
# I4: flush drains buffer entirely (is_final=True path)
# ---------------------------------------------------------------------------
async def test_flush_after_accumulation_drains_buffer_into_memcell(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """An uncut add followed by a flush: one cell with everything, empty buffer.

    The add is scripted with ``boundaries=[]`` so all three messages are
    buffered.  The flush (``is_final=True`` with no new messages) is then
    expected to close them into a single memcell.
    """
    await memorize_env_scripted(
        fake_llm=_make_scripted_llm([[], []]),
    )
    session = "s_flush"

    await memorize(
        {"session_id": session, "messages": [_msg(i) for i in range(3)]}
    )

    # After the add: no memcell yet, three rows waiting in the buffer.
    cells_before = await _memcell_rows(session)
    buffer_before = await _buffer_rows(session)
    assert cells_before == []
    assert len(buffer_before) == 3

    await memorize({"session_id": session, "messages": []}, is_final=True)

    # After the flush: one cell with all three messages, nothing buffered.
    cells = await _memcell_rows(session)
    buffer_after = await _buffer_rows(session)
    assert len(cells) == 1, cells
    _, flushed_ids = cells[0]
    assert len(flushed_ids) == 3
    assert buffer_after == []
# ---------------------------------------------------------------------------
# Sanity: mixed boundaries (one empty, two cuts) across sequential adds keep conservation
# ---------------------------------------------------------------------------
async def test_three_sequential_adds_conservation_no_loss(
    memorize_env_scripted: Callable[..., AsyncMock],
) -> None:
    """Across three adds with mixed cuts no message is lost or duplicated.

    Scripted plan:

    * add 1 — 3 msgs, no cut → buffer=[m0,m1,m2]
    * add 2 — 3 msgs, cut after 4 of merged [m0..m5] → cell=[m0..m3],
      buffer=[m4,m5]
    * add 3 — 3 msgs, cut after 3 of merged [m4..m8] → cell=[m4,m5,m6],
      buffer=[m7,m8]

    Only conservation and disjointness are asserted, not the exact slices.
    """
    await memorize_env_scripted(
        fake_llm=_make_scripted_llm([[], [4], [3]]),
    )
    session = "s_seq"

    batch_size = 3
    batch_starts = (0, 3, 6)
    for start in batch_starts:
        batch = [_msg(i) for i in range(start, start + batch_size)]
        await memorize({"session_id": session, "messages": batch})
    total_inputs = batch_size * len(batch_starts)

    cells = await _memcell_rows(session)
    buffer = await _buffer_rows(session)

    in_cells: set[str] = {mid for _, msg_ids in cells for mid in msg_ids}
    in_buffer = set()
    for mid, _ in buffer:
        in_buffer.add(mid)

    covered = in_cells | in_buffer
    assert len(covered) == total_inputs, (
        f"expected {total_inputs} ids covered, got {len(covered)}"
    )
    # A message id may live in a cell or in the buffer, never in both.
    assert not (in_cells & in_buffer)

View File

@ -0,0 +1,614 @@
"""End-to-end: emit pipeline event → strategies dispatch → SUCCESS + log lines."""
from __future__ import annotations
import asyncio
import datetime as _dt
import hashlib
import uuid
from collections.abc import Sequence
from pathlib import Path
from unittest.mock import AsyncMock, patch
import numpy as np
import pytest
from everalgo.types import AgentCase, AtomicFact, ChatMessage, Foresight, MemCell
from structlog.testing import capture_logs
from everos.memory.events import (
AgentCaseExtracted,
AgentPipelineStarted,
EpisodeExtracted,
UserPipelineStarted,
)
class _DeterministicHashEmbedder:
    """Embedder whose vectors are derived from a hash of the input text.

    The first 8 bytes of the text's sha256 digest seed
    ``numpy.random.default_rng``; a standard-normal float32 vector of
    length ``dim`` is drawn from it and scaled to unit length.  Identical
    text therefore always yields the identical vector, and different texts
    get different seeds.  The vectors carry no semantic meaning — they
    exist so ``cluster_by_geometry`` / ``cluster_by_llm`` have non-constant
    similarities to work with, unlike a MagicMock returning one fixed
    vector (which makes every cosine similarity 1.0).
    """

    dim: int = 1024

    async def embed(self, text: str) -> list[float]:
        """Return the unit vector for ``text`` as a list of floats."""
        seed_bytes = hashlib.sha256(text.encode("utf-8")).digest()[:8]
        generator = np.random.default_rng(int.from_bytes(seed_bytes, "little"))
        vec = generator.standard_normal(self.dim).astype(np.float32)
        length = float(np.linalg.norm(vec))
        if not length:
            # Guard against dividing by zero for an all-zero draw.
            length = 1.0
        vec /= length
        return vec.tolist()

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed every text in order, one ``embed`` call at a time."""
        vectors: list[list[float]] = []
        for item in texts:
            vectors.append(await self.embed(item))
        return vectors
def _sample_memcell() -> MemCell:
    """Build a three-message MemCell with two user senders and one agent.

    Messages ``m1``..``m3`` are one second apart; the cell's own timestamp
    equals that of the last message.
    """
    first_ts = 1_700_000_000_000
    # (id, role, content, sender_id) — listed in chronological order.
    turns = (
        ("m1", "user", "alice likes hiking", "u_alice"),
        ("m2", "user", "bob plans a trip", "u_bob"),
        ("m3", "assistant", "sounds good", "agent"),
    )
    items = [
        ChatMessage(
            id=message_id,
            role=role,
            content=content,
            timestamp=first_ts + position * 1000,
            sender_id=sender,
        )
        for position, (message_id, role, content, sender) in enumerate(turns)
    ]
    return MemCell(items=items, timestamp=first_ts + 2000)
@pytest.mark.asyncio
async def test_emit_dispatches_both_strategies_to_success(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Real OfflineEngine + APScheduler runtime; extractors + LLM mocked.

    Verifies the full chain: emit(event) → dispatcher (3 gates) → APS one-shot
    job → Runner.run → strategy body → mark_success.

    Cleanup is nested: once ``_setup_system_db_schema`` has run, the sqlite
    teardown is guaranteed by an outer ``finally`` — even when
    ``engine.start()`` or ``engine.stop()`` raises — and ``engine.stop()``
    is only attempted for an engine that actually started.
    """
    import importlib

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")

    # Redirect MemoryRoot.default() to tmp_path so _get_engine() writes ome.db
    # under the test's isolated temp directory instead of the real ~/.everos.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    # Reset singletons so they rebuild against the patched MemoryRoot.
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    _af_mod = importlib.import_module("everos.memory.strategies.extract_atomic_facts")
    _fs_mod = importlib.import_module("everos.memory.strategies.extract_foresight")
    monkeypatch.setattr(_af_mod, "_writer", None, raising=False)
    monkeypatch.setattr(_fs_mod, "_writer", None, raising=False)

    fake_fact = AtomicFact(
        owner_id="u_alice", content="hi", timestamp=1_700_000_000_000
    )
    fake_foresight = Foresight(
        owner_id="u_alice",
        foresight="x",
        evidence="y",
        timestamp=1_700_000_000_000,
    )

    with (
        patch(
            "everos.memory.strategies.extract_atomic_facts.AtomicFactExtractor"
        ) as mock_af,
        patch(
            "everos.memory.strategies.extract_foresight.ForesightExtractor"
        ) as mock_fs,
        patch(
            "everos.memory.strategies.extract_atomic_facts.get_llm_client",
            return_value=object(),
        ),
        patch(
            "everos.memory.strategies.extract_foresight.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as logs,
    ):
        mock_af.return_value.aextract = AsyncMock(return_value=[fake_fact])
        mock_fs.return_value.aextract = AsyncMock(return_value=[fake_foresight])

        # Ensure the sqlite dir exists before the engine creates ome.db.
        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)
        try:
            engine = svc._get_engine()
            await engine.start()
            try:
                await engine.emit(
                    UserPipelineStarted(
                        memcell_id="mc_a",
                        session_id="s1",
                        memcell=_sample_memcell(),
                    )
                )

                # Poll until both strategies reach SUCCESS (max 5 s).
                af_rows: list = []
                fs_rows: list = []
                for _ in range(50):
                    await asyncio.sleep(0.1)
                    af_rows = await engine.list_runs(
                        "extract_atomic_facts", status=RunStatus.SUCCESS
                    )
                    fs_rows = await engine.list_runs(
                        "extract_foresight", status=RunStatus.SUCCESS
                    )
                    if af_rows and fs_rows:
                        break

                assert af_rows, "expected SUCCESS RunRecord for extract_atomic_facts"
                assert fs_rows, "expected SUCCESS RunRecord for extract_foresight"
                assert af_rows[0].strategy_name == "extract_atomic_facts"
                assert fs_rows[0].strategy_name == "extract_foresight"
            finally:
                await engine.stop()
        finally:
            # Runs even if start()/stop() raised, so the sqlite engine
            # created by _setup_system_db_schema is always disposed.
            await _teardown_system_db_schema()

    af_logs = [r for r in logs if r.get("event") == "atomic_facts_extracted"]
    fs_logs = [r for r in logs if r.get("event") == "foresights_extracted"]
    assert af_logs, "expected atomic_facts_extracted log line"
    assert fs_logs, "expected foresights_extracted log line"
    # The sample MemCell has 2 user senders (u_alice, u_bob), so each
    # strategy gathers one result per sender and flattens them:
    #   extract_atomic_facts: 2 senders × 1 fake_fact each = 2
    #   extract_foresight:    2 senders × 1 fake_foresight each = 2
    assert af_logs[0]["count"] == 2
    assert fs_logs[0]["count"] == 2
async def _setup_system_db_schema(monkeypatch: pytest.MonkeyPatch) -> None:
    """Rebuild the sqlite system.db engine + schema against the active tmp_path.

    ``sqlite_manager`` keeps its engine as a process-wide singleton.  If it
    were not reset between tests, a later e2e test would reuse the engine
    bound to an earlier test's tmp directory and never run ``create_all``
    on its own fresh one.  ``SQLModel.metadata.create_all`` mirrors what
    :class:`SqliteLifespanProvider` runs at app startup.

    Always pair with :func:`_teardown_system_db_schema` in the calling
    test's ``finally`` block: the engine created here owns an aiosqlite
    worker thread that must be closed explicitly, or it lingers past the
    event loop and raises ``RuntimeError: Event loop is closed`` from the
    worker.
    """
    from sqlmodel import SQLModel

    from everos.infra.persistence.sqlite import sqlite_manager

    # Dispose whatever engine an earlier test may have left behind.
    leftover = sqlite_manager._engine  # noqa: SLF001
    if leftover is not None:
        await sqlite_manager.dispose_engine()

    # Clear both cached objects so get_engine() builds a new engine.
    for cached_attr in ("_engine", "_session_factory"):
        monkeypatch.setattr(sqlite_manager, cached_attr, None, raising=False)

    fresh_engine = sqlite_manager.get_engine()
    async with fresh_engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
async def _teardown_system_db_schema() -> None:
    """Dispose the per-test sqlite engine, if one exists.

    Counterpart of :func:`_setup_system_db_schema`; keeps the engine's
    worker thread from outliving the event loop.
    """
    from everos.infra.persistence.sqlite import sqlite_manager

    if sqlite_manager._engine is None:  # noqa: SLF001
        return
    await sqlite_manager.dispose_engine()
def _agent_memcell() -> MemCell:
    """Build a two-message MemCell: a user request and an agent's reply.

    The reply is sent by ``agent_42`` one second after the request; the
    cell's own timestamp equals that of the reply.
    """
    request_ts = 1_700_000_000_000
    reply_ts = request_ts + 1000

    request = ChatMessage(
        id="m1",
        role="user",
        content="please summarise",
        timestamp=request_ts,
        sender_id="u_alice",
    )
    reply = ChatMessage(
        id="m2",
        role="assistant",
        content="here's the summary",
        timestamp=reply_ts,
        sender_id="agent_42",
    )
    return MemCell(items=[request, reply], timestamp=reply_ts)
@pytest.mark.asyncio
async def test_emit_dispatches_agent_case_strategy_to_success(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Mirror of the user-side e2e for the agent track.

    Verifies the full agent chain: AgentPipelineStarted emit → dispatcher
    (3 gates) → APS one-shot job → Runner.run → extract_agent_case body →
    mark_success. Catches breakage in event class wiring, trigger matching,
    engine registration, and the agent-side mock plumbing that unit tests
    bypass by calling the strategy function directly.

    Cleanup is nested: once ``_setup_system_db_schema`` has run, the sqlite
    teardown is guaranteed by an outer ``finally`` — even when
    ``engine.start()`` or ``engine.stop()`` raises — and ``engine.stop()``
    is only attempted for an engine that actually started.
    """
    import importlib

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")

    # Point MemoryRoot.default() at this test's tmp directory.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    # Clear cached objects so they are rebuilt under the patched root.
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    _ac_mod = importlib.import_module("everos.memory.strategies.extract_agent_case")
    monkeypatch.setattr(_ac_mod, "_writer", None, raising=False)

    fake_case = AgentCase(
        id=uuid.uuid4().hex,
        timestamp=1_700_000_001_000,
        task_intent="summarise the doc",
        approach="read + condense",
        quality_score=0.8,
        key_insight="",
    )

    with (
        patch(
            "everos.memory.strategies.extract_agent_case.AgentCaseExtractor"
        ) as mock_ac,
        patch(
            "everos.memory.strategies.extract_agent_case.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as logs,
    ):
        mock_ac.return_value.aextract = AsyncMock(return_value=[fake_case])

        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)
        try:
            engine = svc._get_engine()
            await engine.start()
            try:
                await engine.emit(
                    AgentPipelineStarted(
                        memcell_id="mc_a",
                        session_id="s1",
                        memcell=_agent_memcell(),
                    )
                )

                # Poll for the SUCCESS record: 50 × 0.1 s, i.e. up to ~5 s.
                ac_rows: list = []
                for _ in range(50):
                    await asyncio.sleep(0.1)
                    ac_rows = await engine.list_runs(
                        "extract_agent_case", status=RunStatus.SUCCESS
                    )
                    if ac_rows:
                        break

                assert ac_rows, "expected SUCCESS RunRecord for extract_agent_case"
                assert ac_rows[0].strategy_name == "extract_agent_case"
            finally:
                await engine.stop()
        finally:
            # Runs even if start()/stop() raised, so the sqlite engine
            # created by _setup_system_db_schema is always disposed.
            await _teardown_system_db_schema()

    ac_logs = [r for r in logs if r.get("event") == "agent_case_extracted"]
    assert ac_logs, "expected agent_case_extracted log line"
    assert ac_logs[0]["owner_ids"] == ["agent_42"]
    assert ac_logs[0]["fanout"] == 1
    assert ac_logs[0]["quality_score"] == 0.8
@pytest.mark.asyncio
async def test_skill_chain_e2e(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Chain: AgentCaseExtracted → trigger_skill_clustering (sqlite) →
    SkillClusterUpdated → extract_agent_skill → SUCCESS.

    Real ``cluster_by_llm`` algorithm path: hash-based deterministic
    embedder feeds the top-K nearest-neighbor stage, a ``FakeLLMClient``
    returns ``{"idx": "new"}`` so the algo picks the "brand-new cluster"
    branch — but the recall + skip-threshold + prompt-render + JSON-parse
    pipeline is all real. Only mocked: LanceDB reads (case + skill),
    ``AgentSkillExtractor`` (downstream extractor; out of scope), and
    the markdown writer.

    Cleanup is nested: once ``_setup_system_db_schema`` has run, the sqlite
    teardown is guaranteed by an outer ``finally`` — even when
    ``engine.start()`` or ``engine.stop()`` raises — and ``engine.stop()``
    is only attempted for an engine that actually started.
    """
    import importlib
    from unittest.mock import MagicMock

    from everalgo.testing.fake_llm import FakeLLMClient
    from everalgo.types import AgentSkill as AlgoAgentSkill

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    svc = importlib.import_module("everos.service.memorize")
    skill_mod = importlib.import_module("everos.memory.strategies.extract_agent_skill")

    # Point MemoryRoot.default() at this test's tmp directory and clear the
    # cached objects so they are rebuilt under it.
    monkeypatch.setattr(
        MemoryRoot,
        "default",
        classmethod(lambda cls: MemoryRoot(root=tmp_path)),
    )
    monkeypatch.setattr(svc, "_ome_engine", None, raising=False)
    monkeypatch.setattr(skill_mod, "_writer", None, raising=False)

    embedder = _DeterministicHashEmbedder()
    # FakeLLMClient: cluster_by_llm only invokes it when top-K similarity
    # falls below llm_skip_threshold (default 0.85). With a single new
    # cluster in an empty owner set, the recall stage returns no candidates
    # at all — so the LLM is never asked. Provide a "{idx: new}" response
    # anyway as belt-and-suspenders for future scenarios with seeded clusters.
    fake_llm = FakeLLMClient(responses=['{"idx": "new"}'])

    # Stand-in for the stored agent-case row returned by the mocked repo.
    target_lance = MagicMock()
    target_lance.entry_id = "ac_20260517_0001"
    target_lance.timestamp = _dt.datetime(2026, 5, 17, tzinfo=_dt.UTC)
    target_lance.task_intent = "summarise the doc"
    target_lance.approach = "read + condense"
    target_lance.quality_score = 0.8
    target_lance.key_insight = ""

    # cluster_id is left empty here; the final assertions check that the
    # frontmatter handed to the writer carries the logged cluster id.
    emitted_skill = AlgoAgentSkill(
        id=uuid.uuid4().hex,
        cluster_id="",
        name="summarise_doc",
        description="how to summarise docs",
        content="step 1: read; step 2: condense",
        confidence=0.7,
        maturity_score=0.5,
        source_case_ids=["ac_20260517_0001"],
    )

    with (
        patch(
            "everos.memory.strategies.trigger_skill_clustering.get_embedder",
            return_value=embedder,
        ),
        patch(
            "everos.memory.strategies.trigger_skill_clustering.get_llm_client",
            return_value=fake_llm,
        ),
        patch(
            "everos.memory.strategies.extract_agent_skill.agent_case_repo"
        ) as mock_case_repo,
        patch(
            "everos.memory.strategies.extract_agent_skill.agent_skill_repo"
        ) as mock_skill_repo,
        patch(
            "everos.memory.strategies.extract_agent_skill.get_llm_client",
            return_value=object(),
        ),
        patch(
            "everos.memory.strategies.extract_agent_skill.AgentSkillExtractor"
        ) as mock_extractor_cls,
        patch(
            "everos.memory.strategies.extract_agent_skill.AgentSkillWriter"
        ) as mock_writer_cls,
        capture_logs() as logs,
    ):
        mock_case_repo.find_by_owner_entry = AsyncMock(return_value=target_lance)
        mock_case_repo.find_by_owner_entries = AsyncMock(return_value=[])
        # Empty cluster (no prior skills) → small-cluster scalar path.
        mock_skill_repo.count_in_cluster = AsyncMock(return_value=0)
        mock_skill_repo.find_in_cluster = AsyncMock(return_value=[])
        mock_extractor_cls.return_value.aextract = AsyncMock(
            return_value=[emitted_skill]
        )
        mock_writer_cls.return_value.write_main = AsyncMock(return_value=None)

        (tmp_path / ".index" / "sqlite").mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)
        try:
            engine = svc._get_engine()
            await engine.start()
            try:
                await engine.emit(
                    AgentCaseExtracted(
                        memcell_id="mc_a",
                        case_entry_id="ac_20260517_0001",
                        task_intent="summarise the doc",
                        quality_score=0.8,
                        case_timestamp_ms=1_700_000_001_000,
                        agent_id="agent_42",
                    )
                )

                # Poll until both chained strategies report SUCCESS:
                # 50 × 0.1 s, i.e. up to ~5 s.
                clu_rows: list = []
                skill_rows: list = []
                for _ in range(50):
                    await asyncio.sleep(0.1)
                    clu_rows = await engine.list_runs(
                        "trigger_skill_clustering", status=RunStatus.SUCCESS
                    )
                    skill_rows = await engine.list_runs(
                        "extract_agent_skill", status=RunStatus.SUCCESS
                    )
                    if clu_rows and skill_rows:
                        break

                assert clu_rows, "expected SUCCESS for trigger_skill_clustering"
                assert skill_rows, "expected SUCCESS for extract_agent_skill"
            finally:
                await engine.stop()
        finally:
            # Runs even if start()/stop() raised, so the sqlite engine
            # created by _setup_system_db_schema is always disposed.
            await _teardown_system_db_schema()

    cluster_logs = [r for r in logs if r.get("event") == "skill_cluster_updated"]
    skill_logs = [r for r in logs if r.get("event") == "agent_skills_extracted"]
    assert cluster_logs, "expected skill_cluster_updated log line"
    assert skill_logs, "expected agent_skills_extracted log line"

    # The writer's most recent write_main call must carry frontmatter with
    # the cluster_id stamped.  ``call_args`` is None when the mock was never
    # called, so check that first for a readable failure.
    write_args = mock_writer_cls.return_value.write_main.call_args
    assert write_args is not None, "expected AgentSkillWriter.write_main to be called"
    fm = write_args.kwargs["frontmatter"]
    assert fm.cluster_id == cluster_logs[0]["cluster_id"]
    assert fm.name == "summarise_doc"
@pytest.mark.asyncio
async def test_profile_chain_e2e(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Exercise the profile chain end to end from one emitted episode.

    Chain under test: ``EpisodeExtracted`` → ``trigger_profile_clustering``
    (sqlite) → ``ProfileClusterUpdated`` → ``extract_user_profile`` → SUCCESS.

    Kept real: ``cluster_by_geometry`` (cosine + time-window), fed by a
    hash-based deterministic embedder so the geometry stage operates on
    well-spread unit vectors, and the sqlite-backed ``cluster_repo``.

    Mocked: ``memcell_repo`` (a real memcell row would require the boundary
    stage to run first, which is out of scope for the chain emit test), plus
    ``ProfileExtractor`` and the md reader/writer, which serve as the algo
    and IO seams.
    """
    from importlib import import_module
    from unittest.mock import MagicMock

    from everalgo.types import Profile as AlgoProfile

    from everos.core.persistence import MemoryRoot
    from everos.infra.ome.records import RunStatus

    # Values shared by the fake memcell, the mocked profile and the event.
    owner = "u_alice"
    memcell_id = "mc_aaaaaaaaaaa1"
    episode_text = "alice likes hiking"
    ts_ms = 1_700_000_001_000

    memorize_svc = import_module("everos.service.memorize")
    profile_strategy = import_module(
        "everos.memory.strategies.extract_user_profile"
    )

    def _tmp_root(cls):
        # Stand-in for ``MemoryRoot.default`` that roots everything at tmp_path.
        return MemoryRoot(root=tmp_path)

    monkeypatch.setattr(MemoryRoot, "default", classmethod(_tmp_root))
    # Null out module-level handles (presumably lazily-built caches — the
    # ``raising=False`` tolerates them not existing yet).
    monkeypatch.setattr(memorize_svc, "_ome_engine", None, raising=False)
    for cached_attr in ("_writer", "_reader"):
        monkeypatch.setattr(profile_strategy, cached_attr, None, raising=False)

    embedder = _DeterministicHashEmbedder()

    seed_message = ChatMessage(
        id="m1",
        role="user",
        content=episode_text,
        timestamp=ts_ms,
        sender_id=owner,
    )
    fake_memcell_row = MagicMock(
        memcell_id=memcell_id,
        payload_json=MemCell(
            items=[seed_message],
            timestamp=ts_ms,
        ).model_dump_json(),
    )

    profile_payload = {
        "owner_id": owner,
        "summary": "Alice is a hiker.",
        "timestamp": ts_ms,
        "explicit_info": ["lives in tokyo"],
        "implicit_traits": [],
    }
    new_profile = AlgoProfile.model_validate(profile_payload)

    with (
        patch(
            "everos.memory.strategies.trigger_profile_clustering.get_embedder",
            return_value=embedder,
        ),
        patch(
            "everos.memory.strategies.extract_user_profile.memcell_repo"
        ) as mock_memcell_repo,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileReader"
        ) as mock_reader_cls,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileWriter"
        ) as mock_writer_cls,
        patch(
            "everos.memory.strategies.extract_user_profile.ProfileExtractor"
        ) as mock_extractor_cls,
        patch(
            "everos.memory.strategies.extract_user_profile.get_llm_client",
            return_value=object(),
        ),
        capture_logs() as logs,
    ):
        # Wire the mocked seams: one memcell row, no prior profile on disk,
        # a no-op writer, and an extractor that yields ``new_profile``.
        mock_memcell_repo.find_by_ids = AsyncMock(return_value=[fake_memcell_row])
        mock_reader_cls.return_value.read = AsyncMock(return_value=None)
        mock_writer_cls.return_value.write = AsyncMock(return_value=None)
        mock_extractor_cls.return_value.aextract = AsyncMock(
            return_value=new_profile
        )

        sqlite_dir = tmp_path / ".index" / "sqlite"
        sqlite_dir.mkdir(parents=True, exist_ok=True)
        await _setup_system_db_schema(monkeypatch)

        engine = memorize_svc._get_engine()
        await engine.start()

        async def _successful_runs(strategy_name: str) -> list:
            # Runs of ``strategy_name`` that the engine recorded as SUCCESS.
            return await engine.list_runs(strategy_name, status=RunStatus.SUCCESS)

        try:
            await engine.emit(
                EpisodeExtracted(
                    memcell_id=memcell_id,
                    episode_entry_id="ep_20260517_0001",
                    episode_text=episode_text,
                    episode_timestamp_ms=ts_ms,
                    owner_id=owner,
                )
            )

            # Poll up to 50 times at 0.1 s intervals until both stages of
            # the chain report a successful run.
            cluster_runs: list = []
            profile_runs: list = []
            polls_left = 50
            while polls_left:
                polls_left -= 1
                await asyncio.sleep(0.1)
                cluster_runs = await _successful_runs("trigger_profile_clustering")
                profile_runs = await _successful_runs("extract_user_profile")
                if cluster_runs and profile_runs:
                    break

            assert cluster_runs, "expected SUCCESS for trigger_profile_clustering"
            assert profile_runs, "expected SUCCESS for extract_user_profile"
        finally:
            await engine.stop()
            await _teardown_system_db_schema()

        def _logged(event_name: str) -> list:
            # Captured log records whose ``event`` field equals ``event_name``.
            return [rec for rec in logs if rec.get("event") == event_name]

        cluster_logs = _logged("profile_cluster_updated")
        profile_logs = _logged("user_profile_extracted")
        assert cluster_logs, "expected profile_cluster_updated log line"
        assert profile_logs, "expected user_profile_extracted log line"

        first_profile_log = profile_logs[0]
        assert first_profile_log["owner_id"] == "u_alice"
        assert first_profile_log["mode"] == "INIT"