"""Private helpers shared across the search e2e tests. * :func:`pick_query_seeds` — scans the session corpus's ``.atomic_facts/`` md files and returns a list of ``(owner_id, fact_text)`` tuples to use as deterministic search queries. Bootstrapping queries off the corpus's own extraction output gives us a closed-loop correctness signal — what was written should be findable. * :func:`assert_recall` — the canonical "this search returned at least one sensible hit for ``owner``" assertion bundle. Used by the keyword / vector / hybrid recall tests so the assertion logic is in one place. * :func:`flatten_hits` — collapses ``SearchData``'s four arrays into one ``(owner_id, score, text)`` tuple list for relevance checks. The helpers do **not** hardcode topical keywords ("hiking" / "work") — they are derived from what the pipeline produced. This keeps the suite stable across LLM-driven boundary-cut variance. """ from __future__ import annotations import re from pathlib import Path from typing import Any import httpx # Cap how many fact strings we sample per call — running every test # against every fact would blow the LLM rerank budget. _DEFAULT_SEED_LIMIT = 3 # Tokenise on word characters; lowercase; drop short tokens that carry # no signal for the "content overlap" check. _TOKEN_RE = re.compile(r"\w+", re.UNICODE) _MIN_TOKEN_LEN = 3 _STOPWORDS: frozenset[str] = frozenset( { "the", "and", "for", "that", "with", "this", "was", "has", "have", "are", "but", "from", "you", "she", "her", "his", "him", "they", "them", "their", } ) # ── Query seed extraction ─────────────────────────────────────────────── def pick_query_seeds( memory_root: Path, *, limit: int = _DEFAULT_SEED_LIMIT, ) -> list[tuple[str, str]]: """Sample ``(owner_id, fact_text)`` tuples from atomic_facts md files. Walks ``users//.atomic_facts/atomic_fact-*.md`` and parses the ``## Fact\\n`` sections inside each daily-log entry. Returns deterministic seeds (insertion order of ``rglob`` is sort-stable thanks to the explicit ``sorted`` call) so a flaky test surfaces a real regression, not query-rotation variance. Raises: AssertionError: if no facts were extracted — that's a fixture failure, not a test failure, and should fail loudly. """ seeds: list[tuple[str, str]] = [] users_dir = memory_root / "default_app" / "default_project" / "users" if not users_dir.is_dir(): raise AssertionError(f"expected {users_dir} to exist after ingest") for owner_dir in sorted(users_dir.iterdir()): if not owner_dir.is_dir(): continue facts_dir = owner_dir / ".atomic_facts" if not facts_dir.is_dir(): continue for md in sorted(facts_dir.rglob("*.md")): for fact in _extract_fact_sections(md): if fact: seeds.append((owner_dir.name, fact)) if len(seeds) >= limit: return seeds if not seeds: raise AssertionError( f"no atomic_fact md entries under {users_dir} — pipeline did " "not produce any facts; cannot bootstrap search queries" ) return seeds def _extract_fact_sections(md: Path) -> list[str]: """Return every ``### Fact`` section body in a daily-log md file. Daily-log entries are ``## `` blocks; the labelled body sections inside an entry are h3 (``### Fact``, ``### Foresight``, …). We scan linearly for ``### Fact`` and collect lines until the next heading at any level or the end-of-entry marker. """ body = md.read_text(encoding="utf-8") sections: list[str] = [] in_fact = False buf: list[str] = [] for line in body.splitlines(): stripped = line.lstrip() if stripped.startswith("### Fact"): if in_fact: sections.append("\n".join(buf).strip()) in_fact = True buf = [] continue # Any subsequent heading or entry-end marker closes the section. if in_fact and (stripped.startswith("#") or stripped.startswith("