chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,27 @@
"""Shared fixtures for ``memory.search`` unit tests.
The project default is ``EVEROS_SEARCH__VECTOR_STRATEGY=maxsim_atomic`` —
that path queries both the ``atomic_fact`` table and the ``episode`` table
to do MaxSim. The existing VECTOR-route tests in ``test_manager.py`` were
written against the legacy single-vector ``episode`` path and stub only the
episode recaller (atomic_fact recaller is a no-data stub).
Force the legacy ``episode`` strategy by default for these tests so they
keep asserting against the dense-recall path they were designed to cover.
MaxSim-specific tests opt back into ``maxsim_atomic`` by overriding the env
var inside their own body.
"""
from __future__ import annotations

from collections.abc import Iterator

import pytest

from everos.config.settings import load_settings
@pytest.fixture(autouse=True)
def _force_episode_vector_strategy(
    monkeypatch: pytest.MonkeyPatch,
) -> Iterator[None]:
    """Pin ``EVEROS_SEARCH__VECTOR_STRATEGY`` to ``episode`` for each test.

    Autouse fixture: sets the environment variable through ``monkeypatch``
    and clears the ``load_settings`` cache both before the test body runs and
    again afterwards, so a cached settings object does not carry over from
    or into another test.

    Yields:
        Nothing; the fixture exists only for its setup/teardown side effects.
    """
    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "episode")
    # Drop any settings cached before the override was applied.
    load_settings.cache_clear()
    yield
    # Drop the settings cached while the override was active.
    load_settings.cache_clear()

View File

@ -0,0 +1,59 @@
"""Unit tests for ``memory.search.adapter.resolve_pipeline``."""
from __future__ import annotations
import pytest
from everos.memory.search.adapter import resolve_pipeline
from everos.memory.search.dto import SearchMethod
def test_keyword_skips_everalgo() -> None:
    """KEYWORD on ``episode`` resolves to a pair of ``None`` values."""
    mode, config = resolve_pipeline(SearchMethod.KEYWORD, "episode")

    assert mode is None
    assert config is None
def test_vector_skips_everalgo() -> None:
    """VECTOR on ``episode`` resolves to a pair of ``None`` values."""
    mode, config = resolve_pipeline(SearchMethod.VECTOR, "episode")

    assert mode is None
    assert config is None
def test_hybrid_episode_picks_hierarchy() -> None:
    """HYBRID on ``episode`` resolves to ``"hierarchy"`` with no config."""
    mode, config = resolve_pipeline(SearchMethod.HYBRID, "episode")

    assert mode == "hierarchy"
    assert config is None
def test_hybrid_atomic_fact_picks_hierarchy() -> None:
    """HYBRID on ``atomic_fact`` resolves to ``"hierarchy"``; config unchecked."""
    mode, _ = resolve_pipeline(SearchMethod.HYBRID, "atomic_fact")

    assert mode == "hierarchy"
def test_hybrid_case_picks_vector_anchored() -> None:
    """HYBRID on ``agent_case`` resolves to ``"vector_anchored"`` with no config."""
    mode, config = resolve_pipeline(SearchMethod.HYBRID, "agent_case")

    assert mode == "vector_anchored"
    assert config is None
def test_hybrid_skill_picks_skill_hybrid() -> None:
    """HYBRID on ``agent_skill`` resolves to ``"skill_hybrid"``; config unchecked."""
    mode, _ = resolve_pipeline(SearchMethod.HYBRID, "agent_skill")

    assert mode == "skill_hybrid"
def test_agentic_method_raises_value_error() -> None:
    """``resolve_pipeline`` rejects the AGENTIC enum member with ``ValueError``.

    Complements ``test_unsupported_method_raises``, which feeds an arbitrary
    non-enum string. Here the input is a valid ``SearchMethod`` member: the
    manager is expected to intercept AGENTIC before calling
    ``resolve_pipeline``, and ``resolve_pipeline`` still guards against it.
    """
    expect_rejection = pytest.raises(ValueError, match="unsupported method")

    with expect_rejection:
        resolve_pipeline(SearchMethod.AGENTIC, "episode")
def test_unsupported_method_raises() -> None:
    """A non-enum method string is rejected with ``ValueError``."""
    expect_rejection = pytest.raises(ValueError, match="unsupported method")

    with expect_rejection:
        resolve_pipeline("not-a-method", "episode")  # type: ignore[arg-type]

View File

@ -0,0 +1,338 @@
"""Unit tests for ``memory.search.agentic.search_episodes_agentic``.
White-box: patches ``aagentic_retrieve`` to assert benchmark hyperparameters
are wired correctly, plus a shaping test to verify id remapping.
"""
from __future__ import annotations
import datetime as _dt
from collections.abc import Sequence
from typing import Any, ClassVar
from unittest.mock import AsyncMock, patch
import numpy as np
import pytest
from everalgo.clustering import Cluster
from everalgo.rank.protocols import AgenticDecision
from everalgo.testing.fake_llm import FakeLLMClient
from everalgo.types import Candidate
from everos.component.utils.datetime import from_timestamp
from everos.memory.search.agentic import (
_restore_shaper_metadata,
_to_everalgo_doc_metadata,
search_episodes_agentic,
)
from everos.memory.search.dto import SearchEpisodeItem
# ── Stubs ────────────────────────────────────────────────────────────────
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _mc_candidate(mc_id: str, ep_id: str, score: float = 0.8) -> Candidate:
    """Build a vector-sourced ``Candidate`` whose ``id`` is the memcell id.

    Mirrors the shape returned by amaxsim / ``fetch_all_for_owner``: the
    owning episode id lives in ``metadata["episode_id"]`` and
    ``metadata["parent_id"]`` repeats ``mc_id``.
    """
    payload = {
        "episode_id": ep_id,
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": "Alice eats oat milk",
        "summary": "Alice food preferences",
        "episode": "Alice prefers oat milk in her coffee",
        "parent_id": mc_id,
    }
    return Candidate(id=mc_id, score=score, source="vector", metadata=payload)
class _StubEpisodeRecaller:
    """In-memory episode recaller stub backed by two pre-built collections.

    ``all_docs`` feeds ``dense_recall`` and ``fetch_all_for_owner``;
    ``by_parent`` maps a parent id to the candidate returned by
    ``fetch_by_parent_ids``. ``sparse_recall`` never returns anything.
    """

    kind: ClassVar[str] = "episode"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "episode"

    def __init__(
        self, all_docs: list[Candidate], by_parent: dict[str, Candidate]
    ) -> None:
        self._all_docs = all_docs
        self._by_parent = by_parent

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return no candidates."""
        return []

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of every stored doc."""
        return [*self._all_docs]

    async def fetch_by_parent_ids(
        self, parent_ids: Sequence[str], where: str
    ) -> list[Candidate]:
        """Returns Candidate with id=episode_id (real LanceDB id).

        Unknown parent ids are skipped; ``where`` is not consulted.
        """
        found: list[Candidate] = []
        for parent_id in parent_ids:
            if parent_id in self._by_parent:
                found.append(self._by_parent[parent_id])
        return found

    async def fetch_all_for_owner(self, where: str) -> list[Candidate]:
        """Returns Candidate with id=memcell_id and metadata['episode_id'].

        ``where`` is not consulted; every stored doc is returned in a new list.
        """
        return [*self._all_docs]
class _StubFactRecaller:
    """In-memory atomic-fact recaller stub; both recall paths return the same facts."""

    kind: ClassVar[str] = "atomic_fact"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "fact"

    def __init__(self, facts: list[Candidate]) -> None:
        self._facts = facts

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored facts."""
        return [*self._facts]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored facts."""
        return [*self._facts]
class _StubReranker:
    """Reranker stub that keeps passage order and assigns descending scores."""

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Return one result per passage: ``index`` i, ``score`` 1.0 - i * 0.1.

        ``query`` and ``instruction`` are accepted but not used.
        """

        class _R:
            def __init__(self, idx: int) -> None:
                self.index = idx
                self.score = 1.0 - idx * 0.1

        results = []
        for position, _passage in enumerate(passages):
            results.append(_R(position))
        return results
# ── Fixtures ─────────────────────────────────────────────────────────────
@pytest.fixture()
def mc_cand() -> Candidate:
    """Memcell-keyed candidate ``mc_1`` belonging to episode ``ep_1``."""
    candidate = _mc_candidate("mc_1", "ep_1")
    return candidate
@pytest.fixture()
def ep_recaller(mc_cand: Candidate) -> _StubEpisodeRecaller:
    """Episode recaller stub holding ``mc_cand`` plus its ``ep_1`` row.

    The episode row reuses ``mc_cand``'s metadata and is reachable through
    parent id ``mc_1``.
    """
    episode_row = Candidate(
        id="ep_1",
        score=0.0,
        source="vector",
        metadata=mc_cand.metadata,
    )
    docs = [mc_cand]
    rows_by_parent = {"mc_1": episode_row}
    return _StubEpisodeRecaller(all_docs=docs, by_parent=rows_by_parent)
@pytest.fixture()
def fact_cand() -> Candidate:
    """Atomic-fact candidate ``f_1`` whose parent is memcell ``mc_1``."""
    fact_metadata = {"parent_id": "mc_1", "fact": "Alice prefers oat milk"}
    return Candidate(id="f_1", score=0.9, source="vector", metadata=fact_metadata)
@pytest.fixture()
def fact_recaller(fact_cand: Candidate) -> _StubFactRecaller:
    """Fact recaller stub that serves only ``fact_cand``."""
    facts = [fact_cand]
    return _StubFactRecaller(facts)
@pytest.fixture()
def clusters() -> list[Cluster]:
    """Single placeholder cluster ``cl_1`` containing memcell ``mc_1``."""
    # ``cluster_repo.list_for_owner`` is mocked in every test, so cluster
    # contents are never exercised by everalgo; we only need a valid instance
    # that satisfies the everalgo ``Cluster`` schema (ndarray centroid + last_ts).
    zero_centroid = np.zeros(4, dtype=np.float32)
    placeholder = Cluster(
        id="cl_1",
        members=["mc_1"],
        centroid=zero_centroid,
        last_ts=0,
    )
    return [placeholder]
# ── Tests ─────────────────────────────────────────────────────────────────
async def test_agentic_search_wires_benchmark_hyperparams(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
    clusters: list[Cluster],
) -> None:
    """aagentic_retrieve must be called with the exact benchmark hyperparams.

    The fake declares every expected keyword explicitly, so a missing or
    unexpected keyword from ``search_episodes_agentic`` fails the call itself.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: int,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        # Record only the knobs this test asserts on.
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        seen["has_round2"] = round2_retrieve is not None
        return [], AgenticDecision(is_multi_round=False)

    async def fake_embed(q: str) -> list[float]:
        return [0.1, 0.2, 0.3, 0.4]

    retrieve_patch = patch(
        "everos.memory.search.agentic.aagentic_retrieve", fake_aagentic
    )
    clusters_patch = patch(
        "everos.memory.search.agentic.cluster_repo.list_for_owner",
        AsyncMock(return_value=clusters),
    )
    with retrieve_patch, clusters_patch:
        await search_episodes_agentic(
            "What did Alice eat?",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    expected_values = {
        "top_n": 10,
        "round1_top_n": 50,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 40,
        "refinement_strategy": "multi_query",
    }
    for knob, wanted in expected_values.items():
        assert seen[knob] == wanted
    assert seen["has_round2"] is True
async def test_agentic_search_loads_user_memory_clusters(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
) -> None:
    """cluster_repo.list_for_owner must be called with kind='user_memory'."""
    list_for_owner = AsyncMock(return_value=[])
    empty_retrieve = AsyncMock(
        return_value=([], AgenticDecision(is_multi_round=False))
    )

    async def fake_embed(q: str) -> list[float]:
        return [0.1] * 4

    retrieve_patch = patch(
        "everos.memory.search.agentic.aagentic_retrieve", empty_retrieve
    )
    clusters_patch = patch(
        "everos.memory.search.agentic.cluster_repo.list_for_owner", list_for_owner
    )
    with retrieve_patch, clusters_patch:
        await search_episodes_agentic(
            "q",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    # Exactly one lookup, positional (owner_id, kind).
    list_for_owner.assert_called_once_with("alice", "user_memory")
async def test_agentic_search_shapes_candidates_with_episode_id(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
    clusters: list[Cluster],
    mc_cand: Candidate,
) -> None:
    """SearchEpisodeItem.id must be episode_id (not memcell_id) after retrieve."""

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        # Hand back the memcell-keyed candidate so the shaper has to remap it.
        return [mc_cand], AgenticDecision(is_multi_round=False)

    async def fake_embed(q: str) -> list[float]:
        return [0.1] * 4

    retrieve_patch = patch(
        "everos.memory.search.agentic.aagentic_retrieve", fake_aagentic
    )
    clusters_patch = patch(
        "everos.memory.search.agentic.cluster_repo.list_for_owner",
        AsyncMock(return_value=clusters),
    )
    with retrieve_patch, clusters_patch:
        items = await search_episodes_agentic(
            "What did Alice eat?",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(items) == 1
    assert isinstance(items[0], SearchEpisodeItem)
    assert items[0].id == "ep_1", (
        f"Expected episode_id='ep_1' but got {items[0].id!r}. "
        "Shaper must remap from memcell_id via metadata['episode_id']."
    )
# ── Metadata bridge to the everalgo _format_docs contract ──────────────────
def test_to_everalgo_doc_metadata_injects_text_and_ms_timestamp() -> None:
    """Bridge adds `text` (episode body) + ms-epoch `timestamp` for _format_docs.

    Without this the sufficiency / multi-query LLM prompt falls back to the
    memcell id as the doc body and renders the date as "N/A". ``episode`` is
    left untouched so the reranker / shaper (both expecting a str) keep working.
    """
    stamp = _ts()
    source_metadata = {
        "episode": "Alice prefers oat milk",
        "timestamp": stamp,
        "subject": "Alice eats oat milk",
    }

    bridged = _to_everalgo_doc_metadata(source_metadata)

    assert bridged["text"] == "Alice prefers oat milk"
    assert bridged["episode"] == "Alice prefers oat milk"  # untouched for rerank/shaper
    assert isinstance(bridged["timestamp"], int)
    # The integer timestamp must convert back to the original datetime.
    assert from_timestamp(bridged["timestamp"]) == stamp
def test_restore_shaper_metadata_reverts_ms_timestamp_to_datetime() -> None:
    """The ms-epoch timestamp is reverted to the datetime the shaper requires."""
    stamp = _ts()

    round_tripped = _restore_shaper_metadata(
        _to_everalgo_doc_metadata({"episode": "x", "timestamp": stamp})
    )

    assert isinstance(round_tripped["timestamp"], _dt.datetime)
    assert round_tripped["timestamp"] == stamp

View File

@ -0,0 +1,272 @@
"""Unit tests for ``memory.search.agentic_agent``.
White-box: patches ``aagentic_retrieve`` to assert benchmark hyperparameters
are wired correctly, plus a shaping test to verify DTOs are built correctly.
The skill verify step has been removed from production code; this test
module covers the agentic retrieve flow only.
"""
from __future__ import annotations
import datetime as _dt
from typing import Any, ClassVar
from unittest.mock import patch
from everalgo.rank.protocols import AgenticDecision
from everalgo.testing.fake_llm import FakeLLMClient
from everalgo.types import Candidate
from everos.memory.search.agentic_agent import (
search_agent_cases_agentic,
search_agent_skills_agentic,
)
from everos.memory.search.dto import SearchAgentCaseItem, SearchAgentSkillItem
# ── Stubs ────────────────────────────────────────────────────────────────
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _case_candidate(cid: str, score: float = 0.8) -> Candidate:
    """Build a vector-sourced agent-case ``Candidate`` owned by ``agent_a``.

    ``task_intent`` and ``approach`` are derived from ``cid`` so assertions
    can tell candidates apart.
    """
    payload = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": f"intent {cid}",
        "approach": f"approach {cid}",
        "quality_score": 0.8,
    }
    return Candidate(id=cid, score=score, source="vector", metadata=payload)
def _skill_candidate(sid: str, score: float = 0.75) -> Candidate:
    """Build a vector-sourced agent-skill ``Candidate`` owned by ``agent_a``.

    ``name``, ``description`` and ``content`` are derived from ``sid``.
    """
    payload = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": f"skill_{sid}",
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=score, source="vector", metadata=payload)
class _StubCaseRecaller:
    """In-memory agent-case recaller stub; both recall paths return the same list."""

    kind: ClassVar[str] = "agent_case"
    everalgo_memory_type: ClassVar[str] = "case"
    text_field: ClassVar[str] = "task_intent"

    def __init__(self, dense: list[Candidate]) -> None:
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored candidates."""
        return [*self._dense]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored candidates."""
        return [*self._dense]
class _StubSkillRecaller:
    """In-memory agent-skill recaller stub; both recall paths return the same list."""

    kind: ClassVar[str] = "agent_skill"
    everalgo_memory_type: ClassVar[str] = "skill"
    text_field: ClassVar[str] = "description"

    def __init__(self, dense: list[Candidate]) -> None:
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored candidates."""
        return [*self._dense]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Ignore all arguments and return a fresh list of the stored candidates."""
        return [*self._dense]
class _StubReranker:
    """Reranker stub that keeps passage order and assigns descending scores."""

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Return one result per passage: ``index`` i, ``score`` 1.0 - i * 0.1.

        Args:
            query: Accepted for signature compatibility; not used.
            passages: Texts to "rerank"; only their count matters.
            instruction: Optional keyword-only task instruction. Accepted so
                callers that forward ``instruction=`` do not hit a
                ``TypeError``; not used.

        Returns:
            Objects exposing ``index`` and ``score``, in input order.
        """

        class _R:
            def __init__(self, idx: int) -> None:
                self.index = idx
                self.score = 1.0 - idx * 0.1

        return [_R(i) for i in range(len(passages))]
async def _fake_embed(q: str) -> list[float]:
    """Return a constant 4-dimensional embedding regardless of ``q``."""
    vector = [0.1, 0.2, 0.3, 0.4]
    return vector
# ── Tests ─────────────────────────────────────────────────────────────────
async def test_search_agent_cases_agentic_calls_aagentic_retrieve_with_benchmark_params() -> (  # noqa: E501
    None
):
    """Verify aagentic_retrieve called with benchmark hyperparams for agent_case.

    The fake declares every expected keyword explicitly, so a missing or
    unexpected keyword from ``search_agent_cases_agentic`` fails the call.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: Any,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        # Record only the knobs this test asserts on.
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["round2_retrieve_is_none"] = round2_retrieve is None
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        return [], AgenticDecision(is_multi_round=False)

    retrieve_patch = patch(
        "everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic
    )
    with retrieve_patch:
        await search_agent_cases_agentic(
            "How did agent handle login failure?",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            case_recaller=_StubCaseRecaller([]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    expected_values = {
        "top_n": 10,
        "round1_top_n": 20,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 60,
        "refinement_strategy": "multi_query",
    }
    for knob, wanted in expected_values.items():
        assert seen[knob] == wanted
    assert seen["round2_retrieve_is_none"] is True
async def test_search_agent_skills_agentic_calls_aagentic_retrieve_with_benchmark_params() -> (  # noqa: E501
    None
):
    """Verify aagentic_retrieve called with benchmark hyperparams for agent_skill.

    The fake declares every expected keyword explicitly, so a missing or
    unexpected keyword from ``search_agent_skills_agentic`` fails the call.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: Any,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        # Record only the knobs this test asserts on.
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["round2_retrieve_is_none"] = round2_retrieve is None
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        return [], AgenticDecision(is_multi_round=False)

    retrieve_patch = patch(
        "everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic
    )
    with retrieve_patch:
        await search_agent_skills_agentic(
            "What skill handles auth token refresh?",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            skill_recaller=_StubSkillRecaller([]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=5,
        )

    expected_values = {
        "top_n": 5,
        "round1_top_n": 20,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 60,
        "refinement_strategy": "multi_query",
    }
    for knob, wanted in expected_values.items():
        assert seen[knob] == wanted
    assert seen["round2_retrieve_is_none"] is True
async def test_search_agent_cases_agentic_shapes_result() -> None:
    """Output must be list[SearchAgentCaseItem] built from aagentic_retrieve results."""
    case = _case_candidate("c_1")

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        return [case], AgenticDecision(is_multi_round=False)

    retrieve_patch = patch(
        "everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic
    )
    with retrieve_patch:
        items = await search_agent_cases_agentic(
            "intent query",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            case_recaller=_StubCaseRecaller([case]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(items) == 1
    shaped = items[0]
    assert isinstance(shaped, SearchAgentCaseItem)
    assert shaped.id == "c_1"
    assert shaped.task_intent == "intent c_1"
async def test_search_agent_skills_agentic_shapes_result() -> None:
    """Output must be list[SearchAgentSkillItem] from aagentic_retrieve results."""
    skill = _skill_candidate("s_1")

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        return [skill], AgenticDecision(is_multi_round=False)

    retrieve_patch = patch(
        "everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic
    )
    with retrieve_patch:
        items = await search_agent_skills_agentic(
            "skill query",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            skill_recaller=_StubSkillRecaller([skill]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(items) == 1
    shaped = items[0]
    assert isinstance(shaped, SearchAgentSkillItem)
    assert shaped.id == "s_1"
    assert shaped.name == "skill_s_1"

View File

@ -0,0 +1,163 @@
"""Unit tests for ``memory.search.callbacks``."""
from __future__ import annotations
import inspect
from typing import Any
import pytest
from everalgo.types import Candidate
from everos.memory.search.callbacks import (
_SKILL_RERANK_INSTRUCTION,
build_rerank_fn,
build_skill_rerank_fn,
)
class _StubReranker:
    """Returns candidates in original order with scores 1.0, 0.9, 0.8, ...

    Records the ``instruction`` and ``passages`` from the most recent call so
    tests can assert that callback factories forward the right arguments.
    """

    def __init__(self) -> None:
        # Both stay ``None`` until ``rerank`` has been called at least once.
        self.last_instruction: str | None = None
        self.last_passages: list[str] | None = None

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Record the call, then return one ``index``/``score`` result per passage."""
        self.last_instruction = instruction
        self.last_passages = list(passages)

        class _R:
            def __init__(self, index: int, score: float) -> None:
                self.index = index
                self.score = score

        ranked = []
        for position, _passage in enumerate(passages):
            ranked.append(_R(position, 1.0 - position * 0.1))
        return ranked
def _cand(cid: str, episode_text: str = "body") -> Candidate:
    """Build a vector-sourced ``Candidate`` (score 0.5) carrying only ``episode`` text."""
    payload = {"episode": episode_text}
    return Candidate(id=cid, score=0.5, source="vector", metadata=payload)
async def test_build_rerank_fn_returns_two_arg_callable() -> None:
    """build_rerank_fn must return a 2-arg async callable matching RerankFn."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")

    # Only the parameter names are inspected; the callable is never invoked.
    params = list(inspect.signature(rerank_fn).parameters)

    assert params == ["query", "candidates"], f"Expected 2-arg fn, got params: {params}"
async def test_build_rerank_fn_returns_all_candidates_without_truncation() -> None:
    """rerank_fn must return ALL reranked candidates; caller slices."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")
    five_candidates = [_cand(f"c{n}") for n in range(5)]

    reranked = await rerank_fn("what did Alice eat?", five_candidates)

    assert len(reranked) == 5
async def test_build_rerank_fn_attaches_scores_from_provider() -> None:
    """rerank_fn updates Candidate.score from RerankProvider results."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")

    reranked = await rerank_fn("q", [_cand("a"), _cand("b")])

    assert all(isinstance(item.score, float) for item in reranked)
    # The stub scores 1.0, 0.9, ... in input order (not the 0.5 set by ``_cand``).
    first, second = reranked[0], reranked[1]
    assert first.score == pytest.approx(1.0)
    assert second.score == pytest.approx(0.9)
async def test_build_rerank_fn_handles_empty_candidates() -> None:
    """Empty candidate list returns empty list without calling the provider."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")

    reranked = await rerank_fn("q", [])

    assert reranked == []
async def test_build_rerank_fn_forwards_instruction() -> None:
    """The task instruction is forwarded verbatim to the provider."""
    provider = _StubReranker()
    rerank_fn = build_rerank_fn(
        provider, text_field="episode", instruction="find facts"
    )

    await rerank_fn("q", [_cand("a")])

    assert provider.last_instruction == "find facts"
# ── build_skill_rerank_fn ────────────────────────────────────────────────
def _skill_cand(cid: str, *, name: str = "", description: str = "") -> Candidate:
    """Build a vector-sourced skill ``Candidate`` (score 0.5) with name/description."""
    payload = {"name": name, "description": description}
    return Candidate(id=cid, score=0.5, source="vector", metadata=payload)
async def test_build_skill_rerank_fn_emits_shaped_passage() -> None:
    """Passage = ``"Agent Skill: {name} - {description}"`` when both present."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)
    skill = _skill_cand(
        "s1", name="refactor_auth", description="split provider lookup"
    )

    await rerank_fn("q", [skill])

    assert provider.last_passages == [
        "Agent Skill: refactor_auth - split provider lookup"
    ]
async def test_build_skill_rerank_fn_omits_dash_when_description_missing() -> None:
    """When description is empty, drop ``" - {description}"`` suffix."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)
    skill = _skill_cand("s1", name="refactor_auth", description="")

    await rerank_fn("q", [skill])

    assert provider.last_passages == ["Agent Skill: refactor_auth"]
async def test_build_skill_rerank_fn_falls_back_when_name_missing() -> None:
    """When name is empty, passage degrades to bare description."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)
    skill = _skill_cand("s1", name="", description="just text")

    await rerank_fn("q", [skill])

    assert provider.last_passages == ["just text"]
async def test_build_skill_rerank_fn_forwards_skill_instruction() -> None:
    """The skill-specific instruction is hard-wired into the call."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)
    skill = _skill_cand("s1", name="x", description="y")

    await rerank_fn("q", [skill])

    assert provider.last_instruction == _SKILL_RERANK_INSTRUCTION
async def test_build_skill_rerank_fn_handles_empty_candidates() -> None:
    """Empty candidate list skips the provider call entirely."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)

    reranked = await rerank_fn("q", [])

    assert reranked == []
    assert provider.last_passages is None  # provider never called
async def test_build_skill_rerank_fn_attaches_scores_and_preserves_metadata() -> None:
    """Reranked candidates carry the provider's score and original metadata."""
    provider = _StubReranker()
    rerank_fn = build_skill_rerank_fn(provider)
    alpha = _skill_cand("a", name="alpha", description="d-a")
    beta = _skill_cand("b", name="beta", description="d-b")

    reranked = await rerank_fn("q", [alpha, beta])

    assert [item.id for item in reranked] == ["a", "b"]
    first, second = reranked[0], reranked[1]
    assert first.score == pytest.approx(1.0)
    assert second.score == pytest.approx(0.9)
    # metadata round-trips intact — the shape function only reads it, never mutates.
    assert first.metadata["name"] == "alpha"
    assert second.metadata["description"] == "d-b"

View File

@ -0,0 +1,135 @@
"""Unit tests for ``memory.search.dto`` validation rules."""
from __future__ import annotations
import pytest
from pydantic import ValidationError
from everos.memory.search import (
SearchData,
SearchMethod,
SearchRequest,
SearchResponse,
)
def _minimal_request_kwargs() -> dict:
    """Return a fresh dict with the ``user_id`` and ``query`` keyword arguments."""
    return dict(user_id="alice", query="hello")
def test_enable_llm_rerank_defaults_to_false() -> None:
    """HYBRID should NOT auto-trigger LLM Phase-5 rerank by default.

    The caller opts in explicitly when they want the extra LLM pass;
    leaving it off keeps a default HYBRID call cheap (no LLM ``chat``).
    """
    kwargs = _minimal_request_kwargs()

    request = SearchRequest(**kwargs)

    assert request.enable_llm_rerank is False
def test_enable_llm_rerank_accepts_true() -> None:
    """An explicit ``enable_llm_rerank=True`` is kept on the request."""
    request = SearchRequest(**_minimal_request_kwargs(), enable_llm_rerank=True)

    assert request.enable_llm_rerank is True
def test_minimal_request_uses_hybrid_default() -> None:
    """A request with only ``user_id`` and ``query`` gets the documented defaults."""
    request = SearchRequest(**_minimal_request_kwargs())

    assert request.method == SearchMethod.HYBRID
    assert request.top_k == -1
    assert request.include_profile is False
    # Optional narrowing knobs are unset unless the caller provides them.
    assert request.filters is None
    assert request.radius is None
def test_top_k_zero_rejected() -> None:
    """``top_k=0`` fails validation and the error text names ``top_k``."""
    with pytest.raises(ValidationError) as caught:
        SearchRequest(**_minimal_request_kwargs(), top_k=0)

    message = str(caught.value)
    assert "top_k" in message
def test_top_k_above_100_rejected() -> None:
    """``top_k=101`` fails validation."""
    expect_invalid = pytest.raises(ValidationError)

    with expect_invalid:
        SearchRequest(**_minimal_request_kwargs(), top_k=101)
def test_top_k_below_minus_one_rejected() -> None:
    """``top_k=-2`` fails validation."""
    expect_invalid = pytest.raises(ValidationError)

    with expect_invalid:
        SearchRequest(**_minimal_request_kwargs(), top_k=-2)
def test_top_k_minus_one_accepted() -> None:
    """An explicit ``top_k=-1`` is accepted and stored as given."""
    request = SearchRequest(**_minimal_request_kwargs(), top_k=-1)

    assert request.top_k == -1
def test_top_k_in_range_accepted() -> None:
    """``top_k=50`` is accepted and stored as given."""
    request = SearchRequest(**_minimal_request_kwargs(), top_k=50)

    assert request.top_k == 50
def test_radius_out_of_range_rejected() -> None:
    """``radius`` values 1.5 and -0.1 each fail validation."""
    for bad_radius in (1.5, -0.1):
        with pytest.raises(ValidationError):
            SearchRequest(**_minimal_request_kwargs(), radius=bad_radius)
def test_neither_user_id_nor_agent_id_rejected() -> None:
    """The xor validator requires exactly one of user_id / agent_id."""
    expect_xor_error = pytest.raises(ValidationError, match="exactly one of")

    with expect_xor_error:
        SearchRequest(query="hello")  # neither set
def test_both_user_id_and_agent_id_rejected() -> None:
    """The xor validator rejects ambiguous owner identity."""
    expect_xor_error = pytest.raises(ValidationError, match="exactly one of")

    with expect_xor_error:
        SearchRequest(user_id="alice", agent_id="agent_x", query="hello")
def test_empty_query_rejected() -> None:
    """An empty ``query`` string fails validation."""
    expect_invalid = pytest.raises(ValidationError)

    with expect_invalid:
        SearchRequest(user_id="alice", query="")
def test_empty_user_id_rejected() -> None:
    """An empty ``user_id`` string fails validation."""
    expect_invalid = pytest.raises(ValidationError)

    with expect_invalid:
        SearchRequest(user_id="", query="hello")
def test_extra_top_level_field_rejected() -> None:
    """``extra='forbid'`` keeps the contract tight."""
    expect_invalid = pytest.raises(ValidationError)

    with expect_invalid:
        SearchRequest(
            **_minimal_request_kwargs(),
            unexpected_field="x",  # type: ignore[call-arg]
        )
def test_filters_extra_keys_allowed() -> None:
    """FilterNode is open-shape; safety is enforced in the compiler."""
    raw_filters = {"session_id": "sess_a", "AND": [{"timestamp": {"gte": 1}}]}

    request = SearchRequest(**_minimal_request_kwargs(), filters=raw_filters)

    assert request.filters is not None
    # Both the flat field and the nested combinator survive a dump round-trip.
    as_dict = request.filters.model_dump(exclude_none=True)
    assert as_dict["session_id"] == "sess_a"
    assert as_dict["AND"][0]["timestamp"]["gte"] == 1
def test_response_default_arrays_present() -> None:
    """Every ``data.*`` array must exist so callers can iterate unconditionally."""
    response = SearchResponse(request_id="0" * 32, data=SearchData())

    payload = response.data
    assert payload.episodes == []
    assert payload.profiles == []
    assert payload.agent_cases == []
    assert payload.agent_skills == []
def test_method_enum_serialises_to_lowercase() -> None:
    """The string ``"agentic"`` parses to ``SearchMethod.AGENTIC`` with that value."""
    request = SearchRequest(**_minimal_request_kwargs(), method="agentic")  # type: ignore[arg-type]

    parsed = request.method
    assert parsed == SearchMethod.AGENTIC
    assert parsed.value == "agentic"

View File

@ -0,0 +1,244 @@
"""Unit tests for the Filters DSL compiler."""
from __future__ import annotations
import pytest
from everos.memory.search import (
FilterError,
FilterNode,
compile_filters,
)
# ── Base injection ───────────────────────────────────────────────────────
def test_no_filters_emits_base_clause() -> None:
    """With no filters the output is exactly the owner/app/project base clause."""
    expected = (
        "owner_id = 'alice' AND owner_type = 'user' "
        "AND app_id = 'default' AND project_id = 'default'"
    )

    where = compile_filters(None, owner_id="alice", owner_type="user")

    assert where == expected
def test_owner_type_agent_pinned() -> None:
    """``owner_type="agent"`` is rendered into the clause."""
    clause = compile_filters(None, owner_id="alice", owner_type="agent")

    assert "owner_type = 'agent'" in clause
def test_app_project_scope_pinned() -> None:
    """Explicit ``app_id`` / ``project_id`` values are rendered into the clause."""
    scope = {"app_id": "claude_code", "project_id": "oss"}

    clause = compile_filters(None, owner_id="alice", owner_type="user", **scope)

    assert "app_id = 'claude_code'" in clause
    assert "project_id = 'oss'" in clause
def test_owner_id_with_quote_is_escaped() -> None:
    """A single quote inside ``owner_id`` is doubled in the rendered literal."""
    clause = compile_filters(None, owner_id="al'ice", owner_type="user")

    assert "owner_id = 'al''ice'" in clause
# ── Equality / shorthand ────────────────────────────────────────────────
def test_flat_equality_shorthand() -> None:
    """A bare ``field=value`` on the node compiles to an equality predicate."""
    session_filter = FilterNode(session_id="sess_a")  # type: ignore[call-arg]

    clause = compile_filters(session_filter, owner_id="alice", owner_type="user")

    assert "session_id = 'sess_a'" in clause
def test_multiple_flat_fields_join_with_and() -> None:
    """Two flat fields on one node both appear as equality predicates."""
    raw = {"session_id": "sess_a", "parent_type": "memcell"}
    two_fields = FilterNode.model_validate(raw)

    clause = compile_filters(two_fields, owner_id="alice", owner_type="user")

    assert "session_id = 'sess_a'" in clause
    assert "parent_type = 'memcell'" in clause
# ── Operators ───────────────────────────────────────────────────────────
def test_timestamp_gte_renders_timestamp_literal() -> None:
    """An integer ``gte`` bound on ``timestamp`` renders as a ``TIMESTAMP '...'`` literal."""
    lower_bound = FilterNode.model_validate({"timestamp": {"gte": 1704067200000}})

    clause = compile_filters(lower_bound, owner_id="alice", owner_type="user")

    assert "timestamp >= TIMESTAMP '" in clause
def test_timestamp_range_folds_with_and() -> None:
    """Two operators on one field render as two predicates joined by ``AND``.

    The base owner / app / project clause already contains ``" AND "``, so a
    substring check against the whole clause would pass even if the two
    bounds were joined incorrectly. The conjunction is therefore asserted on
    the text *between* the two timestamp predicates.
    """
    node = FilterNode.model_validate(
        {"timestamp": {"gte": 1704067200000, "lt": 1740614399000}}
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    lower = "timestamp >= TIMESTAMP '"
    upper = "timestamp < TIMESTAMP '"
    assert lower in where
    assert upper in where
    # Operators on the same field must be AND-ed with each other; slice from
    # whichever predicate comes first up to the start of the other one.
    start, end = sorted((where.index(lower), where.index(upper)))
    between = where[start:end]
    assert " AND " in between
    assert " OR " not in between
def test_in_operator_string_field() -> None:
    """``in`` on a string field renders a quoted ``IN (...)`` list."""
    payload = {"parent_type": {"in": ["memcell", "episode"]}}
    filters = FilterNode.model_validate(payload)
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert "parent_type IN ('memcell', 'episode')" in clause
def test_in_operator_requires_non_empty_list() -> None:
    """An empty ``in`` list is rejected with ``FilterError`` at compile time."""
    filters = FilterNode.model_validate({"parent_type": {"in": []}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError):
        compile_filters(filters, **owner)
def test_invalid_operator_rejected() -> None:
    """An operator name the compiler does not know raises ``FilterError``."""
    filters = FilterNode.model_validate({"timestamp": {"between": [1, 2]}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="operator"):
        compile_filters(filters, **owner)
# ── Combinators ─────────────────────────────────────────────────────────
def test_and_combinator() -> None:
    """An ``AND`` array renders each child and joins the children with ``AND``.

    The base owner / app / project clause already contains ``" AND "``, so
    the conjunction is asserted on the text *between* the two child
    predicates rather than on the whole clause.
    """
    node = FilterNode.model_validate(
        {
            "AND": [
                {"timestamp": {"gte": 1704067200000}},
                {"timestamp": {"lt": 1740614399000}},
            ]
        }
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    lower = "timestamp >= TIMESTAMP '"
    upper = "timestamp < TIMESTAMP '"
    assert lower in where
    assert upper in where
    # Slice from whichever child comes first up to the start of the other so
    # the check cannot be satisfied by the AND-joined base clause.
    start, end = sorted((where.index(lower), where.index(upper)))
    between = where[start:end]
    assert " AND " in between
    assert " OR " not in between
def test_or_combinator() -> None:
    """An ``OR`` array renders each branch and the clause contains ``OR``."""
    branches = [{"parent_type": "memcell"}, {"parent_type": "episode"}]
    filters = FilterNode.model_validate({"OR": branches})
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert " OR " in clause
    assert "parent_type = 'memcell'" in clause
    assert "parent_type = 'episode'" in clause
def test_nested_and_inside_or() -> None:
    """An ``AND`` group nested in an ``OR`` keeps its own conjunction.

    The base owner / app / project clause already contains ``" AND "``, so
    the nested conjunction is asserted on the text *between* the two
    predicates of the inner ``AND`` group.
    """
    node = FilterNode.model_validate(
        {
            "OR": [
                {"AND": [{"parent_type": "memcell"}, {"session_id": "sa"}]},
                {"parent_type": "episode"},
            ]
        }
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    memcell = "parent_type = 'memcell'"
    session = "session_id = 'sa'"
    assert memcell in where
    assert session in where
    assert "parent_type = 'episode'" in where
    assert " OR " in where
    # The two members of the inner group must be AND-ed with each other; the
    # slice excludes the base clause so the check is not trivially true.
    start, end = sorted((where.index(memcell), where.index(session)))
    inner = where[start:end]
    assert " AND " in inner
    assert " OR " not in inner
def test_flat_field_alongside_and_combinator() -> None:
    """A flat field and an ``AND`` array on the same node are both compiled."""
    payload = {"session_id": "sess_a", "AND": [{"timestamp": {"gte": 1}}]}
    filters = FilterNode.model_validate(payload)
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert "session_id = 'sess_a'" in clause
    assert "timestamp >= TIMESTAMP '" in clause
# ── Array field (sender_id → sender_ids) ────────────────────────────────
def test_sender_id_eq_uses_array_has() -> None:
    """Equality on ``sender_id`` compiles to ``array_has`` over ``sender_ids``."""
    filters = FilterNode.model_validate({"sender_id": "u_jason"})
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert "array_has(sender_ids, 'u_jason')" in clause
def test_sender_id_in_expands_to_or_array_has() -> None:
    """``in`` on ``sender_id`` emits one ``array_has`` per value plus an ``OR``."""
    payload = {"sender_id": {"in": ["u_a", "u_b"]}}
    filters = FilterNode.model_validate(payload)
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    for sender in ("u_a", "u_b"):
        assert f"array_has(sender_ids, '{sender}')" in clause
    assert " OR " in clause
def test_sender_id_gt_rejected() -> None:
    """A range operator on the array-backed ``sender_id`` field is refused."""
    filters = FilterNode.model_validate({"sender_id": {"gt": "x"}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="not supported on array"):
        compile_filters(filters, **owner)
# ── Safety ──────────────────────────────────────────────────────────────
def test_unknown_field_rejected() -> None:
    """A field name the compiler does not support raises ``FilterError``."""
    filters = FilterNode.model_validate({"secret_field": "x"})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="unsupported filter field"):
        compile_filters(filters, **owner)
def test_owner_id_in_filters_rejected() -> None:
    """``owner_id`` inside the filter payload is reported as reserved."""
    filters = FilterNode.model_validate({"owner_id": "mallory"})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="reserved"):
        compile_filters(filters, **owner)
def test_owner_type_in_filters_rejected() -> None:
    """``owner_type`` inside the filter payload is reported as reserved."""
    filters = FilterNode.model_validate({"owner_type": "agent"})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="reserved"):
        compile_filters(filters, **owner)
def test_string_with_single_quote_escaped() -> None:
    """A single quote in a filter value is doubled in the rendered literal."""
    filters = FilterNode.model_validate({"session_id": "ses's"})
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert "session_id = 'ses''s'" in clause
def test_timestamp_string_with_quote_rejected() -> None:
    """A timestamp string carrying an embedded quote must be refused outright."""
    payload = {"timestamp": {"gte": "2024-01'-01T00:00:00"}}
    filters = FilterNode.model_validate(payload)
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="contains a quote"):
        compile_filters(filters, **owner)
def test_in_value_type_check() -> None:
    """Non-string members of an ``in`` list on ``parent_type`` are rejected."""
    filters = FilterNode.model_validate({"parent_type": {"in": [1, 2]}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="must be a string"):
        compile_filters(filters, **owner)
def test_bool_for_timestamp_rejected() -> None:
    """A boolean supplied as a timestamp bound raises ``FilterError``."""
    filters = FilterNode.model_validate({"timestamp": {"gte": True}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="timestamp value"):
        compile_filters(filters, **owner)
def test_empty_operator_map_rejected() -> None:
    """A field mapped to an empty operator dict raises ``FilterError``."""
    filters = FilterNode.model_validate({"timestamp": {}})
    owner = {"owner_id": "alice", "owner_type": "user"}
    with pytest.raises(FilterError, match="empty operator map"):
        compile_filters(filters, **owner)
def test_empty_and_array_skips_combinator() -> None:
    """An empty ``AND`` array contributes nothing; the clause is the base only."""
    expected = (
        "owner_id = 'alice' AND owner_type = 'user' "
        "AND app_id = 'default' AND project_id = 'default'"
    )
    filters = FilterNode.model_validate({"AND": []})
    clause = compile_filters(filters, owner_id="alice", owner_type="user")
    assert clause == expected

View File

@ -0,0 +1,278 @@
"""Unit tests for ``memory.search.hierarchy``.
White-box surfaces accessed:
- ``_hierarchy_eviction_pass`` (internal, tested directly for unit coverage)
- ``hierarchy_retrieve_episodes`` (public function, tested with stubbed I/O)
All I/O (fact_recaller, episode_recaller) is injected via AsyncMock stubs.
No LanceDB or network calls are made.
"""
from __future__ import annotations
import datetime as _dt
from unittest.mock import AsyncMock, MagicMock
import pytest
from everalgo.types import Candidate, FactCandidate
from everos.memory.search.hierarchy import (
_hierarchy_eviction_pass,
hierarchy_retrieve_episodes,
)
# ── Fixtures / helpers ───────────────────────────────────────────────────
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _episode_candidate(
    *,
    ep_id: str = "ep-1",
    score: float = 0.7,
    memcell_id: str = "mc-1",
) -> Candidate:
    """Build a vector-sourced episode ``Candidate`` with canned metadata.

    ``memcell_id`` is stored under the ``parent_id`` metadata key; all other
    metadata values are fixed.
    """
    metadata = {
        "parent_id": memcell_id,
        "owner_id": "u1",
        "owner_type": "user",
        "session_id": "sess-1",
        "timestamp": _ts(),
        "episode": "Some episode text.",
        "sender_ids": ["u1"],
        "subject": "Test subject",
        "summary": "Test summary",
    }
    return Candidate(id=ep_id, score=score, source="vector", metadata=metadata)
def _fact_candidate(
    *,
    fact_id: str = "fact-1",
    parent_episode_id: str = "ep-1",
    score: float = 0.9,
) -> FactCandidate:
    """Build a ``FactCandidate`` attached to ``parent_episode_id`` with canned text."""
    metadata = {"fact": "Some fact text."}
    return FactCandidate(
        id=fact_id,
        parent_episode_id=parent_episode_id,
        score=score,
        metadata=metadata,
    )
def _make_recallers(
    *,
    dense_facts: list[Candidate] | None = None,
    fetched_episodes: list[Candidate] | None = None,
    facts_for_episodes: dict[str, list[FactCandidate]] | None = None,
) -> tuple[MagicMock, MagicMock]:
    """Return ``(fact_recaller, episode_recaller)`` mocks with async methods.

    Each async method resolves to the matching argument, or to an empty
    list / dict when the argument is omitted.
    """
    facts_stub = MagicMock()
    episodes_stub = MagicMock()
    facts_stub.dense_recall = AsyncMock(return_value=dense_facts or [])
    facts_stub.facts_for_episodes = AsyncMock(return_value=facts_for_episodes or {})
    episodes_stub.fetch_by_parent_ids = AsyncMock(return_value=fetched_episodes or [])
    return facts_stub, episodes_stub
# ── _hierarchy_eviction_pass unit tests ─────────────────────────────────
class TestHierarchyEvictionPass:
    """Direct tests of ``_hierarchy_eviction_pass`` on hand-built candidates."""

    def test_fact_wins_emits_atomic_fact_scored_item(self) -> None:
        """A fact scoring above its episode is emitted in the episode's place."""
        ep = _episode_candidate(ep_id="ep-1", score=0.5)
        top_fact = _fact_candidate(
            fact_id="fact-1", parent_episode_id="ep-1", score=0.9
        )
        items = _hierarchy_eviction_pass([ep], {"ep-1": [top_fact]})
        assert len(items) == 1
        (only,) = items
        assert only.item_type == "atomic_fact"
        assert only.id == "fact-1"
        assert only.score == pytest.approx(0.9)

    def test_episode_wins_emits_episode_scored_item(self) -> None:
        """An episode scoring above its best fact is emitted unchanged."""
        ep = _episode_candidate(ep_id="ep-1", score=0.8)
        weak_fact = _fact_candidate(
            fact_id="fact-1", parent_episode_id="ep-1", score=0.6
        )
        items = _hierarchy_eviction_pass([ep], {"ep-1": [weak_fact]})
        assert len(items) == 1
        (only,) = items
        assert only.item_type == "episode"
        assert only.id == "ep-1"
        assert only.score == pytest.approx(0.8)

    def test_no_facts_emits_episode(self) -> None:
        """With an empty fact map the episode itself is emitted."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)
        items = _hierarchy_eviction_pass([ep], {})
        assert len(items) == 1
        assert items[0].item_type == "episode"
        assert items[0].id == "ep-1"

    def test_ordering_preserved_matches_input_order(self) -> None:
        """Output items appear in the same order as the input episodes."""
        specs = [("ep-a", 0.9, "mc-a"), ("ep-b", 0.8, "mc-b"), ("ep-c", 0.7, "mc-c")]
        merged = [
            _episode_candidate(ep_id=ep_id, score=score, memcell_id=memcell)
            for ep_id, score, memcell in specs
        ]
        items = _hierarchy_eviction_pass(merged, {})
        assert [item.id for item in items] == ["ep-a", "ep-b", "ep-c"]

    def test_parent_episode_id_set_on_evicted_fact(self) -> None:
        """A winning fact carries the id of the episode it replaced."""
        ep = _episode_candidate(ep_id="ep-1", score=0.4)
        top_fact = _fact_candidate(
            fact_id="fact-1", parent_episode_id="ep-1", score=0.8
        )
        items = _hierarchy_eviction_pass([ep], {"ep-1": [top_fact]})
        assert items[0].parent_episode_id == "ep-1"

    def test_episode_wins_parent_episode_id_is_none(self) -> None:
        """A surviving episode item has no ``parent_episode_id``."""
        ep = _episode_candidate(ep_id="ep-1", score=0.9)
        weak_fact = _fact_candidate(
            fact_id="fact-1", parent_episode_id="ep-1", score=0.5
        )
        items = _hierarchy_eviction_pass([ep], {"ep-1": [weak_fact]})
        assert items[0].parent_episode_id is None

    def test_multiple_episodes_mixed_eviction(self) -> None:
        """Eviction is decided per episode: fact wins, episode wins, no facts."""
        episodes = [
            _episode_candidate(ep_id="ep-1", score=0.5, memcell_id="mc-1"),
            _episode_candidate(ep_id="ep-2", score=0.8, memcell_id="mc-2"),
            _episode_candidate(ep_id="ep-3", score=0.6, memcell_id="mc-3"),
        ]
        facts_by_episode = {
            "ep-1": [
                _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.9)
            ],
            "ep-2": [
                _fact_candidate(fact_id="fact-2", parent_episode_id="ep-2", score=0.4)
            ],
        }
        items = _hierarchy_eviction_pass(episodes, facts_by_episode)
        assert len(items) == 3
        first, second, third = items
        assert first.item_type == "atomic_fact"
        assert first.id == "fact-1"
        assert second.item_type == "episode"
        assert second.id == "ep-2"
        assert third.item_type == "episode"
        assert third.id == "ep-3"

    def test_best_fact_is_first_element_used_for_comparison(self) -> None:
        """The first fact in the per-episode list is the one that gets emitted."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)
        ranked_facts = [
            _fact_candidate(fact_id="fact-best", parent_episode_id="ep-1", score=0.8),
            _fact_candidate(fact_id="fact-second", parent_episode_id="ep-1", score=0.3),
        ]
        items = _hierarchy_eviction_pass([ep], {"ep-1": ranked_facts})
        assert items[0].item_type == "atomic_fact"
        assert items[0].id == "fact-best"

    def test_fact_score_equal_to_episode_score_episode_wins(self) -> None:
        """On a score tie the episode is kept."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)
        tied_fact = _fact_candidate(
            fact_id="fact-1", parent_episode_id="ep-1", score=0.7
        )
        items = _hierarchy_eviction_pass([ep], {"ep-1": [tied_fact]})
        assert items[0].item_type == "episode"
# ── hierarchy_retrieve_episodes integration-style unit tests ─────────────
class TestHierarchyRetrieveEpisodes:
    """Tests of ``hierarchy_retrieve_episodes`` with every recaller stubbed.

    amaxsim_retrieve and rrf are exercised with real implementations but
    all LanceDB / network calls are replaced by AsyncMock.
    """

    async def test_empty_sparse_dense_returns_empty_list(self) -> None:
        """No sparse and no dense candidates produce an empty result."""
        facts_stub, episodes_stub = _make_recallers()
        outcome = await hierarchy_retrieve_episodes(
            query="test query",
            sparse=[],
            dense=[],
            query_vector=[0.1, 0.2, 0.3],
            fact_recaller=facts_stub,
            episode_recaller=episodes_stub,
            where="owner_id = 'u1'",
            top_k=10,
        )
        assert outcome == []

    async def test_happy_path_episode_wins_no_nested_facts(self) -> None:
        """With no facts recalled, the episode comes back with no nested facts."""
        episode = _episode_candidate(ep_id="ep-1", score=0.8, memcell_id="mc-1")
        facts_stub, episodes_stub = _make_recallers(
            dense_facts=[],
            fetched_episodes=[],
            facts_for_episodes={},
        )
        outcome = await hierarchy_retrieve_episodes(
            query="test query",
            sparse=[episode],
            dense=[episode],
            query_vector=[0.1, 0.2, 0.3],
            fact_recaller=facts_stub,
            episode_recaller=episodes_stub,
            where="owner_id = 'u1'",
            top_k=10,
        )
        assert len(outcome) == 1
        (returned,) = outcome
        assert returned.id == "ep-1"
        assert returned.atomic_facts == []

    async def test_happy_path_fact_evicts_episode_nested_in_result(self) -> None:
        """A higher-scoring fact is returned nested under its episode."""
        episode = _episode_candidate(ep_id="ep-2", score=0.6, memcell_id="mc-2")
        winning_fact = _fact_candidate(
            fact_id="fact-2", parent_episode_id="ep-2", score=0.95
        )
        dense_fact_row = Candidate(
            id="fact-2",
            score=0.95,
            source="vector",
            metadata={"parent_id": "mc-2"},
        )
        facts_stub, episodes_stub = _make_recallers(
            dense_facts=[dense_fact_row],
            fetched_episodes=[episode],
            facts_for_episodes={"ep-2": [winning_fact]},
        )
        outcome = await hierarchy_retrieve_episodes(
            query="test query",
            sparse=[episode],
            dense=[episode],
            query_vector=[0.1, 0.2, 0.3],
            fact_recaller=facts_stub,
            episode_recaller=episodes_stub,
            where="owner_id = 'u1'",
            top_k=10,
        )
        assert len(outcome) == 1
        returned = outcome[0]
        assert returned.atomic_facts != []
        nested = returned.atomic_facts[0]
        assert nested.id == "fact-2"
        assert nested.score == pytest.approx(0.95)

View File

@ -0,0 +1,930 @@
"""Unit tests for ``SearchManager`` with in-memory stub recallers.
These tests exercise the orchestration without touching LanceDB. Every
recaller is replaced by a hand-rolled stub that returns a small
candidate list; the manager's job is to:
* honour the ``owner_type`` hard partition,
* run KEYWORD as sparse-only and leave ``atomic_facts`` empty,
* run VECTOR as dense-only (and refuse when no embedding is wired),
* let HYBRID run without an LLM by default; require LLM only when the
caller sets ``enable_llm_rerank=True``,
* refuse AGENTIC when reranker / LLM prerequisites are missing,
* delegate AGENTIC to ``search_episodes_agentic`` and return its result.
"""
from __future__ import annotations
import datetime as _dt
from collections.abc import Mapping, Sequence
from typing import Any, ClassVar
import pytest
from everalgo.types import Candidate, FactCandidate
from everos.memory.search.dto import SearchMethod, SearchRequest
from everos.memory.search.manager import SearchManager
# ── Stubs ───────────────────────────────────────────────────────────────
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _episode_row(
    eid: str, score: float = 0.8, memcell_id: str | None = None
) -> Candidate:
    """Build a keyword-sourced episode ``Candidate`` owned by user ``alice``.

    When ``memcell_id`` is omitted the ``parent_id`` metadata defaults to
    ``mc_<eid>``.
    """
    parent = f"mc_{eid}" if memcell_id is None else memcell_id
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": f"subj {eid}",
        "summary": f"summary {eid}",
        "episode": f"body {eid}",
        "parent_id": parent,
    }
    return Candidate(id=eid, score=score, source="keyword", metadata=metadata)
def _case_row(cid: str) -> Candidate:
    """Build a keyword-sourced agent-case ``Candidate`` owned by ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": f"intent {cid}",
        "approach": f"approach {cid}",
        "quality_score": 0.8,
    }
    return Candidate(id=cid, score=0.7, source="keyword", metadata=metadata)
def _skill_row(sid: str) -> Candidate:
    """Build a keyword-sourced agent-skill ``Candidate`` owned by ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": f"skill_{sid}",
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=0.65, source="keyword", metadata=metadata)
class _StubEpisodeRecaller:
    """In-memory episode recaller that serves canned sparse / dense rows.

    The most recent ``where`` clause passed to either recall method is kept
    on ``last_where`` so tests can inspect it.
    """

    kind: ClassVar[str] = "episode"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "episode"

    def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
        self._sparse = sparse
        self._dense = dense
        self.last_where: str | None = None

    async def sparse_recall(
        self, query: str, where: str, *, limit: int
    ) -> list[Candidate]:
        """Record ``where`` and return the first ``limit`` sparse rows."""
        self.last_where = where
        head = self._sparse[:limit]
        return list(head)

    async def dense_recall(
        self, vector: Sequence[float], where: str, *, limit: int
    ) -> list[Candidate]:
        """Record ``where`` and return the first ``limit`` dense rows."""
        self.last_where = where
        head = self._dense[:limit]
        return list(head)

    async def fetch_by_parent_ids(
        self, parent_ids: Sequence[str], where: str
    ) -> list[Candidate]:
        """Return the dense rows whose ``parent_id`` is in ``parent_ids``."""
        # Dense rows are keyed by their parent (memcell) id so the maxsim
        # reverse-resolve step has something to look up.
        rows_by_parent: dict[str, Candidate] = {}
        for row in self._dense:
            rows_by_parent[str(row.metadata.get("parent_id", ""))] = row
        return [rows_by_parent[pid] for pid in parent_ids if pid in rows_by_parent]
class _StubAtomicFactRecaller:
    """In-memory atomic-fact recaller backed by a per-episode fact map."""

    kind: ClassVar[str] = "atomic_fact"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "fact"

    def __init__(
        self,
        facts_map: dict[str, list[FactCandidate]] | None = None,
        dense: list[Candidate] | None = None,
    ) -> None:
        self._facts_map = facts_map if facts_map else {}
        self._dense = dense if dense else []

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Sparse recall always comes back empty for this stub."""
        return []

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows, ignoring all arguments."""
        return list(self._dense)

    async def facts_for_episodes(
        self,
        ep_to_memcell: Mapping[str, str],
        where: str,
        *,
        per_episode: int,
        query_vector: Any = None,
    ) -> dict[str, list[FactCandidate]]:
        """Return up to ``per_episode`` canned facts for each requested episode."""
        # ``query_vector`` is accepted only to match the real recaller
        # signature; the stub does not use it.
        facts_by_episode: dict[str, list[FactCandidate]] = {}
        for episode_id in ep_to_memcell:
            canned = self._facts_map.get(episode_id, [])
            facts_by_episode[episode_id] = canned[:per_episode]
        return facts_by_episode
class _StubAgentCaseRecaller:
    """In-memory agent-case recaller that returns its canned rows verbatim."""

    kind: ClassVar[str] = "agent_case"
    everalgo_memory_type: ClassVar[str] = "case"
    text_field: ClassVar[str] = "task_intent"

    def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
        self._sparse = sparse
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned sparse rows, ignoring all arguments."""
        rows = list(self._sparse)
        return rows

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows, ignoring all arguments."""
        rows = list(self._dense)
        return rows
class _StubAgentSkillRecaller:
    """In-memory agent-skill recaller with an optional case-bridge fixture."""

    kind: ClassVar[str] = "agent_skill"
    everalgo_memory_type: ClassVar[str] = "skill"
    text_field: ClassVar[str] = "description"

    def __init__(
        self,
        sparse: list[Candidate],
        dense: list[Candidate],
        by_case: list[Candidate] | None = None,
    ) -> None:
        self._sparse = sparse
        self._dense = dense
        # Rows handed back by ``fetch_by_case_ids`` (the bridge recall).
        # Empty unless a bridge test supplies them.
        self._by_case = by_case if by_case else []

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned sparse rows, ignoring all arguments."""
        rows = list(self._sparse)
        return rows

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows, ignoring all arguments."""
        rows = list(self._dense)
        return rows

    async def fetch_by_case_ids(
        self, case_ids: Sequence[str], where: str, *, limit: int
    ) -> list[Candidate]:
        """Return a copy of the bridge rows regardless of the requested ids."""
        rows = list(self._by_case)
        return rows
class _StubProfileRecaller:
    """Profile recaller stub that never finds a profile."""

    async def fetch(self, owner_id: str) -> list:
        """Return an empty list for any ``owner_id``."""
        no_profiles: list = []
        return no_profiles
class _StubEmbedding:
    """Embedding stub that yields all-zero vectors of length ``dim``."""

    def __init__(self, dim: int = 4) -> None:
        self.dim = dim

    async def embed(self, text: str) -> list[float]:
        """Return a zero vector of length ``dim`` for any text."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Return one zero vector per entry of ``texts``."""
        vectors: list[list[float]] = []
        for _ in texts:
            vectors.append([0.0] * self.dim)
        return vectors
# ── Fixtures ────────────────────────────────────────────────────────────
def _build_manager(
    *,
    episode_sparse: list[Candidate] | None = None,
    episode_dense: list[Candidate] | None = None,
    case_sparse: list[Candidate] | None = None,
    case_dense: list[Candidate] | None = None,
    skill_sparse: list[Candidate] | None = None,
    skill_dense: list[Candidate] | None = None,
    skill_by_case: list[Candidate] | None = None,
    facts_map: dict[str, list[FactCandidate]] | None = None,
    atomic_fact_dense: list[Candidate] | None = None,
    embedding: _StubEmbedding | None = None,
    reranker: Any = None,
    llm_client: Any = None,
) -> SearchManager:
    """Assemble a ``SearchManager`` wired entirely to the in-memory stubs.

    Every candidate list defaults to empty; ``embedding``, ``reranker`` and
    ``llm_client`` are passed through unchanged (``None`` when omitted).
    """
    episodes = _StubEpisodeRecaller(episode_sparse or [], episode_dense or [])
    facts = _StubAtomicFactRecaller(facts_map, atomic_fact_dense)
    cases = _StubAgentCaseRecaller(case_sparse or [], case_dense or [])
    skills = _StubAgentSkillRecaller(
        skill_sparse or [], skill_dense or [], skill_by_case
    )
    return SearchManager(
        episode_recaller=episodes,
        atomic_fact_recaller=facts,
        agent_case_recaller=cases,
        agent_skill_recaller=skills,
        profile_recaller=_StubProfileRecaller(),
        embedding=embedding,
        reranker=reranker,
        llm_client=llm_client,
    )
def _user_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build a ``SearchRequest`` for user ``alice`` with query ``"hi"``."""
    request = SearchRequest(user_id="alice", query="hi", method=method, **kwargs)
    return request
def _agent_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build a ``SearchRequest`` for agent ``agent_a`` with query ``"hi"``."""
    request = SearchRequest(agent_id="agent_a", query="hi", method=method, **kwargs)
    return request
# ── KEYWORD: user owner ────────────────────────────────────────────────
async def test_user_keyword_returns_episodes_only() -> None:
    """User KEYWORD search returns the episode and leaves other lanes empty."""
    manager = _build_manager(episode_sparse=[_episode_row("ep_1")])
    response = await manager.search(_user_req())
    # The request id is 32 lowercase hexadecimal characters.
    assert len(response.request_id) == 32
    assert all(ch in "0123456789abcdef" for ch in response.request_id)
    episodes = response.data.episodes
    assert len(episodes) == 1
    first = episodes[0]
    assert first.id == "ep_1"
    assert first.user_id == "alice"
    assert first.type == "Conversation"
    # Agent and profile lanes are untouched for a user owner.
    assert response.data.agent_cases == []
    assert response.data.agent_skills == []
    assert response.data.profiles == []
async def test_user_keyword_leaves_atomic_facts_empty() -> None:
    """KEYWORD results never carry atomic facts, even when facts exist.

    The facts repository here does hold a row for the matched episode, yet
    the keyword path must still return ``atomic_facts=[]``: those facts have
    no per-query score, so attaching them would blur the contract (this
    mirrors enterprise, where event_log is its own memory_type rather than
    something auto-attached to episodic results).
    """
    stored_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.0,
        metadata={"fact": "Alice prefers oat milk"},
    )
    manager = _build_manager(
        episode_sparse=[_episode_row("ep_1")],
        facts_map={"ep_1": [stored_fact]},
    )
    response = await manager.search(_user_req())
    first_episode = response.data.episodes[0]
    assert first_episode.atomic_facts == []
async def test_user_keyword_no_results() -> None:
    """Empty recallers yield an empty episode list."""
    manager = _build_manager()
    response = await manager.search(_user_req())
    assert response.data.episodes == []
async def test_user_keyword_filters_compile_pinned_owner() -> None:
    """The ``where`` handed to the recaller pins owner_id and owner_type."""
    episodes = _StubEpisodeRecaller([_episode_row("ep_1")], [])
    manager = SearchManager(
        episode_recaller=episodes,
        atomic_fact_recaller=_StubAtomicFactRecaller(),
        agent_case_recaller=_StubAgentCaseRecaller([], []),
        agent_skill_recaller=_StubAgentSkillRecaller([], []),
        profile_recaller=_StubProfileRecaller(),
        embedding=None,
        reranker=None,
        llm_client=None,
    )
    await manager.search(_user_req())
    assert episodes.last_where is not None
    for fragment in ("owner_id = 'alice'", "owner_type = 'user'"):
        assert fragment in episodes.last_where
# ── VECTOR: requires embedding ────────────────────────────────────────
async def test_vector_method_requires_embedding() -> None:
    """VECTOR search without an embedding provider raises ``RuntimeError``."""
    manager = _build_manager()  # no embedding wired
    request = _user_req(method=SearchMethod.VECTOR)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(request)
async def test_vector_method_runs_dense_only_with_embedding() -> None:
    """VECTOR search returns dense rows only; sparse rows must not leak in."""
    manager = _build_manager(
        episode_sparse=[_episode_row("should_not_appear")],
        episode_dense=[_episode_row("ep_dense")],
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.VECTOR))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_dense"]
async def test_vector_radius_filter_drops_below_threshold() -> None:
    """Rows scoring under the requested ``radius`` are dropped."""
    dense_rows = [
        _episode_row("ep_low", score=0.3),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, radius=0.5)
    response = await manager.search(request)
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_high"]
async def test_unlimited_mode_applies_default_radius_for_vector() -> None:
    """``top_k=-1`` with no radius falls back to the project default of 0.5.

    This follows enterprise's auto-floor behaviour: unlimited mode must not
    hand back an arbitrarily low-similarity tail.
    """
    dense_rows = [
        _episode_row("ep_low", score=0.3),  # under the 0.5 default, dropped
        _episode_row("ep_mid", score=0.55),  # over the default, kept
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=-1)
    response = await manager.search(request)
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_mid", "ep_high"]
async def test_unlimited_mode_explicit_radius_overrides_default() -> None:
    """An explicit radius (even ``0.0``) takes precedence over the unlimited default."""
    dense_rows = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=-1, radius=0.1)
    response = await manager.search(request)
    # With a 0.1 floor both rows survive; the 0.5 default would drop ep_low.
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
async def test_normal_mode_keeps_full_pool_when_no_radius() -> None:
    """With ``top_k > 0`` and no radius, no score floor is applied."""
    dense_rows = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=10)
    response = await manager.search(request)
    # Normal mode has no default radius, so both rows are returned.
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
# ── VECTOR + maxsim_atomic strategy ─────────────────────────────────────
def _atomic_fact_row(fid: str, *, parent_id: str, score: float) -> Candidate:
    """Build a vector-sourced atomic-fact ``Candidate`` owned by user ``alice``.

    Stands in for what ``AtomicFactRecaller.dense_recall`` emits.
    """
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "parent_id": parent_id,
        "fact": f"fact {fid}",
    }
    return Candidate(id=fid, score=score, source="vector", metadata=metadata)
async def test_vector_maxsim_atomic_max_pools_facts_to_episodes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``maxsim_atomic`` scores each episode by its best atomic-fact score.

    Facts are recalled by ANN, max-pooled per memcell parent, then resolved
    back to episodes, which come out ordered by that pooled maximum.
    """
    from everos.config.settings import load_settings

    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()
    # Two episodes with facts under each; the highest fact score per
    # memcell is expected to become the episode score.
    episode_rows = [
        _episode_row("ep_A", memcell_id="mc_A"),
        _episode_row("ep_B", memcell_id="mc_B"),
    ]
    fact_rows = [
        _atomic_fact_row("f_A1", parent_id="mc_A", score=0.95),
        _atomic_fact_row("f_A2", parent_id="mc_A", score=0.40),
        _atomic_fact_row("f_B1", parent_id="mc_B", score=0.75),
    ]
    manager = _build_manager(
        episode_dense=episode_rows,
        atomic_fact_dense=fact_rows,
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.VECTOR, top_k=5))
    episodes = response.data.episodes
    # Both episodes come back, highest pooled score first.
    assert [episode.id for episode in episodes] == ["ep_A", "ep_B"]
    assert episodes[0].score == pytest.approx(0.95)  # max(0.95, 0.40)
    assert episodes[1].score == pytest.approx(0.75)
async def test_vector_maxsim_atomic_returns_empty_when_no_facts(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no facts recalled there is nothing to score, so no episodes return."""
    from everos.config.settings import load_settings

    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()
    manager = _build_manager(
        episode_dense=[_episode_row("ep_A", memcell_id="mc_A")],
        atomic_fact_dense=[],
        embedding=_StubEmbedding(),
    )
    request = _user_req(method=SearchMethod.VECTOR, top_k=5)
    response = await manager.search(request)
    assert response.data.episodes == []
# ── HYBRID / AGENTIC: prerequisite errors ──────────────────────────────
async def test_hybrid_requires_embedding() -> None:
    """HYBRID search without an embedding provider raises ``RuntimeError``."""
    manager = _build_manager()
    request = _user_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(request)
async def test_hybrid_does_not_require_llm_by_default() -> None:
    """HYBRID runs without an LLM client unless LLM rerank is requested.

    With ``enable_llm_rerank`` left at its default, the fusion-only path
    (RRF / LR) must complete with no LLM configured.
    """
    manager = _build_manager(embedding=_StubEmbedding())
    # Must not raise: the caller has not opted in to the Phase-5 rerank.
    response = await manager.search(_user_req(method=SearchMethod.HYBRID))
    # The stub recallers are empty, so the result is empty too.
    assert response.data.episodes == []
async def test_hybrid_requires_llm_when_enable_llm_rerank_true() -> None:
    """``enable_llm_rerank=True`` without an LLM client raises ``RuntimeError``."""
    manager = _build_manager(embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    with pytest.raises(RuntimeError, match="enable_llm_rerank"):
        await manager.search(request)
async def test_user_hybrid_episode_fuses_and_evicts_facts() -> None:
    """HYBRID episode path runs the hierarchy pipeline end to end.

    Pipeline: RRF -> MaxSim -> merge -> eviction. ``ep_1`` owns a fact that
    outscores the RRF score, so the fact evicts the episode; ``ep_2`` has no
    facts and is emitted as-is.
    """
    first_row = _episode_row("ep_1", score=0.8, memcell_id="mc_1")
    second_row = _episode_row("ep_2", score=0.7, memcell_id="mc_2")
    strong_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.95,
        metadata={"fact": "Alice prefers oat milk"},
    )
    manager = _build_manager(
        episode_sparse=[first_row, second_row],
        episode_dense=[first_row, second_row],
        facts_map={"ep_1": [strong_fact]},
        embedding=_StubEmbedding(),
    )
    request = _user_req(method=SearchMethod.HYBRID, top_k=10)
    response = await manager.search(request)
    episodes = response.data.episodes
    assert len(episodes) >= 1
    matches = [episode for episode in episodes if episode.id == "ep_1"]
    found = matches[0] if matches else None
    assert found is not None
    assert len(found.atomic_facts) == 1
    assert found.atomic_facts[0].id == "f1"
async def test_agentic_requires_reranker_and_llm() -> None:
    """AGENTIC search with no reranker raises a ``rerank provider`` error."""
    manager = _build_manager(embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.AGENTIC)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(request)
async def test_agent_hybrid_requires_reranker_without_llm_rerank() -> None:
    """Agent HYBRID without LLM rerank fails fast when no reranker is wired.

    ``owner_type='agent'`` + HYBRID + ``enable_llm_rerank=False`` lands on
    the skill cross-encoder lane (``skill_hybrid``: rrf → cross-encoder), so
    a missing rerank provider has to surface as a configuration error up
    front instead of crashing deep inside the rerank callback.
    """
    manager = _build_manager(embedding=_StubEmbedding())
    request = _agent_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(request)
async def test_agent_hybrid_with_llm_rerank_does_not_need_reranker() -> None:
    """Agent HYBRID with LLM rerank works without a cross-encoder reranker.

    The LLM-rerank lane bypasses the cross-encoder and goes through
    ``arank``, so an absent reranker is acceptable provided an LLM client is
    configured. The stub recallers are empty, hence the empty result; the
    point is that the call does not raise.
    """
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    response = await manager.search(request)
    assert response.data.agent_skills == []
    assert response.data.agent_cases == []
class _StubReranker:
    """Reranker stub that scores every document 1.0 in input order."""

    async def rerank(self, query: str, documents: Sequence[str]) -> list[Any]:
        """Return one ``RerankResult`` per document, indexed by position."""
        from everos.component.rerank.protocol import RerankResult

        results = []
        for position, _ in enumerate(documents):
            results.append(RerankResult(index=position, score=1.0))
        return results
class _StubLLM:
    """LLM stub whose ``chat`` performs no real call."""

    async def chat(self, *args: Any, **kwargs: Any) -> Any:
        """Accept any arguments and return an empty string."""
        reply = ""
        return reply
async def test_agentic_episode_delegates_to_search_episodes_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC method delegates to search_episodes_agentic and returns its result.

    ``search_episodes_agentic`` is replaced on the manager module with a
    coroutine returning a canned item list; the response must carry exactly
    that list.
    """
    from everos.memory.search.dto import SearchEpisodeItem

    # The module-level ``_ts()`` helper supplies the fixed timestamp, so no
    # function-local re-import of ``datetime`` is needed.
    fake_result = [
        SearchEpisodeItem(
            id="ep_1",
            score=0.9,
            session_id="s",
            user_id="alice",
            timestamp=_ts(),
            sender_ids=["alice"],
            subject="s",
            summary="s",
            episode="body",
            type="Conversation",
            atomic_facts=[],
        )
    ]

    async def _fake_agentic(*args: Any, **kwargs: Any) -> list[SearchEpisodeItem]:
        return fake_result

    monkeypatch.setattr(
        "everos.memory.search.manager.search_episodes_agentic", _fake_agentic
    )
    mgr = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    resp = await mgr.search(_user_req(method=SearchMethod.AGENTIC))
    assert resp.data.episodes == fake_result
# ── AGENT owner hard partition ─────────────────────────────────────────
async def test_agent_keyword_returns_cases_and_skills_only() -> None:
    """Agent-owned search yields cases and skills; episodes/profiles stay empty."""
    sparse_cases = [_case_row("c_1")]
    sparse_skills = [_skill_row("s_1")]
    manager = _build_manager(case_sparse=sparse_cases, skill_sparse=sparse_skills)

    payload = (await manager.search(_agent_req())).data

    assert payload.episodes == []
    assert payload.profiles == []
    assert [case.id for case in payload.agent_cases] == ["c_1"]
    assert [skill.id for skill in payload.agent_skills] == ["s_1"]
async def test_agent_owner_ignores_include_profile() -> None:
    """``include_profile=True`` yields no profiles for an agent owner.

    Profile is user-only at this revision.
    """
    manager = _build_manager()
    request = _agent_req(include_profile=True)
    response = await manager.search(request)
    assert response.data.profiles == []
# ── Top-k behaviour ───────────────────────────────────────────────────
async def test_top_k_truncates_results() -> None:
    """``top_k=3`` keeps only the first three of ten descending-score rows."""
    sparse_hits = [
        _episode_row(f"ep_{rank}", score=1.0 - rank * 0.01) for rank in range(10)
    ]
    manager = _build_manager(episode_sparse=sparse_hits)
    response = await manager.search(_user_req(top_k=3))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_0", "ep_1", "ep_2"]
async def test_top_k_minus_one_caps_at_100() -> None:
    """``top_k=-1`` over 120 sparse episode rows returns exactly 100."""
    sparse_hits = [_episode_row(f"ep_{index}") for index in range(120)]
    manager = _build_manager(episode_sparse=sparse_hits)
    response = await manager.search(_user_req(top_k=-1))
    episode_count = len(response.data.episodes)
    assert episode_count == 100
# ── AGENTIC agent_case / agent_skill delegation ───────────────────────────
async def test_agentic_agent_cases_delegates_to_search_agent_cases_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC agent search returns what ``search_agent_cases_agentic`` produced.

    The manager-level symbol is replaced with a coroutine returning a
    canned list, which must surface unchanged under ``data.agent_cases``.
    """
    import datetime as _dt

    from everos.memory.search.dto import SearchAgentCaseItem

    canned_case = SearchAgentCaseItem(
        id="c_1",
        agent_id="agent_a",
        session_id="sess_b",
        timestamp=_dt.datetime(2026, 1, 1, tzinfo=_dt.UTC),
        task_intent="handle login",
        approach="retry with backoff",
        quality_score=0.9,
        score=0.85,
    )
    expected = [canned_case]

    async def _fake_cases_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentCaseItem]:
        return expected

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_cases_agentic",
        _fake_cases_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    request = _agent_req(method=SearchMethod.AGENTIC)
    response = await manager.search(request)
    assert response.data.agent_cases == expected
async def test_agentic_agent_skills_delegates_to_search_agent_skills_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC agent search returns what ``search_agent_skills_agentic`` produced.

    The manager-level symbol is replaced with a coroutine returning a
    canned list, which must surface unchanged under ``data.agent_skills``.
    """
    from everos.memory.search.dto import SearchAgentSkillItem

    canned_skill = SearchAgentSkillItem(
        id="s_1",
        agent_id="agent_a",
        name="auth_refresh",
        description="Refreshes auth tokens",
        content="Retry with new token",
        confidence=0.9,
        maturity_score=0.7,
        source_case_ids=[],
        score=0.8,
    )
    expected = [canned_skill]

    async def _fake_skills_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentSkillItem]:
        return expected

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_agentic",
        _fake_skills_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    request = _agent_req(method=SearchMethod.AGENTIC)
    response = await manager.search(request)
    assert response.data.agent_skills == expected
# ── _merge_by_id_max / _case_bridged_skills helpers ──────────────────────
def test_merge_by_id_max_keeps_higher_score_on_collision() -> None:
    """Colliding ids keep the higher-scored row; other rows are unioned.

    This is the fold that merges bridge candidates into the direct dense
    pool.
    """
    from everos.memory.search.manager import _merge_by_id_max

    def _cand(cand_id: str, score: float, origin: str) -> Candidate:
        # ``origin`` tags which pool a row came from so the winner is visible.
        return Candidate(
            id=cand_id, score=score, source="vector", metadata={"src": origin}
        )

    direct_pool = [_cand("s1", 0.5, "primary"), _cand("s2", 0.7, "primary")]
    bridge_pool = [
        _cand("s1", 0.9, "bridge"),
        _cand("s2", 0.3, "bridge"),
        _cand("s3", 0.6, "bridge"),
    ]

    by_id = {cand.id: cand for cand in _merge_by_id_max(direct_pool, bridge_pool)}

    # s1: bridge wins (0.9 > 0.5).
    assert by_id["s1"].score == 0.9
    assert by_id["s1"].metadata["src"] == "bridge"
    # s2: primary wins (0.7 > 0.3).
    assert by_id["s2"].score == 0.7
    assert by_id["s2"].metadata["src"] == "primary"
    # s3 only exists in the bridge pool and is added as-is.
    assert by_id["s3"].score == 0.6
async def test_case_bridged_skills_max_pools_score_across_source_cases() -> None:
    """A bridged skill takes the best score among its matched source cases.

    Mirrors the ``maxsim_atomic`` fact→episode pooling. Source cases that
    are absent from the bridge pool are ignored.
    """
    bridged_candidate = Candidate(
        id="agent_a_skill_x",
        score=0.0,  # bridge ignores the recaller-side score
        source="vector",
        metadata={"source_case_ids": ["c1", "c2", "c3"], "name": "x"},
    )
    manager = _build_manager(skill_by_case=[bridged_candidate])

    # c2 carries the maximum; c_other is not in the skill's lineage.
    pool_scores = {"c1": 0.4, "c2": 0.9, "c_other": 0.7}
    bridge_cases = [
        Candidate(id=case_id, score=case_score, source="vector", metadata={})
        for case_id, case_score in pool_scores.items()
    ]

    results = await manager._case_bridged_skills(bridge_cases, where="", top_k=5)

    assert len(results) == 1
    only_skill = results[0]
    assert only_skill.id == "agent_a_skill_x"
    # c1=0.4 and c2=0.9 are in the bridge pool; c3 is not → max-pool == 0.9.
    assert only_skill.score == pytest.approx(0.9)
    # Metadata (incl. ``source_case_ids``) rides through so downstream
    # shaping doesn't need a second fetch.
    assert only_skill.metadata["source_case_ids"] == ["c1", "c2", "c3"]
async def test_case_bridged_skills_returns_empty_for_none_or_empty_input() -> None:
    """No bridge cases means no bridge recall — the reverse fetch is skipped.

    This is the cross-encoder lane / KEYWORD / VECTOR contract.
    """
    # The stub holds a skill on purpose: it must NOT leak into the result.
    manager = _build_manager(skill_by_case=[_skill_row("s1")])
    for no_cases in (None, []):
        assert await manager._case_bridged_skills(no_cases, where="", top_k=5) == []
# ── Agent HYBRID lane selection ──────────────────────────────────────────
async def test_agent_hybrid_no_llm_rerank_runs_cross_encoder_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``enable_llm_rerank=False`` for agent HYBRID must dispatch through
    ``search_agent_skills_hybrid`` (rrf → cross-encoder lane) with the
    configured reranker, not through generic ``arank``.

    The lane is replaced with a recorder that stores the arguments it was
    called with; the assertions then pin the query, the reranker identity
    and the effective ``top_k``.
    """
    captured: dict[str, Any] = {}

    async def _fake_hybrid(
        query: str,
        *,
        sparse: list[Candidate],
        dense: list[Candidate],
        reranker: Any,
        top_k: int,
    ) -> list[Any]:
        captured.update(
            query=query, sparse=sparse, dense=dense, reranker=reranker, top_k=top_k
        )
        return []

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_hybrid", _fake_hybrid
    )
    stub_reranker = _StubReranker()
    mgr = _build_manager(embedding=_StubEmbedding(), reranker=stub_reranker)
    await mgr.search(_agent_req(method=SearchMethod.HYBRID))

    # Guard first: if the manager routed elsewhere, fail with a message that
    # names the broken contract instead of a bare KeyError below.
    assert captured, "search_agent_skills_hybrid was never invoked"
    assert captured["query"] == "hi"
    # Manager forwards its configured reranker to the cross-encoder lane.
    assert captured["reranker"] is stub_reranker
    # Agent kinds cap unlimited-mode top_k at _AGENT_TOP_K_CAP (10).
    assert captured["top_k"] == 10
async def test_agent_hybrid_llm_rerank_dispatches_arank_for_case_then_skill(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """LLM rerank lane calls ``arank`` for cases first, then for skills.

    ``_search_cases_and_skills`` runs serially: one ``arank`` call with
    ``memory_type="case"`` followed by one with ``memory_type="skill"``,
    both with ``enable_rerank=True`` and the LLM client. The order matters
    because the case results feed the bridge used by the skill call.
    """
    from everalgo.types import RankOutput

    dispatches: list[tuple[str, dict[str, Any]]] = []

    async def _fake_arank(rank_input: Any, **kwargs: Any) -> RankOutput:
        dispatches.append((rank_input.memory_type, kwargs))
        return RankOutput(items=[], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    await manager.search(request)

    # Two dispatches in the documented serial order.
    memory_types = [memory_type for memory_type, _kwargs in dispatches]
    assert memory_types == ["case", "skill"]
    # Both runs opt into rerank with the LLM client wired in.
    for _memory_type, call_kwargs in dispatches:
        assert call_kwargs["enable_rerank"] is True
        assert call_kwargs["llm"] is manager._llm
        assert call_kwargs["rerank_top_k"] == 10  # _AGENT_TOP_K_CAP
async def test_agent_hybrid_llm_rerank_merges_bridged_skills_into_dense_pool(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The bridge must surface into the skill dispatch: skills resolved
    by ``fetch_by_case_ids`` are max-pooled into the dense candidates that
    ``arank`` sees on the second call, while the direct skill recall pool
    is preserved.

    ``arank`` is replaced with a fake that returns one case on the case
    call and records ``rank_input.dense_candidates`` on the skill call.
    """
    from everalgo.types import RankOutput, ScoredItem

    case_result = ScoredItem(
        id="agent_a_c1",
        score=0.85,
        item_type="case",
        # Shaper requires owner_type="agent" + timestamp + intent/approach;
        # otherwise the case is dropped and bridge_cases comes back empty.
        metadata={
            "owner_id": "agent_a",
            "owner_type": "agent",
            "session_id": "sess_b",
            "timestamp": _ts(),
            "task_intent": "intent c1",
            "approach": "approach c1",
            "quality_score": 0.8,
        },
    )
    skill_direct = _skill_row("s_direct")
    skill_bridged = Candidate(
        id="s_bridged",
        score=0.0,
        source="vector",
        metadata={"source_case_ids": ["agent_a_c1"], "name": "s_bridged"},
    )
    seen_skill_dense: dict[str, list[Candidate]] = {}

    async def _fake_arank(rank_input: Any, **_: Any) -> RankOutput:
        if rank_input.memory_type == "case":
            return RankOutput(items=[case_result], metadata={})
        # skill call — capture the merged dense pool the manager built.
        seen_skill_dense["dense"] = list(rank_input.dense_candidates)
        return RankOutput(items=[], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    mgr = _build_manager(
        embedding=_StubEmbedding(),
        llm_client=_StubLLM(),
        skill_sparse=[],
        skill_dense=[skill_direct],
        skill_by_case=[skill_bridged],
    )
    await mgr.search(_agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True))

    # Guard first: a missing skill dispatch should fail with a readable
    # message, not a KeyError on the lookups below.
    assert "dense" in seen_skill_dense, "arank was never called for skills"
    dense_pool = seen_skill_dense["dense"]
    dense_ids = {c.id for c in dense_pool}
    # Direct dense recall is preserved AND the case-bridged skill is unioned.
    assert dense_ids == {"s_direct", "s_bridged"}
    # The bridged skill inherits the matched case's score (0.85 from c1).
    by_id = {c.id: c for c in dense_pool}
    assert by_id["s_bridged"].score == pytest.approx(0.85)

View File

@ -0,0 +1,145 @@
"""Real-LanceDB tests for ``AgentSkillRecaller.fetch_by_case_ids``.
The case→skill bridge reverse-resolves skills by ``source_case_ids``
membership using DataFusion's ``array_has`` on a ``list<utf8>`` column.
These tests exercise the actual SQL ``where`` predicate (no recaller
stubs):
* OR-composition over multiple case ids,
* hits respect the partition filter (``where`` passed by the caller),
* empty case-id input short-circuits without a LanceDB call,
* case ids containing single quotes round-trip safely via the ``_q``
escaper.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from everos.component.tokenizer import Tokenizer
from everos.infra.persistence.lancedb import (
AgentSkill as LanceAgentSkill,
)
from everos.infra.persistence.lancedb import (
agent_skill_repo,
lancedb_manager,
)
from everos.memory.search.recall.agent_skill import AgentSkillRecaller
from everos.memory.search.recall.base import RecallerDeps
class _WhitespaceTokenizer(Tokenizer):
    """Whitespace-splitting tokenizer that only satisfies the deps contract.

    The bridge reverse-fetch under test never tokenises anything.
    """

    def tokenize(self, text: str) -> list[str]:
        """Split ``text`` on runs of whitespace."""
        return text.split(sep=None)
def _skill_row(
    *,
    name: str,
    owner_id: str,
    source_case_ids: list[str],
) -> LanceAgentSkill:
    """Build an agent-owned ``LanceAgentSkill`` row for the bridge tests.

    The row id is ``"{owner_id}_{name}"``; description/content (and their
    token columns) are derived from ``name``; the vector is 1024 zeros.
    """
    description = f"desc {name}"
    body = f"body of {name}"
    return LanceAgentSkill(
        id=f"{owner_id}_{name}",
        owner_id=owner_id,
        owner_type="agent",
        name=name,
        description=description,
        description_tokens=description,
        content=body,
        content_tokens=body,
        confidence=0.7,
        maturity_score=0.6,
        source_case_ids=source_case_ids,
        cluster_id=None,
        md_path=f"agents/{owner_id}/skills/{name}/SKILL.md",
        content_sha256="x" * 64,
        vector=[0.0] * 1024,
    )
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Isolate LanceDB under tmp memory root per test.

    Setup points ``EVEROS_MEMORY__ROOT`` at the per-test ``tmp_path`` and
    drops the manager's cached connection and table handles; teardown
    disposes the connection.
    """
    # NOTE(review): presumably the manager derives its storage path from
    # this setting on first use — confirm against lancedb_manager.
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    # Clear cached state left behind by a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
def _recaller() -> AgentSkillRecaller:
    """Return a fresh ``AgentSkillRecaller`` wired with the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return AgentSkillRecaller(deps)
# Partition filter passed as the ``where`` argument by every test below:
# pins recall to the ``agt`` agent owner.
_OWNER_WHERE = "owner_id = 'agt' AND owner_type = 'agent'"
async def test_fetch_by_case_ids_matches_any_lineage_case() -> None:
    """A skill is returned when ``source_case_ids`` holds any queried case id."""
    lineage_by_skill = {"s1": ["c_a", "c_b"], "s2": ["c_c"], "s3": ["c_d"]}
    await agent_skill_repo.upsert(
        [
            _skill_row(name=skill_name, owner_id="agt", source_case_ids=case_ids)
            for skill_name, case_ids in lineage_by_skill.items()
        ]
    )

    queried = ["c_a", "c_c"]
    hits = await _recaller().fetch_by_case_ids(queried, _OWNER_WHERE, limit=10)

    assert sorted(hit.id for hit in hits) == ["agt_s1", "agt_s2"]
async def test_fetch_by_case_ids_respects_owner_partition() -> None:
    """A same-lineage skill owned by another owner must not leak through.

    The caller's ``where`` clause is AND-composed with ``array_has(...)``.
    """
    same_lineage_rows = [
        _skill_row(name="s1", owner_id=owner, source_case_ids=["c_a"])
        for owner in ("agt", "other")
    ]
    await agent_skill_repo.upsert(same_lineage_rows)

    hits = await _recaller().fetch_by_case_ids(["c_a"], _OWNER_WHERE, limit=10)

    assert [hit.id for hit in hits] == ["agt_s1"]
async def test_fetch_by_case_ids_returns_empty_for_no_ids() -> None:
    """An empty case-id list short-circuits to ``[]`` (no LanceDB query issued)."""
    no_case_ids: list[str] = []
    hits = await _recaller().fetch_by_case_ids(no_case_ids, _OWNER_WHERE, limit=10)
    assert hits == []
async def test_fetch_by_case_ids_escapes_single_quotes() -> None:
    """A case id containing ``'`` must not break the SQL string literal.

    The ``_q`` escaper doubles the quote (``'`` → ``''``, SQL standard);
    unescaped, the where-clause literal would be closed prematurely.
    """
    case_id_with_quote = "ac_o'brien_0001"
    row = _skill_row(name="s1", owner_id="agt", source_case_ids=[case_id_with_quote])
    await agent_skill_repo.upsert([row])

    hits = await _recaller().fetch_by_case_ids(
        [case_id_with_quote], _OWNER_WHERE, limit=10
    )

    assert [hit.id for hit in hits] == ["agt_s1"]
async def test_fetch_by_case_ids_carries_source_case_ids_in_metadata() -> None:
    """The full ``source_case_ids`` list comes back in candidate metadata.

    The manager's max-pool needs it to score against the caller's
    case_score map.
    """
    lineage = ["c_a", "c_b", "c_c"]
    row = _skill_row(name="s1", owner_id="agt", source_case_ids=lineage)
    await agent_skill_repo.upsert([row])

    hits = await _recaller().fetch_by_case_ids(["c_a"], _OWNER_WHERE, limit=10)

    assert len(hits) == 1
    returned_lineage = hits[0].metadata["source_case_ids"]
    assert sorted(returned_lineage) == ["c_a", "c_b", "c_c"]

View File

@ -0,0 +1,264 @@
"""Real-LanceDB tests for ``AtomicFactRecaller.facts_for_episodes``.
The MRAG bridge is the only path that links facts back to episodes, and
the previous ``parent_type='episode' AND parent_id IN (episode_ids)``
query never matched: cascade writes facts with
``parent_type='memcell'``, ``parent_id=memcell_id``. The fixed version
takes an ``episode → memcell`` map from the caller, queries by the
deduped memcell set, and re-buckets results under every episode that
shares each memcell.
These tests exercise the real LanceDB query path (no recaller stubs):
- shared memcell → fact appears under both episodes,
- distinct memcells → facts bucket exclusively to their owning episode,
- empty / unknown memcells → empty result, no LanceDB call surprise.
"""
from __future__ import annotations
import datetime as _dt
from pathlib import Path
import pytest
from everos.component.tokenizer import Tokenizer
from everos.infra.persistence.lancedb import (
AtomicFact,
ParentType,
atomic_fact_repo,
lancedb_manager,
)
from everos.memory.search.recall.atomic_fact import AtomicFactRecaller
from everos.memory.search.recall.base import RecallerDeps
class _WhitespaceTokenizer(Tokenizer):
    """Whitespace-splitting tokenizer; the bridge never tokenises text."""

    def tokenize(self, text: str) -> list[str]:
        """Split ``text`` on runs of whitespace."""
        return text.split(sep=None)
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _fact_row(
    *,
    fid: str,
    memcell_id: str,
    fact: str,
    owner_id: str = "alice",
) -> AtomicFact:
    """Build a user-owned ``AtomicFact`` row hanging off ``memcell_id``.

    ``entry_id`` is whatever follows the first ``_`` in ``fid`` (or ``fid``
    itself when there is no underscore). The parent is always a memcell,
    and the vector is 1024 zeros.
    """
    _owner_prefix, separator, remainder = fid.partition("_")
    entry_id = remainder if separator else fid
    return AtomicFact(
        id=fid,
        entry_id=entry_id,
        owner_id=owner_id,
        owner_type="user",
        session_id="sess_1",
        timestamp=_ts(),
        parent_type=ParentType.MEMCELL.value,
        parent_id=memcell_id,
        sender_ids=[owner_id],
        fact=fact,
        fact_tokens=fact,
        md_path=f"users/{owner_id}/.atomic_facts/atomic_fact-2026-01-01.md",
        content_sha256="x" * 64,
        vector=[0.0] * 1024,
    )
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Isolate LanceDB to a tmp memory root per test.

    Setup points ``EVEROS_MEMORY__ROOT`` at the per-test ``tmp_path`` and
    drops the manager's cached connection and table handles; teardown
    disposes the connection.
    """
    # NOTE(review): presumably the manager derives its storage path from
    # this setting on first use — confirm against lancedb_manager.
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    # Clear cached state left behind by a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
def _recaller() -> AtomicFactRecaller:
    """Return a fresh ``AtomicFactRecaller`` wired with the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return AtomicFactRecaller(deps)
async def test_facts_for_episodes_buckets_by_shared_memcell() -> None:
    """Episodes that share a memcell each receive that memcell's facts.

    Episode-level fan-out (the Episode pipeline runs once per cell but
    emits one Episode per user sender) produces several episode rows that
    point at the same memcell. Every fact on that memcell must show up
    under each of those episode ids.
    """
    stored_facts = [
        _fact_row(fid="alice_af_1", memcell_id="mc_shared", fact="likes hiking"),
        _fact_row(fid="alice_af_2", memcell_id="mc_shared", fact="lives in tokyo"),
        _fact_row(fid="alice_af_3", memcell_id="mc_other", fact="prefers oat milk"),
    ]
    await atomic_fact_repo.upsert(stored_facts)

    episode_memcells = {
        "alice_ep_a": "mc_shared",
        "alice_ep_b": "mc_shared",
        "alice_ep_c": "mc_other",
    }
    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    buckets = await _recaller().facts_for_episodes(
        episode_memcells, owner_filter, per_episode=10
    )

    assert sorted(buckets.keys()) == ["alice_ep_a", "alice_ep_b", "alice_ep_c"]
    shared_ids = ["alice_af_1", "alice_af_2"]
    assert sorted(fact.id for fact in buckets["alice_ep_a"]) == shared_ids
    assert sorted(fact.id for fact in buckets["alice_ep_b"]) == shared_ids
    assert [fact.id for fact in buckets["alice_ep_c"]] == ["alice_af_3"]

    # parent_episode_id is the *bucket* episode, not the underlying memcell:
    # the same fact surfaces twice with different parent_episode_id values.
    for episode_id in ("alice_ep_a", "alice_ep_b"):
        shared_fact = next(
            fact for fact in buckets[episode_id] if fact.id == "alice_af_1"
        )
        assert shared_fact.parent_episode_id == episode_id
async def test_facts_for_episodes_returns_empty_for_no_episodes() -> None:
    """An empty episode → memcell map yields an empty result mapping."""
    no_episodes: dict[str, str] = {}
    buckets = await _recaller().facts_for_episodes(
        no_episodes, "owner_id = 'alice'", per_episode=10
    )
    assert buckets == {}
async def test_facts_for_episodes_skips_unknown_memcells() -> None:
    """An episode whose memcell has no stored facts is absent from the result."""
    await atomic_fact_repo.upsert(
        [_fact_row(fid="alice_af_1", memcell_id="mc_a", fact="hello")]
    )

    episode_memcells = {"alice_ep_a": "mc_a", "alice_ep_b": "mc_missing"}
    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    buckets = await _recaller().facts_for_episodes(
        episode_memcells, owner_filter, per_episode=10
    )

    assert "alice_ep_a" in buckets
    assert "alice_ep_b" not in buckets
    assert [fact.id for fact in buckets["alice_ep_a"]] == ["alice_af_1"]
async def test_facts_for_episodes_filters_by_where_clause() -> None:
    """The caller's where clause is honoured (here: pinning the owner).

    Two owners store a fact on the same memcell; only the fact matching
    the owner filter may come back.
    """
    alice_fact = _fact_row(
        fid="alice_af_1",
        memcell_id="mc_a",
        fact="alice fact",
        owner_id="alice",
    )
    bob_fact = _fact_row(
        fid="bob_af_1",
        memcell_id="mc_a",
        fact="bob fact",
        owner_id="bob",
    )
    await atomic_fact_repo.upsert([alice_fact, bob_fact])

    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    buckets = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_a"}, owner_filter, per_episode=10
    )

    assert [fact.id for fact in buckets["alice_ep_a"]] == ["alice_af_1"]
async def test_facts_for_episodes_drops_empty_memcell_ids() -> None:
    """Episodes mapped to an empty memcell id are dropped silently.

    Real-world cause: a candidate row that lost its ``parent_id`` (data
    corruption, manual edit). The bridge must neither crash nor emit
    ``parent_id IN ('')``, which would match every empty-string row in
    the table — such a row is stored here to prove it is not returned.
    """
    orphan = _fact_row(fid="alice_af_1", memcell_id="", fact="orphan fact")
    await atomic_fact_repo.upsert([orphan])

    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    buckets = await _recaller().facts_for_episodes(
        {"alice_ep_a": ""}, owner_filter, per_episode=10
    )

    assert buckets == {}
# ── MRAG fact-level scoring (regression for query_vector handling) ─────
def _unit_vector(direction: int, dim: int = 1024) -> list[float]:
    """Return a ``dim``-long vector of zeros with 1.0 at index ``direction``.

    The scoring tests below use two such vectors to get deterministic
    cosine relationships: same axis as the query for the "close" fact, a
    different axis (orthogonal) for the "far" one. The default width is
    1024 because the ``vector`` field on AtomicFact requires 1024-dim, so
    anything that goes through ``.nearest_to`` needs full-width vectors.
    """
    axis_vector = [0.0] * dim
    axis_vector[direction] = 1.0
    return axis_vector
async def test_facts_for_episodes_assigns_real_cosine_score_with_query_vector() -> None:
    """Regression: passing ``query_vector`` yields real cosine scores.

    Pre-fix, ``facts_for_episodes`` only ran ``where parent_id IN (...)``
    and emitted every fact with ``score=0.0``, so MRAG fact-level ranking
    collapsed to insertion order. Post-fix, ``query_vector`` flows into
    ``.nearest_to(...).distance_type('cosine')`` and each fact carries its
    query↔fact relevance.

    Setup: the "close" fact shares the query's axis (cosine distance 0,
    score ≈ 1.0); the "far" fact sits on an orthogonal axis (distance 1,
    score ≈ 0.0). The close fact must rank first, outscore the far one,
    and the two scores must fall on opposite sides of 0.5 — which also
    catches the old hardcoded ``0.0``.
    """
    query_axis, orthogonal_axis = 0, 1

    close_row = _fact_row(fid="alice_af_1", memcell_id="mc_shared", fact="close fact")
    close_row.vector = _unit_vector(query_axis)
    far_row = _fact_row(fid="alice_af_2", memcell_id="mc_shared", fact="far fact")
    far_row.vector = _unit_vector(orthogonal_axis)
    await atomic_fact_repo.upsert([close_row, far_row])

    buckets = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_shared"},
        "owner_id = 'alice' AND owner_type = 'user'",
        per_episode=10,
        query_vector=_unit_vector(query_axis),
    )

    ranked = buckets["alice_ep_a"]
    assert [fact.id for fact in ranked] == ["alice_af_1", "alice_af_2"], (
        "facts must be ordered by cosine distance ascending (closest first)"
    )
    closest, farthest = ranked[0], ranked[1]
    assert closest.score > farthest.score, "real cosine scoring must differentiate"
    assert closest.score > 0.5, "near-identical vectors should score close to 1"
    assert farthest.score < 0.5, "orthogonal vectors should score close to 0"
async def test_facts_for_episodes_score_zero_without_query_vector() -> None:
    """Backward-compat: without ``query_vector`` facts come back with score 0.0.

    Callers that don't need fact-level relevance (e.g. a KV-style fetch
    where the parent ranking already carries the signal) keep the old
    flat-scan semantics. Pinning it here documents the fallback as an
    intentional contract rather than an oversight.
    """
    stored = _fact_row(fid="alice_af_1", memcell_id="mc_a", fact="anything")
    stored.vector = _unit_vector(0)
    await atomic_fact_repo.upsert([stored])

    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    # Deliberately no query_vector argument.
    buckets = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_a"}, owner_filter, per_episode=10
    )

    first_fact = buckets["alice_ep_a"][0]
    assert first_fact.score == 0.0

View File

@ -0,0 +1,108 @@
"""Unit tests for ``EpisodeRecaller.fetch_all_for_owner``."""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from everos.component.tokenizer import Tokenizer
from everos.memory.search.recall.base import RecallerDeps
from everos.memory.search.recall.episode import EpisodeRecaller
def _make_row(ep_id: str, mc_id: str) -> dict[str, Any]:
    """Return a minimal episode row dict as the mocked LanceDB table yields it.

    ``ep_id`` becomes the row ``id`` (and is echoed into the text fields);
    ``mc_id`` becomes ``parent_id``. Everything else is fixed test data.
    """
    row: dict[str, Any] = {
        "id": ep_id,
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_1",
        "timestamp": 1000000,
        "sender_ids": ["alice"],
    }
    row["subject"] = f"subj {ep_id}"
    row["summary"] = f"summary {ep_id}"
    row["episode"] = f"body {ep_id}"
    row["parent_id"] = mc_id
    return row
def _mock_table(rows: list[dict[str, Any]]) -> MagicMock:
    """Return a mock table whose ``query().where(...).to_list()`` awaits to ``rows``."""
    table = MagicMock()
    filtered_query = table.query.return_value.where.return_value
    filtered_query.to_list = AsyncMock(return_value=rows)
    return table
@pytest.fixture()
def recaller() -> EpisodeRecaller:
    """Provide an ``EpisodeRecaller`` whose tokenizer mock returns ``["hi"]``."""
    tokenizer = MagicMock(spec=Tokenizer)
    tokenizer.tokenize.return_value = ["hi"]
    deps = RecallerDeps(tokenizer=tokenizer)
    return EpisodeRecaller(deps)
async def test_fetch_all_for_owner_returns_memcell_keyed_candidates(
    recaller: EpisodeRecaller,
) -> None:
    """Candidate ids equal the rows' ``parent_id`` (memcell id) values.

    Keying by memcell id is what makes acluster_retrieve membership work.
    """
    table = _mock_table([_make_row("ep_1", "mc_1"), _make_row("ep_2", "mc_2")])
    get_table_patch = patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=table,
    )
    with get_table_patch:
        candidates = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    assert len(candidates) == 2
    candidate_ids = {candidate.id for candidate in candidates}
    assert candidate_ids == {"mc_1", "mc_2"}, "id must be memcell_id, not episode_id"
async def test_fetch_all_for_owner_stores_episode_id_in_metadata(
    recaller: EpisodeRecaller,
) -> None:
    """``metadata`` keeps the real episode id alongside the memcell id.

    ``metadata['episode_id']`` carries the LanceDB episode id needed for
    final shaping; ``metadata['parent_id']`` carries the memcell id.
    """
    table = _mock_table([_make_row("ep_abc", "mc_xyz")])
    get_table_patch = patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=table,
    )
    with get_table_patch:
        candidates = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    metadata = candidates[0].metadata
    assert metadata["episode_id"] == "ep_abc"
    assert metadata["parent_id"] == "mc_xyz"
async def test_fetch_all_for_owner_skips_rows_without_parent_id(
    recaller: EpisodeRecaller,
) -> None:
    """A row with no ``parent_id`` key is skipped silently.

    Such rows are incomplete episode records.
    """
    # Deliberately has no "parent_id" key.
    incomplete_row: dict[str, Any] = {
        "id": "ep_bad",
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "s",
        "timestamp": 1,
        "sender_ids": [],
        "subject": "",
        "summary": "",
        "episode": "",
    }
    get_table_patch = patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=_mock_table([incomplete_row]),
    )
    with get_table_patch:
        candidates = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    assert candidates == []

View File

@ -0,0 +1,189 @@
"""Real-LanceDB regression: OR-mode BooleanQuery sparse recall.
Locks the fix for the tantivy implicit-AND poison: when a query
contains an IDF≈0 token (typically the partition owner's own name on
an owner-scoped corpus), the entire query used to return 0 hits. The
fixed path wraps each token in a ``BooleanQuery`` with ``SHOULD``
clauses (mirrors enterprise ES ``bool.should + minimum_should_match=1``)
so other tokens can carry the query.
These tests build a tiny in-memory corpus where one term is 100% DF
(the "poison" term) and verify that mixing it with informative
content tokens still surfaces results.
White-box surfaces:
- LanceDB ``episode`` table (real, per-test tmp root)
- ``EpisodeRecaller.sparse_recall``
"""
from __future__ import annotations
import datetime as _dt
from pathlib import Path
import pytest
from everos.component.tokenizer import Tokenizer
from everos.infra.persistence.lancedb import (
Episode,
ParentType,
episode_repo,
lancedb_manager,
)
from everos.memory.search.recall.base import RecallerDeps, build_or_query
from everos.memory.search.recall.episode import EpisodeRecaller
class _WhitespaceTokenizer(Tokenizer):
    """Lower-casing, whitespace-splitting tokenizer.

    The OR-semantics fix does not depend on jieba's behaviour, so a
    trivial tokenizer keeps these tests focused.
    """

    def tokenize(self, text: str) -> list[str]:
        """Lower-case ``text`` and split it on runs of whitespace."""
        # ``str.split()`` without a separator never yields empty strings,
        # so blank or whitespace-only input produces ``[]``.
        return text.lower().split()
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _episode_row(
    *,
    eid: str,
    owner_id: str,
    body_tokens: str,
) -> Episode:
    """Build a user-owned ``Episode`` row for the sparse-recall corpus.

    ``body_tokens`` is stored both as the episode text and as the indexed
    ``episode_tokens`` column. The row id is ``"{owner_id}_{eid}"``, the
    parent is the fixed memcell ``mc_test`` and the vector is 1024 zeros.
    """
    row_id = f"{owner_id}_{eid}"
    md_path = f"users/{owner_id}/episodes/episode-2026-01-01.md"
    return Episode(
        id=row_id,
        entry_id=eid,
        owner_id=owner_id,
        owner_type="user",
        session_id="sess_1",
        timestamp=_ts(),
        parent_type=ParentType.MEMCELL.value,
        parent_id="mc_test",
        sender_ids=[owner_id],
        episode=body_tokens,
        episode_tokens=body_tokens,
        md_path=md_path,
        content_sha256="x" * 64,
        vector=[0.0] * 1024,
    )
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Give every test its own memory root and a clean LanceDB manager.

    Setup points ``EVEROS_MEMORY__ROOT`` at the per-test ``tmp_path`` and
    drops the manager's cached connection and table handles; teardown
    disposes the connection.
    """
    # NOTE(review): presumably the manager derives its storage path from
    # this setting on first use — confirm against lancedb_manager.
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    # Clear cached state left behind by a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
def _recaller() -> EpisodeRecaller:
    """Return a fresh ``EpisodeRecaller`` wired with the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return EpisodeRecaller(deps)
# ── build_or_query helper unit-level checks ────────────────────────────
def test_build_or_query_empty_returns_none() -> None:
    """Empty or whitespace-only query text yields ``None``.

    Callers are expected to short-circuit on that.
    """
    tokenizer = _WhitespaceTokenizer()
    for blank_query in ("", " "):
        assert build_or_query(tokenizer, blank_query, column="episode_tokens") is None
def test_build_or_query_single_token_returns_match_query() -> None:
    """A one-token query produces a bare ``MatchQuery`` (no boolean wrapper)."""
    from lancedb.query import MatchQuery

    tokenizer = _WhitespaceTokenizer()
    built = build_or_query(tokenizer, "hello", column="episode_tokens")
    assert isinstance(built, MatchQuery)
def test_build_or_query_multi_token_returns_boolean_query() -> None:
    """Two or more tokens produce a ``BooleanQuery`` (one SHOULD clause each)."""
    from lancedb.query import BooleanQuery

    tokenizer = _WhitespaceTokenizer()
    built = build_or_query(tokenizer, "alice support group", column="episode_tokens")
    assert isinstance(built, BooleanQuery)
# ── Live recall: poison token + informative token must surface results ──
async def test_or_semantics_poison_token_does_not_kill_query() -> None:
    """A 100%-DF token in the query must not zero out the whole recall.

    Both episodes contain the owner name (DF=100%) plus distinct content.
    Pre-fix, querying ``"alice support group"`` against owner=alice
    returned 0 hits: the ``alice`` token (IDF≈0) poisoned the implicit-AND
    query parser and dragged the score conjunction to zero. Post-fix,
    ``BooleanQuery + SHOULD`` lets ``support`` / ``group`` carry the query.
    """
    from everos.infra.persistence.lancedb import get_table

    corpus = [
        _episode_row(
            eid="ep_1",
            owner_id="alice",
            body_tokens="alice attended lgbtq support group last tuesday",
        ),
        _episode_row(
            eid="ep_2",
            owner_id="alice",
            body_tokens="alice tried watercolor painting on saturday morning",
        ),
    ]
    await episode_repo.upsert(corpus)

    # LanceDB FTS only sees data merged into the index after optimize().
    # Tests treat that as part of "the corpus is ready to query".
    episode_table = await get_table(Episode.TABLE_NAME, Episode)
    await episode_table.optimize()

    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    hits = await _recaller().sparse_recall(
        "alice support group", owner_filter, limit=10
    )

    assert hits, "alice + support + group should recall ep_1 via SHOULD"
    # ep_1 is the support-group episode; should rank above ep_2 (no support).
    top_hit = hits[0]
    assert top_hit.id == "alice_ep_1"
    assert top_hit.score > 0.0
async def test_or_semantics_single_informative_token() -> None:
    """A lone non-poison token still recalls its episode (``painting`` case)."""
    from everos.infra.persistence.lancedb import get_table

    corpus = [
        _episode_row(
            eid="ep_1",
            owner_id="alice",
            body_tokens="alice attended lgbtq support group",
        ),
        _episode_row(
            eid="ep_2",
            owner_id="alice",
            body_tokens="alice tried watercolor painting on saturday",
        ),
    ]
    await episode_repo.upsert(corpus)

    # Merge the new rows into the FTS index before querying.
    episode_table = await get_table(Episode.TABLE_NAME, Episode)
    await episode_table.optimize()

    owner_filter = "owner_id = 'alice' AND owner_type = 'user'"
    hits = await _recaller().sparse_recall("painting", owner_filter, limit=10)

    assert hits, "single informative token must recall the matching episode"
    assert hits[0].id == "alice_ep_2"
async def test_or_semantics_empty_query_returns_empty() -> None:
    """A whitespace-only query tokenises to nothing and recalls ``[]``.

    The recaller is expected to short-circuit before querying LanceDB;
    this test only asserts the empty result.
    """
    recaller = _recaller()
    hits = await recaller.sparse_recall(" ", "owner_id = 'alice'", limit=10)
    assert hits == []

View File

@ -0,0 +1,128 @@
"""Real-LanceDB tests for ``ProfileRecaller`` — KV-by-owner fetch.
Profile recall has no query / no ranking: ``fetch(owner_id)`` returns
the at-most-one row keyed by ``id = owner_id``. These tests exercise
the LanceDB path (no stubs) and the JSON unpacking that turns the
``*_json`` columns back into the DTO's ``profile_data`` mapping.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from everos.infra.persistence.lancedb import (
UserProfile,
lancedb_manager,
user_profile_repo,
)
from everos.memory.search.recall.profile import ProfileRecaller
def _profile_row(
    *,
    owner_id: str,
    summary: str = "summary text",
    explicit_info: list | None = None,
    implicit_traits: list | None = None,
    profile_timestamp_ms: int = 1_700_000_000_000,
) -> UserProfile:
    """Build a ``UserProfile`` row keyed by ``owner_id`` for these tests.

    ``explicit_info`` / ``implicit_traits`` are serialised into the
    ``*_json`` columns; a missing (or empty) value is stored as ``"[]"``.
    The row id equals ``owner_id`` and the owner type is always ``"user"``.
    """
    # Serialise the two list buckets up front; non-ASCII text is kept as-is.
    explicit_payload = json.dumps(explicit_info or [], ensure_ascii=False)
    implicit_payload = json.dumps(implicit_traits or [], ensure_ascii=False)
    markdown_path = f"users/{owner_id}/user.md"
    placeholder_digest = "x" * 64  # dummy 64-character content hash

    return UserProfile(
        id=owner_id,
        owner_id=owner_id,
        owner_type="user",
        summary=summary,
        explicit_info_json=explicit_payload,
        implicit_traits_json=implicit_payload,
        profile_timestamp_ms=profile_timestamp_ms,
        md_path=markdown_path,
        content_sha256=placeholder_digest,
    )
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Give every test a fresh LanceDB rooted at its own ``tmp_path``.

    Setup points ``EVEROS_MEMORY__ROOT`` at the temporary directory and
    drops the manager's cached connection and table handles; teardown
    disposes whatever connection the test opened.
    """
    monkeypatch.setenv(name="EVEROS_MEMORY__ROOT", value=str(tmp_path))

    # Forget any connection / table handles cached by an earlier test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()

    yield

    await lancedb_manager.dispose_connection()
async def test_fetch_returns_dto_when_row_exists() -> None:
    """A stored profile row comes back as exactly one unscored DTO."""
    stored_row = _profile_row(
        owner_id="u_alice",
        summary="Alice likes long hikes.",
        explicit_info=[{"fact": "lives in tokyo"}],
        implicit_traits=[{"trait": "introverted"}],
        profile_timestamp_ms=1_700_000_001_000,
    )
    await user_profile_repo.upsert([stored_row])

    fetched = await ProfileRecaller().fetch("u_alice")
    assert len(fetched) == 1

    dto = fetched[0]
    assert dto.id == "u_alice"
    assert dto.user_id == "u_alice"
    assert dto.score is None

    # The ``*_json`` columns must be decoded back into plain Python values.
    payload = dto.profile_data
    assert payload["summary"] == "Alice likes long hikes."
    assert payload["explicit_info"] == [{"fact": "lives in tokyo"}]
    assert payload["implicit_traits"] == [{"trait": "introverted"}]
    assert payload["profile_timestamp_ms"] == 1_700_000_001_000
async def test_fetch_returns_empty_when_row_missing() -> None:
    """An owner with no stored profile yields an empty list."""
    recaller = ProfileRecaller()
    fetched = await recaller.fetch("u_cold_start")
    assert fetched == []
async def test_fetch_returns_empty_for_blank_owner() -> None:
    """A blank ``owner_id`` must produce no results.

    The recaller is expected to short-circuit instead of querying LanceDB
    with an empty-string primary key, which could otherwise match a row
    whose id was persisted as the empty string.
    """
    recaller = ProfileRecaller()
    fetched = await recaller.fetch("")
    assert fetched == []
async def test_fetch_isolates_by_owner() -> None:
    """With two profiles stored, fetching one owner returns only theirs."""
    alice_row = _profile_row(owner_id="u_alice", summary="Alice")
    bob_row = _profile_row(owner_id="u_bob", summary="Bob")
    await user_profile_repo.upsert([alice_row, bob_row])

    fetched_for_bob = await ProfileRecaller().fetch("u_bob")
    assert len(fetched_for_bob) == 1

    bob_profile = fetched_for_bob[0]
    assert bob_profile.profile_data["summary"] == "Bob"
async def test_fetch_tolerates_malformed_json_columns() -> None:
    """Corrupted JSON in one column must not break the recall path.

    The broken bucket is expected to fall back to ``[]`` while the rest
    of the DTO is still populated.
    """
    # Built by hand (not via _profile_row) so the JSON column can be invalid.
    broken_row = UserProfile(
        id="u_broken",
        owner_id="u_broken",
        owner_type="user",
        summary="ok",
        explicit_info_json="{not valid json",
        implicit_traits_json="[]",
        profile_timestamp_ms=0,
        md_path="users/u_broken/user.md",
        content_sha256="y" * 64,
    )
    await user_profile_repo.upsert([broken_row])

    fetched = await ProfileRecaller().fetch("u_broken")
    assert len(fetched) == 1

    recovered = fetched[0]
    assert recovered.profile_data["explicit_info"] == []
    assert recovered.profile_data["implicit_traits"] == []
    assert recovered.profile_data["summary"] == "ok"

View File

@ -0,0 +1,214 @@
"""Unit tests for ``memory.search.shaper``.
Tests are pure: no LanceDB, no everalgo, just dataclass-in / DTO-out.
"""
from __future__ import annotations
import datetime as _dt
from everalgo.types import Candidate, ScoredItem
from everos.memory.search.shaper import (
reshape_hybrid_output,
shape_agent_case_from_candidate,
shape_agent_skill_from_candidate,
shape_atomic_fact_from_candidate,
shape_episode_from_candidate,
)
# ── Fixtures ────────────────────────────────────────────────────────────
def _ts(year: int = 2026) -> _dt.datetime:
return _dt.datetime(year, 1, 1, tzinfo=_dt.UTC)
def _episode_candidate(*, id: str = "alice_ep_1", score: float = 0.9) -> Candidate:
    """Build a vector-sourced episode ``Candidate`` owned by user ``alice``."""
    episode_metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice", "assistant_1"],
        "subject": "Coffee chat",
        "summary": "Discussed coffee preferences.",
        "episode": "Alice said she prefers oat milk.",
    }
    return Candidate(id=id, score=score, source="vector", metadata=episode_metadata)
def _agent_case_candidate() -> Candidate:
    """Build a keyword-sourced agent-case ``Candidate`` owned by ``agent_a``."""
    case_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "task_intent": "Draft a follow-up email",
        "approach": "1. summarise...",
        "quality_score": 0.92,
        "key_insight": "User prefers brief tone",
    }
    return Candidate(
        id="agent_a_case_1",
        score=0.8,
        source="keyword",
        metadata=case_metadata,
    )
def _agent_skill_candidate() -> Candidate:
    """Build a keyword-sourced agent-skill ``Candidate`` owned by ``agent_a``."""
    skill_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": "contract_redline",
        "description": "Spot risky clauses",
        "content": "Step 1: ...",
        "confidence": 0.9,
        "maturity_score": 0.5,
        "source_case_ids": ["agent_a_case_1"],
    }
    return Candidate(
        id="agent_a_skill_1",
        score=0.7,
        source="keyword",
        metadata=skill_metadata,
    )
# ── Episode shaping ─────────────────────────────────────────────────────
def test_shape_episode_basic() -> None:
    """A well-formed episode candidate is shaped into a conversation item."""
    shaped = shape_episode_from_candidate(_episode_candidate())
    assert shaped is not None

    # Identity and scoring are carried over from the candidate.
    assert shaped.id == "alice_ep_1"
    assert shaped.user_id == "alice"
    assert shaped.type == "Conversation"
    assert shaped.score == 0.9

    # No facts were supplied, so the nested list is empty.
    assert shaped.atomic_facts == []
    assert shaped.sender_ids == ["alice", "assistant_1"]
def test_shape_episode_drops_when_owner_type_wrong() -> None:
    """An episode whose ``owner_type`` is ``"agent"`` is dropped (``None``)."""
    agent_owned = _episode_candidate()
    agent_owned.metadata["owner_type"] = "agent"

    shaped = shape_episode_from_candidate(agent_owned)
    assert shaped is None
def test_shape_episode_drops_when_timestamp_missing() -> None:
    """An episode candidate without a ``timestamp`` is dropped (``None``)."""
    undated = _episode_candidate()
    undated.metadata.pop("timestamp")

    shaped = shape_episode_from_candidate(undated)
    assert shaped is None
def test_shape_episode_attaches_facts() -> None:
    """Facts passed via ``atomic_facts`` are nested on the shaped episode."""
    fact_candidate = Candidate(
        id="f1",
        score=0.5,
        source="other",
        metadata={"fact": "Alice prefers oat milk"},
    )
    shaped_fact = shape_atomic_fact_from_candidate(fact_candidate)

    episode = shape_episode_from_candidate(
        _episode_candidate(),
        atomic_facts=[shaped_fact],
    )
    assert episode is not None

    attached = episode.atomic_facts
    assert len(attached) == 1
    assert attached[0].content == "Alice prefers oat milk"
# ── Agent case / skill shaping ──────────────────────────────────────────
def test_shape_agent_case_basic() -> None:
    """A well-formed agent-case candidate keeps its key fields when shaped."""
    shaped = shape_agent_case_from_candidate(_agent_case_candidate())
    assert shaped is not None

    assert shaped.agent_id == "agent_a"
    assert shaped.task_intent == "Draft a follow-up email"
    assert shaped.quality_score == 0.92
    assert shaped.key_insight == "User prefers brief tone"
def test_shape_agent_case_drops_when_owner_type_wrong() -> None:
    """An agent case whose ``owner_type`` is ``"user"`` is dropped (``None``)."""
    user_owned = _agent_case_candidate()
    user_owned.metadata["owner_type"] = "user"

    shaped = shape_agent_case_from_candidate(user_owned)
    assert shaped is None
def test_shape_agent_skill_basic() -> None:
    """A well-formed agent-skill candidate keeps its key fields when shaped."""
    shaped = shape_agent_skill_from_candidate(_agent_skill_candidate())
    assert shaped is not None

    assert shaped.name == "contract_redline"
    assert shaped.maturity_score == 0.5
    assert shaped.source_case_ids == ["agent_a_case_1"]
# ── Hybrid reshape ──────────────────────────────────────────────────────
def _scored_episode(eid: str, score: float) -> ScoredItem:
    """Build an ``episode``-typed ``ScoredItem`` owned by user ``alice``."""
    episode_metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "s1",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": "subj",
        "summary": "summ",
        "episode": "body",
    }
    return ScoredItem(
        id=eid,
        score=score,
        item_type="episode",
        metadata=episode_metadata,
    )
def _scored_fact(fid: str, parent: str, score: float) -> ScoredItem:
    """Build an ``atomic_fact``-typed ``ScoredItem`` linked to episode ``parent``."""
    fact_metadata = {"fact": f"fact text {fid}"}
    return ScoredItem(
        id=fid,
        score=score,
        item_type="atomic_fact",
        parent_episode_id=parent,
        metadata=fact_metadata,
    )
def test_reshape_hybrid_nests_facts_under_kept_episode() -> None:
    """Facts whose parent episode is in the scored list nest under it."""
    kept_episode = _scored_episode("ep_1", 0.9)
    stronger_fact = _scored_fact("f_1", "ep_1", 0.95)
    weaker_fact = _scored_fact("f_2", "ep_1", 0.85)

    shaped = reshape_hybrid_output(
        [kept_episode, stronger_fact, weaker_fact],
        episode_pool={},
    )
    assert len(shaped) == 1

    episode = shaped[0]
    assert episode.id == "ep_1"
    # Nested facts come back ordered by descending score.
    nested_ids = [fact.id for fact in episode.atomic_facts]
    assert nested_ids == ["f_1", "f_2"]
def test_reshape_hybrid_backfills_evicted_episode_from_pool() -> None:
    """A fact whose episode was evicted restores that episode from the pool.

    ``ep_2`` is absent from the scored list (only its fact survived) but
    is available in ``episode_pool``, so it must reappear in the output.
    """
    surviving_episode = _scored_episode("ep_1", 0.7)
    fact_of_evicted = _scored_fact("f_a", "ep_2", 0.95)
    pooled_episode = _episode_candidate(id="ep_2", score=0.0)

    shaped = reshape_hybrid_output(
        [surviving_episode, fact_of_evicted],
        episode_pool={"ep_2": pooled_episode},
    )
    assert len(shaped) == 2

    # Results are ordered by descending score; the restored episode takes
    # its fact's max score (0.95) and therefore outranks ep_1.
    restored, kept = shaped[0], shaped[1]
    assert restored.id == "ep_2"
    assert restored.score == 0.95
    assert len(restored.atomic_facts) == 1
    assert kept.id == "ep_1"
def test_reshape_hybrid_drops_orphan_facts_with_no_pool_parent() -> None:
    """A fact whose parent is neither scored nor pooled yields no output."""
    orphan_fact = _scored_fact("f_x", "ep_missing", 0.5)

    shaped = reshape_hybrid_output([orphan_fact], episode_pool={})
    assert shaped == []

View File

@ -0,0 +1,154 @@
"""Unit tests for ``memory.search.skill_hybrid``.
skill_hybrid is the **cross-encoder lane** for skill HYBRID retrieval.
The LLM-rerank lane lives in ``SearchManager._search_agent_skills`` and
goes through ``everalgo.rank.skill.arank`` directly — covered by
``test_manager`` tests instead.
Covered surfaces:
- ``search_agent_skills_hybrid`` (public function, MagicMock stubs)
- ``_fuse``, ``_cross_encoder_rerank``, ``_shape_results``
(via integration through the public function)
All I/O (reranker) is injected via MagicMock / stub objects. No LanceDB
or network calls are made.
"""
from __future__ import annotations
import datetime as _dt
from unittest.mock import AsyncMock, MagicMock
from everalgo.types import Candidate
from everos.memory.search.callbacks import _SKILL_RERANK_INSTRUCTION
from everos.memory.search.dto import SearchAgentSkillItem
from everos.memory.search.skill_hybrid import search_agent_skills_hybrid
# ── Helpers ───────────────────────────────────────────────────────────────
def _ts() -> _dt.datetime:
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
def _skill_candidate(
    sid: str,
    score: float = 0.8,
    name: str | None = None,
) -> Candidate:
    """Build a vector-sourced skill ``Candidate`` owned by ``agent_a``.

    When ``name`` is omitted (or empty) the skill is named ``skill_<sid>``.
    Description and content are derived from ``sid``.
    """
    label = name if name else f"skill_{sid}"
    skill_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": label,
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=score, source="vector", metadata=skill_metadata)
def _make_reranker(candidates: list[Candidate]) -> MagicMock:
    """Return a stub reranker whose ``rerank`` preserves the input order.

    Awaiting ``rerank`` yields one result per candidate, each exposing
    the candidate's position as ``index`` and its own ``score``.
    """

    class _FakeResult:
        """Minimal stand-in for a rerank result: ``index`` plus ``score``."""

        def __init__(self, index: int, score: float) -> None:
            self.index = index
            self.score = score

    # Identity rerank: position i keeps candidate i's original score.
    canned_results = []
    for position, candidate in enumerate(candidates):
        canned_results.append(_FakeResult(position, candidate.score))

    stub = MagicMock()
    stub.rerank = AsyncMock(return_value=canned_results)
    return stub
# ── Tests ─────────────────────────────────────────────────────────────────
class TestSearchAgentSkillsHybridRerank:
    """Tests for the cross-encoder rerank path of skill HYBRID search."""

    async def test_returns_shaped_items_up_to_top_k(self) -> None:
        """Fusion + rerank returns at most ``top_k`` shaped skill items."""
        pool = [
            _skill_candidate("s1", score=0.9),
            _skill_candidate("s2", score=0.8),
            _skill_candidate("s3", score=0.7),
        ]
        reranker = _make_reranker(pool)

        # Separate list objects per lane, as two independent recalls would give.
        items = await search_agent_skills_hybrid(
            "what skill handles auth?",
            sparse=list(pool),
            dense=list(pool),
            reranker=reranker,
            top_k=2,
        )

        assert len(items) == 2
        for item in items:
            assert isinstance(item, SearchAgentSkillItem)
        first, second = items[0], items[1]
        assert first.id == "s1"
        assert second.id == "s2"

    async def test_reranker_receives_skill_instruction_and_shaped_passages(
        self,
    ) -> None:
        """The reranker gets the skill instruction and formatted passages.

        Each passage must read ``"Agent Skill: {name} - {description}"``
        and the call must carry the skill-specific instruction — the
        everosos-opensource contract for skill rerank.
        """
        auth_skill = _skill_candidate("s1", name="auth_middleware_refactor")
        split_skill = _skill_candidate("s2", name="provider_lookup_split")
        reranker = _make_reranker([auth_skill, split_skill])

        await search_agent_skills_hybrid(
            "how to split auth?",
            sparse=[auth_skill],
            dense=[auth_skill, split_skill],
            reranker=reranker,
            top_k=10,
        )

        reranker.rerank.assert_awaited_once()
        recorded = reranker.rerank.await_args
        assert recorded is not None

        # Expected call shape: rerank(query, passages, *, instruction=...)
        sent_args = recorded.args
        sent_kwargs = recorded.kwargs
        assert sent_args[0] == "how to split auth?"
        expected_passages = [
            "Agent Skill: auth_middleware_refactor - desc s1",
            "Agent Skill: provider_lookup_split - desc s2",
        ]
        assert sent_args[1] == expected_passages
        assert sent_kwargs["instruction"] == _SKILL_RERANK_INSTRUCTION
class TestSearchAgentSkillsHybridEmpty:
    """Tests for empty and degenerate inputs."""

    async def test_empty_sparse_and_dense_returns_empty_list(self) -> None:
        """With no candidates in either lane the result is ``[]``."""
        reranker = MagicMock(rerank=AsyncMock(return_value=[]))

        items = await search_agent_skills_hybrid(
            "query",
            sparse=[],
            dense=[],
            reranker=reranker,
            top_k=10,
        )

        assert items == []
        # An empty fused list must never reach the reranker.
        reranker.rerank.assert_not_called()