md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
931 lines
34 KiB
Python
931 lines
34 KiB
Python
"""Unit tests for ``SearchManager`` with in-memory stub recallers.

These tests exercise the orchestration without touching LanceDB. Every
recaller is replaced by a hand-rolled stub that returns a small
candidate list; the manager's job is to:

* honour the ``owner_type`` hard partition,
* run KEYWORD as sparse-only and leave ``atomic_facts`` empty,
* run VECTOR as dense-only (and refuse when no embedding is wired),
* let HYBRID run without an LLM by default; require LLM only when the
  caller sets ``enable_llm_rerank=True``,
* refuse AGENTIC when reranker / LLM prerequisites are missing,
* delegate AGENTIC to ``search_episodes_agentic`` and return its result.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime as _dt
|
|
from collections.abc import Mapping, Sequence
|
|
from typing import Any, ClassVar
|
|
|
|
import pytest
|
|
from everalgo.types import Candidate, FactCandidate
|
|
|
|
from everos.memory.search.dto import SearchMethod, SearchRequest
|
|
from everos.memory.search.manager import SearchManager
|
|
|
|
# ── Stubs ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _ts() -> _dt.datetime:
|
|
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
|
|
|
|
|
def _episode_row(
    eid: str, score: float = 0.8, memcell_id: str | None = None
) -> Candidate:
    """Build a user-owned episode ``Candidate`` with id ``eid``.

    ``memcell_id`` is stored as the row's ``parent_id``; when it is
    ``None`` the parent defaults to ``mc_<eid>``.
    """
    parent_id = f"mc_{eid}" if memcell_id is None else memcell_id
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": f"subj {eid}",
        "summary": f"summary {eid}",
        "episode": f"body {eid}",
        "parent_id": parent_id,
    }
    return Candidate(id=eid, score=score, source="keyword", metadata=metadata)
|
|
|
|
|
|
def _case_row(cid: str) -> Candidate:
    """Build an agent-owned case ``Candidate`` with id ``cid`` and score 0.7."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": f"intent {cid}",
        "approach": f"approach {cid}",
        "quality_score": 0.8,
    }
    return Candidate(id=cid, score=0.7, source="keyword", metadata=metadata)
|
|
|
|
|
|
def _skill_row(sid: str) -> Candidate:
    """Build an agent-owned skill ``Candidate`` with id ``sid`` and score 0.65."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": f"skill_{sid}",
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=0.65, source="keyword", metadata=metadata)
|
|
|
|
|
|
class _StubEpisodeRecaller:
    """Episode recaller stub serving canned sparse / dense candidate lists.

    The most recent ``where`` clause passed to ``sparse_recall`` or
    ``dense_recall`` is kept in ``last_where`` so tests can inspect it.
    """

    kind: ClassVar[str] = "episode"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "episode"

    def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
        self._sparse = sparse
        self._dense = dense
        # No recall has happened yet.
        self.last_where: str | None = None

    async def sparse_recall(
        self, query: str, where: str, *, limit: int
    ) -> list[Candidate]:
        """Record ``where`` and return the first ``limit`` sparse rows."""
        self.last_where = where
        head = self._sparse[:limit]
        return list(head)

    async def dense_recall(
        self, vector: Sequence[float], where: str, *, limit: int
    ) -> list[Candidate]:
        """Record ``where`` and return the first ``limit`` dense rows."""
        self.last_where = where
        head = self._dense[:limit]
        return list(head)

    async def fetch_by_parent_ids(
        self, parent_ids: Sequence[str], where: str
    ) -> list[Candidate]:
        """Return dense rows whose ``parent_id`` is in ``parent_ids``.

        Results follow the order of ``parent_ids``; unknown ids are
        skipped. ``where`` is accepted but not used or recorded.
        """
        # Key the dense rows by parent (memcell) id so the maxsim path's
        # reverse-resolve has something to return. A later row with the
        # same parent replaces an earlier one.
        index: dict[str, Candidate] = {}
        for row in self._dense:
            index[str(row.metadata.get("parent_id", ""))] = row
        return [index[pid] for pid in parent_ids if pid in index]
|
|
|
|
|
|
class _StubAtomicFactRecaller:
    """Atomic-fact recaller stub backed by a per-episode fact map.

    ``sparse_recall`` always returns nothing; ``dense_recall`` returns a
    copy of the canned dense rows regardless of its arguments.
    """

    kind: ClassVar[str] = "atomic_fact"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "fact"

    def __init__(
        self,
        facts_map: dict[str, list[FactCandidate]] | None = None,
        dense: list[Candidate] | None = None,
    ) -> None:
        self._facts_map = facts_map or {}
        self._dense = dense or []

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return an empty list; this stub has no sparse rows."""
        return []

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows."""
        return list(self._dense)

    async def facts_for_episodes(
        self,
        ep_to_memcell: Mapping[str, str],
        where: str,
        *,
        per_episode: int,
        query_vector: Any = None,
    ) -> dict[str, list[FactCandidate]]:
        """Map each requested episode id to at most ``per_episode`` facts.

        Episodes missing from the fact map get an empty list.
        """
        # ``query_vector`` is accepted only to match the real recaller
        # signature; the stub doesn't use it.
        facts_by_episode: dict[str, list[FactCandidate]] = {}
        for eid in ep_to_memcell:
            facts_by_episode[eid] = self._facts_map.get(eid, [])[:per_episode]
        return facts_by_episode
|
|
|
|
|
|
class _StubAgentCaseRecaller:
    """Agent-case recaller stub that returns its canned rows unfiltered.

    Both recall methods ignore every argument and return a copy of the
    corresponding list.
    """

    kind: ClassVar[str] = "agent_case"
    everalgo_memory_type: ClassVar[str] = "case"
    text_field: ClassVar[str] = "task_intent"

    def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
        self._sparse = sparse
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned sparse rows."""
        return [*self._sparse]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows."""
        return [*self._dense]
|
|
|
|
|
|
class _StubAgentSkillRecaller:
    """Agent-skill recaller stub with an optional case-bridge fixture.

    All three lookups ignore their arguments and return a copy of the
    matching canned list.
    """

    kind: ClassVar[str] = "agent_skill"
    everalgo_memory_type: ClassVar[str] = "skill"
    text_field: ClassVar[str] = "description"

    def __init__(
        self,
        sparse: list[Candidate],
        dense: list[Candidate],
        by_case: list[Candidate] | None = None,
    ) -> None:
        self._sparse = sparse
        self._dense = dense
        # Rows handed back by ``fetch_by_case_ids`` (the bridge recall).
        # Empty unless a bridge test supplies them.
        self._by_case = by_case or []

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned sparse rows."""
        return [*self._sparse]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense rows."""
        return [*self._dense]

    async def fetch_by_case_ids(
        self, case_ids: Sequence[str], where: str, *, limit: int
    ) -> list[Candidate]:
        """Return a copy of the bridge fixture; ``case_ids``, ``where`` and ``limit`` are ignored."""
        return [*self._by_case]
|
|
|
|
|
|
class _StubProfileRecaller:
    """Profile recaller stub that never finds a profile."""

    async def fetch(self, owner_id: str) -> list:
        """Return an empty list for any ``owner_id``."""
        no_profiles: list = []
        return no_profiles
|
|
|
|
|
|
class _StubEmbedding:
    """Embedding stub that produces all-zero vectors of length ``dim``."""

    def __init__(self, dim: int = 4) -> None:
        self.dim = dim

    async def embed(self, text: str) -> list[float]:
        """Return one zero vector; ``text`` is ignored."""
        zero_vector = [0.0] * self.dim
        return zero_vector

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Return one independent zero vector per entry in ``texts``."""
        vectors: list[list[float]] = []
        for _ in texts:
            vectors.append([0.0] * self.dim)
        return vectors
|
|
|
|
|
|
# ── Fixtures ────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _build_manager(
    *,
    episode_sparse: list[Candidate] | None = None,
    episode_dense: list[Candidate] | None = None,
    case_sparse: list[Candidate] | None = None,
    case_dense: list[Candidate] | None = None,
    skill_sparse: list[Candidate] | None = None,
    skill_dense: list[Candidate] | None = None,
    skill_by_case: list[Candidate] | None = None,
    facts_map: dict[str, list[FactCandidate]] | None = None,
    atomic_fact_dense: list[Candidate] | None = None,
    embedding: _StubEmbedding | None = None,
    reranker: Any = None,
    llm_client: Any = None,
) -> SearchManager:
    """Assemble a ``SearchManager`` wired entirely to in-memory stubs.

    Every candidate list defaults to empty, and ``embedding``,
    ``reranker`` and ``llm_client`` default to ``None``, so a bare call
    yields a manager with no data and no optional providers.
    """
    episodes = _StubEpisodeRecaller(episode_sparse or [], episode_dense or [])
    atomic_facts = _StubAtomicFactRecaller(facts_map, atomic_fact_dense)
    cases = _StubAgentCaseRecaller(case_sparse or [], case_dense or [])
    skills = _StubAgentSkillRecaller(
        skill_sparse or [], skill_dense or [], skill_by_case
    )
    profiles = _StubProfileRecaller()
    return SearchManager(
        episode_recaller=episodes,
        atomic_fact_recaller=atomic_facts,
        agent_case_recaller=cases,
        agent_skill_recaller=skills,
        profile_recaller=profiles,
        embedding=embedding,
        reranker=reranker,
        llm_client=llm_client,
    )
|
|
|
|
|
|
def _user_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build a ``SearchRequest`` for user ``alice`` with query ``"hi"``.

    Extra keyword arguments are forwarded to ``SearchRequest`` unchanged.
    """
    request = SearchRequest(user_id="alice", query="hi", method=method, **kwargs)
    return request
|
|
|
|
|
|
def _agent_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build a ``SearchRequest`` for agent ``agent_a`` with query ``"hi"``.

    Extra keyword arguments are forwarded to ``SearchRequest`` unchanged.
    """
    request = SearchRequest(agent_id="agent_a", query="hi", method=method, **kwargs)
    return request
|
|
|
|
|
|
# ── KEYWORD: user owner ────────────────────────────────────────────────
|
|
|
|
|
|
async def test_user_keyword_returns_episodes_only() -> None:
    """A user KEYWORD search returns the matched episode and nothing else."""
    manager = _build_manager(episode_sparse=[_episode_row("ep_1")])
    response = await manager.search(_user_req())

    # The request id is expected to be 32 lowercase hex characters.
    request_id = response.request_id
    assert len(request_id) == 32 and all(
        ch in "0123456789abcdef" for ch in request_id
    )

    data = response.data
    assert len(data.episodes) == 1
    episode = data.episodes[0]
    assert episode.id == "ep_1"
    assert episode.user_id == "alice"
    assert episode.type == "Conversation"
    # Agent paths stay empty.
    assert data.agent_cases == []
    assert data.agent_skills == []
    assert data.profiles == []
|
|
|
|
|
|
async def test_user_keyword_leaves_atomic_facts_empty() -> None:
    """KEYWORD never back-fills facts — only HYBRID produces relevance-scored facts.

    Even if the facts repository would return rows for the matched
    episode, the keyword path must leave ``atomic_facts=[]``: there is
    no per-query score for those facts, so emitting them would muddy
    the contract (mirrors enterprise where event_log is a separate
    memory_type, not auto-attached to episodic results).
    """
    oat_milk_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.0,
        metadata={"fact": "Alice prefers oat milk"},
    )
    manager = _build_manager(
        episode_sparse=[_episode_row("ep_1")],
        facts_map={"ep_1": [oat_milk_fact]},
    )
    response = await manager.search(_user_req())
    first_episode = response.data.episodes[0]
    assert first_episode.atomic_facts == []
|
|
|
|
|
|
async def test_user_keyword_no_results() -> None:
    """A manager with empty stubs returns no episodes for a KEYWORD search."""
    manager = _build_manager()
    response = await manager.search(_user_req())
    assert response.data.episodes == []
|
|
|
|
|
|
async def test_user_keyword_filters_compile_pinned_owner() -> None:
    """``compile_filters`` should pin owner_id / owner_type on the where."""
    # The manager is built by hand here so the test keeps a reference to
    # the episode stub and can read the ``where`` clause it received.
    episode_stub = _StubEpisodeRecaller([_episode_row("ep_1")], [])
    manager = SearchManager(
        episode_recaller=episode_stub,
        atomic_fact_recaller=_StubAtomicFactRecaller(),
        agent_case_recaller=_StubAgentCaseRecaller([], []),
        agent_skill_recaller=_StubAgentSkillRecaller([], []),
        profile_recaller=_StubProfileRecaller(),
        embedding=None,
        reranker=None,
        llm_client=None,
    )
    await manager.search(_user_req())

    where_clause = episode_stub.last_where
    assert where_clause is not None
    assert "owner_id = 'alice'" in where_clause
    assert "owner_type = 'user'" in where_clause
|
|
|
|
|
|
# ── VECTOR: requires embedding ────────────────────────────────────────
|
|
|
|
|
|
async def test_vector_method_requires_embedding() -> None:
    """VECTOR search raises ``RuntimeError`` when no embedding is wired."""
    # ``_build_manager`` leaves ``embedding`` as ``None`` unless told otherwise.
    manager = _build_manager()
    vector_request = _user_req(method=SearchMethod.VECTOR)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(vector_request)
|
|
|
|
|
|
async def test_vector_method_runs_dense_only_with_embedding() -> None:
    """VECTOR search returns the dense row only; the sparse row never surfaces."""
    sparse_rows = [_episode_row("should_not_appear")]
    dense_rows = [_episode_row("ep_dense")]
    manager = _build_manager(
        episode_sparse=sparse_rows,
        episode_dense=dense_rows,
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.VECTOR))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_dense"]
|
|
|
|
|
|
async def test_vector_radius_filter_drops_below_threshold() -> None:
    """With ``radius=0.5`` only the 0.9-scored row survives; the 0.3 row is dropped."""
    dense_rows = [
        _episode_row("ep_low", score=0.3),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    response = await manager.search(
        _user_req(method=SearchMethod.VECTOR, radius=0.5)
    )
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_high"]
|
|
|
|
|
|
async def test_unlimited_mode_applies_default_radius_for_vector() -> None:
    """``top_k=-1`` without an explicit radius gets the project default 0.5.

    Mirrors enterprise's auto-floor behaviour — unlimited mode must not
    return arbitrarily low-similarity tail.
    """
    dense_rows = [
        _episode_row("ep_low", score=0.3),  # below default 0.5 → dropped
        _episode_row("ep_mid", score=0.55),  # above default → kept
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    response = await manager.search(
        _user_req(method=SearchMethod.VECTOR, top_k=-1)
    )
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_mid", "ep_high"]
|
|
|
|
|
|
async def test_unlimited_mode_explicit_radius_overrides_default() -> None:
    """A caller-supplied radius wins over the unlimited-mode default."""
    dense_rows = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=-1, radius=0.1)
    response = await manager.search(request)
    # 0.1 threshold keeps both rows (the default 0.5 would have dropped ep_low).
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
|
|
|
|
|
|
async def test_normal_mode_keeps_full_pool_when_no_radius() -> None:
    """``top_k > 0`` without a radius applies no threshold — truncation handles tail."""
    dense_rows = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_rows, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=10)
    response = await manager.search(request)
    # No radius default in normal mode → both kept.
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
|
|
|
|
|
|
# ── VECTOR + maxsim_atomic strategy ─────────────────────────────────────
|
|
|
|
|
|
def _atomic_fact_row(fid: str, *, parent_id: str, score: float) -> Candidate:
    """Build the kind of atomic-fact candidate ``AtomicFactRecaller.dense_recall`` emits.

    ``parent_id`` is stored in the row's metadata under ``parent_id``.
    """
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "parent_id": parent_id,
        "fact": f"fact {fid}",
    }
    return Candidate(id=fid, score=score, source="vector", metadata=metadata)
|
|
|
|
|
|
async def test_vector_maxsim_atomic_max_pools_facts_to_episodes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``vector_strategy=maxsim_atomic`` should ANN atomic_facts → max-pool by
    memcell parent → reverse-resolve to episode, ordering episodes by the
    per-memcell maximum fact score.

    The strategy is selected through an environment override, so the
    settings cache is cleared before the search (to pick the override up)
    and again afterwards (so the overridden settings do not stay cached
    for later tests).
    """
    from everos.config.settings import load_settings

    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()
    try:
        # Two episodes; each has atomic facts under it. The max fact score
        # per memcell is what should end up as the episode's score.
        mgr = _build_manager(
            episode_dense=[
                _episode_row("ep_A", memcell_id="mc_A"),
                _episode_row("ep_B", memcell_id="mc_B"),
            ],
            atomic_fact_dense=[
                _atomic_fact_row("f_A1", parent_id="mc_A", score=0.95),
                _atomic_fact_row("f_A2", parent_id="mc_A", score=0.40),
                _atomic_fact_row("f_B1", parent_id="mc_B", score=0.75),
            ],
            embedding=_StubEmbedding(),
        )
        resp = await mgr.search(_user_req(method=SearchMethod.VECTOR, top_k=5))
    finally:
        # monkeypatch restores the env var at teardown but knows nothing
        # about the settings cache; without this, settings loaded under
        # the override could be served to whichever test runs next.
        load_settings.cache_clear()

    eps = resp.data.episodes
    # Both episodes returned, ordered by max-pool score desc.
    assert [e.id for e in eps] == ["ep_A", "ep_B"]
    assert eps[0].score == pytest.approx(0.95)  # max(0.95, 0.40)
    assert eps[1].score == pytest.approx(0.75)
|
|
|
|
|
|
async def test_vector_maxsim_atomic_returns_empty_when_no_facts(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """No fact recall → no memcells to score → empty episode list.

    The settings cache is cleared both before and after the search so the
    ``maxsim_atomic`` override is picked up here and does not stay cached
    for later tests.
    """
    from everos.config.settings import load_settings

    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()
    try:
        mgr = _build_manager(
            episode_dense=[_episode_row("ep_A", memcell_id="mc_A")],
            atomic_fact_dense=[],
            embedding=_StubEmbedding(),
        )
        resp = await mgr.search(_user_req(method=SearchMethod.VECTOR, top_k=5))
    finally:
        # monkeypatch only undoes the env var; drop the cached settings
        # too so the next load reads the restored environment.
        load_settings.cache_clear()

    assert resp.data.episodes == []
|
|
|
|
|
|
# ── HYBRID / AGENTIC: prerequisite errors ──────────────────────────────
|
|
|
|
|
|
async def test_hybrid_requires_embedding() -> None:
    """HYBRID search raises ``RuntimeError`` when no embedding is wired."""
    manager = _build_manager()
    hybrid_request = _user_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(hybrid_request)
|
|
|
|
|
|
async def test_hybrid_does_not_require_llm_by_default() -> None:
    """HYBRID no longer auto-pulls LLM. With enable_llm_rerank=False the
    fusion-only path (RRF / LR) should run without an LLM client."""
    manager = _build_manager(embedding=_StubEmbedding())
    hybrid_request = _user_req(method=SearchMethod.HYBRID)
    # Must not raise: the request does not opt into LLM rerank, so no LLM
    # client is needed.
    response = await manager.search(hybrid_request)
    # Empty stub recallers → empty result.
    assert response.data.episodes == []
|
|
|
|
|
|
async def test_hybrid_requires_llm_when_enable_llm_rerank_true() -> None:
    """Setting ``enable_llm_rerank=True`` makes the LLM mandatory."""
    manager = _build_manager(embedding=_StubEmbedding())
    rerank_request = _user_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    with pytest.raises(RuntimeError, match="enable_llm_rerank"):
        await manager.search(rerank_request)
|
|
|
|
|
|
async def test_user_hybrid_episode_fuses_and_evicts_facts() -> None:
    """HYBRID episode path: hierarchy pipeline (RRF -> MaxSim -> merge -> eviction).

    ep_1 has a fact scoring higher than the RRF score -> fact evicts episode.
    ep_2 has no facts -> episode emitted as-is.
    """
    first = _episode_row("ep_1", score=0.8, memcell_id="mc_1")
    second = _episode_row("ep_2", score=0.7, memcell_id="mc_2")
    oat_milk_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.95,
        metadata={"fact": "Alice prefers oat milk"},
    )
    # The same two rows feed both the sparse and the dense lane.
    manager = _build_manager(
        episode_sparse=[first, second],
        episode_dense=[first, second],
        facts_map={"ep_1": [oat_milk_fact]},
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.HYBRID, top_k=10))

    episodes = response.data.episodes
    assert len(episodes) >= 1
    ep1_hit = None
    for episode in episodes:
        if episode.id == "ep_1":
            ep1_hit = episode
            break
    assert ep1_hit is not None
    assert len(ep1_hit.atomic_facts) == 1
    assert ep1_hit.atomic_facts[0].id == "f1"
|
|
|
|
|
|
async def test_agentic_requires_reranker_and_llm() -> None:
    """AGENTIC with an embedding but no reranker / LLM fails with a rerank-provider error."""
    manager = _build_manager(embedding=_StubEmbedding())
    agentic_request = _user_req(method=SearchMethod.AGENTIC)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(agentic_request)
|
|
|
|
|
|
async def test_agent_hybrid_requires_reranker_without_llm_rerank() -> None:
    """``owner_type='agent'`` + HYBRID + ``enable_llm_rerank=False`` reaches
    the skill cross-encoder lane (``skill_hybrid``: rrf → cross-encoder),
    so a missing rerank provider must fail-fast with a config hint rather
    than crash deep inside the rerank callback.
    """
    manager = _build_manager(embedding=_StubEmbedding())
    hybrid_request = _agent_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(hybrid_request)
|
|
|
|
|
|
async def test_agent_hybrid_with_llm_rerank_does_not_need_reranker() -> None:
    """The LLM-rerank lane skips the cross-encoder and dispatches through
    ``arank`` instead, so a missing reranker is fine as long as the LLM
    client is configured. Empty stub recallers → empty result; the call
    must not raise on the reranker-absence path.
    """
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    rerank_request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    response = await manager.search(rerank_request)
    data = response.data
    assert data.agent_skills == []
    assert data.agent_cases == []
|
|
|
|
|
|
class _StubReranker:
    """Minimal reranker stub — returns trivial scores."""

    async def rerank(self, query: str, documents: Sequence[str]) -> list[Any]:
        """Return one ``RerankResult`` per document, in input order, each scored 1.0."""
        # Imported lazily, as in the original stub, so the rerank protocol
        # module is only loaded when a test actually reranks.
        from everos.component.rerank.protocol import RerankResult

        results: list[Any] = []
        for position in range(len(documents)):
            results.append(RerankResult(index=position, score=1.0))
        return results
|
|
|
|
|
|
class _StubLLM:
    """Minimal LLM stub — satisfies protocol without making real calls."""

    async def chat(self, *args: Any, **kwargs: Any) -> Any:
        """Return an empty string regardless of the arguments."""
        empty_reply = ""
        return empty_reply
|
|
|
|
|
|
async def test_agentic_episode_delegates_to_search_episodes_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC method delegates to search_episodes_agentic and returns its result."""
    from everos.memory.search.dto import SearchEpisodeItem

    canned_episode = SearchEpisodeItem(
        id="ep_1",
        score=0.9,
        session_id="s",
        user_id="alice",
        timestamp=_ts(),
        sender_ids=["alice"],
        subject="s",
        summary="s",
        episode="body",
        type="Conversation",
        atomic_facts=[],
    )
    fake_result = [canned_episode]

    async def _fake_agentic(*args: Any, **kwargs: Any) -> list[SearchEpisodeItem]:
        # Stand-in for the real agentic search: ignore inputs, return the canned list.
        return fake_result

    monkeypatch.setattr(
        "everos.memory.search.manager.search_episodes_agentic", _fake_agentic
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_user_req(method=SearchMethod.AGENTIC))
    assert response.data.episodes == fake_result
|
|
|
|
|
|
# ── AGENT owner hard partition ─────────────────────────────────────────
|
|
|
|
|
|
async def test_agent_keyword_returns_cases_and_skills_only() -> None:
    """An agent KEYWORD search returns cases and skills, never episodes or profiles."""
    manager = _build_manager(
        case_sparse=[_case_row("c_1")],
        skill_sparse=[_skill_row("s_1")],
    )
    response = await manager.search(_agent_req())
    data = response.data
    assert data.episodes == []
    assert data.profiles == []
    case_ids = [case.id for case in data.agent_cases]
    skill_ids = [skill.id for skill in data.agent_skills]
    assert case_ids == ["c_1"]
    assert skill_ids == ["s_1"]
|
|
|
|
|
|
async def test_agent_owner_ignores_include_profile() -> None:
    """Profile is user-only at this revision."""
    manager = _build_manager()
    profile_request = _agent_req(include_profile=True)
    response = await manager.search(profile_request)
    assert response.data.profiles == []
|
|
|
|
|
|
# ── Top-k behaviour ───────────────────────────────────────────────────
|
|
|
|
|
|
async def test_top_k_truncates_results() -> None:
    """``top_k=3`` keeps only the first three of ten descending-score rows."""
    sparse_rows = []
    for i in range(10):
        # Scores step down by 0.01 so the input is already in rank order.
        sparse_rows.append(_episode_row(f"ep_{i}", score=1.0 - i * 0.01))
    manager = _build_manager(episode_sparse=sparse_rows)
    response = await manager.search(_user_req(top_k=3))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_0", "ep_1", "ep_2"]
|
|
|
|
|
|
async def test_top_k_minus_one_caps_at_100() -> None:
    """``top_k=-1`` over 120 sparse rows returns exactly 100 episodes."""
    sparse_rows = []
    for i in range(120):
        sparse_rows.append(_episode_row(f"ep_{i}"))
    manager = _build_manager(episode_sparse=sparse_rows)
    response = await manager.search(_user_req(top_k=-1))
    assert len(response.data.episodes) == 100
|
|
|
|
|
|
# ── AGENTIC agent_case / agent_skill delegation ───────────────────────────
|
|
|
|
|
|
async def test_agentic_agent_cases_delegates_to_search_agent_cases_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC method for agent owner delegates to search_agent_cases_agentic."""
    from everos.memory.search.dto import SearchAgentCaseItem

    canned_case = SearchAgentCaseItem(
        id="c_1",
        agent_id="agent_a",
        session_id="sess_b",
        timestamp=_ts(),
        task_intent="handle login",
        approach="retry with backoff",
        quality_score=0.9,
        score=0.85,
    )
    fake_cases = [canned_case]

    async def _fake_cases_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentCaseItem]:
        # Stand-in for the real agentic case search: return the canned list.
        return fake_cases

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_cases_agentic",
        _fake_cases_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_agent_req(method=SearchMethod.AGENTIC))
    assert response.data.agent_cases == fake_cases
|
|
|
|
|
|
async def test_agentic_agent_skills_delegates_to_search_agent_skills_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC method for agent owner delegates to search_agent_skills_agentic."""
    from everos.memory.search.dto import SearchAgentSkillItem

    canned_skill = SearchAgentSkillItem(
        id="s_1",
        agent_id="agent_a",
        name="auth_refresh",
        description="Refreshes auth tokens",
        content="Retry with new token",
        confidence=0.9,
        maturity_score=0.7,
        source_case_ids=[],
        score=0.8,
    )
    fake_skills = [canned_skill]

    async def _fake_skills_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentSkillItem]:
        # Stand-in for the real agentic skill search: return the canned list.
        return fake_skills

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_agentic",
        _fake_skills_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_agent_req(method=SearchMethod.AGENTIC))
    assert response.data.agent_skills == fake_skills
|
|
|
|
|
|
# ── _merge_by_id_max / _case_bridged_skills helpers ──────────────────────
|
|
|
|
|
|
def test_merge_by_id_max_keeps_higher_score_on_collision() -> None:
    """Same-id collision → keep the higher score; non-colliding rows are
    unioned. Used to fold bridge candidates into the direct dense pool.
    """
    from everos.memory.search.manager import _merge_by_id_max

    def _row(cid: str, score: float, src: str) -> Candidate:
        # Tag each row with the pool it came from so the winner is identifiable.
        return Candidate(id=cid, score=score, source="vector", metadata={"src": src})

    primary = [_row("s1", 0.5, "primary"), _row("s2", 0.7, "primary")]
    extra = [
        _row("s1", 0.9, "bridge"),
        _row("s2", 0.3, "bridge"),
        _row("s3", 0.6, "bridge"),
    ]

    merged = {}
    for candidate in _merge_by_id_max(primary, extra):
        merged[candidate.id] = candidate

    # s1 collision → bridge wins (0.9 > 0.5).
    assert merged["s1"].score == 0.9
    assert merged["s1"].metadata["src"] == "bridge"
    # s2 collision → primary wins (0.7 > 0.3).
    assert merged["s2"].score == 0.7
    assert merged["s2"].metadata["src"] == "primary"
    # s3 only exists in the bridge pool and is added.
    assert merged["s3"].score == 0.6
|
|
|
|
|
|
async def test_case_bridged_skills_max_pools_score_across_source_cases() -> None:
    """Each bridged skill inherits the highest score among its matched
    source cases (mirrors the ``maxsim_atomic`` fact→episode pooling).
    Source cases not present in the bridge pool are ignored.
    """
    bridged_skill = Candidate(
        id="agent_a_skill_x",
        score=0.0,  # bridge ignores the recaller-side score
        source="vector",
        metadata={"source_case_ids": ["c1", "c2", "c3"], "name": "x"},
    )
    manager = _build_manager(skill_by_case=[bridged_skill])
    case_pool = [
        Candidate(id="c1", score=0.4, source="vector", metadata={}),
        Candidate(id="c2", score=0.9, source="vector", metadata={}),  # max wins
        Candidate(id="c_other", score=0.7, source="vector", metadata={}),
    ]

    result = await manager._case_bridged_skills(case_pool, where="", top_k=5)

    assert len(result) == 1
    only_skill = result[0]
    assert only_skill.id == "agent_a_skill_x"
    # c1=0.4 and c2=0.9 are in the bridge pool; c3 is not → max-pool == 0.9.
    assert only_skill.score == pytest.approx(0.9)
    # Metadata (incl. ``source_case_ids``) rides through so downstream
    # shaping doesn't need a second fetch.
    assert only_skill.metadata["source_case_ids"] == ["c1", "c2", "c3"]
|
|
|
|
|
|
async def test_case_bridged_skills_returns_empty_for_none_or_empty_input() -> None:
    """No bridge cases ⇒ no bridge recall (skip the reverse fetch entirely).
    This is the cross-encoder lane / KEYWORD / VECTOR contract.
    """
    # The stub holds a skill row on purpose: if the manager performed the
    # reverse fetch anyway, that row would leak into the result.
    manager = _build_manager(skill_by_case=[_skill_row("s1")])
    from_none = await manager._case_bridged_skills(None, where="", top_k=5)
    assert from_none == []
    from_empty = await manager._case_bridged_skills([], where="", top_k=5)
    assert from_empty == []
|
|
|
|
|
|
# ── Agent HYBRID lane selection ──────────────────────────────────────────
|
|
|
|
|
|
async def test_agent_hybrid_no_llm_rerank_runs_cross_encoder_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``enable_llm_rerank=False`` for agent HYBRID must dispatch through
    ``search_agent_skills_hybrid`` (rrf → cross-encoder lane) with the
    configured reranker, not through generic ``arank``.
    """
    captured: dict[str, Any] = {}

    # The explicit keyword-only signature doubles as a check on how the
    # manager calls the lane: any unexpected keyword raises ``TypeError``.
    async def _fake_hybrid(
        query: str,
        *,
        sparse: list[Candidate],
        dense: list[Candidate],
        reranker: Any,
        top_k: int,
    ) -> list:
        captured["query"] = query
        captured["sparse"] = sparse
        captured["dense"] = dense
        captured["reranker"] = reranker
        captured["top_k"] = top_k
        return []

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_hybrid", _fake_hybrid
    )
    stub_reranker = _StubReranker()
    manager = _build_manager(embedding=_StubEmbedding(), reranker=stub_reranker)
    await manager.search(_agent_req(method=SearchMethod.HYBRID))

    assert captured["query"] == "hi"
    # Manager forwards its configured reranker to the cross-encoder lane.
    assert captured["reranker"] is stub_reranker
    # Agent kinds cap unlimited-mode top_k at _AGENT_TOP_K_CAP (10).
    assert captured["top_k"] == 10
|
|
|
|
|
|
async def test_agent_hybrid_llm_rerank_dispatches_arank_for_case_then_skill(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """LLM rerank lane: ``_search_cases_and_skills`` runs serially —
    ``arank`` is called once with ``memory_type="case"`` and once with
    ``memory_type="skill"``, both with ``enable_rerank=True`` + the LLM
    client. Order matters: the case call must precede the skill call so
    its results can feed the bridge.
    """
    from everalgo.types import RankOutput

    # One (memory_type, kwargs) entry per ``arank`` dispatch, in call order.
    dispatches: list[tuple[str, dict[str, Any]]] = []

    async def _fake_arank(rank_input: Any, **kwargs: Any) -> RankOutput:
        dispatches.append((rank_input.memory_type, kwargs))
        return RankOutput(items=[], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    await manager.search(
        _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    )

    # Two dispatches in the documented serial order.
    dispatched_types = [memory_type for memory_type, _ in dispatches]
    assert dispatched_types == ["case", "skill"]
    # Both runs opt into rerank with the LLM client wired in.
    for _memory_type, call_kwargs in dispatches:
        assert call_kwargs["enable_rerank"] is True
        assert call_kwargs["llm"] is manager._llm
        assert call_kwargs["rerank_top_k"] == 10  # _AGENT_TOP_K_CAP
|
|
|
|
|
|
async def test_agent_hybrid_llm_rerank_merges_bridged_skills_into_dense_pool(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The bridge must surface into the skill dispatch: skills resolved
    by ``fetch_by_case_ids`` are max-pooled into the dense candidates that
    ``arank`` sees on the second call, while the direct skill recall pool
    is preserved.
    """
    from everalgo.types import RankOutput, ScoredItem

    # Shaper requires owner_type="agent" + timestamp + intent/approach;
    # otherwise the case is dropped and bridge_cases comes back empty.
    case_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": "intent c1",
        "approach": "approach c1",
        "quality_score": 0.8,
    }
    case_result = ScoredItem(
        id="agent_a_c1",
        score=0.85,
        item_type="case",
        metadata=case_metadata,
    )
    skill_direct = _skill_row("s_direct")
    skill_bridged = Candidate(
        id="s_bridged",
        score=0.0,
        source="vector",
        metadata={"source_case_ids": ["agent_a_c1"], "name": "s_bridged"},
    )

    seen_skill_dense: dict[str, list[Candidate]] = {}

    async def _fake_arank(rank_input: Any, **_: Any) -> RankOutput:
        if rank_input.memory_type != "case":
            # skill call — capture the merged dense pool the manager built.
            seen_skill_dense["dense"] = list(rank_input.dense_candidates)
            return RankOutput(items=[], metadata={})
        return RankOutput(items=[case_result], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    manager = _build_manager(
        embedding=_StubEmbedding(),
        llm_client=_StubLLM(),
        skill_sparse=[],
        skill_dense=[skill_direct],
        skill_by_case=[skill_bridged],
    )
    await manager.search(
        _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    )

    dense_pool = seen_skill_dense["dense"]
    # Direct dense recall is preserved AND the case-bridged skill is unioned.
    assert {candidate.id for candidate in dense_pool} == {"s_direct", "s_bridged"}
    # The bridged skill inherits the matched case's score (0.85 from c1).
    by_id = {candidate.id: candidate for candidate in dense_pool}
    assert by_id["s_bridged"].score == pytest.approx(0.85)
|