chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
0
tests/unit/test_memory/test_search/__init__.py
Normal file
0
tests/unit/test_memory/test_search/__init__.py
Normal file
27
tests/unit/test_memory/test_search/conftest.py
Normal file
27
tests/unit/test_memory/test_search/conftest.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""Shared fixtures for ``memory.search`` unit tests.
|
||||
|
||||
The project default is ``EVEROS_SEARCH__VECTOR_STRATEGY=maxsim_atomic`` —
|
||||
that path queries both the ``atomic_fact`` table and the ``episode`` table
|
||||
to do MaxSim. The existing VECTOR-route tests in ``test_manager.py`` were
|
||||
written against the legacy single-vector ``episode`` path and stub only the
|
||||
episode recaller (atomic_fact recaller is a no-data stub).
|
||||
|
||||
Force the legacy ``episode`` strategy by default for these tests so they
|
||||
keep asserting against the dense-recall path they were designed to cover.
|
||||
MaxSim-specific tests opt back into ``maxsim_atomic`` by overriding the env
|
||||
var inside their own body.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.config.settings import load_settings
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _force_episode_vector_strategy(monkeypatch: pytest.MonkeyPatch) -> None:
    """Pin ``EVEROS_SEARCH__VECTOR_STRATEGY`` to ``episode`` around every test.

    Autouse: applies to each test collected under this conftest without being
    requested. ``load_settings.cache_clear()`` runs before and after the test
    body — presumably so cached settings are rebuilt from the patched
    environment and do not leak into later tests (confirm against
    ``load_settings``).
    """
    # NOTE(review): this is a generator fixture, so ``Iterator[None]`` would be
    # a more precise return annotation than ``None``.
    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "episode")
    load_settings.cache_clear()
    yield
    load_settings.cache_clear()
|
||||
59
tests/unit/test_memory/test_search/test_adapter.py
Normal file
59
tests/unit/test_memory/test_search/test_adapter.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""Unit tests for ``memory.search.adapter.resolve_pipeline``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.memory.search.adapter import resolve_pipeline
|
||||
from everos.memory.search.dto import SearchMethod
|
||||
|
||||
|
||||
def test_keyword_skips_everalgo() -> None:
    """KEYWORD on ``episode`` resolves to ``(None, None)``."""
    selected, config = resolve_pipeline(SearchMethod.KEYWORD, "episode")
    assert selected is None
    assert config is None
|
||||
|
||||
|
||||
def test_vector_skips_everalgo() -> None:
    """VECTOR on ``episode`` resolves to ``(None, None)``."""
    selected, config = resolve_pipeline(SearchMethod.VECTOR, "episode")
    assert selected is None
    assert config is None
|
||||
|
||||
|
||||
def test_hybrid_episode_picks_hierarchy() -> None:
    """HYBRID on ``episode`` selects ``"hierarchy"`` with no config."""
    selected, config = resolve_pipeline(SearchMethod.HYBRID, "episode")
    assert selected == "hierarchy"
    assert config is None
|
||||
|
||||
|
||||
def test_hybrid_atomic_fact_picks_hierarchy() -> None:
    """HYBRID on ``atomic_fact`` selects ``"hierarchy"``; config is not checked."""
    resolved = resolve_pipeline(SearchMethod.HYBRID, "atomic_fact")
    selected, _config = resolved
    assert selected == "hierarchy"
|
||||
|
||||
|
||||
def test_hybrid_case_picks_vector_anchored() -> None:
    """HYBRID on ``agent_case`` selects ``"vector_anchored"`` with no config."""
    selected, config = resolve_pipeline(SearchMethod.HYBRID, "agent_case")
    assert selected == "vector_anchored"
    assert config is None
|
||||
|
||||
|
||||
def test_hybrid_skill_picks_skill_hybrid() -> None:
    """HYBRID on ``agent_skill`` selects ``"skill_hybrid"``; config is not checked."""
    resolved = resolve_pipeline(SearchMethod.HYBRID, "agent_skill")
    selected, _config = resolved
    assert selected == "skill_hybrid"
|
||||
|
||||
|
||||
def test_agentic_method_raises_value_error() -> None:
    """``resolve_pipeline`` rejects ``SearchMethod.AGENTIC`` with ValueError.

    AGENTIC is a real enum member, unlike the arbitrary string used by
    ``test_unsupported_method_raises``. The manager is expected to intercept
    AGENTIC before ``resolve_pipeline`` is reached; this pins the defensive
    ValueError raised if it ever gets through.
    """
    agentic = SearchMethod.AGENTIC
    with pytest.raises(ValueError, match="unsupported method"):
        resolve_pipeline(agentic, "episode")
|
||||
|
||||
|
||||
def test_unsupported_method_raises() -> None:
    """A method value that is not a ``SearchMethod`` member raises ValueError."""
    bogus_method = "not-a-method"
    with pytest.raises(ValueError, match="unsupported method"):
        resolve_pipeline(bogus_method, "episode")  # type: ignore[arg-type]
|
||||
338
tests/unit/test_memory/test_search/test_agentic.py
Normal file
338
tests/unit/test_memory/test_search/test_agentic.py
Normal file
@ -0,0 +1,338 @@
|
||||
"""Unit tests for ``memory.search.agentic.search_episodes_agentic``.
|
||||
|
||||
White-box: patches ``aagentic_retrieve`` to assert benchmark hyperparameters
|
||||
are wired correctly, plus a shaping test to verify id remapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Sequence
|
||||
from typing import Any, ClassVar
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from everalgo.clustering import Cluster
|
||||
from everalgo.rank.protocols import AgenticDecision
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from everalgo.types import Candidate
|
||||
|
||||
from everos.component.utils.datetime import from_timestamp
|
||||
from everos.memory.search.agentic import (
|
||||
_restore_shaper_metadata,
|
||||
_to_everalgo_doc_metadata,
|
||||
search_episodes_agentic,
|
||||
)
|
||||
from everos.memory.search.dto import SearchEpisodeItem
|
||||
|
||||
# ── Stubs ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _mc_candidate(mc_id: str, ep_id: str, score: float = 0.8) -> Candidate:
    """Build a vector-sourced Candidate whose ``id`` is the memcell id.

    The episode id travels in ``metadata['episode_id']`` and ``parent_id``
    repeats the memcell id (the shape the original note attributes to
    amaxsim / ``fetch_all_for_owner``).
    """
    metadata = {
        "episode_id": ep_id,
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": "Alice eats oat milk",
        "summary": "Alice food preferences",
        "episode": "Alice prefers oat milk in her coffee",
        "parent_id": mc_id,
    }
    return Candidate(id=mc_id, score=score, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
class _StubEpisodeRecaller:
    """In-memory episode recaller stub backed by canned candidates.

    ``dense_recall`` and ``fetch_all_for_owner`` hand back a fresh list of
    ``all_docs``; ``fetch_by_parent_ids`` looks ids up in ``by_parent``;
    ``sparse_recall`` is always empty.
    """

    kind: ClassVar[str] = "episode"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "episode"

    def __init__(
        self, all_docs: list[Candidate], by_parent: dict[str, Candidate]
    ) -> None:
        self._by_parent = by_parent
        self._all_docs = all_docs

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return no sparse hits, whatever the arguments."""
        return []

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list holding every canned document."""
        return [*self._all_docs]

    async def fetch_by_parent_ids(
        self, parent_ids: Sequence[str], where: str
    ) -> list[Candidate]:
        """Return the canned candidate for each known parent id, in request order.

        Unknown ids are skipped and ``where`` is ignored. Per the fixture, the
        stored candidates carry ``id=episode_id`` (the real LanceDB id).
        """
        found: list[Candidate] = []
        for parent_id in parent_ids:
            if parent_id in self._by_parent:
                found.append(self._by_parent[parent_id])
        return found

    async def fetch_all_for_owner(self, where: str) -> list[Candidate]:
        """Return a new list of all canned documents; ``where`` is ignored.

        Per the fixture, these carry ``id=memcell_id`` and
        ``metadata['episode_id']``.
        """
        return [*self._all_docs]
|
||||
|
||||
|
||||
class _StubFactRecaller:
    """Atomic-fact recaller stub: both recall paths return the same canned facts."""

    kind: ClassVar[str] = "atomic_fact"
    everalgo_memory_type: ClassVar[str] = "episodic"
    text_field: ClassVar[str] = "fact"

    def __init__(self, facts: list[Candidate]) -> None:
        self._facts = facts

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned facts; arguments are ignored."""
        return [*self._facts]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned facts; arguments are ignored."""
        return [*self._facts]
|
||||
|
||||
|
||||
class _StubReranker:
    """Reranker stub that keeps passage order and scores 1.0, 0.9, 0.8, ..."""

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Return one result per passage; ``query`` and ``instruction`` are ignored."""

        class _R:
            # Minimal result record exposing ``index`` and ``score``.
            def __init__(self, idx: int) -> None:
                self.index = idx
                self.score = 1.0 - idx * 0.1

        return [_R(position) for position, _ in enumerate(passages)]
|
||||
|
||||
|
||||
# ── Fixtures ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture()
def mc_cand() -> Candidate:
    """Memcell-keyed candidate ``mc_1`` pointing at episode ``ep_1``."""
    return _mc_candidate(mc_id="mc_1", ep_id="ep_1")
|
||||
|
||||
|
||||
@pytest.fixture()
def ep_recaller(mc_cand: Candidate) -> _StubEpisodeRecaller:
    """Episode recaller stub holding ``mc_cand`` plus its episode-keyed twin.

    The twin has ``id="ep_1"``, score 0.0 and reuses ``mc_cand``'s metadata;
    it is what ``fetch_by_parent_ids`` returns for parent ``mc_1``.
    """
    episode_row = Candidate(
        id="ep_1", score=0.0, source="vector", metadata=mc_cand.metadata
    )
    return _StubEpisodeRecaller(all_docs=[mc_cand], by_parent={"mc_1": episode_row})
|
||||
|
||||
|
||||
@pytest.fixture()
def fact_cand() -> Candidate:
    """Single atomic-fact candidate ``f_1`` whose parent is memcell ``mc_1``."""
    fact_metadata = {"parent_id": "mc_1", "fact": "Alice prefers oat milk"}
    return Candidate(id="f_1", score=0.9, source="vector", metadata=fact_metadata)
|
||||
|
||||
|
||||
@pytest.fixture()
def fact_recaller(fact_cand: Candidate) -> _StubFactRecaller:
    """Fact recaller stub that serves only ``fact_cand``."""
    canned_facts = [fact_cand]
    return _StubFactRecaller(canned_facts)
|
||||
|
||||
|
||||
@pytest.fixture()
def clusters() -> list[Cluster]:
    """One placeholder ``Cluster`` for tests that mock ``list_for_owner``."""
    # Every test mocks ``cluster_repo.list_for_owner``, so everalgo never
    # exercises the cluster contents. All that is needed is an instance that
    # satisfies the ``Cluster`` schema (ndarray centroid + last_ts).
    placeholder = Cluster(
        id="cl_1",
        members=["mc_1"],
        centroid=np.zeros(4, dtype=np.float32),
        last_ts=0,
    )
    return [placeholder]
|
||||
|
||||
|
||||
# ── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_agentic_search_wires_benchmark_hyperparams(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
    clusters: list[Cluster],
) -> None:
    """``search_episodes_agentic`` forwards the pinned hyperparams downstream.

    ``aagentic_retrieve`` is swapped for a recording fake whose keyword-only
    signature has no ``**kwargs``, so a missing or unexpected keyword fails
    the call as well.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: int,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        # Record only the values asserted below; for the round-2 retriever we
        # just note whether one was supplied.
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        seen["has_round2"] = round2_retrieve is not None
        return [], AgenticDecision(is_multi_round=False)

    async def fake_embed(q: str) -> list[float]:
        return [0.1, 0.2, 0.3, 0.4]

    list_for_owner = AsyncMock(return_value=clusters)
    with (
        patch("everos.memory.search.agentic.aagentic_retrieve", fake_aagentic),
        patch(
            "everos.memory.search.agentic.cluster_repo.list_for_owner",
            list_for_owner,
        ),
    ):
        await search_episodes_agentic(
            "What did Alice eat?",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    expected = {
        "top_n": 10,
        "round1_top_n": 50,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 40,
        "refinement_strategy": "multi_query",
    }
    for name, want in expected.items():
        assert seen[name] == want, name
    assert seen["has_round2"] is True
|
||||
|
||||
|
||||
async def test_agentic_search_loads_user_memory_clusters(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
) -> None:
    """Clusters are loaded exactly once via ``list_for_owner("alice", "user_memory")``."""
    list_for_owner = AsyncMock(return_value=[])
    retrieve = AsyncMock(return_value=([], AgenticDecision(is_multi_round=False)))

    async def fake_embed(q: str) -> list[float]:
        return [0.1] * 4

    with (
        patch("everos.memory.search.agentic.aagentic_retrieve", retrieve),
        patch(
            "everos.memory.search.agentic.cluster_repo.list_for_owner",
            list_for_owner,
        ),
    ):
        await search_episodes_agentic(
            "q",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    list_for_owner.assert_called_once_with("alice", "user_memory")
|
||||
|
||||
|
||||
async def test_agentic_search_shapes_candidates_with_episode_id(
    ep_recaller: _StubEpisodeRecaller,
    fact_recaller: _StubFactRecaller,
    clusters: list[Cluster],
    mc_cand: Candidate,
) -> None:
    """Returned ``SearchEpisodeItem.id`` is the episode id, not the memcell id.

    The patched ``aagentic_retrieve`` yields the memcell-keyed ``mc_cand``;
    the shaped result must carry ``"ep_1"`` from ``metadata['episode_id']``.
    """

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        single_round = AgenticDecision(is_multi_round=False)
        return [mc_cand], single_round

    async def fake_embed(q: str) -> list[float]:
        return [0.1] * 4

    list_for_owner = AsyncMock(return_value=clusters)
    with (
        patch("everos.memory.search.agentic.aagentic_retrieve", fake_aagentic),
        patch(
            "everos.memory.search.agentic.cluster_repo.list_for_owner",
            list_for_owner,
        ),
    ):
        result = await search_episodes_agentic(
            "What did Alice eat?",
            owner_id="alice",
            where="owner_id = 'alice' AND owner_type = 'user'",
            episode_recaller=ep_recaller,
            atomic_fact_recaller=fact_recaller,
            embed_query_fn=fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(result) == 1
    assert isinstance(result[0], SearchEpisodeItem)
    assert result[0].id == "ep_1", (
        f"Expected episode_id='ep_1' but got {result[0].id!r}. "
        "Shaper must remap from memcell_id via metadata['episode_id']."
    )
|
||||
|
||||
|
||||
# ── Metadata bridge to the everalgo _format_docs contract ──────────────────
|
||||
|
||||
|
||||
def test_to_everalgo_doc_metadata_injects_text_and_ms_timestamp() -> None:
    """The bridge adds ``text`` and turns ``timestamp`` into an int.

    ``text`` must equal the episode body and the int timestamp must convert
    back to the original datetime through ``from_timestamp``. Per the original
    note, ``_format_docs`` otherwise falls back to the memcell id as the doc
    body and renders the date as "N/A" in the sufficiency / multi-query
    prompt. ``episode`` has to stay a str for the reranker and shaper.
    """
    original = _ts()
    source_metadata = {
        "episode": "Alice prefers oat milk",
        "timestamp": original,
        "subject": "Alice eats oat milk",
    }

    bridged = _to_everalgo_doc_metadata(source_metadata)

    assert bridged["text"] == "Alice prefers oat milk"
    # ``episode`` is left untouched for rerank/shaper.
    assert bridged["episode"] == "Alice prefers oat milk"
    assert isinstance(bridged["timestamp"], int)
    assert from_timestamp(bridged["timestamp"]) == original
|
||||
|
||||
|
||||
def test_restore_shaper_metadata_reverts_ms_timestamp_to_datetime() -> None:
    """Bridging then restoring yields the original datetime ``timestamp``."""
    original = _ts()
    source_metadata = {"episode": "x", "timestamp": original}

    round_tripped = _restore_shaper_metadata(
        _to_everalgo_doc_metadata(source_metadata)
    )

    assert isinstance(round_tripped["timestamp"], _dt.datetime)
    assert round_tripped["timestamp"] == original
|
||||
272
tests/unit/test_memory/test_search/test_agentic_agent.py
Normal file
272
tests/unit/test_memory/test_search/test_agentic_agent.py
Normal file
@ -0,0 +1,272 @@
|
||||
"""Unit tests for ``memory.search.agentic_agent``.
|
||||
|
||||
White-box: patches ``aagentic_retrieve`` to assert benchmark hyperparameters
|
||||
are wired correctly, plus a shaping test to verify DTOs are built correctly.
|
||||
|
||||
The skill verify step has been removed from production code; this test
|
||||
module covers the agentic retrieve flow only.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import Any, ClassVar
|
||||
from unittest.mock import patch
|
||||
|
||||
from everalgo.rank.protocols import AgenticDecision
|
||||
from everalgo.testing.fake_llm import FakeLLMClient
|
||||
from everalgo.types import Candidate
|
||||
|
||||
from everos.memory.search.agentic_agent import (
|
||||
search_agent_cases_agentic,
|
||||
search_agent_skills_agentic,
|
||||
)
|
||||
from everos.memory.search.dto import SearchAgentCaseItem, SearchAgentSkillItem
|
||||
|
||||
# ── Stubs ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _case_candidate(cid: str, score: float = 0.8) -> Candidate:
    """Build a vector-sourced agent-case Candidate with id-derived text fields."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": f"intent {cid}",
        "approach": f"approach {cid}",
        "quality_score": 0.8,
    }
    return Candidate(id=cid, score=score, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
def _skill_candidate(sid: str, score: float = 0.75) -> Candidate:
    """Build a vector-sourced agent-skill Candidate with id-derived text fields."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": f"skill_{sid}",
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=score, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
class _StubCaseRecaller:
    """Agent-case recaller stub: both recall paths return the canned list."""

    kind: ClassVar[str] = "agent_case"
    everalgo_memory_type: ClassVar[str] = "case"
    text_field: ClassVar[str] = "task_intent"

    def __init__(self, dense: list[Candidate]) -> None:
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned candidates; arguments are ignored."""
        return [*self._dense]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned candidates; arguments are ignored."""
        return [*self._dense]
|
||||
|
||||
|
||||
class _StubSkillRecaller:
    """Agent-skill recaller stub: both recall paths return the canned list."""

    kind: ClassVar[str] = "agent_skill"
    everalgo_memory_type: ClassVar[str] = "skill"
    text_field: ClassVar[str] = "description"

    def __init__(self, dense: list[Candidate]) -> None:
        self._dense = dense

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned candidates; arguments are ignored."""
        return [*self._dense]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a new list of the canned candidates; arguments are ignored."""
        return [*self._dense]
|
||||
|
||||
|
||||
class _StubReranker:
    """Reranker stub that keeps passage order and scores 1.0, 0.9, 0.8, ..."""

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Return one result per passage, in the original order.

        ``instruction`` is accepted (and ignored) so this stub has the same
        call shape as the other reranker stubs in this test package and does
        not raise ``TypeError`` when a caller forwards ``instruction=...``.
        """

        class _R:
            # Minimal result record exposing ``index`` and ``score``.
            def __init__(self, idx: int) -> None:
                self.index = idx
                self.score = 1.0 - idx * 0.1

        return [_R(i) for i in range(len(passages))]
|
||||
|
||||
|
||||
async def _fake_embed(q: str) -> list[float]:
    """Return a fixed 4-element embedding regardless of the query."""
    fixed_vector = [0.1, 0.2, 0.3, 0.4]
    return fixed_vector
|
||||
|
||||
|
||||
# ── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_search_agent_cases_agentic_calls_aagentic_retrieve_with_benchmark_params() -> (  # noqa: E501
    None
):
    """``search_agent_cases_agentic`` forwards the pinned hyperparams.

    ``aagentic_retrieve`` is swapped for a recording fake whose keyword-only
    signature has no ``**kwargs``, so a missing or unexpected keyword fails
    the call as well.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: Any,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["round2_retrieve_is_none"] = round2_retrieve is None
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        return [], AgenticDecision(is_multi_round=False)

    with patch("everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic):
        await search_agent_cases_agentic(
            "How did agent handle login failure?",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            case_recaller=_StubCaseRecaller([]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    expected = {
        "top_n": 10,
        "round1_top_n": 20,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 60,
        "refinement_strategy": "multi_query",
    }
    for name, want in expected.items():
        assert seen[name] == want, name
    assert seen["round2_retrieve_is_none"] is True
|
||||
|
||||
|
||||
async def test_search_agent_skills_agentic_calls_aagentic_retrieve_with_benchmark_params() -> (  # noqa: E501
    None
):
    """``search_agent_skills_agentic`` forwards the pinned hyperparams.

    ``aagentic_retrieve`` is swapped for a recording fake whose keyword-only
    signature has no ``**kwargs``, so a missing or unexpected keyword fails
    the call as well.
    """
    seen: dict[str, Any] = {}

    async def fake_aagentic(
        query: str,
        *,
        base_retrieve: Any,
        llm: Any,
        rerank_fn: Any,
        round2_retrieve: Any,
        round2_cap: Any,
        top_n: int,
        round1_top_n: int,
        round1_rerank_top_n: int,
        refinement_strategy: str,
        multi_query_count: int,
        rrf_k: int,
    ) -> tuple[list[Candidate], AgenticDecision]:
        seen["top_n"] = top_n
        seen["round1_top_n"] = round1_top_n
        seen["round1_rerank_top_n"] = round1_rerank_top_n
        seen["round2_cap"] = round2_cap
        seen["round2_retrieve_is_none"] = round2_retrieve is None
        seen["multi_query_count"] = multi_query_count
        seen["rrf_k"] = rrf_k
        seen["refinement_strategy"] = refinement_strategy
        return [], AgenticDecision(is_multi_round=False)

    with patch("everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic):
        await search_agent_skills_agentic(
            "What skill handles auth token refresh?",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            skill_recaller=_StubSkillRecaller([]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=5,
        )

    expected = {
        "top_n": 5,
        "round1_top_n": 20,
        "round1_rerank_top_n": 10,
        "round2_cap": 40,
        "multi_query_count": 3,
        "rrf_k": 60,
        "refinement_strategy": "multi_query",
    }
    for name, want in expected.items():
        assert seen[name] == want, name
    assert seen["round2_retrieve_is_none"] is True
|
||||
|
||||
|
||||
async def test_search_agent_cases_agentic_shapes_result() -> None:
    """Retrieved candidates come back as ``SearchAgentCaseItem`` instances."""
    cand = _case_candidate("c_1")

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        single_round = AgenticDecision(is_multi_round=False)
        return [cand], single_round

    with patch("everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic):
        result = await search_agent_cases_agentic(
            "intent query",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            case_recaller=_StubCaseRecaller([cand]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(result) == 1
    shaped = result[0]
    assert isinstance(shaped, SearchAgentCaseItem)
    assert shaped.id == "c_1"
    assert shaped.task_intent == "intent c_1"
|
||||
|
||||
|
||||
async def test_search_agent_skills_agentic_shapes_result() -> None:
    """Retrieved candidates come back as ``SearchAgentSkillItem`` instances."""
    cand = _skill_candidate("s_1")

    async def fake_aagentic(
        *_: Any, **__: Any
    ) -> tuple[list[Candidate], AgenticDecision]:
        single_round = AgenticDecision(is_multi_round=False)
        return [cand], single_round

    with patch("everos.memory.search.agentic_agent.aagentic_retrieve", fake_aagentic):
        result = await search_agent_skills_agentic(
            "skill query",
            where="owner_id = 'agent_a' AND owner_type = 'agent'",
            skill_recaller=_StubSkillRecaller([cand]),
            embed_query_fn=_fake_embed,
            reranker=_StubReranker(),
            llm=FakeLLMClient(responses=[]),
            top_k=10,
        )

    assert len(result) == 1
    shaped = result[0]
    assert isinstance(shaped, SearchAgentSkillItem)
    assert shaped.id == "s_1"
    assert shaped.name == "skill_s_1"
|
||||
163
tests/unit/test_memory/test_search/test_callbacks.py
Normal file
163
tests/unit/test_memory/test_search/test_callbacks.py
Normal file
@ -0,0 +1,163 @@
|
||||
"""Unit tests for ``memory.search.callbacks``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from everalgo.types import Candidate
|
||||
|
||||
from everos.memory.search.callbacks import (
|
||||
_SKILL_RERANK_INSTRUCTION,
|
||||
build_rerank_fn,
|
||||
build_skill_rerank_fn,
|
||||
)
|
||||
|
||||
|
||||
class _StubReranker:
    """Recording reranker stub: original order, scores 1.0, 0.9, 0.8, ...

    The ``instruction`` and ``passages`` of the most recent ``rerank`` call
    are kept on the instance so tests can check what a callback factory
    forwarded. Both stay ``None`` until the first call.
    """

    def __init__(self) -> None:
        self.last_passages: list[str] | None = None
        self.last_instruction: str | None = None

    async def rerank(
        self, query: str, passages: list[str], *, instruction: str | None = None
    ) -> list[Any]:
        """Record the call, then return one scored result per passage."""
        self.last_instruction = instruction
        # Copy so later mutation of the caller's list does not alter the record.
        self.last_passages = [*passages]

        class _R:
            def __init__(self, index: int, score: float) -> None:
                self.index = index
                self.score = score

        return [_R(i, 1.0 - i * 0.1) for i, _ in enumerate(passages)]
|
||||
|
||||
|
||||
def _cand(cid: str, episode_text: str = "body") -> Candidate:
    """Build a vector-sourced Candidate (score 0.5) carrying only ``episode`` text."""
    metadata = {"episode": episode_text}
    return Candidate(id=cid, score=0.5, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
async def test_build_rerank_fn_returns_two_arg_callable() -> None:
    """The callable from ``build_rerank_fn`` takes exactly ``(query, candidates)``."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")
    params = [*inspect.signature(rerank_fn).parameters]
    assert params == ["query", "candidates"], f"Expected 2-arg fn, got params: {params}"
|
||||
|
||||
|
||||
async def test_build_rerank_fn_returns_all_candidates_without_truncation() -> None:
    """Five candidates in, five out: slicing is left to the caller."""
    five_candidates = [_cand(f"c{i}") for i in range(5)]
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")
    reranked = await rerank_fn("what did Alice eat?", five_candidates)
    assert len(reranked) == 5
|
||||
|
||||
|
||||
async def test_build_rerank_fn_attaches_scores_from_provider() -> None:
    """Returned candidates carry the provider's scores (1.0 then 0.9)."""
    rerank_fn = build_rerank_fn(_StubReranker(), text_field="episode")
    reranked = await rerank_fn("q", [_cand("a"), _cand("b")])
    for candidate in reranked:
        assert isinstance(candidate.score, float)
    first, second = reranked[0], reranked[1]
    assert first.score == pytest.approx(1.0)
    assert second.score == pytest.approx(0.9)
|
||||
|
||||
|
||||
async def test_build_rerank_fn_handles_empty_candidates() -> None:
    """Empty candidate list returns empty list without calling the provider."""
    # Keep a handle on the stub so the "provider not called" half of the
    # contract is checked too, as the skill-rerank counterpart does.
    stub = _StubReranker()
    rerank_fn = build_rerank_fn(stub, text_field="episode")
    result = await rerank_fn("q", [])
    assert result == []
    assert stub.last_passages is None  # provider never called
|
||||
|
||||
|
||||
async def test_build_rerank_fn_forwards_instruction() -> None:
    """The ``instruction`` given to the factory reaches the provider unchanged."""
    recorder = _StubReranker()
    rerank_fn = build_rerank_fn(
        recorder, text_field="episode", instruction="find facts"
    )
    await rerank_fn("q", [_cand("a")])
    assert recorder.last_instruction == "find facts"
|
||||
|
||||
|
||||
# ── build_skill_rerank_fn ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _skill_cand(cid: str, *, name: str = "", description: str = "") -> Candidate:
    """Build a vector-sourced skill Candidate (score 0.5) with name/description."""
    metadata = {"name": name, "description": description}
    return Candidate(id=cid, score=0.5, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_emits_shaped_passage() -> None:
    """With name and description set, the passage is ``"Agent Skill: {name} - {description}"``."""
    recorder = _StubReranker()
    skill = _skill_cand(
        "s1", name="refactor_auth", description="split provider lookup"
    )
    rerank_fn = build_skill_rerank_fn(recorder)
    await rerank_fn("q", [skill])
    assert recorder.last_passages == [
        "Agent Skill: refactor_auth - split provider lookup"
    ]
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_omits_dash_when_description_missing() -> None:
    """An empty description leaves no ``" - {description}"`` suffix."""
    recorder = _StubReranker()
    skill = _skill_cand("s1", name="refactor_auth", description="")
    rerank_fn = build_skill_rerank_fn(recorder)
    await rerank_fn("q", [skill])
    assert recorder.last_passages == ["Agent Skill: refactor_auth"]
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_falls_back_when_name_missing() -> None:
    """An empty name reduces the passage to the bare description."""
    recorder = _StubReranker()
    skill = _skill_cand("s1", name="", description="just text")
    rerank_fn = build_skill_rerank_fn(recorder)
    await rerank_fn("q", [skill])
    assert recorder.last_passages == ["just text"]
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_forwards_skill_instruction() -> None:
    """The provider receives ``_SKILL_RERANK_INSTRUCTION`` as its instruction."""
    recorder = _StubReranker()
    skill = _skill_cand("s1", name="x", description="y")
    rerank_fn = build_skill_rerank_fn(recorder)
    await rerank_fn("q", [skill])
    assert recorder.last_instruction == _SKILL_RERANK_INSTRUCTION
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_handles_empty_candidates() -> None:
    """No candidates: the result is empty and the provider is never invoked."""
    recorder = _StubReranker()
    rerank_fn = build_skill_rerank_fn(recorder)
    reranked = await rerank_fn("q", [])
    assert reranked == []
    # ``last_passages`` is only set inside ``rerank``, so None means no call.
    assert recorder.last_passages is None
|
||||
|
||||
|
||||
async def test_build_skill_rerank_fn_attaches_scores_and_preserves_metadata() -> None:
    """Reranked candidates carry the reranker's scores and their own metadata."""
    reranker = _StubReranker()
    rerank = build_skill_rerank_fn(reranker)
    alpha = _skill_cand("a", name="alpha", description="d-a")
    beta = _skill_cand("b", name="beta", description="d-b")

    reranked = await rerank("q", [alpha, beta])

    assert [cand.id for cand in reranked] == ["a", "b"]
    first, second = reranked[0], reranked[1]
    assert first.score == pytest.approx(1.0)
    assert second.score == pytest.approx(0.9)
    # Metadata must come back unchanged: shaping the passage only reads it.
    assert first.metadata["name"] == "alpha"
    assert second.metadata["description"] == "d-b"
|
||||
135
tests/unit/test_memory/test_search/test_dto.py
Normal file
135
tests/unit/test_memory/test_search/test_dto.py
Normal file
@ -0,0 +1,135 @@
|
||||
"""Unit tests for ``memory.search.dto`` validation rules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from everos.memory.search import (
|
||||
SearchData,
|
||||
SearchMethod,
|
||||
SearchRequest,
|
||||
SearchResponse,
|
||||
)
|
||||
|
||||
|
||||
def _minimal_request_kwargs() -> dict:
|
||||
return {
|
||||
"user_id": "alice",
|
||||
"query": "hello",
|
||||
}
|
||||
|
||||
|
||||
def test_enable_llm_rerank_defaults_to_false() -> None:
    """``enable_llm_rerank`` is ``False`` unless the caller sets it.

    A default HYBRID request therefore does not trigger the Phase-5 LLM
    rerank; callers opt in explicitly, which keeps the default call cheap
    (no LLM ``chat``).
    """
    request = SearchRequest(**_minimal_request_kwargs())
    assert request.enable_llm_rerank is False
|
||||
|
||||
|
||||
def test_enable_llm_rerank_accepts_true() -> None:
    """An explicit ``enable_llm_rerank=True`` is kept on the request."""
    request = SearchRequest(enable_llm_rerank=True, **_minimal_request_kwargs())
    assert request.enable_llm_rerank is True
|
||||
|
||||
|
||||
def test_minimal_request_uses_hybrid_default() -> None:
    """A minimal request defaults to HYBRID, ``top_k=-1`` and no optional extras."""
    request = SearchRequest(**_minimal_request_kwargs())
    assert request.method == SearchMethod.HYBRID
    assert request.top_k == -1
    assert request.include_profile is False
    for unset_field in ("filters", "radius"):
        assert getattr(request, unset_field) is None
|
||||
|
||||
|
||||
def test_top_k_zero_rejected() -> None:
    """``top_k=0`` fails validation and the error text names ``top_k``."""
    with pytest.raises(ValidationError) as caught:
        SearchRequest(**_minimal_request_kwargs(), top_k=0)
    message = str(caught.value)
    assert "top_k" in message
|
||||
|
||||
|
||||
def test_top_k_above_100_rejected() -> None:
    """``top_k=101`` fails validation."""
    kwargs = _minimal_request_kwargs()
    with pytest.raises(ValidationError):
        SearchRequest(**kwargs, top_k=101)
|
||||
|
||||
|
||||
def test_top_k_below_minus_one_rejected() -> None:
    """``top_k=-2`` fails validation."""
    kwargs = _minimal_request_kwargs()
    with pytest.raises(ValidationError):
        SearchRequest(**kwargs, top_k=-2)
|
||||
|
||||
|
||||
def test_top_k_minus_one_accepted() -> None:
    """An explicit ``top_k=-1`` is accepted and stored as given."""
    request = SearchRequest(top_k=-1, **_minimal_request_kwargs())
    assert request.top_k == -1
|
||||
|
||||
|
||||
def test_top_k_in_range_accepted() -> None:
    """``top_k=50`` is accepted and stored as given."""
    request = SearchRequest(top_k=50, **_minimal_request_kwargs())
    assert request.top_k == 50
|
||||
|
||||
|
||||
def test_radius_out_of_range_rejected() -> None:
    """Both ``radius=1.5`` and ``radius=-0.1`` fail validation."""
    for bad_radius in (1.5, -0.1):
        with pytest.raises(ValidationError):
            SearchRequest(**_minimal_request_kwargs(), radius=bad_radius)
|
||||
|
||||
|
||||
def test_neither_user_id_nor_agent_id_rejected() -> None:
    """A request with neither owner id fails with an "exactly one of" error."""
    owner_rule = pytest.raises(ValidationError, match="exactly one of")
    with owner_rule:
        SearchRequest(query="hello")  # no user_id, no agent_id
|
||||
|
||||
|
||||
def test_both_user_id_and_agent_id_rejected() -> None:
    """Supplying both owner ids fails with an "exactly one of" error."""
    owner_rule = pytest.raises(ValidationError, match="exactly one of")
    with owner_rule:
        SearchRequest(query="hello", user_id="alice", agent_id="agent_x")
|
||||
|
||||
|
||||
def test_empty_query_rejected() -> None:
    """An empty ``query`` string fails validation."""
    kwargs = {"user_id": "alice", "query": ""}
    with pytest.raises(ValidationError):
        SearchRequest(**kwargs)
|
||||
|
||||
|
||||
def test_empty_user_id_rejected() -> None:
    """An empty ``user_id`` string fails validation."""
    kwargs = {"user_id": "", "query": "hello"}
    with pytest.raises(ValidationError):
        SearchRequest(**kwargs)
|
||||
|
||||
|
||||
def test_extra_top_level_field_rejected() -> None:
    """An unknown top-level keyword fails validation (``extra='forbid'``)."""
    payload = {**_minimal_request_kwargs(), "unexpected_field": "x"}
    with pytest.raises(ValidationError):
        SearchRequest(**payload)
|
||||
|
||||
|
||||
def test_filters_extra_keys_allowed() -> None:
    """Arbitrary filter keys survive validation and a ``model_dump`` round trip.

    FilterNode is open-shape; safety is enforced later, in the compiler.
    """
    raw_filters = {"session_id": "sess_a", "AND": [{"timestamp": {"gte": 1}}]}
    request = SearchRequest(**_minimal_request_kwargs(), filters=raw_filters)
    assert request.filters is not None

    dumped = request.filters.model_dump(exclude_none=True)
    assert dumped["session_id"] == "sess_a"
    first_and_child = dumped["AND"][0]
    assert first_and_child["timestamp"]["gte"] == 1
|
||||
|
||||
|
||||
def test_response_default_arrays_present() -> None:
    """A default ``SearchData`` exposes every result array as an empty list.

    Callers can then iterate each ``data.*`` array unconditionally.
    """
    response = SearchResponse(request_id="0" * 32, data=SearchData())
    for array_name in ("episodes", "profiles", "agent_cases", "agent_skills"):
        assert getattr(response.data, array_name) == []
|
||||
|
||||
|
||||
def test_method_enum_serialises_to_lowercase() -> None:
    """The string ``"agentic"`` is coerced to ``SearchMethod.AGENTIC``."""
    kwargs = _minimal_request_kwargs()
    request = SearchRequest(**kwargs, method="agentic")  # type: ignore[arg-type]
    parsed = request.method
    assert parsed == SearchMethod.AGENTIC
    assert parsed.value == "agentic"
|
||||
244
tests/unit/test_memory/test_search/test_filters.py
Normal file
244
tests/unit/test_memory/test_search/test_filters.py
Normal file
@ -0,0 +1,244 @@
|
||||
"""Unit tests for the Filters DSL compiler."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.memory.search import (
|
||||
FilterError,
|
||||
FilterNode,
|
||||
compile_filters,
|
||||
)
|
||||
|
||||
# ── Base injection ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_no_filters_emits_base_clause() -> None:
    """With no filters the output is exactly the four-part base clause."""
    expected = " AND ".join(
        (
            "owner_id = 'alice'",
            "owner_type = 'user'",
            "app_id = 'default'",
            "project_id = 'default'",
        )
    )
    clause = compile_filters(None, owner_id="alice", owner_type="user")
    assert clause == expected
|
||||
|
||||
|
||||
def test_owner_type_agent_pinned() -> None:
    """``owner_type="agent"`` is rendered into the compiled clause."""
    clause = compile_filters(None, owner_type="agent", owner_id="alice")
    assert "owner_type = 'agent'" in clause
|
||||
|
||||
|
||||
def test_app_project_scope_pinned() -> None:
    """Explicit ``app_id`` / ``project_id`` values appear in the compiled clause."""
    scope = {"app_id": "claude_code", "project_id": "oss"}
    clause = compile_filters(None, owner_id="alice", owner_type="user", **scope)
    for fragment in ("app_id = 'claude_code'", "project_id = 'oss'"):
        assert fragment in clause
|
||||
|
||||
|
||||
def test_owner_id_with_quote_is_escaped() -> None:
    """A single quote inside ``owner_id`` is doubled in the rendered literal."""
    clause = compile_filters(None, owner_type="user", owner_id="al'ice")
    assert "owner_id = 'al''ice'" in clause
|
||||
|
||||
|
||||
# ── Equality / shorthand ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_flat_equality_shorthand() -> None:
    """A bare ``field=value`` on the node compiles to an equality predicate."""
    shorthand = FilterNode(session_id="sess_a")  # type: ignore[call-arg]
    clause = compile_filters(shorthand, owner_id="alice", owner_type="user")
    assert "session_id = 'sess_a'" in clause
|
||||
|
||||
|
||||
def test_multiple_flat_fields_join_with_and() -> None:
    """Each flat field on the node yields its own equality predicate."""
    spec = {"session_id": "sess_a", "parent_type": "memcell"}
    clause = compile_filters(
        FilterNode.model_validate(spec), owner_id="alice", owner_type="user"
    )
    for fragment in ("session_id = 'sess_a'", "parent_type = 'memcell'"):
        assert fragment in clause
|
||||
|
||||
|
||||
# ── Operators ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_timestamp_gte_renders_timestamp_literal() -> None:
    """An integer ``gte`` bound on ``timestamp`` renders as ``>= TIMESTAMP '...'``."""
    spec = {"timestamp": {"gte": 1704067200000}}
    lower_bound = FilterNode.model_validate(spec)
    clause = compile_filters(lower_bound, owner_id="alice", owner_type="user")
    assert "timestamp >= TIMESTAMP '" in clause
|
||||
|
||||
|
||||
def test_timestamp_range_folds_with_and() -> None:
    """Two bounds on ``timestamp`` both render and are joined with ``AND``.

    The base clause (owner / app / project scope) already contains
    ``" AND "``, so a plain ``" AND " in where`` check would pass even if the
    two bounds were never joined. The conjunction is therefore checked
    relative to the base clause compiled with no filters.
    """
    base = compile_filters(None, owner_id="alice", owner_type="user")
    node = FilterNode.model_validate(
        {"timestamp": {"gte": 1704067200000, "lt": 1740614399000}}
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    assert "timestamp >= TIMESTAMP '" in where
    assert "timestamp < TIMESTAMP '" in where
    # Operators on the same field are wrapped in a single group: expect one
    # extra " AND " attaching the group to the base clause and one more
    # joining the two bounds inside it.
    assert where.count(" AND ") >= base.count(" AND ") + 2
|
||||
|
||||
|
||||
def test_in_operator_string_field() -> None:
    """``in`` on a string field renders a quoted SQL ``IN (...)`` list."""
    spec = {"parent_type": {"in": ["memcell", "episode"]}}
    membership = FilterNode.model_validate(spec)
    clause = compile_filters(membership, owner_id="alice", owner_type="user")
    assert "parent_type IN ('memcell', 'episode')" in clause
|
||||
|
||||
|
||||
def test_in_operator_requires_non_empty_list() -> None:
    """An empty ``in`` list is rejected with ``FilterError`` at compile time."""
    empty_membership = FilterNode.model_validate({"parent_type": {"in": []}})
    with pytest.raises(FilterError):
        compile_filters(empty_membership, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_invalid_operator_rejected() -> None:
    """``between`` is rejected with a ``FilterError`` mentioning "operator"."""
    unsupported = FilterNode.model_validate({"timestamp": {"between": [1, 2]}})
    with pytest.raises(FilterError, match="operator"):
        compile_filters(unsupported, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
# ── Combinators ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_and_combinator() -> None:
    """An explicit ``AND`` list renders every child and joins them with ``AND``.

    The base clause already contains ``" AND "``, so the conjunction is
    checked by counting occurrences relative to the base clause rather than
    by bare membership, which would pass unconditionally.
    """
    base = compile_filters(None, owner_id="alice", owner_type="user")
    node = FilterNode.model_validate(
        {
            "AND": [
                {"timestamp": {"gte": 1704067200000}},
                {"timestamp": {"lt": 1740614399000}},
            ]
        }
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    assert "timestamp >= TIMESTAMP '" in where
    assert "timestamp < TIMESTAMP '" in where
    # One " AND " attaches the filter to the base clause; a second one must
    # join the two children of the combinator.
    assert where.count(" AND ") >= base.count(" AND ") + 2
|
||||
|
||||
|
||||
def test_or_combinator() -> None:
    """An ``OR`` list renders each child predicate and an ``OR`` joiner."""
    alternatives = [{"parent_type": "memcell"}, {"parent_type": "episode"}]
    either = FilterNode.model_validate({"OR": alternatives})
    clause = compile_filters(either, owner_id="alice", owner_type="user")
    for fragment in (
        " OR ",
        "parent_type = 'memcell'",
        "parent_type = 'episode'",
    ):
        assert fragment in clause
|
||||
|
||||
|
||||
def test_nested_and_inside_or() -> None:
    """An ``AND`` group nested in an ``OR`` renders all leaves and both joiners.

    ``" OR "`` never occurs in the base clause, so membership is enough for
    it. ``" AND "`` does occur there, so the nested conjunction is checked by
    counting occurrences relative to the base clause.
    """
    base = compile_filters(None, owner_id="alice", owner_type="user")
    node = FilterNode.model_validate(
        {
            "OR": [
                {"AND": [{"parent_type": "memcell"}, {"session_id": "sa"}]},
                {"parent_type": "episode"},
            ]
        }
    )
    where = compile_filters(node, owner_id="alice", owner_type="user")
    assert "parent_type = 'memcell'" in where
    assert "session_id = 'sa'" in where
    assert "parent_type = 'episode'" in where
    assert " OR " in where
    # One " AND " attaches the filter to the base clause; a second one must
    # come from the nested AND group itself.
    assert where.count(" AND ") >= base.count(" AND ") + 2
|
||||
|
||||
|
||||
def test_flat_field_alongside_and_combinator() -> None:
    """A flat field and a sibling ``AND`` list are both compiled."""
    spec = {"session_id": "sess_a", "AND": [{"timestamp": {"gte": 1}}]}
    mixed = FilterNode.model_validate(spec)
    clause = compile_filters(mixed, owner_id="alice", owner_type="user")
    for fragment in ("session_id = 'sess_a'", "timestamp >= TIMESTAMP '"):
        assert fragment in clause
|
||||
|
||||
|
||||
# ── Array field (sender_id → sender_ids) ────────────────────────────────
|
||||
|
||||
|
||||
def test_sender_id_eq_uses_array_has() -> None:
    """Equality on ``sender_id`` compiles to ``array_has(sender_ids, ...)``."""
    by_sender = FilterNode.model_validate({"sender_id": "u_jason"})
    clause = compile_filters(by_sender, owner_id="alice", owner_type="user")
    assert "array_has(sender_ids, 'u_jason')" in clause
|
||||
|
||||
|
||||
def test_sender_id_in_expands_to_or_array_has() -> None:
    """``in`` on ``sender_id`` yields one ``array_has`` per value, OR-ed together."""
    spec = {"sender_id": {"in": ["u_a", "u_b"]}}
    any_sender = FilterNode.model_validate(spec)
    clause = compile_filters(any_sender, owner_id="alice", owner_type="user")
    for fragment in (
        "array_has(sender_ids, 'u_a')",
        "array_has(sender_ids, 'u_b')",
        " OR ",
    ):
        assert fragment in clause
|
||||
|
||||
|
||||
def test_sender_id_gt_rejected() -> None:
    """``gt`` on ``sender_id`` raises ``FilterError`` ("not supported on array")."""
    ordered_compare = FilterNode.model_validate({"sender_id": {"gt": "x"}})
    with pytest.raises(FilterError, match="not supported on array"):
        compile_filters(ordered_compare, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
# ── Safety ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_unknown_field_rejected() -> None:
    """An unrecognised field raises ``FilterError`` ("unsupported filter field")."""
    unknown = FilterNode.model_validate({"secret_field": "x"})
    with pytest.raises(FilterError, match="unsupported filter field"):
        compile_filters(unknown, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_owner_id_in_filters_rejected() -> None:
    """Filtering on ``owner_id`` raises ``FilterError`` mentioning "reserved"."""
    spoofed_owner = FilterNode.model_validate({"owner_id": "mallory"})
    with pytest.raises(FilterError, match="reserved"):
        compile_filters(spoofed_owner, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_owner_type_in_filters_rejected() -> None:
    """Filtering on ``owner_type`` raises ``FilterError`` mentioning "reserved"."""
    spoofed_type = FilterNode.model_validate({"owner_type": "agent"})
    with pytest.raises(FilterError, match="reserved"):
        compile_filters(spoofed_type, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_string_with_single_quote_escaped() -> None:
    """A single quote inside a string filter value is doubled in the output."""
    quoted = FilterNode.model_validate({"session_id": "ses's"})
    clause = compile_filters(quoted, owner_id="alice", owner_type="user")
    assert "session_id = 'ses''s'" in clause
|
||||
|
||||
|
||||
def test_timestamp_string_with_quote_rejected() -> None:
    """A timestamp string containing a quote raises ``FilterError``.

    Embedded quotes could break the rendered literal, so the compiler is
    expected to refuse loudly ("contains a quote") rather than escape.
    """
    spec = {"timestamp": {"gte": "2024-01'-01T00:00:00"}}
    tainted = FilterNode.model_validate(spec)
    with pytest.raises(FilterError, match="contains a quote"):
        compile_filters(tainted, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_in_value_type_check() -> None:
    """Integer members in a string field's ``in`` list raise ``FilterError``."""
    wrong_types = FilterNode.model_validate({"parent_type": {"in": [1, 2]}})
    with pytest.raises(FilterError, match="must be a string"):
        compile_filters(wrong_types, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_bool_for_timestamp_rejected() -> None:
    """A boolean timestamp bound raises ``FilterError`` ("timestamp value")."""
    bool_bound = FilterNode.model_validate({"timestamp": {"gte": True}})
    with pytest.raises(FilterError, match="timestamp value"):
        compile_filters(bool_bound, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_empty_operator_map_rejected() -> None:
    """An empty operator dict raises ``FilterError`` ("empty operator map")."""
    no_operators = FilterNode.model_validate({"timestamp": {}})
    with pytest.raises(FilterError, match="empty operator map"):
        compile_filters(no_operators, owner_id="alice", owner_type="user")
|
||||
|
||||
|
||||
def test_empty_and_array_skips_combinator() -> None:
    """An empty ``AND`` list adds nothing: the output is the bare base clause."""
    expected = " AND ".join(
        (
            "owner_id = 'alice'",
            "owner_type = 'user'",
            "app_id = 'default'",
            "project_id = 'default'",
        )
    )
    empty_group = FilterNode.model_validate({"AND": []})
    clause = compile_filters(empty_group, owner_id="alice", owner_type="user")
    assert clause == expected
|
||||
278
tests/unit/test_memory/test_search/test_hierarchy.py
Normal file
278
tests/unit/test_memory/test_search/test_hierarchy.py
Normal file
@ -0,0 +1,278 @@
|
||||
"""Unit tests for ``memory.search.hierarchy``.
|
||||
|
||||
White-box surfaces accessed:
|
||||
- ``_hierarchy_eviction_pass`` (internal, tested directly for unit coverage)
|
||||
- ``hierarchy_retrieve_episodes`` (public function, tested with stubbed I/O)
|
||||
|
||||
All I/O (fact_recaller, episode_recaller) is injected via AsyncMock stubs.
|
||||
No LanceDB or network calls are made.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from everalgo.types import Candidate, FactCandidate
|
||||
|
||||
from everos.memory.search.hierarchy import (
|
||||
_hierarchy_eviction_pass,
|
||||
hierarchy_retrieve_episodes,
|
||||
)
|
||||
|
||||
# ── Fixtures / helpers ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _episode_candidate(
    *,
    ep_id: str = "ep-1",
    score: float = 0.7,
    memcell_id: str = "mc-1",
) -> Candidate:
    """Build a ``source="vector"`` episode ``Candidate`` with canned metadata.

    Only the id, the score and the ``parent_id`` metadata entry (taken from
    ``memcell_id``) vary; every other metadata value is fixed.
    """
    metadata = {
        "parent_id": memcell_id,
        "owner_id": "u1",
        "owner_type": "user",
        "session_id": "sess-1",
        "timestamp": _ts(),
        "episode": "Some episode text.",
        "sender_ids": ["u1"],
        "subject": "Test subject",
        "summary": "Test summary",
    }
    return Candidate(id=ep_id, score=score, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
def _fact_candidate(
    *,
    fact_id: str = "fact-1",
    parent_episode_id: str = "ep-1",
    score: float = 0.9,
) -> FactCandidate:
    """Build a ``FactCandidate`` with fixed fact text under the given episode."""
    fact_metadata = {"fact": "Some fact text."}
    return FactCandidate(
        id=fact_id,
        parent_episode_id=parent_episode_id,
        score=score,
        metadata=fact_metadata,
    )
|
||||
|
||||
|
||||
def _make_recallers(
|
||||
*,
|
||||
dense_facts: list[Candidate] | None = None,
|
||||
fetched_episodes: list[Candidate] | None = None,
|
||||
facts_for_episodes: dict[str, list[FactCandidate]] | None = None,
|
||||
) -> tuple[MagicMock, MagicMock]:
|
||||
"""Build stubbed fact_recaller and episode_recaller."""
|
||||
fact_recaller = MagicMock()
|
||||
fact_recaller.dense_recall = AsyncMock(return_value=dense_facts or [])
|
||||
fact_recaller.facts_for_episodes = AsyncMock(return_value=facts_for_episodes or {})
|
||||
|
||||
episode_recaller = MagicMock()
|
||||
episode_recaller.fetch_by_parent_ids = AsyncMock(
|
||||
return_value=fetched_episodes or []
|
||||
)
|
||||
|
||||
return fact_recaller, episode_recaller
|
||||
|
||||
|
||||
# ── _hierarchy_eviction_pass unit tests ─────────────────────────────────
|
||||
|
||||
|
||||
class TestHierarchyEvictionPass:
    """Direct tests of ``_hierarchy_eviction_pass`` on hand-built candidates."""

    def test_fact_wins_emits_atomic_fact_scored_item(self) -> None:
        """A fact scoring above its episode is emitted in the episode's place."""
        ep = _episode_candidate(ep_id="ep-1", score=0.5)
        fact = _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.9)

        items = _hierarchy_eviction_pass([ep], {"ep-1": [fact]})

        assert len(items) == 1
        winner = items[0]
        assert winner.item_type == "atomic_fact"
        assert winner.id == "fact-1"
        assert winner.score == pytest.approx(0.9)

    def test_episode_wins_emits_episode_scored_item(self) -> None:
        """An episode scoring above its best fact is emitted as the episode."""
        ep = _episode_candidate(ep_id="ep-1", score=0.8)
        fact = _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.6)

        items = _hierarchy_eviction_pass([ep], {"ep-1": [fact]})

        assert len(items) == 1
        winner = items[0]
        assert winner.item_type == "episode"
        assert winner.id == "ep-1"
        assert winner.score == pytest.approx(0.8)

    def test_no_facts_emits_episode(self) -> None:
        """With an empty facts map the episode is emitted unchanged."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)

        items = _hierarchy_eviction_pass([ep], {})

        assert len(items) == 1
        only = items[0]
        assert only.item_type == "episode"
        assert only.id == "ep-1"

    def test_ordering_preserved_matches_input_order(self) -> None:
        """Output ids follow the order of the input episode list."""
        ordered_inputs = [
            _episode_candidate(ep_id="ep-a", score=0.9, memcell_id="mc-a"),
            _episode_candidate(ep_id="ep-b", score=0.8, memcell_id="mc-b"),
            _episode_candidate(ep_id="ep-c", score=0.7, memcell_id="mc-c"),
        ]

        items = _hierarchy_eviction_pass(ordered_inputs, {})

        assert [item.id for item in items] == ["ep-a", "ep-b", "ep-c"]

    def test_parent_episode_id_set_on_evicted_fact(self) -> None:
        """A winning fact carries the id of the episode it displaced."""
        ep = _episode_candidate(ep_id="ep-1", score=0.4)
        fact = _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.8)

        items = _hierarchy_eviction_pass([ep], {"ep-1": [fact]})

        assert items[0].parent_episode_id == "ep-1"

    def test_episode_wins_parent_episode_id_is_none(self) -> None:
        """A winning episode has ``parent_episode_id`` set to ``None``."""
        ep = _episode_candidate(ep_id="ep-1", score=0.9)
        fact = _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.5)

        items = _hierarchy_eviction_pass([ep], {"ep-1": [fact]})

        assert items[0].parent_episode_id is None

    def test_multiple_episodes_mixed_eviction(self) -> None:
        """Each episode is resolved independently: fact wins, episode wins, no facts."""
        episodes = [
            _episode_candidate(ep_id="ep-1", score=0.5, memcell_id="mc-1"),
            _episode_candidate(ep_id="ep-2", score=0.8, memcell_id="mc-2"),
            _episode_candidate(ep_id="ep-3", score=0.6, memcell_id="mc-3"),
        ]
        facts_by_episode = {
            "ep-1": [
                _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.9)
            ],
            "ep-2": [
                _fact_candidate(fact_id="fact-2", parent_episode_id="ep-2", score=0.4)
            ],
        }

        items = _hierarchy_eviction_pass(episodes, facts_by_episode)

        assert len(items) == 3
        expected = [
            ("atomic_fact", "fact-1"),
            ("episode", "ep-2"),
            ("episode", "ep-3"),
        ]
        for item, (want_type, want_id) in zip(items, expected):
            assert item.item_type == want_type
            assert item.id == want_id

    def test_best_fact_is_first_element_used_for_comparison(self) -> None:
        """With several facts, the first one in the list is the one emitted."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)
        ranked_facts = [
            _fact_candidate(fact_id="fact-best", parent_episode_id="ep-1", score=0.8),
            _fact_candidate(
                fact_id="fact-second", parent_episode_id="ep-1", score=0.3
            ),
        ]

        items = _hierarchy_eviction_pass([ep], {"ep-1": ranked_facts})

        winner = items[0]
        assert winner.item_type == "atomic_fact"
        assert winner.id == "fact-best"

    def test_fact_score_equal_to_episode_score_episode_wins(self) -> None:
        """On a score tie the episode is kept."""
        ep = _episode_candidate(ep_id="ep-1", score=0.7)
        fact = _fact_candidate(fact_id="fact-1", parent_episode_id="ep-1", score=0.7)

        items = _hierarchy_eviction_pass([ep], {"ep-1": [fact]})

        assert items[0].item_type == "episode"
|
||||
|
||||
|
||||
# ── hierarchy_retrieve_episodes integration-style unit tests ─────────────
|
||||
|
||||
|
||||
class TestHierarchyRetrieveEpisodes:
    """Tests of ``hierarchy_retrieve_episodes`` with every recaller stubbed.

    amaxsim_retrieve and rrf are exercised with real implementations but
    all LanceDB / network calls are replaced by AsyncMock.
    """

    @staticmethod
    async def _retrieve(candidates, fact_recaller, episode_recaller):
        """Call ``hierarchy_retrieve_episodes`` with the arguments shared by all tests.

        ``candidates`` is copied into separate ``sparse`` and ``dense`` lists.
        """
        return await hierarchy_retrieve_episodes(
            query="test query",
            sparse=list(candidates),
            dense=list(candidates),
            query_vector=[0.1, 0.2, 0.3],
            fact_recaller=fact_recaller,
            episode_recaller=episode_recaller,
            where="owner_id = 'u1'",
            top_k=10,
        )

    async def test_empty_sparse_dense_returns_empty_list(self) -> None:
        """No sparse or dense candidates yields an empty result."""
        fact_recaller, episode_recaller = _make_recallers()

        episodes = await self._retrieve([], fact_recaller, episode_recaller)

        assert episodes == []

    async def test_happy_path_episode_wins_no_nested_facts(self) -> None:
        """With no facts available the episode is returned with no nested facts."""
        ep = _episode_candidate(ep_id="ep-1", score=0.8, memcell_id="mc-1")
        fact_recaller, episode_recaller = _make_recallers(
            dense_facts=[],
            fetched_episodes=[],
            facts_for_episodes={},
        )

        episodes = await self._retrieve([ep], fact_recaller, episode_recaller)

        assert len(episodes) == 1
        only = episodes[0]
        assert only.id == "ep-1"
        assert only.atomic_facts == []

    async def test_happy_path_fact_evicts_episode_nested_in_result(self) -> None:
        """A higher-scoring fact appears nested under its episode in the result."""
        ep = _episode_candidate(ep_id="ep-2", score=0.6, memcell_id="mc-2")
        fact = _fact_candidate(fact_id="fact-2", parent_episode_id="ep-2", score=0.95)
        dense_fact_row = Candidate(
            id="fact-2",
            score=0.95,
            source="vector",
            metadata={"parent_id": "mc-2"},
        )
        fact_recaller, episode_recaller = _make_recallers(
            dense_facts=[dense_fact_row],
            fetched_episodes=[ep],
            facts_for_episodes={"ep-2": [fact]},
        )

        episodes = await self._retrieve([ep], fact_recaller, episode_recaller)

        assert len(episodes) == 1
        only = episodes[0]
        assert only.atomic_facts != []
        nested = only.atomic_facts[0]
        assert nested.id == "fact-2"
        assert nested.score == pytest.approx(0.95)
|
||||
930
tests/unit/test_memory/test_search/test_manager.py
Normal file
930
tests/unit/test_memory/test_search/test_manager.py
Normal file
@ -0,0 +1,930 @@
|
||||
"""Unit tests for ``SearchManager`` with in-memory stub recallers.
|
||||
|
||||
These tests exercise the orchestration without touching LanceDB. Every
|
||||
recaller is replaced by a hand-rolled stub that returns a small
|
||||
candidate list; the manager's job is to:
|
||||
|
||||
* honour the ``owner_type`` hard partition,
|
||||
* run KEYWORD as sparse-only and leave ``atomic_facts`` empty,
|
||||
* run VECTOR as dense-only (and refuse when no embedding is wired),
|
||||
* let HYBRID run without an LLM by default; require LLM only when the
|
||||
caller sets ``enable_llm_rerank=True``,
|
||||
* refuse AGENTIC when reranker / LLM prerequisites are missing,
|
||||
* delegate AGENTIC to ``search_episodes_agentic`` and return its result.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import Any, ClassVar
|
||||
|
||||
import pytest
|
||||
from everalgo.types import Candidate, FactCandidate
|
||||
|
||||
from everos.memory.search.dto import SearchMethod, SearchRequest
|
||||
from everos.memory.search.manager import SearchManager
|
||||
|
||||
# ── Stubs ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _episode_row(
    eid: str, score: float = 0.8, memcell_id: str | None = None
) -> Candidate:
    """Build a ``source="keyword"`` episode ``Candidate`` owned by user ``alice``.

    Subject, summary and body text are derived from ``eid``. ``parent_id``
    is ``memcell_id`` when given, otherwise ``mc_{eid}``.
    """
    parent_id = f"mc_{eid}" if memcell_id is None else memcell_id
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": f"subj {eid}",
        "summary": f"summary {eid}",
        "episode": f"body {eid}",
        "parent_id": parent_id,
    }
    return Candidate(id=eid, score=score, source="keyword", metadata=metadata)
|
||||
|
||||
|
||||
def _case_row(cid: str) -> Candidate:
    """Build a ``source="keyword"`` agent-case ``Candidate`` (score 0.7) for ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": f"intent {cid}",
        "approach": f"approach {cid}",
        "quality_score": 0.8,
    }
    return Candidate(id=cid, score=0.7, source="keyword", metadata=metadata)
|
||||
|
||||
|
||||
def _skill_row(sid: str) -> Candidate:
    """Build a ``source="keyword"`` agent-skill ``Candidate`` (score 0.65) for ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": f"skill_{sid}",
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(id=sid, score=0.65, source="keyword", metadata=metadata)
|
||||
|
||||
|
||||
class _StubEpisodeRecaller:
|
||||
kind: ClassVar[str] = "episode"
|
||||
everalgo_memory_type: ClassVar[str] = "episodic"
|
||||
text_field: ClassVar[str] = "episode"
|
||||
|
||||
def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
|
||||
self._sparse = sparse
|
||||
self._dense = dense
|
||||
self.last_where: str | None = None
|
||||
|
||||
async def sparse_recall(
|
||||
self, query: str, where: str, *, limit: int
|
||||
) -> list[Candidate]:
|
||||
self.last_where = where
|
||||
return list(self._sparse[:limit])
|
||||
|
||||
async def dense_recall(
|
||||
self, vector: Sequence[float], where: str, *, limit: int
|
||||
) -> list[Candidate]:
|
||||
self.last_where = where
|
||||
return list(self._dense[:limit])
|
||||
|
||||
async def fetch_by_parent_ids(
|
||||
self, parent_ids: Sequence[str], where: str
|
||||
) -> list[Candidate]:
|
||||
# Index dense rows by their parent_id (memcell id) so the maxsim
|
||||
# path's reverse-resolve has something to return.
|
||||
by_parent = {str(c.metadata.get("parent_id", "")): c for c in self._dense}
|
||||
return [by_parent[p] for p in parent_ids if p in by_parent]
|
||||
|
||||
|
||||
class _StubAtomicFactRecaller:
|
||||
kind: ClassVar[str] = "atomic_fact"
|
||||
everalgo_memory_type: ClassVar[str] = "episodic"
|
||||
text_field: ClassVar[str] = "fact"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
facts_map: dict[str, list[FactCandidate]] | None = None,
|
||||
dense: list[Candidate] | None = None,
|
||||
) -> None:
|
||||
self._facts_map = facts_map or {}
|
||||
self._dense = dense or []
|
||||
|
||||
async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
|
||||
return []
|
||||
|
||||
async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
|
||||
return list(self._dense)
|
||||
|
||||
async def facts_for_episodes(
|
||||
self,
|
||||
ep_to_memcell: Mapping[str, str],
|
||||
where: str,
|
||||
*,
|
||||
per_episode: int,
|
||||
query_vector: Any = None,
|
||||
) -> dict[str, list[FactCandidate]]:
|
||||
# ``query_vector`` accepted to match the real recaller signature
|
||||
# Accepted to match the real recaller signature; stub doesn't use it.
|
||||
return {
|
||||
eid: self._facts_map.get(eid, [])[:per_episode] for eid in ep_to_memcell
|
||||
}
|
||||
|
||||
|
||||
class _StubAgentCaseRecaller:
|
||||
kind: ClassVar[str] = "agent_case"
|
||||
everalgo_memory_type: ClassVar[str] = "case"
|
||||
text_field: ClassVar[str] = "task_intent"
|
||||
|
||||
def __init__(self, sparse: list[Candidate], dense: list[Candidate]) -> None:
|
||||
self._sparse = sparse
|
||||
self._dense = dense
|
||||
|
||||
async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
|
||||
return list(self._sparse)
|
||||
|
||||
async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
|
||||
return list(self._dense)
|
||||
|
||||
|
||||
class _StubAgentSkillRecaller:
    """Agent-skill recaller stand-in with canned sparse, dense and bridge pools."""

    kind: ClassVar[str] = "agent_skill"
    everalgo_memory_type: ClassVar[str] = "skill"
    text_field: ClassVar[str] = "description"

    def __init__(
        self,
        sparse: list[Candidate],
        dense: list[Candidate],
        by_case: list[Candidate] | None = None,
    ) -> None:
        self._sparse, self._dense = sparse, dense
        # Canned answer for the case->skill reverse lookup
        # (``fetch_by_case_ids``); stays empty unless a bridge test seeds it.
        self._by_case = by_case if by_case else []

    async def sparse_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned sparse pool; arguments are ignored."""
        return [*self._sparse]

    async def dense_recall(self, *_: Any, **__: Any) -> list[Candidate]:
        """Return a copy of the canned dense pool; arguments are ignored."""
        return [*self._dense]

    async def fetch_by_case_ids(
        self, case_ids: Sequence[str], where: str, *, limit: int
    ) -> list[Candidate]:
        """Return a copy of the canned bridge pool.

        ``case_ids``, ``where`` and ``limit`` are accepted but not consulted.
        """
        return [*self._by_case]
|
||||
|
||||
|
||||
class _StubProfileRecaller:
    """Profile recaller stand-in that never finds a profile."""

    async def fetch(self, owner_id: str) -> list:
        """Return an empty list for any ``owner_id``."""
        no_profiles: list = []
        return no_profiles
|
||||
|
||||
|
||||
class _StubEmbedding:
    """Embedding stand-in that emits all-zero vectors of a fixed width."""

    def __init__(self, dim: int = 4) -> None:
        self.dim = dim

    async def embed(self, text: str) -> list[float]:
        """Return one zero vector of length ``dim``; ``text`` is ignored."""
        return [0.0 for _ in range(self.dim)]

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Return one zero vector of length ``dim`` per entry of ``texts``."""
        width = self.dim
        return [[0.0] * width for _ in texts]
|
||||
|
||||
|
||||
# ── Fixtures ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _build_manager(
    *,
    episode_sparse: list[Candidate] | None = None,
    episode_dense: list[Candidate] | None = None,
    case_sparse: list[Candidate] | None = None,
    case_dense: list[Candidate] | None = None,
    skill_sparse: list[Candidate] | None = None,
    skill_dense: list[Candidate] | None = None,
    skill_by_case: list[Candidate] | None = None,
    facts_map: dict[str, list[FactCandidate]] | None = None,
    atomic_fact_dense: list[Candidate] | None = None,
    embedding: _StubEmbedding | None = None,
    reranker: Any = None,
    llm_client: Any = None,
) -> SearchManager:
    """Assemble a ``SearchManager`` wired entirely to in-memory stub recallers.

    Sparse/dense candidate lists left as ``None`` are replaced by empty lists
    before reaching their stub. ``facts_map``, ``atomic_fact_dense`` and
    ``skill_by_case`` are forwarded to their stubs unchanged, and
    ``embedding`` / ``reranker`` / ``llm_client`` go straight to the manager.
    """
    stub_recallers = {
        "episode_recaller": _StubEpisodeRecaller(
            episode_sparse or [], episode_dense or []
        ),
        "atomic_fact_recaller": _StubAtomicFactRecaller(facts_map, atomic_fact_dense),
        "agent_case_recaller": _StubAgentCaseRecaller(
            case_sparse or [], case_dense or []
        ),
        "agent_skill_recaller": _StubAgentSkillRecaller(
            skill_sparse or [], skill_dense or [], skill_by_case
        ),
        "profile_recaller": _StubProfileRecaller(),
    }
    return SearchManager(
        **stub_recallers,
        embedding=embedding,
        reranker=reranker,
        llm_client=llm_client,
    )
|
||||
|
||||
|
||||
def _user_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build a user-owned request (``user_id="alice"``, ``query="hi"``).

    Extra keyword arguments are forwarded to ``SearchRequest`` unchanged.
    """
    request = SearchRequest(user_id="alice", query="hi", method=method, **kwargs)
    return request
|
||||
|
||||
|
||||
def _agent_req(
    method: SearchMethod = SearchMethod.KEYWORD, **kwargs: Any
) -> SearchRequest:
    """Build an agent-owned request (``agent_id="agent_a"``, ``query="hi"``).

    Extra keyword arguments are forwarded to ``SearchRequest`` unchanged.
    """
    request = SearchRequest(agent_id="agent_a", query="hi", method=method, **kwargs)
    return request
|
||||
|
||||
|
||||
# ── KEYWORD: user owner ────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_user_keyword_returns_episodes_only() -> None:
    """A user-owned KEYWORD search returns the sparse episode hit and no more."""
    manager = _build_manager(episode_sparse=[_episode_row("ep_1")])
    response = await manager.search(_user_req())

    # The request id must be 32 lowercase-hex characters.
    request_id = response.request_id
    assert len(request_id) == 32 and all(
        ch in "0123456789abcdef" for ch in request_id
    )

    payload = response.data
    assert len(payload.episodes) == 1
    episode = payload.episodes[0]
    assert episode.id == "ep_1"
    assert episode.user_id == "alice"
    assert episode.type == "Conversation"

    # Agent-side and profile buckets stay empty for a user owner.
    assert payload.agent_cases == []
    assert payload.agent_skills == []
    assert payload.profiles == []
|
||||
|
||||
|
||||
async def test_user_keyword_leaves_atomic_facts_empty() -> None:
    """The KEYWORD path must not attach atomic facts to returned episodes.

    The stub fact recaller is seeded with a fact for ``ep_1``, yet the
    episode that comes back must carry ``atomic_facts == []``. Keyword hits
    have no per-query score for facts (only HYBRID produces
    relevance-scored facts), so emitting them would muddy the contract.
    This mirrors enterprise, where event_log is a separate memory_type
    rather than something auto-attached to episodic results.
    """
    seeded_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.0,
        metadata={"fact": "Alice prefers oat milk"},
    )
    manager = _build_manager(
        episode_sparse=[_episode_row("ep_1")],
        facts_map={"ep_1": [seeded_fact]},
    )
    response = await manager.search(_user_req())
    first_episode = response.data.episodes[0]
    assert first_episode.atomic_facts == []
|
||||
|
||||
|
||||
async def test_user_keyword_no_results() -> None:
    """With every stub recaller empty, a user KEYWORD search yields no episodes."""
    manager = _build_manager()
    response = await manager.search(_user_req())
    assert response.data.episodes == []
|
||||
|
||||
|
||||
async def test_user_keyword_filters_compile_pinned_owner() -> None:
    """The ``where`` handed to the episode recaller pins owner_id / owner_type.

    The manager is assembled by hand (not through ``_build_manager``) so the
    test keeps a reference to the episode stub and can read its
    ``last_where`` afterwards.
    """
    episode_stub = _StubEpisodeRecaller([_episode_row("ep_1")], [])
    manager = SearchManager(
        episode_recaller=episode_stub,
        atomic_fact_recaller=_StubAtomicFactRecaller(),
        agent_case_recaller=_StubAgentCaseRecaller([], []),
        agent_skill_recaller=_StubAgentSkillRecaller([], []),
        profile_recaller=_StubProfileRecaller(),
        embedding=None,
        reranker=None,
        llm_client=None,
    )
    await manager.search(_user_req())

    captured_where = episode_stub.last_where
    assert captured_where is not None
    for clause in ("owner_id = 'alice'", "owner_type = 'user'"):
        assert clause in captured_where
|
||||
|
||||
|
||||
# ── VECTOR: requires embedding ────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_vector_method_requires_embedding() -> None:
    """VECTOR search on a manager built without an embedding raises RuntimeError."""
    # ``_build_manager`` leaves ``embedding`` as None unless told otherwise.
    manager = _build_manager()
    vector_request = _user_req(method=SearchMethod.VECTOR)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(vector_request)
|
||||
|
||||
|
||||
async def test_vector_method_runs_dense_only_with_embedding() -> None:
    """VECTOR search surfaces the dense hit and none of the sparse pool."""
    sparse_pool = [_episode_row("should_not_appear")]
    dense_pool = [_episode_row("ep_dense")]
    manager = _build_manager(
        episode_sparse=sparse_pool,
        episode_dense=dense_pool,
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.VECTOR))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_dense"]
|
||||
|
||||
|
||||
async def test_vector_radius_filter_drops_below_threshold() -> None:
    """With ``radius=0.5`` only the 0.9-scored dense hit survives; 0.3 is dropped."""
    dense_pool = [
        _episode_row("ep_low", score=0.3),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_pool, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, radius=0.5)
    response = await manager.search(request)
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_high"]
|
||||
|
||||
|
||||
async def test_unlimited_mode_applies_default_radius_for_vector() -> None:
    """``top_k=-1`` with no explicit radius falls back to the project default 0.5.

    This mirrors enterprise's auto-floor behaviour: unlimited mode must not
    return an arbitrarily low-similarity tail.
    """
    dense_pool = [
        _episode_row("ep_low", score=0.3),  # under the 0.5 default -> dropped
        _episode_row("ep_mid", score=0.55),  # over the default -> kept
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_pool, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=-1)
    response = await manager.search(request)
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_mid", "ep_high"]
|
||||
|
||||
|
||||
async def test_unlimited_mode_explicit_radius_overrides_default() -> None:
    """A caller-supplied radius (even ``0.0``) beats the unlimited-mode default."""
    dense_pool = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_pool, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=-1, radius=0.1)
    response = await manager.search(request)
    # A 0.1 threshold keeps both rows; the 0.5 default would have lost ep_low.
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
|
||||
|
||||
|
||||
async def test_normal_mode_keeps_full_pool_when_no_radius() -> None:
    """``top_k > 0`` without a radius applies no threshold; truncation trims."""
    dense_pool = [
        _episode_row("ep_low", score=0.2),
        _episode_row("ep_high", score=0.9),
    ]
    manager = _build_manager(episode_dense=dense_pool, embedding=_StubEmbedding())
    request = _user_req(method=SearchMethod.VECTOR, top_k=10)
    response = await manager.search(request)
    # Normal mode has no default radius, so both rows are retained.
    returned_ids = {episode.id for episode in response.data.episodes}
    assert returned_ids == {"ep_low", "ep_high"}
|
||||
|
||||
|
||||
# ── VECTOR + maxsim_atomic strategy ─────────────────────────────────────
|
||||
|
||||
|
||||
def _atomic_fact_row(fid: str, *, parent_id: str, score: float) -> Candidate:
    """Build the candidate ``AtomicFactRecaller.dense_recall`` would emit.

    The metadata carries fixed owner/session fields for user ``alice``, the
    supplied ``parent_id``, and a synthetic fact text derived from ``fid``.
    """
    fact_metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "parent_id": parent_id,
        "fact": f"fact {fid}",
    }
    return Candidate(id=fid, score=score, source="vector", metadata=fact_metadata)
|
||||
|
||||
|
||||
async def test_vector_maxsim_atomic_max_pools_facts_to_episodes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``vector_strategy=maxsim_atomic`` scores episodes by their best fact.

    Expected pipeline: ANN over atomic facts, max-pool by memcell parent,
    reverse-resolve to the episode, then order episodes by the per-memcell
    maximum fact score.
    """
    from everos.config.settings import load_settings

    # Opt into the MaxSim strategy and drop any previously cached settings.
    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()

    # Two episodes; mc_A owns two facts, mc_B owns one. The best fact score
    # per memcell should become the episode's score.
    episode_pool = [
        _episode_row("ep_A", memcell_id="mc_A"),
        _episode_row("ep_B", memcell_id="mc_B"),
    ]
    fact_pool = [
        _atomic_fact_row("f_A1", parent_id="mc_A", score=0.95),
        _atomic_fact_row("f_A2", parent_id="mc_A", score=0.40),
        _atomic_fact_row("f_B1", parent_id="mc_B", score=0.75),
    ]
    manager = _build_manager(
        episode_dense=episode_pool,
        atomic_fact_dense=fact_pool,
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.VECTOR, top_k=5))
    episodes = response.data.episodes

    # Both episodes come back, highest max-pooled score first.
    assert [episode.id for episode in episodes] == ["ep_A", "ep_B"]
    assert episodes[0].score == pytest.approx(0.95)  # max(0.95, 0.40)
    assert episodes[1].score == pytest.approx(0.75)
|
||||
|
||||
|
||||
async def test_vector_maxsim_atomic_returns_empty_when_no_facts(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """No recalled facts means no memcells to score, so no episodes come back."""
    from everos.config.settings import load_settings

    # Opt into the MaxSim strategy and drop any previously cached settings.
    monkeypatch.setenv("EVEROS_SEARCH__VECTOR_STRATEGY", "maxsim_atomic")
    load_settings.cache_clear()

    manager = _build_manager(
        episode_dense=[_episode_row("ep_A", memcell_id="mc_A")],
        atomic_fact_dense=[],
        embedding=_StubEmbedding(),
    )
    request = _user_req(method=SearchMethod.VECTOR, top_k=5)
    response = await manager.search(request)
    assert response.data.episodes == []
|
||||
|
||||
|
||||
# ── HYBRID / AGENTIC: prerequisite errors ──────────────────────────────
|
||||
|
||||
|
||||
async def test_hybrid_requires_embedding() -> None:
    """HYBRID search on a manager built without an embedding raises RuntimeError."""
    manager = _build_manager()
    hybrid_request = _user_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="embedding"):
        await manager.search(hybrid_request)
|
||||
|
||||
|
||||
async def test_hybrid_does_not_require_llm_by_default() -> None:
    """HYBRID no longer pulls in the LLM automatically.

    With ``enable_llm_rerank=False`` the fusion-only path (RRF / LR) should
    run without an LLM client.
    """
    manager = _build_manager(embedding=_StubEmbedding())
    # Must not raise: the caller has not opted into the Phase-5 rerank.
    response = await manager.search(_user_req(method=SearchMethod.HYBRID))
    # Every stub recaller is empty, so the result is empty too.
    assert response.data.episodes == []
|
||||
|
||||
|
||||
async def test_hybrid_requires_llm_when_enable_llm_rerank_true() -> None:
    """Opting into ``enable_llm_rerank=True`` makes the LLM client mandatory."""
    manager = _build_manager(embedding=_StubEmbedding())
    with pytest.raises(RuntimeError, match="enable_llm_rerank"):
        request = _user_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
        await manager.search(request)
|
||||
|
||||
|
||||
async def test_user_hybrid_episode_fuses_and_evicts_facts() -> None:
    """HYBRID episode path runs the hierarchy pipeline end to end.

    Pipeline under test: RRF -> MaxSim -> merge -> eviction. ``ep_1`` owns a
    fact scoring above its RRF score, so the fact evicts the episode;
    ``ep_2`` has no facts and is emitted as-is.
    """
    first_episode = _episode_row("ep_1", score=0.8, memcell_id="mc_1")
    second_episode = _episode_row("ep_2", score=0.7, memcell_id="mc_2")
    strong_fact = FactCandidate(
        id="f1",
        parent_episode_id="ep_1",
        score=0.95,
        metadata={"fact": "Alice prefers oat milk"},
    )
    manager = _build_manager(
        episode_sparse=[first_episode, second_episode],
        episode_dense=[first_episode, second_episode],
        facts_map={"ep_1": [strong_fact]},
        embedding=_StubEmbedding(),
    )
    response = await manager.search(_user_req(method=SearchMethod.HYBRID, top_k=10))
    episodes = response.data.episodes
    assert len(episodes) >= 1

    matches = [episode for episode in episodes if episode.id == "ep_1"]
    ep1_hit = matches[0] if matches else None
    assert ep1_hit is not None
    assert len(ep1_hit.atomic_facts) == 1
    assert ep1_hit.atomic_facts[0].id == "f1"
|
||||
|
||||
|
||||
async def test_agentic_requires_reranker_and_llm() -> None:
    """AGENTIC search without a reranker raises a "rerank provider" RuntimeError."""
    manager = _build_manager(embedding=_StubEmbedding())
    agentic_request = _user_req(method=SearchMethod.AGENTIC)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(agentic_request)
|
||||
|
||||
|
||||
async def test_agent_hybrid_requires_reranker_without_llm_rerank() -> None:
    """Agent HYBRID without LLM rerank needs a rerank provider up front.

    ``owner_type='agent'`` + HYBRID + ``enable_llm_rerank=False`` reaches the
    skill cross-encoder lane (``skill_hybrid``: rrf -> cross-encoder), so a
    missing rerank provider must fail fast with a config hint instead of
    crashing deep inside the rerank callback.
    """
    manager = _build_manager(embedding=_StubEmbedding())
    hybrid_request = _agent_req(method=SearchMethod.HYBRID)
    with pytest.raises(RuntimeError, match="rerank provider"):
        await manager.search(hybrid_request)
|
||||
|
||||
|
||||
async def test_agent_hybrid_with_llm_rerank_does_not_need_reranker() -> None:
    """The LLM-rerank lane works without a reranker when an LLM is configured.

    That lane skips the cross-encoder and dispatches through ``arank``, so a
    missing reranker is fine as long as the LLM client is present. The stub
    recallers are empty, so the result is empty; the point is that the call
    does not raise on the reranker-absence path.
    """
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    response = await manager.search(request)
    assert response.data.agent_skills == []
    assert response.data.agent_cases == []
|
||||
|
||||
|
||||
class _StubReranker:
    """Minimal reranker stand-in that scores every document ``1.0``."""

    async def rerank(self, query: str, documents: Sequence[str]) -> list[Any]:
        """Return one ``RerankResult`` per document, in input order.

        ``query`` is ignored; each result carries its document index and a
        score of ``1.0``.
        """
        from everos.component.rerank.protocol import RerankResult

        return [
            RerankResult(index=position, score=1.0)
            for position, _ in enumerate(documents)
        ]
|
||||
|
||||
|
||||
class _StubLLM:
    """Minimal LLM stand-in that satisfies the protocol without real calls."""

    async def chat(self, *args: Any, **kwargs: Any) -> Any:
        """Return an empty string regardless of the arguments."""
        reply = ""
        return reply
|
||||
|
||||
|
||||
async def test_agentic_episode_delegates_to_search_episodes_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC delegates to ``search_episodes_agentic`` and returns its result."""
    import datetime as _dt

    from everos.memory.search.dto import SearchEpisodeItem

    canned_item = SearchEpisodeItem(
        id="ep_1",
        score=0.9,
        session_id="s",
        user_id="alice",
        timestamp=_dt.datetime(2026, 1, 1, tzinfo=_dt.UTC),
        sender_ids=["alice"],
        subject="s",
        summary="s",
        episode="body",
        type="Conversation",
        atomic_facts=[],
    )
    fake_result = [canned_item]

    async def _fake_agentic(*args: Any, **kwargs: Any) -> list[SearchEpisodeItem]:
        return fake_result

    # Replace the function the manager module calls with the canned fake.
    monkeypatch.setattr(
        "everos.memory.search.manager.search_episodes_agentic", _fake_agentic
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_user_req(method=SearchMethod.AGENTIC))
    assert response.data.episodes == fake_result
|
||||
|
||||
|
||||
# ── AGENT owner hard partition ─────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_agent_keyword_returns_cases_and_skills_only() -> None:
    """An agent KEYWORD search fills cases and skills; episodes/profiles stay empty."""
    manager = _build_manager(
        case_sparse=[_case_row("c_1")],
        skill_sparse=[_skill_row("s_1")],
    )
    response = await manager.search(_agent_req())
    payload = response.data
    assert payload.episodes == []
    assert payload.profiles == []
    assert [case.id for case in payload.agent_cases] == ["c_1"]
    assert [skill.id for skill in payload.agent_skills] == ["s_1"]
|
||||
|
||||
|
||||
async def test_agent_owner_ignores_include_profile() -> None:
    """Profiles are user-only at this revision, even with ``include_profile=True``."""
    manager = _build_manager()
    request = _agent_req(include_profile=True)
    response = await manager.search(request)
    assert response.data.profiles == []
|
||||
|
||||
|
||||
# ── Top-k behaviour ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_top_k_truncates_results() -> None:
    """``top_k=3`` keeps only the first three of ten descending-score episodes."""
    sparse_pool = []
    for i in range(10):
        sparse_pool.append(_episode_row(f"ep_{i}", score=1.0 - i * 0.01))
    manager = _build_manager(episode_sparse=sparse_pool)
    response = await manager.search(_user_req(top_k=3))
    returned_ids = [episode.id for episode in response.data.episodes]
    assert returned_ids == ["ep_0", "ep_1", "ep_2"]
|
||||
|
||||
|
||||
async def test_top_k_minus_one_caps_at_100() -> None:
    """``top_k=-1`` over a 120-row sparse pool returns exactly 100 episodes."""
    sparse_pool = [_episode_row(f"ep_{i}") for i in range(120)]
    manager = _build_manager(episode_sparse=sparse_pool)
    response = await manager.search(_user_req(top_k=-1))
    returned_count = len(response.data.episodes)
    assert returned_count == 100
|
||||
|
||||
|
||||
# ── AGENTIC agent_case / agent_skill delegation ───────────────────────────
|
||||
|
||||
|
||||
async def test_agentic_agent_cases_delegates_to_search_agent_cases_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC for an agent owner delegates to ``search_agent_cases_agentic``."""
    import datetime as _dt

    from everos.memory.search.dto import SearchAgentCaseItem

    canned_case = SearchAgentCaseItem(
        id="c_1",
        agent_id="agent_a",
        session_id="sess_b",
        timestamp=_dt.datetime(2026, 1, 1, tzinfo=_dt.UTC),
        task_intent="handle login",
        approach="retry with backoff",
        quality_score=0.9,
        score=0.85,
    )
    fake_cases = [canned_case]

    async def _fake_cases_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentCaseItem]:
        return fake_cases

    # Replace the function the manager module calls with the canned fake.
    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_cases_agentic",
        _fake_cases_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_agent_req(method=SearchMethod.AGENTIC))
    assert response.data.agent_cases == fake_cases
|
||||
|
||||
|
||||
async def test_agentic_agent_skills_delegates_to_search_agent_skills_agentic(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AGENTIC for an agent owner delegates to ``search_agent_skills_agentic``."""
    from everos.memory.search.dto import SearchAgentSkillItem

    canned_skill = SearchAgentSkillItem(
        id="s_1",
        agent_id="agent_a",
        name="auth_refresh",
        description="Refreshes auth tokens",
        content="Retry with new token",
        confidence=0.9,
        maturity_score=0.7,
        source_case_ids=[],
        score=0.8,
    )
    fake_skills = [canned_skill]

    async def _fake_skills_agentic(
        *args: Any, **kwargs: Any
    ) -> list[SearchAgentSkillItem]:
        return fake_skills

    # Replace the function the manager module calls with the canned fake.
    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_agentic",
        _fake_skills_agentic,
    )

    manager = _build_manager(
        embedding=_StubEmbedding(),
        reranker=_StubReranker(),
        llm_client=_StubLLM(),
    )
    response = await manager.search(_agent_req(method=SearchMethod.AGENTIC))
    assert response.data.agent_skills == fake_skills
|
||||
|
||||
|
||||
# ── _merge_by_id_max / _case_bridged_skills helpers ──────────────────────
|
||||
|
||||
|
||||
def test_merge_by_id_max_keeps_higher_score_on_collision() -> None:
    """On an id collision the higher score wins; other rows are unioned.

    This helper folds bridge candidates into the direct dense pool.
    """
    from everos.memory.search.manager import _merge_by_id_max

    def _row(cid: str, score: float, origin: str) -> Candidate:
        return Candidate(id=cid, score=score, source="vector", metadata={"src": origin})

    primary = [_row("s1", 0.5, "primary"), _row("s2", 0.7, "primary")]
    extra = [
        _row("s1", 0.9, "bridge"),
        _row("s2", 0.3, "bridge"),
        _row("s3", 0.6, "bridge"),
    ]

    merged = {}
    for candidate in _merge_by_id_max(primary, extra):
        merged[candidate.id] = candidate

    # s1: bridge wins (0.9 > 0.5). s2: primary wins (0.7 > 0.3).
    # s3 exists only in the bridge pool and is simply added.
    assert merged["s1"].score == 0.9
    assert merged["s1"].metadata["src"] == "bridge"
    assert merged["s2"].score == 0.7
    assert merged["s2"].metadata["src"] == "primary"
    assert merged["s3"].score == 0.6
|
||||
|
||||
|
||||
async def test_case_bridged_skills_max_pools_score_across_source_cases() -> None:
    """A bridged skill takes the best score among its matched source cases.

    This mirrors the ``maxsim_atomic`` fact->episode pooling. Source cases
    that are absent from the bridge pool are ignored.
    """
    bridged_skill = Candidate(
        id="agent_a_skill_x",
        score=0.0,  # the bridge disregards the recaller-side score
        source="vector",
        metadata={"source_case_ids": ["c1", "c2", "c3"], "name": "x"},
    )
    manager = _build_manager(skill_by_case=[bridged_skill])
    bridge_cases = [
        Candidate(id="c1", score=0.4, source="vector", metadata={}),
        Candidate(id="c2", score=0.9, source="vector", metadata={}),  # max wins
        Candidate(id="c_other", score=0.7, source="vector", metadata={}),
    ]

    result = await manager._case_bridged_skills(bridge_cases, where="", top_k=5)

    assert len(result) == 1
    only_skill = result[0]
    assert only_skill.id == "agent_a_skill_x"
    # c1 (0.4) and c2 (0.9) are in the bridge pool, c3 is not: max-pool is 0.9.
    assert only_skill.score == pytest.approx(0.9)
    # Metadata, including ``source_case_ids``, rides through so downstream
    # shaping needs no second fetch.
    assert only_skill.metadata["source_case_ids"] == ["c1", "c2", "c3"]
|
||||
|
||||
|
||||
async def test_case_bridged_skills_returns_empty_for_none_or_empty_input() -> None:
    """No bridge cases means no bridge recall: the reverse fetch is skipped.

    This is the cross-encoder lane / KEYWORD / VECTOR contract.
    """
    # The stub is seeded with noise that must NOT surface in the result.
    manager = _build_manager(skill_by_case=[_skill_row("s1")])
    for empty_input in (None, []):
        bridged = await manager._case_bridged_skills(empty_input, where="", top_k=5)
        assert bridged == []
|
||||
|
||||
|
||||
# ── Agent HYBRID lane selection ──────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_agent_hybrid_no_llm_rerank_runs_cross_encoder_lane(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Agent HYBRID with ``enable_llm_rerank=False`` uses the cross-encoder lane.

    The call must go through ``search_agent_skills_hybrid`` (rrf ->
    cross-encoder) with the configured reranker, not through generic
    ``arank``.
    """
    captured: dict[str, Any] = {}

    async def _fake_hybrid(
        query: str,
        *,
        sparse: list[Candidate],
        dense: list[Candidate],
        reranker: Any,
        top_k: int,
    ) -> list:
        # Record every argument the manager passed, then return no skills.
        captured["query"] = query
        captured["sparse"] = sparse
        captured["dense"] = dense
        captured["reranker"] = reranker
        captured["top_k"] = top_k
        return []

    monkeypatch.setattr(
        "everos.memory.search.manager.search_agent_skills_hybrid", _fake_hybrid
    )
    stub_reranker = _StubReranker()
    manager = _build_manager(embedding=_StubEmbedding(), reranker=stub_reranker)
    await manager.search(_agent_req(method=SearchMethod.HYBRID))

    assert captured["query"] == "hi"
    # The manager hands its own configured reranker to the cross-encoder lane.
    assert captured["reranker"] is stub_reranker
    # Agent kinds cap unlimited-mode top_k at _AGENT_TOP_K_CAP (10).
    assert captured["top_k"] == 10
|
||||
|
||||
|
||||
async def test_agent_hybrid_llm_rerank_dispatches_arank_for_case_then_skill(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The LLM rerank lane calls ``arank`` for cases first, then for skills.

    ``_search_cases_and_skills`` runs serially: one ``arank`` call with
    ``memory_type="case"`` and one with ``memory_type="skill"``, both with
    ``enable_rerank=True`` and the LLM client. The case call has to come
    first so its results can feed the bridge.
    """
    from everalgo.types import RankOutput

    recorded_calls: list[tuple[str, dict[str, Any]]] = []

    async def _fake_arank(rank_input: Any, **kwargs: Any) -> RankOutput:
        recorded_calls.append((rank_input.memory_type, kwargs))
        return RankOutput(items=[], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    manager = _build_manager(embedding=_StubEmbedding(), llm_client=_StubLLM())
    request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    await manager.search(request)

    # Exactly two dispatches, in the documented serial order.
    dispatched_types = [memory_type for memory_type, _ in recorded_calls]
    assert dispatched_types == ["case", "skill"]
    # Each dispatch opts into rerank and carries the manager's LLM client.
    for _memory_type, call_kwargs in recorded_calls:
        assert call_kwargs["enable_rerank"] is True
        assert call_kwargs["llm"] is manager._llm
        assert call_kwargs["rerank_top_k"] == 10  # _AGENT_TOP_K_CAP
|
||||
|
||||
|
||||
async def test_agent_hybrid_llm_rerank_merges_bridged_skills_into_dense_pool(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Bridged skills must reach the skill dispatch alongside direct recall.

    Skills resolved by ``fetch_by_case_ids`` are max-pooled into the dense
    candidates that ``arank`` sees on its second call, while the direct
    skill recall pool is preserved.
    """
    from everalgo.types import RankOutput, ScoredItem

    # The shaper needs owner_type="agent" plus timestamp and intent/approach;
    # without them the case is dropped and bridge_cases comes back empty.
    case_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_b",
        "timestamp": _ts(),
        "task_intent": "intent c1",
        "approach": "approach c1",
        "quality_score": 0.8,
    }
    ranked_case = ScoredItem(
        id="agent_a_c1",
        score=0.85,
        item_type="case",
        metadata=case_metadata,
    )
    direct_skill = _skill_row("s_direct")
    bridged_skill = Candidate(
        id="s_bridged",
        score=0.0,
        source="vector",
        metadata={"source_case_ids": ["agent_a_c1"], "name": "s_bridged"},
    )

    seen_skill_dense: dict[str, list[Candidate]] = {}

    async def _fake_arank(rank_input: Any, **_: Any) -> RankOutput:
        if rank_input.memory_type != "case":
            # Skill call: snapshot the merged dense pool the manager built.
            seen_skill_dense["dense"] = list(rank_input.dense_candidates)
            return RankOutput(items=[], metadata={})
        return RankOutput(items=[ranked_case], metadata={})

    monkeypatch.setattr("everos.memory.search.manager.arank", _fake_arank)
    manager = _build_manager(
        embedding=_StubEmbedding(),
        llm_client=_StubLLM(),
        skill_sparse=[],
        skill_dense=[direct_skill],
        skill_by_case=[bridged_skill],
    )
    request = _agent_req(method=SearchMethod.HYBRID, enable_llm_rerank=True)
    await manager.search(request)

    dense_pool = seen_skill_dense["dense"]
    # Direct dense recall survives AND the case-bridged skill is unioned in.
    assert {candidate.id for candidate in dense_pool} == {"s_direct", "s_bridged"}
    # The bridged skill inherits the matched case's score (0.85 from c1).
    pool_by_id = {candidate.id: candidate for candidate in dense_pool}
    assert pool_by_id["s_bridged"].score == pytest.approx(0.85)
|
||||
145
tests/unit/test_memory/test_search/test_recall_agent_skill.py
Normal file
145
tests/unit/test_memory/test_search/test_recall_agent_skill.py
Normal file
@ -0,0 +1,145 @@
|
||||
"""Real-LanceDB tests for ``AgentSkillRecaller.fetch_by_case_ids``.
|
||||
|
||||
The case→skill bridge reverse-resolves skills by ``source_case_ids``
|
||||
membership using DataFusion's ``array_has`` on a ``list<utf8>`` column.
|
||||
These tests exercise the actual SQL ``where`` predicate (no recaller
|
||||
stubs):
|
||||
|
||||
* OR-composition over multiple case ids,
|
||||
* hits respect the partition filter (``where`` passed by the caller),
|
||||
* empty case-id input short-circuits without a LanceDB call,
|
||||
* case ids containing single quotes round-trip safely via the ``_q``
|
||||
escaper.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.component.tokenizer import Tokenizer
|
||||
from everos.infra.persistence.lancedb import (
|
||||
AgentSkill as LanceAgentSkill,
|
||||
)
|
||||
from everos.infra.persistence.lancedb import (
|
||||
agent_skill_repo,
|
||||
lancedb_manager,
|
||||
)
|
||||
from everos.memory.search.recall.agent_skill import AgentSkillRecaller
|
||||
from everos.memory.search.recall.base import RecallerDeps
|
||||
|
||||
|
||||
class _WhitespaceTokenizer(Tokenizer):
    """Whitespace tokenizer that exists only to satisfy the deps contract.

    The bridge reverse-fetch never tokenises.
    """

    def tokenize(self, text: str) -> list[str]:
        """Split ``text`` on runs of whitespace."""
        tokens = text.split()
        return tokens
|
||||
|
||||
|
||||
def _skill_row(
    *,
    name: str,
    owner_id: str,
    source_case_ids: list[str],
) -> LanceAgentSkill:
    """Build an agent-owned ``LanceAgentSkill`` row for seeding the table.

    The row id is ``"<owner_id>_<name>"``. Description and content, and their
    token columns, are derived from ``name``; the vector is 1024 zeros.
    """
    description = f"desc {name}"
    body = f"body of {name}"
    return LanceAgentSkill(
        id=f"{owner_id}_{name}",
        owner_id=owner_id,
        owner_type="agent",
        name=name,
        description=description,
        description_tokens=description,
        content=body,
        content_tokens=body,
        confidence=0.7,
        maturity_score=0.6,
        source_case_ids=source_case_ids,
        cluster_id=None,
        md_path=f"agents/{owner_id}/skills/{name}/SKILL.md",
        content_sha256="x" * 64,
        vector=[0.0] * 1024,
    )
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Give each test its own LanceDB under a temporary memory root.

    Setup points ``EVEROS_MEMORY__ROOT`` at ``tmp_path`` and clears the
    manager's cached connection and table map. Teardown disposes the
    connection.
    """
    memory_root = str(tmp_path)
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", memory_root)
    # Forget cached state so the manager does not reuse an earlier connection.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
def _recaller() -> AgentSkillRecaller:
    """Build an ``AgentSkillRecaller`` whose deps carry the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return AgentSkillRecaller(deps)
|
||||
|
||||
|
||||
# Owner-partition predicate passed as the ``where`` argument to
# ``fetch_by_case_ids`` by the tests in this module (agent owner ``agt``).
_OWNER_WHERE = "owner_id = 'agt' AND owner_type = 'agent'"
|
||||
|
||||
|
||||
async def test_fetch_by_case_ids_matches_any_lineage_case() -> None:
    """A skill is returned when any one of its ``source_case_ids`` is queried.

    The lookup is an OR over the requested case ids: ``s1`` matches via
    ``c_a``, ``s2`` via ``c_c``, and ``s3`` matches nothing.
    """
    lineage = {
        "s1": ["c_a", "c_b"],
        "s2": ["c_c"],
        "s3": ["c_d"],
    }
    rows = [
        _skill_row(name=skill, owner_id="agt", source_case_ids=case_ids)
        for skill, case_ids in lineage.items()
    ]
    await agent_skill_repo.upsert(rows)

    recaller = _recaller()
    got = await recaller.fetch_by_case_ids(["c_a", "c_c"], _OWNER_WHERE, limit=10)

    returned_ids = sorted(c.id for c in got)
    assert returned_ids == ["agt_s1", "agt_s2"]
|
||||
|
||||
|
||||
async def test_fetch_by_case_ids_respects_owner_partition() -> None:
    """A matching skill in another owner partition must not leak through.

    The caller's ``where`` clause is AND-composed with ``array_has(...)``,
    so only the ``agt`` copy of ``s1`` may come back.
    """
    rows = [
        _skill_row(name="s1", owner_id=owner, source_case_ids=["c_a"])
        for owner in ("agt", "other")
    ]
    await agent_skill_repo.upsert(rows)

    recaller = _recaller()
    got = await recaller.fetch_by_case_ids(["c_a"], _OWNER_WHERE, limit=10)

    returned_ids = [c.id for c in got]
    assert returned_ids == ["agt_s1"]
|
||||
|
||||
|
||||
async def test_fetch_by_case_ids_returns_empty_for_no_ids() -> None:
    """An empty case-id list yields an empty result.

    The recaller is expected to short-circuit without issuing a LanceDB
    query; only the returned value is asserted here.
    """
    recaller = _recaller()
    got = await recaller.fetch_by_case_ids([], _OWNER_WHERE, limit=10)
    assert got == []
|
||||
|
||||
|
||||
async def test_fetch_by_case_ids_escapes_single_quotes() -> None:
    """A case id containing ``'`` must not break the SQL string literal.

    The ``_q`` escaper doubles the quote (``'`` -> ``''``, per the SQL
    standard); without it the where-clause literal would terminate early.
    """
    quoted_id = "ac_o'brien_0001"
    row = _skill_row(name="s1", owner_id="agt", source_case_ids=[quoted_id])
    await agent_skill_repo.upsert([row])

    recaller = _recaller()
    got = await recaller.fetch_by_case_ids([quoted_id], _OWNER_WHERE, limit=10)

    returned_ids = [c.id for c in got]
    assert returned_ids == ["agt_s1"]
|
||||
|
||||
|
||||
async def test_fetch_by_case_ids_carries_source_case_ids_in_metadata() -> None:
    """The complete ``source_case_ids`` list comes back in the metadata.

    The manager's max-pool needs every lineage case id (not only the one
    that matched) to score against the caller's case_score map.
    """
    all_case_ids = ["c_a", "c_b", "c_c"]
    row = _skill_row(name="s1", owner_id="agt", source_case_ids=all_case_ids)
    await agent_skill_repo.upsert([row])

    recaller = _recaller()
    got = await recaller.fetch_by_case_ids(["c_a"], _OWNER_WHERE, limit=10)

    assert len(got) == 1
    carried = sorted(got[0].metadata["source_case_ids"])
    assert carried == ["c_a", "c_b", "c_c"]
|
||||
264
tests/unit/test_memory/test_search/test_recall_atomic_fact.py
Normal file
264
tests/unit/test_memory/test_search/test_recall_atomic_fact.py
Normal file
@ -0,0 +1,264 @@
|
||||
"""Real-LanceDB tests for ``AtomicFactRecaller.facts_for_episodes``.
|
||||
|
||||
The MRAG bridge is the only path that links facts back to episodes, and
|
||||
the previous ``parent_type='episode' AND parent_id IN (episode_ids)``
|
||||
query never matched: cascade writes facts with
|
||||
``parent_type='memcell'``, ``parent_id=memcell_id``. The fixed version
|
||||
takes an ``episode → memcell`` map from the caller, queries by the
|
||||
deduped memcell set, and re-buckets results under every episode that
|
||||
shares each memcell.
|
||||
|
||||
These tests exercise the real LanceDB query path (no recaller stubs):
|
||||
- shared memcell → fact appears under both episodes,
|
||||
- distinct memcells → facts bucket exclusively to their owning episode,
|
||||
- empty / unknown memcells → empty result, no LanceDB call surprise.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.component.tokenizer import Tokenizer
|
||||
from everos.infra.persistence.lancedb import (
|
||||
AtomicFact,
|
||||
ParentType,
|
||||
atomic_fact_repo,
|
||||
lancedb_manager,
|
||||
)
|
||||
from everos.memory.search.recall.atomic_fact import AtomicFactRecaller
|
||||
from everos.memory.search.recall.base import RecallerDeps
|
||||
|
||||
|
||||
class _WhitespaceTokenizer(Tokenizer):
    """Trivial ``Tokenizer`` implementation for ``RecallerDeps``.

    The bridge under test does not touch text tokenisation, so a plain
    whitespace split suffices.
    """

    def tokenize(self, text: str) -> list[str]:
        """Return the whitespace-separated pieces of ``text``."""
        pieces = text.split()
        return pieces
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _fact_row(
    *,
    fid: str,
    memcell_id: str,
    fact: str,
    owner_id: str = "alice",
) -> AtomicFact:
    """Build an ``AtomicFact`` row hanging off memcell ``memcell_id``.

    ``entry_id`` is the part of ``fid`` after its first underscore (or the
    whole ``fid`` when it has none). The row is written with
    ``parent_type`` = memcell, mirroring how facts are linked in these
    tests, and carries an all-zero 1024-wide vector.
    """
    if "_" in fid:
        entry_id = fid.split("_", 1)[1]
    else:
        entry_id = fid
    md_path = f"users/{owner_id}/.atomic_facts/atomic_fact-2026-01-01.md"
    zero_vector = [0.0] * 1024
    return AtomicFact(
        id=fid,
        entry_id=entry_id,
        owner_id=owner_id,
        owner_type="user",
        session_id="sess_1",
        timestamp=_ts(),
        parent_type=ParentType.MEMCELL.value,
        parent_id=memcell_id,
        sender_ids=[owner_id],
        fact=fact,
        fact_tokens=fact,
        md_path=md_path,
        content_sha256="x" * 64,
        vector=zero_vector,
    )
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Run each test against a LanceDB rooted in its own ``tmp_path``.

    Setup redirects ``EVEROS_MEMORY__ROOT`` and resets the manager's private
    ``_conn`` / ``_tables`` state; teardown disposes the connection.
    """
    memory_root = str(tmp_path)
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", memory_root)
    # Forget connection / table handles left over from a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()

    yield

    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
def _recaller() -> AtomicFactRecaller:
    """Return an ``AtomicFactRecaller`` wired with the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return AtomicFactRecaller(deps)
|
||||
|
||||
|
||||
async def test_facts_for_episodes_buckets_by_shared_memcell() -> None:
    """Episodes that share a memcell each receive that memcell's facts.

    Episode-level fan-out (the Episode pipeline runs once per cell but emits
    one Episode per user sender) produces several LanceDB episode rows that
    point at the same memcell. Every fact attached to that memcell must be
    surfaced under each of those episode ids.
    """
    rows = [
        _fact_row(fid="alice_af_1", memcell_id="mc_shared", fact="likes hiking"),
        _fact_row(fid="alice_af_2", memcell_id="mc_shared", fact="lives in tokyo"),
        _fact_row(fid="alice_af_3", memcell_id="mc_other", fact="prefers oat milk"),
    ]
    await atomic_fact_repo.upsert(rows)

    ep_to_memcell = {
        "alice_ep_a": "mc_shared",
        "alice_ep_b": "mc_shared",
        "alice_ep_c": "mc_other",
    }
    out = await _recaller().facts_for_episodes(
        ep_to_memcell,
        "owner_id = 'alice' AND owner_type = 'user'",
        per_episode=10,
    )

    assert sorted(out.keys()) == ["alice_ep_a", "alice_ep_b", "alice_ep_c"]
    shared_ids = ["alice_af_1", "alice_af_2"]
    assert sorted(f.id for f in out["alice_ep_a"]) == shared_ids
    assert sorted(f.id for f in out["alice_ep_b"]) == shared_ids
    assert [f.id for f in out["alice_ep_c"]] == ["alice_af_3"]

    # parent_episode_id names the *bucket* episode, not the underlying
    # memcell, so the same fact shows up once per episode with a different
    # parent_episode_id each time.
    for episode_id in ("alice_ep_a", "alice_ep_b"):
        fact1 = next(f for f in out[episode_id] if f.id == "alice_af_1")
        assert fact1.parent_episode_id == episode_id
|
||||
|
||||
|
||||
async def test_facts_for_episodes_returns_empty_for_no_episodes() -> None:
    """An empty episode-to-memcell map produces an empty result mapping."""
    recaller = _recaller()
    out = await recaller.facts_for_episodes({}, "owner_id = 'alice'", per_episode=10)
    assert out == {}
|
||||
|
||||
|
||||
async def test_facts_for_episodes_skips_unknown_memcells() -> None:
    """An episode whose memcell has no facts is absent from the result."""
    only_fact = _fact_row(fid="alice_af_1", memcell_id="mc_a", fact="hello")
    await atomic_fact_repo.upsert([only_fact])

    ep_to_memcell = {"alice_ep_a": "mc_a", "alice_ep_b": "mc_missing"}
    where = "owner_id = 'alice' AND owner_type = 'user'"
    out = await _recaller().facts_for_episodes(ep_to_memcell, where, per_episode=10)

    assert "alice_ep_a" in out
    assert "alice_ep_b" not in out
    found_ids = [f.id for f in out["alice_ep_a"]]
    assert found_ids == ["alice_af_1"]
|
||||
|
||||
|
||||
async def test_facts_for_episodes_filters_by_where_clause() -> None:
    """The caller's where clause (here: owner pinning) is still applied.

    Both owners have a fact on the same memcell; only alice's may return.
    """
    alice_fact = _fact_row(
        fid="alice_af_1",
        memcell_id="mc_a",
        fact="alice fact",
        owner_id="alice",
    )
    bob_fact = _fact_row(
        fid="bob_af_1",
        memcell_id="mc_a",
        fact="bob fact",
        owner_id="bob",
    )
    await atomic_fact_repo.upsert([alice_fact, bob_fact])

    where = "owner_id = 'alice' AND owner_type = 'user'"
    out = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_a"}, where, per_episode=10
    )

    found_ids = [f.id for f in out["alice_ep_a"]]
    assert found_ids == ["alice_af_1"]
|
||||
|
||||
|
||||
async def test_facts_for_episodes_drops_empty_memcell_ids() -> None:
    """Episodes mapped to an empty-string memcell id are silently dropped.

    Real-world cause: a candidate row that lost its ``parent_id`` (data
    corruption, manual edit). The bridge must neither crash nor emit
    ``parent_id IN ('')``, which would match every row in the table whose
    parent id is the empty string.
    """
    orphan = _fact_row(fid="alice_af_1", memcell_id="", fact="orphan fact")
    await atomic_fact_repo.upsert([orphan])

    where = "owner_id = 'alice' AND owner_type = 'user'"
    out = await _recaller().facts_for_episodes(
        {"alice_ep_a": ""}, where, per_episode=10
    )

    assert out == {}
|
||||
|
||||
|
||||
# ── MRAG fact-level scoring (regression for query_vector handling) ─────
|
||||
|
||||
|
||||
def _unit_vector(direction: int, dim: int = 1024) -> list[float]:
    """Return a ``dim``-wide vector with 1.0 on axis ``direction``, 0 elsewhere.

    Used to build deterministic cosine relationships in the tests below:
    same direction -> distance 0 (score 1.0); orthogonal -> distance 1
    (score 0.0). The ``vector`` field on AtomicFact requires 1024-dim, so
    any test that goes through ``.nearest_to`` needs the full width.

    Args:
        direction: Zero-based axis that receives the 1.0.
        dim: Length of the returned vector.

    Raises:
        IndexError: If ``direction`` is not in ``range(dim)``. Negative
            values are rejected explicitly; plain list indexing would
            silently wrap them around to an axis counted from the end.
    """
    if not 0 <= direction < dim:
        raise IndexError(f"direction {direction} is outside range(0, {dim})")
    out = [0.0] * dim
    out[direction] = 1.0
    return out
|
||||
|
||||
|
||||
async def test_facts_for_episodes_assigns_real_cosine_score_with_query_vector() -> (
    None
):
    """Regression: passing ``query_vector`` yields real cosine scores.

    Before the fix, ``facts_for_episodes`` only ran
    ``where parent_id IN (...)`` and emitted every fact with ``score=0.0``,
    so MRAG fact-level ranking collapsed to insertion order. After the fix,
    ``query_vector`` flows into ``.nearest_to(...).distance_type('cosine')``
    and each fact carries its own query-to-fact relevance score.

    Setup:
    - fact A lies on axis 0 (same direction as the query): cosine
      distance 0, score close to 1.0.
    - fact B lies on axis 1 (orthogonal to the query): cosine distance 1,
      score close to 0.0.

    Expectation: A ranks first, scores strictly higher than B, and the two
    scores fall on opposite sides of 0.5 (which the old hardcoded ``0.0``
    could not satisfy).
    """
    specs = (
        ("alice_af_1", "close fact", 0),
        ("alice_af_2", "far fact", 1),
    )
    rows = []
    for fid, text, axis in specs:
        row = _fact_row(fid=fid, memcell_id="mc_shared", fact=text)
        row.vector = _unit_vector(axis)
        rows.append(row)
    await atomic_fact_repo.upsert(rows)

    where = "owner_id = 'alice' AND owner_type = 'user'"
    out = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_shared"},
        where,
        per_episode=10,
        query_vector=_unit_vector(0),
    )

    facts = out["alice_ep_a"]
    ordered_ids = [f.id for f in facts]
    assert ordered_ids == ["alice_af_1", "alice_af_2"], (
        "facts must be ordered by cosine distance ascending (closest first)"
    )
    # The id assertion above guarantees exactly two entries.
    closest, farthest = facts
    assert closest.score > farthest.score, "real cosine scoring must differentiate"
    assert closest.score > 0.5, "near-identical vectors should score close to 1"
    assert farthest.score < 0.5, "orthogonal vectors should score close to 0"
|
||||
|
||||
|
||||
async def test_facts_for_episodes_score_zero_without_query_vector() -> None:
    """Backward-compat: without ``query_vector`` the flat scan is used.

    Callers that do not need fact-level relevance (e.g. a KV-style fetch
    where the parent ranking already carries the signal) keep the old
    ``score=0.0`` semantics. This pins that contract so the fallback path
    is clearly intentional rather than an oversight.
    """
    fact = _fact_row(fid="alice_af_1", memcell_id="mc_a", fact="anything")
    fact.vector = _unit_vector(0)
    await atomic_fact_repo.upsert([fact])

    where = "owner_id = 'alice' AND owner_type = 'user'"
    # Deliberately no query_vector argument.
    out = await _recaller().facts_for_episodes(
        {"alice_ep_a": "mc_a"}, where, per_episode=10
    )

    first_fact = out["alice_ep_a"][0]
    assert first_fact.score == 0.0
|
||||
108
tests/unit/test_memory/test_search/test_recall_episode.py
Normal file
108
tests/unit/test_memory/test_search/test_recall_episode.py
Normal file
@ -0,0 +1,108 @@
|
||||
"""Unit tests for ``EpisodeRecaller.fetch_all_for_owner``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.component.tokenizer import Tokenizer
|
||||
from everos.memory.search.recall.base import RecallerDeps
|
||||
from everos.memory.search.recall.episode import EpisodeRecaller
|
||||
|
||||
|
||||
def _make_row(ep_id: str, mc_id: str) -> dict[str, Any]:
    """Return a minimal episode row dict, as LanceDB would hand it back.

    ``ep_id`` becomes the row ``id`` and is echoed into the text fields;
    ``mc_id`` becomes ``parent_id``. All other fields are fixed values.
    """
    subject = f"subj {ep_id}"
    summary = f"summary {ep_id}"
    body = f"body {ep_id}"
    row: dict[str, Any] = {
        "id": ep_id,
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_1",
        "timestamp": 1000000,
        "sender_ids": ["alice"],
        "subject": subject,
        "summary": summary,
        "episode": body,
    }
    row["parent_id"] = mc_id
    return row
|
||||
|
||||
|
||||
def _mock_table(rows: list[dict[str, Any]]) -> MagicMock:
    """Return a mock table whose ``query().where(...).to_list()`` gives ``rows``.

    ``to_list`` is an ``AsyncMock``, so the call must be awaited.
    """
    table = MagicMock()
    filtered_query = table.query.return_value.where.return_value
    filtered_query.to_list = AsyncMock(return_value=rows)
    return table
|
||||
|
||||
|
||||
@pytest.fixture()
def recaller() -> EpisodeRecaller:
    """Provide an ``EpisodeRecaller`` backed by a mocked ``Tokenizer``.

    The mock's ``tokenize`` always returns ``["hi"]``.
    """
    tokenizer = MagicMock(spec=Tokenizer)
    tokenizer.tokenize.return_value = ["hi"]
    deps = RecallerDeps(tokenizer=tokenizer)
    return EpisodeRecaller(deps)
|
||||
|
||||
|
||||
async def test_fetch_all_for_owner_returns_memcell_keyed_candidates(
    recaller: EpisodeRecaller,
) -> None:
    """Candidate ``id`` must be the row's ``parent_id`` (the memcell id).

    ``acluster_retrieve`` membership relies on candidates being keyed by
    memcell id rather than episode id.
    """
    table = _mock_table(
        [
            _make_row("ep_1", "mc_1"),
            _make_row("ep_2", "mc_2"),
        ]
    )
    with patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=table,
    ):
        result = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    assert len(result) == 2
    candidate_ids = {c.id for c in result}
    assert candidate_ids == {"mc_1", "mc_2"}, "id must be memcell_id, not episode_id"
|
||||
|
||||
|
||||
async def test_fetch_all_for_owner_stores_episode_id_in_metadata(
    recaller: EpisodeRecaller,
) -> None:
    """The real LanceDB episode id survives in ``metadata['episode_id']``.

    Final shaping needs it, because the candidate ``id`` itself is the
    memcell id; ``metadata['parent_id']`` carries the memcell id too.
    """
    table = _mock_table([_make_row("ep_abc", "mc_xyz")])
    with patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=table,
    ):
        result = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    metadata = result[0].metadata
    assert metadata["episode_id"] == "ep_abc"
    assert metadata["parent_id"] == "mc_xyz"
|
||||
|
||||
|
||||
async def test_fetch_all_for_owner_skips_rows_without_parent_id(
    recaller: EpisodeRecaller,
) -> None:
    """A row lacking ``parent_id`` is skipped without raising.

    Such rows are incomplete episode records.
    """
    # Same shape as a normal episode row, but with no "parent_id" key.
    incomplete_row = {
        "id": "ep_bad",
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "s",
        "timestamp": 1,
        "sender_ids": [],
        "subject": "",
        "summary": "",
        "episode": "",
    }
    table = _mock_table([incomplete_row])
    with patch(
        "everos.memory.search.recall.episode.get_table",
        new_callable=AsyncMock,
        return_value=table,
    ):
        result = await recaller.fetch_all_for_owner("owner_id = 'alice'")

    assert result == []
|
||||
189
tests/unit/test_memory/test_search/test_recall_or_semantics.py
Normal file
189
tests/unit/test_memory/test_search/test_recall_or_semantics.py
Normal file
@ -0,0 +1,189 @@
|
||||
"""Real-LanceDB regression: OR-mode BooleanQuery sparse recall.
|
||||
|
||||
Locks the fix for the tantivy implicit-AND poison: when a query
|
||||
contains an IDF≈0 token (typically the partition owner's own name on
|
||||
an owner-scoped corpus), the entire query used to return 0 hits. The
|
||||
fixed path wraps each token in a ``BooleanQuery`` with ``SHOULD``
|
||||
clauses (mirrors enterprise ES ``bool.should + minimum_should_match=1``)
|
||||
so other tokens can carry the query.
|
||||
|
||||
These tests build a tiny in-memory corpus where one term is 100% DF
|
||||
(the "poison" term) and verify that mixing it with informative
|
||||
content tokens still surfaces results.
|
||||
|
||||
White-box surfaces:
|
||||
- LanceDB ``episode`` table (real, per-test tmp root)
|
||||
- ``EpisodeRecaller.sparse_recall``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.component.tokenizer import Tokenizer
|
||||
from everos.infra.persistence.lancedb import (
|
||||
Episode,
|
||||
ParentType,
|
||||
episode_repo,
|
||||
lancedb_manager,
|
||||
)
|
||||
from everos.memory.search.recall.base import RecallerDeps, build_or_query
|
||||
from everos.memory.search.recall.episode import EpisodeRecaller
|
||||
|
||||
|
||||
class _WhitespaceTokenizer(Tokenizer):
    """Lowercasing tokenizer that splits on whitespace.

    The OR-semantics fix does not depend on jieba's behaviour, so a trivial
    tokenizer keeps these tests focused on the query construction.
    """

    def tokenize(self, text: str) -> list[str]:
        """Return the lowercased, whitespace-separated tokens of ``text``."""
        # str.split() with no separator never yields empty strings, so no
        # extra filtering is required.
        lowered = text.lower()
        return lowered.split()
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _episode_row(
    *,
    eid: str,
    owner_id: str,
    body_tokens: str,
) -> Episode:
    """Build an ``Episode`` row for ``owner_id``.

    ``body_tokens`` is stored both as the ``episode`` text and as the
    ``episode_tokens`` column. The row id is ``"{owner_id}_{eid}"``; the
    parent is the fixed memcell ``mc_test`` and the vector is all zeros.
    """
    row_id = f"{owner_id}_{eid}"
    md_path = f"users/{owner_id}/episodes/episode-2026-01-01.md"
    zero_vector = [0.0] * 1024
    return Episode(
        id=row_id,
        entry_id=eid,
        owner_id=owner_id,
        owner_type="user",
        session_id="sess_1",
        timestamp=_ts(),
        parent_type=ParentType.MEMCELL.value,
        parent_id="mc_test",
        sender_ids=[owner_id],
        episode=body_tokens,
        episode_tokens=body_tokens,
        md_path=md_path,
        content_sha256="x" * 64,
        vector=zero_vector,
    )
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Isolate LanceDB under a per-test temporary memory root.

    Setup redirects ``EVEROS_MEMORY__ROOT`` and resets the manager's private
    ``_conn`` / ``_tables`` state; teardown disposes the connection.
    """
    memory_root = str(tmp_path)
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", memory_root)
    # Forget connection / table handles left over from a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()

    yield

    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
def _recaller() -> EpisodeRecaller:
    """Return an ``EpisodeRecaller`` wired with the whitespace tokenizer."""
    deps = RecallerDeps(tokenizer=_WhitespaceTokenizer())
    return EpisodeRecaller(deps)
|
||||
|
||||
|
||||
# ── build_or_query helper unit-level checks ────────────────────────────
|
||||
|
||||
|
||||
def test_build_or_query_empty_returns_none() -> None:
    """Empty or whitespace-only text builds no query (``None``).

    Callers are expected to short-circuit on that ``None``.
    """
    tokenizer = _WhitespaceTokenizer()
    empty_result = build_or_query(tokenizer, "", column="episode_tokens")
    blank_result = build_or_query(tokenizer, " ", column="episode_tokens")
    assert empty_result is None
    assert blank_result is None
|
||||
|
||||
|
||||
def test_build_or_query_single_token_returns_match_query() -> None:
    """A single token yields a bare ``MatchQuery``, with no boolean wrapper."""
    from lancedb.query import MatchQuery

    tokenizer = _WhitespaceTokenizer()
    query = build_or_query(tokenizer, "hello", column="episode_tokens")
    assert isinstance(query, MatchQuery)
|
||||
|
||||
|
||||
def test_build_or_query_multi_token_returns_boolean_query() -> None:
    """Two or more tokens yield a ``BooleanQuery``.

    The helper is expected to emit one SHOULD clause per token; only the
    resulting query type is asserted here.
    """
    from lancedb.query import BooleanQuery

    tokenizer = _WhitespaceTokenizer()
    query = build_or_query(tokenizer, "alice support group", column="episode_tokens")
    assert isinstance(query, BooleanQuery)
|
||||
|
||||
|
||||
# ── Live recall: poison token + informative token must surface results ──
|
||||
|
||||
|
||||
async def test_or_semantics_poison_token_does_not_kill_query() -> None:
    """A 100%-DF token must not zero out an otherwise informative query.

    Two episodes both contain the owner name (DF=100%) plus distinct
    content. Before the fix, querying ``"alice support group"`` against
    owner=alice returned 0 hits: the ``alice`` token (IDF close to 0)
    poisoned the implicit-AND query parser and dragged the conjunction to
    zero. After the fix, ``BooleanQuery + SHOULD`` lets ``support`` /
    ``group`` carry the query on their own.
    """
    from everos.infra.persistence.lancedb import get_table

    bodies = {
        "ep_1": "alice attended lgbtq support group last tuesday",
        "ep_2": "alice tried watercolor painting on saturday morning",
    }
    rows = [
        _episode_row(eid=eid, owner_id="alice", body_tokens=body)
        for eid, body in bodies.items()
    ]
    await episode_repo.upsert(rows)

    # LanceDB FTS only sees data merged into the index after optimize();
    # the tests treat that as part of "the corpus is ready to query".
    tbl = await get_table(Episode.TABLE_NAME, Episode)
    await tbl.optimize()

    where = "owner_id = 'alice' AND owner_type = 'user'"
    cands = await _recaller().sparse_recall("alice support group", where, limit=10)

    assert cands, "alice + support + group should recall ep_1 via SHOULD"
    # ep_1 is the support-group episode, so it should outrank ep_2.
    top = cands[0]
    assert top.id == "alice_ep_1"
    assert top.score > 0.0
|
||||
|
||||
|
||||
async def test_or_semantics_single_informative_token() -> None:
    """A lone non-poison token still recalls (regression for ``painting``)."""
    from everos.infra.persistence.lancedb import get_table

    bodies = {
        "ep_1": "alice attended lgbtq support group",
        "ep_2": "alice tried watercolor painting on saturday",
    }
    rows = [
        _episode_row(eid=eid, owner_id="alice", body_tokens=body)
        for eid, body in bodies.items()
    ]
    await episode_repo.upsert(rows)

    # Merge the new rows into the FTS index before querying.
    tbl = await get_table(Episode.TABLE_NAME, Episode)
    await tbl.optimize()

    where = "owner_id = 'alice' AND owner_type = 'user'"
    cands = await _recaller().sparse_recall("painting", where, limit=10)

    assert cands, "single informative token must recall the matching episode"
    top = cands[0]
    assert top.id == "alice_ep_2"
|
||||
|
||||
|
||||
async def test_or_semantics_empty_query_returns_empty() -> None:
    """A query that tokenises to nothing recalls ``[]``.

    The recaller is expected to return early without hitting LanceDB; only
    the returned value is asserted here.
    """
    recaller = _recaller()
    cands = await recaller.sparse_recall(" ", "owner_id = 'alice'", limit=10)
    assert cands == []
|
||||
128
tests/unit/test_memory/test_search/test_recall_profile.py
Normal file
128
tests/unit/test_memory/test_search/test_recall_profile.py
Normal file
@ -0,0 +1,128 @@
|
||||
"""Real-LanceDB tests for ``ProfileRecaller`` — KV-by-owner fetch.
|
||||
|
||||
Profile recall has no query / no ranking: ``fetch(owner_id)`` returns
|
||||
the at-most-one row keyed by ``id = owner_id``. These tests exercise
|
||||
the LanceDB path (no stubs) and the JSON unpacking that turns the
|
||||
``*_json`` columns back into the DTO's ``profile_data`` mapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.infra.persistence.lancedb import (
|
||||
UserProfile,
|
||||
lancedb_manager,
|
||||
user_profile_repo,
|
||||
)
|
||||
from everos.memory.search.recall.profile import ProfileRecaller
|
||||
|
||||
|
||||
def _profile_row(
    *,
    owner_id: str,
    summary: str = "summary text",
    explicit_info: list | None = None,
    implicit_traits: list | None = None,
    profile_timestamp_ms: int = 1_700_000_000_000,
) -> UserProfile:
    """Build a ``UserProfile`` row keyed by ``owner_id``.

    ``explicit_info`` / ``implicit_traits`` are JSON-encoded into their
    ``*_json`` columns; a missing (or empty) value is stored as ``[]``.
    """
    explicit_json = json.dumps(explicit_info or [], ensure_ascii=False)
    implicit_json = json.dumps(implicit_traits or [], ensure_ascii=False)
    md_path = f"users/{owner_id}/user.md"
    return UserProfile(
        id=owner_id,
        owner_id=owner_id,
        owner_type="user",
        summary=summary,
        explicit_info_json=explicit_json,
        implicit_traits_json=implicit_json,
        profile_timestamp_ms=profile_timestamp_ms,
        md_path=md_path,
        content_sha256="x" * 64,
    )
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Isolate LanceDB under a per-test temporary memory root.

    Setup redirects ``EVEROS_MEMORY__ROOT`` and resets the manager's private
    ``_conn`` / ``_tables`` state; teardown disposes the connection.
    """
    memory_root = str(tmp_path)
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", memory_root)
    # Forget connection / table handles left over from a previous test.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()

    yield

    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
async def test_fetch_returns_dto_when_row_exists() -> None:
    """An existing profile row comes back as one DTO with decoded JSON."""
    stored = _profile_row(
        owner_id="u_alice",
        summary="Alice likes long hikes.",
        explicit_info=[{"fact": "lives in tokyo"}],
        implicit_traits=[{"trait": "introverted"}],
        profile_timestamp_ms=1_700_000_001_000,
    )
    await user_profile_repo.upsert([stored])

    items = await ProfileRecaller().fetch("u_alice")

    assert len(items) == 1
    item = items[0]
    assert item.id == "u_alice"
    assert item.user_id == "u_alice"
    assert item.score is None
    # The *_json columns are decoded back into live Python objects.
    data = item.profile_data
    assert data["summary"] == "Alice likes long hikes."
    assert data["explicit_info"] == [{"fact": "lives in tokyo"}]
    assert data["implicit_traits"] == [{"trait": "introverted"}]
    assert data["profile_timestamp_ms"] == 1_700_000_001_000
|
||||
|
||||
|
||||
async def test_fetch_returns_empty_when_row_missing() -> None:
    """Fetching an owner with no stored profile returns an empty list."""
    recaller = ProfileRecaller()
    items = await recaller.fetch("u_cold_start")
    assert items == []
|
||||
|
||||
|
||||
async def test_fetch_returns_empty_for_blank_owner() -> None:
    """A blank ``owner_id`` returns ``[]``.

    The recaller should short-circuit instead of querying LanceDB with an
    empty-string primary key, which would otherwise return any row whose id
    was persisted as the empty string.
    """
    recaller = ProfileRecaller()
    items = await recaller.fetch("")
    assert items == []
|
||||
|
||||
|
||||
async def test_fetch_isolates_by_owner() -> None:
    """With two profiles stored, fetching one owner returns only theirs."""
    summaries = {"u_alice": "Alice", "u_bob": "Bob"}
    rows = [
        _profile_row(owner_id=owner, summary=text)
        for owner, text in summaries.items()
    ]
    await user_profile_repo.upsert(rows)

    bob_items = await ProfileRecaller().fetch("u_bob")

    assert len(bob_items) == 1
    bob_profile = bob_items[0].profile_data
    assert bob_profile["summary"] == "Bob"
|
||||
|
||||
|
||||
async def test_fetch_tolerates_malformed_json_columns() -> None:
    """Corrupted JSON in one column must not break the recall path.

    The affected bucket falls back to ``[]`` and the rest of the DTO is
    returned intact.
    """
    broken = UserProfile(
        id="u_broken",
        owner_id="u_broken",
        owner_type="user",
        summary="ok",
        explicit_info_json="{not valid json",
        implicit_traits_json="[]",
        profile_timestamp_ms=0,
        md_path="users/u_broken/user.md",
        content_sha256="y" * 64,
    )
    await user_profile_repo.upsert([broken])

    items = await ProfileRecaller().fetch("u_broken")

    assert len(items) == 1
    data = items[0].profile_data
    assert data["explicit_info"] == []
    assert data["implicit_traits"] == []
    assert data["summary"] == "ok"
|
||||
214
tests/unit/test_memory/test_search/test_shaper.py
Normal file
214
tests/unit/test_memory/test_search/test_shaper.py
Normal file
@ -0,0 +1,214 @@
|
||||
"""Unit tests for ``memory.search.shaper``.
|
||||
|
||||
Tests are pure: no LanceDB, no everalgo, just dataclass-in / DTO-out.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
|
||||
from everalgo.types import Candidate, ScoredItem
|
||||
|
||||
from everos.memory.search.shaper import (
|
||||
reshape_hybrid_output,
|
||||
shape_agent_case_from_candidate,
|
||||
shape_agent_skill_from_candidate,
|
||||
shape_atomic_fact_from_candidate,
|
||||
shape_episode_from_candidate,
|
||||
)
|
||||
|
||||
# ── Fixtures ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts(year: int = 2026) -> _dt.datetime:
|
||||
return _dt.datetime(year, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _episode_candidate(*, id: str = "alice_ep_1", score: float = 0.9) -> Candidate:
    """Build a vector-sourced episode ``Candidate`` owned by user ``alice``."""
    metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "sender_ids": ["alice", "assistant_1"],
        "subject": "Coffee chat",
        "summary": "Discussed coffee preferences.",
        "episode": "Alice said she prefers oat milk.",
    }
    return Candidate(id=id, score=score, source="vector", metadata=metadata)
|
||||
|
||||
|
||||
def _agent_case_candidate() -> Candidate:
    """Build a keyword-sourced agent-case ``Candidate`` owned by ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "session_id": "sess_a",
        "timestamp": _ts(),
        "task_intent": "Draft a follow-up email",
        "approach": "1. summarise...",
        "quality_score": 0.92,
        "key_insight": "User prefers brief tone",
    }
    return Candidate(
        id="agent_a_case_1",
        score=0.8,
        source="keyword",
        metadata=metadata,
    )
|
||||
|
||||
|
||||
def _agent_skill_candidate() -> Candidate:
    """Build a keyword-sourced agent-skill ``Candidate`` owned by ``agent_a``."""
    metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": "contract_redline",
        "description": "Spot risky clauses",
        "content": "Step 1: ...",
        "confidence": 0.9,
        "maturity_score": 0.5,
        "source_case_ids": ["agent_a_case_1"],
    }
    return Candidate(
        id="agent_a_skill_1",
        score=0.7,
        source="keyword",
        metadata=metadata,
    )
|
||||
|
||||
|
||||
# ── Episode shaping ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_shape_episode_basic() -> None:
    """A well-formed episode candidate is shaped into a populated item."""
    candidate = _episode_candidate()
    item = shape_episode_from_candidate(candidate)

    assert item is not None
    assert item.id == "alice_ep_1"
    assert item.user_id == "alice"
    assert item.type == "Conversation"
    assert item.score == 0.9
    # No facts were passed in, so the list is empty.
    assert item.atomic_facts == []
    assert item.sender_ids == ["alice", "assistant_1"]
|
||||
|
||||
|
||||
def test_shape_episode_drops_when_owner_type_wrong() -> None:
    """An episode candidate whose owner_type is ``agent`` shapes to ``None``."""
    cand = _episode_candidate()
    cand.metadata["owner_type"] = "agent"

    shaped = shape_episode_from_candidate(cand)
    assert shaped is None
|
||||
|
||||
|
||||
def test_shape_episode_drops_when_timestamp_missing() -> None:
    """An episode candidate without a ``timestamp`` key shapes to ``None``."""
    candidate = _episode_candidate()
    # Remove the key entirely (raises KeyError if the fixture ever lacks it).
    candidate.metadata.pop("timestamp")

    shaped = shape_episode_from_candidate(candidate)

    assert shaped is None
|
||||
|
||||
|
||||
def test_shape_episode_attaches_facts() -> None:
    """Facts passed via ``atomic_facts`` end up on the shaped episode."""
    fact_candidate = Candidate(
        id="f1",
        score=0.5,
        source="other",
        metadata={"fact": "Alice prefers oat milk"},
    )
    shaped_facts = [shape_atomic_fact_from_candidate(fact_candidate)]

    shaped = shape_episode_from_candidate(
        _episode_candidate(),
        atomic_facts=shaped_facts,
    )

    assert shaped is not None
    assert len(shaped.atomic_facts) == 1
    # The fact text from the candidate metadata is exposed as ``content``.
    assert shaped.atomic_facts[0].content == "Alice prefers oat milk"
|
||||
|
||||
|
||||
# ── Agent case / skill shaping ──────────────────────────────────────────
|
||||
|
||||
|
||||
def test_shape_agent_case_basic() -> None:
    """A well-formed agent-case candidate is shaped with its key fields."""
    shaped = shape_agent_case_from_candidate(_agent_case_candidate())
    assert shaped is not None

    # ``owner_id`` from the candidate metadata surfaces as ``agent_id``.
    assert shaped.agent_id == "agent_a"
    assert shaped.task_intent == "Draft a follow-up email"
    assert shaped.quality_score == 0.92
    assert shaped.key_insight == "User prefers brief tone"
|
||||
|
||||
|
||||
def test_shape_agent_case_drops_when_owner_type_wrong() -> None:
    """An agent-case candidate with ``owner_type="user"`` shapes to ``None``."""
    candidate = _agent_case_candidate()
    candidate.metadata["owner_type"] = "user"

    shaped = shape_agent_case_from_candidate(candidate)

    assert shaped is None
|
||||
|
||||
|
||||
def test_shape_agent_skill_basic() -> None:
    """A well-formed agent-skill candidate is shaped with its key fields."""
    shaped = shape_agent_skill_from_candidate(_agent_skill_candidate())
    assert shaped is not None

    assert shaped.name == "contract_redline"
    assert shaped.maturity_score == 0.5
    assert shaped.source_case_ids == ["agent_a_case_1"]
|
||||
|
||||
|
||||
# ── Hybrid reshape ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _scored_episode(eid: str, score: float) -> ScoredItem:
    """Build an ``episode``-typed ``ScoredItem`` owned by user ``alice``.

    Args:
        eid: Identifier assigned to the scored item.
        score: Score assigned to the scored item.
    """
    episode_metadata = {
        "owner_id": "alice",
        "owner_type": "user",
        "session_id": "s1",
        "timestamp": _ts(),
        "sender_ids": ["alice"],
        "subject": "subj",
        "summary": "summ",
        "episode": "body",
    }
    return ScoredItem(
        id=eid,
        score=score,
        item_type="episode",
        metadata=episode_metadata,
    )
|
||||
|
||||
|
||||
def _scored_fact(fid: str, parent: str, score: float) -> ScoredItem:
    """Build an ``atomic_fact``-typed ``ScoredItem`` linked to an episode.

    Args:
        fid: Identifier assigned to the fact; also embedded in its text.
        parent: Value stored as ``parent_episode_id``.
        score: Score assigned to the scored item.
    """
    fact_metadata = {"fact": f"fact text {fid}"}
    return ScoredItem(
        id=fid,
        score=score,
        item_type="atomic_fact",
        parent_episode_id=parent,
        metadata=fact_metadata,
    )
|
||||
|
||||
|
||||
def test_reshape_hybrid_nests_facts_under_kept_episode() -> None:
    """Facts whose parent episode is present are nested under that episode."""
    episode = _scored_episode("ep_1", 0.9)
    high_fact = _scored_fact("f_1", "ep_1", 0.95)
    low_fact = _scored_fact("f_2", "ep_1", 0.85)

    results = reshape_hybrid_output(
        [episode, high_fact, low_fact],
        episode_pool={},
    )

    assert len(results) == 1
    only_episode = results[0]
    assert only_episode.id == "ep_1"
    # Facts are expected in descending score order.
    # NOTE(review): the inputs are already supplied in that order, so this
    # assertion would also pass if the order were merely preserved.
    nested_ids = [fact.id for fact in only_episode.atomic_facts]
    assert nested_ids == ["f_1", "f_2"]
|
||||
|
||||
|
||||
def test_reshape_hybrid_backfills_evicted_episode_from_pool() -> None:
    """An episode missing from the scored list is restored from the pool.

    ``ep_2`` appears only as the parent of a scored fact, but it is present
    in ``episode_pool`` and should therefore come back as a result.
    """
    scored_items = [
        _scored_episode("ep_1", 0.7),
        _scored_fact("f_a", "ep_2", 0.95),
    ]
    pooled = {"ep_2": _episode_candidate(id="ep_2", score=0.0)}

    results = reshape_hybrid_output(scored_items, episode_pool=pooled)

    assert len(results) == 2
    restored, kept = results[0], results[1]
    # Results are ordered by descending score; the restored episode takes
    # its fact's score (0.95) and so outranks ep_1 (0.7).
    assert restored.id == "ep_2"
    assert restored.score == 0.95
    assert len(restored.atomic_facts) == 1
    assert kept.id == "ep_1"
|
||||
|
||||
|
||||
def test_reshape_hybrid_drops_orphan_facts_with_no_pool_parent() -> None:
    """A fact whose parent is neither scored nor pooled yields no results."""
    orphan = _scored_fact("f_x", "ep_missing", 0.5)

    results = reshape_hybrid_output([orphan], episode_pool={})

    assert results == []
|
||||
154
tests/unit/test_memory/test_search/test_skill_hybrid.py
Normal file
154
tests/unit/test_memory/test_search/test_skill_hybrid.py
Normal file
@ -0,0 +1,154 @@
|
||||
"""Unit tests for ``memory.search.skill_hybrid``.
|
||||
|
||||
skill_hybrid is the **cross-encoder lane** for skill HYBRID retrieval.
|
||||
The LLM-rerank lane lives in ``SearchManager._search_agent_skills`` and
|
||||
goes through ``everalgo.rank.skill.arank`` directly — covered by
|
||||
``test_manager`` tests instead.
|
||||
|
||||
Covered surfaces:
|
||||
- ``search_agent_skills_hybrid`` (public function, MagicMock stubs)
|
||||
- ``_fuse``, ``_cross_encoder_rerank``, ``_shape_results``
|
||||
(via integration through the public function)
|
||||
|
||||
All I/O (reranker) is injected via MagicMock / stub objects. No LanceDB
|
||||
or network calls are made.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
from everalgo.types import Candidate
|
||||
|
||||
from everos.memory.search.callbacks import _SKILL_RERANK_INSTRUCTION
|
||||
from everos.memory.search.dto import SearchAgentSkillItem
|
||||
from everos.memory.search.skill_hybrid import search_agent_skills_hybrid
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ts() -> _dt.datetime:
|
||||
return _dt.datetime(2026, 1, 1, tzinfo=_dt.UTC)
|
||||
|
||||
|
||||
def _skill_candidate(
    sid: str,
    score: float = 0.8,
    name: str | None = None,
) -> Candidate:
    """Build a vector-sourced agent-skill ``Candidate`` for hybrid tests.

    Args:
        sid: Candidate id; also interpolated into description and content.
        score: Score assigned to the candidate.
        name: Skill name; when falsy, ``skill_{sid}`` is used instead.
    """
    label = name if name else f"skill_{sid}"
    skill_metadata = {
        "owner_id": "agent_a",
        "owner_type": "agent",
        "name": label,
        "description": f"desc {sid}",
        "content": f"content {sid}",
        "confidence": 0.9,
        "maturity_score": 0.6,
        "source_case_ids": [],
    }
    return Candidate(
        id=sid,
        score=score,
        source="vector",
        metadata=skill_metadata,
    )
|
||||
|
||||
|
||||
def _make_reranker(candidates: list[Candidate]) -> MagicMock:
    """Return a stub reranker whose ``rerank`` keeps the input order.

    The awaited ``rerank`` call resolves to one result object per candidate,
    each exposing the candidate's position as ``index`` and its own score
    as ``score``.
    """

    class _FakeResult:
        """Minimal result object with ``index`` and ``score`` attributes."""

        def __init__(self, index: int, score: float) -> None:
            self.index, self.score = index, score

    identity_results = [
        _FakeResult(position, candidate.score)
        for position, candidate in enumerate(candidates)
    ]

    stub = MagicMock()
    # provider.rerank returns a list of result objects with index + score
    stub.rerank = AsyncMock(return_value=identity_results)
    return stub
|
||||
|
||||
|
||||
# ── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSearchAgentSkillsHybridRerank:
    """Cross-encoder rerank path."""

    async def test_returns_shaped_items_up_to_top_k(self) -> None:
        """Three candidates with ``top_k=2`` yield two shaped skill items."""
        first = _skill_candidate("s1", score=0.9)
        second = _skill_candidate("s2", score=0.8)
        third = _skill_candidate("s3", score=0.7)
        stub_reranker = _make_reranker([first, second, third])

        items = await search_agent_skills_hybrid(
            "what skill handles auth?",
            sparse=[first, second, third],
            dense=[first, second, third],
            reranker=stub_reranker,
            top_k=2,
        )

        assert len(items) == 2
        assert all(isinstance(entry, SearchAgentSkillItem) for entry in items)
        # The stub reranker preserves order, so the two best stay in front.
        assert items[0].id == "s1"
        assert items[1].id == "s2"

    async def test_reranker_receives_skill_instruction_and_shaped_passages(
        self,
    ) -> None:
        """The reranker gets the skill instruction and shaped passages.

        Passages must follow the ``"Agent Skill: {name} - {description}"``
        shape — matches the everosos-opensource contract for skill rerank.
        """
        auth_skill = _skill_candidate("s1", name="auth_middleware_refactor")
        split_skill = _skill_candidate("s2", name="provider_lookup_split")
        stub_reranker = _make_reranker([auth_skill, split_skill])

        await search_agent_skills_hybrid(
            "how to split auth?",
            sparse=[auth_skill],
            dense=[auth_skill, split_skill],
            reranker=stub_reranker,
            top_k=10,
        )

        stub_reranker.rerank.assert_awaited_once()
        recorded = stub_reranker.rerank.await_args
        assert recorded is not None

        # Signature: rerank(query, passages, *, instruction=...)
        assert recorded.args[0] == "how to split auth?"
        expected_passages = [
            "Agent Skill: auth_middleware_refactor - desc s1",
            "Agent Skill: provider_lookup_split - desc s2",
        ]
        assert recorded.args[1] == expected_passages
        assert recorded.kwargs["instruction"] == _SKILL_RERANK_INSTRUCTION
|
||||
|
||||
|
||||
class TestSearchAgentSkillsHybridEmpty:
    """Empty input / degenerate cases."""

    async def test_empty_sparse_and_dense_returns_empty_list(self) -> None:
        """With no candidates the result is empty and no error is raised."""
        stub_reranker = MagicMock()
        stub_reranker.rerank = AsyncMock(return_value=[])

        items = await search_agent_skills_hybrid(
            "query",
            sparse=[],
            dense=[],
            reranker=stub_reranker,
            top_k=10,
        )

        assert items == []
        # The reranker must be left untouched when there is nothing to rank.
        stub_reranker.rerank.assert_not_called()
|
||||
Reference in New Issue
Block a user