chore: initialize EverOS 1.0.0
Markdown-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
0
tests/unit/test_infra/test_lancedb/__init__.py
Normal file
0
tests/unit/test_infra/test_lancedb/__init__.py
Normal file
72
tests/unit/test_infra/test_lancedb/test_lancedb_manager.py
Normal file
72
tests/unit/test_infra/test_lancedb/test_lancedb_manager.py
Normal file
@ -0,0 +1,72 @@
|
||||
"""LanceDB manager singletons.
|
||||
|
||||
Verifies ``get_connection`` / ``get_table`` / ``dispose_connection``
|
||||
are idempotent and rebuild after dispose.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from lancedb.pydantic import Vector
|
||||
|
||||
from everos.core.persistence import BaseLanceTable
|
||||
from everos.infra.persistence.lancedb import lancedb_manager
|
||||
|
||||
|
||||
class _DemoVec(BaseLanceTable):
    """Demo schema — only used by this test module.

    Gives ``lancedb_manager.get_table`` a concrete schema to work with in
    the tests below: one text column plus one vector column.
    """

    # Free-form payload column.
    text: str
    # Vector column declared through ``lancedb.pydantic.Vector`` with size 3.
    # ``Vector(3)`` is a call expression rather than a static type, hence the
    # type-checker suppression.
    vector: Vector(3)  # type: ignore[valid-type]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Point the singleton at an isolated memory-root and reset module state.

    Applied to every test in this module (``autouse``). Before the test it
    sets ``EVEROS_MEMORY__ROOT`` to ``tmp_path`` and clears the manager's
    cached connection and table map; after the test it disposes the
    connection.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    # Drop anything cached by an earlier test so this one starts cold.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
async def test_get_connection_is_singleton() -> None:
    """Two consecutive ``get_connection`` calls return the same object."""
    c1 = await lancedb_manager.get_connection()
    c2 = await lancedb_manager.get_connection()
    assert c1 is c2
|
||||
|
||||
|
||||
async def test_get_table_creates_then_caches() -> None:
    """``get_table`` returns the same handle twice and records it by name."""
    t1 = await lancedb_manager.get_table("demo", _DemoVec)
    t2 = await lancedb_manager.get_table("demo", _DemoVec)
    assert t1 is t2
    # The handle must be registered under the requested table name.
    assert "demo" in lancedb_manager._tables
|
||||
|
||||
|
||||
async def test_get_table_reopens_existing() -> None:
    """A second connection cycle must reopen (not recreate) the table."""
    await lancedb_manager.get_table("demo", _DemoVec)
    await lancedb_manager.dispose_connection()

    t = await lancedb_manager.get_table("demo", _DemoVec)
    assert t is not None
    # Round-trip a row to prove the schema survived.
    await t.add([_DemoVec(text="hello", vector=[0.1, 0.2, 0.3])])
    assert await t.count_rows() == 1
|
||||
|
||||
|
||||
async def test_dispose_resets_state() -> None:
    """``dispose_connection`` clears both the connection and the table map."""
    await lancedb_manager.get_connection()
    await lancedb_manager.get_table("demo", _DemoVec)
    await lancedb_manager.dispose_connection()
    assert lancedb_manager._conn is None
    assert lancedb_manager._tables == {}
|
||||
|
||||
|
||||
async def test_dispose_is_idempotent() -> None:
    """``dispose_connection`` may be called with nothing built, and twice."""
    await lancedb_manager.dispose_connection()  # nothing built yet
    await lancedb_manager.get_connection()
    await lancedb_manager.dispose_connection()
    await lancedb_manager.dispose_connection()  # second call must not raise
|
||||
@ -0,0 +1,153 @@
|
||||
"""Tests for :class:`everos.infra.persistence.lancedb._AgentSkillRepo`.
|
||||
|
||||
Real LanceDB under ``tmp_path`` (no mocks) — these tests exercise the
|
||||
SQL ``where`` predicate, cosine ``distance_type`` ranking, and
|
||||
``_distance`` stripping that the repo owns. Strategy-level routing
|
||||
across these methods is covered separately in
|
||||
``tests/unit/test_memory/test_strategies/test_extract_agent_skill.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.infra.persistence.lancedb import (
|
||||
AgentSkill as LanceAgentSkill,
|
||||
)
|
||||
from everos.infra.persistence.lancedb import (
|
||||
agent_skill_repo,
|
||||
lancedb_manager,
|
||||
)
|
||||
|
||||
|
||||
def _skill_row(
    *,
    name: str,
    owner_id: str,
    cluster_id: str,
    vector: list[float],
) -> LanceAgentSkill:
    """Minimal AgentSkill row sufficient to land in LanceDB for repo tests.

    Only the four keyword arguments vary between rows; every other field is
    a fixed placeholder or derived from ``name`` / ``owner_id``.

    Args:
        name: Skill name; also used in the id, text fields and ``md_path``.
        owner_id: Owning agent id; also used in the id and ``md_path``.
        cluster_id: Cluster the row is assigned to.
        vector: Embedding stored on the row.

    Returns:
        A populated ``LanceAgentSkill`` with id ``"{owner_id}_{name}"``.
    """
    return LanceAgentSkill(
        id=f"{owner_id}_{name}",
        owner_id=owner_id,
        owner_type="agent",
        name=name,
        description=f"desc {name}",
        description_tokens=f"desc {name}",
        content=f"body of {name}",
        content_tokens=f"body of {name}",
        confidence=0.7,
        maturity_score=0.6,
        source_case_ids=[],
        cluster_id=cluster_id,
        md_path=f"agents/{owner_id}/skills/{name}/SKILL.md",
        content_sha256="x" * 64,
        vector=vector,
    )
|
||||
|
||||
|
||||
@pytest.fixture
async def _real_lancedb(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Spin up a clean LanceDB rooted under ``tmp_path`` for one test.

    Sets ``EVEROS_MEMORY__ROOT`` to ``tmp_path``, clears the manager's cached
    connection and table map, and disposes the connection after the test.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    # Drop anything cached by an earlier test so this one starts cold.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    yield
    await lancedb_manager.dispose_connection()
|
||||
|
||||
|
||||
async def test_count_in_cluster_isolates_owner_and_cluster(
    _real_lancedb: None,
) -> None:
    """``count_in_cluster`` returns only rows matching both filters."""
    # Two matching rows, one with the wrong cluster, one with the wrong owner.
    await agent_skill_repo.upsert(
        [
            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
            _skill_row(name="s2", owner_id="a", cluster_id="cl_x", vector=[0.2] * 1024),
            _skill_row(
                name="other_cluster",
                owner_id="a",
                cluster_id="cl_y",
                vector=[0.3] * 1024,
            ),
            _skill_row(
                name="other_owner",
                owner_id="b",
                cluster_id="cl_x",
                vector=[0.4] * 1024,
            ),
        ]
    )

    assert (
        await agent_skill_repo.count_in_cluster(owner_id="a", cluster_id="cl_x")
    ) == 2
|
||||
|
||||
|
||||
async def test_find_in_cluster_returns_typed_rows_no_ranking(
    _real_lancedb: None,
) -> None:
    """Scalar fetch within one cluster; capped at ``limit`` regardless of order."""
    # Three rows in the target cluster plus one decoy in another cluster.
    await agent_skill_repo.upsert(
        [
            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
            _skill_row(name="s2", owner_id="a", cluster_id="cl_x", vector=[0.2] * 1024),
            _skill_row(name="s3", owner_id="a", cluster_id="cl_x", vector=[0.3] * 1024),
            _skill_row(
                name="other_cluster",
                owner_id="a",
                cluster_id="cl_y",
                vector=[0.4] * 1024,
            ),
        ]
    )

    got = await agent_skill_repo.find_in_cluster(
        owner_id="a", cluster_id="cl_x", limit=2
    )
    assert len(got) == 2
    # No ordering is asserted: any two of the three matching rows are valid.
    assert {s.name for s in got}.issubset({"s1", "s2", "s3"})
    assert all(s.owner_id == "a" and s.cluster_id == "cl_x" for s in got)
|
||||
|
||||
|
||||
async def test_find_topk_relevant_in_cluster_ranks_by_cosine(
    _real_lancedb: None,
) -> None:
    """LanceDB native ``nearest_to + distance_type('cosine')`` ordering."""
    # Relative to the query (``near``): ``near`` is identical, ``medium`` is
    # split evenly across the first two axes, and ``far`` is orthogonal.
    near = [1.0] + [0.0] * 1023
    far = [0.0] * 1023 + [1.0]
    medium = [0.7, 0.7] + [0.0] * 1022
    await agent_skill_repo.upsert(
        [
            _skill_row(name="near", owner_id="a", cluster_id="cl_x", vector=near),
            _skill_row(name="far", owner_id="a", cluster_id="cl_x", vector=far),
            _skill_row(name="medium", owner_id="a", cluster_id="cl_x", vector=medium),
            # Different cluster — must not leak.
            _skill_row(name="other", owner_id="a", cluster_id="cl_y", vector=near),
            # Different owner — must not leak either.
            _skill_row(name="near", owner_id="b", cluster_id="cl_x", vector=near),
        ]
    )

    got = await agent_skill_repo.find_topk_relevant_in_cluster(
        owner_id="a", cluster_id="cl_x", query_vector=near, top_k=2
    )
    assert [s.name for s in got] == ["near", "medium"]
|
||||
|
||||
|
||||
async def test_find_topk_relevant_in_cluster_raises_on_empty_vector(
    _real_lancedb: None,
) -> None:
    """Empty ``query_vector`` is a caller-side error — the repo refuses."""
    # Seed one matching row so the table is not empty when the query is made.
    await agent_skill_repo.upsert(
        [
            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
        ]
    )
    with pytest.raises(ValueError, match="query_vector must be non-empty"):
        await agent_skill_repo.find_topk_relevant_in_cluster(
            owner_id="a", cluster_id="cl_x", query_vector=[], top_k=2
        )
|
||||
@ -0,0 +1,150 @@
|
||||
"""``content_sha256`` is a required field on every business lancedb table.
|
||||
|
||||
Cascade handler (16 doc §3.3) diffs by this digest to skip no-op
|
||||
re-embeds. Every business schema — including ``agent_skill`` — declares
|
||||
the field; daily-log kinds hash a per-handler subset of inline +
|
||||
section keys, agent_skill hashes the file-level content-bearing parts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.infra.persistence.lancedb import (
|
||||
AgentCase,
|
||||
AgentSkill,
|
||||
AtomicFact,
|
||||
Episode,
|
||||
Foresight,
|
||||
)
|
||||
|
||||
_VEC = [0.0] * 1024
|
||||
_NOW = dt.datetime(2026, 5, 14, 10, 0, 0, tzinfo=dt.UTC)
|
||||
_SHA = "f" * 64
|
||||
|
||||
|
||||
def _episode() -> Episode:
    """Build a fully-populated ``Episode`` row with ``content_sha256=_SHA``."""
    return Episode(
        id="u1_ep_1",
        entry_id="ep_20260514_0001",
        owner_id="u1",
        owner_type="user",
        session_id="s1",
        timestamp=_NOW,
        parent_type="memcell",
        parent_id="mc_1",
        sender_ids=["u1"],
        episode="hello world",
        episode_tokens="hello world",
        md_path="users/u1/episodes/episode-2026-05-14.md",
        content_sha256=_SHA,
        vector=_VEC,
    )
|
||||
|
||||
|
||||
def _atomic_fact() -> AtomicFact:
    """Build a fully-populated ``AtomicFact`` row with ``content_sha256=_SHA``."""
    return AtomicFact(
        id="u1_af_1",
        entry_id="af_20260514_0001",
        owner_id="u1",
        owner_type="user",
        session_id="s1",
        timestamp=_NOW,
        parent_type="memcell",
        parent_id="mc_1",
        sender_ids=["u1"],
        fact="x is y",
        fact_tokens="x is y",
        md_path="users/u1/.atomic_facts/atomic_fact-2026-05-14.md",
        content_sha256=_SHA,
        vector=_VEC,
    )
|
||||
|
||||
|
||||
def _foresight() -> Foresight:
    """Build a fully-populated ``Foresight`` row with ``content_sha256=_SHA``."""
    return Foresight(
        id="u1_fs_1",
        entry_id="fs_20260514_0001",
        owner_id="u1",
        owner_type="user",
        session_id="s1",
        timestamp=_NOW,
        parent_type="memcell",
        parent_id="mc_1",
        sender_ids=["u1"],
        foresight="user plans X",
        foresight_tokens="user plans X",
        md_path="users/u1/.foresights/foresight-2026-05-14.md",
        content_sha256=_SHA,
        vector=_VEC,
    )
|
||||
|
||||
|
||||
def _agent_case() -> AgentCase:
    """Build a fully-populated ``AgentCase`` row with ``content_sha256=_SHA``."""
    return AgentCase(
        id="a1_ac_1",
        entry_id="ac_20260514_0001",
        owner_id="a1",
        owner_type="agent",
        session_id="s1",
        timestamp=_NOW,
        parent_type="memcell",
        parent_id="mc_1",
        quality_score=0.9,
        task_intent="scan contract",
        task_intent_tokens="scan contract",
        approach="step 1; step 2",
        approach_tokens="step 1 step 2",
        md_path="agents/a1/.cases/agent_case-2026-05-14.md",
        content_sha256=_SHA,
        vector=_VEC,
    )
|
||||
|
||||
|
||||
def _agent_skill() -> AgentSkill:
    """Build a fully-populated ``AgentSkill`` row with ``content_sha256=_SHA``."""
    return AgentSkill(
        id="a1_demo_skill",
        owner_id="a1",
        owner_type="agent",
        name="demo_skill",
        description="just a demo",
        description_tokens="just a demo",
        content="body content",
        content_tokens="body content",
        confidence=0.7,
        maturity_score=0.6,
        source_case_ids=[],
        md_path="agents/a1/agent_skills/demo_skill/SKILL.md",
        content_sha256=_SHA,
        vector=_VEC,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "factory",
    [_episode, _atomic_fact, _foresight, _agent_case, _agent_skill],
    ids=["episode", "atomic_fact", "foresight", "agent_case", "agent_skill"],
)
def test_content_sha256_round_trip(factory) -> None:  # type: ignore[no-untyped-def]
    """``content_sha256`` survives ``model_dump`` followed by ``model_validate``.

    Runs once per business schema; ``factory`` is one of the row builders
    defined in this module.
    """
    row = factory()
    assert row.content_sha256 == _SHA
    dumped = row.model_dump()
    assert dumped["content_sha256"] == _SHA
    # Rebuild through the row's own class so each schema validates itself.
    restored = type(row).model_validate(dumped)
    assert restored.content_sha256 == _SHA
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "factory",
    [_episode, _atomic_fact, _foresight, _agent_case, _agent_skill],
    ids=["episode", "atomic_fact", "foresight", "agent_case", "agent_skill"],
)
def test_content_sha256_required(factory) -> None:  # type: ignore[no-untyped-def]
    """Dropping content_sha256 from the kwargs surfaces a ValidationError.

    Runs once per business schema; ``factory`` is one of the row builders
    defined in this module.
    """
    row = factory()
    kwargs = row.model_dump()
    del kwargs["content_sha256"]
    # Expect ValueError rather than bare Exception: pydantic's ValidationError
    # subclasses ValueError, so no extra import is needed, and ``match`` pins
    # the failure to the missing field so an unrelated error cannot make this
    # test pass. NOTE(review): assumes these models are pydantic-backed (they
    # expose ``model_dump`` / ``model_validate``) — confirm.
    with pytest.raises(ValueError, match="content_sha256"):
        type(row).model_validate(kwargs)
|
||||
Reference in New Issue
Block a user