chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,104 @@
"""Unit tests for the LanceDB async connection factory."""
from __future__ import annotations
from pathlib import Path
import pytest
from everos.config import LanceDBSettings
from everos.core.persistence import MemoryRoot, open_lancedb_connection
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` rooted at ``tmp_path`` with ``ensure()`` already called."""
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
async def test_connect_creates_lancedb_dir(memory_root: MemoryRoot) -> None:
    """The factory recreates a missing LanceDB directory and returns an open connection."""
    lancedb_dir = memory_root.lancedb_dir
    # Drop the auto-created directory so the factory has to create it itself.
    if lancedb_dir.exists():
        lancedb_dir.rmdir()
    assert not lancedb_dir.exists()

    connection = await open_lancedb_connection(lancedb_dir, LanceDBSettings())
    try:
        assert lancedb_dir.is_dir()
        assert connection.is_open()
    finally:
        connection.close()  # AsyncConnection.close() is sync
async def test_empty_connection_lists_no_tables(memory_root: MemoryRoot) -> None:
    """A freshly opened store reports no tables."""
    connection = await open_lancedb_connection(
        memory_root.lancedb_dir, LanceDBSettings()
    )
    try:
        # list_tables() returns ListTablesResponse(tables, page_token).
        response = await connection.list_tables()
        table_names = list(response.tables)
        assert table_names == []
    finally:
        connection.close()
async def test_read_consistency_seconds_translated_to_timedelta(
    memory_root: MemoryRoot,
) -> None:
    """A configured ``read_consistency_seconds`` is echoed back as a ``timedelta``."""
    import datetime as dt

    expected = dt.timedelta(seconds=5.0)
    connection = await open_lancedb_connection(
        memory_root.lancedb_dir,
        LanceDBSettings(read_consistency_seconds=5.0),
    )
    try:
        # AsyncConnection.get_read_consistency_interval is async; the value
        # it reports should equal what was configured.
        assert await connection.get_read_consistency_interval() == expected
    finally:
        connection.close()
async def test_default_consistency_is_none(memory_root: MemoryRoot) -> None:
    """With default settings the connection reports no read-consistency interval."""
    connection = await open_lancedb_connection(
        memory_root.lancedb_dir, LanceDBSettings()
    )
    try:
        assert await connection.get_read_consistency_interval() is None
    finally:
        connection.close()
async def test_index_cache_cap_is_plumbed_into_session(
    memory_root: MemoryRoot, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A ``Session`` must be handed to ``lancedb.connect_async``.

    The factory installs a Session to bound the index reader cache so
    file descriptors do not leak. Session objects do not expose the
    configured cap as a property, so this test wraps ``connect_async``
    and checks only that a Session instance arrives as the ``session``
    keyword. Eviction / FD-release behaviour under load is covered by
    the fd-probe scripts kept outside the test suite.
    """
    import lancedb

    seen_kwargs: dict[str, object] = {}
    original_connect = lancedb.connect_async

    async def recording_connect(*args, **kwargs):  # type: ignore[no-untyped-def]
        # Remember the session argument, then delegate to the real function.
        seen_kwargs["session"] = kwargs.get("session")
        return await original_connect(*args, **kwargs)

    monkeypatch.setattr(lancedb, "connect_async", recording_connect)

    connection = await open_lancedb_connection(
        memory_root.lancedb_dir,
        LanceDBSettings(index_cache_size_bytes=1024),
    )
    try:
        assert isinstance(seen_kwargs.get("session"), lancedb.Session)
    finally:
        connection.close()

View File

@ -0,0 +1,175 @@
"""FTS-layer normalisation contract tests.
``BaseLanceTable.ensure_fts_indexes`` builds the LanceDB FTS index with
the following configuration::
base_tokenizer="whitespace"
lower_case=True
stem=True
remove_stop_words=False
ascii_folding=True
language="English" (tantivy default)
The app-layer ``JiebaTokenizer`` already handles segmentation +
stopword filtering, so these FTS-layer settings act as a *belt-and-
braces* layer of normalisation. These tests probe the FTS layer
*directly* (bypassing jieba) to verify each setting actually behaves
as the docstring claims:
- lower_case=True → query case-insensitive against the raw-cased text
- stem=True → query for the word root hits inflected forms
- remove_stop_words=False → FTS layer does NOT drop stop-words; the
app-layer JiebaTokenizer is the single source of truth for
stop-word filtering (English + Chinese)
- ascii_folding=True → diacritics on Latin chars normalised (café → cafe)
- CJK pass-through → no stemming applied to CJK
Tests build a fresh in-memory-ish LanceDB store under ``tmp_path``,
declare a minimal schema with one ``body`` column, and inspect query
hits against handcrafted rows.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from pathlib import Path
from typing import ClassVar
import lancedb
import pytest
from lancedb import AsyncTable
from everos.core.persistence.lancedb import BaseLanceTable
class _FtsSpec(BaseLanceTable):
    """Minimal schema with one BM25-indexed column for FTS-layer probes."""

    # Name the ``fts_table`` fixture passes to ``create_table``.
    TABLE_NAME: ClassVar[str] = "fts_probe"
    # Columns declared for BM25 — presumably what ``ensure_fts_indexes``
    # reads; confirm against BaseLanceTable.
    BM25_FIELDS: ClassVar[list[str]] = ["body"]
    # Row identifier; tests compare query hits by this value.
    id: str
    # Free text that the tests query via ``nearest_to_text``.
    body: str
@pytest.fixture
async def fts_table(tmp_path: Path) -> AsyncIterator[AsyncTable]:
    """Yield an empty ``_FtsSpec`` table in a fresh LanceDB store under ``tmp_path``.

    No FTS index is built here: each test seeds its rows and then calls
    ``ensure_fts_indexes`` (via ``_seed_and_index``), because the index
    needs data before it materialises sensibly.

    The connection is closed on teardown, whether the test passed or
    failed, so connections do not accumulate across the module.
    """
    conn = await lancedb.connect_async(str(tmp_path / "lancedb"))
    try:
        yield await conn.create_table(_FtsSpec.TABLE_NAME, schema=_FtsSpec)
    finally:
        conn.close()  # AsyncConnection.close() is sync
async def _seed_and_index(table: AsyncTable, rows: list[dict]) -> None:
    """Add ``rows`` as ``_FtsSpec`` records, then call ``ensure_fts_indexes`` on the table."""
    records = [_FtsSpec(**row) for row in rows]
    await table.add(records)
    await _FtsSpec.ensure_fts_indexes(table)
async def _query_ids(table: AsyncTable, text: str) -> set[str]:
    """Return the ids of up to 10 rows matching a text query on the ``body`` column."""
    query = table.query().nearest_to_text(text, columns="body").limit(10)
    matched: set[str] = set()
    for row in await query.to_list():
        matched.add(row["id"])
    return matched
# ── lower_case=True ────────────────────────────────────────────────────
async def test_lower_case_query_matches_uppercase_index(
    fts_table: AsyncTable,
) -> None:
    """A lowercase ``hello`` query finds the row stored as ``HELLO``."""
    seed = [
        {"id": "1", "body": "HELLO world"},
        {"id": "2", "body": "GOODBYE world"},
    ]
    await _seed_and_index(fts_table, seed)
    assert await _query_ids(fts_table, "hello") == {"1"}
# ── stem=True ──────────────────────────────────────────────────────────
async def test_stem_query_root_matches_inflected_forms(
    fts_table: AsyncTable,
) -> None:
    """The root ``counsel`` matches rows containing ``counseling`` and ``counseled``."""
    seed = [
        {"id": "1", "body": "counseling session happened"},
        {"id": "2", "body": "counseled patient yesterday"},
        {"id": "3", "body": "unrelated content"},
    ]
    await _seed_and_index(fts_table, seed)
    assert await _query_ids(fts_table, "counsel") == {"1", "2"}
# ── remove_stop_words=False (app layer owns stop-words) ────────────────
async def test_fts_layer_does_not_filter_stopwords(
    fts_table: AsyncTable,
) -> None:
    """The FTS layer keeps English stop-words; the app layer owns that filtering.

    Querying for ``the`` therefore reaches BM25 and matches the row that
    contains it. In production :class:`JiebaTokenizer` strips ``the``
    before tokens get here; this test bypasses jieba so the FTS layer is
    probed in isolation.
    """
    seed = [
        {"id": "1", "body": "the cat sat on the mat"},
        {"id": "2", "body": "unrelated body text"},
    ]
    await _seed_and_index(fts_table, seed)
    assert await _query_ids(fts_table, "the") == {"1"}
# ── ascii_folding=True ─────────────────────────────────────────────────
async def test_ascii_folding_strips_diacritics(fts_table: AsyncTable) -> None:
    """An unaccented ``cafe`` query matches the row stored as ``café``."""
    seed = [
        {"id": "1", "body": "café latte"},
        {"id": "2", "body": "tea house"},
    ]
    await _seed_and_index(fts_table, seed)
    assert await _query_ids(fts_table, "cafe") == {"1"}
# ── CJK pass-through ───────────────────────────────────────────────────
async def test_cjk_terms_pass_through_untouched(fts_table: AsyncTable) -> None:
    """A CJK term is matched as-is (stemming / stop-word rules are English-only).

    With ``base_tokenizer="whitespace"`` CJK text is split only on
    whitespace. The app-layer ``JiebaTokenizer`` normally inserts those
    spaces before text reaches this layer; the bodies below are
    pre-spaced to simulate that.
    """
    seed = [
        {"id": "1", "body": "北京 天安门"},
        {"id": "2", "body": "上海 外滩"},
    ]
    await _seed_and_index(fts_table, seed)
    assert await _query_ids(fts_table, "北京") == {"1"}

View File

@ -0,0 +1,649 @@
"""Tests for :class:`LanceRepoBase` + :class:`LanceDailyLogRepoBase`.
Exercises the chassis-level query helpers shared by every business
LanceDB repo: ``find_where`` / ``find_one_where`` / ``find_by_owner`` /
``find_by_md_path`` (on :class:`LanceRepoBase`), and the daily-log
slice ``find_by_owner_entry`` / ``find_by_session`` /
``find_by_parent`` (on :class:`LanceDailyLogRepoBase`). Also covers
``get_by_id`` + ``upsert`` so the chassis CRUD surface is end-to-end
verified.
Uses a tmp LanceDB connection + a locally-defined daily-log-shaped
table so the chassis can be exercised without depending on any
specific business schema (episode / atomic_fact / …).
"""
from __future__ import annotations

import asyncio
from collections.abc import AsyncIterator
from pathlib import Path
from typing import ClassVar

import pytest

from everos.config import LanceDBSettings
from everos.core.persistence import (
    BaseLanceTable,
    MemoryRoot,
    Vector,
    open_lancedb_connection,
)
from everos.core.persistence.lancedb import (
    LanceDailyLogRepoBase,
    LanceRepoBase,
)
class _Note(BaseLanceTable):
    """Minimal daily-log-shaped table for chassis tests."""

    TABLE_NAME: ClassVar[str] = "_note"
    # Row key; ``_row`` builds it as ``f"{owner}_{entry}"``.
    id: str
    owner_id: str
    # Scoping columns; every test in this module leaves them at "default".
    app_id: str = "default"
    project_id: str = "default"
    # Daily-log columns exercised by find_by_owner_entry / find_by_session /
    # find_by_parent.
    entry_id: str
    session_id: str
    parent_type: str
    parent_id: str
    # Markdown file path used by find_by_md_path / delete_by_md_path.
    md_path: str
    text: str
    # 4-dimensional embedding column; tests fill it with a fixed literal.
    vector: Vector(4)  # type: ignore[valid-type]
class _SearchNote(BaseLanceTable):
    """Schema with BM25_FIELDS declared — exercises FTS index setup."""

    # Table name the FTS-index tests pass to ``create_table``.
    TABLE_NAME: ClassVar[str] = "_search_note"
    # Only ``tokens`` is declared for BM25; ``text`` is left out.
    BM25_FIELDS: ClassVar[list[str]] = ["tokens"]
    id: str
    text: str
    """Original surface form (display)."""
    tokens: str
    """Space-joined pre-tokenised text (BM25 index target)."""
    # 4-dimensional embedding column; tests fill it with a fixed literal.
    vector: Vector(4)  # type: ignore[valid-type]
class _NoteRepo(LanceDailyLogRepoBase[_Note]):
    # Concrete daily-log repo bound to the ``_Note`` schema; it adds nothing
    # of its own, so the tests exercise the base-class helpers directly.
    schema = _Note
def _row(
    *,
    owner: str,
    entry: str,
    session: str = "sess_a",
    parent_type: str = "memcell",
    parent_id: str = "mc_1",
    md_path: str | None = None,
    text: str = "x",
) -> _Note:
    """Build a ``_Note`` whose id is ``"{owner}_{entry}"``.

    A falsy ``md_path`` falls back to ``users/{owner}/notes/{entry}.md``;
    the vector is always the same fixed 4-element literal.
    """
    resolved_path = md_path or f"users/{owner}/notes/{entry}.md"
    fields = {
        "id": f"{owner}_{entry}",
        "owner_id": owner,
        "entry_id": entry,
        "session_id": session,
        "parent_type": parent_type,
        "parent_id": parent_id,
        "md_path": resolved_path,
        "text": text,
        "vector": [1.0, 0.0, 0.0, 0.0],
    }
    return _Note(**fields)
@pytest.fixture(autouse=True)
def _reset_write_locks() -> None:
    """Clear the per-table write-lock pool before every test.

    ``LanceRepoBase`` lazily creates one ``asyncio.Lock`` per table name
    and keeps it in a class-level dict. Without this reset a lock would
    outlive the function-scoped pytest-asyncio event loop and the next
    test would fail with "bound to a different event loop".
    """
    reset = LanceRepoBase._reset_locks_for_tests
    reset()
@pytest.fixture
async def repo(tmp_path: Path) -> AsyncIterator[_NoteRepo]:
    """Yield a ``_NoteRepo`` over a fresh ``_note`` table under ``tmp_path``.

    The connection is closed on teardown so each test releases its
    LanceDB handle, whether the test passed or failed.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_note", schema=_Note)
        yield _NoteRepo(table=table)
    finally:
        conn.close()  # AsyncConnection.close() is sync
# ── add + get_by_id + count ──────────────────────────────────────────────
async def test_add_and_count(repo: _NoteRepo) -> None:
    """Adding two rows makes ``count()`` report 2."""
    batch = [_row(owner="u1", entry=entry) for entry in ("ep_1", "ep_2")]
    await repo.add(batch)
    assert await repo.count() == 2
async def test_get_by_id_returns_typed_instance(repo: _NoteRepo) -> None:
    """``get_by_id`` returns a ``_Note`` instance carrying the stored text."""
    await repo.add([_row(owner="u1", entry="ep_1", text="hello")])
    fetched = await repo.get_by_id("u1_ep_1")
    assert fetched is not None
    assert isinstance(fetched, _Note)
    assert fetched.text == "hello"
async def test_get_by_id_returns_none_when_missing(repo: _NoteRepo) -> None:
    """An unknown id yields ``None``."""
    missing = await repo.get_by_id("ghost")
    assert missing is None
# ── upsert ──────────────────────────────────────────────────────────────
async def test_upsert_inserts_on_new(repo: _NoteRepo) -> None:
    """Upserting an id that does not exist yet inserts the row."""
    fresh = _row(owner="u1", entry="ep_1", text="v1")
    await repo.upsert([fresh])
    stored = await repo.get_by_id("u1_ep_1")
    assert stored is not None
    assert stored.text == "v1"
async def test_upsert_updates_on_existing(repo: _NoteRepo) -> None:
    """Upserting an existing id replaces the row instead of appending a second one."""
    original = _row(owner="u1", entry="ep_1", text="v1")
    replacement = _row(owner="u1", entry="ep_1", text="v2")
    await repo.add([original])
    await repo.upsert([replacement])
    stored = await repo.get_by_id("u1_ep_1")
    assert stored is not None
    assert stored.text == "v2"
    assert await repo.count() == 1  # update, not append
# ── find_where / find_one_where ─────────────────────────────────────────
async def test_find_where_returns_typed_list(repo: _NoteRepo) -> None:
    """``find_where`` returns only the matching rows, each as a ``_Note``."""
    seeded = [
        _row(owner="u1", entry="ep_1"),
        _row(owner="u1", entry="ep_2"),
        _row(owner="u2", entry="ep_3"),
    ]
    await repo.add(seeded)
    matches = await repo.find_where("owner_id = 'u1'")
    assert len(matches) == 2
    assert all(isinstance(note, _Note) for note in matches)
    assert {note.entry_id for note in matches} == {"ep_1", "ep_2"}
async def test_find_one_where_returns_first_match(repo: _NoteRepo) -> None:
    """``find_one_where`` returns the row that satisfies the predicate."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    match = await repo.find_one_where("entry_id = 'ep_1'")
    assert match is not None
    assert match.entry_id == "ep_1"
async def test_find_one_where_returns_none(repo: _NoteRepo) -> None:
    """A predicate with no match yields ``None``."""
    match = await repo.find_one_where("entry_id = 'ghost'")
    assert match is None
# ── find_where_paginated ────────────────────────────────────────────────
async def test_find_where_paginated_first_page(repo: _NoteRepo) -> None:
    """Five rows, page 1 of size 2, descending: the two highest entry ids, total 5."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 6)]
    await repo.add(seeded)
    page, match_count = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=True,
        page=1,
        page_size=2,
    )
    assert match_count == 5
    assert [note.entry_id for note in page] == ["ep_5", "ep_4"]
async def test_find_where_paginated_last_page_partial(repo: _NoteRepo) -> None:
    """Five rows, page 3 of size 2: only the single trailing row comes back."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 6)]
    await repo.add(seeded)
    page, match_count = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=True,
        page=3,
        page_size=2,
    )
    assert match_count == 5
    assert [note.entry_id for note in page] == ["ep_1"]
async def test_find_where_paginated_ascending_sort(repo: _NoteRepo) -> None:
    """With ``descending=False`` rows come back in ascending ``entry_id`` order."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 4)]
    await repo.add(seeded)
    page, match_count = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=False,
        page=1,
        page_size=10,
    )
    assert match_count == 3
    assert [note.entry_id for note in page] == ["ep_1", "ep_2", "ep_3"]
async def test_find_where_paginated_empty_predicate(repo: _NoteRepo) -> None:
    """A predicate matching no rows gives an empty page and a total of 0."""
    page, match_count = await repo.find_where_paginated(
        "owner_id = 'ghost'",
        sort_by="entry_id",
        page=1,
        page_size=20,
    )
    assert page == []
    assert match_count == 0
async def test_find_where_paginated_filters_by_owner(repo: _NoteRepo) -> None:
    """``total`` counts rows matching the predicate, not every row in the table."""
    seeded = [
        _row(owner="u1", entry="ep_1"),
        _row(owner="u1", entry="ep_2"),
        _row(owner="u2", entry="ep_3"),
    ]
    await repo.add(seeded)
    page, match_count = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        page=1,
        page_size=10,
    )
    assert match_count == 2
    assert {note.entry_id for note in page} == {"ep_1", "ep_2"}
async def test_find_where_paginated_truncates_above_max_fetch(
    repo: _NoteRepo,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """When matches exceed ``max_fetch`` a warning is logged and a page is still returned.

    Contract checked here: ``total`` remains the true match count (10),
    the page holds ``page_size`` rows, and the message
    "find_where_paginated truncated" is logged. The warning is asserted
    through the standard ``caplog`` fixture because structlog is routed
    through stdlib logging (see
    ``core/observability/logging/factory.py``).
    """
    # Unit tests do not pass through the CLI entry, so the structlog →
    # stdlib bridge has to be configured here for ``caplog`` to see anything.
    from everos.core.observability.logging import configure_logging

    configure_logging(level="WARNING")

    seeded = [_row(owner="u1", entry=f"ep_{i:03d}") for i in range(1, 11)]
    await repo.add(seeded)

    with caplog.at_level("WARNING"):
        page, match_count = await repo.find_where_paginated(
            "owner_id = 'u1'",
            sort_by="entry_id",
            page=1,
            page_size=3,
            max_fetch=5,
        )

    assert match_count == 10  # true match count
    assert len(page) == 3
    assert "find_where_paginated truncated" in caplog.text
# ── 5-table shared: find_by_owner / find_by_md_path ─────────────────────
async def test_find_by_owner(repo: _NoteRepo) -> None:
    """``find_by_owner`` returns only the rows belonging to the given owner."""
    seeded = [
        _row(owner="u1", entry="ep_1"),
        _row(owner="u1", entry="ep_2"),
        _row(owner="u2", entry="ep_3"),
    ]
    await repo.add(seeded)
    owned = await repo.find_by_owner("u1")
    assert {note.entry_id for note in owned} == {"ep_1", "ep_2"}
async def test_find_by_md_path_round_trip(repo: _NoteRepo) -> None:
    """A row stored with an explicit ``md_path`` is found again by that path."""
    md_file = "users/u1/notes/ep_1.md"
    await repo.add([_row(owner="u1", entry="ep_1", md_path=md_file)])
    found = await repo.find_by_md_path(md_file)
    assert found is not None
    assert found.entry_id == "ep_1"
async def test_find_by_md_path_returns_none_when_missing(repo: _NoteRepo) -> None:
    """An unknown path yields ``None``."""
    found = await repo.find_by_md_path("users/u1/notes/ghost.md")
    assert found is None
# ── daily-log: find_by_owner_entry / find_by_session / find_by_parent ───
async def test_find_by_owner_entry(repo: _NoteRepo) -> None:
    """An (owner, entry) pair resolves to its row."""
    await repo.add([_row(owner="u1", entry="ep_7")])
    found = await repo.find_by_owner_entry("u1", "ep_7")
    assert found is not None
    assert found.entry_id == "ep_7"
async def test_find_by_owner_entry_returns_none_when_missing(
    repo: _NoteRepo,
) -> None:
    """An unknown (owner, entry) pair yields ``None``."""
    found = await repo.find_by_owner_entry("u1", "ghost")
    assert found is None
async def test_find_by_owner_entries_returns_only_matching_rows(
    repo: _NoteRepo,
) -> None:
    """The bulk lookup returns just the requested entry ids, for the given owner only."""
    seeded = [
        _row(owner="u1", entry="ep_1"),
        _row(owner="u1", entry="ep_2"),
        _row(owner="u1", entry="ep_3"),
        _row(owner="u2", entry="ep_1"),  # different owner — must not leak
    ]
    await repo.add(seeded)
    found = await repo.find_by_owner_entries("u1", ["ep_1", "ep_3"])
    assert {note.entry_id for note in found} == {"ep_1", "ep_3"}
    assert all(note.owner_id == "u1" for note in found)
async def test_find_by_owner_entries_empty_input_short_circuits(
    repo: _NoteRepo,
) -> None:
    """An empty id list returns ``[]`` rather than building ``WHERE entry_id IN ()``."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    found = await repo.find_by_owner_entries("u1", [])
    assert found == []
async def test_find_by_session(repo: _NoteRepo) -> None:
    """``find_by_session`` returns only the rows of the requested session."""
    seeded = [
        _row(owner="u1", entry="ep_1", session="sess_a"),
        _row(owner="u1", entry="ep_2", session="sess_a"),
        _row(owner="u1", entry="ep_3", session="sess_b"),
    ]
    await repo.add(seeded)
    in_session = await repo.find_by_session("u1", "sess_a")
    assert {note.entry_id for note in in_session} == {"ep_1", "ep_2"}
async def test_find_by_parent(repo: _NoteRepo) -> None:
    """``find_by_parent`` returns only rows with the given parent type and id."""
    seeded = [
        _row(owner="u1", entry="ep_1", parent_type="memcell", parent_id="mc_x"),
        _row(owner="u1", entry="ep_2", parent_type="memcell", parent_id="mc_x"),
        _row(owner="u1", entry="ep_3", parent_type="other", parent_id="mc_y"),
    ]
    await repo.add(seeded)
    children = await repo.find_by_parent("memcell", "mc_x")
    assert {note.entry_id for note in children} == {"ep_1", "ep_2"}
# ── chassis fallback behaviour ──────────────────────────────────────────
async def test_table_lookup_not_implemented_when_no_override() -> None:
    """A repo built without ``table=`` and without a ``_table_lookup`` override raises."""

    class _BareRepo(LanceRepoBase[_Note]):
        schema = _Note

    unbound = _BareRepo()
    with pytest.raises(NotImplementedError, match="_table_lookup"):
        await unbound.count()
async def test_table_name_derived_from_schema() -> None:
    """``repo.table_name`` reflects ``schema.TABLE_NAME``."""

    class _R(LanceRepoBase[_Note]):
        schema = _Note

    instance = _R()
    assert instance.table_name == "_note"  # equals _Note.TABLE_NAME
# ── SQL-quote escape defence ────────────────────────────────────────────
# ── BaseLanceTable.ensure_fts_indexes ───────────────────────────────────
async def test_ensure_fts_indexes_creates_index(tmp_path: Path) -> None:
    """Declared ``BM25_FIELDS`` becomes an FTS index after ensure.

    The connection is closed in ``finally`` so a failing assertion does
    not leave it open.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_search_note", schema=_SearchNote)
        # Seed one row before ensure_fts_indexes is called.
        await table.add(
            [
                _SearchNote(
                    id="1",
                    text="hello world",
                    tokens="hello world",
                    vector=[1, 0, 0, 0],
                )
            ]
        )
        await _SearchNote.ensure_fts_indexes(table)
        indices = await table.list_indices()
        # ``idx.columns`` may be None, hence the ``or []``.
        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
        assert "tokens" in indexed_cols
    finally:
        conn.close()  # AsyncConnection.close() is sync
async def test_ensure_fts_indexes_is_idempotent(tmp_path: Path) -> None:
    """Calling twice is safe — no error, no duplicate index.

    The connection is closed in ``finally`` so a failing assertion does
    not leave it open.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_search_note", schema=_SearchNote)
        await table.add(
            [_SearchNote(id="1", text="hi", tokens="hi", vector=[1, 0, 0, 0])]
        )
        await _SearchNote.ensure_fts_indexes(table)
        first = await table.list_indices()
        await _SearchNote.ensure_fts_indexes(table)
        second = await table.list_indices()
        # Same number of indices before and after the second call.
        assert len(first) == len(second)
    finally:
        conn.close()  # AsyncConnection.close() is sync
async def test_ensure_fts_indexes_noop_when_no_fields_declared(
    repo: _NoteRepo,
) -> None:
    """Calling ``ensure_fts_indexes`` on a schema without ``BM25_FIELDS`` does not raise."""
    note_table = await repo._table()
    # _Note declares no BM25_FIELDS, so the classmethod should do nothing.
    await _Note.ensure_fts_indexes(note_table)
    listed = await note_table.list_indices()
    # Other index kinds may exist by default; the only thing checked is that
    # no error path fired and the listing is iterable.
    is_list = isinstance(listed, list)
    assert is_list or hasattr(listed, "__iter__")
# ── SQL-quote escape defence ────────────────────────────────────────────
# ── delete_by_md_path ───────────────────────────────────────────────────
async def test_delete_by_md_path_removes_matching_row(repo: _NoteRepo) -> None:
    """Deleting by path removes that path's row, reports 1, and spares the other row."""
    doomed_path = "users/u1/notes/ep_1.md"
    seeded = [
        _row(owner="u1", entry="ep_1", md_path=doomed_path),
        _row(owner="u1", entry="ep_2"),
    ]
    await repo.add(seeded)
    removed = await repo.delete_by_md_path(doomed_path)
    assert removed == 1
    assert await repo.find_by_md_path(doomed_path) is None
    assert await repo.count() == 1  # the other row survived
async def test_delete_by_md_path_returns_zero_when_no_match(
    repo: _NoteRepo,
) -> None:
    """Deleting an unknown path reports 0 and leaves the table untouched."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    removed = await repo.delete_by_md_path("users/u1/notes/ghost.md")
    assert removed == 0
    assert await repo.count() == 1
async def test_delete_by_md_path_removes_multiple_entries_one_file(
    repo: _NoteRepo,
) -> None:
    """Every row sharing the deleted path is removed; rows on other paths remain."""
    daily_log = "users/u1/notes/episode-2026-05-12.md"
    seeded = [
        _row(owner="u1", entry=entry, md_path=daily_log)
        for entry in ("ep_1", "ep_2", "ep_3")
    ]
    seeded.append(_row(owner="u2", entry="ep_4"))  # different path, untouched
    await repo.add(seeded)
    removed = await repo.delete_by_md_path(daily_log)
    assert removed == 3
    assert await repo.count() == 1
async def test_delete_by_md_path_escapes_single_quotes(
    repo: _NoteRepo,
) -> None:
    """A single quote inside the path does not break the delete predicate."""
    quoted_path = "users/u1/notes/it's.md"
    await repo.add([_row(owner="u1", entry="ep_1", md_path=quoted_path)])
    removed = await repo.delete_by_md_path(quoted_path)
    assert removed == 1
# ── SQL-quote escape defence (kept) ─────────────────────────────────────
async def test_get_by_id_escapes_single_quotes(repo: _NoteRepo) -> None:
    """A single quote inside the id does not break the lookup predicate."""
    quoted_id = "u1_it's_fine"
    note = _Note(
        id=quoted_id,
        owner_id="u1",
        entry_id="it's_fine",
        session_id="s",
        parent_type="memcell",
        parent_id="mc_1",
        md_path="x",
        text="t",
        vector=[1.0, 0.0, 0.0, 0.0],
    )
    await repo.add([note])
    fetched = await repo.get_by_id(quoted_id)
    assert fetched is not None
    assert fetched.entry_id == "it's_fine"
# ── Concurrency: per-table write lock ───────────────────────────────────
async def test_concurrent_upsert_disjoint_ids_no_lost_update(
    repo: _NoteRepo,
) -> None:
    """Regression for Bug B: concurrent upserts of distinct ids must all land.

    A cascade ``asyncio.gather`` over rows of the same kind used to race
    on ``merge_insert`` and drop a write (seen on ``user_profile``, where
    two disjoint INSERTs left a single row). The per-table
    ``asyncio.Lock`` in :meth:`LanceRepoBase.upsert` must serialise the
    writes so that every submitted row is present afterwards.
    """
    total = 16
    notes = [_row(owner=f"u_{i}", entry=f"ep_{i}") for i in range(total)]
    pending = [repo.upsert([note]) for note in notes]
    await asyncio.gather(*pending)

    assert await repo.count() == total
    for i in range(total):
        stored = await repo.get_by_id(f"u_{i}_ep_{i}")
        assert stored is not None, f"u_{i}_ep_{i} disappeared after concurrent upsert"
async def test_concurrent_upsert_same_id_last_writer_wins(
    repo: _NoteRepo,
) -> None:
    """Two concurrent upserts of the same id converge to exactly one row.

    Either text may win; what must not happen is a duplicate row or a
    torn state.
    """
    contenders = [
        _row(owner="u1", entry="ep_1", text=label) for label in ("A", "B")
    ]
    await asyncio.gather(*(repo.upsert([note]) for note in contenders))
    assert await repo.count() == 1
    survivor = await repo.get_by_id("u1_ep_1")
    assert survivor is not None
    assert survivor.text in {"A", "B"}
async def test_read_not_blocked_by_write_lock(repo: _NoteRepo) -> None:
    """A read completes while the table's write lock is held.

    Only write paths are meant to take the lock, so the test holds it
    manually and checks that ``get_by_id`` still resolves within the
    timeout.
    """
    await repo.add([_row(owner="u1", entry="ep_1", text="seed")])
    table_lock = repo._write_lock(repo.table_name)
    async with table_lock:
        # The read must finish even though the lock is currently taken.
        pending_read = repo.get_by_id("u1_ep_1")
        fetched = await asyncio.wait_for(pending_read, timeout=2.0)
    assert fetched is not None
    assert fetched.text == "seed"
async def test_write_lock_is_per_table(tmp_path: Path) -> None:
    """Distinct tables share no lock — writes on table A do not stall
    writes on table B.

    The connection is closed in ``finally`` so it is released even when
    the assertion fails.
    """

    class _OtherNote(BaseLanceTable):
        TABLE_NAME: ClassVar[str] = "_other_note"
        id: str
        owner_id: str
        entry_id: str
        session_id: str
        parent_type: str
        parent_id: str
        md_path: str
        text: str
        vector: Vector(4)  # type: ignore[valid-type]

    class _OtherRepo(LanceDailyLogRepoBase[_OtherNote]):
        schema = _OtherNote

    class _NoteARepo(LanceDailyLogRepoBase[_Note]):
        schema = _Note

        # Overridden so this repo reports the "_note_a" table it is bound to.
        @property
        def table_name(self) -> str:
            return "_note_a"

    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table_a = await conn.create_table("_note_a", schema=_Note)
        table_b = await conn.create_table(_OtherNote.TABLE_NAME, schema=_OtherNote)
        repo_a = _NoteARepo(table=table_a)
        repo_b = _OtherRepo(table=table_b)
        # Different table names must map to different lock objects.
        assert repo_a._write_lock(repo_a.table_name) is not repo_b._write_lock(
            repo_b.table_name
        )
    finally:
        conn.close()  # AsyncConnection.close() is sync

View File

@ -0,0 +1,82 @@
"""LanceDB IO toolkit — typical workflow demo.
End-to-end story for how to author + use a LanceDB-backed table in everos:
1. Define a table schema by subclassing :class:`BaseLanceTable` and
declaring a ``Vector(N)`` column for the embedding.
2. ``open_lancedb_connection`` to get an :class:`AsyncConnection`.
3. ``conn.create_table(name, schema=Cls)`` to create the table from
the Pydantic schema.
4. ``table.add(rows)`` to insert.
5. ``table.query().nearest_to(vec).limit(k).to_list()`` for vector
search (BM25 + scalar filter can chain in the same query).
6. ``table.count_rows()`` for size.
7. Mutate via :func:`touch` + :meth:`AsyncTable.update` (LanceDB has
no SQL ``onupdate`` equivalent — the app must bump ``updated_at``).
8. ``table.delete(predicate)`` to remove rows.
"""
from __future__ import annotations
from pathlib import Path
from everos.config import LanceDBSettings
from everos.core.persistence import (
BaseLanceTable,
MemoryRoot,
Vector,
open_lancedb_connection,
)
class _DemoNote(BaseLanceTable):
    """Demo table — used only by this test module."""

    # Text payload; the workflow test filters and deletes rows by this value.
    text: str
    # A call expression is not a valid static type, hence the ignore marker.
    vector: Vector(4)  # type: ignore[valid-type]  # 4-dim for the test fixture
async def test_lancedb_typical_workflow(tmp_path: Path) -> None:
    """Walk through open → create → add → count → search → filter → delete → list.

    Everything after the connection is opened runs inside ``try`` and the
    connection is closed in ``finally``, so a failing step cannot leak it.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    settings = LanceDBSettings()
    # 1. Open async connection rooted at <memory_root>/.index/lancedb/
    conn = await open_lancedb_connection(mr.lancedb_dir, settings)
    try:
        # 2. Create the table from the BaseLanceTable schema
        table = await conn.create_table("_demo_notes", schema=_DemoNote)
        # 3. Insert rows (Pydantic instances; created_at / updated_at filled in
        #    by BaseLanceTable's default_factory).
        rows = [
            _DemoNote(text="hello world", vector=[1.0, 0.0, 0.0, 0.0]),
            _DemoNote(text="goodbye cruel world", vector=[0.0, 1.0, 0.0, 0.0]),
            _DemoNote(text="welcome aboard", vector=[1.0, 0.5, 0.0, 0.0]),
        ]
        await table.add(rows)
        # 4. Count
        assert await table.count_rows() == 3
        # 5. Vector search — nearest_to picks rows by ANN distance.
        results = (
            await table.query().nearest_to([0.95, 0.05, 0.0, 0.0]).limit(2).to_list()
        )
        assert len(results) == 2
        # The closest row to [0.95, 0.05, 0, 0] is "hello world" [1, 0, 0, 0]
        # ahead of "welcome aboard" [1, 0.5, 0, 0].
        assert results[0]["text"] == "hello world"
        # 6. Filter (scalar predicate). LanceDB SQL-like predicate string.
        only_hello = await table.query().where("text = 'hello world'").to_list()
        assert len(only_hello) == 1
        assert only_hello[0]["text"] == "hello world"
        # 7. Delete by predicate
        await table.delete("text = 'goodbye cruel world'")
        assert await table.count_rows() == 2
        # 8. List tables on the connection
        tables_response = await conn.list_tables()
        assert "_demo_notes" in list(tables_response.tables)
    finally:
        conn.close()  # AsyncConnection.close() is sync

View File

@ -0,0 +1,96 @@
"""Unit tests for memory_root_lock async context manager."""
from __future__ import annotations
import multiprocessing
import time
from pathlib import Path
import anyio
import pytest
from everos.core.persistence import LockError, MemoryRoot, memory_root_lock
async def test_lock_creates_anchor_file(tmp_path: Path) -> None:
    """While the lock is held, the lock file exists on disk."""
    root = MemoryRoot(tmp_path)
    async with memory_root_lock(root):
        anchor_present = root.lock_file.exists()
        assert anchor_present
async def test_lock_acquire_release_acquire(tmp_path: Path) -> None:
    """The same process can take the lock again after releasing it."""
    root = MemoryRoot(tmp_path)
    # Two back-to-back acquire/release cycles; the second must not fail.
    for _ in range(2):
        async with memory_root_lock(root):
            pass
def _hold_lock(memory_root_path: str, ready: object, release: object) -> None:
    """Subprocess target: take the lock, signal ``ready``, wait on ``release``.

    ``memory_root_lock`` is an async context manager, so the subprocess
    drives it on its own event loop through :func:`anyio.run`.
    """

    async def _acquire_and_wait() -> None:
        root = MemoryRoot(memory_root_path)
        async with memory_root_lock(root, blocking=True):
            ready.set()
            # Wait (up to 5 s) in a worker thread so the loop is not blocked.
            await anyio.to_thread.run_sync(release.wait, 5)

    anyio.run(_acquire_and_wait)
async def test_nonblocking_raises_when_held_by_other_process(tmp_path: Path) -> None:
    """Different process holding the lock → blocking=False raises LockError."""
    mr = MemoryRoot(tmp_path)
    # "spawn" gives the child a fresh interpreter, so nothing about the
    # lock is inherited from this process.
    ctx = multiprocessing.get_context("spawn")
    ready = ctx.Event()
    release = ctx.Event()
    proc = ctx.Process(target=_hold_lock, args=(str(mr.root), ready, release))
    proc.start()
    try:
        assert ready.wait(timeout=5), "subprocess failed to acquire lock"
        with pytest.raises(LockError):
            async with memory_root_lock(mr, blocking=False):
                pass
    finally:
        # Always let the holder go, even when an assertion above failed.
        release.set()
        proc.join(timeout=5)
        if proc.is_alive():
            proc.terminate()
            # Reap the terminated child so it does not linger as a zombie.
            proc.join(timeout=5)
async def test_blocking_waits_for_release(tmp_path: Path) -> None:
    """Different process holding lock + main process blocking=True waits."""
    import threading

    mr = MemoryRoot(tmp_path)
    ctx = multiprocessing.get_context("spawn")
    ready = ctx.Event()
    release = ctx.Event()
    proc = ctx.Process(target=_hold_lock, args=(str(mr.root), ready, release))
    proc.start()
    try:
        assert ready.wait(timeout=5), "subprocess failed to acquire lock"
        # Schedule the subprocess to release shortly; main process should
        # acquire the lock after that.
        release_started = time.monotonic()

        def release_after_short_delay() -> None:
            time.sleep(0.2)
            release.set()

        threading.Thread(target=release_after_short_delay, daemon=True).start()
        async with memory_root_lock(mr, blocking=True):
            elapsed = time.monotonic() - release_started
        # Should have waited at least roughly the delay.
        assert elapsed >= 0.1
    finally:
        # Always let the holder go, even when an assertion above failed.
        release.set()
        proc.join(timeout=5)
        if proc.is_alive():
            proc.terminate()
            # Reap the terminated child so it does not linger as a zombie.
            proc.join(timeout=5)

View File

@ -0,0 +1,68 @@
"""Tests for Frontmatter base classes (chassis layer)."""
from __future__ import annotations
import pytest
from pydantic import ValidationError
from everos.core.persistence.markdown import (
AgentScopedFrontmatter,
BaseFrontmatter,
UserScopedFrontmatter,
)
def test_base_requires_id_and_type() -> None:
    """Constructing ``BaseFrontmatter`` with no arguments fails validation."""
    with pytest.raises(ValidationError):
        BaseFrontmatter()  # type: ignore[call-arg]
def test_base_default_schema_version_is_one() -> None:
    """``schema_version`` is 1 when the caller does not supply it."""
    model = BaseFrontmatter(id="x", type="t")
    version = model.schema_version
    assert version == 1
def test_base_extra_fields_allowed() -> None:
    """Undeclared keyword fields are accepted and show up in ``model_dump``."""
    extras = {
        "md_sha256": "abc",  # L2
        "last_indexed_at": "2026-04-22T10:00:00Z",
        "custom_user_field": "anything",  # L4
    }
    dumped = BaseFrontmatter(id="x", type="t", **extras).model_dump()
    assert dumped["md_sha256"] == "abc"
    assert dumped["custom_user_field"] == "anything"
def test_user_scoped_track_default() -> None:
    """``track`` defaults to ``"user"`` on user-scoped frontmatter."""
    model = UserScopedFrontmatter(id="x", type="t", user_id="u_jason")
    assert model.track == "user"
def test_user_scoped_requires_user_id() -> None:
    """Omitting ``user_id`` fails validation."""
    with pytest.raises(ValidationError):
        UserScopedFrontmatter(id="x", type="t")  # type: ignore[call-arg]
def test_agent_scoped_track_default() -> None:
    """``track`` defaults to ``"agent"`` on agent-scoped frontmatter."""
    model = AgentScopedFrontmatter(id="x", type="t", agent_id="agent_zhangsan")
    assert model.track == "agent"
def test_agent_scoped_requires_agent_id() -> None:
    """Omitting ``agent_id`` fails validation."""
    with pytest.raises(ValidationError):
        AgentScopedFrontmatter(id="x", type="t")  # type: ignore[call-arg]
def test_track_literal_rejects_invalid_value() -> None:
    """``track="agent"`` is rejected on user-scoped frontmatter."""
    kwargs = {"id": "x", "type": "t", "user_id": "u", "track": "agent"}
    with pytest.raises(ValidationError):
        UserScopedFrontmatter(**kwargs)
def test_scope_dir_classvars() -> None:
    """Each frontmatter class exposes the expected ``SCOPE_DIR`` value."""
    expectations = [
        (BaseFrontmatter, ""),  # scope-agnostic by default
        (UserScopedFrontmatter, "users"),
        (AgentScopedFrontmatter, "agents"),
    ]
    for model, scope_dir in expectations:
        assert model.SCOPE_DIR == scope_dir

View File

@ -0,0 +1,94 @@
"""Unit tests for entry marker parsing."""
from __future__ import annotations
from everos.core.persistence import find_entry, split_entries
def test_split_no_entries() -> None:
    """Text without entry markers yields an empty list."""
    found = split_entries("# heading\n\nbody.")
    assert found == []
def test_split_single_entry() -> None:
    """One marker pair yields one entry whose offsets span both markers."""
    text = (
        "preamble\n"
        "<!-- entry:abc123 -->\n"
        "content here\n"
        "<!-- /entry:abc123 -->\n"
        "trailing\n"
    )
    found = split_entries(text)
    assert len(found) == 1
    entry = found[0]
    assert entry.id == "abc123"
    assert entry.body == "content here"
    # The [start, end) slice begins with the opening marker and ends with
    # the closing one.
    span = text[entry.start : entry.end]
    assert span.startswith("<!-- entry:abc123 -->")
    assert span.endswith("<!-- /entry:abc123 -->")
def test_split_multiple_entries() -> None:
    """Two consecutive marker pairs come back in document order."""
    text = (
        "<!-- entry:e1 -->\nfirst\n<!-- /entry:e1 -->\n"
        "<!-- entry:e2 -->\nsecond\n<!-- /entry:e2 -->\n"
    )
    found = split_entries(text)
    ids = [entry.id for entry in found]
    assert ids == ["e1", "e2"]
    assert found[0].body == "first"
    assert found[1].body == "second"
def test_split_unmatched_open() -> None:
    """An opening marker with no close is dropped; earlier entries remain."""
    text = "<!-- entry:e1 -->\nok\n<!-- /entry:e1 -->\n<!-- entry:e2 -->\nno close\n"
    ids = [entry.id for entry in split_entries(text)]
    assert ids == ["e1"]
def test_split_mismatched_id() -> None:
    """A close marker carrying a different id does not terminate the entry."""
    text = "<!-- entry:e1 -->\ncontent\n<!-- /entry:other -->\n"
    found = split_entries(text)
    assert found == []
def test_split_id_with_underscore_and_hyphen() -> None:
    """Ids containing ``_`` and ``-`` are recognised."""
    text = "<!-- entry:abc_def-123 -->\nx\n<!-- /entry:abc_def-123 -->\n"
    found = split_entries(text)
    assert len(found) == 1
    assert found[0].id == "abc_def-123"
def test_split_offsets_consistent() -> None:
    """``start``/``end`` slice out exactly the marker-to-marker text."""
    text = "before\n<!-- entry:e1 -->\nx\n<!-- /entry:e1 -->\nafter\n"
    entry = split_entries(text)[0]
    sliced = text[entry.start : entry.end]
    assert sliced == "<!-- entry:e1 -->\nx\n<!-- /entry:e1 -->"
def test_find_entry_found() -> None:
    """``find_entry`` returns the entry whose id matches."""
    text = (
        "<!-- entry:a -->\nfirst\n<!-- /entry:a -->\n"
        "<!-- entry:b -->\nsecond\n<!-- /entry:b -->\n"
    )
    hit = find_entry(text, "b")
    assert hit is not None
    assert hit.id == "b"
    assert hit.body == "second"
def test_find_entry_not_found() -> None:
    """An id that is absent from the text yields ``None``."""
    text = "<!-- entry:a -->\nx\n<!-- /entry:a -->\n"
    hit = find_entry(text, "missing")
    assert hit is None
def test_find_entry_open_without_close() -> None:
    """An opening marker without its close marker is not a match."""
    unterminated = "<!-- entry:a -->\nx\n"  # no close
    hit = find_entry(unterminated, "a")
    assert hit is None
def test_split_entry_body_no_internal_newline_stripping() -> None:
    """Blank lines inside the body survive; the newlines next to the
    markers are not part of the body."""
    text = "<!-- entry:e1 -->\nline1\n\nline3\n<!-- /entry:e1 -->\n"
    entry = split_entries(text)[0]
    assert entry.body == "line1\n\nline3"

View File

@ -0,0 +1,99 @@
"""Tests for ``EntryId`` parse / format / next_for."""
from __future__ import annotations
import datetime as dt
import pytest
from everos.core.persistence import EntryId
# ── format ───────────────────────────────────────────────────────────────
def test_format_pads_seq_to_eight_digits() -> None:
    """``seq=1`` is zero-padded to eight digits."""
    day = dt.date(2026, 4, 22)
    rendered = EntryId(prefix="umc", date=day, seq=1).format()
    assert rendered == "umc_20260422_00000001"
def test_format_pads_seq_at_99999999() -> None:
    """The largest eight-digit seq is emitted unchanged."""
    day = dt.date(2026, 4, 22)
    rendered = EntryId(prefix="umc", date=day, seq=99_999_999).format()
    assert rendered == "umc_20260422_99999999"
def test_str_uses_format() -> None:
    """``str(entry_id)`` yields the formatted id string."""
    entry_id = EntryId(prefix="ep", date=dt.date(2026, 1, 1), seq=42)
    text = str(entry_id)
    assert text == "ep_20260101_00000042"
# ── parse ────────────────────────────────────────────────────────────────
def test_parse_round_trip() -> None:
    """Parsing exposes prefix/date/seq, and formatting restores the input."""
    source = "umc_20260422_00000001"
    parsed = EntryId.parse(source)
    assert parsed.prefix == "umc"
    assert parsed.date == dt.date(2026, 4, 22)
    assert parsed.seq == 1
    assert parsed.format() == source
def test_parse_handles_seq_above_pad_width() -> None:
    """A nine-digit seq parses and is formatted without truncation."""
    source = "umc_20260422_150000000"
    parsed = EntryId.parse(source)
    assert parsed.seq == 150_000_000
    assert parsed.format() == "umc_20260422_150000000"
def test_parse_accepts_legacy_four_digit_seq() -> None:
    """A four-digit seq parses; ``format`` re-emits it with eight digits."""
    parsed = EntryId.parse("umc_20260422_0001")
    assert parsed.seq == 1
    # Formatting uses the eight-digit padding, not the input width.
    assert parsed.format() == "umc_20260422_00000001"
def test_parse_accepts_legacy_three_digit_seq() -> None:
    """A three-digit seq parses; ``format`` re-emits it with eight digits."""
    parsed = EntryId.parse("umc_20260422_001")
    assert parsed.seq == 1
    assert parsed.format() == "umc_20260422_00000001"
def test_parse_rejects_too_few_segments() -> None:
    """An id with no seq segment raises ``ValueError``."""
    malformed = "umc_20260422"
    with pytest.raises(ValueError, match="invalid entry id format"):
        EntryId.parse(malformed)
def test_parse_rejects_invalid_date() -> None:
    """A non-numeric date segment raises ``ValueError``."""
    malformed = "umc_2026XX22_00000001"
    with pytest.raises(ValueError, match="invalid date"):
        EntryId.parse(malformed)
def test_parse_rejects_non_numeric_seq() -> None:
    """A non-numeric seq segment raises ``ValueError``."""
    malformed = "umc_20260422_xxxx"
    with pytest.raises(ValueError, match="invalid seq"):
        EntryId.parse(malformed)
def test_parse_rejects_empty_prefix() -> None:
    """An id that starts with ``_`` (empty prefix) raises ``ValueError``."""
    malformed = "_20260422_00000001"
    with pytest.raises(ValueError, match="empty prefix"):
        EntryId.parse(malformed)
# ── next_for ─────────────────────────────────────────────────────────────
def test_next_for_seq_is_count_plus_one() -> None:
    """With two existing entries the next seq is 3."""
    day = dt.date(2026, 4, 22)
    upcoming = EntryId.next_for("umc", day, current_count=2)
    assert upcoming.seq == 3
    assert upcoming.format() == "umc_20260422_00000003"
def test_next_for_starts_at_one_when_empty() -> None:
    """With no existing entries the first seq is 1."""
    day = dt.date(2026, 4, 22)
    first = EntryId.next_for("umc", day, current_count=0)
    assert first.seq == 1
def test_next_for_rejects_negative_count() -> None:
    """A negative ``current_count`` raises ``ValueError``."""
    day = dt.date(2026, 4, 22)
    with pytest.raises(ValueError, match="must be >= 0"):
        EntryId.next_for("umc", day, current_count=-1)

View File

@ -0,0 +1,168 @@
"""Unit tests for frontmatter parse / dump + path_glob chassis."""
from __future__ import annotations
from typing import ClassVar, Literal
import pytest
from everos.core.persistence import (
AgentScopedFrontmatter,
BaseFrontmatter,
DailyLogPathMixin,
SkillPathMixin,
UserScopedFrontmatter,
dump_frontmatter,
parse_frontmatter,
)
def test_parse_no_frontmatter() -> None:
    """Text with no ``---`` block yields an empty mapping and the input."""
    raw = "# Just a heading\n\nbody."
    front, rest = parse_frontmatter(raw)
    assert front == {}
    assert rest == raw
def test_parse_empty_frontmatter() -> None:
    """An empty ``---``/``---`` block is consumed and yields no keys."""
    raw = "---\n---\n# body\n"
    front, rest = parse_frontmatter(raw)
    assert front == {}
    assert rest == "# body\n"
def test_parse_simple_frontmatter() -> None:
    """Scalar and inline-list values are parsed into a mapping."""
    raw = "---\ntitle: Hello\ntags: [a, b]\n---\n# body\n"
    front, rest = parse_frontmatter(raw)
    assert front == {"title": "Hello", "tags": ["a", "b"]}
    assert rest == "# body\n"
def test_parse_nested_frontmatter() -> None:
    """A nested YAML mapping is returned as a nested dict."""
    raw = "---\nuser:\n id: u_1\n name: Alice\n---\nbody"
    front, rest = parse_frontmatter(raw)
    assert front == {"user": {"id": "u_1", "name": "Alice"}}
    assert rest == "body"
def test_parse_no_closing_delim() -> None:
    """Without a closing ``---`` the text is returned untouched."""
    raw = "---\ntitle: Hello\n# body without closing\n"
    front, rest = parse_frontmatter(raw)
    assert front == {}
    assert rest == raw
def test_parse_non_mapping_yaml() -> None:
    """A YAML list between the delimiters yields ``{}`` and the input text."""
    raw = "---\n- item1\n- item2\n---\nbody\n"
    front, rest = parse_frontmatter(raw)
    assert front == {}
    assert rest == raw
def test_parse_opening_delim_no_newline() -> None:
    """``---`` not followed by a newline is not a frontmatter opener."""
    raw = "---this is not frontmatter"
    front, rest = parse_frontmatter(raw)
    assert front == {}
    assert rest == raw
def test_parse_unicode_values() -> None:
    """Non-ASCII text is preserved in both the mapping and the body."""
    raw = "---\ntitle: 你好\n---\n世界"
    front, rest = parse_frontmatter(raw)
    assert front == {"title": "你好"}
    assert rest == "世界"
def test_dump_empty_mapping_returns_empty_string() -> None:
    """Dumping an empty mapping produces the empty string."""
    rendered = dump_frontmatter({})
    assert rendered == ""
def test_dump_simple_mapping() -> None:
    """The dump is wrapped in ``---`` lines and contains the key/value."""
    rendered = dump_frontmatter({"title": "Hello"})
    assert "title: Hello" in rendered
    assert rendered.startswith("---\n")
    assert rendered.endswith("---\n")
def test_dump_preserves_key_order() -> None:
    """Keys are emitted in insertion order, not sorted."""
    rendered = dump_frontmatter({"z": 1, "a": 2, "m": 3})
    # Peel off the delimiter dashes and newlines, leaving the YAML lines.
    inner = rendered.strip("-\n").strip()
    seen = [
        line.partition(":")[0] for line in inner.splitlines() if ":" in line
    ]
    assert seen == ["z", "a", "m"]
def test_dump_unicode() -> None:
    """Non-ASCII values appear verbatim (not escaped) in the dump."""
    rendered = dump_frontmatter({"title": "你好"})
    assert "你好" in rendered
def test_round_trip() -> None:
    """``dump_frontmatter`` + body parses back to the same mapping and body."""
    original = {"title": "Hello", "tags": ["a", "b"], "nested": {"k": "v"}}
    tail = "# Body\n\nLine.\n"
    document = dump_frontmatter(original) + tail
    recovered, recovered_tail = parse_frontmatter(document)
    assert recovered == original
    assert recovered_tail == tail
# ── path_glob chassis ───────────────────────────────────────────────────
def test_base_path_glob_raises_not_implemented() -> None:
    """Without a path-strategy mixin, ``path_glob`` raises ``NotImplementedError``."""

    class _PlainFm(BaseFrontmatter):
        type: Literal["_plain"] = "_plain"

    with pytest.raises(NotImplementedError, match="path_glob"):
        _PlainFm.path_glob()
def test_daily_log_path_glob_user_scope() -> None:
    """Mixin builds ``*/*/users/*/<dir>/<prefix>-*.md`` from the ClassVars."""

    class _UserDaily(DailyLogPathMixin, UserScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "demo"
        FILE_PREFIX: ClassVar[str] = "entry"
        type: Literal["_user_daily"] = "_user_daily"

    pattern = _UserDaily.path_glob()
    assert pattern == "*/*/users/*/demo/entry-*.md"
def test_daily_log_path_glob_agent_scope() -> None:
    """With the agent scope the glob is ``*/*/agents/*/<dir>/<prefix>-*.md``."""

    class _AgentDaily(DailyLogPathMixin, AgentScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "cases"
        FILE_PREFIX: ClassVar[str] = "case"
        type: Literal["_agent_daily"] = "_agent_daily"

    pattern = _AgentDaily.path_glob()
    assert pattern == "*/*/agents/*/cases/case-*.md"
def test_skill_path_glob() -> None:
    """SkillPathMixin builds ``*/*/<scope>/*/<container>/<prefix>*/<main>``."""

    class _AgentSkill(SkillPathMixin, AgentScopedFrontmatter):
        SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
        SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
        SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
        type: Literal["_agent_skill"] = "_agent_skill"

    pattern = _AgentSkill.path_glob()
    assert pattern == "*/*/agents/*/skills/skill_*/SKILL.md"
def test_strategy_mixin_overrides_base_via_mro() -> None:
    """Listing the strategy mixin first makes its ``path_glob`` the one used."""

    class _Daily(DailyLogPathMixin, UserScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "x"
        FILE_PREFIX: ClassVar[str] = "y"
        type: Literal["_daily_mro"] = "_daily_mro"

    # The call returns a string rather than raising NotImplementedError.
    first = _Daily.path_glob()
    assert isinstance(first, str)
    second = _Daily.path_glob()
    assert "NotImplementedError" not in second

View File

@ -0,0 +1,66 @@
"""Unit tests for MarkdownReader."""
from __future__ import annotations
import datetime
from pathlib import Path
from everos.core.persistence import MarkdownReader
def test_parse_text_with_frontmatter_and_entries() -> None:
    """``MarkdownReader.parse`` splits frontmatter, body and entries."""
    document = (
        "---\n"
        "title: Day Log\n"
        "date: 2026-04-22\n"
        "---\n"
        "# Header\n"
        "<!-- entry:e1 -->\n"
        "first entry\n"
        "<!-- /entry:e1 -->\n"
    )
    result = MarkdownReader.parse(document)
    # The unquoted ISO date comes back as a ``datetime.date`` instance.
    expected_front = {
        "title": "Day Log",
        "date": datetime.date(2026, 4, 22),
    }
    assert result.frontmatter == expected_front
    assert "# Header" in result.body
    assert len(result.entries) == 1
    only = result.entries[0]
    assert only.id == "e1"
    assert only.body == "first entry"
def test_parse_no_frontmatter_no_entries() -> None:
    """Plain markdown yields empty frontmatter, the full body, no entries."""
    document = "# Just a header\n\nbody.\n"
    result = MarkdownReader.parse(document)
    assert result.entries == []
    assert result.frontmatter == {}
    assert result.body == document
def test_parse_only_frontmatter() -> None:
    """A document that is only frontmatter has an empty body and no entries."""
    document = "---\nkey: value\n---\n"
    result = MarkdownReader.parse(document)
    assert result.frontmatter == {"key": "value"}
    assert result.body == ""
    assert result.entries == []
async def test_read_file(tmp_path: Path) -> None:
    """``MarkdownReader.read`` parses a file written to disk."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(
        "---\nk: v\n---\n<!-- entry:x -->\nbody\n<!-- /entry:x -->\n",
        encoding="utf-8",
    )
    result = await MarkdownReader.read(doc_path)
    assert result.frontmatter == {"k": "v"}
    assert result.entries[0].id == "x"
async def test_read_unicode_file(tmp_path: Path) -> None:
    """A UTF-8 file with non-ASCII content is read back intact."""
    doc_path = tmp_path / "zh.md"
    doc_path.write_text("---\ntitle: 你好\n---\n世界\n", encoding="utf-8")
    result = await MarkdownReader.read(doc_path)
    assert result.frontmatter == {"title": "你好"}
    assert result.body == "世界\n"

View File

@ -0,0 +1,214 @@
"""Tests for the audit-form structured entry chassis."""
from __future__ import annotations
import pytest
from everos.core.persistence.markdown import (
StructuredEntry,
parse_structured_entry,
render_structured_entry,
)
# ── render ───────────────────────────────────────────────────────────────
def test_render_with_header_inline_and_sections() -> None:
    """Header renders as H2, inline as ``**k**: v``, sections as H3 blocks."""
    inline_fields = {
        "type": "episode",
        "user_id": "u_jason",
        "group_id": "sp_1",
    }
    rendered = render_structured_entry(
        header="ep_20260422_001",
        inline=inline_fields,
        sections={"Summary": "first line\nsecond line"},
    )
    assert rendered.startswith("## ep_20260422_001\n\n")
    assert "**type**: episode" in rendered
    assert "**user_id**: u_jason" in rendered
    assert "**group_id**: sp_1" in rendered
    assert "### Summary\nfirst line\nsecond line" in rendered
def test_render_inline_only_no_header_no_sections() -> None:
    """A single inline field renders as exactly one ``**k**: v`` line."""
    rendered = render_structured_entry(inline={"k": "v"})
    assert rendered == "**k**: v"
def test_render_lists_use_bracket_notation() -> None:
    """Lists and tuples both render as ``[a, b]``."""
    inline_fields = {"participants": ["u_jason", "u_sarah"], "tags": ("a", "b")}
    rendered = render_structured_entry(inline=inline_fields)
    assert "**participants**: [u_jason, u_sarah]" in rendered
    assert "**tags**: [a, b]" in rendered
def test_render_none_value_renders_empty() -> None:
    """``None`` renders as an empty value after the colon and space."""
    rendered = render_structured_entry(inline={"optional": None})
    assert rendered == "**optional**: "
def test_render_scalar_uses_str() -> None:
    """Ints, floats and bools are rendered via their ``str`` form."""
    rendered = render_structured_entry(
        inline={"count": 3, "ratio": 0.5, "active": True}
    )
    for expected_line in ("**count**: 3", "**ratio**: 0.5", "**active**: True"):
        assert expected_line in rendered
# ── parse ────────────────────────────────────────────────────────────────
def test_parse_full_round_trip() -> None:
    """Rendering then parsing recovers header, inline fields and sections."""
    inline_fields = {"type": "episode", "user_id": "u_jason"}
    section_map = {"Summary": "the summary", "Body": "the body"}
    rendered = render_structured_entry(
        header="ep_001",
        inline=inline_fields,
        sections=section_map,
    )
    parsed = parse_structured_entry(rendered)
    assert parsed.header == "ep_001"
    assert parsed.inline == {"type": "episode", "user_id": "u_jason"}
    assert parsed.sections == {"Summary": "the summary", "Body": "the body"}
def test_parse_no_header_yields_none() -> None:
    """With no H2 line, ``header`` is ``None``; the rest still parses."""
    text = "**k**: v\n\n### Section\nbody"
    parsed = parse_structured_entry(text)
    assert parsed.header is None
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {"Section": "body"}
def test_parse_no_inline() -> None:
    """With no ``**k**: v`` lines, ``inline`` is an empty dict."""
    text = "## ep_001\n\n### Body\nonly section"
    parsed = parse_structured_entry(text)
    assert parsed.header == "ep_001"
    assert parsed.inline == {}
    assert parsed.sections == {"Body": "only section"}
def test_parse_no_sections() -> None:
    """With no H3 blocks, ``sections`` is an empty dict."""
    text = "## ep_001\n\n**k**: v"
    parsed = parse_structured_entry(text)
    assert parsed.header == "ep_001"
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {}
def test_parse_inline_value_with_colon_kept_verbatim() -> None:
    """Colons inside the value do not split it further."""
    text = "**timestamp**: 2026-04-22T10:03:11Z"
    parsed = parse_structured_entry(text)
    assert parsed.inline == {"timestamp": "2026-04-22T10:03:11Z"}
def test_parse_list_value_kept_as_string() -> None:
    """Bracketed values are returned as the raw string, not a list."""
    text = "**participants**: [u_jason, u_sarah]"
    parsed = parse_structured_entry(text)
    assert parsed.inline == {"participants": "[u_jason, u_sarah]"}
def test_parse_section_with_multiline_body() -> None:
    """All lines under an H3 title belong to that section's body."""
    text = "### Episode\nline 1\nline 2\nline 3"
    parsed = parse_structured_entry(text)
    assert parsed.sections == {"Episode": "line 1\nline 2\nline 3"}
def test_parse_section_titles_kept_verbatim() -> None:
    """A lower-case, underscored title is used as the key unchanged."""
    text = "### task_intent\ndoc text"
    parsed = parse_structured_entry(text)
    assert "task_intent" in parsed.sections
def test_parse_tolerates_stray_text_outside_blocks() -> None:
    """Prose lines in the head that match no pattern are ignored."""
    text = (
        "## ep_001\n\nrandom prose paragraph\n"
        "**k**: v\nmore stray text\n\n### Section\nbody"
    )
    parsed = parse_structured_entry(text)
    # Only the H2 line and the ``**key**: value`` line are captured from
    # the head; the two prose lines contribute nothing.
    assert parsed.header == "ep_001"
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {"Section": "body"}
def test_dataclass_immutable() -> None:
    """Assigning to a ``StructuredEntry`` attribute raises."""
    frozen = StructuredEntry(id="", body="", start=0, end=0, header="x")
    with pytest.raises((AttributeError, TypeError)):
        frozen.header = "y"  # type: ignore[misc]
def test_structured_entry_inherits_entry() -> None:
    """A ``StructuredEntry`` is an ``Entry`` instance and exposes both the
    marker fields and the parsed audit-form fields."""
    from everos.core.persistence.markdown import Entry

    fields = {
        "id": "ep_001",
        "body": "b",
        "start": 0,
        "end": 10,
        "header": "ep_001",
        "inline": {"k": "v"},
        "sections": {"S": "x"},
    }
    structured = StructuredEntry(**fields)
    assert isinstance(structured, Entry)
    assert structured.id == "ep_001"
    assert structured.header == "ep_001"
def test_entry_as_structured_preserves_marker_context() -> None:
    """``Entry.as_structured`` keeps id/start/end and adds the parsed fields."""
    from everos.core.persistence.markdown import Entry

    raw_body = "## ep_001\n\n**k**: v\n\n### Body\nthe body"
    plain = Entry(id="ep_001", body=raw_body, start=42, end=128)
    structured = plain.as_structured()
    assert isinstance(structured, StructuredEntry)
    # Marker context carried over from the plain entry.
    assert structured.id == "ep_001"
    assert structured.start == 42
    assert structured.end == 128
    # Fields parsed out of the body.
    assert structured.header == "ep_001"
    assert structured.inline == {"k": "v"}
    assert structured.sections == {"Body": "the body"}
# ── round-trip with realistic Episode entry ─────────────────────────────
def test_round_trip_episode_shape() -> None:
    """Render then parse an Episode-shaped entry with many inline fields."""
    section_map = {
        "Summary": "Jason and Sarah discussed weekend coffee plans.",
        "Episode": "At ten in the morning, while making coffee, Jason told Sarah...",
    }
    inline_fields = {
        "type": "episode",
        "user_id": "u_jason",
        "group_id": "sp_1",
        "session_id": "sess_abc123",
        "timestamp": "2026-04-22T10:03:11Z",
        "parent_type": "memcell",
        "parent_id": "mc_20260422_001",
        "participants": ["u_jason", "u_sarah"],
        "subject": "weekend planning",
    }
    text = render_structured_entry(
        header="ep_20260422_001",
        inline=inline_fields,
        sections=section_map,
    )
    parsed = parse_structured_entry(text)
    assert parsed.header == "ep_20260422_001"
    # The list value comes back as its bracketed string form.
    assert parsed.inline["participants"] == "[u_jason, u_sarah]"
    # A plain string value comes back unchanged.
    assert parsed.inline["session_id"] == "sess_abc123"
    assert parsed.sections == section_map

View File

@ -0,0 +1,87 @@
"""Markdown IO toolkit — typical workflow demo.
Doubles as living documentation for how a caller assembles + reads a
day-level markdown file with multiple ``<!-- entry:id -->`` records.
End-to-end story:
1. Build a body that contains entry markers.
2. Use ``MarkdownWriter.write_markdown`` to persist frontmatter + body
atomically (tmp file + fsync + rename, all inside the target dir).
3. Use ``MarkdownReader.read`` to parse the resulting file back into
a ``ParsedMarkdown`` (frontmatter dict + raw body + list[Entry]).
4. Verify each entry's id / body matches what was written.
5. Look up a single entry by id with ``find_entry``.
6. Round-trip: dump_frontmatter + body → parse_frontmatter recovers
the original mapping.
"""
from __future__ import annotations
from pathlib import Path
from everos.core.persistence import (
MarkdownReader,
MarkdownWriter,
MemoryRoot,
dump_frontmatter,
find_entry,
parse_frontmatter,
)
async def test_typical_day_log_write_then_read(tmp_path: Path) -> None:
    """Write a two-entry day log, read it back, and round-trip the frontmatter."""
    memory_root = MemoryRoot(tmp_path)
    memory_root.ensure()
    md_writer = MarkdownWriter(memory_root)

    # 1. Frontmatter plus a body holding two marked entries.
    meta = {
        "type": "episodic_day_log",
        "date": "2026-04-22",
        "user_id": "u_jason",
        "tags": ["meeting", "research"],
    }
    content = (
        "# Day log\n"
        "\n"
        "<!-- entry:ep_001 -->\n"
        "**Title**: Met Alice\n"
        "We discussed the new project layout.\n"
        "<!-- /entry:ep_001 -->\n"
        "\n"
        "<!-- entry:ep_002 -->\n"
        "**Title**: Read paper X\n"
        "Key idea: end-to-end async pipelines.\n"
        "<!-- /entry:ep_002 -->\n"
    )

    # 2. Persist through the writer.
    destination = memory_root.users_dir() / "u_jason" / "episodic" / "2026-04-22.md"
    returned = await md_writer.write_markdown(
        destination, frontmatter=meta, body=content
    )
    assert returned == destination
    assert destination.is_file()
    # The write must not leave a temp file next to the target.
    stray = list(destination.parent.glob(f".{destination.name}.tmp.*"))
    assert stray == []

    # 3. Parse the file back.
    result = await MarkdownReader.read(destination)

    # 4. Frontmatter and both entries are recovered.
    assert result.frontmatter == meta
    assert [entry.id for entry in result.entries] == ["ep_001", "ep_002"]
    first, second = result.entries[0], result.entries[1]
    assert "Met Alice" in first.body
    assert "Read paper X" in second.body

    # 5. Look up one entry by id.
    looked_up = find_entry(result.body, "ep_002")
    assert looked_up is not None
    assert "async pipelines" in looked_up.body

    # 6. dump_frontmatter + body parses back to the same pair.
    recovered_meta, recovered_body = parse_frontmatter(
        dump_frontmatter(meta) + content
    )
    assert recovered_meta == meta
    assert recovered_body == content

View File

@ -0,0 +1,229 @@
"""Unit tests for MarkdownWriter (atomic write)."""
from __future__ import annotations
import datetime as dt
from pathlib import Path
from unittest.mock import patch
import pytest
from everos.core.persistence import (
EntryId,
MarkdownReader,
MarkdownWriter,
MemoryRoot,
)
def _make_writer(tmp_path: Path) -> MarkdownWriter:
    """Build a ``MarkdownWriter`` over a ``MemoryRoot`` at ``tmp_path``."""
    memory_root = MemoryRoot(tmp_path)
    return MarkdownWriter(memory_root)
async def test_write_creates_file_with_content(tmp_path: Path) -> None:
    """``write`` returns the target path and stores the given text."""
    destination = tmp_path / "users" / "u1" / "out.md"
    returned = await _make_writer(tmp_path).write(destination, "hello\n")
    assert returned == destination
    assert destination.read_text(encoding="utf-8") == "hello\n"
async def test_write_creates_parent_directories(tmp_path: Path) -> None:
    """Missing parent directories are created by ``write``."""
    destination = tmp_path / "a" / "b" / "c" / "f.md"
    await _make_writer(tmp_path).write(destination, "x")
    assert destination.is_file()
async def test_write_overwrites_existing(tmp_path: Path) -> None:
    """``write`` replaces the content of an existing file."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "f.md"
    destination.write_text("old", encoding="utf-8")
    await md_writer.write(destination, "new")
    current = destination.read_text(encoding="utf-8")
    assert current == "new"
async def test_write_no_temp_file_left_after_success(tmp_path: Path) -> None:
    """After a successful write no ``.f.md.tmp.*`` file remains."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "f.md"
    await md_writer.write(destination, "ok")
    stray = [
        child.name
        for child in tmp_path.iterdir()  # noqa: ASYNC240 — sync iterdir over a pytest tmp_path is fine in tests
        if child.name.startswith(".f.md.tmp.")
    ]
    assert stray == []
async def test_write_cleans_up_temp_on_failure(tmp_path: Path) -> None:
    """When ``os.replace`` raises, the error propagates and no temp file or
    target file is left behind."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "f.md"
    failure = OSError("simulated rename failure")
    with (
        patch("everos.core.persistence.markdown.writer.os.replace", side_effect=failure),
        pytest.raises(OSError, match="simulated"),
    ):
        await md_writer.write(destination, "hello")
    # Neither the temp file nor the target may exist afterwards.
    stray = [
        child.name
        for child in tmp_path.iterdir()  # noqa: ASYNC240 — sync iterdir over a pytest tmp_path is fine in tests
        if child.name.startswith(".f.md.tmp.")
    ]
    assert stray == []
    assert not destination.exists()
async def test_write_markdown_assembles_frontmatter_and_body(tmp_path: Path) -> None:
    """The written file starts with the frontmatter and ends with the body."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "doc.md"
    await md_writer.write_markdown(
        destination,
        frontmatter={"title": "Hello"},
        body="# Body\n",
    )
    written = destination.read_text(encoding="utf-8")
    assert written.startswith("---\n")
    assert "title: Hello" in written
    assert written.rstrip("\n").endswith("# Body")
async def test_write_markdown_round_trip(tmp_path: Path) -> None:
    """A file written by ``write_markdown`` reads back with the same data."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "rt.md"
    await md_writer.write_markdown(
        destination,
        frontmatter={"k": "v", "n": 1},
        body="<!-- entry:x -->\ncontent\n<!-- /entry:x -->\n",
    )
    result = await MarkdownReader.read(destination)
    assert result.frontmatter == {"k": "v", "n": 1}
    assert len(result.entries) == 1
    assert result.entries[0].body == "content"
async def test_write_markdown_no_frontmatter(tmp_path: Path) -> None:
    """Without frontmatter the file contains the body only."""
    destination = tmp_path / "plain.md"
    await _make_writer(tmp_path).write_markdown(destination, body="just body\n")
    written = destination.read_text(encoding="utf-8")
    assert written == "just body\n"
def test_memory_root_property_accessible(tmp_path: Path) -> None:
    """``writer.memory_root.root`` equals the resolved ``tmp_path``."""
    md_writer = _make_writer(tmp_path)
    expected_root = tmp_path.resolve()
    assert md_writer.memory_root.root == expected_root
# ── append_entry ─────────────────────────────────────────────────────────
async def test_append_entry_creates_file_when_missing(tmp_path: Path) -> None:
    """Appending to a non-existent path creates the file with one entry."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "memcells" / "memcell-2026-04-22.md"
    entry_id = EntryId(prefix="umc", date=dt.date(2026, 4, 22), seq=1)
    front_updates = {
        "file_type": "memcell_daily",
        "entry_count": 1,
    }
    returned = await md_writer.append_entry(
        destination,
        entry_body="hello world",
        entry_id=entry_id,
        frontmatter_updates=front_updates,
    )
    assert returned == destination
    result = await MarkdownReader.read(destination)
    assert result.frontmatter == {"file_type": "memcell_daily", "entry_count": 1}
    assert len(result.entries) == 1
    only = result.entries[0]
    assert only.id == "umc_20260422_00000001"
    assert only.body == "hello world"
async def test_append_entry_appends_to_existing(tmp_path: Path) -> None:
    """A second append adds a second entry after the first."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "log.md"
    day = dt.date(2026, 4, 22)
    for seq, text in ((1, "first"), (2, "second")):
        await md_writer.append_entry(
            destination,
            entry_body=text,
            entry_id=EntryId(prefix="umc", date=day, seq=seq),
            frontmatter_updates={"entry_count": seq},
        )
    result = await MarkdownReader.read(destination)
    ids = [entry.id for entry in result.entries]
    bodies = [entry.body for entry in result.entries]
    assert ids == [
        "umc_20260422_00000001",
        "umc_20260422_00000002",
    ]
    assert bodies == ["first", "second"]
async def test_append_entry_merges_frontmatter_shallow(tmp_path: Path) -> None:
    """Later ``frontmatter_updates`` overwrite matching keys and keep the rest."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "log.md"
    day = dt.date(2026, 4, 22)
    first_updates = {
        "file_type": "memcell_daily",
        "entry_count": 1,
        "last_appended_at": "2026-04-22T10:00:00Z",
    }
    await md_writer.append_entry(
        destination,
        entry_body="b",
        entry_id=EntryId(prefix="umc", date=day, seq=1),
        frontmatter_updates=first_updates,
    )
    # The second append omits ``file_type``; it must survive the merge
    # while the other two keys are replaced.
    second_updates = {
        "entry_count": 2,
        "last_appended_at": "2026-04-22T10:05:00Z",
    }
    await md_writer.append_entry(
        destination,
        entry_body="b",
        entry_id=EntryId(prefix="umc", date=day, seq=2),
        frontmatter_updates=second_updates,
    )
    result = await MarkdownReader.read(destination)
    assert result.frontmatter == {
        "file_type": "memcell_daily",
        "entry_count": 2,
        "last_appended_at": "2026-04-22T10:05:00Z",
    }
async def test_append_entry_without_frontmatter_updates_keeps_existing(
    tmp_path: Path,
) -> None:
    """Appending without ``frontmatter_updates`` leaves the frontmatter as is."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "log.md"
    await md_writer.write_markdown(
        destination, frontmatter={"file_type": "x", "n": 1}, body=""
    )
    entry_id = EntryId(prefix="umc", date=dt.date(2026, 4, 22), seq=1)
    await md_writer.append_entry(destination, entry_body="body", entry_id=entry_id)
    result = await MarkdownReader.read(destination)
    assert result.frontmatter == {"file_type": "x", "n": 1}
    assert len(result.entries) == 1
async def test_append_entry_round_trip_with_reader(tmp_path: Path) -> None:
    """Five sequential appends read back as five entries in order."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "log.md"
    day = dt.date(2026, 4, 22)
    # seq runs 1..5 while the body text is numbered 0..4.
    for seq in range(1, 6):
        await md_writer.append_entry(
            destination,
            entry_body=f"content {seq - 1}",
            entry_id=EntryId(prefix="umc", date=day, seq=seq),
            frontmatter_updates={"entry_count": seq},
        )
    result = await MarkdownReader.read(destination)
    assert len(result.entries) == 5
    assert result.frontmatter["entry_count"] == 5
    for seq, entry in enumerate(result.entries, start=1):
        assert entry.id == f"umc_20260422_{seq:08d}"
        assert entry.body == f"content {seq - 1}"

View File

@ -0,0 +1,200 @@
"""Regression tests for the MarkdownWriter read-modify-write race.
Before the per-path :class:`asyncio.Lock` was added, two concurrent tasks
calling :meth:`MarkdownWriter.append_entry` against the same path would
each load the file, append one entry block in memory, and write the
merged file back — the second writer's read pre-dated the first
writer's write, so it overwrote the first writer's append. Both
``entry_count`` (frontmatter) and the entry block markers were lost in
proportion to concurrency level.
These tests drive ``N`` concurrent appends against one ``(owner, date)``
and assert that no entry is lost at any concurrency level. They cover
both the single-entry ``append_entry`` path (taken by tests / external
callers) and the batched ``append_entries`` path (taken by strategies
after the per-owner batching migration).
"""
from __future__ import annotations
import asyncio
import re
from pathlib import Path
import pytest
from everos.core.persistence import EntryId, MarkdownWriter, MemoryRoot
from everos.infra.persistence.markdown.writers.atomic_fact_writer import (
AtomicFactWriter,
)
def _scan_md(md_path: Path) -> tuple[int, int]:
    """Return ``(entry_tag_count, frontmatter_entry_count)`` for one markdown file.

    ``entry_tag_count`` is the number of ``<!-- entry:af_`` markers in the
    text. ``frontmatter_entry_count`` is the integer on the first line that
    starts with ``entry_count: ``, or ``-1`` when no such line exists.
    """
    content = md_path.read_text(encoding="utf-8")
    entry_tags = sum(1 for _ in re.finditer(r"<!-- entry:af_", content))
    declared = re.search(r"^entry_count: (\d+)", content, re.MULTILINE)
    if declared is None:
        return entry_tags, -1
    return entry_tags, int(declared.group(1))
async def _drive_concurrent_appends(
    writer: AtomicFactWriter,
    owner: str,
    n: int,
    concurrency: int,
) -> None:
    """Issue ``n`` single-entry ``append_entry`` calls with bounded concurrency.

    At most ``concurrency`` calls are in flight at once (enforced by a
    semaphore). Call ``idx`` uses ``parent_id`` ``mc_<idx:04d>`` and a
    ``Fact`` section of ``fact-<idx:04d>``.
    """
    gate = asyncio.Semaphore(concurrency)

    async def _append_one(idx: int) -> None:
        inline_fields = {
            "owner_id": owner,
            "session_id": "race_test",
            "timestamp": "2026-05-18T00:00:00+00:00",
            "parent_type": "memcell",
            "parent_id": f"mc_{idx:04d}",
        }
        async with gate:
            await writer.append_entry(
                owner,
                inline=inline_fields,
                sections={"Fact": f"fact-{idx:04d}"},
            )

    pending = [_append_one(i) for i in range(n)]
    await asyncio.gather(*pending)
@pytest.mark.parametrize("concurrency", [1, 2, 4, 8, 16])
async def test_append_entry_no_lost_updates_under_concurrency(
    tmp_path: Path, concurrency: int
) -> None:
    """``append_entry`` from N concurrent tasks must not drop any entry.

    Thirty appends are driven for one owner; the single resulting markdown
    file must contain thirty entry markers and a matching ``entry_count``.
    """
    expected = 30
    owner_key = "race_user"
    memory_root = MemoryRoot(root=tmp_path)
    fact_writer = AtomicFactWriter(root=memory_root)
    await _drive_concurrent_appends(fact_writer, owner_key, expected, concurrency)

    owner_dir = memory_root.users_dir() / owner_key
    found = [*owner_dir.rglob("*.md")]
    assert len(found) == 1, f"expected 1 md file, got {found}"

    tags, declared = _scan_md(found[0])
    assert tags == expected, (
        f"lost {expected - tags} entries at concurrency={concurrency} "
        f"(tag_count={tags}, expected={expected})"
    )
    assert declared == expected, (
        f"frontmatter entry_count drift at concurrency={concurrency} "
        f"(fm_count={declared}, expected={expected})"
    )
@pytest.mark.parametrize("concurrency", [1, 2, 4, 8, 16])
async def test_append_entries_batch_no_lost_updates_under_concurrency(
    tmp_path: Path, concurrency: int
) -> None:
    """``append_entries`` (batched) from N concurrent tasks must not drop any
    entry.

    Six batches of five items each are appended for one owner, with at most
    ``concurrency`` batches in flight; all thirty entries must survive.
    """
    memory_root = MemoryRoot(root=tmp_path)
    fact_writer = AtomicFactWriter(root=memory_root)
    owner_key = "race_user_batched"
    batch_count = 6
    per_batch = 5
    expected = batch_count * per_batch
    gate = asyncio.Semaphore(concurrency)

    def _build_batch(batch_idx: int) -> list:
        # One (inline fields, sections) pair per item in the batch.
        batch = []
        for i in range(per_batch):
            inline_fields = {
                "owner_id": owner_key,
                "session_id": "race_test",
                "timestamp": "2026-05-18T00:00:00+00:00",
                "parent_type": "memcell",
                "parent_id": f"mc_b{batch_idx:02d}_i{i:02d}",
            }
            sections = {"Fact": f"batched-fact-b{batch_idx:02d}-{i:02d}"}
            batch.append((inline_fields, sections))
        return batch

    async def _append_batch(batch_idx: int) -> None:
        async with gate:
            await fact_writer.append_entries(owner_key, _build_batch(batch_idx))

    pending = [_append_batch(b) for b in range(batch_count)]
    await asyncio.gather(*pending)

    owner_dir = memory_root.users_dir() / owner_key
    found = [*owner_dir.rglob("*.md")]
    assert len(found) == 1

    tags, declared = _scan_md(found[0])
    assert tags == expected, (
        f"lost {expected - tags} entries at concurrency={concurrency} "
        f"(tag_count={tags}, expected={expected})"
    )
    assert declared == expected, (
        f"frontmatter entry_count drift at concurrency={concurrency} "
        f"(fm_count={declared}, expected={expected})"
    )
async def test_lock_for_returns_same_lock_per_path(tmp_path: Path) -> None:
    """``lock_for`` is the keying primitive that BaseDailyWriter relies on
    to serialise its multi-step read-compute-write sequence; aliasing paths
    must collapse to one lock object."""
    md_writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    plain = tmp_path / "foo" / "bar.md"
    rebuilt = tmp_path / "foo" / "bar.md"
    via_dotdot = tmp_path / "foo" / ".." / "foo" / "bar.md"

    plain_lock = md_writer.lock_for(plain)
    rebuilt_lock = md_writer.lock_for(rebuilt)
    dotdot_lock = md_writer.lock_for(via_dotdot)

    # Every spelling of the same file must hand back the very same object.
    assert plain_lock is rebuilt_lock
    assert plain_lock is dotdot_lock

    # A sibling file gets its own lock.
    sibling_lock = md_writer.lock_for(tmp_path / "foo" / "baz.md")
    assert sibling_lock is not plain_lock
async def test_append_entries_empty_is_noop(tmp_path: Path) -> None:
    """Empty batch must not touch the file or allocate any EntryId.

    What is actually asserted: the call returns the target path, and the
    target is either absent or holds nothing beyond an empty frontmatter
    block.
    """
    acceptable_contents = ("", "---\n---\n\n")
    md_writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    scratch = tmp_path / "scratch.md"
    returned = await md_writer.append_entries(scratch, [])
    assert returned == scratch
    # No file should have been created (empty body + no frontmatter updates
    # still calls write_markdown — verify the file is empty or absent).
    if not scratch.exists():
        return
    assert scratch.read_text(encoding="utf-8") in acceptable_contents
async def test_markdown_writer_append_entry_delegates_to_batch(
    tmp_path: Path,
) -> None:
    """``append_entry`` is documented as a wrapper for ``append_entries`` —
    asserting they produce identical file contents protects callers from
    drift between the two paths.

    The same body, entry id and frontmatter updates are written to two
    separate files, one through each API, and the resulting texts are
    compared byte-for-byte (after UTF-8 decoding).
    """
    # Function-scope import: this module does not import ``datetime`` at the
    # top, and a plain import is clearer than an inline ``__import__`` call.
    import datetime

    writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    eid = EntryId.next_for("af", datetime.date(2026, 5, 18), 0)
    body = "**fact**: hello"
    path_a = tmp_path / "a.md"
    path_b = tmp_path / "b.md"
    # Single-entry path.
    await writer.append_entry(
        path_a,
        entry_body=body,
        entry_id=eid,
        frontmatter_updates={"id": "shared", "entry_count": 1},
    )
    # Batched path with a one-element batch.
    await writer.append_entries(
        path_b,
        [(body, eid)],
        frontmatter_updates={"id": "shared", "entry_count": 1},
    )
    assert path_a.read_text(encoding="utf-8") == path_b.read_text(encoding="utf-8")

View File

@ -0,0 +1,126 @@
"""Unit tests for MemoryRoot path manager."""
from __future__ import annotations
from pathlib import Path
import pytest
from everos.core.persistence import MemoryRoot
def test_default_returns_home_everos(monkeypatch: pytest.MonkeyPatch) -> None:
    """``MemoryRoot.default()`` resolves to ``~/.everos`` when no override is set."""
    # Isolate from any ambient EVEROS_MEMORY__ROOT (e.g. the session-scoped
    # search-corpus fixture sets it for the whole run); the autouse
    # _reset_settings_cache fixture clears the load_settings cache, so the
    # delenv takes effect for this assertion of the hard-coded default.
    monkeypatch.delenv("EVEROS_MEMORY__ROOT", raising=False)
    default_root = MemoryRoot.default().root
    expected_root = (Path.home() / ".everos").resolve()
    assert default_root == expected_root
def test_accepts_str_path(tmp_path: Path) -> None:
    """A ``str`` root is accepted and exposed as the resolved path."""
    from_text = MemoryRoot(str(tmp_path))
    expected_root = tmp_path.resolve()
    assert from_text.root == expected_root
def test_accepts_pathlib_path(tmp_path: Path) -> None:
    """A ``pathlib.Path`` root is accepted and exposed as the resolved path."""
    from_path = MemoryRoot(tmp_path)
    expected_root = tmp_path.resolve()
    assert from_path.root == expected_root
def test_user_visible_dirs_default_scope(tmp_path: Path) -> None:
    """With no app/project given, the user-visible dirs sit under the default space."""
    mr = MemoryRoot(tmp_path)
    # Omitting app/project resolves to the default space; "default" lands as
    # the reserved ``default_app`` / ``default_project`` directory names.
    default_space = mr.root / "default_app" / "default_project"
    accessors = (
        ("agents", mr.agents_dir),
        ("users", mr.users_dir),
        ("knowledge", mr.knowledge_dir),
    )
    for leaf, resolve in accessors:
        assert resolve() == default_space / leaf
def test_user_visible_dirs_named_scope(tmp_path: Path) -> None:
    """An explicit app/project pair is used verbatim as the directory names."""
    mr = MemoryRoot(tmp_path)
    # A non-default app/project maps to itself (no ``default_*`` rewrite).
    named_space = mr.root / "claude_code" / "oss"
    accessors = (
        ("agents", mr.agents_dir),
        ("users", mr.users_dir),
        ("knowledge", mr.knowledge_dir),
    )
    for leaf, resolve in accessors:
        assert resolve("claude_code", "oss") == named_space / leaf
def test_dotfile_paths(tmp_path: Path) -> None:
    """The hidden index, lock and tmp locations hang off the root as expected."""
    mr = MemoryRoot(tmp_path)
    index_root = tmp_path / ".index"
    sqlite_root = index_root / "sqlite"
    assert mr.index_dir == index_root
    assert mr.lancedb_dir == index_root / "lancedb"
    assert mr.sqlite_dir == sqlite_root
    assert mr.system_db == sqlite_root / "system.db"
    assert mr.lock_file == tmp_path / ".lock"
    assert mr.tmp_dir == tmp_path / ".tmp"
def test_ensure_creates_required_dirs(tmp_path: Path) -> None:
    """``ensure()`` creates the root and internal dirs but no user-visible dirs."""
    mr = MemoryRoot(tmp_path / "fresh")
    mr.ensure()
    for attr in ("root", "index_dir", "sqlite_dir", "lancedb_dir", "tmp_dir"):
        assert getattr(mr, attr).is_dir()
    # User-visible dirs are NOT pre-created.
    for resolve in (mr.agents_dir, mr.users_dir, mr.knowledge_dir):
        assert not resolve().exists()
def test_ensure_is_idempotent(tmp_path: Path) -> None:
    """Calling ``ensure()`` twice in a row must succeed both times."""
    mr = MemoryRoot(tmp_path)
    for _ in range(2):  # the second call must not fail
        mr.ensure()
    assert mr.tmp_dir.is_dir()
def test_ensure_materializes_ome_config_template(tmp_path: Path) -> None:
    """First ensure() drops a real ``ome.toml`` users can edit.

    Without this, ``pip install everos && everos server start`` produced
    a warning (``config_reload_failed: No such file``) because the OME
    config reloader had no file to point at. The template ships under
    ``src/everos/config/default_ome.toml`` and is byte-copied on first run.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    assert mr.ome_config.is_file()
    # Content is the shipped template verbatim — protects against a future
    # diff that silently changes what users see on first run.
    four_levels_up = Path(__file__).resolve().parents[4]
    shipped_template = four_levels_up / "src/everos/config/default_ome.toml"
    materialized = mr.ome_config.read_bytes()
    assert materialized == shipped_template.read_bytes()
def test_ensure_preserves_user_edited_ome_config(tmp_path: Path) -> None:
    """Second ensure() must not overwrite user edits.

    The template materialisation is an existence check, not a content
    sync — once the user has tweaked their overrides the file is theirs.
    """
    user_bytes = b"# user-edited\n[strategies.extract_foresight]\nenabled = false\n"
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    mr.ome_config.write_bytes(user_bytes)
    mr.ensure()
    surviving = mr.ome_config.read_bytes()
    assert surviving == user_bytes
def test_frozen_dataclass_hashable(tmp_path: Path) -> None:
    """Two ``MemoryRoot`` objects for the same path are equal, hash alike and dedupe."""
    first, second = MemoryRoot(tmp_path), MemoryRoot(tmp_path)
    assert first == second
    assert hash(first) == hash(second)
    # Set deduplication works: the pair collapses to a single member.
    assert {first, second} == {first}
def test_user_expansion(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """A leading ``~`` in the root is expanded against the user's home directory.

    The home directory is redirected to ``tmp_path`` so the expectation does
    not depend on the real account running the tests.
    """
    home = str(tmp_path)
    # POSIX ``expanduser`` reads HOME; on Windows it reads USERPROFILE and
    # ignores HOME (Python 3.8+), so patch both to keep the test portable.
    monkeypatch.setenv("HOME", home)
    monkeypatch.setenv("USERPROFILE", home)
    mr = MemoryRoot("~/custom")
    assert mr.root == (tmp_path / "custom").resolve()

View File

@ -0,0 +1,113 @@
"""Unit tests for the SQLite async engine + PRAGMA listener.
Critical: verifies PRAGMAs are actually applied at the SQLite layer
(not just declared in code). The whole reason for the listener is that
PRAGMAs are per-connection and the SA pool reuses connections.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from sqlalchemy import text
from everos.config import SqliteSettings
from everos.core.persistence import (
MemoryRoot,
create_session_factory,
create_system_engine,
session_scope,
)
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    prepared = MemoryRoot(tmp_path)
    prepared.ensure()
    return prepared
async def test_engine_creates_db_file(memory_root: MemoryRoot) -> None:
    """Running one statement through the engine leaves ``system.db`` on disk."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        async with session_scope(factory) as s:
            await s.execute(text("SELECT 1"))
    finally:
        # Dispose even when the statement fails, matching the other engine
        # tests in this module, so a failure does not leak the engine.
        await engine.dispose()
    assert memory_root.system_db.exists()
async def test_pragmas_actually_applied_default_settings(
    memory_root: MemoryRoot,
) -> None:
    """Default PRAGMAs match what's in default.toml."""
    expected_pragmas = (
        ("journal_mode", "wal"),
        # synchronous: 0=OFF 1=NORMAL 2=FULL 3=EXTRA
        ("synchronous", 1),
        # foreign_keys: 1=ON 0=OFF
        ("foreign_keys", 1),
        # temp_store: 0=DEFAULT 1=FILE 2=MEMORY
        ("temp_store", 2),
        ("busy_timeout", 5000),
        ("journal_size_limit", 64 * 1024 * 1024),
        # cache_size: negative value = KB; positive = pages
        ("cache_size", -2048),
    )
    default_settings = SqliteSettings()
    engine = create_system_engine(memory_root.system_db, default_settings)
    factory = create_session_factory(engine)
    try:
        async with session_scope(factory) as session:
            for pragma_name, wanted in expected_pragmas:
                assert _scalar(await _pragma(session, pragma_name)) == wanted
    finally:
        await engine.dispose()
async def test_pragmas_respect_custom_settings(memory_root: MemoryRoot) -> None:
    """Engine reflects non-default tunables."""
    custom_settings = SqliteSettings(
        journal_mode="DELETE",
        synchronous="FULL",
        foreign_keys=False,
        temp_store="FILE",
        busy_timeout_ms=10000,
        journal_size_limit_bytes=1024 * 1024,
        cache_size_kb=4096,
    )
    expected_pragmas = (
        ("journal_mode", "delete"),
        ("synchronous", 2),  # FULL
        ("foreign_keys", 0),
        ("temp_store", 1),  # FILE
        ("busy_timeout", 10000),
        ("cache_size", -4096),
    )
    engine = create_system_engine(memory_root.system_db, custom_settings)
    factory = create_session_factory(engine)
    try:
        async with session_scope(factory) as session:
            for pragma_name, wanted in expected_pragmas:
                assert _scalar(await _pragma(session, pragma_name)) == wanted
    finally:
        await engine.dispose()
async def test_pragmas_applied_on_each_new_connection(
    memory_root: MemoryRoot,
) -> None:
    """The listener fires on every new connection from the pool, not just once."""
    default_settings = SqliteSettings()
    engine = create_system_engine(memory_root.system_db, default_settings)
    factory = create_session_factory(engine)
    try:
        # Two independent sessions → at least two connection acquisitions
        # → both must see WAL mode.
        for _ in range(2):
            async with session_scope(factory) as session:
                mode = _scalar(await _pragma(session, "journal_mode"))
                assert mode == "wal"
    finally:
        await engine.dispose()
async def _pragma(session, name: str):  # type: ignore[no-untyped-def]
    """Execute ``PRAGMA <name>`` on *session* and return the raw result object."""
    statement = text(f"PRAGMA {name}")
    outcome = await session.execute(statement)
    return outcome
def _scalar(result):  # type: ignore[no-untyped-def]
    """Return the first column of the next row of *result*, or ``None`` if no row."""
    first_row = result.fetchone()
    if first_row is None:
        return None
    return first_row[0]

View File

@ -0,0 +1,126 @@
"""ORM CRUD demo: full INSERT / SELECT / UPDATE / DELETE on a BaseTable.
Doubles as living documentation for how to author a SQLModel-backed
business table inside the everos persistence stack:
1. Subclass ``BaseTable`` (gets ``created_at`` / ``updated_at`` for free).
2. Build a session factory from a real engine.
3. Use ``session_scope`` for the transaction lifecycle.
4. Verify ``updated_at`` auto-bumps on UPDATE.
The local table name is prefixed with ``_`` so it cannot be confused with
a real business table.
"""
from __future__ import annotations
import asyncio
from pathlib import Path
import pytest
from sqlmodel import SQLModel, select
from everos.config import SqliteSettings
from everos.core.persistence import (
BaseTable,
Field,
MemoryRoot,
create_session_factory,
create_system_engine,
session_scope,
)
class _DemoNote(BaseTable, table=True):
    """Tiny demo table — used only by this test module.

    Declared with ``table=True`` on top of ``BaseTable``; the CRUD test in
    this module reads ``created_at`` / ``updated_at`` from its instances in
    addition to the columns declared here.
    """

    # Leading underscore keeps the table from being mistaken for a real
    # business table.
    __tablename__ = "_demo_notes"  # type: ignore[assignment]
    # Primary key; ``None`` at construction time, populated once the row has
    # been committed and refreshed.
    id: int | None = Field(default=None, primary_key=True)
    # Required free-text payload.
    body: str
    # Optional tag string, ``None`` by default.
    tags: str | None = Field(default=None)
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    prepared = MemoryRoot(tmp_path)
    prepared.ensure()
    return prepared
async def test_orm_full_crud_lifecycle(memory_root: MemoryRoot) -> None:
    """Walk ``_DemoNote`` rows through INSERT, SELECT, UPDATE and DELETE.

    Each phase runs in its own ``session_scope`` against one engine built on
    ``memory_root.system_db``. The engine is disposed in ``finally`` whether
    or not an assertion fails.
    """
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        # ── Create schema ───────────────────────────────────────────────
        # Creates every table registered on SQLModel.metadata, which
        # includes the demo table declared in this module.
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)
        # ── INSERT ──────────────────────────────────────────────────────
        async with session_scope(factory) as s:
            note = _DemoNote(body="hello")
            s.add(note)
            await s.commit()
            # Reload so generated values (id, timestamps) are present.
            await s.refresh(note)
            assert note.id is not None
            assert note.created_at is not None
            assert note.updated_at is not None
            # default_factory runs once per field, so the two timestamps
            # may differ by a few microseconds on INSERT. Order must hold.
            assert note.created_at <= note.updated_at
            # Captured for the later SELECT / UPDATE / DELETE phases.
            note_id = note.id
            initial_created = note.created_at
            initial_updated = note.updated_at
        # ── SELECT (single by id) ───────────────────────────────────────
        async with session_scope(factory) as s:
            stmt = select(_DemoNote).where(_DemoNote.id == note_id)
            result = (await s.execute(stmt)).scalars().first()
            assert result is not None
            assert result.body == "hello"
        # ── SELECT (filter + order) ─────────────────────────────────────
        # Add two more rows first so ordering by id has something to order.
        async with session_scope(factory) as s:
            s.add(_DemoNote(body="second"))
            s.add(_DemoNote(body="third"))
            await s.commit()
        async with session_scope(factory) as s:
            stmt = select(_DemoNote).order_by(_DemoNote.id)
            rows = (await s.execute(stmt)).scalars().all()
            assert [r.body for r in rows] == ["hello", "second", "third"]
        # ── UPDATE (verify updated_at auto-bumps) ───────────────────────
        # Sleep slightly so onupdate has a measurably newer timestamp
        # than the initial insert (timestamp resolution is fine but the
        # comparison should be ``>=`` to be robust on fast machines).
        await asyncio.sleep(0.01)
        async with session_scope(factory) as s:
            stmt = select(_DemoNote).where(_DemoNote.id == note_id)
            n = (await s.execute(stmt)).scalars().first()
            assert n is not None
            n.body = "hello world"
            n.tags = "demo"
            await s.commit()
            await s.refresh(n)
            assert n.body == "hello world"
            assert n.tags == "demo"
            # NOTE: ``>=`` only proves the timestamp did not move backwards.
            assert n.updated_at >= initial_updated  # bumped via onupdate
            assert n.created_at == initial_created  # unchanged on update
        # ── DELETE ──────────────────────────────────────────────────────
        async with session_scope(factory) as s:
            stmt = select(_DemoNote).where(_DemoNote.id == note_id)
            n = (await s.execute(stmt)).scalars().first()
            assert n is not None
            await s.delete(n)
            await s.commit()
        # Verify from a fresh session that only the targeted row is gone.
        async with session_scope(factory) as s:
            count_stmt = select(_DemoNote).where(_DemoNote.id == note_id)
            assert (await s.execute(count_stmt)).scalars().first() is None
            # Other rows survive
            remaining = (await s.execute(select(_DemoNote))).scalars().all()
            assert {r.body for r in remaining} == {"second", "third"}
    finally:
        await engine.dispose()

View File

@ -0,0 +1,160 @@
"""RepoBase CRUD demo + assertions.
Doubles as living documentation for how a service / memory layer caller
uses the generic repository — no manual session handling. Exercises the
explicit-factory constructor path; the lazy ``_factory_lookup`` hook is
exercised indirectly via the lifespan + manager tests once business
repos land under ``infra/.../repos/``.
"""
from __future__ import annotations
import asyncio
from pathlib import Path
import pytest
from sqlmodel import SQLModel
from everos.config import SqliteSettings
from everos.core.persistence import (
BaseTable,
Field,
MemoryRoot,
RepoBase,
create_session_factory,
create_system_engine,
)
class _DemoUser(BaseTable, table=True):
    """Demo table — only used by this test module.

    Declared with ``table=True`` on top of ``BaseTable``; the repo tests in
    this module also read ``created_at`` / ``updated_at`` from its instances.
    """

    # Leading underscore marks this as a test-only table name.
    __tablename__ = "_demo_users"  # type: ignore[assignment]
    # Primary key; ``None`` at construction time, set once the row is stored.
    id: int | None = Field(default=None, primary_key=True)
    # Required display name; the tests use it as a lookup key.
    name: str
    # Flag used by the ``find_where`` tests; defaults to ``True``.
    active: bool = Field(default=True)
class _DemoUserRepo(RepoBase[_DemoUser]):
    """``RepoBase`` specialisation bound to the ``_DemoUser`` demo table."""

    # Tells the generic repository which model class it operates on.
    model = _DemoUser
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    prepared = MemoryRoot(tmp_path)
    prepared.ensure()
    return prepared
async def _setup_repo(memory_root: MemoryRoot) -> tuple[_DemoUserRepo, object]:
    """Build engine, factory, and ensure schema. Returns (repo, engine).

    The caller owns the returned engine and is expected to dispose it.
    """
    db_engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(db_engine)
    async with db_engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
    demo_repo = _DemoUserRepo(session_factory)
    return demo_repo, db_engine
async def test_repo_add_and_get(memory_root: MemoryRoot) -> None:
    """``add`` stores a row with an id and timestamp; ``get_by_id`` reads it back."""
    repo, engine = await _setup_repo(memory_root)
    try:
        stored = await repo.add(_DemoUser(name="alice"))
        assert stored.id is not None
        assert stored.created_at is not None

        loaded = await repo.get_by_id(stored.id)
        assert loaded is not None
        assert loaded.name == "alice"
        assert loaded.active is True
    finally:
        await engine.dispose()
async def test_repo_add_many_and_list_all(memory_root: MemoryRoot) -> None:
    """``add_many`` stores every row; ``list_all`` and ``count`` see all three."""
    repo, engine = await _setup_repo(memory_root)
    try:
        batch = [
            _DemoUser(name="alice"),
            _DemoUser(name="bob"),
            _DemoUser(name="carol", active=False),
        ]
        saved = await repo.add_many(batch)
        assert all(row.id is not None for row in saved)

        listed = await repo.list_all()
        listed_names = {row.name for row in listed}
        assert listed_names == {"alice", "bob", "carol"}

        total = await repo.count()
        assert total == 3
    finally:
        await engine.dispose()
async def test_repo_find_where_and_find_one(memory_root: MemoryRoot) -> None:
    """``find_where`` filters by column; ``find_one`` returns a row or ``None``."""
    repo, engine = await _setup_repo(memory_root)
    try:
        seed_rows = [
            _DemoUser(name="alice", active=True),
            _DemoUser(name="bob", active=False),
            _DemoUser(name="carol", active=True),
        ]
        await repo.add_many(seed_rows)

        active_rows = await repo.find_where(active=True)
        active_names = {row.name for row in active_rows}
        assert active_names == {"alice", "carol"}

        bob_row = await repo.find_one(name="bob")
        assert bob_row is not None
        assert bob_row.active is False

        missing = await repo.find_one(name="no_such")
        assert missing is None
    finally:
        await engine.dispose()
async def test_repo_update_bumps_updated_at(memory_root: MemoryRoot) -> None:
    """``update`` persists changed fields, keeps ``created_at``, and does not
    move ``updated_at`` backwards."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))
        updated_before = user.updated_at
        created_before = user.created_at

        # Small pause so the post-update timestamp can differ from the insert.
        await asyncio.sleep(0.01)
        user.name = "alice2"
        user.active = False
        after = await repo.update(user)

        assert after.name == "alice2"
        assert after.active is False
        assert after.updated_at >= updated_before  # bumped
        assert after.created_at == created_before
    finally:
        await engine.dispose()
async def test_repo_delete_by_instance(memory_root: MemoryRoot) -> None:
    """``delete`` with a model instance removes that row from the table."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))
        rows_before = await repo.count()
        assert rows_before == 1

        await repo.delete(user)

        rows_after = await repo.count()
        assert rows_after == 0
        gone = await repo.get_by_id(user.id)
        assert gone is None
    finally:
        await engine.dispose()
async def test_repo_delete_by_id_returns_bool(memory_root: MemoryRoot) -> None:
    """``delete_by_id`` returns ``True`` only when a row was actually removed."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))
        first_attempt = await repo.delete_by_id(user.id)
        assert first_attempt is True
        second_attempt = await repo.delete_by_id(user.id)
        assert second_attempt is False  # already gone
        unknown_id = await repo.delete_by_id(99999)
        assert unknown_id is False  # never existed
    finally:
        await engine.dispose()

View File

@ -0,0 +1,78 @@
"""Unit tests for session_scope semantics."""
from __future__ import annotations
from pathlib import Path
import pytest
from sqlalchemy import text
from sqlmodel import Field, SQLModel
from everos.config import SqliteSettings
from everos.core.persistence import (
MemoryRoot,
create_session_factory,
create_system_engine,
session_scope,
)
class _Sample(SQLModel, table=True):
    """Tiny model used only by these tests.

    Built directly on ``SQLModel`` rather than ``BaseTable``; the tests in
    this module query its table by name through raw SQL.
    """

    # Table name referenced verbatim by the raw SQL in the tests below.
    __tablename__ = "_sample_session_scope"  # type: ignore[assignment]
    # Primary key; ``None`` at construction time.
    id: int | None = Field(default=None, primary_key=True)
    # Required text payload the tests insert and read back.
    note: str
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    prepared = MemoryRoot(tmp_path)
    prepared.ensure()
    return prepared
async def test_session_scope_commits_on_success(memory_root: MemoryRoot) -> None:
    """A row committed inside one scope is visible from a later scope."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        async with engine.begin() as connection:
            await connection.run_sync(SQLModel.metadata.create_all)

        async with session_scope(factory) as writer_session:
            writer_session.add(_Sample(note="hello"))
            await writer_session.commit()

        async with session_scope(factory) as reader_session:
            query = text("SELECT note FROM _sample_session_scope")
            outcome = await reader_session.execute(query)
            first_row = outcome.fetchone()
            assert first_row is not None
            assert first_row[0] == "hello"
    finally:
        await engine.dispose()
async def test_session_scope_rolls_back_on_exception(
    memory_root: MemoryRoot,
) -> None:
    """An exception raised inside the scope propagates and leaves no row behind."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        async with engine.begin() as connection:
            await connection.run_sync(SQLModel.metadata.create_all)

        with pytest.raises(RuntimeError):
            async with session_scope(factory) as failing_session:
                failing_session.add(_Sample(note="should rollback"))
                # No commit yet → scope must rollback on exception.
                raise RuntimeError("boom")

        async with session_scope(factory) as reader_session:
            query = text("SELECT COUNT(*) FROM _sample_session_scope")
            outcome = await reader_session.execute(query)
            count_row = outcome.fetchone()
            assert count_row is not None
            assert count_row[0] == 0
    finally:
        await engine.dispose()