chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
298
tests/unit/test_infra/test_sqlite/test_repos/test_cluster.py
Normal file
298
tests/unit/test_infra/test_sqlite/test_repos/test_cluster.py
Normal file
@ -0,0 +1,298 @@
|
||||
"""Tests for :class:`_ClusterRepo` — cluster + cluster_member persistence.
|
||||
|
||||
Verifies the boundary translations between the algo value object
|
||||
(:class:`everalgo.clustering.Cluster`) and the two-table storage shape:
|
||||
|
||||
- centroid ``np.ndarray`` ↔ raw ``bytes``,
|
||||
- ``last_ts`` int ms-epoch stored verbatim (no datetime conversion),
|
||||
- ``preview`` ``list[str]`` ↔ JSON,
|
||||
- ``members`` ``list[str]`` ↔ ``cluster_member`` rows (forward + reverse).
|
||||
|
||||
The repo is the only path that touches the storage; downstream cluster
|
||||
strategies must always see a fully-hydrated :class:`AlgoCluster` on read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from everalgo.clustering import Cluster as AlgoCluster
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
)
|
||||
from everos.infra.persistence.sqlite.repos.cluster import (
|
||||
_ClusterRepo,
|
||||
mint_cluster_id,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path: Path) -> _ClusterRepo:
    """Provide a ``_ClusterRepo`` wired to a fresh SQLite DB under ``tmp_path``.

    The schema is created from ``SQLModel.metadata`` before the repo is
    handed to the test.
    """
    memory_root = MemoryRoot(tmp_path)
    memory_root.ensure()
    db_engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(db_engine)
    # Create every table registered on SQLModel.metadata up front.
    async with db_engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
    # NOTE(review): the engine is never disposed after the test — confirm
    # whether an explicit teardown is needed here.
    return _ClusterRepo(session_factory=session_factory)
|
||||
|
||||
|
||||
def _make_cluster(
    *,
    cluster_id: str,
    centroid_vals: list[float],
    members: list[str],
    last_ts_ms: int = 1_700_000_000_000,
    count: int = 1,
    preview: list[str] | None = None,
) -> AlgoCluster:
    """Build an ``AlgoCluster`` test value from plain Python inputs.

    ``centroid_vals`` is converted to a float32 ``np.ndarray``; a falsy
    ``preview`` (``None`` or empty) is replaced by a new empty list.
    """
    centroid = np.array(centroid_vals, dtype=np.float32)
    preview_items = preview if preview else []
    return AlgoCluster(
        id=cluster_id,
        centroid=centroid,
        count=count,
        last_ts=last_ts_ms,
        preview=preview_items,
        members=members,
    )
|
||||
|
||||
|
||||
def test_mint_cluster_id_shape() -> None:
    """A minted id starts with ``cl_`` and is 12 characters longer than the prefix."""
    prefix = "cl_"
    minted = mint_cluster_id()
    assert minted.startswith(prefix)
    assert len(minted) == len(prefix) + 12  # ``cl_`` + 12 hex chars
|
||||
|
||||
|
||||
# ── round-trip ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_upsert_then_list_round_trips_full_algo_cluster(
    repo: _ClusterRepo,
) -> None:
    """Upsert one cluster, list it back, and check every algo field is intact."""
    stored = _make_cluster(
        cluster_id="cl_aaa000000001",
        centroid_vals=[0.25, -0.5, 0.75],
        members=["mc_one", "mc_two"],
        last_ts_ms=1_700_000_001_500,
        count=2,
        preview=["alice likes hiking", "alice plans tokyo"],
    )
    scope = {
        "owner_id": "u_alice",
        "owner_type": "user",
        "kind": "user_memory",
        "member_type": "memcell",
    }
    await repo.upsert_with_members(stored, **scope)

    listed = await repo.list_for_owner("u_alice", "user_memory")
    assert len(listed) == 1
    loaded = listed[0]
    assert loaded.id == "cl_aaa000000001"
    assert loaded.count == 2
    # ``last_ts`` is expected back as the same int that was written.
    assert loaded.last_ts == 1_700_000_001_500
    assert loaded.preview == ["alice likes hiking", "alice plans tokyo"]
    assert loaded.members == ["mc_one", "mc_two"]
    expected_centroid = np.array([0.25, -0.5, 0.75], dtype=np.float32)
    np.testing.assert_allclose(np.asarray(loaded.centroid), expected_centroid)
|
||||
|
||||
|
||||
async def test_list_for_owner_isolates_by_owner_and_kind(
    repo: _ClusterRepo,
) -> None:
    """Each ``(owner_id, kind)`` pair lists only the cluster stored under it."""
    # Build all three clusters first, then persist them in the same order.
    cluster_specs = (
        ("cl_alice00000001", [1.0, 0.0], "mc_a"),
        ("cl_bob0000000001", [0.0, 1.0], "mc_b"),
        ("cl_case0000001", [0.5, 0.5], "ac_20260517_0001"),
    )
    clusters = [
        _make_cluster(cluster_id=cid, centroid_vals=vector, members=[member])
        for cid, vector, member in cluster_specs
    ]
    scopes = (
        {
            "owner_id": "u_alice",
            "owner_type": "user",
            "kind": "user_memory",
            "member_type": "memcell",
        },
        {
            "owner_id": "u_bob",
            "owner_type": "user",
            "kind": "user_memory",
            "member_type": "memcell",
        },
        {
            "owner_id": "agent_42",
            "owner_type": "agent",
            "kind": "agent_case",
            "member_type": "case",
        },
    )
    for cluster, scope in zip(clusters, scopes):
        await repo.upsert_with_members(cluster, **scope)

    alice_rows = await repo.list_for_owner("u_alice", "user_memory")
    bob_rows = await repo.list_for_owner("u_bob", "user_memory")
    agent_rows = await repo.list_for_owner("agent_42", "agent_case")

    alice_ids = [row.id for row in alice_rows]
    bob_ids = [row.id for row in bob_rows]
    agent_ids = [row.id for row in agent_rows]
    assert alice_ids == ["cl_alice00000001"]
    assert bob_ids == ["cl_bob0000000001"]
    assert agent_ids == ["cl_case0000001"]
|
||||
|
||||
|
||||
# ── upsert (idempotency + members merge) ────────────────────────────────
|
||||
|
||||
|
||||
async def test_upsert_appends_new_members_and_overwrites_scalar_fields(
    repo: _ClusterRepo,
) -> None:
    """Re-upserting a cluster id grows the member list and replaces the scalars.

    After the second upsert the single listed row must carry the new
    count, preview and centroid, with both member ids present.
    """
    scope = {
        "owner_id": "u_alice",
        "owner_type": "user",
        "kind": "user_memory",
        "member_type": "memcell",
    }

    first_version = _make_cluster(
        cluster_id="cl_xxxxxxxxxxx1",
        centroid_vals=[1.0, 0.0],
        members=["mc_one"],
        count=1,
        preview=["first sample"],
    )
    await repo.upsert_with_members(first_version, **scope)

    # Same cluster id again: one more member, higher count, moved centroid.
    second_version = _make_cluster(
        cluster_id="cl_xxxxxxxxxxx1",
        centroid_vals=[0.5, 0.5],
        members=["mc_one", "mc_two"],
        count=2,
        preview=["first sample", "second sample"],
        last_ts_ms=1_700_000_002_000,
    )
    await repo.upsert_with_members(second_version, **scope)

    listed = await repo.list_for_owner("u_alice", "user_memory")
    assert len(listed) == 1
    merged = listed[0]
    assert merged.count == 2
    assert merged.members == ["mc_one", "mc_two"]
    assert merged.preview == ["first sample", "second sample"]
    expected_centroid = np.array([0.5, 0.5], dtype=np.float32)
    np.testing.assert_allclose(np.asarray(merged.centroid), expected_centroid)
|
||||
|
||||
|
||||
async def test_upsert_is_idempotent_under_retry(repo: _ClusterRepo) -> None:
    """Replaying the identical upsert leaves one cluster row with no duplicate members."""
    cluster = _make_cluster(
        cluster_id="cl_idempot00001",
        centroid_vals=[0.1, 0.9],
        members=["mc_one", "mc_two"],
        count=2,
    )
    # Two identical writes stand in for an at-least-once retry.
    for _attempt in range(2):
        await repo.upsert_with_members(
            cluster,
            owner_id="u_alice",
            owner_type="user",
            kind="user_memory",
            member_type="memcell",
        )

    listed = await repo.list_for_owner("u_alice", "user_memory")
    assert len(listed) == 1
    assert listed[0].members == ["mc_one", "mc_two"]
|
||||
|
||||
|
||||
async def test_upsert_rejects_unset_cluster_id(repo: _ClusterRepo) -> None:
    """Upserting a cluster whose ``id`` is ``None`` raises ``ValueError``."""
    unidentified = AlgoCluster(
        id=None,
        centroid=np.array([1.0], dtype=np.float32),
        count=1,
        last_ts=1_700_000_000_000,
        preview=[],
        members=["mc_one"],
    )
    scope = {
        "owner_id": "u_alice",
        "owner_type": "user",
        "kind": "user_memory",
        "member_type": "memcell",
    }
    # The error message is expected to mention ``cluster_id``.
    with pytest.raises(ValueError, match="cluster_id"):
        await repo.upsert_with_members(unidentified, **scope)
|
||||
|
||||
|
||||
# ── reverse lookup ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_find_cluster_id_for_member_reverse_lookup(
    repo: _ClusterRepo,
) -> None:
    """``(member_type, member_id)`` resolves to its cluster id, or ``None`` on a miss."""
    user_cluster = _make_cluster(
        cluster_id="cl_user0000001",
        centroid_vals=[1.0, 0.0],
        members=["mc_one"],
    )
    case_cluster = _make_cluster(
        cluster_id="cl_case0000001",
        centroid_vals=[0.0, 1.0],
        members=["ac_20260517_0001"],
    )
    await repo.upsert_with_members(
        user_cluster,
        owner_id="u_alice",
        owner_type="user",
        kind="user_memory",
        member_type="memcell",
    )
    await repo.upsert_with_members(
        case_cluster,
        owner_id="agent_42",
        owner_type="agent",
        kind="agent_case",
        member_type="case",
    )

    memcell_hit = await repo.find_cluster_id_for_member("memcell", "mc_one")
    assert memcell_hit == "cl_user0000001"
    case_hit = await repo.find_cluster_id_for_member("case", "ac_20260517_0001")
    assert case_hit == "cl_case0000001"

    # The member type is part of the key: a known id under the other type misses.
    wrong_type_for_memcell = await repo.find_cluster_id_for_member("case", "mc_one")
    assert wrong_type_for_memcell is None
    wrong_type_for_case = await repo.find_cluster_id_for_member(
        "memcell", "ac_20260517_0001"
    )
    assert wrong_type_for_case is None
    # An id that was never stored misses as well.
    unknown_member = await repo.find_cluster_id_for_member("memcell", "mc_missing")
    assert unknown_member is None
|
||||
@ -0,0 +1,508 @@
|
||||
"""Tests for :class:`_MdChangeStateRepo` — cascade work-queue persistence.
|
||||
|
||||
Builds a fresh tmp-file SQLite engine per test (the in-memory ``sqlite``
|
||||
driver can't share schema across concurrent connections), wires a
|
||||
private repo instance to its session factory, then exercises every
|
||||
public method against the live database — no mocks, no in-memory
|
||||
shortcuts.
|
||||
|
||||
Covers the unit-test matrix from
|
||||
``16_cascade_impl_design.md`` §14 for this commit:
|
||||
|
||||
- ``upsert`` — LSN monotonic across the same path, retry_count resets.
|
||||
- ``claim_one`` — atomic; concurrent racers split 1 winner / N losers.
|
||||
- ``reset_retryable_to_pending`` — only ``retryable=TRUE`` rows move.
|
||||
|
||||
Plus the rest of the repo surface (mark_done / mark_failed /
|
||||
queue_summary / list_failed / force_enqueue / claim_pending_batch).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
)
|
||||
from everos.infra.persistence.sqlite.repos.md_change_state import (
|
||||
_MdChangeStateRepo,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path: Path) -> _MdChangeStateRepo:
    """Provide a ``_MdChangeStateRepo`` on a fresh SQLite DB under ``tmp_path``.

    The schema is created from ``SQLModel.metadata`` before the repo is
    handed to the test.
    """
    memory_root = MemoryRoot(tmp_path)
    memory_root.ensure()
    db_engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(db_engine)
    # Create every table registered on SQLModel.metadata up front.
    async with db_engine.begin() as connection:
        await connection.run_sync(SQLModel.metadata.create_all)
    # NOTE(review): the engine is never disposed after the test — confirm
    # whether an explicit teardown is needed here.
    return _MdChangeStateRepo(session_factory=session_factory)
|
||||
|
||||
|
||||
# ── upsert ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_upsert_assigns_monotonic_lsn(repo: _MdChangeStateRepo) -> None:
    """The first two upserts on an empty table return LSN 1 and then LSN 2."""
    events = (
        ("users/u/episodes/episode-2026-05-12.md", 1.0),
        ("users/u/episodes/episode-2026-05-13.md", 2.0),
    )
    assigned = [
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=mtime)
        for md_path, mtime in events
    ]
    first_lsn, second_lsn = assigned
    assert first_lsn == 1
    assert second_lsn == 2
|
||||
|
||||
|
||||
async def test_upsert_same_path_bumps_lsn_and_resets_retry(
    repo: _MdChangeStateRepo,
) -> None:
    """Upserting a failed path again yields a higher LSN and a clean pending row."""
    target = "users/u/episodes/episode-2026-05-12.md"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)
    # Drive the row into a retryable failure: claim it, then mark it failed.
    await repo.claim_one(target)
    await repo.mark_failed(target, retryable=True, error="503", new_retry_count=3)

    new_lsn = await repo.upsert(
        target, kind="episode", change_type="modified", mtime=2.0
    )
    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.lsn == new_lsn
    assert new_lsn > 1
    # The row is pending again and carries no leftover failure metadata.
    assert stored.status == "pending"
    assert stored.retry_count == 0
    assert stored.error is None
    assert stored.retryable is None
    # change_type / mtime reflect the latest event, not the first one.
    assert stored.change_type == "modified"
    assert stored.mtime == 2.0
|
||||
|
||||
|
||||
# ── force_enqueue ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_force_enqueue_resurrects_done_row(
    repo: _MdChangeStateRepo,
) -> None:
    """``force_enqueue`` on a ``done`` row flips it back to pending (`cascade sync --path`)."""
    target = "users/u/episodes/episode-2026-05-12.md"
    # Walk the row all the way to ``done`` first.
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)
    await repo.claim_one(target)
    await repo.mark_done(target)

    returned_lsn = await repo.force_enqueue(target, "episode")
    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.lsn == returned_lsn
    assert stored.status == "pending"
    assert stored.change_type == "modified"
|
||||
|
||||
|
||||
# ── claim_one ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_claim_one_returns_row_when_pending(
    repo: _MdChangeStateRepo,
) -> None:
    """Claiming a pending path returns its row, now ``processing`` with an attempt time."""
    target = "users/u/episodes/episode-2026-05-12.md"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)

    claimed = await repo.claim_one(target)
    assert claimed is not None
    assert claimed.md_path == target
    assert claimed.status == "processing"
    assert claimed.last_attempt_at is not None
|
||||
|
||||
|
||||
async def test_claim_one_returns_none_when_already_processing(
    repo: _MdChangeStateRepo,
) -> None:
    """A row can be claimed once; a repeat claim on the same path yields ``None``."""
    target = "users/u/episodes/episode-2026-05-12.md"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)

    initial_claim = await repo.claim_one(target)
    assert initial_claim is not None

    repeat_claim = await repo.claim_one(target)
    assert repeat_claim is None
|
||||
|
||||
|
||||
async def test_claim_one_race_only_one_winner(
    repo: _MdChangeStateRepo,
) -> None:
    """Of three claims gathered concurrently on one row, exactly one gets it."""
    target = "users/u/episodes/episode-2026-05-12.md"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)

    racers = [repo.claim_one(target) for _ in range(3)]
    outcomes = await asyncio.gather(*racers)

    # Losers come back as ``None``; count only the claims that returned a row.
    winner_count = sum(1 for outcome in outcomes if outcome is not None)
    assert winner_count == 1
|
||||
|
||||
|
||||
# ── claim_pending_batch ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_claim_pending_batch_returns_in_lsn_order(
    repo: _MdChangeStateRepo,
) -> None:
    """A batch claim returns rows in enqueue order, all flipped to ``processing``."""
    enqueued = [f"users/u/episodes/e-{index}.md" for index in range(3)]
    for md_path in enqueued:
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)

    claimed = await repo.claim_pending_batch(limit=10)

    claimed_paths = [row.md_path for row in claimed]
    assert claimed_paths == enqueued
    not_processing = [row for row in claimed if row.status != "processing"]
    assert not not_processing
|
||||
|
||||
|
||||
async def test_claim_pending_batch_skips_already_claimed(
    repo: _MdChangeStateRepo,
) -> None:
    """A row already claimed individually is left out of the next batch claim."""
    for md_path in ("a.md", "b.md"):
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)
    # Take ``a.md`` out of the pending pool before batching.
    await repo.claim_one("a.md")

    claimed = await repo.claim_pending_batch(limit=10)
    claimed_paths = [row.md_path for row in claimed]
    assert claimed_paths == ["b.md"]
|
||||
|
||||
|
||||
async def test_claim_pending_batch_zero_limit_returns_empty(
    repo: _MdChangeStateRepo,
) -> None:
    """``limit=0`` yields an empty list even though a pending row exists."""
    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
    claimed = await repo.claim_pending_batch(limit=0)
    assert claimed == []
|
||||
|
||||
|
||||
# ── mark_done / mark_failed ─────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_mark_done_transitions_processing_to_done(
    repo: _MdChangeStateRepo,
) -> None:
    """``mark_done`` on a claimed row leaves it ``done`` with no error fields set."""
    target = "users/u/episodes/episode-2026-05-12.md"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)
    await repo.claim_one(target)
    await repo.mark_done(target)

    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.status == "done"
    assert stored.error is None
    assert stored.retryable is None
|
||||
|
||||
|
||||
async def test_mark_failed_records_retryable_flag(
    repo: _MdChangeStateRepo,
) -> None:
    """``mark_failed`` stores the status, retryable flag, error text and retry count."""
    target = "users/u/episodes/episode-2026-05-12.md"
    failure_message = "YAML parse: line 5"
    await repo.upsert(target, kind="episode", change_type="added", mtime=1.0)
    await repo.claim_one(target)
    await repo.mark_failed(
        target, retryable=False, error=failure_message, new_retry_count=0
    )

    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.status == "failed"
    assert stored.retryable is False
    assert stored.error == failure_message
    assert stored.retry_count == 0
|
||||
|
||||
|
||||
# ── Race: re-enqueue during processing must win over stale mark_xxx ─────
|
||||
#
|
||||
# Reproduces the Bug A scenario:
|
||||
# T0 watcher upsert → status=pending, lsn=1
|
||||
# T1 worker claim_one → status=processing, lsn=1
|
||||
# T2 watcher upsert again → status=pending, lsn=2 (on_conflict_do_update)
|
||||
# T3 worker (stale claim) finishes handler
|
||||
# T4 worker mark_xxx → must no-op because status != processing
|
||||
#
|
||||
# Without the guard, T4 overwrites T2's pending and the worker never
|
||||
# re-processes the latest md state.
|
||||
|
||||
|
||||
async def test_mark_done_noop_when_row_reverted_to_pending(
    repo: _MdChangeStateRepo,
) -> None:
    """A late ``mark_done`` must not clobber a re-enqueue that landed mid-processing."""
    target = "users/u/episodes/episode-2026-05-12.md"

    # Step 1 — the watcher enqueues the path.
    first_lsn = await repo.upsert(
        target, kind="episode", change_type="added", mtime=1.0
    )
    # Step 2 — a worker claims it.
    claim = await repo.claim_one(target)
    assert claim is not None
    # Step 3 — the watcher enqueues again; the second upsert returns a newer lsn.
    second_lsn = await repo.upsert(
        target, kind="episode", change_type="modified", mtime=2.0
    )
    assert second_lsn > first_lsn
    # Step 4 — the worker's stale completion arrives and must be ignored.
    await repo.mark_done(target)

    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.status == "pending"  # still pending, not "done"
    assert stored.lsn == second_lsn  # lsn from the re-enqueue is kept
    assert stored.change_type == "modified"  # payload from the re-enqueue is kept
    assert stored.mtime == 2.0
|
||||
|
||||
|
||||
async def test_mark_failed_noop_when_row_reverted_to_pending(
    repo: _MdChangeStateRepo,
) -> None:
    """A late ``mark_failed`` must not clobber a re-enqueue that landed mid-processing."""
    target = "users/u/episodes/episode-2026-05-12.md"
    first_lsn = await repo.upsert(
        target, kind="episode", change_type="added", mtime=1.0
    )
    claim = await repo.claim_one(target)
    assert claim is not None
    second_lsn = await repo.upsert(
        target, kind="episode", change_type="modified", mtime=2.0
    )
    assert second_lsn > first_lsn
    # Stale failure report from the worker that held the earlier claim.
    await repo.mark_failed(target, retryable=True, error="503", new_retry_count=2)

    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.status == "pending"  # still pending, not "failed"
    assert stored.lsn == second_lsn
    # None of the failure details from the stale call were written.
    assert stored.error is None
    assert stored.retryable is None
    assert stored.retry_count == 0
|
||||
|
||||
|
||||
async def test_mark_done_concurrent_with_upsert_preserves_reenqueue(
    repo: _MdChangeStateRepo,
) -> None:
    """Gathering ``upsert`` with ``mark_done`` never loses the re-enqueue.

    Either commit order is acceptable:

    * upsert lands first → mark_done finds the row no longer processing
      and does nothing → the row ends pending with the new lsn;
    * mark_done lands first → the row is briefly done with the old lsn,
      then the upsert flips it back → pending with the new lsn.

    The assertions only pin the shared outcome: the re-enqueue wins.
    """
    target = "users/u/episodes/episode-2026-05-12.md"
    first_lsn = await repo.upsert(
        target, kind="episode", change_type="added", mtime=1.0
    )
    await repo.claim_one(target)

    # Launch both writes together. SQLite WAL serialises commits, so one
    # ends up ordered before the other; which one is deliberately left open.
    reenqueue = repo.upsert(target, kind="episode", change_type="modified", mtime=2.0)
    stale_done = repo.mark_done(target)
    await asyncio.gather(reenqueue, stale_done)

    stored = await repo.get_by_id(target)
    assert stored is not None
    assert stored.status == "pending"
    assert stored.lsn > first_lsn
    assert stored.change_type == "modified"
    assert stored.mtime == 2.0
|
||||
|
||||
|
||||
# ── reset_retryable_to_pending ──────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_reset_retryable_to_pending_moves_only_retryable(
    repo: _MdChangeStateRepo,
) -> None:
    """Only ``retryable=True`` failures are reset (`cascade fix --apply` semantics)."""
    for md_path in ("a.md", "b.md", "c.md"):
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)

    # a.md fails retryably, b.md fails permanently, c.md is never claimed.
    failures = (
        ("a.md", True, "503", 3),
        ("b.md", False, "YAML", 0),
    )
    for md_path, retryable, error, retries in failures:
        await repo.claim_one(md_path)
        await repo.mark_failed(
            md_path, retryable=retryable, error=error, new_retry_count=retries
        )

    reset_count = await repo.reset_retryable_to_pending()
    assert reset_count == 1

    retryable_row = await repo.get_by_id("a.md")
    permanent_row = await repo.get_by_id("b.md")
    # The retryable failure is pending again with its failure metadata wiped.
    assert retryable_row is not None and retryable_row.status == "pending"
    assert retryable_row.retry_count == 0
    assert retryable_row.retryable is None
    assert retryable_row.error is None
    # The permanent failure is untouched.
    assert permanent_row is not None and permanent_row.status == "failed"
    assert permanent_row.retryable is False
|
||||
|
||||
|
||||
async def test_reset_retryable_to_pending_zero_when_none_eligible(
    repo: _MdChangeStateRepo,
) -> None:
    """With only a non-retryable failure present, the reset reports 0 rows."""
    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
    await repo.claim_one("a.md")
    await repo.mark_failed("a.md", retryable=False, error="YAML", new_retry_count=0)

    reset_count = await repo.reset_retryable_to_pending()
    assert reset_count == 0
|
||||
|
||||
|
||||
# ── list_failed ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_list_failed_orders_by_lsn(repo: _MdChangeStateRepo) -> None:
    """``list_failed`` returns the two failed rows in enqueue order, skipping ``b.md``."""
    for md_path in ("a.md", "b.md", "c.md"):
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)

    # Fail the first and last path; ``b.md`` stays pending.
    failures = (
        ("a.md", True, "x", 3),
        ("c.md", False, "y", 0),
    )
    for md_path, retryable, error, retries in failures:
        await repo.claim_one(md_path)
        await repo.mark_failed(
            md_path, retryable=retryable, error=error, new_retry_count=retries
        )

    failed_rows = await repo.list_failed()
    failed_paths = [row.md_path for row in failed_rows]
    assert failed_paths == ["a.md", "c.md"]
|
||||
|
||||
|
||||
# ── queue_summary ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_queue_summary_aggregates_all_states(
    repo: _MdChangeStateRepo,
) -> None:
    """``queue_summary`` reports the expected counts and LSN marks for a mixed queue."""

    async def enqueue(md_path: str) -> None:
        # Every row in this test is enqueued with the same payload.
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)

    # Two rows that should count as pending: one untouched, one claimed.
    await enqueue("p1.md")
    await enqueue("p2.md")
    await repo.claim_one("p2.md")  # processing is expected to count as pending

    # One row taken through claim → mark_done.
    await enqueue("d.md")
    await repo.claim_one("d.md")
    await repo.mark_done("d.md")

    # One retryable failure.
    await enqueue("fr.md")
    await repo.claim_one("fr.md")
    await repo.mark_failed("fr.md", retryable=True, error="503", new_retry_count=3)

    # One permanent failure.
    await enqueue("fp.md")
    await repo.claim_one("fp.md")
    await repo.mark_failed("fp.md", retryable=False, error="YAML", new_retry_count=0)

    summary = await repo.queue_summary()
    assert summary.pending == 2
    assert summary.done == 1
    assert summary.failed_retryable == 1
    assert summary.failed_permanent == 1
    # Five upserts in total, and the last one (fp.md) ended up failed, so
    # both LSN marks are expected to be 5.
    assert summary.max_lsn == 5
    assert summary.last_processed_lsn == 5
|
||||
|
||||
|
||||
async def test_queue_summary_empty_table(repo: _MdChangeStateRepo) -> None:
    """On an empty table the summary equals the all-zero ``_empty_summary()``."""
    assert await repo.queue_summary() == _empty_summary()
|
||||
|
||||
|
||||
def _empty_summary() -> object:
    """Return a ``QueueSummary`` with every counter and LSN field set to zero."""
    from everos.infra.persistence.sqlite import QueueSummary

    field_names = (
        "pending",
        "done",
        "failed_retryable",
        "failed_permanent",
        "max_lsn",
        "last_processed_lsn",
    )
    return QueueSummary(**dict.fromkeys(field_names, 0))
|
||||
|
||||
|
||||
# ── recover_orphan_processing ───────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_recover_orphan_processing_resets_stale_rows(
    repo: _MdChangeStateRepo,
) -> None:
    """Rows stuck in ``processing`` are returned to ``pending`` by the recovery call."""
    orphaned = ("a.md", "b.md")
    for md_path in orphaned:
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)
    # Claim both and never finish them, as if the worker had died mid-run.
    for md_path in orphaned:
        await repo.claim_one(md_path)

    recovered = await repo.recover_orphan_processing()
    assert recovered == 2

    row_a = await repo.get_by_id("a.md")
    row_b = await repo.get_by_id("b.md")
    assert row_a is not None and row_a.status == "pending"
    assert row_b is not None and row_b.status == "pending"
    # The attempt timestamp is wiped so the next claim records a fresh one.
    assert row_a.last_attempt_at is None
    assert row_b.last_attempt_at is None
|
||||
|
||||
|
||||
async def test_recover_orphan_processing_zero_when_clean(
    repo: _MdChangeStateRepo,
) -> None:
    """With nothing in ``processing`` the recovery reports 0 and a done row stays done."""
    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
    await repo.claim_one("a.md")
    await repo.mark_done("a.md")

    recovered = await repo.recover_orphan_processing()
    assert recovered == 0

    stored = await repo.get_by_id("a.md")
    assert stored is not None
    assert stored.status == "done"
|
||||
|
||||
|
||||
async def test_recover_orphan_processing_only_touches_processing_rows(
    repo: _MdChangeStateRepo,
) -> None:
    """Recovery moves the one ``processing`` row; pending / done / failed rows keep their status."""
    for md_path in ("p.md", "d.md", "f.md", "proc.md"):
        await repo.upsert(md_path, kind="episode", change_type="added", mtime=0.0)

    # d.md → done, f.md → failed, proc.md → left in processing, p.md untouched.
    await repo.claim_one("d.md")
    await repo.mark_done("d.md")
    await repo.claim_one("f.md")
    await repo.mark_failed("f.md", retryable=True, error="x", new_retry_count=1)
    await repo.claim_one("proc.md")

    recovered = await repo.recover_orphan_processing()
    assert recovered == 1

    pending_row = await repo.get_by_id("p.md")
    done_row = await repo.get_by_id("d.md")
    failed_row = await repo.get_by_id("f.md")
    orphan_row = await repo.get_by_id("proc.md")
    assert pending_row is not None and pending_row.status == "pending"
    assert done_row is not None and done_row.status == "done"
    assert failed_row is not None and failed_row.status == "failed"
    assert orphan_row is not None and orphan_row.status == "pending"
|
||||
|
||||
|
||||
# ── Partial indexes (smoke) ─────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_partial_indexes_are_created(repo: _MdChangeStateRepo) -> None:
    """The four ``idx_md_change_*`` indexes are present in ``sqlite_master``.

    Smoke check only: it verifies the index names exist after schema
    creation, not their definitions. Every missing name is reported in
    a single failure instead of stopping at the first one.
    """
    from sqlalchemy import text

    expected_indexes = {
        "idx_md_change_pending",
        "idx_md_change_retryable",
        "idx_md_change_mtime",
        "idx_md_change_kind",
    }
    async with repo.session_factory() as session:
        result = await session.execute(
            text("SELECT name FROM sqlite_master WHERE type='index'")
        )
        names = {row[0] for row in result.all()}

    missing = sorted(expected_indexes - names)
    assert not missing, f"missing indexes {missing!r}; got {names!r}"
|
||||
Reference in New Issue
Block a user