chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions
--- a/tests/unit/test_infra/init.py
+++ b/tests/unit/test_infra/init.py
--- a/tests/unit/test_infra/test_lancedb/init.py
+++ b/tests/unit/test_infra/test_lancedb/init.py
--- a/tests/unit/test_infra/test_lancedb/test_lancedb_manager.py
+++ b/tests/unit/test_infra/test_lancedb/test_lancedb_manager.py
@ -0,0 +1,72 @@
+"""LanceDB manager singletons.
+
+Verifies ``get_connection`` / ``get_table`` / ``dispose_connection``
+are idempotent and rebuild after dispose.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from lancedb.pydantic import Vector
+
+from everos.core.persistence import BaseLanceTable
+from everos.infra.persistence.lancedb import lancedb_manager
+
+
+class _DemoVec(BaseLanceTable):
+    """Demo schema — only used by this test module."""
+
+    text: str
+    vector: Vector(3)  # type: ignore[valid-type]
+
+
+@pytest.fixture(autouse=True)
+async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+    """Point the singleton at an isolated memory-root and reset module state."""
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+    lancedb_manager._conn = None
+    lancedb_manager._tables.clear()
+    yield
+    await lancedb_manager.dispose_connection()
+
+
+async def test_get_connection_is_singleton() -> None:
+    c1 = await lancedb_manager.get_connection()
+    c2 = await lancedb_manager.get_connection()
+    assert c1 is c2
+
+
+async def test_get_table_creates_then_caches() -> None:
+    t1 = await lancedb_manager.get_table("demo", _DemoVec)
+    t2 = await lancedb_manager.get_table("demo", _DemoVec)
+    assert t1 is t2
+    assert "demo" in lancedb_manager._tables
+
+
+async def test_get_table_reopens_existing() -> None:
+    """A second connection cycle must reopen (not recreate) the table."""
+    await lancedb_manager.get_table("demo", _DemoVec)
+    await lancedb_manager.dispose_connection()
+
+    t = await lancedb_manager.get_table("demo", _DemoVec)
+    assert t is not None
+    # Round-trip a row to prove the schema survived.
+    await t.add([_DemoVec(text="hello", vector=[0.1, 0.2, 0.3])])
+    assert await t.count_rows() == 1
+
+
+async def test_dispose_resets_state() -> None:
+    await lancedb_manager.get_connection()
+    await lancedb_manager.get_table("demo", _DemoVec)
+    await lancedb_manager.dispose_connection()
+    assert lancedb_manager._conn is None
+    assert lancedb_manager._tables == {}
+
+
+async def test_dispose_is_idempotent() -> None:
+    await lancedb_manager.dispose_connection()  # nothing built yet
+    await lancedb_manager.get_connection()
+    await lancedb_manager.dispose_connection()
+    await lancedb_manager.dispose_connection()  # second call must not raise
--- a/tests/unit/test_infra/test_lancedb/test_repos/init.py
+++ b/tests/unit/test_infra/test_lancedb/test_repos/init.py
--- a/tests/unit/test_infra/test_lancedb/test_repos/test_agent_skill.py
+++ b/tests/unit/test_infra/test_lancedb/test_repos/test_agent_skill.py
@ -0,0 +1,153 @@
+"""Tests for :class:`everos.infra.persistence.lancedb._AgentSkillRepo`.
+
+Real LanceDB under ``tmp_path`` (no mocks) — these tests exercise the
+SQL ``where`` predicate, cosine ``distance_type`` ranking, and
+``_distance`` stripping that the repo owns. Strategy-level routing
+across these methods is covered separately in
+``tests/unit/test_memory/test_strategies/test_extract_agent_skill.py``.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.infra.persistence.lancedb import (
+    AgentSkill as LanceAgentSkill,
+)
+from everos.infra.persistence.lancedb import (
+    agent_skill_repo,
+    lancedb_manager,
+)
+
+
+def _skill_row(
+    *,
+    name: str,
+    owner_id: str,
+    cluster_id: str,
+    vector: list[float],
+) -> LanceAgentSkill:
+    """Minimal AgentSkill row sufficient to land in LanceDB for repo tests."""
+    return LanceAgentSkill(
+        id=f"{owner_id}_{name}",
+        owner_id=owner_id,
+        owner_type="agent",
+        name=name,
+        description=f"desc {name}",
+        description_tokens=f"desc {name}",
+        content=f"body of {name}",
+        content_tokens=f"body of {name}",
+        confidence=0.7,
+        maturity_score=0.6,
+        source_case_ids=[],
+        cluster_id=cluster_id,
+        md_path=f"agents/{owner_id}/skills/{name}/SKILL.md",
+        content_sha256="x" * 64,
+        vector=vector,
+    )
+
+
+@pytest.fixture
+async def _real_lancedb(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+    """Spin up a clean LanceDB rooted under ``tmp_path`` for one test."""
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+    lancedb_manager._conn = None
+    lancedb_manager._tables.clear()
+    yield
+    await lancedb_manager.dispose_connection()
+
+
+async def test_count_in_cluster_isolates_owner_and_cluster(
+    _real_lancedb: None,
+) -> None:
+    """``count_in_cluster`` returns only rows matching both filters."""
+    await agent_skill_repo.upsert(
+        [
+            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
+            _skill_row(name="s2", owner_id="a", cluster_id="cl_x", vector=[0.2] * 1024),
+            _skill_row(
+                name="other_cluster",
+                owner_id="a",
+                cluster_id="cl_y",
+                vector=[0.3] * 1024,
+            ),
+            _skill_row(
+                name="other_owner",
+                owner_id="b",
+                cluster_id="cl_x",
+                vector=[0.4] * 1024,
+            ),
+        ]
+    )
+
+    assert (
+        await agent_skill_repo.count_in_cluster(owner_id="a", cluster_id="cl_x")
+    ) == 2
+
+
+async def test_find_in_cluster_returns_typed_rows_no_ranking(
+    _real_lancedb: None,
+) -> None:
+    """Scalar fetch within one cluster; capped at ``limit`` regardless of order."""
+    await agent_skill_repo.upsert(
+        [
+            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
+            _skill_row(name="s2", owner_id="a", cluster_id="cl_x", vector=[0.2] * 1024),
+            _skill_row(name="s3", owner_id="a", cluster_id="cl_x", vector=[0.3] * 1024),
+            _skill_row(
+                name="other_cluster",
+                owner_id="a",
+                cluster_id="cl_y",
+                vector=[0.4] * 1024,
+            ),
+        ]
+    )
+
+    got = await agent_skill_repo.find_in_cluster(
+        owner_id="a", cluster_id="cl_x", limit=2
+    )
+    assert len(got) == 2
+    assert {s.name for s in got}.issubset({"s1", "s2", "s3"})
+    assert all(s.owner_id == "a" and s.cluster_id == "cl_x" for s in got)
+
+
+async def test_find_topk_relevant_in_cluster_ranks_by_cosine(
+    _real_lancedb: None,
+) -> None:
+    """LanceDB native ``nearest_to + distance_type('cosine')`` ordering."""
+    near = [1.0] + [0.0] * 1023
+    far = [0.0] * 1023 + [1.0]
+    medium = [0.7, 0.7] + [0.0] * 1022
+    await agent_skill_repo.upsert(
+        [
+            _skill_row(name="near", owner_id="a", cluster_id="cl_x", vector=near),
+            _skill_row(name="far", owner_id="a", cluster_id="cl_x", vector=far),
+            _skill_row(name="medium", owner_id="a", cluster_id="cl_x", vector=medium),
+            # Different cluster — must not leak.
+            _skill_row(name="other", owner_id="a", cluster_id="cl_y", vector=near),
+            # Different owner — must not leak either.
+            _skill_row(name="near", owner_id="b", cluster_id="cl_x", vector=near),
+        ]
+    )
+
+    got = await agent_skill_repo.find_topk_relevant_in_cluster(
+        owner_id="a", cluster_id="cl_x", query_vector=near, top_k=2
+    )
+    assert [s.name for s in got] == ["near", "medium"]
+
+
+async def test_find_topk_relevant_in_cluster_raises_on_empty_vector(
+    _real_lancedb: None,
+) -> None:
+    """Empty ``query_vector`` is a caller-side error — the repo refuses."""
+    await agent_skill_repo.upsert(
+        [
+            _skill_row(name="s1", owner_id="a", cluster_id="cl_x", vector=[0.1] * 1024),
+        ]
+    )
+    with pytest.raises(ValueError, match="query_vector must be non-empty"):
+        await agent_skill_repo.find_topk_relevant_in_cluster(
+            owner_id="a", cluster_id="cl_x", query_vector=[], top_k=2
+        )
--- a/tests/unit/test_infra/test_lancedb/test_tables/init.py
+++ b/tests/unit/test_infra/test_lancedb/test_tables/init.py
--- a/tests/unit/test_infra/test_lancedb/test_tables/test_content_sha256.py
+++ b/tests/unit/test_infra/test_lancedb/test_tables/test_content_sha256.py
@ -0,0 +1,150 @@
+"""``content_sha256`` is a required field on every business lancedb table.
+
+Cascade handler (16 doc §3.3) diffs by this digest to skip no-op
+re-embeds. Every business schema — including ``agent_skill`` — declares
+the field; daily-log kinds hash a per-handler subset of inline +
+section keys, agent_skill hashes the file-level content-bearing parts.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+import pytest
+
+from everos.infra.persistence.lancedb import (
+    AgentCase,
+    AgentSkill,
+    AtomicFact,
+    Episode,
+    Foresight,
+)
+
+_VEC = [0.0] * 1024
+_NOW = dt.datetime(2026, 5, 14, 10, 0, 0, tzinfo=dt.UTC)
+_SHA = "f" * 64
+
+
+def _episode() -> Episode:
+    return Episode(
+        id="u1_ep_1",
+        entry_id="ep_20260514_0001",
+        owner_id="u1",
+        owner_type="user",
+        session_id="s1",
+        timestamp=_NOW,
+        parent_type="memcell",
+        parent_id="mc_1",
+        sender_ids=["u1"],
+        episode="hello world",
+        episode_tokens="hello world",
+        md_path="users/u1/episodes/episode-2026-05-14.md",
+        content_sha256=_SHA,
+        vector=_VEC,
+    )
+
+
+def _atomic_fact() -> AtomicFact:
+    return AtomicFact(
+        id="u1_af_1",
+        entry_id="af_20260514_0001",
+        owner_id="u1",
+        owner_type="user",
+        session_id="s1",
+        timestamp=_NOW,
+        parent_type="memcell",
+        parent_id="mc_1",
+        sender_ids=["u1"],
+        fact="x is y",
+        fact_tokens="x is y",
+        md_path="users/u1/.atomic_facts/atomic_fact-2026-05-14.md",
+        content_sha256=_SHA,
+        vector=_VEC,
+    )
+
+
+def _foresight() -> Foresight:
+    return Foresight(
+        id="u1_fs_1",
+        entry_id="fs_20260514_0001",
+        owner_id="u1",
+        owner_type="user",
+        session_id="s1",
+        timestamp=_NOW,
+        parent_type="memcell",
+        parent_id="mc_1",
+        sender_ids=["u1"],
+        foresight="user plans X",
+        foresight_tokens="user plans X",
+        md_path="users/u1/.foresights/foresight-2026-05-14.md",
+        content_sha256=_SHA,
+        vector=_VEC,
+    )
+
+
+def _agent_case() -> AgentCase:
+    return AgentCase(
+        id="a1_ac_1",
+        entry_id="ac_20260514_0001",
+        owner_id="a1",
+        owner_type="agent",
+        session_id="s1",
+        timestamp=_NOW,
+        parent_type="memcell",
+        parent_id="mc_1",
+        quality_score=0.9,
+        task_intent="scan contract",
+        task_intent_tokens="scan contract",
+        approach="step 1; step 2",
+        approach_tokens="step 1 step 2",
+        md_path="agents/a1/.cases/agent_case-2026-05-14.md",
+        content_sha256=_SHA,
+        vector=_VEC,
+    )
+
+
+def _agent_skill() -> AgentSkill:
+    return AgentSkill(
+        id="a1_demo_skill",
+        owner_id="a1",
+        owner_type="agent",
+        name="demo_skill",
+        description="just a demo",
+        description_tokens="just a demo",
+        content="body content",
+        content_tokens="body content",
+        confidence=0.7,
+        maturity_score=0.6,
+        source_case_ids=[],
+        md_path="agents/a1/agent_skills/demo_skill/SKILL.md",
+        content_sha256=_SHA,
+        vector=_VEC,
+    )
+
+
+@pytest.mark.parametrize(
+    "factory",
+    [_episode, _atomic_fact, _foresight, _agent_case, _agent_skill],
+    ids=["episode", "atomic_fact", "foresight", "agent_case", "agent_skill"],
+)
+def test_content_sha256_round_trip(factory) -> None:  # type: ignore[no-untyped-def]
+    row = factory()
+    assert row.content_sha256 == _SHA
+    dumped = row.model_dump()
+    assert dumped["content_sha256"] == _SHA
+    restored = type(row).model_validate(dumped)
+    assert restored.content_sha256 == _SHA
+
+
+@pytest.mark.parametrize(
+    "factory",
+    [_episode, _atomic_fact, _foresight, _agent_case, _agent_skill],
+    ids=["episode", "atomic_fact", "foresight", "agent_case", "agent_skill"],
+)
+def test_content_sha256_required(factory) -> None:  # type: ignore[no-untyped-def]
+    """Dropping content_sha256 from the kwargs surfaces a ValidationError."""
+    row = factory()
+    kwargs = row.model_dump()
+    del kwargs["content_sha256"]
+    with pytest.raises(Exception):  # noqa: B017,PT011
+        type(row).model_validate(kwargs)
--- a/tests/unit/test_infra/test_markdown/init.py
+++ b/tests/unit/test_infra/test_markdown/init.py
--- a/tests/unit/test_infra/test_markdown/test_mds/init.py
+++ b/tests/unit/test_infra/test_markdown/test_mds/init.py
--- a/tests/unit/test_infra/test_markdown/test_mds/test_agent_skill.py
+++ b/tests/unit/test_infra/test_markdown/test_mds/test_agent_skill.py
@ -0,0 +1,104 @@
+"""Tests for :class:`AgentSkillFrontmatter` — the AgentSkill schema.
+
+Lives under ``test_infra`` because :class:`AgentSkillFrontmatter` itself
+lives under ``infra/.../mds`` (it carries business fields + the
+directory-shape ClassVars). The schema-agnostic chassis tests live
+under ``test_core/test_persistence/test_markdown/``.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from everos.infra.persistence.markdown import AgentSkillFrontmatter
+
+
+def _kwargs(**overrides: object) -> dict[str, object]:
+    """Minimal valid kwargs for AgentSkillFrontmatter."""
+    base: dict[str, object] = {
+        "id": "skill_contract_risk_scan",
+        "agent_id": "agent_zhang_legal",
+        "name": "contract_risk_scan",
+        "description": "Scan a contract draft for risk clauses.",
+        "confidence": 0.5,
+        "maturity_score": 0.5,
+    }
+    base.update(overrides)
+    return base
+
+
+def test_skill_inherits_agent_scope() -> None:
+    """Skills always live under ``agents/`` — track + SCOPE_DIR confirm."""
+    assert AgentSkillFrontmatter.SCOPE_DIR == "agents"
+    fm = AgentSkillFrontmatter(**_kwargs())  # type: ignore[arg-type]
+    assert fm.track == "agent"
+    assert fm.type == "agent_skill"
+
+
+def test_skill_requires_name_and_description() -> None:
+    """Tier-1 prompt injection demands both fields — schema enforces."""
+    bad = _kwargs()
+    del bad["name"]
+    with pytest.raises(ValidationError):
+        AgentSkillFrontmatter(**bad)  # type: ignore[arg-type]
+
+    bad = _kwargs()
+    del bad["description"]
+    with pytest.raises(ValidationError):
+        AgentSkillFrontmatter(**bad)  # type: ignore[arg-type]
+
+
+def test_skill_requires_confidence_and_maturity_score() -> None:
+    """LLM-emitted score fields are required (no default)."""
+    bad = _kwargs()
+    del bad["confidence"]
+    with pytest.raises(ValidationError):
+        AgentSkillFrontmatter(**bad)  # type: ignore[arg-type]
+
+    bad = _kwargs()
+    del bad["maturity_score"]
+    with pytest.raises(ValidationError):
+        AgentSkillFrontmatter(**bad)  # type: ignore[arg-type]
+
+
+def test_skill_optional_fields_default() -> None:
+    """``source_case_ids`` defaults to empty list; ``cluster_id`` to None."""
+    fm = AgentSkillFrontmatter(**_kwargs())  # type: ignore[arg-type]
+    assert fm.source_case_ids == []
+    assert fm.cluster_id is None
+
+
+def test_skill_lineage_fields_round_trip() -> None:
+    """``source_case_ids`` + ``cluster_id`` round-trip through model_dump."""
+    fm = AgentSkillFrontmatter(
+        **_kwargs(
+            source_case_ids=["case_a", "case_b"],
+            cluster_id="cl_x",
+        ),  # type: ignore[arg-type]
+    )
+    dumped = fm.model_dump()
+    assert dumped["source_case_ids"] == ["case_a", "case_b"]
+    assert dumped["cluster_id"] == "cl_x"
+
+
+def test_skill_extra_fields_still_allowed() -> None:
+    """L2 system metadata (md_sha256 / last_indexed_at) rides along."""
+    fm = AgentSkillFrontmatter(
+        **_kwargs(
+            md_sha256="deadbeef",
+            last_indexed_at="2026-05-07T08:00:00Z",
+        ),  # type: ignore[arg-type]
+    )
+    dumped = fm.model_dump()
+    assert dumped["md_sha256"] == "deadbeef"
+    assert dumped["last_indexed_at"] == "2026-05-07T08:00:00Z"
+
+
+def test_skill_directory_shape_classvars() -> None:
+    """Path-shape ClassVars pin the wiki layout for the writer/reader pair."""
+    assert AgentSkillFrontmatter.SKILLS_CONTAINER_NAME == "skills"
+    assert AgentSkillFrontmatter.SKILL_DIR_PREFIX == "skill_"
+    assert AgentSkillFrontmatter.SKILL_MAIN_FILENAME == "SKILL.md"
+    assert AgentSkillFrontmatter.SKILL_REFERENCES_DIR_NAME == "references"
+    assert AgentSkillFrontmatter.SKILL_SCRIPTS_DIR_NAME == "scripts"
--- a/tests/unit/test_infra/test_markdown/test_mds/test_path_glob.py
+++ b/tests/unit/test_infra/test_markdown/test_mds/test_path_glob.py
@ -0,0 +1,30 @@
+"""Tests that every business frontmatter class reports the expected
+``path_glob()`` — the cascade scanner reads these to enumerate eligible
+files, so a wrong glob silently drops a whole kind from cascade.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from everos.infra.persistence.markdown import (
+    AgentCaseDailyFrontmatter,
+    AgentSkillFrontmatter,
+    AtomicFactDailyFrontmatter,
+    EpisodeDailyFrontmatter,
+    ForesightDailyFrontmatter,
+)
+
+
+@pytest.mark.parametrize(
+    ("schema", "expected"),
+    [
+        (EpisodeDailyFrontmatter, "*/*/users/*/episodes/episode-*.md"),
+        (AtomicFactDailyFrontmatter, "*/*/users/*/.atomic_facts/atomic_fact-*.md"),
+        (ForesightDailyFrontmatter, "*/*/users/*/.foresights/foresight-*.md"),
+        (AgentCaseDailyFrontmatter, "*/*/agents/*/.cases/agent_case-*.md"),
+        (AgentSkillFrontmatter, "*/*/agents/*/skills/skill_*/SKILL.md"),
+    ],
+)
+def test_path_glob(schema: type, expected: str) -> None:
+    assert schema.path_glob() == expected
--- a/tests/unit/test_infra/test_markdown/test_mds/test_profile.py
+++ b/tests/unit/test_infra/test_markdown/test_mds/test_profile.py
@ -0,0 +1,71 @@
+"""Tests for the profile frontmatter duck-typed shape.
+
+Profile schemas have no shared base class — they only need a
+``PROFILE_FILENAME`` ClassVar plus inheritance from a scope mixin. This
+test exercises that contract via a local fixture class.
+"""
+
+from __future__ import annotations
+
+from typing import ClassVar, Literal
+
+import pytest
+from pydantic import ValidationError
+
+from everos.core.persistence.markdown import UserScopedFrontmatter
+
+
+class _SampleUserProfileFM(UserScopedFrontmatter):
+    """Local fixture: a user-track profile schema."""
+
+    PROFILE_FILENAME: ClassVar[str] = "user.md"
+
+    type: Literal["sample_user_profile"] = "sample_user_profile"
+    display_name: str
+    bio: str
+    interests: list[str] = []
+
+
+def test_schema_inherits_user_scope() -> None:
+    fm = _SampleUserProfileFM(
+        id="sample_user_profile_u_jason",
+        type="sample_user_profile",
+        user_id="u_jason",
+        display_name="Jason",
+        bio="hiker.",
+    )
+    assert fm.track == "user"
+    assert fm.SCOPE_DIR == "users"
+
+
+def test_profile_filename_classvar() -> None:
+    """Path-shape ClassVar is duck-typed onto the schema directly."""
+    assert _SampleUserProfileFM.PROFILE_FILENAME == "user.md"
+
+
+def test_requires_display_name_and_bio() -> None:
+    with pytest.raises(ValidationError):
+        _SampleUserProfileFM(  # type: ignore[call-arg]
+            id="x",
+            type="sample_user_profile",
+            user_id="u_jason",
+            bio="missing display_name",
+        )
+    with pytest.raises(ValidationError):
+        _SampleUserProfileFM(  # type: ignore[call-arg]
+            id="x",
+            type="sample_user_profile",
+            user_id="u_jason",
+            display_name="missing bio",
+        )
+
+
+def test_interests_default_empty() -> None:
+    fm = _SampleUserProfileFM(
+        id="x",
+        type="sample_user_profile",
+        user_id="u_jason",
+        display_name="Jason",
+        bio="hiker.",
+    )
+    assert fm.interests == []
--- a/tests/unit/test_infra/test_markdown/test_readers/init.py
+++ b/tests/unit/test_infra/test_markdown/test_readers/init.py
--- a/tests/unit/test_infra/test_markdown/test_readers/test_agent_skill_reader.py
+++ b/tests/unit/test_infra/test_markdown/test_readers/test_agent_skill_reader.py
@ -0,0 +1,129 @@
+"""Tests for :class:`AgentSkillReader` — typed read for the skill directory layout."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from pydantic import ValidationError
+
+from everos.core.persistence import MemoryRoot
+from everos.infra.persistence.markdown import (
+    AgentSkillFrontmatter,
+    AgentSkillReader,
+    AgentSkillWriter,
+)
+
+
+def _make_fm(**overrides: object) -> AgentSkillFrontmatter:
+    base: dict[str, object] = {
+        "id": "agent_x_skill_alpha",
+        "agent_id": "agent_x",
+        "name": "alpha",
+        "description": "A test skill.",
+        "confidence": 0.5,
+        "maturity_score": 0.5,
+    }
+    base.update(overrides)
+    return AgentSkillFrontmatter(**base)  # type: ignore[arg-type]
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+@pytest.fixture
+def writer(root: MemoryRoot) -> AgentSkillWriter:
+    return AgentSkillWriter(root)
+
+
+@pytest.fixture
+def reader(root: MemoryRoot) -> AgentSkillReader:
+    return AgentSkillReader(root)
+
+
+async def test_read_main_returns_typed_frontmatter_and_body(
+    writer: AgentSkillWriter, reader: AgentSkillReader
+) -> None:
+    fm_in = _make_fm(
+        description="Contract risk scan.",
+        confidence=0.88,
+        maturity_score=0.82,
+        source_case_ids=["case_a", "case_b"],
+    )
+    await writer.write_main("agent_x", "alpha", frontmatter=fm_in, body="The body.")
+
+    out = await reader.read_main("agent_x", "alpha", schema=AgentSkillFrontmatter)
+    assert out is not None
+    fm_out, body = out
+    assert isinstance(fm_out, AgentSkillFrontmatter)
+    assert fm_out.name == "alpha"
+    assert fm_out.source_case_ids == ["case_a", "case_b"]
+    assert fm_out.confidence == 0.88
+    assert fm_out.maturity_score == 0.82
+    assert body == "The body."
+
+
+async def test_read_main_returns_none_when_missing(reader: AgentSkillReader) -> None:
+    assert (
+        await reader.read_main("agent_x", "ghost", schema=AgentSkillFrontmatter) is None
+    )
+
+
+async def test_read_main_round_trip_through_extra_fields(
+    writer: AgentSkillWriter, reader: AgentSkillReader
+) -> None:
+    """L2 / L4 ride-along fields survive a write+read cycle (extra="allow")."""
+    fm_in = _make_fm(md_sha256="abc", custom_label="ride-along")
+    await writer.write_main("agent_x", "alpha", frontmatter=fm_in, body="b")
+    out = await reader.read_main("agent_x", "alpha", schema=AgentSkillFrontmatter)
+    assert out is not None
+    fm_out, _ = out
+    dumped = fm_out.model_dump()
+    assert dumped["md_sha256"] == "abc"
+    assert dumped["custom_label"] == "ride-along"
+
+
+async def test_read_main_validates_against_supplied_schema(
+    writer: AgentSkillWriter, reader: AgentSkillReader
+) -> None:
+    """A stricter schema rejects loose existing data — proves typed parsing."""
+
+    class _StricterSkillFM(AgentSkillFrontmatter):
+        # Required field with no default — written file lacks it.
+        priority: int
+
+    fm_in = _make_fm()
+    await writer.write_main("agent_x", "alpha", frontmatter=fm_in, body="b")
+
+    with pytest.raises(ValidationError):
+        await reader.read_main("agent_x", "alpha", schema=_StricterSkillFM)
+
+
+async def test_read_reference_round_trip(
+    writer: AgentSkillWriter, reader: AgentSkillReader
+) -> None:
+    await writer.write_reference(
+        "agent_x", "alpha", "termination", "## term clauses\n..."
+    )
+    content = await reader.read_reference("agent_x", "alpha", "termination")
+    assert content == "## term clauses\n..."
+
+
+async def test_read_reference_returns_none_when_missing(
+    reader: AgentSkillReader,
+) -> None:
+    assert await reader.read_reference("agent_x", "alpha", "ghost") is None
+
+
+async def test_read_script_round_trip(
+    writer: AgentSkillWriter, reader: AgentSkillReader
+) -> None:
+    await writer.write_script("agent_x", "alpha", "redline.py", "print('hi')\n")
+    content = await reader.read_script("agent_x", "alpha", "redline.py")
+    assert content == "print('hi')"
+
+
+async def test_read_script_returns_none_when_missing(reader: AgentSkillReader) -> None:
+    assert await reader.read_script("agent_x", "alpha", "ghost.py") is None
--- a/tests/unit/test_infra/test_markdown/test_readers/test_base.py
+++ b/tests/unit/test_infra/test_markdown/test_readers/test_base.py
@ -0,0 +1,182 @@
+"""Tests for ``BaseDailyReader`` chassis.
+
+Symmetric to ``test_writers/test_base.py`` — exercises path resolution
+ entry locating + structured-entry upgrading on a dummy schema.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from pathlib import Path
+from typing import ClassVar, Literal
+
+import pytest
+
+from everos.core.persistence import (
+    EntryId,
+    MemoryRoot,
+    StructuredEntry,
+    UserScopedFrontmatter,
+    render_structured_entry,
+)
+from everos.infra.persistence.markdown.readers import BaseDailyReader
+from everos.infra.persistence.markdown.writers import BaseDailyWriter
+
+
+class _DemoFrontmatter(UserScopedFrontmatter):
+    ENTRY_ID_PREFIX: ClassVar[str] = "demo"
+    DIR_NAME: ClassVar[str] = "demos"
+    FILE_PREFIX: ClassVar[str] = "demo"
+    type: Literal["user_demo"] = "user_demo"
+
+
+class _DemoWriter(BaseDailyWriter):
+    schema = _DemoFrontmatter
+
+
+class _DemoReader(BaseDailyReader):
+    schema = _DemoFrontmatter
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+# ── construction ────────────────────────────────────────────────────────
+
+
+def test_reader_rejects_missing_schema(root: MemoryRoot) -> None:
+    class _NoSchemaReader(BaseDailyReader):
+        pass
+
+    with pytest.raises(TypeError, match="schema"):
+        _NoSchemaReader(root)
+
+
+def test_reader_rejects_schema_missing_classvars(root: MemoryRoot) -> None:
+    class _IncompleteFrontmatter(UserScopedFrontmatter):
+        # Missing DIR_NAME / FILE_PREFIX.
+        type: Literal["incomplete"] = "incomplete"
+
+    class _IncompleteReader(BaseDailyReader):
+        schema = _IncompleteFrontmatter
+
+    with pytest.raises(TypeError, match="missing ClassVar"):
+        _IncompleteReader(root)
+
+
+# ── read_for ────────────────────────────────────────────────────────────
+
+
+async def test_read_for_returns_none_when_file_missing(root: MemoryRoot) -> None:
+    reader = _DemoReader(root)
+    assert await reader.read_for("u_jason", dt.date(2026, 4, 22)) is None
+
+
+async def test_read_for_returns_parsed_when_file_exists(
+    tmp_path: Path, root: MemoryRoot
+) -> None:
+    writer = _DemoWriter(root)
+    await writer.append("u_jason", "first body", date=dt.date(2026, 4, 22))
+
+    reader = _DemoReader(root)
+    parsed = await reader.read_for("u_jason", dt.date(2026, 4, 22))
+    assert parsed is not None
+    assert len(parsed.entries) == 1
+    assert parsed.entries[0].body == "first body"
+
+
+async def test_read_for_today_default(root: MemoryRoot) -> None:
+    """Omitting ``date`` falls back to today_with_timezone()."""
+    writer = _DemoWriter(root)
+    await writer.append("u_jason", "today body")
+
+    reader = _DemoReader(root)
+    parsed = await reader.read_for("u_jason")
+    assert parsed is not None
+    assert parsed.entries[0].body == "today body"
+
+
+# ── find_entry ──────────────────────────────────────────────────────────
+
+
+async def test_find_entry_resolves_file_from_entry_id(root: MemoryRoot) -> None:
+    writer = _DemoWriter(root)
+    await writer.append("u_jason", "alpha", date=dt.date(2026, 4, 22))
+    await writer.append("u_jason", "beta", date=dt.date(2026, 4, 22))
+
+    reader = _DemoReader(root)
+    e = await reader.find_entry("u_jason", "demo_20260422_00000002")
+    assert e is not None
+    assert e.id == "demo_20260422_00000002"
+    assert e.body == "beta"
+
+
+async def test_find_entry_returns_none_when_file_missing(root: MemoryRoot) -> None:
+    reader = _DemoReader(root)
+    assert await reader.find_entry("u_jason", "demo_20260422_00000001") is None
+
+
+async def test_find_entry_returns_none_when_entry_missing(root: MemoryRoot) -> None:
+    writer = _DemoWriter(root)
+    await writer.append("u_jason", "only", date=dt.date(2026, 4, 22))
+
+    reader = _DemoReader(root)
+    assert await reader.find_entry("u_jason", "demo_20260422_00000099") is None
+
+
+async def test_find_entry_accepts_entryid_object(root: MemoryRoot) -> None:
+    writer = _DemoWriter(root)
+    await writer.append("u_jason", "alpha", date=dt.date(2026, 4, 22))
+
+    reader = _DemoReader(root)
+    eid = EntryId(prefix="demo", date=dt.date(2026, 4, 22), seq=1)
+    e = await reader.find_entry("u_jason", eid)
+    assert e is not None
+    assert e.body == "alpha"
+
+
+# ── find_structured ─────────────────────────────────────────────────────
+
+
+async def test_find_structured_parses_audit_form(root: MemoryRoot) -> None:
+    writer = _DemoWriter(root)
+    body = render_structured_entry(
+        header="demo_20260422_00000001",
+        inline={"type": "demo", "user_id": "u_jason"},
+        sections={"Body": "the body"},
+    )
+    await writer.append("u_jason", body, date=dt.date(2026, 4, 22))
+
+    reader = _DemoReader(root)
+    structured = await reader.find_structured("u_jason", "demo_20260422_00000001")
+    assert structured is not None
+    assert isinstance(structured, StructuredEntry)
+    assert structured.id == "demo_20260422_00000001"
+    assert structured.header == "demo_20260422_00000001"
+    assert structured.inline == {"type": "demo", "user_id": "u_jason"}
+    assert structured.sections == {"Body": "the body"}
+
+
+async def test_find_structured_returns_none_when_missing(root: MemoryRoot) -> None:
+    reader = _DemoReader(root)
+    assert await reader.find_structured("u_jason", "demo_20260422_00000001") is None
+
+
+# ── path_for ────────────────────────────────────────────────────────────
+
+
+def test_path_for_matches_writer(tmp_path: Path, root: MemoryRoot) -> None:
+    """Reader and writer resolve to the same path for the same schema."""
+    reader = _DemoReader(root)
+    writer = _DemoWriter(root)
+    d = dt.date(2026, 4, 22)
+    assert reader.path_for("u_jason", d) == writer.path_for("u_jason", d)
+
+
+def test_path_for_does_not_create_files(tmp_path: Path, root: MemoryRoot) -> None:
+    reader = _DemoReader(root)
+    p = reader.path_for("u_jason", dt.date(2026, 4, 22))
+    assert not p.exists()
+    assert not (tmp_path / "users").exists()
--- a/tests/unit/test_infra/test_markdown/test_readers/test_profile_reader.py
+++ b/tests/unit/test_infra/test_markdown/test_readers/test_profile_reader.py
@ -0,0 +1,121 @@
+"""Tests for :class:`ProfileReader` — typed read for profile files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import ClassVar, Literal
+
+import pytest
+from pydantic import ValidationError
+
+from everos.core.persistence import MemoryRoot, UserScopedFrontmatter
+from everos.infra.persistence.markdown.readers import ProfileReader
+from everos.infra.persistence.markdown.writers import ProfileWriter
+
+
+class _UserProfileFM(UserScopedFrontmatter):
+    PROFILE_FILENAME: ClassVar[str] = "user.md"
+    type: Literal["demo_user_profile"] = "demo_user_profile"
+    display_name: str = ""
+    bio: str = ""
+    interests: list[str] = []
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+@pytest.fixture
+def writer(root: MemoryRoot) -> ProfileWriter:
+    return ProfileWriter(root)
+
+
+@pytest.fixture
+def reader(root: MemoryRoot) -> ProfileReader:
+    return ProfileReader(root)
+
+
+async def test_read_returns_typed_frontmatter_and_body(
+    writer: ProfileWriter, reader: ProfileReader
+) -> None:
+    fm_in = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        display_name="Jason",
+        bio="weekend hiker.",
+        interests=["hiking", "coffee"],
+    )
+    await writer.write("u_jason", frontmatter=fm_in, body="The body.")
+
+    out = await reader.read("u_jason", schema=_UserProfileFM)
+    assert out is not None
+    fm_out, body = out
+    assert isinstance(fm_out, _UserProfileFM)
+    assert fm_out.display_name == "Jason"
+    assert fm_out.interests == ["hiking", "coffee"]
+    assert body == "The body."
+
+
+async def test_read_returns_none_when_missing(reader: ProfileReader) -> None:
+    assert await reader.read("u_ghost", schema=_UserProfileFM) is None
+
+
+async def test_read_round_trip_through_extra_fields(
+    writer: ProfileWriter, reader: ProfileReader
+) -> None:
+    """L2 / L4 ride-along fields survive a write+read cycle."""
+    fm_in = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        md_sha256="abc",  # extra
+        custom_label="ride-along",  # extra
+    )
+    await writer.write("u_jason", frontmatter=fm_in, body="b")
+    out = await reader.read("u_jason", schema=_UserProfileFM)
+    assert out is not None
+    fm_out, _ = out
+    dumped = fm_out.model_dump()
+    assert dumped["md_sha256"] == "abc"
+    assert dumped["custom_label"] == "ride-along"
+
+
+async def test_read_validates_against_supplied_schema(
+    writer: ProfileWriter, reader: ProfileReader
+) -> None:
+    """A stricter schema rejects loose existing data — proves typed parsing."""
+
+    class _StricterFM(UserScopedFrontmatter):
+        PROFILE_FILENAME: ClassVar[str] = "user.md"
+        type: Literal["demo_user_profile"] = "demo_user_profile"
+        # Required field with no default — written file lacks it.
+        priority: int
+
+    fm_in = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+    )
+    await writer.write("u_jason", frontmatter=fm_in, body="b")
+
+    with pytest.raises(ValidationError):
+        await reader.read("u_jason", schema=_StricterFM)
+
+
+def test_path_for_matches_writer(
+    tmp_path: Path,
+    writer: ProfileWriter,
+    reader: ProfileReader,
+) -> None:
+    """Reader and writer resolve to the same path for the same schema."""
+    assert reader.path_for("u_jason", schema=_UserProfileFM) == writer.path_for(
+        "u_jason", schema=_UserProfileFM
+    )
+
+
+def test_path_for_does_not_create_files(tmp_path: Path, reader: ProfileReader) -> None:
+    p = reader.path_for("u_jason", schema=_UserProfileFM)
+    assert not p.exists()
+    assert not (tmp_path / "users").exists()
--- a/tests/unit/test_infra/test_markdown/test_writers/init.py
+++ b/tests/unit/test_infra/test_markdown/test_writers/init.py
--- a/tests/unit/test_infra/test_markdown/test_writers/test_agent_skill_writer.py
+++ b/tests/unit/test_infra/test_markdown/test_writers/test_agent_skill_writer.py
@ -0,0 +1,147 @@
+"""Tests for :class:`AgentSkillWriter` — directory + progressive disclosure."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.core.persistence import MarkdownReader, MemoryRoot
+from everos.infra.persistence.markdown import (
+    AgentSkillFrontmatter,
+    AgentSkillWriter,
+)
+
+
+def _make_fm(**overrides: object) -> AgentSkillFrontmatter:
+    """Build an AgentSkillFrontmatter with sensible defaults for tests."""
+    base: dict[str, object] = {
+        "id": "agent_x_skill_alpha",
+        "agent_id": "agent_x",
+        "name": "alpha",
+        "description": "A test skill.",
+        "confidence": 0.5,
+        "maturity_score": 0.5,
+    }
+    base.update(overrides)
+    return AgentSkillFrontmatter(**base)  # type: ignore[arg-type]
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+@pytest.fixture
+def writer(root: MemoryRoot) -> AgentSkillWriter:
+    return AgentSkillWriter(root)
+
+
+async def test_write_main_creates_directory_layout(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    fm = _make_fm()
+    path = await writer.write_main(
+        "agent_x", "alpha", frontmatter=fm, body="Step 1: do thing."
+    )
+    expected = root.agents_dir() / "agent_x" / "skills" / "skill_alpha" / "SKILL.md"
+    assert path == expected
+    assert expected.is_file()
+
+
+async def test_write_main_writes_frontmatter_and_body(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    fm = _make_fm(
+        description="Contract risk scan.",
+        confidence=0.88,
+        maturity_score=0.82,
+        source_case_ids=["case_a", "case_b"],
+        cluster_id="cl_x",
+    )
+    await writer.write_main("agent_x", "alpha", frontmatter=fm, body="The body.")
+    parsed = await MarkdownReader.read(
+        root.agents_dir() / "agent_x" / "skills" / "skill_alpha" / "SKILL.md"
+    )
+    assert parsed.frontmatter["name"] == "alpha"
+    assert parsed.frontmatter["description"] == "Contract risk scan."
+    assert parsed.frontmatter["confidence"] == 0.88
+    assert parsed.frontmatter["maturity_score"] == 0.82
+    assert parsed.frontmatter["source_case_ids"] == ["case_a", "case_b"]
+    assert parsed.frontmatter["cluster_id"] == "cl_x"
+    assert parsed.body.rstrip("\n") == "The body."
+
+
+async def test_write_main_is_upsert_full_replace(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    """Second call overwrites both frontmatter and body — no append."""
+    fm1 = _make_fm(description="v1", maturity_score=0.4)
+    await writer.write_main("agent_x", "alpha", frontmatter=fm1, body="body v1")
+
+    fm2 = _make_fm(description="v2", maturity_score=0.7)
+    await writer.write_main("agent_x", "alpha", frontmatter=fm2, body="body v2")
+
+    parsed = await MarkdownReader.read(
+        root.agents_dir() / "agent_x" / "skills" / "skill_alpha" / "SKILL.md"
+    )
+    assert parsed.frontmatter["description"] == "v2"
+    assert parsed.frontmatter["maturity_score"] == 0.7
+    assert parsed.body.rstrip("\n") == "body v2"
+    # No "body v1" residue from the previous version.
+    assert "body v1" not in parsed.body
+
+
+async def test_write_reference_uses_md_extension(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    path = await writer.write_reference(
+        "agent_x", "alpha", "termination_clauses", "## Termination\n..."
+    )
+    expected = (
+        root.agents_dir()
+        / "agent_x"
+        / "skills"
+        / "skill_alpha"
+        / "references"
+        / "termination_clauses.md"
+    )
+    assert path == expected
+    assert path.read_text(encoding="utf-8").startswith("## Termination")
+
+
+async def test_write_script_keeps_full_filename(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    path = await writer.write_script("agent_x", "alpha", "redline.py", "print('hi')\n")
+    expected = (
+        root.agents_dir()
+        / "agent_x"
+        / "skills"
+        / "skill_alpha"
+        / "scripts"
+        / "redline.py"
+    )
+    assert path == expected
+    assert path.read_text(encoding="utf-8") == "print('hi')\n"
+
+
+def test_main_path_does_not_create_anything(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    """``main_path`` is a pure path resolver — no IO."""
+    p = writer.main_path("agent_x", "alpha")
+    assert p.name == "SKILL.md"
+    assert not root.agents_dir().exists()
+
+
+async def test_write_main_normalises_trailing_newline(
+    root: MemoryRoot, writer: AgentSkillWriter
+) -> None:
+    """Body without a trailing newline still ends in exactly one newline."""
+    fm = _make_fm()
+    await writer.write_main("agent_x", "alpha", frontmatter=fm, body="no-newline-end")
+    text = (
+        root.agents_dir() / "agent_x" / "skills" / "skill_alpha" / "SKILL.md"
+    ).read_text(encoding="utf-8")
+    assert text.endswith("no-newline-end\n")
--- a/tests/unit/test_infra/test_markdown/test_writers/test_base.py
+++ b/tests/unit/test_infra/test_markdown/test_writers/test_base.py
@ -0,0 +1,182 @@
+"""Tests for ``BaseDailyWriter`` skeleton.
+
+Uses a dummy ``UserScopedFrontmatter`` subclass to exercise the path
+resolution + entry-id construction + today-by-default logic without
+pulling in any concrete business schema.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+from pathlib import Path
+from typing import ClassVar, Literal
+
+import pytest
+
+from everos.component.utils.datetime import today_with_timezone
+from everos.core.persistence import (
+    AgentScopedFrontmatter,
+    MarkdownReader,
+    MemoryRoot,
+    UserScopedFrontmatter,
+)
+from everos.infra.persistence.markdown.writers import BaseDailyWriter
+
+
+class _UserDemoFrontmatter(UserScopedFrontmatter):
+    ENTRY_ID_PREFIX: ClassVar[str] = "demo"
+    DIR_NAME: ClassVar[str] = "demos"
+    FILE_PREFIX: ClassVar[str] = "demo"
+    type: Literal["user_demo"] = "user_demo"
+
+
+class _AgentDemoFrontmatter(AgentScopedFrontmatter):
+    ENTRY_ID_PREFIX: ClassVar[str] = "ademo"
+    DIR_NAME: ClassVar[str] = "demos"
+    FILE_PREFIX: ClassVar[str] = "demo"
+    type: Literal["agent_demo"] = "agent_demo"
+
+
+class _UserDemoWriter(BaseDailyWriter):
+    schema = _UserDemoFrontmatter
+
+
+class _AgentDemoWriter(BaseDailyWriter):
+    schema = _AgentDemoFrontmatter
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+def test_constructor_rejects_missing_schema(root: MemoryRoot) -> None:
+    class _NoSchema(BaseDailyWriter):
+        pass
+
+    with pytest.raises(TypeError, match="schema"):
+        _NoSchema(root)
+
+
+def test_constructor_rejects_schema_missing_classvars(root: MemoryRoot) -> None:
+    class _IncompleteFrontmatter(UserScopedFrontmatter):
+        # Missing ENTRY_ID_PREFIX / DIR_NAME / FILE_PREFIX.
+        type: Literal["incomplete"] = "incomplete"
+
+    class _IncompleteWriter(BaseDailyWriter):
+        schema = _IncompleteFrontmatter
+
+    with pytest.raises(TypeError, match="ENTRY_ID_PREFIX"):
+        _IncompleteWriter(root)
+
+
+async def test_append_writes_to_user_track(root: MemoryRoot) -> None:
+    writer = _UserDemoWriter(root)
+    eid = await writer.append("u_jason", "first", date=dt.date(2026, 4, 22))
+    assert eid.prefix == "demo"
+    assert eid.date == dt.date(2026, 4, 22)
+    assert eid.seq == 1
+    expected = root.users_dir() / "u_jason" / "demos" / "demo-2026-04-22.md"
+    assert expected.exists()
+    parsed = await MarkdownReader.read(expected)
+    assert parsed.entries[0].id == "demo_20260422_00000001"
+    assert parsed.entries[0].body == "first"
+
+
+async def test_append_writes_to_agent_track(root: MemoryRoot) -> None:
+    writer = _AgentDemoWriter(root)
+    eid = await writer.append("agent_zhangsan", "trace", date=dt.date(2026, 4, 22))
+    assert eid.prefix == "ademo"
+    expected = root.agents_dir() / "agent_zhangsan" / "demos" / "demo-2026-04-22.md"
+    assert expected.exists()
+
+
+async def test_append_increments_seq_across_calls(root: MemoryRoot) -> None:
+    writer = _UserDemoWriter(root)
+    eids = [
+        await writer.append("u_jason", f"body {i}", date=dt.date(2026, 4, 22))
+        for i in range(3)
+    ]
+    assert [e.seq for e in eids] == [1, 2, 3]
+
+
+async def test_append_date_defaults_to_today(root: MemoryRoot) -> None:
+    """Omitting ``date`` falls back to today_with_timezone()."""
+    writer = _UserDemoWriter(root)
+    eid = await writer.append("u_jason", "body")
+    today = today_with_timezone()
+    assert eid.date == today
+    expected = root.users_dir() / "u_jason" / "demos" / f"demo-{today.isoformat()}.md"
+    assert expected.exists()
+
+
+async def test_append_passes_frontmatter_updates(root: MemoryRoot) -> None:
+    writer = _UserDemoWriter(root)
+    await writer.append(
+        "u_jason",
+        "body",
+        date=dt.date(2026, 4, 22),
+        frontmatter_updates={"file_type": "user_demo_daily", "entry_count": 1},
+    )
+    path = root.users_dir() / "u_jason" / "demos" / "demo-2026-04-22.md"
+    parsed = await MarkdownReader.read(path)
+    assert parsed.frontmatter["file_type"] == "user_demo_daily"
+    assert parsed.frontmatter["entry_count"] == 1
+
+
+async def test_current_count_hook_can_be_overridden(root: MemoryRoot) -> None:
+    """Subclass override of ``_current_count`` controls seq."""
+
+    class _ConstantCount(BaseDailyWriter):
+        schema = _UserDemoFrontmatter
+
+        async def _current_count(self, path):  # noqa: ANN001
+            return 41  # always claim 41 existing entries
+
+    writer = _ConstantCount(root)
+    eid = await writer.append("u_jason", "body", date=dt.date(2026, 4, 22))
+    assert eid.seq == 42  # 41 + 1
+
+
+async def test_frontmatter_updates_hook_supplies_defaults(root: MemoryRoot) -> None:
+    """Subclass override of ``_frontmatter_updates`` populates frontmatter."""
+
+    class _WithDefaults(BaseDailyWriter):
+        schema = _UserDemoFrontmatter
+
+        def _frontmatter_updates(self, scope_id, date, *, next_count):  # noqa: ANN001
+            return {
+                "user_id": scope_id,
+                "entry_count": next_count,
+                "marker": "from-hook",
+            }
+
+    writer = _WithDefaults(root)
+    await writer.append("u_jason", "body", date=dt.date(2026, 4, 22))
+
+    path = root.users_dir() / "u_jason" / "demos" / "demo-2026-04-22.md"
+    parsed = await MarkdownReader.read(path)
+    assert parsed.frontmatter["marker"] == "from-hook"
+    assert parsed.frontmatter["entry_count"] == 1
+    assert parsed.frontmatter["user_id"] == "u_jason"
+
+
+async def test_explicit_frontmatter_updates_skip_hook(root: MemoryRoot) -> None:
+    """Caller-supplied ``frontmatter_updates`` overrides the hook entirely."""
+
+    class _WithDefaults(BaseDailyWriter):
+        schema = _UserDemoFrontmatter
+
+        def _frontmatter_updates(self, scope_id, date, *, next_count):  # noqa: ANN001
+            return {"marker": "from-hook"}
+
+    writer = _WithDefaults(root)
+    await writer.append(
+        "u_jason",
+        "body",
+        date=dt.date(2026, 4, 22),
+        frontmatter_updates={"marker": "explicit"},
+    )
+    path = root.users_dir() / "u_jason" / "demos" / "demo-2026-04-22.md"
+    parsed = await MarkdownReader.read(path)
+    assert parsed.frontmatter["marker"] == "explicit"
--- a/tests/unit/test_infra/test_markdown/test_writers/test_daily_log_writers.py
+++ b/tests/unit/test_infra/test_markdown/test_writers/test_daily_log_writers.py
@ -0,0 +1,344 @@
+"""Tests for AtomicFact / Foresight / AgentCase daily-log writers.
+
+The 4 daily-log kinds (episode + these 3) all share ``BaseDailyWriter``
+plumbing — exhaustive chassis tests live in ``test_base.py`` and
+``test_episode_writer.py`` indirectly via the e2e flows. Here we focus
+on the per-kind path resolution + frontmatter shape that each
+subclass owns: ``schema``, ``_frontmatter_updates``, and the
+writer ↔ reader round-trip on a fresh tmp memory_root.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+from pathlib import Path
+
+import pytest
+
+from everos.core.persistence import MarkdownReader, MemoryRoot
+from everos.infra.persistence.markdown import (
+    AgentCaseReader,
+    AgentCaseWriter,
+    AtomicFactReader,
+    AtomicFactWriter,
+    ForesightReader,
+    ForesightWriter,
+)
+
+
+@pytest.fixture
+def memory_root(tmp_path: Path) -> MemoryRoot:
+    mr = MemoryRoot(tmp_path)
+    mr.ensure()
+    return mr
+
+
+# ── AtomicFact ────────────────────────────────────────────────────────────
+
+
+async def test_atomic_fact_writer_round_trip(memory_root: MemoryRoot) -> None:
+    writer = AtomicFactWriter(memory_root)
+    today = _dt.date(2026, 5, 15)
+    eid = await writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_1",
+            "sender_ids": ["u1"],
+        },
+        sections={"Fact": "Alice prefers Italian."},
+        date=today,
+    )
+    path = (
+        memory_root.users_dir() / "u1" / ".atomic_facts" / "atomic_fact-2026-05-15.md"
+    )
+    parsed = await MarkdownReader.read(path)
+
+    # frontmatter
+    fm = parsed.frontmatter
+    assert fm["id"] == "atomic_fact_log_u1_2026-05-15"
+    assert fm["type"] == "atomic_fact_daily"
+    assert fm["file_type"] == "atomic_fact_daily"
+    assert fm["user_id"] == "u1"
+    assert fm["track"] == "user"
+    assert fm["date"] == "2026-05-15"
+    assert fm["entry_count"] == 1
+
+    # entry body
+    assert len(parsed.entries) == 1
+    entry = parsed.entries[0]
+    assert entry.id == eid.format()
+    structured = entry.as_structured()
+    assert structured.inline["owner_id"] == "u1"
+    assert structured.inline["parent_id"] == "mc_1"
+    assert structured.sections["Fact"] == "Alice prefers Italian."
+
+    # reader is symmetric
+    reader = AtomicFactReader(memory_root)
+    assert reader.path_for("u1", today) == path
+    found = await reader.find_structured("u1", eid)
+    assert found is not None
+    assert found.sections["Fact"] == "Alice prefers Italian."
+
+
+async def test_atomic_fact_writer_appends_multiple(memory_root: MemoryRoot) -> None:
+    writer = AtomicFactWriter(memory_root)
+    today = _dt.date(2026, 5, 15)
+    eid1 = await writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_1",
+        },
+        sections={"Fact": "fact 1"},
+        date=today,
+    )
+    eid2 = await writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T11:00:00+00:00",
+            "parent_id": "mc_2",
+        },
+        sections={"Fact": "fact 2"},
+        date=today,
+    )
+    assert eid1.format() != eid2.format()
+    assert eid2.format().endswith("0002")
+
+
+# ── Foresight ─────────────────────────────────────────────────────────────
+
+
+async def test_foresight_writer_round_trip(memory_root: MemoryRoot) -> None:
+    writer = ForesightWriter(memory_root)
+    today = _dt.date(2026, 5, 15)
+    eid = await writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_1",
+            "start_time": "2026-05-15T12:00:00+00:00",
+            "end_time": "2026-05-15T13:00:00+00:00",
+            "duration_days": 1,
+        },
+        sections={
+            "Foresight": "User will book lunch at noon.",
+            "Evidence": "Past calendar pattern.",
+        },
+        date=today,
+    )
+    path = memory_root.users_dir() / "u1" / ".foresights" / "foresight-2026-05-15.md"
+    parsed = await MarkdownReader.read(path)
+    fm = parsed.frontmatter
+    assert fm["id"] == "foresight_log_u1_2026-05-15"
+    assert fm["type"] == "foresight_daily"
+
+    structured = parsed.entries[0].as_structured()
+    assert structured.sections["Foresight"] == "User will book lunch at noon."
+    assert structured.sections["Evidence"] == "Past calendar pattern."
+    assert structured.inline["duration_days"] == "1"
+    assert structured.inline["start_time"].startswith("2026-05-15T12:00:00")
+
+    reader = ForesightReader(memory_root)
+    found = await reader.find_structured("u1", eid)
+    assert found is not None
+    assert found.sections["Evidence"] == "Past calendar pattern."
+
+
+# ── AgentCase ─────────────────────────────────────────────────────────────
+
+
+async def test_agent_case_writer_round_trip(memory_root: MemoryRoot) -> None:
+    writer = AgentCaseWriter(memory_root)
+    today = _dt.date(2026, 5, 15)
+    eid = await writer.append_entry(
+        "a1",
+        inline={
+            "owner_id": "a1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_agent",
+            "quality_score": 0.87,
+        },
+        sections={
+            "TaskIntent": "Scan contract for indemnity gaps.",
+            "Approach": "1. read sections;\n2. flag clauses;\n3. cross-check cap.",
+            "KeyInsight": "Indemnity cap missing in section 4.",
+        },
+        date=today,
+    )
+    path = memory_root.agents_dir() / "a1" / ".cases" / "agent_case-2026-05-15.md"
+    parsed = await MarkdownReader.read(path)
+    fm = parsed.frontmatter
+    assert fm["id"] == "agent_case_log_a1_2026-05-15"
+    assert fm["type"] == "agent_case_daily"
+    assert fm["agent_id"] == "a1"
+    assert fm["track"] == "agent"
+
+    structured = parsed.entries[0].as_structured()
+    assert structured.inline["quality_score"] == "0.87"
+    assert structured.sections["TaskIntent"].startswith("Scan contract")
+    assert structured.sections["Approach"].startswith("1. read sections")
+    assert structured.sections["KeyInsight"].startswith("Indemnity cap missing")
+
+    reader = AgentCaseReader(memory_root)
+    assert reader.path_for("a1", today) == path
+    found = await reader.find_structured("a1", eid)
+    assert found is not None
+    assert found.sections["TaskIntent"].startswith("Scan contract")
+
+
+# ── round-trip with cascade handler (md → LanceDB row mapping) ─────────────
+
+
+async def test_atomic_fact_writer_output_feeds_handler(
+    memory_root: MemoryRoot,
+) -> None:
+    """The writer's md is exactly what AtomicFactHandler expects to read."""
+    from everos.component.embedding import EmbeddingProvider
+    from everos.component.tokenizer import Tokenizer
+    from everos.memory.cascade.handlers import AtomicFactHandler, HandlerDeps
+    from everos.memory.cascade.handlers._daily_log_base import ParsedEntry
+
+    class _T(Tokenizer):
+        def tokenize(self, t):  # type: ignore[no-untyped-def]
+            return [x for x in t.split() if x]
+
+        def tokenize_batch(self, ts):  # type: ignore[no-untyped-def]
+            return [self.tokenize(x) for x in ts]
+
+    class _E(EmbeddingProvider):
+        dim = 1024
+
+        async def embed(self, t):  # type: ignore[no-untyped-def]
+            return [0.0] * self.dim
+
+        async def embed_batch(self, ts):  # type: ignore[no-untyped-def]
+            return [await self.embed(x) for x in ts]
+
+    today = _dt.date(2026, 5, 15)
+    eid = await AtomicFactWriter(memory_root).append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_1",
+            "sender_ids": ["u1"],
+        },
+        sections={"Fact": "Alice prefers Italian."},
+        date=today,
+    )
+    path = (
+        memory_root.users_dir() / "u1" / ".atomic_facts" / "atomic_fact-2026-05-15.md"
+    )
+    rel = path.relative_to(memory_root.root).as_posix()
+    parsed = await MarkdownReader.read(path)
+    entry = parsed.entries[0]
+    handler = AtomicFactHandler(
+        HandlerDeps(memory_root=memory_root, embedder=_E(), tokenizer=_T())
+    )
+    structured = entry.as_structured()
+    pe = ParsedEntry(entry.id, structured, handler._content_sha256(structured))
+    row = await handler._build_row(
+        owner_id="u1", owner_type="user", md_path=rel, entry=pe
+    )
+    assert row.id == f"u1_{eid.format()}"
+    assert row.fact == "Alice prefers Italian."
+    assert row.parent_id == "mc_1"
+    assert row.sender_ids == ["u1"]
+    assert len(row.vector) == 1024
+
+
+# ── Display-tz contract for frontmatter timestamps (Gap #5) ────────────
+
+
+async def test_atomic_fact_frontmatter_last_appended_at_carries_display_tz_offset(
+    memory_root: MemoryRoot,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """``last_appended_at`` in markdown frontmatter renders in the display tz.
+
+    Markdown frontmatter is a display-side artefact (users read the file
+    directly), so ``last_appended_at`` must use
+    :func:`get_now_with_timezone` not :func:`get_utc_now`. Pins that
+    contract end-to-end: configure ``EVEROS_MEMORY__TIMEZONE=Asia/Shanghai``,
+    write an entry, read the .md file, assert the literal string ends
+    with ``+08:00``.
+
+    Repeats the same check for ``ForesightWriter`` and
+    ``AgentCaseWriter`` — they share ``BaseDailyWriter`` plumbing so a
+    regression on one would likely affect all three, but pinning each
+    rules out per-subclass shadowing of ``_frontmatter_updates``.
+    """
+    from everos.component.utils import datetime as _dt_module
+    from everos.config import load_settings
+
+    monkeypatch.setenv("EVEROS_MEMORY__TIMEZONE", "Asia/Shanghai")
+    load_settings.cache_clear()
+    _dt_module._display_tz.cache_clear()
+
+    today = _dt.date(2026, 5, 15)
+
+    # AtomicFact
+    af_writer = AtomicFactWriter(memory_root)
+    await af_writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "parent_id": "mc_1",
+            "sender_ids": ["u1"],
+        },
+        sections={"Fact": "x"},
+        date=today,
+    )
+    af_path = (
+        memory_root.users_dir() / "u1" / ".atomic_facts" / "atomic_fact-2026-05-15.md"
+    )
+    af_fm = (await MarkdownReader.read(af_path)).frontmatter
+    assert af_fm["last_appended_at"].endswith("+08:00"), af_fm["last_appended_at"]
+
+    # Foresight
+    fs_writer = ForesightWriter(memory_root)
+    await fs_writer.append_entry(
+        "u1",
+        inline={
+            "owner_id": "u1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "scope": "today",
+            "horizon_days": 1,
+        },
+        sections={"Foresight": "x"},
+        date=today,
+    )
+    fs_path = memory_root.users_dir() / "u1" / ".foresights" / "foresight-2026-05-15.md"
+    fs_fm = (await MarkdownReader.read(fs_path)).frontmatter
+    assert fs_fm["last_appended_at"].endswith("+08:00"), fs_fm["last_appended_at"]
+
+    # AgentCase
+    ac_writer = AgentCaseWriter(memory_root)
+    await ac_writer.append_entry(
+        "a1",
+        inline={
+            "owner_id": "a1",
+            "session_id": "s1",
+            "timestamp": "2026-05-15T10:00:00+00:00",
+            "quality_score": 0.9,
+        },
+        sections={"Task intent": "x", "Approach": "y"},
+        date=today,
+    )
+    ac_path = memory_root.agents_dir() / "a1" / ".cases" / "agent_case-2026-05-15.md"
+    ac_fm = (await MarkdownReader.read(ac_path)).frontmatter
+    assert ac_fm["last_appended_at"].endswith("+08:00"), ac_fm["last_appended_at"]
--- a/tests/unit/test_infra/test_markdown/test_writers/test_profile_writer.py
+++ b/tests/unit/test_infra/test_markdown/test_writers/test_profile_writer.py
@ -0,0 +1,166 @@
+"""Tests for :class:`ProfileWriter` — single-file rewrite layout."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import ClassVar, Literal
+
+import pytest
+
+from everos.core.persistence import (
+    AgentScopedFrontmatter,
+    BaseFrontmatter,
+    MarkdownReader,
+    MemoryRoot,
+    UserScopedFrontmatter,
+)
+from everos.infra.persistence.markdown.writers import ProfileWriter
+
+
+class _UserProfileFM(UserScopedFrontmatter):
+    PROFILE_FILENAME: ClassVar[str] = "user.md"
+    type: Literal["demo_user_profile"] = "demo_user_profile"
+    display_name: str = ""
+    bio: str = ""
+
+
+class _AgentProfileFM(AgentScopedFrontmatter):
+    PROFILE_FILENAME: ClassVar[str] = "agent.md"
+    type: Literal["demo_agent_profile"] = "demo_agent_profile"
+    name: str = ""
+
+
+@pytest.fixture
+def root(tmp_path: Path) -> MemoryRoot:
+    return MemoryRoot(tmp_path)
+
+
+@pytest.fixture
+def writer(root: MemoryRoot) -> ProfileWriter:
+    return ProfileWriter(root)
+
+
+async def test_write_creates_user_profile(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    fm = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        display_name="Jason",
+        bio="hiker.",
+    )
+    path = await writer.write("u_jason", frontmatter=fm, body="Long-form profile.")
+    expected = root.users_dir() / "u_jason" / "user.md"
+    assert path == expected
+    assert expected.is_file()
+
+
+async def test_write_creates_agent_profile(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    fm = _AgentProfileFM(
+        id="demo_agent_profile_agent_x",
+        type="demo_agent_profile",
+        agent_id="agent_x",
+        name="zhang_legal",
+    )
+    path = await writer.write("agent_x", frontmatter=fm, body="Agent playbook.")
+    expected = root.agents_dir() / "agent_x" / "agent.md"
+    assert path == expected
+    assert expected.is_file()
+
+
+async def test_write_writes_frontmatter_and_body(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    fm = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        display_name="Jason",
+        bio="weekend hiker.",
+    )
+    await writer.write("u_jason", frontmatter=fm, body="The body.")
+
+    parsed = await MarkdownReader.read(root.users_dir() / "u_jason" / "user.md")
+    assert parsed.frontmatter["display_name"] == "Jason"
+    assert parsed.frontmatter["bio"] == "weekend hiker."
+    assert parsed.body.rstrip("\n") == "The body."
+
+
+async def test_write_is_upsert_full_replace(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    """Second call overwrites both frontmatter and body — no append."""
+    fm1 = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        display_name="Jason v1",
+        bio="v1",
+    )
+    await writer.write("u_jason", frontmatter=fm1, body="body v1")
+
+    fm2 = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+        display_name="Jason v2",
+        bio="v2",
+    )
+    await writer.write("u_jason", frontmatter=fm2, body="body v2")
+
+    parsed = await MarkdownReader.read(root.users_dir() / "u_jason" / "user.md")
+    assert parsed.frontmatter["display_name"] == "Jason v2"
+    assert parsed.frontmatter["bio"] == "v2"
+    assert parsed.body.rstrip("\n") == "body v2"
+    assert "v1" not in parsed.body
+
+
+def test_path_for_does_not_create_files(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    """``path_for`` is a pure path resolver — no IO."""
+    p = writer.path_for("u_jason", schema=_UserProfileFM)
+    assert p == root.users_dir() / "u_jason" / "user.md"
+    assert not p.exists()
+    assert not root.users_dir().exists()
+
+
+async def test_write_normalises_trailing_newline(
+    root: MemoryRoot, writer: ProfileWriter
+) -> None:
+    fm = _UserProfileFM(
+        id="demo_user_profile_u_jason",
+        type="demo_user_profile",
+        user_id="u_jason",
+    )
+    await writer.write("u_jason", frontmatter=fm, body="no-newline-end")
+    text = (root.users_dir() / "u_jason" / "user.md").read_text(encoding="utf-8")
+    assert text.endswith("no-newline-end\n")
+
+
+async def test_write_rejects_schema_missing_profile_filename(
+    writer: ProfileWriter,
+) -> None:
+    """Schema without ``PROFILE_FILENAME`` ClassVar raises a clear error."""
+
+    class _BadSchema(UserScopedFrontmatter):
+        type: Literal["bad"] = "bad"
+
+    fm = _BadSchema(id="x", type="bad", user_id="u_jason")
+    with pytest.raises(TypeError, match="PROFILE_FILENAME"):
+        await writer.write("u_jason", frontmatter=fm, body="body")
+
+
+async def test_write_rejects_schema_missing_scope_dir(writer: ProfileWriter) -> None:
+    """Schema without scope mixin (empty ``SCOPE_DIR``) raises a clear error."""
+
+    class _ScopelessSchema(BaseFrontmatter):
+        PROFILE_FILENAME: ClassVar[str] = "profile.md"
+        type: Literal["scopeless"] = "scopeless"
+
+    fm = _ScopelessSchema(id="x", type="scopeless")
+    with pytest.raises(TypeError, match="SCOPE_DIR"):
+        await writer.write("x", frontmatter=fm, body="body")
--- a/tests/unit/test_infra/test_ome/init.py
+++ b/tests/unit/test_infra/test_ome/init.py
--- a/tests/unit/test_infra/test_ome/test_config.py
+++ b/tests/unit/test_infra/test_ome/test_config.py
@ -0,0 +1,159 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from everos.infra.ome.config import (
+    CounterOverride,
+    OMEConfig,
+    StrategyOverride,
+    TomlRoot,
+)
+
+
+def test_ome_config_defaults() -> None:
+    from everos.core.persistence.memory_root import MemoryRoot
+
+    c = OMEConfig()
+    assert c.jobstore_path == MemoryRoot.default().ome_db
+    assert c.aps_jobstore_path == MemoryRoot.default().ome_aps_db
+    assert c.max_concurrent_runs == 20
+    assert c.max_retries == 1
+    assert c.max_records_per_strategy == 1000
+    assert c.crash_recovery_timeout_seconds == 1800
+    assert c.config_path is None
+    assert c.config_watch is True
+    assert c.config_watch_debounce_ms == 1600
+
+
+def test_aps_jobstore_path_derives_sibling_of_jobstore_path(tmp_path: object) -> None:
+    """When only ``jobstore_path`` is set, APS db lands next to it as
+    ``<stem>.aps.db`` so callers using a custom path (e.g. tests with
+    tmp_path) get an isolated APS file rather than the global default."""
+    from pathlib import Path
+
+    custom = Path(str(tmp_path)) / "custom_dir" / "my_ome.db"
+    c = OMEConfig(jobstore_path=custom)
+    assert c.aps_jobstore_path == custom.with_name("my_ome.aps.db")
+
+
+def test_aps_jobstore_path_respects_explicit_value(tmp_path: object) -> None:
+    """An explicitly passed ``aps_jobstore_path`` is honored verbatim and
+    the derivation validator does not overwrite it."""
+    from pathlib import Path
+
+    ome = Path(str(tmp_path)) / "ome.db"
+    aps = Path(str(tmp_path)) / "elsewhere" / "scheduler.db"
+    c = OMEConfig(jobstore_path=ome, aps_jobstore_path=aps)
+    assert c.aps_jobstore_path == aps
+
+
+def test_ome_config_rejects_unknown_field() -> None:
+    with pytest.raises(ValidationError):
+        OMEConfig(unknown_field=1)  # type: ignore[call-arg]
+
+
+def test_ome_config_rejects_zero_concurrency() -> None:
+    with pytest.raises(ValidationError):
+        OMEConfig(max_concurrent_runs=0)
+
+
+def test_toml_root_parses_strategy_override() -> None:
+    raw = """
+[strategies.cluster_memcells]
+enabled = true
+max_retries = 3
+
+[strategies.cluster_memcells.gate]
+threshold = 10
+event_field = "user_id"
+"""
+    import tomllib
+
+    parsed = tomllib.loads(raw)
+    root = TomlRoot.model_validate(parsed)
+    s = root.strategies["cluster_memcells"]
+    assert isinstance(s, StrategyOverride)
+    assert s.enabled is True
+    assert s.max_retries == 3
+    assert isinstance(s.gate, CounterOverride)
+    assert s.gate.threshold == 10
+    assert s.gate.event_field == "user_id"
+
+
+def test_toml_root_forbids_unknown_strategy_field() -> None:
+    import tomllib
+
+    raw = """
+[strategies.x]
+unknown_key = 1
+"""
+    parsed = tomllib.loads(raw)
+    with pytest.raises(ValidationError):
+        TomlRoot.model_validate(parsed)
+
+
+def test_strategy_override_accepts_cron_field() -> None:
+    s = StrategyOverride(cron="0 3 * * *")
+    assert s.cron == "0 3 * * *"
+
+
+def test_strategy_override_accepts_idle_seconds() -> None:
+    s = StrategyOverride(idle_seconds=30)
+    assert s.idle_seconds == 30
+
+
+def test_strategy_override_accepts_scan_interval_seconds() -> None:
+    s = StrategyOverride(scan_interval_seconds=15)
+    assert s.scan_interval_seconds == 15
+
+
+def test_strategy_override_rejects_zero_idle_seconds() -> None:
+    with pytest.raises(ValidationError):
+        StrategyOverride(idle_seconds=0)
+
+
+def test_strategy_override_rejects_zero_scan_interval() -> None:
+    with pytest.raises(ValidationError):
+        StrategyOverride(scan_interval_seconds=0)
+
+
+def test_strategy_override_defaults_are_none() -> None:
+    s = StrategyOverride()
+    assert s.cron is None
+    assert s.idle_seconds is None
+    assert s.scan_interval_seconds is None
+
+
+def test_counter_override_rejects_empty_event_field() -> None:
+    with pytest.raises(ValidationError, match="event_field"):
+        CounterOverride(event_field="")
+
+
+def test_strategy_override_rejects_invalid_cron_at_construction() -> None:
+    """cron is parsed by APS at construction time so TOML reload can't
+    bring an invalid crontab into the system."""
+    with pytest.raises(ValidationError, match="cron"):
+        StrategyOverride(cron="not a cron")
+
+
+def test_strategy_override_rejects_inconsistent_idle_pair() -> None:
+    """When both idle_seconds and scan_interval_seconds are overridden in
+    the same payload, scan_interval must be <= idle_seconds // 2 — mirror
+    of the Idle trigger constraint."""
+    with pytest.raises(ValidationError, match="scan_interval_seconds"):
+        StrategyOverride(idle_seconds=30, scan_interval_seconds=20)
+
+
+def test_strategy_override_accepts_consistent_idle_pair() -> None:
+    s = StrategyOverride(idle_seconds=60, scan_interval_seconds=30)
+    assert s.idle_seconds == 60
+    assert s.scan_interval_seconds == 30
+
+
+def test_strategy_override_accepts_single_idle_field() -> None:
+    """One-sided override is allowed; the cross-field check is deferred
+    to post-merge time (in apply_overrides) when both are known."""
+    s = StrategyOverride(scan_interval_seconds=999)
+    assert s.scan_interval_seconds == 999
+    assert s.idle_seconds is None
--- a/tests/unit/test_infra/test_ome/test_config_reloader.py
+++ b/tests/unit/test_infra/test_ome/test_config_reloader.py
@ -0,0 +1,407 @@
+"""Tests for ConfigReloader."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from everos.infra.ome._background.config_reloader import (
+    ConfigReloader,
+    apply_overrides,
+)
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome.config import CounterOverride, StrategyOverride, TomlRoot
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.engine import OfflineEngine
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Cron, Idle, Immediate
+
+
+class _E(BaseEvent):
+    pass
+
+
+class _EventUid(BaseEvent):
+    user_id: str
+
+
+def _make(name: str, **kw: Any) -> Any:
+    @offline_strategy(name=name, trigger=Immediate(on=[_E]), emits=[], **kw)
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return f
+
+
+def _make_cron(name: str, expr: str = "0 3 * * *", **kw: Any) -> Any:
+    @offline_strategy(name=name, trigger=Cron(expr=expr), emits=[], **kw)
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return f
+
+
+def _make_idle(name: str, **kw: Any) -> Any:
+    @offline_strategy(
+        name=name,
+        trigger=Idle(
+            on=[_EventUid],
+            event_field="user_id",
+            idle_seconds=30,
+            scan_interval_seconds=10,
+        ),
+        emits=[],
+        **kw,
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return f
+
+
+@pytest.fixture
+def fake_engine() -> MagicMock:
+    """Mock OfflineEngine; spec catches typos in mocked method names."""
+    return MagicMock(spec=OfflineEngine)
+
+
+def test_apply_overrides_replaces_enabled(fake_engine: MagicMock) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s", enabled=True))
+    root = TomlRoot(strategies={"s": StrategyOverride(enabled=False)})
+    apply_overrides(reg, root, fake_engine)
+    assert reg.get("s").enabled is False
+
+
+def test_apply_overrides_max_retries(fake_engine: MagicMock) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s", max_retries=1))
+    root = TomlRoot(strategies={"s": StrategyOverride(max_retries=5)})
+    apply_overrides(reg, root, fake_engine)
+    assert reg.get("s").max_retries == 5
+
+
+def test_apply_overrides_counter_partial(fake_engine: MagicMock) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s", gate=Counter(threshold=3, event_field="user_id")))
+    root = TomlRoot(
+        strategies={"s": StrategyOverride(gate=CounterOverride(threshold=10))}
+    )
+    apply_overrides(reg, root, fake_engine)
+    g = reg.get("s").gate
+    assert g.threshold == 10
+    assert g.event_field == "user_id"  # untouched
+
+
+def test_apply_overrides_unknown_strategy_ignored(fake_engine: MagicMock) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s"))
+    root = TomlRoot(strategies={"unknown": StrategyOverride(enabled=False)})
+    apply_overrides(reg, root, fake_engine)  # must not raise
+
+
+def test_apply_overrides_updates_cron_expr(fake_engine: MagicMock) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s", "0 3 * * *"))
+    root = TomlRoot(strategies={"s": StrategyOverride(cron="*/5 * * * *")})
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert isinstance(reg.get("s").trigger, Cron)
+    assert reg.get("s").trigger.expr == "*/5 * * * *"
+    fake_engine.reschedule_cron_job.assert_called_once_with("s", "*/5 * * * *")
+
+
+def test_apply_overrides_skips_atomic_group_on_reschedule_failure(
+    fake_engine: MagicMock,
+) -> None:
+    """Even though StrategyOverride.cron is now syntactically validated at
+    parse time, reschedule_cron_job can still fail at runtime (APS internal
+    error, scheduler stopped, etc.). The atomic-group rollback must hold
+    against those failures too.
+    """
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s", "0 3 * * *", enabled=True, max_retries=1))
+    fake_engine.reschedule_cron_job.side_effect = RuntimeError("APS error")
+    root = TomlRoot(
+        strategies={
+            "s": StrategyOverride(enabled=False, cron="*/5 * * * *", max_retries=99)
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    # enabled applied independently
+    assert reg.get("s").enabled is False
+    # atomic group rolled back: cron unchanged, max_retries unchanged
+    assert reg.get("s").trigger.expr == "0 3 * * *"
+    assert reg.get("s").max_retries == 1
+    fake_engine.reschedule_cron_job.assert_called_once_with("s", "*/5 * * * *")
+
+
+def test_apply_overrides_skips_atomic_group_on_cron_type_mismatch(
+    fake_engine: MagicMock,
+) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s", enabled=True))  # Immediate strategy
+    root = TomlRoot(strategies={"s": StrategyOverride(enabled=False, cron="0 3 * * *")})
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("s").enabled is False
+    assert isinstance(reg.get("s").trigger, Immediate)
+    fake_engine.reschedule_cron_job.assert_not_called()
+
+
+def test_apply_overrides_updates_idle_seconds_and_scan_interval(
+    fake_engine: MagicMock,
+) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make_idle("s"))
+    root = TomlRoot(
+        strategies={"s": StrategyOverride(idle_seconds=120, scan_interval_seconds=15)}
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    t = reg.get("s").trigger
+    assert t.idle_seconds == 120
+    assert t.scan_interval_seconds == 15
+    fake_engine.reschedule_idle_job.assert_called_once_with(
+        "s", scan_interval_seconds=15
+    )
+
+
+def test_apply_overrides_updates_only_idle_seconds_does_not_reschedule_aps(
+    fake_engine: MagicMock,
+) -> None:
+    """idle_seconds is consumed by dispatcher / engine on each scan,
+    not by APS IntervalTrigger, so changing only it must NOT trigger
+    an APS reschedule (which would reset the pending tick).
+    """
+    reg = StrategyRegistry()
+    reg.register(_make_idle("s"))
+    root = TomlRoot(strategies={"s": StrategyOverride(idle_seconds=120)})
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("s").trigger.idle_seconds == 120
+    fake_engine.reschedule_idle_job.assert_not_called()
+
+
+def test_apply_overrides_skips_atomic_group_on_idle_type_mismatch(
+    fake_engine: MagicMock,
+) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s"))  # Cron strategy
+    root = TomlRoot(strategies={"s": StrategyOverride(idle_seconds=60)})
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert isinstance(reg.get("s").trigger, Cron)
+    fake_engine.reschedule_cron_job.assert_not_called()
+    fake_engine.reschedule_idle_job.assert_not_called()
+
+
+def test_apply_overrides_rollback_on_aps_reschedule_failure(
+    fake_engine: MagicMock,
+) -> None:
+    fake_engine.reschedule_cron_job.side_effect = RuntimeError("APS exploded")
+
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s", "0 3 * * *", enabled=True, max_retries=1))
+    root = TomlRoot(
+        strategies={
+            "s": StrategyOverride(enabled=False, cron="*/5 * * * *", max_retries=99)
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    # enabled applied (Step 1, before atomic group)
+    assert reg.get("s").enabled is False
+    # atomic group rolled back: cron + max_retries unchanged
+    assert reg.get("s").trigger.expr == "0 3 * * *"
+    assert reg.get("s").max_retries == 1
+
+
+def test_apply_overrides_enabled_survives_reschedule_failure(
+    fake_engine: MagicMock,
+) -> None:
+    """enabled=false is emergency-stop semantics; must apply even when the
+    paired cron update fails at reschedule time.
+    """
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s", "0 3 * * *", enabled=True))
+    fake_engine.reschedule_cron_job.side_effect = RuntimeError("APS error")
+    root = TomlRoot(
+        strategies={"s": StrategyOverride(enabled=False, cron="*/5 * * * *")}
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("s").enabled is False
+    assert reg.get("s").trigger.expr == "0 3 * * *"
+
+
+def test_apply_overrides_strategy_isolation(fake_engine: MagicMock) -> None:
+    """One strategy's atomic-group failure must not affect another."""
+    reg = StrategyRegistry()
+    reg.register(_make_cron("a", "0 3 * * *"))
+    reg.register(_make_cron("b", "0 4 * * *"))
+
+    def _reschedule(name: str, expr: str) -> None:
+        if name == "b":
+            raise RuntimeError("simulated APS failure for b")
+
+    fake_engine.reschedule_cron_job.side_effect = _reschedule
+    root = TomlRoot(
+        strategies={
+            "a": StrategyOverride(cron="*/5 * * * *"),
+            "b": StrategyOverride(cron="*/7 * * * *"),
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("a").trigger.expr == "*/5 * * * *"
+    assert reg.get("b").trigger.expr == "0 4 * * *"
+
+
+def test_apply_overrides_atomic_group_no_partial_application(
+    fake_engine: MagicMock,
+) -> None:
+    """A failure in the atomic group must roll back max_retries / gate too."""
+    reg = StrategyRegistry()
+    reg.register(
+        _make_cron(
+            "s",
+            "0 3 * * *",
+            max_retries=1,
+            gate=Counter(threshold=3, event_field="user_id"),
+        )
+    )
+    fake_engine.reschedule_cron_job.side_effect = RuntimeError("APS error")
+    root = TomlRoot(
+        strategies={
+            "s": StrategyOverride(
+                cron="*/5 * * * *",
+                max_retries=99,
+                gate=CounterOverride(threshold=100),
+            )
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("s").trigger.expr == "0 3 * * *"
+    assert reg.get("s").max_retries == 1
+    assert reg.get("s").gate.threshold == 3
+
+
+def test_apply_overrides_succeeds_on_combined_enabled_and_trigger(
+    fake_engine: MagicMock,
+) -> None:
+    reg = StrategyRegistry()
+    reg.register(_make_cron("s", "0 3 * * *", enabled=True))
+    root = TomlRoot(
+        strategies={"s": StrategyOverride(enabled=False, cron="*/5 * * * *")}
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    assert reg.get("s").enabled is False
+    assert reg.get("s").trigger.expr == "*/5 * * * *"
+    fake_engine.reschedule_cron_job.assert_called_once_with("s", "*/5 * * * *")
+
+
+def test_atomic_group_skipped_when_introducing_gate_without_threshold(
+    fake_engine: MagicMock,
+) -> None:
+    """N5: TOML that introduces a gate via cooldown alone (no threshold)
+    must be rejected, not silently defaulted to threshold=1 ('fire every event').
+    """
+    reg = StrategyRegistry()
+    reg.register(_make("s"))  # no gate
+    assert reg.get("s").gate is None
+
+    root = TomlRoot(
+        strategies={
+            "s": StrategyOverride(gate=CounterOverride(cooldown_seconds=60)),
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    # Atomic group rolled back: still no gate.
+    assert reg.get("s").gate is None
+
+
+def test_atomic_group_accepts_introducing_gate_with_explicit_threshold(
+    fake_engine: MagicMock,
+) -> None:
+    """N5 happy path: explicit threshold on a previously-gateless strategy
+    is the user opt-in we require.
+    """
+    reg = StrategyRegistry()
+    reg.register(_make("s"))
+    assert reg.get("s").gate is None
+
+    root = TomlRoot(
+        strategies={
+            "s": StrategyOverride(
+                gate=CounterOverride(threshold=5, cooldown_seconds=60)
+            ),
+        }
+    )
+
+    apply_overrides(reg, root, fake_engine)
+
+    g = reg.get("s").gate
+    assert g is not None
+    assert g.threshold == 5
+    assert g.cooldown_seconds == 60
+
+
+@pytest.mark.asyncio
+async def test_start_twice_raises(tmp_path: Path) -> None:
+    """N7: calling start() twice surfaces the caller bug instead of
+    silently dropping the original task reference and racing two watchers.
+    """
+    config_path = tmp_path / "ome.toml"
+    config_path.write_text("")
+    reloader = ConfigReloader(
+        config_path=config_path,
+        registry=StrategyRegistry(),
+        engine=MagicMock(spec=OfflineEngine),
+    )
+    reloader.start()
+    try:
+        with pytest.raises(RuntimeError, match=r"already started"):
+            reloader.start()
+    finally:
+        await reloader.stop()
+
+
+@pytest.mark.asyncio
+async def test_start_after_stop_is_allowed(tmp_path: Path) -> None:
+    """N7: idempotency check only fires while a task is live; once stopped,
+    start() must work again so callers can restart the reloader.
+    """
+    config_path = tmp_path / "ome.toml"
+    config_path.write_text("")
+    reloader = ConfigReloader(
+        config_path=config_path,
+        registry=StrategyRegistry(),
+        engine=MagicMock(spec=OfflineEngine),
+    )
+    reloader.start()
+    await reloader.stop()
+    # Must not raise.
+    reloader.start()
+    await reloader.stop()
--- a/tests/unit/test_infra/test_ome/test_context.py
+++ b/tests/unit/test_infra/test_ome/test_context.py
@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Protocol
+
+import structlog
+
+from everos.infra.ome.context import StrategyContext
+
+
+def test_strategy_context_is_protocol() -> None:
+    assert issubclass(StrategyContext, Protocol)  # type: ignore[arg-type]
+
+
+def test_strategy_context_runtime_attributes() -> None:
+    class _Impl:
+        run_id = "r1"
+        logger = structlog.get_logger("test")
+
+        async def emit(self, event: object) -> None:
+            return None
+
+    ctx: StrategyContext = _Impl()
+    assert ctx.run_id == "r1"
+    assert callable(ctx.emit)
--- a/tests/unit/test_infra/test_ome/test_counter_store.py
+++ b/tests/unit/test_infra/test_ome/test_counter_store.py
@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.infra.ome._stores.counter import CounterStore
+from everos.infra.ome._stores.storage import OMEStorage
+
+
+@pytest.fixture
+async def store(tmp_path: Path) -> CounterStore:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    return CounterStore(storage=storage)
+
+
+@pytest.mark.asyncio
+async def test_increments_until_threshold(store: CounterStore) -> None:
+    for i in range(1, 5):
+        passed, cur = await store.incr_and_check(
+            "s",
+            "u1",
+            threshold=5,
+            cooldown_seconds=0,
+        )
+        assert passed is False
+        assert cur == i
+
+    passed, cur = await store.incr_and_check(
+        "s",
+        "u1",
+        threshold=5,
+        cooldown_seconds=0,
+    )
+    assert passed is True
+    assert cur == 5
+
+
+@pytest.mark.asyncio
+async def test_resets_after_pass(store: CounterStore) -> None:
+    for _ in range(5):
+        await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=0)
+    passed, cur = await store.incr_and_check(
+        "s",
+        "u1",
+        threshold=5,
+        cooldown_seconds=0,
+    )
+    assert passed is False
+    assert cur == 1
+
+
+@pytest.mark.asyncio
+async def test_cooldown_blocks_pass(store: CounterStore) -> None:
+    # First pass
+    for _ in range(5):
+        await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=10)
+    # Threshold met again immediately, but cooldown blocks
+    for _ in range(5):
+        passed, _ = await store.incr_and_check(
+            "s",
+            "u1",
+            threshold=5,
+            cooldown_seconds=10,
+        )
+    assert passed is False
+
+
+@pytest.mark.asyncio
+async def test_buckets_are_isolated(store: CounterStore) -> None:
+    for _ in range(5):
+        await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=0)
+    passed, cur = await store.incr_and_check(
+        "s",
+        "u2",
+        threshold=5,
+        cooldown_seconds=0,
+    )
+    assert cur == 1
+    assert passed is False
+
+
+@pytest.mark.asyncio
+async def test_progress_query(store: CounterStore) -> None:
+    await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=0)
+    await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=0)
+    cur = await store.get_progress("s", "u1")
+    assert cur == 2
+
+
+@pytest.mark.asyncio
+async def test_returned_counter_reflects_actual_value_when_threshold_lowered(
+    store: CounterStore,
+) -> None:
+    """When threshold drops via hot-reload after counter accumulation,
+    the returned counter must reflect the *actual* count at trigger
+    moment, not the (lower) threshold. Diagnostics rely on this.
+    """
+    # Accumulate 7 hits under a high threshold; none pass.
+    for _ in range(7):
+        passed, _ = await store.incr_and_check(
+            "s", "u1", threshold=10, cooldown_seconds=0
+        )
+        assert passed is False
+
+    # Threshold is "lowered" to 5 (config hot-reload semantics).
+    # Counter goes 7 -> 8, which is past the new threshold.
+    passed, cur = await store.incr_and_check("s", "u1", threshold=5, cooldown_seconds=0)
+    assert passed is True
+    assert cur == 8  # actual count, not threshold (=5)
--- a/tests/unit/test_infra/test_ome/test_crash_recovery.py
+++ b/tests/unit/test_infra/test_ome/test_crash_recovery.py
@ -0,0 +1,149 @@
+from __future__ import annotations
+
+from datetime import timedelta
+from pathlib import Path
+
+import pytest
+
+from everos.component.utils.datetime import get_now_with_timezone, to_iso_format
+from everos.infra.ome._background.crash_recovery import scan_and_resume
+from everos.infra.ome._stores.run_record import RunRecordStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.records import RunStatus
+
+
+@pytest.fixture
+async def rec_store(tmp_path: Path) -> RunRecordStore:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    return RunRecordStore(storage=storage, max_records_per_strategy=1000)
+
+
+@pytest.mark.asyncio
+async def test_marks_old_running_as_crashed(rec_store: RunRecordStore) -> None:
+    await rec_store.mark_running(
+        run_id="r_old",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:E",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    async with rec_store._storage.connect() as conn:
+        rewind = to_iso_format(get_now_with_timezone() - timedelta(hours=2))
+        await conn.execute(
+            "UPDATE run_record SET started_at = ? WHERE run_id = ?",
+            (rewind, "r_old"),
+        )
+        await conn.commit()
+
+    resumed: list = []
+
+    async def add_job_hook(name, run_id, event_topic, event_payload, max_retries):
+        resumed.append((name, run_id, event_topic, event_payload, max_retries))
+
+    await scan_and_resume(
+        run_record_store=rec_store,
+        timeout_seconds=1800,
+        add_job=add_job_hook,
+    )
+
+    rec = await rec_store.get("r_old")
+    assert rec.status == RunStatus.CRASHED
+    assert len(resumed) == 1
+    new_name, new_run_id, ec, ep, mr = resumed[0]
+    assert new_name == "s"
+    assert new_run_id != "r_old"
+    assert ec == "x:E"
+    assert ep == "{}"
+    assert mr == 1
+
+
+@pytest.mark.asyncio
+async def test_recent_running_skipped(rec_store: RunRecordStore) -> None:
+    await rec_store.mark_running(
+        run_id="r_fresh",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:E",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    resumed: list = []
+
+    async def add_job_hook(*args, **kw):
+        resumed.append(args)
+
+    await scan_and_resume(
+        run_record_store=rec_store,
+        timeout_seconds=1800,
+        add_job=add_job_hook,
+    )
+    rec = await rec_store.get("r_fresh")
+    assert rec.status == RunStatus.RUNNING
+    assert resumed == []
+
+
+@pytest.mark.parametrize("bad_timeout", [0, -1])
+@pytest.mark.asyncio
+async def test_scan_and_resume_non_positive_timeout_raises(
+    rec_store: RunRecordStore, bad_timeout: int
+) -> None:
+    """N6: non-positive timeout must fail fast rather than silently no-op."""
+
+    async def _noop_add_job(*_args: object, **_kwargs: object) -> None:
+        pass
+
+    with pytest.raises(ValueError, match=r"timeout_seconds must be > 0"):
+        await scan_and_resume(
+            run_record_store=rec_store,
+            timeout_seconds=bad_timeout,
+            add_job=_noop_add_job,
+        )
+
+
+@pytest.mark.asyncio
+async def test_add_job_failure_does_not_abort_loop(
+    rec_store: RunRecordStore,
+) -> None:
+    """add_job raising on one row must not block sibling stale rows.
+
+    mark_crashed runs before add_job, so both rows end up CRASHED even
+    when add_job fails for one. This pins the at-most-once contract
+    documented in the module docstring.
+    """
+    for run_id in ("r_old_1", "r_old_2"):
+        await rec_store.mark_running(
+            run_id=run_id,
+            strategy_name="s",
+            attempt=0,
+            event_topic="x:E",
+            event_payload="{}",
+            max_retries_snapshot=1,
+        )
+    async with rec_store._storage.connect() as conn:
+        rewind = to_iso_format(get_now_with_timezone() - timedelta(hours=2))
+        await conn.execute(
+            "UPDATE run_record SET started_at = ? WHERE run_id IN (?, ?)",
+            (rewind, "r_old_1", "r_old_2"),
+        )
+        await conn.commit()
+
+    calls: list[tuple] = []
+
+    async def flaky_add_job(name, run_id, event_topic, event_payload, max_retries):
+        calls.append((name, run_id, event_topic, event_payload, max_retries))
+        if len(calls) == 1:
+            raise RuntimeError("APS jobstore unavailable")
+
+    await scan_and_resume(
+        run_record_store=rec_store,
+        timeout_seconds=1800,
+        add_job=flaky_add_job,
+    )
+
+    rec1 = await rec_store.get("r_old_1")
+    rec2 = await rec_store.get("r_old_2")
+    assert rec1.status == RunStatus.CRASHED
+    assert rec2.status == RunStatus.CRASHED
+    assert len(calls) == 2
--- a/tests/unit/test_infra/test_ome/test_decorator.py
+++ b/tests/unit/test_infra/test_ome/test_decorator.py
@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import pytest
+
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import StrategyMeta, offline_strategy
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Immediate
+
+
+class _E(BaseEvent):
+    user_id: str
+
+
+def test_decorator_attaches_metadata() -> None:
+    @offline_strategy(name="x", trigger=Immediate(on=[_E]), emits=[_E])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    meta: StrategyMeta = s._ome_strategy_meta  # type: ignore[attr-defined]
+    assert meta.name == "x"
+    assert meta.emits == frozenset({_E})
+    assert meta.gate is None
+    assert meta.applies_to is None
+    assert meta.max_retries is None
+    assert meta.enabled is True
+    assert meta.func is s
+
+
+def test_decorator_with_full_params() -> None:
+    @offline_strategy(
+        name="cluster",
+        trigger=Immediate(on=[_E]),
+        emits=[_E],
+        applies_to="user_id",
+        gate=Counter(threshold=5),
+        max_retries=3,
+        enabled=False,
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    meta = s._ome_strategy_meta  # type: ignore[attr-defined]
+    assert meta.applies_to == "user_id"
+    assert meta.gate.threshold == 5
+    assert meta.max_retries == 3
+    assert meta.enabled is False
+
+
+def test_decorator_callable_applies_to() -> None:
+    def is_paid(e: _E) -> bool:
+        return e.user_id.startswith("paid_")
+
+    @offline_strategy(
+        name="paid_only",
+        trigger=Immediate(on=[_E]),
+        emits=[_E],
+        applies_to=is_paid,
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    meta = s._ome_strategy_meta  # type: ignore[attr-defined]
+    assert meta.applies_to is is_paid
+
+
+def test_decorator_rejects_blank_name() -> None:
+    with pytest.raises(ValueError):
+
+        @offline_strategy(name="", trigger=Immediate(on=[_E]), emits=[_E])
+        async def _s(event: _E, ctx: StrategyContext) -> None:
+            return None
+
+
+def test_decorator_rejects_non_async_function() -> None:
+    with pytest.raises(TypeError):
+
+        @offline_strategy(name="x", trigger=Immediate(on=[_E]), emits=[_E])
+        def _s(event: _E, ctx: StrategyContext) -> None:  # not async
+            return None
--- a/tests/unit/test_infra/test_ome/test_dispatcher.py
+++ b/tests/unit/test_infra/test_ome/test_dispatcher.py
@ -0,0 +1,215 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from everos.infra.ome._dispatch.dispatcher import EventDispatcher
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome._stores.counter import CounterStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.events import BaseEvent, CronTick
+from everos.infra.ome.gates import Counter
+from everos.infra.ome.triggers import Cron, Immediate
+
+
+class _E(BaseEvent):
+    user_id: str
+
+
+def _make_strategy(name: str, **kw):
+    @offline_strategy(name=name, trigger=Immediate(on=[_E]), emits=[], **kw)
+    async def _f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return _f
+
+
+@pytest.fixture
+async def dispatcher(tmp_path: Path) -> EventDispatcher:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    registry = StrategyRegistry()
+    counter = CounterStore(storage=storage)
+    return EventDispatcher(registry=registry, counter_store=counter)
+
+
+@pytest.mark.asyncio
+async def test_dispatch_passes_when_no_gate(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_strategy("s_pass"))
+    routes = await dispatcher.dispatch(_E(user_id="u1"))
+    assert [m.name for m, _ in routes] == ["s_pass"]
+
+
+@pytest.mark.asyncio
+async def test_dispatch_skips_disabled(dispatcher: EventDispatcher) -> None:
+    dispatcher._registry.register(_make_strategy("s_off", enabled=False))
+    routes = await dispatcher.dispatch(_E(user_id="u1"))
+    assert routes == []
+
+
+@pytest.mark.asyncio
+async def test_dispatch_applies_to_string(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(
+        _make_strategy("s", applies_to="user_id"),
+    )
+    routes_empty = await dispatcher.dispatch(_E(user_id=""))
+    routes_set = await dispatcher.dispatch(_E(user_id="u1"))
+    assert routes_empty == []
+    assert len(routes_set) == 1
+
+
+@pytest.mark.asyncio
+async def test_dispatch_applies_to_callable(
+    dispatcher: EventDispatcher,
+) -> None:
+    def is_paid(e: _E) -> bool:
+        return e.user_id.startswith("paid_")
+
+    dispatcher._registry.register(_make_strategy("s", applies_to=is_paid))
+    assert await dispatcher.dispatch(_E(user_id="free_a")) == []
+    assert len(await dispatcher.dispatch(_E(user_id="paid_a"))) == 1
+
+
+@pytest.mark.asyncio
+async def test_dispatch_counter_gate(dispatcher: EventDispatcher) -> None:
+    dispatcher._registry.register(
+        _make_strategy("s", gate=Counter(threshold=3, event_field="user_id"))
+    )
+    for _ in range(2):
+        routes = await dispatcher.dispatch(_E(user_id="u1"))
+        assert routes == []
+    routes = await dispatcher.dispatch(_E(user_id="u1"))
+    assert len(routes) == 1
+
+
+@pytest.mark.asyncio
+async def test_inspect_returns_route_info(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(
+        _make_strategy("s", gate=Counter(threshold=3, event_field="user_id"))
+    )
+    infos = await dispatcher.inspect(_E(user_id="u1"))
+    assert len(infos) == 1
+    assert infos[0].counter_progress == (1, 3)
+    assert infos[0].will_run is False
+
+
+def _make_cron_strategy(name: str):
+    @offline_strategy(name=name, trigger=Cron(expr="0 * * * *"), emits=[])
+    async def _f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return _f
+
+
+@pytest.mark.asyncio
+async def test_dispatch_routes_engine_tick_to_named_strategy_only(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_cron_strategy("cron_a"))
+    dispatcher._registry.register(_make_cron_strategy("cron_b"))
+    routes = await dispatcher.dispatch(CronTick(strategy_name="cron_a"))
+    assert [m.name for m, _ in routes] == ["cron_a"]
+
+
+@pytest.mark.asyncio
+async def test_inspect_engine_tick_skips_non_target_strategy(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_cron_strategy("cron_a"))
+    dispatcher._registry.register(_make_cron_strategy("cron_b"))
+    infos = await dispatcher.inspect(CronTick(strategy_name="cron_b"))
+    assert [i.strategy_name for i in infos] == ["cron_b"]
+
+
+@pytest.mark.asyncio
+async def test_dispatch_force_enabled_bypasses_enabled_gate(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_strategy("s_off", enabled=False))
+    assert await dispatcher.dispatch(_E(user_id="u1")) == []
+    routes = await dispatcher.dispatch(_E(user_id="u1"), force_enabled=True)
+    assert [m.name for m, _ in routes] == ["s_off"]
+
+
+@pytest.mark.asyncio
+async def test_dispatch_force_enabled_still_applies_applies_to_and_counter(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(
+        _make_strategy(
+            "s",
+            enabled=False,
+            applies_to="user_id",
+            gate=Counter(threshold=2, event_field="user_id"),
+        ),
+    )
+    assert await dispatcher.dispatch(_E(user_id=""), force_enabled=True) == []
+    assert await dispatcher.dispatch(_E(user_id="u1"), force_enabled=True) == []
+    routes = await dispatcher.dispatch(_E(user_id="u1"), force_enabled=True)
+    assert len(routes) == 1
+
+
+@pytest.mark.asyncio
+async def test_dispatch_strategy_filter_scopes_to_single_strategy(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_strategy("s_a"))
+    dispatcher._registry.register(_make_strategy("s_b"))
+    routes = await dispatcher.dispatch(_E(user_id="u1"), strategy_filter="s_a")
+    assert [m.name for m, _ in routes] == ["s_a"]
+
+
+@pytest.mark.asyncio
+async def test_dispatch_strategy_filter_unknown_raises(
+    dispatcher: EventDispatcher,
+) -> None:
+    dispatcher._registry.register(_make_strategy("s_a"))
+    with pytest.raises(KeyError):
+        await dispatcher.dispatch(_E(user_id="u1"), strategy_filter="missing")
+
+
+@pytest.mark.asyncio
+async def test_dispatch_isolates_faulty_applies_to_callable(
+    dispatcher: EventDispatcher,
+) -> None:
+    """A single strategy's buggy ``applies_to`` callable must not tank
+    the fan-out for siblings subscribed to the same event class.
+    """
+
+    def _boom(_e: _E) -> bool:
+        raise RuntimeError("applies_to is buggy")
+
+    dispatcher._registry.register(_make_strategy("s_buggy", applies_to=_boom))
+    dispatcher._registry.register(_make_strategy("s_healthy"))
+
+    routes = await dispatcher.dispatch(_E(user_id="u1"))
+
+    # s_buggy is treated as not-applies; s_healthy still routes.
+    assert [m.name for m, _ in routes] == ["s_healthy"]
+
+
+@pytest.mark.asyncio
+async def test_inspect_isolates_faulty_applies_to_callable(
+    dispatcher: EventDispatcher,
+) -> None:
+    def _boom(_e: _E) -> bool:
+        raise RuntimeError("applies_to is buggy")
+
+    dispatcher._registry.register(_make_strategy("s_buggy", applies_to=_boom))
+    dispatcher._registry.register(_make_strategy("s_healthy"))
+
+    infos = await dispatcher.inspect(_E(user_id="u1"))
+
+    by_name = {i.strategy_name: i for i in infos}
+    assert by_name["s_buggy"].applies_to_pass is False
+    assert by_name["s_healthy"].applies_to_pass is True
--- a/tests/unit/test_infra/test_ome/test_e2e.py
+++ b/tests/unit/test_infra/test_ome/test_e2e.py
@ -0,0 +1,186 @@
+"""End-to-end pipeline test exercising the chain emit semantics.
+
+MemCellSaved -> atomic (leaf strategy)
+EpisodeSaved -> cluster -> ClusteringCompleted -> profile (Counter threshold=3)
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from everos.infra.ome import (
+    BaseEvent,
+    Counter,
+    Cron,
+    CronTick,
+    Immediate,
+    StrategyContext,
+    offline_strategy,
+)
+from everos.infra.ome.engine import _cron_entry
+from everos.infra.ome.testing import StrategyTestHarness
+
+
+class MemCellSaved(BaseEvent):
+    user_id: str
+    cell_id: str
+
+
+class EpisodeSaved(BaseEvent):
+    user_id: str
+    episode_text: str
+
+
+class ClusteringCompleted(BaseEvent):
+    user_id: str
+
+
+@pytest.mark.asyncio
+async def test_chain_emit_without_counter_gate() -> None:
+    """Variant of the full-chain test without a Counter gate.
+
+    Profile fires once per ClusteringCompleted instead of once per N.
+    """
+    log: list[tuple[str, str]] = []
+
+    @offline_strategy(
+        name="cluster_e2e",
+        trigger=Immediate(on=[EpisodeSaved]),
+        emits=[ClusteringCompleted],
+    )
+    async def cluster(event: EpisodeSaved, ctx: StrategyContext) -> None:
+        log.append(("cluster", event.user_id))
+        await ctx.emit(ClusteringCompleted(user_id=event.user_id))
+
+    @offline_strategy(
+        name="profile_e2e",
+        trigger=Immediate(on=[ClusteringCompleted]),
+        emits=[],
+    )
+    async def profile(event: ClusteringCompleted, ctx: StrategyContext) -> None:
+        log.append(("profile", event.user_id))
+
+    async with StrategyTestHarness() as h:
+        h.register(cluster)
+        h.register(profile)
+        await h.start()
+        # Emit 3 episodes -> cluster runs 3x -> emits ClusteringCompleted 3x ->
+        # profile runs 3x (no counter gate).
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t1"))
+        await asyncio.sleep(0.15)
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t2"))
+        await asyncio.sleep(0.15)
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t3"))
+        await asyncio.sleep(0.2)
+        await h.drain(timeout=15)
+
+        cluster_runs = await h.list_runs("cluster_e2e")
+        profile_runs = await h.list_runs("profile_e2e")
+
+    cluster_calls = [c for c in log if c[0] == "cluster"]
+    profile_calls = [c for c in log if c[0] == "profile"]
+    assert len(cluster_calls) == 3, (
+        f"Expected 3 cluster, got {len(cluster_calls)}: {log}"
+    )
+    assert len(profile_calls) == 3, (
+        f"Expected 3 profile, got {len(profile_calls)}: {log}"
+    )
+    assert len(cluster_runs) == 3
+    assert len(profile_runs) == 3
+
+
+@pytest.mark.asyncio
+async def test_chain_pipeline_runs_full_path() -> None:
+    """Full chain with atomic, cluster, and profile (Counter gated)."""
+    log: list[tuple[str, str]] = []
+
+    @offline_strategy(name="atomic_e2e", trigger=Immediate(on=[MemCellSaved]), emits=[])
+    async def atomic(event: MemCellSaved, ctx: StrategyContext) -> None:
+        log.append(("atomic", event.cell_id))
+
+    @offline_strategy(
+        name="cluster_e2e",
+        trigger=Immediate(on=[EpisodeSaved]),
+        emits=[ClusteringCompleted],
+    )
+    async def cluster(event: EpisodeSaved, ctx: StrategyContext) -> None:
+        log.append(("cluster", event.user_id))
+        await ctx.emit(ClusteringCompleted(user_id=event.user_id))
+
+    @offline_strategy(
+        name="profile_e2e",
+        trigger=Immediate(on=[ClusteringCompleted]),
+        emits=[],
+        gate=Counter(threshold=3, event_field="user_id"),
+    )
+    async def profile(event: ClusteringCompleted, ctx: StrategyContext) -> None:
+        log.append(("profile", event.user_id))
+
+    async with StrategyTestHarness() as h:
+        h.register(atomic)
+        h.register(cluster)
+        h.register(profile)
+        await h.start()
+        # Two memcells (each fires atomic).
+        await h.emit(MemCellSaved(user_id="u1", cell_id="c1"))
+        await asyncio.sleep(0.15)
+        await h.emit(MemCellSaved(user_id="u1", cell_id="c2"))
+        await asyncio.sleep(0.15)
+        # Three episodes -> cluster runs 3x -> ClusteringCompleted 3x ->
+        # profile Counter at threshold=3 fires once.
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t1"))
+        await asyncio.sleep(0.15)
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t2"))
+        await asyncio.sleep(0.15)
+        await h.emit(EpisodeSaved(user_id="u1", episode_text="t3"))
+        await asyncio.sleep(0.2)
+        await h.drain(timeout=15)
+
+        # Validate using run records
+        atomic_runs = await h.list_runs("atomic_e2e")
+        cluster_runs = await h.list_runs("cluster_e2e")
+        profile_runs = await h.list_runs("profile_e2e")
+
+    atomic_calls = [c for c in log if c[0] == "atomic"]
+    cluster_calls = [c for c in log if c[0] == "cluster"]
+    profile_calls = [c for c in log if c[0] == "profile"]
+    assert len(atomic_calls) == 2, (
+        f"Expected 2 atomic calls, got {len(atomic_calls)}: {log}"
+    )
+    assert len(cluster_calls) == 3, (
+        f"Expected 3 cluster calls, got {len(cluster_calls)}: {log}"
+    )
+    assert len(profile_calls) == 1, (
+        f"Expected 1 profile call, got {len(profile_calls)}: {log}"
+    )
+    assert len(atomic_runs) == 2
+    assert len(cluster_runs) == 3
+    assert len(profile_runs) == 1
+
+
+@pytest.mark.asyncio
+async def test_cron_strategy_executes_when_cron_entry_fires() -> None:
+    """Verify that the cron-trigger code path actually reaches the strategy.
+
+    APScheduler timing is mocked away — we directly call the module-level
+    _cron_entry function that APS would invoke on schedule. This proves
+    the registry/dispatcher/runner chain wires cron strategies correctly.
+    """
+    seen: list[str] = []
+
+    @offline_strategy(name="cron_e2e", trigger=Cron(expr="0 * * * *"), emits=[])
+    async def cron_job(event: CronTick, ctx: StrategyContext) -> None:
+        seen.append(event.strategy_name)
+
+    async with StrategyTestHarness() as h:
+        h.register(cron_job)
+        await h.start()
+        # Directly invoke what APS would call; bypass scheduler timing.
+        await _cron_entry(h._engine._engine_id, "cron_e2e")  # noqa: SLF001
+        await h.drain(timeout=5)
+        runs = await h.list_runs("cron_e2e")
+
+    assert seen == ["cron_e2e"]
+    assert len(runs) == 1
--- a/tests/unit/test_infra/test_ome/test_engine.py
+++ b/tests/unit/test_infra/test_ome/test_engine.py
@ -0,0 +1,623 @@
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from everos.infra.ome.config import OMEConfig
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.engine import OfflineEngine
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.exceptions import (
+    EngineLockHeldError,
+    OMEError,
+    StartupValidationError,
+)
+from everos.infra.ome.records import RunStatus
+from everos.infra.ome.triggers import Cron, Idle, Immediate
+
+
+class _E(BaseEvent):
+    pass
+
+
+class _A(BaseEvent):
+    pass
+
+
+class _B(BaseEvent):
+    pass
+
+
+@pytest.fixture
+def cfg(tmp_path: Path) -> OMEConfig:
+    return OMEConfig(jobstore_path=tmp_path / "ome.db", config_watch=False)
+
+
+@pytest.mark.asyncio
+async def test_engine_register_and_start(cfg: OMEConfig) -> None:
+    @offline_strategy(name="s", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_engine_register_after_start_raises(cfg: OMEConfig) -> None:
+    engine = OfflineEngine(config=cfg)
+    await engine.start()
+    try:
+
+        @offline_strategy(name="s", trigger=Immediate(on=[_E]), emits=[])
+        async def s(event: _E, ctx: StrategyContext) -> None:
+            return None
+
+        with pytest.raises(OMEError):
+            engine.register(s)
+    finally:
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_engine_lock_prevents_double_open(cfg: OMEConfig) -> None:
+    engine1 = OfflineEngine(config=cfg)
+    await engine1.start()
+    try:
+        engine2 = OfflineEngine(config=cfg)
+        with pytest.raises(EngineLockHeldError):
+            await engine2.start()
+    finally:
+        await engine1.stop()
+
+
+@pytest.mark.asyncio
+async def test_engine_validates_dag_at_start(tmp_path: Path) -> None:
+    cfg = OMEConfig(jobstore_path=tmp_path / "ome.db", config_watch=False)
+
+    @offline_strategy(name="s1", trigger=Immediate(on=[_A]), emits=[_B])
+    async def _s1(e: Any, ctx: StrategyContext) -> None:
+        return None
+
+    @offline_strategy(name="s2", trigger=Immediate(on=[_B]), emits=[_A])
+    async def _s2(e: Any, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(_s1)
+    engine.register(_s2)
+    with pytest.raises(StartupValidationError, match=r"(?i)cycle"):
+        await engine.start()
+
+
+@pytest.mark.asyncio
+async def test_engine_emit_drives_strategy(cfg: OMEConfig) -> None:
+    seen: list[_E] = []
+
+    @offline_strategy(name="collector", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        seen.append(event)
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        await engine.emit(_E())
+        # Poll because APScheduler offers no completion signal; retry up to ~2.5s.
+        for _ in range(50):
+            if seen:
+                break
+            await asyncio.sleep(0.05)
+    finally:
+        await engine.stop()
+    assert len(seen) == 1
+
+
+@pytest.mark.asyncio
+async def test_engine_chain_emit_through_ctx(cfg: OMEConfig) -> None:
+    seen_b: list = []
+
+    @offline_strategy(name="a_to_b", trigger=Immediate(on=[_A]), emits=[_B])
+    async def s_a(event: _A, ctx: StrategyContext) -> None:
+        await ctx.emit(_B())
+
+    @offline_strategy(name="b_collector", trigger=Immediate(on=[_B]), emits=[])
+    async def s_b(event: _B, ctx: StrategyContext) -> None:
+        seen_b.append(event)
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s_a)
+    engine.register(s_b)
+    await engine.start()
+    try:
+        await engine.emit(_A())
+        for _ in range(50):
+            if seen_b:
+                break
+            await asyncio.sleep(0.05)
+    finally:
+        await engine.stop()
+    assert len(seen_b) == 1
+
+
+@pytest.mark.asyncio
+async def test_strategy_calling_engine_emit_directly_is_rejected(
+    cfg: OMEConfig,
+) -> None:
+    """Strategy code must emit follow-up events through ctx.emit.
+
+    Calling engine.emit from inside a strategy raises
+    EngineCallFromStrategyError (a StrategyContractError) so Runner
+    short-circuits the retry budget and dead-letters on the very first
+    attempt — re-running the same buggy code can't fix a programming bug.
+    """
+    engine = OfflineEngine(config=cfg)
+
+    @offline_strategy(name="bad", trigger=Immediate(on=[_A]), emits=[_B])
+    async def bad_strategy(event: _A, ctx: StrategyContext) -> None:
+        # Captured engine reference is the common, intended pattern for
+        # external triggers; using it from INSIDE a strategy is the
+        # convention violation we want to catch.
+        await engine.emit(_B())
+
+    engine.register(bad_strategy)
+    await engine.start()
+    try:
+        await engine.emit(_A())
+        for _ in range(50):
+            runs = await engine.list_runs("bad")
+            if runs and runs[0].status == RunStatus.DEAD_LETTER:
+                break
+            await asyncio.sleep(0.05)
+        runs = await engine.list_runs("bad")
+    finally:
+        await engine.stop()
+
+    assert runs, "expected at least one run record"
+    # Permanent error → exactly one attempt, no retry.
+    assert len(runs) == 1
+    final = runs[0]
+    assert final.status == RunStatus.DEAD_LETTER
+    assert "EngineCallFromStrategyError" in (final.error or "")
+    assert "emit" in (final.error or "")
+
+
+# Module-level singleton — proxies the "strategy reads engine via
+# globals/DI/import" pattern. Guard is contextvars-based so it catches
+# this path identically to the closure case.
+_MODULE_ENGINE: OfflineEngine | None = None
+
+
+@pytest.mark.asyncio
+async def test_strategy_reaching_engine_via_module_global_is_rejected(
+    cfg: OMEConfig,
+) -> None:
+    """The guard is contextvars-based: it doesn't matter how the strategy
+    got the engine reference (closure, module singleton, DI container).
+    """
+    global _MODULE_ENGINE
+    _MODULE_ENGINE = OfflineEngine(config=cfg)
+
+    @offline_strategy(name="bad_global", trigger=Immediate(on=[_A]), emits=[_B])
+    async def bad_strategy(event: _A, ctx: StrategyContext) -> None:
+        assert _MODULE_ENGINE is not None
+        await _MODULE_ENGINE.emit(_B())
+
+    _MODULE_ENGINE.register(bad_strategy)
+    await _MODULE_ENGINE.start()
+    try:
+        await _MODULE_ENGINE.emit(_A())
+        for _ in range(50):
+            runs = await _MODULE_ENGINE.list_runs("bad_global")
+            if runs and runs[0].status == RunStatus.DEAD_LETTER:
+                break
+            await asyncio.sleep(0.05)
+        runs = await _MODULE_ENGINE.list_runs("bad_global")
+    finally:
+        await _MODULE_ENGINE.stop()
+        _MODULE_ENGINE = None
+
+    assert len(runs) == 1
+    assert runs[0].status == RunStatus.DEAD_LETTER
+    assert "EngineCallFromStrategyError" in (runs[0].error or "")
+
+
+@pytest.mark.asyncio
+async def test_strategy_calling_other_engine_methods_is_rejected(
+    cfg: OMEConfig,
+) -> None:
+    """The guard covers every public engine method, not just emit —
+    strategies must interact with the engine only via (event, ctx).
+    """
+    engine = OfflineEngine(config=cfg)
+
+    @offline_strategy(name="bad_lookup", trigger=Immediate(on=[_A]), emits=[])
+    async def bad_strategy(event: _A, ctx: StrategyContext) -> None:
+        # trigger_manual is another public engine method that strategies
+        # must not call directly.
+        await engine.trigger_manual("bad_lookup")
+
+    engine.register(bad_strategy)
+    await engine.start()
+    try:
+        await engine.emit(_A())
+        for _ in range(50):
+            runs = await engine.list_runs("bad_lookup")
+            if runs and runs[0].status == RunStatus.DEAD_LETTER:
+                break
+            await asyncio.sleep(0.05)
+        runs = await engine.list_runs("bad_lookup")
+    finally:
+        await engine.stop()
+
+    assert len(runs) == 1
+    assert runs[0].status == RunStatus.DEAD_LETTER
+    assert "EngineCallFromStrategyError" in (runs[0].error or "")
+    assert "trigger_manual" in (runs[0].error or "")
+
+
+@pytest.mark.asyncio
+async def test_trigger_manual_with_default_event_uses_manual_tick(
+    cfg: OMEConfig,
+) -> None:
+    seen: list = []
+
+    from everos.infra.ome.events import ManualTick
+
+    @offline_strategy(
+        name="manual_only",
+        trigger=Immediate(on=[ManualTick]),
+        emits=[],
+    )
+    async def s(event: ManualTick, ctx: StrategyContext) -> None:
+        seen.append(event)
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        await engine.trigger_manual("manual_only")
+        for _ in range(50):
+            if seen:
+                break
+            await asyncio.sleep(0.05)
+    finally:
+        await engine.stop()
+    assert len(seen) == 1
+
+
+@pytest.mark.asyncio
+async def test_trigger_manual_force_bypasses_enabled(
+    cfg: OMEConfig,
+) -> None:
+    seen: list = []
+    from everos.infra.ome.events import ManualTick
+
+    @offline_strategy(
+        name="off",
+        trigger=Immediate(on=[ManualTick]),
+        emits=[],
+        enabled=False,
+    )
+    async def s(event: ManualTick, ctx: StrategyContext) -> None:
+        seen.append(event)
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        await engine.trigger_manual("off", force=True)
+        for _ in range(50):
+            if seen:
+                break
+            await asyncio.sleep(0.05)
+    finally:
+        await engine.stop()
+    assert len(seen) == 1
+
+
+@pytest.mark.asyncio
+async def test_on_dead_letter_callback_invoked(cfg: OMEConfig) -> None:
+    calls: list = []
+
+    @offline_strategy(
+        name="bad_dl", trigger=Immediate(on=[_E]), emits=[], max_retries=0
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        raise RuntimeError("always-fail")
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    engine.on_dead_letter(lambda rec: calls.append(rec.run_id))
+    await engine.start()
+    try:
+        await engine.emit(_E())
+        for _ in range(50):
+            if calls:
+                break
+            await asyncio.sleep(0.05)
+    finally:
+        await engine.stop()
+    assert len(calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_inspect_dispatch_returns_routes(cfg: OMEConfig) -> None:
+    @offline_strategy(name="s_t24a", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        infos = await engine.inspect_dispatch(_E())
+        assert len(infos) == 1
+        assert infos[0].will_run is True
+    finally:
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_get_run_status_and_list(cfg: OMEConfig) -> None:
+    @offline_strategy(name="s_t24b", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        await engine.emit(_E())
+        # Poll because APScheduler offers no completion signal; up to ~2.5s.
+        for _ in range(50):
+            runs = await engine.list_runs("s_t24b")
+            if runs and runs[0].status.value == "success":
+                break
+            await asyncio.sleep(0.05)
+        runs = await engine.list_runs("s_t24b")
+        assert len(runs) == 1
+        rec = await engine.get_run_status(runs[0].run_id)
+        assert rec is not None
+        assert rec.status.value == "success"
+    finally:
+        await engine.stop()
+
+
+class _EventWithUid(BaseEvent):
+    user_id: str
+
+
+@pytest.mark.asyncio
+async def test_engine_reschedule_cron_job_updates_aps(cfg: OMEConfig) -> None:
+    @offline_strategy(name="cron_s", trigger=Cron(expr="0 3 * * *"), emits=[])
+    async def s(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        from apscheduler.triggers.cron import CronTrigger
+
+        engine.reschedule_cron_job("cron_s", "*/5 * * * *")
+
+        job = engine._scheduler.get_job("cron::cron_s")
+        assert isinstance(job.trigger, CronTrigger)
+        # CronTrigger stores parsed crontab fields; minute step=5 means "*/5".
+        minute_field = next(f for f in job.trigger.fields if f.name == "minute")
+        assert str(minute_field) == "*/5"
+    finally:
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_engine_reschedule_idle_job_updates_interval(cfg: OMEConfig) -> None:
+    @offline_strategy(
+        name="idle_s",
+        trigger=Idle(
+            on=[_EventWithUid],
+            event_field="user_id",
+            idle_seconds=60,
+            scan_interval_seconds=30,
+        ),
+        emits=[],
+    )
+    async def s(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+        engine.reschedule_idle_job("idle_s", scan_interval_seconds=10)
+        job = engine._scheduler.get_job("idle::idle_s")
+        # IntervalTrigger.interval is a timedelta.
+        assert job.trigger.interval.total_seconds() == 10
+    finally:
+        await engine.stop()
+
+
+def test_reschedule_cron_job_before_start_raises(cfg: OMEConfig) -> None:
+    engine = OfflineEngine(config=cfg)
+    with pytest.raises(OMEError, match="engine not started"):
+        engine.reschedule_cron_job("x", "* * * * *")
+
+
+def test_reschedule_idle_job_before_start_raises(cfg: OMEConfig) -> None:
+    engine = OfflineEngine(config=cfg)
+    with pytest.raises(OMEError, match="engine not started"):
+        engine.reschedule_idle_job("x", scan_interval_seconds=30)
+
+
+@pytest.mark.asyncio
+async def test_start_failure_cleans_up_engines_and_scheduler(
+    cfg: OMEConfig, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A failure between scheduler start and ``_started = True`` must roll
+    back: pop from the module-level ``_ENGINES`` registry, shut the
+    scheduler thread down, and release the lock so a fresh ``OfflineEngine``
+    can start on the same jobstore.
+    """
+    from everos.infra.ome import engine as engine_mod
+
+    async def _boom(*args: Any, **kwargs: Any) -> None:
+        raise RuntimeError("crash recovery exploded")
+
+    monkeypatch.setattr(engine_mod, "scan_and_resume", _boom)
+
+    engine = OfflineEngine(config=cfg)
+    with pytest.raises(RuntimeError, match="crash recovery exploded"):
+        await engine.start()
+
+    assert engine._engine_id not in engine_mod._ENGINES
+    assert engine._scheduler is None
+    assert engine._started is False
+    assert engine._lock_handle is None
+
+    monkeypatch.undo()
+    engine2 = OfflineEngine(config=cfg)
+    await engine2.start()
+    await engine2.stop()
+
+
+# ── active_runs / wait_idle ────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_wait_idle_returns_true_when_no_runs(cfg: OMEConfig) -> None:
+    """Pre-emit idle: counter starts at 0, idle_event starts set."""
+    engine = OfflineEngine(config=cfg)
+    await engine.start()
+    try:
+        assert engine._active_runs == 0
+        assert await engine.wait_idle(timeout=0.5) is True
+    finally:
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_wait_idle_blocks_until_strategy_finishes(cfg: OMEConfig) -> None:
+    """A strategy mid-flight keeps active_runs > 0 and idle_event clear
+    until it completes."""
+    release = asyncio.Event()
+    entered = asyncio.Event()
+
+    @offline_strategy(name="slow", trigger=Immediate(on=[_E]), emits=[])
+    async def slow(event: _E, ctx: StrategyContext) -> None:
+        entered.set()
+        await release.wait()
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(slow)
+    await engine.start()
+    try:
+        await engine.emit(_E())
+        await asyncio.wait_for(entered.wait(), timeout=2.0)
+        # Strategy is now mid-flight.
+        assert engine._active_runs >= 1
+        assert await engine.wait_idle(timeout=0.2) is False
+        # Release the strategy and verify wait_idle resolves.
+        release.set()
+        assert await engine.wait_idle(timeout=2.0) is True
+        assert engine._active_runs == 0
+    finally:
+        release.set()
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_stop_waits_for_in_flight_run_to_complete(cfg: OMEConfig) -> None:
+    """stop() must not cancel in-flight strategies. Pre-fix this used
+    scheduler.shutdown(wait=True) which APS 3.x AsyncIOExecutor cancels
+    silently; post-fix stop() drains through wait_idle first.
+    """
+    completed: list[str] = []
+    started = asyncio.Event()
+    release = asyncio.Event()
+
+    @offline_strategy(name="slow_to_finish", trigger=Immediate(on=[_E]), emits=[])
+    async def slow(event: _E, ctx: StrategyContext) -> None:
+        started.set()
+        await release.wait()
+        completed.append("done")
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(slow)
+    await engine.start()
+    await engine.emit(_E())
+    await asyncio.wait_for(started.wait(), timeout=2.0)
+
+    # Stop concurrently with the in-flight strategy; release it after a
+    # tick so stop() has to actually wait.
+    stop_task = asyncio.create_task(engine.stop())
+    await asyncio.sleep(0.05)
+    assert not stop_task.done()
+    release.set()
+    await asyncio.wait_for(stop_task, timeout=5.0)
+    assert completed == ["done"]
+
+
+@pytest.mark.asyncio
+async def test_active_runs_decrements_on_strategy_exception(cfg: OMEConfig) -> None:
+    """A strategy that raises (and exhausts retries → DEAD_LETTER) must
+    still release its counter — dispatch_run's finally guarantees -1.
+    """
+
+    @offline_strategy(name="boom", trigger=Immediate(on=[_E]), emits=[])
+    async def boom(event: _E, ctx: StrategyContext) -> None:
+        raise RuntimeError("strategy boom")
+
+    cfg2 = OMEConfig(
+        jobstore_path=cfg.jobstore_path,
+        config_watch=False,
+        max_retries=0,
+    )
+    engine = OfflineEngine(config=cfg2)
+    engine.register(boom)
+    await engine.start()
+    try:
+        await engine.emit(_E())
+        assert await engine.wait_idle(timeout=2.0) is True
+        runs = await engine.list_runs("boom")
+        assert len(runs) == 1
+        assert runs[0].status == RunStatus.DEAD_LETTER
+        assert engine._active_runs == 0
+    finally:
+        await engine.stop()
+
+
+@pytest.mark.asyncio
+async def test_enqueue_run_rolls_back_counter_on_add_job_failure(
+    cfg: OMEConfig, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """If APScheduler ``add_job`` raises, the matching dispatch_run never
+    runs — _enqueue_run must roll back the pre-emptive +1 itself.
+    """
+
+    @offline_strategy(name="s", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    engine = OfflineEngine(config=cfg)
+    engine.register(s)
+    await engine.start()
+    try:
+
+        def _boom(*args: Any, **kwargs: Any) -> None:
+            raise RuntimeError("add_job exploded")
+
+        monkeypatch.setattr(engine._scheduler, "add_job", _boom)
+        with pytest.raises(RuntimeError, match="add_job exploded"):
+            await engine.emit(_E())
+        assert engine._active_runs == 0
+        assert await engine.wait_idle(timeout=0.5) is True
+    finally:
+        monkeypatch.undo()
+        await engine.stop()
--- a/tests/unit/test_infra/test_ome/test_events.py
+++ b/tests/unit/test_infra/test_ome/test_events.py
@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from datetime import datetime
+
+from everos.infra.ome.events import BaseEvent, CronTick, IdleTick, ManualTick
+
+
+class _DemoEvent(BaseEvent):
+    payload: str
+
+
+def test_base_event_auto_generates_id_and_ts() -> None:
+    e = _DemoEvent(payload="x")
+    assert isinstance(e.event_id, str)
+    assert len(e.event_id) >= 32
+    assert isinstance(e.ts, datetime)
+    assert e.ts.tzinfo is not None
+
+
+def test_base_event_is_frozen() -> None:
+    e = _DemoEvent(payload="x")
+    try:
+        e.payload = "y"  # type: ignore[misc]
+    except Exception:
+        return
+    raise AssertionError("BaseEvent should be frozen")
+
+
+def test_base_event_round_trips_json() -> None:
+    e = _DemoEvent(payload="hello")
+    blob = e.model_dump_json()
+    restored = _DemoEvent.model_validate_json(blob)
+    assert restored == e
+
+
+def test_cron_tick_carries_strategy_name() -> None:
+    e = CronTick(strategy_name="profile_extraction")
+    assert e.strategy_name == "profile_extraction"
+
+
+def test_idle_tick_carries_bucket_and_seconds() -> None:
+    e = IdleTick(strategy_name="agent_skill", bucket_key="user_42", idle_seconds=900)
+    assert e.bucket_key == "user_42"
+    assert e.idle_seconds == 900
+
+
+def test_manual_tick_carries_strategy_name() -> None:
+    e = ManualTick(strategy_name="cluster_memcells")
+    assert e.strategy_name == "cluster_memcells"
--- a/tests/unit/test_infra/test_ome/test_exceptions.py
+++ b/tests/unit/test_infra/test_ome/test_exceptions.py
@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.exceptions import (
+    EmitNotDeclaredError,
+    OMEError,
+    StartupValidationError,
+)
+
+
+class _UnknownEvent(BaseEvent):
+    pass
+
+
+def test_ome_error_is_base_exception() -> None:
+    assert issubclass(OMEError, Exception)
+
+
+def test_startup_validation_error_inherits_ome_error() -> None:
+    assert issubclass(StartupValidationError, OMEError)
+
+
+def test_emit_not_declared_error_inherits_ome_error() -> None:
+    assert issubclass(EmitNotDeclaredError, OMEError)
+
+
+def test_emit_not_declared_carries_strategy_and_event() -> None:
+    ev = _UnknownEvent()
+    err = EmitNotDeclaredError(strategy="cluster_memcells", event=ev)
+    assert err.strategy == "cluster_memcells"
+    assert err.event is ev
+    assert "_UnknownEvent" in str(err)
+    assert "cluster_memcells" in str(err)
+
+
+def test_startup_validation_carries_message() -> None:
+    err = StartupValidationError("missing trigger.on")
+    assert "missing trigger.on" in str(err)
--- a/tests/unit/test_infra/test_ome/test_gates.py
+++ b/tests/unit/test_infra/test_ome/test_gates.py
@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from everos.infra.ome.gates import Counter
+
+
+def test_counter_accepts_threshold() -> None:
+    c = Counter(threshold=5)
+    assert c.threshold == 5
+    assert c.cooldown_seconds == 0
+    assert c.event_field is None
+
+
+def test_counter_with_bucket_field() -> None:
+    c = Counter(threshold=5, event_field="user_id", cooldown_seconds=120)
+    assert c.event_field == "user_id"
+    assert c.cooldown_seconds == 120
+
+
+def test_counter_rejects_zero_threshold() -> None:
+    with pytest.raises(ValidationError):
+        Counter(threshold=0)
+
+
+def test_counter_rejects_negative_cooldown() -> None:
+    with pytest.raises(ValidationError):
+        Counter(threshold=5, cooldown_seconds=-1)
--- a/tests/unit/test_infra/test_ome/test_idle_scanner.py
+++ b/tests/unit/test_infra/test_ome/test_idle_scanner.py
@ -0,0 +1,109 @@
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+
+import pytest
+
+from everos.infra.ome._background.idle_scanner import IdleScanner
+from everos.infra.ome._stores.idle import IdleStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.events import BaseEvent, IdleTick
+from everos.infra.ome.triggers import Idle
+
+
+class _M(BaseEvent):
+    user_id: str = "u1"
+
+
+@pytest.mark.asyncio
+async def test_scan_once_emits_idle_ticks(tmp_path: Path) -> None:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    idle_store = IdleStore(storage=storage)
+    now = datetime.now(UTC)
+    await idle_store.touch("s", "u_old", at=now - timedelta(seconds=2000))
+    await idle_store.touch("s", "u_fresh", at=now)
+
+    emitted: list[IdleTick] = []
+
+    async def emit(e: BaseEvent) -> None:
+        if isinstance(e, IdleTick):
+            emitted.append(e)
+
+    trigger = Idle(on=[_M], event_field="user_id", idle_seconds=900)
+    scanner = IdleScanner(
+        strategy_name="s",
+        trigger=trigger,
+        idle_store=idle_store,
+        emit=emit,
+    )
+    await scanner.scan_once(now=now)
+    assert {e.bucket_key for e in emitted} == {"u_old"}
+    assert all(e.strategy_name == "s" for e in emitted)
+
+
+@pytest.mark.asyncio
+async def test_scan_once_with_now_none_uses_current_time(tmp_path: Path) -> None:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    idle_store = IdleStore(storage=storage)
+    now = datetime.now(UTC)
+    # Insert bucket with activity timestamp older than the threshold
+    await idle_store.touch("s", "u_overdue", at=now - timedelta(seconds=2000))
+
+    emitted: list[IdleTick] = []
+
+    async def emit(e: BaseEvent) -> None:
+        if isinstance(e, IdleTick):
+            emitted.append(e)
+
+    trigger = Idle(on=[_M], event_field="user_id", idle_seconds=900)
+    scanner = IdleScanner(
+        strategy_name="s",
+        trigger=trigger,
+        idle_store=idle_store,
+        emit=emit,
+    )
+    # Call scan_once with no now= argument; should use current time internally
+    await scanner.scan_once()
+    # Should emit idle tick for overdue bucket
+    assert len(emitted) >= 1
+    assert any(e.bucket_key == "u_overdue" for e in emitted)
+    assert all(e.strategy_name == "s" for e in emitted)
+
+
+@pytest.mark.asyncio
+async def test_scan_once_isolates_failing_emit(tmp_path: Path) -> None:
+    """A single bucket's emit failure must not abort the rest of the
+    scan. Mirrors dispatcher's _safe_applies isolation: one transient
+    downstream error shouldn't drop sibling IdleTicks for this round.
+    """
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    idle_store = IdleStore(storage=storage)
+    now = datetime.now(UTC)
+    # Three overdue buckets — middle one's emit will raise.
+    for bucket in ("u_a", "u_boom", "u_c"):
+        await idle_store.touch("s", bucket, at=now - timedelta(seconds=2000))
+
+    emitted: list[str] = []
+
+    async def emit(e: BaseEvent) -> None:
+        if isinstance(e, IdleTick):
+            if e.bucket_key == "u_boom":
+                raise RuntimeError("downstream dispatch transient error")
+            emitted.append(e.bucket_key)
+
+    trigger = Idle(on=[_M], event_field="user_id", idle_seconds=900)
+    scanner = IdleScanner(
+        strategy_name="s",
+        trigger=trigger,
+        idle_store=idle_store,
+        emit=emit,
+    )
+    # Must NOT raise; emit failure for u_boom is swallowed + logged.
+    await scanner.scan_once(now=now)
+
+    # Both sibling buckets still received their IdleTick.
+    assert sorted(emitted) == ["u_a", "u_c"]
--- a/tests/unit/test_infra/test_ome/test_idle_store.py
+++ b/tests/unit/test_infra/test_ome/test_idle_store.py
@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from datetime import timedelta
+from pathlib import Path
+
+import pytest
+
+from everos.component.utils.datetime import get_now_with_timezone
+from everos.infra.ome._stores.idle import IdleStore
+from everos.infra.ome._stores.storage import OMEStorage
+
+
+@pytest.fixture
+async def store(tmp_path: Path) -> IdleStore:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    return IdleStore(storage=storage)
+
+
+@pytest.mark.asyncio
+async def test_touch_records_activity(store: IdleStore) -> None:
+    now = get_now_with_timezone()
+    await store.touch("s", "u1", at=now)
+    last = await store.get_last_activity("s", "u1")
+    assert last == now
+
+
+@pytest.mark.asyncio
+async def test_scan_idle_returns_overdue(store: IdleStore) -> None:
+    now = get_now_with_timezone()
+    old = now - timedelta(seconds=1000)
+    fresh = now - timedelta(seconds=100)
+
+    await store.touch("s", "u_old", at=old)
+    await store.touch("s", "u_fresh", at=fresh)
+
+    overdue = await store.scan_idle("s", idle_seconds=900, now=now)
+    assert overdue == ["u_old"]
+
+
+@pytest.mark.asyncio
+async def test_scan_idle_empty_when_none_overdue(store: IdleStore) -> None:
+    now = get_now_with_timezone()
+    await store.touch("s", "u1", at=now)
+    overdue = await store.scan_idle("s", idle_seconds=900, now=now)
+    assert overdue == []
+
+
+@pytest.mark.asyncio
+async def test_touch_updates_existing_row(store: IdleStore) -> None:
+    early = get_now_with_timezone() - timedelta(seconds=500)
+    late = get_now_with_timezone()
+    await store.touch("s", "u1", at=early)
+    await store.touch("s", "u1", at=late)
+    assert await store.get_last_activity("s", "u1") == late
+
+
+@pytest.mark.asyncio
+async def test_scan_idle_returns_buckets_oldest_first(store: IdleStore) -> None:
+    """``scan_idle`` must return buckets in ascending ``last_activity_ts``
+    order so IdleTick emission order is reproducible across SQLite versions
+    and query plans.
+    """
+    now = get_now_with_timezone()
+    await store.touch("s", "u_mid", at=now - timedelta(seconds=1500))
+    await store.touch("s", "u_oldest", at=now - timedelta(seconds=2000))
+    await store.touch("s", "u_newest", at=now - timedelta(seconds=1100))
+
+    overdue = await store.scan_idle("s", idle_seconds=900, now=now)
+    assert overdue == ["u_oldest", "u_mid", "u_newest"]
+
+
+@pytest.mark.asyncio
+async def test_scan_idle_uses_composite_index(tmp_path: Path) -> None:
+    """``scan_idle``'s SQL must keep ``last_activity_ts`` un-wrapped so
+    the ``(strategy_name, last_activity_ts)`` index is honoured. Verify
+    via EXPLAIN QUERY PLAN — if a future refactor wraps the column in a
+    function/CAST again, this test fails immediately instead of waiting
+    for a perf regression in production.
+    """
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+
+    cutoff_iso = "2026-05-13T00:00:00+00:00"
+    async with storage.connect() as conn:
+        cur = await conn.execute(
+            "EXPLAIN QUERY PLAN "
+            "SELECT bucket_key FROM idle_store "
+            "WHERE strategy_name = ? AND last_activity_ts <= ?",
+            ("s", cutoff_iso),
+        )
+        rows = await cur.fetchall()
+
+    plan = " ".join(str(r) for r in rows)
+    assert "idx_idle_scan" in plan, f"expected idx_idle_scan in plan, got: {plan}"
--- a/tests/unit/test_infra/test_ome/test_records.py
+++ b/tests/unit/test_infra/test_ome/test_records.py
@ -0,0 +1,177 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+import pytest
+from pydantic import ValidationError
+
+from everos.component.utils.datetime import get_now_with_timezone
+from everos.infra.ome.records import RunRecord, RunStatus, StrategyRouteInfo
+
+
+def _ok_kwargs(**overrides: Any) -> dict[str, Any]:
+    """Build a baseline-valid RunRecord kwargs dict."""
+    base: dict[str, Any] = {
+        "run_id": "r1",
+        "strategy_name": "s",
+        "status": RunStatus.RUNNING,
+        "attempt": 0,
+        "started_at": get_now_with_timezone(),
+        "event_topic": "x:Y",
+        "event_payload": "{}",
+        "max_retries_snapshot": 1,
+    }
+    base.update(overrides)
+    return base
+
+
+def test_run_status_values() -> None:
+    assert RunStatus.RUNNING.value == "running"
+    assert RunStatus.SUCCESS.value == "success"
+    assert RunStatus.FAILED.value == "failed"
+    assert RunStatus.DEAD_LETTER.value == "dead_letter"
+    assert RunStatus.CRASHED.value == "crashed"
+
+
+def test_run_record_minimal() -> None:
+    rec = RunRecord(
+        run_id="r1",
+        strategy_name="cluster",
+        status=RunStatus.RUNNING,
+        attempt=0,
+        started_at=get_now_with_timezone(),
+        event_topic="my_app.events:EpisodeSaved",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    assert rec.finished_at is None
+    assert rec.error is None
+
+
+def test_run_record_round_trips_json() -> None:
+    rec = RunRecord(
+        run_id="r1",
+        strategy_name="cluster",
+        status=RunStatus.SUCCESS,
+        attempt=0,
+        started_at=get_now_with_timezone(),
+        finished_at=get_now_with_timezone(),
+        event_topic="x:Y",
+        event_payload='{"a":1}',
+        max_retries_snapshot=1,
+    )
+    blob = rec.model_dump_json()
+    restored = RunRecord.model_validate_json(blob)
+    assert restored == rec
+
+
+def test_strategy_route_info() -> None:
+    info = StrategyRouteInfo(
+        strategy_name="profile_extraction",
+        enabled_pass=True,
+        applies_to_pass=True,
+        counter_pass=False,
+        counter_progress=(3, 5),
+    )
+    assert info.will_run is False
+    assert info.counter_progress == (3, 5)
+
+
+# ---------------------------------------------------------------------------
+# Constraint enforcement: every Field(...) / validator must actually reject
+# the bad input it claims to reject. Add a test per declared constraint so
+# accidental relaxation in the future fails CI.
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "field",
+    ["run_id", "strategy_name", "event_topic", "event_payload"],
+)
+def test_run_record_rejects_empty_identifier(field: str) -> None:
+    with pytest.raises(ValidationError, match=field):
+        RunRecord(**_ok_kwargs(**{field: ""}))
+
+
+@pytest.mark.parametrize("field", ["attempt", "max_retries_snapshot"])
+def test_run_record_rejects_negative_int(field: str) -> None:
+    with pytest.raises(ValidationError, match=field):
+        RunRecord(**_ok_kwargs(**{field: -1}))
+
+
+def test_run_record_rejects_naive_started_at() -> None:
+    naive = datetime(2026, 5, 12, 12, 0, 0)  # noqa: DTZ001 — deliberately naive
+    with pytest.raises(ValidationError, match="started_at"):
+        RunRecord(**_ok_kwargs(started_at=naive))
+
+
+def test_run_record_rejects_empty_error_when_set() -> None:
+    """error=None is allowed; error="" is not (min_length=1)."""
+    with pytest.raises(ValidationError, match="error"):
+        RunRecord(
+            **_ok_kwargs(
+                status=RunStatus.FAILED,
+                finished_at=get_now_with_timezone(),
+                error="",
+            )
+        )
+
+
+# Status-invariant violations: each must be rejected by _check_status_invariants.
+
+
+def test_running_must_have_no_finished_at() -> None:
+    with pytest.raises(ValidationError, match=r"RUNNING.*finished_at"):
+        RunRecord(
+            **_ok_kwargs(status=RunStatus.RUNNING, finished_at=get_now_with_timezone())
+        )
+
+
+def test_running_must_have_no_error() -> None:
+    with pytest.raises(ValidationError, match=r"RUNNING.*error"):
+        RunRecord(**_ok_kwargs(status=RunStatus.RUNNING, error="boom"))
+
+
+def test_success_must_have_finished_at() -> None:
+    with pytest.raises(ValidationError, match=r"success.*finished_at"):
+        RunRecord(**_ok_kwargs(status=RunStatus.SUCCESS))
+
+
+def test_success_must_have_no_error() -> None:
+    with pytest.raises(ValidationError, match=r"SUCCESS.*error"):
+        RunRecord(
+            **_ok_kwargs(
+                status=RunStatus.SUCCESS,
+                finished_at=get_now_with_timezone(),
+                error="should not be here",
+            )
+        )
+
+
+@pytest.mark.parametrize(
+    "status",
+    [RunStatus.FAILED, RunStatus.DEAD_LETTER, RunStatus.CRASHED],
+)
+def test_terminal_failure_must_have_finished_at(status: RunStatus) -> None:
+    with pytest.raises(ValidationError, match="finished_at"):
+        RunRecord(**_ok_kwargs(status=status, error="boom"))
+
+
+@pytest.mark.parametrize(
+    "status",
+    [RunStatus.FAILED, RunStatus.DEAD_LETTER, RunStatus.CRASHED],
+)
+def test_terminal_failure_must_have_error(status: RunStatus) -> None:
+    with pytest.raises(ValidationError, match="error"):
+        RunRecord(**_ok_kwargs(status=status, finished_at=get_now_with_timezone()))
+
+
+def test_strategy_route_info_rejects_empty_strategy_name() -> None:
+    with pytest.raises(ValidationError, match="strategy_name"):
+        StrategyRouteInfo(
+            strategy_name="",
+            enabled_pass=True,
+            applies_to_pass=True,
+            counter_pass=True,
+        )
--- a/tests/unit/test_infra/test_ome/test_registry.py
+++ b/tests/unit/test_infra/test_ome/test_registry.py
@ -0,0 +1,262 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from everos.infra.ome._dispatch.registry import StrategyRegistry
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.events import BaseEvent, CronTick, IdleTick, ManualTick
+from everos.infra.ome.exceptions import StartupValidationError
+from everos.infra.ome.triggers import Cron, Idle, Immediate
+
+
+class _A(BaseEvent):
+    pass
+
+
+class _B(BaseEvent):
+    pass
+
+
+def _make(
+    name: str,
+    on: list[type[BaseEvent]],
+    emits: list[type[BaseEvent]],
+):
+    @offline_strategy(name=name, trigger=Immediate(on=on), emits=emits)
+    async def _f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    return _f
+
+
+def test_register_extracts_meta() -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    assert reg.get("s1").name == "s1"
+
+
+def test_register_duplicate_raises() -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    with pytest.raises(StartupValidationError):
+        reg.register(_make("s1", [_A], [_B]))
+
+
+def test_register_non_decorated_raises() -> None:
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg = StrategyRegistry()
+    with pytest.raises(StartupValidationError):
+        reg.register(f)
+
+
+def test_replace_swaps_meta_in_place() -> None:
+    from dataclasses import replace
+
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    original = reg.get("s1")
+    new_meta = replace(original, max_retries=99)
+
+    reg.replace("s1", new_meta)
+
+    assert reg.get("s1").max_retries == 99
+    assert reg.get("s1") is new_meta
+
+
+def test_replace_unknown_strategy_raises() -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    existing = reg.get("s1")
+    with pytest.raises(KeyError):
+        reg.replace("missing", existing)
+
+
+def test_lookup_by_event() -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s_a", [_A], []))
+    reg.register(_make("s_b", [_B], []))
+    metas = reg.lookup_by_event(_A)
+    assert {m.name for m in metas} == {"s_a"}
+
+
+def test_validate_detects_cycle() -> None:
+    # s1 emits _B, listens _A;  s2 emits _A, listens _B  -> cycle
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    reg.register(_make("s2", [_B], [_A]))
+    with pytest.raises(StartupValidationError, match=r"(?i)cycle"):
+        reg.validate()
+
+
+def test_validate_passes_dag() -> None:
+    reg = StrategyRegistry()
+    reg.register(_make("s1", [_A], [_B]))
+    reg.register(_make("s2", [_B], []))
+    reg.validate()  # must not raise
+
+
+def test_lookup_by_event_finds_cron_strategy_for_cron_tick() -> None:
+    reg = StrategyRegistry()
+
+    @offline_strategy(name="cron_s", trigger=Cron(expr="0 * * * *"), emits=[])
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    metas = reg.lookup_by_event(CronTick)
+    assert [m.name for m in metas] == ["cron_s"]
+
+
+def test_lookup_by_event_finds_idle_strategy_for_idle_tick() -> None:
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="idle_s",
+        trigger=Idle(on=[_A], event_field="event_id", idle_seconds=900),
+        emits=[],
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    metas = reg.lookup_by_event(IdleTick)
+    assert [m.name for m in metas] == ["idle_s"]
+
+
+def test_lookup_by_event_returns_empty_for_manual_tick() -> None:
+    reg = StrategyRegistry()
+
+    @offline_strategy(name="manual_s", trigger=Immediate(on=[_A]), emits=[])
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    metas = reg.lookup_by_event(ManualTick)
+    assert metas == []
+
+
+class _EventWithUid(BaseEvent):
+    user_id: str
+
+
+class _EventWithoutUid(BaseEvent):
+    other: str
+
+
+def test_validate_passes_when_gate_event_field_present() -> None:
+    from everos.infra.ome.gates import Counter
+
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="s",
+        trigger=Immediate(on=[_EventWithUid]),
+        emits=[],
+        gate=Counter(threshold=3, event_field="user_id"),
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    reg.validate()  # must not raise
+
+
+def test_validate_raises_when_gate_event_field_missing_on_immediate() -> None:
+    from everos.infra.ome.gates import Counter
+
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="s",
+        trigger=Immediate(on=[_EventWithoutUid]),
+        emits=[],
+        gate=Counter(threshold=3, event_field="user_id"),
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    with pytest.raises(StartupValidationError) as exc:
+        reg.validate()
+    msg = str(exc.value)
+    assert "user_id" in msg
+    assert "_EventWithoutUid" in msg
+    assert "s" in msg
+
+
+def test_validate_raises_when_gate_event_field_missing_in_one_of_multiple() -> None:
+    from everos.infra.ome.gates import Counter
+
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="s",
+        trigger=Immediate(on=[_EventWithUid, _EventWithoutUid]),
+        emits=[],
+        gate=Counter(threshold=3, event_field="user_id"),
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    with pytest.raises(StartupValidationError):
+        reg.validate()
+
+
+def test_validate_passes_when_gate_event_field_is_none() -> None:
+    """gate.event_field=None means global bucket; no field-existence check."""
+    from everos.infra.ome.gates import Counter
+
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="s",
+        trigger=Immediate(on=[_EventWithoutUid]),
+        emits=[],
+        gate=Counter(threshold=3),  # event_field defaults to None
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    reg.validate()  # must not raise
+
+
+def test_validate_passes_when_no_gate() -> None:
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="s",
+        trigger=Immediate(on=[_EventWithoutUid]),
+        emits=[],
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    reg.validate()  # must not raise
+
+
+def test_validate_raises_when_gate_event_field_missing_on_cron_tick() -> None:
+    """Cron strategy: gate.event_field must exist on CronTick."""
+    from everos.infra.ome.gates import Counter
+
+    reg = StrategyRegistry()
+
+    @offline_strategy(
+        name="cron_s",
+        trigger=Cron(expr="0 3 * * *"),
+        emits=[],
+        gate=Counter(threshold=3, event_field="user_id"),  # not in CronTick
+    )
+    async def f(event: Any, ctx: StrategyContext) -> None:
+        return None
+
+    reg.register(f)
+    with pytest.raises(StartupValidationError):
+        reg.validate()
--- a/tests/unit/test_infra/test_ome/test_run_record_store.py
+++ b/tests/unit/test_infra/test_ome/test_run_record_store.py
@ -0,0 +1,144 @@
+"""Tests for RunRecordStore persistence layer."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.component.utils.datetime import get_now_with_timezone
+from everos.infra.ome._stores.run_record import RunRecordStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.records import RunStatus
+
+
+@pytest.fixture
+async def store(tmp_path: Path) -> RunRecordStore:
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    return RunRecordStore(storage=storage, max_records_per_strategy=3)
+
+
+@pytest.mark.asyncio
+async def test_mark_running_inserts_row(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    rec = await store.get("r1")
+    assert rec is not None
+    assert rec.status == RunStatus.RUNNING
+
+
+@pytest.mark.asyncio
+async def test_mark_success_updates_row(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    await store.mark_success(run_id="r1", finished_at=get_now_with_timezone())
+    rec = await store.get("r1")
+    assert rec.status == RunStatus.SUCCESS
+    assert rec.finished_at is not None
+
+
+@pytest.mark.asyncio
+async def test_mark_failed_records_error(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    await store.mark_failed(
+        run_id="r1", finished_at=get_now_with_timezone(), error="boom"
+    )
+    rec = await store.get("r1")
+    assert rec.status == RunStatus.FAILED
+    assert rec.error == "boom"
+
+
+@pytest.mark.asyncio
+async def test_mark_dead_letter(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=2,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=2,
+    )
+    await store.mark_dead_letter(
+        run_id="r1", finished_at=get_now_with_timezone(), error="exhausted"
+    )
+    rec = await store.get("r1")
+    assert rec.status == RunStatus.DEAD_LETTER
+
+
+@pytest.mark.asyncio
+async def test_ring_buffer_caps_strategy_records(store: RunRecordStore) -> None:
+    """Trim runs inside the same transaction as each ``mark_running``
+    insert; the per-strategy row count never exceeds the cap.
+    """
+    for i in range(5):
+        await store.mark_running(
+            run_id=f"r{i}",
+            strategy_name="s",
+            attempt=0,
+            event_topic="x:Y",
+            event_payload="{}",
+            max_retries_snapshot=1,
+        )
+        listed = await store.list_runs(strategy_name="s")
+        assert len(listed) <= 3  # never transiently above cap
+
+    listed = await store.list_runs(strategy_name="s")
+    assert [r.run_id for r in listed] == ["r4", "r3", "r2"]  # newest 3
+
+
+@pytest.mark.asyncio
+async def test_list_runs_filters_by_status(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    await store.mark_success(run_id="r1", finished_at=get_now_with_timezone())
+    await store.mark_running(
+        run_id="r2",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    success_runs = await store.list_runs(strategy_name="s", status=RunStatus.SUCCESS)
+    assert [r.run_id for r in success_runs] == ["r1"]
+
+
+@pytest.mark.asyncio
+async def test_find_running_for_crash_recovery(store: RunRecordStore) -> None:
+    await store.mark_running(
+        run_id="r1",
+        strategy_name="s",
+        attempt=0,
+        event_topic="x:Y",
+        event_payload="{}",
+        max_retries_snapshot=1,
+    )
+    running = await store.find_running()
+    assert len(running) == 1
+    assert running[0].run_id == "r1"
--- a/tests/unit/test_infra/test_ome/test_runner.py
+++ b/tests/unit/test_infra/test_ome/test_runner.py
@ -0,0 +1,223 @@
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+import pytest
+
+from everos.infra.ome._dispatch.runner import Runner
+from everos.infra.ome._stores.run_record import RunRecordStore
+from everos.infra.ome._stores.storage import OMEStorage
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.records import RunStatus
+from everos.infra.ome.triggers import Immediate
+
+
+class _E(BaseEvent):
+    user_id: str = "u1"
+
+
+@pytest.fixture
+async def setup(tmp_path: Path):
+    storage = OMEStorage(db_path=tmp_path / "ome.db")
+    await storage.init()
+    rec_store = RunRecordStore(storage=storage, max_records_per_strategy=1000)
+    sem = asyncio.Semaphore(20)
+    return rec_store, sem
+
+
+@pytest.mark.asyncio
+async def test_runner_success_marks_record(setup) -> None:
+    rec_store, sem = setup
+
+    @offline_strategy(name="ok", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+    )
+    await runner.run(
+        s._ome_strategy_meta,
+        _E(),
+        run_id="r1",
+        max_retries_snapshot=1,
+    )
+
+    rec = await rec_store.get("r1")
+    assert rec.status == RunStatus.SUCCESS
+
+
+@pytest.mark.asyncio
+async def test_runner_retries_on_failure(setup) -> None:
+    rec_store, sem = setup
+    calls = {"n": 0}
+
+    @offline_strategy(
+        name="flaky",
+        trigger=Immediate(on=[_E]),
+        emits=[],
+        max_retries=2,
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        calls["n"] += 1
+        if calls["n"] < 3:
+            raise RuntimeError("boom")
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+    )
+    await runner.run(
+        s._ome_strategy_meta,
+        _E(),
+        run_id="r1",
+        max_retries_snapshot=2,
+    )
+    assert calls["n"] == 3
+    # Final successful attempt 2 has a new run_id (not "r1");
+    # find by status=SUCCESS, strategy_name=flaky
+    success_runs = await rec_store.list_runs(
+        strategy_name="flaky",
+        status=RunStatus.SUCCESS,
+    )
+    assert len(success_runs) == 1
+    assert success_runs[0].attempt == 2
+
+
+@pytest.mark.asyncio
+async def test_runner_dead_letter_after_exhaust(setup) -> None:
+    rec_store, sem = setup
+
+    @offline_strategy(
+        name="bad",
+        trigger=Immediate(on=[_E]),
+        emits=[],
+        max_retries=1,
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        raise RuntimeError("always-fail")
+
+    dl_calls: list = []
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+        on_dead_letter=lambda r: dl_calls.append(r),
+    )
+    await runner.run(
+        s._ome_strategy_meta,
+        _E(),
+        run_id="r1",
+        max_retries_snapshot=1,
+    )
+    dead_runs = await rec_store.list_runs(
+        strategy_name="bad",
+        status=RunStatus.DEAD_LETTER,
+    )
+    assert len(dead_runs) == 1
+    assert len(dl_calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_runner_emit_must_be_declared(setup) -> None:
+    rec_store, sem = setup
+
+    class _Other(BaseEvent):
+        pass
+
+    @offline_strategy(
+        name="emit_undeclared",
+        trigger=Immediate(on=[_E]),
+        emits=[],
+    )
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        await ctx.emit(_Other())  # not declared
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+    )
+    await runner.run(
+        s._ome_strategy_meta,
+        _E(),
+        run_id="r1",
+        max_retries_snapshot=0,
+    )
+    rec = await rec_store.get("r1")
+    assert rec.status == RunStatus.DEAD_LETTER
+    assert "EmitNotDeclaredError" in (rec.error or "")
+
+
+@pytest.mark.asyncio
+async def test_runner_negative_max_retries_raises(setup) -> None:
+    """``max_retries_snapshot < 0`` is an internal-bug condition (Pydantic
+    constrains the user-supplied source to ``>= 0``), so the framework
+    fails fast rather than silently no-op the run.
+    """
+    rec_store, sem = setup
+
+    @offline_strategy(name="ok", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        return None
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+    )
+    with pytest.raises(ValueError, match=r"max_retries_snapshot must be >= 0"):
+        await runner.run(
+            s._ome_strategy_meta,
+            _E(),
+            run_id="r1",
+            max_retries_snapshot=-1,
+        )
+
+
+@pytest.mark.asyncio
+async def test_runner_aborts_silently_when_mark_running_fails(
+    setup, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """When persistence itself fails before the strategy is invoked,
+    the run must exit cleanly (no exception escaping the framework) and
+    the strategy body must NOT execute — no RUNNING row exists for
+    crash recovery to pick up, so re-execution via recovery is
+    impossible. The emergency log is the only audit trail.
+    """
+    rec_store, sem = setup
+    called = {"n": 0}
+
+    @offline_strategy(name="ok", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        called["n"] += 1
+
+    async def _boom(**_: object) -> None:
+        raise RuntimeError("disk_full")
+
+    monkeypatch.setattr(rec_store, "mark_running", _boom)
+
+    runner = Runner(
+        run_record_store=rec_store,
+        engine_sem=sem,
+        emit_hook=_no_emit,
+    )
+    # Must NOT raise; the framework swallows + logs.
+    await runner.run(
+        s._ome_strategy_meta,
+        _E(),
+        run_id="r1",
+        max_retries_snapshot=1,
+    )
+    assert called["n"] == 0
+
+
+async def _no_emit(event: BaseEvent) -> None:
+    return None
--- a/tests/unit/test_infra/test_ome/test_storage.py
+++ b/tests/unit/test_infra/test_ome/test_storage.py
@ -0,0 +1,144 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import aiosqlite
+import pytest
+
+from everos.infra.ome._stores.storage import OMEStorage
+
+
+@pytest.mark.asyncio
+async def test_storage_creates_db_and_tables(tmp_path: Path) -> None:
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    assert db.exists()
+    async with aiosqlite.connect(db) as conn:
+        cur = await conn.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
+        )
+        names = {row[0] for row in await cur.fetchall()}
+    assert {"counter_store", "idle_store", "run_record"}.issubset(names)
+
+
+@pytest.mark.asyncio
+async def test_storage_applies_pragmas(tmp_path: Path) -> None:
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    async with aiosqlite.connect(db) as conn:
+        cur = await conn.execute("PRAGMA journal_mode")
+        mode = (await cur.fetchone())[0]
+    assert mode == "wal"
+
+
+@pytest.mark.asyncio
+async def test_storage_init_is_idempotent(tmp_path: Path) -> None:
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+    await storage.init()  # second call must not raise
+
+
+@pytest.mark.asyncio
+async def test_storage_creates_parent_dir(tmp_path: Path) -> None:
+    db = tmp_path / "nested" / "dir" / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+    assert db.exists()
+
+
+@pytest.mark.asyncio
+async def test_storage_connect_applies_per_connection_pragmas(
+    tmp_path: Path,
+) -> None:
+    """``synchronous`` and ``cache_size`` are per-connection PRAGMAs:
+    SQLite resets them to defaults on every new connection. The
+    ``OMEStorage.connect`` wrapper must re-apply them or the module
+    docstring's promise is silently broken.
+    """
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    async with storage.connect() as conn:
+        sync_row = await (await conn.execute("PRAGMA synchronous")).fetchone()
+        cache_row = await (await conn.execute("PRAGMA cache_size")).fetchone()
+        busy_row = await (await conn.execute("PRAGMA busy_timeout")).fetchone()
+
+    # synchronous: 0=OFF, 1=NORMAL, 2=FULL, 3=EXTRA
+    assert sync_row[0] == 1
+    # cache_size: negative value is "kibibytes of memory"
+    assert cache_row[0] == -65536
+    # busy_timeout: ms before SQLITE_BUSY is raised on contended writes
+    assert busy_row[0] == 5000
+
+
+@pytest.mark.asyncio
+async def test_storage_raw_aiosqlite_connect_does_not_carry_per_conn_pragmas(
+    tmp_path: Path,
+) -> None:
+    """Sanity check that documents why :meth:`OMEStorage.connect` exists:
+    opening the same db with raw ``aiosqlite.connect`` yields a connection
+    where ``synchronous`` is at SQLite's default (FULL=2), not NORMAL.
+    """
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    async with aiosqlite.connect(db) as conn:
+        sync_row = await (await conn.execute("PRAGMA synchronous")).fetchone()
+
+    assert sync_row[0] == 2  # default FULL — confirms scope is per-connection
+
+
+@pytest.mark.asyncio
+async def test_storage_transaction_commits_on_success(tmp_path: Path) -> None:
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    async with storage.transaction() as conn:
+        await conn.execute(
+            "INSERT INTO counter_store (strategy_name, bucket_key, counter) "
+            "VALUES (?, ?, ?)",
+            ("s", "u1", 42),
+        )
+
+    async with storage.connect() as conn:
+        cur = await conn.execute(
+            "SELECT counter FROM counter_store WHERE strategy_name=? AND bucket_key=?",
+            ("s", "u1"),
+        )
+        row = await cur.fetchone()
+    assert row is not None and row[0] == 42
+
+
+@pytest.mark.asyncio
+async def test_storage_transaction_rolls_back_on_exception(tmp_path: Path) -> None:
+    db = tmp_path / "ome.db"
+    storage = OMEStorage(db_path=db)
+    await storage.init()
+
+    class _BoomError(Exception):
+        pass
+
+    with pytest.raises(_BoomError):
+        async with storage.transaction() as conn:
+            await conn.execute(
+                "INSERT INTO counter_store (strategy_name, bucket_key, counter) "
+                "VALUES (?, ?, ?)",
+                ("s", "u1", 42),
+            )
+            raise _BoomError
+
+    async with storage.connect() as conn:
+        cur = await conn.execute(
+            "SELECT counter FROM counter_store WHERE strategy_name=? AND bucket_key=?",
+            ("s", "u1"),
+        )
+        row = await cur.fetchone()
+    assert row is None
--- a/tests/unit/test_infra/test_ome/test_testing_harness.py
+++ b/tests/unit/test_infra/test_ome/test_testing_harness.py
@ -0,0 +1,44 @@
+"""Tests for OME testing helpers (FakeStrategyContext + StrategyTestHarness)."""
+
+from __future__ import annotations
+
+import pytest
+
+from everos.infra.ome.context import StrategyContext
+from everos.infra.ome.decorator import offline_strategy
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.testing import FakeStrategyContext, StrategyTestHarness
+from everos.infra.ome.triggers import Immediate
+
+
+class _E(BaseEvent):
+    """Simple test event."""
+
+    pass
+
+
+@pytest.mark.asyncio
+async def test_fake_strategy_context_collects_emits() -> None:
+    """FakeStrategyContext should collect emit() calls into a list."""
+    ctx = FakeStrategyContext()
+    await ctx.emit(_E())
+    assert len(ctx.emitted) == 1
+
+
+@pytest.mark.asyncio
+async def test_harness_runs_strategy_end_to_end() -> None:
+    """StrategyTestHarness should execute a strategy end-to-end."""
+    seen: list[BaseEvent] = []
+
+    @offline_strategy(name="s_t23", trigger=Immediate(on=[_E]), emits=[])
+    async def s(event: _E, ctx: StrategyContext) -> None:
+        seen.append(event)
+
+    async with StrategyTestHarness() as h:
+        h.register(s)
+        await h.start()
+        await h.emit(_E())
+        await h.drain(timeout=5)
+        runs = await h.list_runs("s_t23")
+        assert len(runs) == 1
+        assert len(seen) == 1
--- a/tests/unit/test_infra/test_ome/test_triggers.py
+++ b/tests/unit/test_infra/test_ome/test_triggers.py
@ -0,0 +1,150 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from everos.infra.ome.events import BaseEvent
+from everos.infra.ome.triggers import Cron, Idle, Immediate
+
+
+class _A(BaseEvent):
+    pass
+
+
+class _B(BaseEvent):
+    pass
+
+
+class _EventWithUserId(BaseEvent):
+    user_id: str
+
+
+def test_immediate_accepts_event_classes() -> None:
+    t = Immediate(on=[_A, _B])
+    assert t.on == [_A, _B]
+
+
+def test_immediate_rejects_empty_on() -> None:
+    with pytest.raises(ValidationError):
+        Immediate(on=[])
+
+
+def test_cron_accepts_expression() -> None:
+    t = Cron(expr="0 3 * * *")
+    assert t.expr == "0 3 * * *"
+
+
+def test_cron_rejects_blank() -> None:
+    with pytest.raises(ValidationError):
+        Cron(expr="")
+
+
+def test_idle_defaults_scan_interval() -> None:
+    t = Idle(on=[_EventWithUserId], event_field="user_id", idle_seconds=900)
+    assert t.scan_interval_seconds == 60
+
+
+def test_idle_rejects_negative_idle_seconds() -> None:
+    with pytest.raises(ValidationError):
+        Idle(on=[_EventWithUserId], event_field="user_id", idle_seconds=-1)
+
+
+def test_cron_accepts_valid_crontab() -> None:
+    t = Cron(expr="0 3 * * *")
+    assert t.expr == "0 3 * * *"
+
+
+def test_cron_rejects_invalid_crontab() -> None:
+    with pytest.raises(ValidationError) as exc:
+        Cron(expr="not a cron")
+    assert "expr" in str(exc.value)
+
+
+def test_cron_rejects_out_of_range_field() -> None:
+    # APS raises on out-of-range fields (e.g. minute=60)
+    with pytest.raises(ValidationError):
+        Cron(expr="60 0 * * *")
+
+
+def test_idle_accepts_existing_event_field() -> None:
+    t = Idle(
+        on=[_EventWithUserId],
+        event_field="user_id",
+        idle_seconds=30,
+        scan_interval_seconds=10,
+    )
+    assert t.event_field == "user_id"
+
+
+def test_idle_rejects_missing_event_field() -> None:
+    with pytest.raises(ValidationError) as exc:
+        Idle(on=[_EventWithUserId], event_field="bad_name", idle_seconds=30)
+    msg = str(exc.value)
+    assert "bad_name" in msg
+    assert "user_id" in msg
+
+
+def test_idle_validator_runs_on_model_validate() -> None:
+    base = Idle(
+        on=[_EventWithUserId],
+        event_field="user_id",
+        idle_seconds=30,
+        scan_interval_seconds=10,
+    )
+    with pytest.raises(ValidationError):
+        Idle.model_validate({**base.model_dump(), "event_field": "nope"})
+
+
+class _AnotherEventWithUserId(BaseEvent):
+    user_id: str
+
+
+class _EventWithoutUserId(BaseEvent):
+    other: str
+
+
+def test_idle_accepts_multiple_event_classes() -> None:
+    t = Idle(
+        on=[_EventWithUserId, _AnotherEventWithUserId],
+        event_field="user_id",
+        idle_seconds=30,
+        scan_interval_seconds=10,
+    )
+    assert t.on == [_EventWithUserId, _AnotherEventWithUserId]
+
+
+def test_idle_rejects_event_field_missing_in_any_class() -> None:
+    with pytest.raises(ValidationError) as exc:
+        Idle(
+            on=[_EventWithUserId, _EventWithoutUserId],
+            event_field="user_id",
+            idle_seconds=30,
+            scan_interval_seconds=10,
+        )
+    msg = str(exc.value)
+    assert "user_id" in msg
+    assert "_EventWithoutUserId" in msg
+
+
+def test_idle_rejects_scan_interval_exceeding_half_idle() -> None:
+    """The Idle docstring promises scan cadence <= idle_seconds // 2 so the
+    scanner has at least two chances to observe an idle bucket before its
+    silence window expires."""
+    with pytest.raises(ValidationError, match="scan_interval_seconds"):
+        Idle(
+            on=[_EventWithUserId],
+            event_field="user_id",
+            idle_seconds=30,
+            scan_interval_seconds=20,
+        )
+
+
+def test_idle_accepts_scan_interval_at_half_idle() -> None:
+    """Boundary: scan_interval_seconds == idle_seconds // 2 is accepted."""
+    t = Idle(
+        on=[_EventWithUserId],
+        event_field="user_id",
+        idle_seconds=60,
+        scan_interval_seconds=30,
+    )
+    assert t.scan_interval_seconds == 30
--- a/tests/unit/test_infra/test_sqlite/init.py
+++ b/tests/unit/test_infra/test_sqlite/init.py
--- a/tests/unit/test_infra/test_sqlite/test_repos/init.py
+++ b/tests/unit/test_infra/test_sqlite/test_repos/init.py
--- a/tests/unit/test_infra/test_sqlite/test_repos/test_cluster.py
+++ b/tests/unit/test_infra/test_sqlite/test_repos/test_cluster.py
@ -0,0 +1,298 @@
+"""Tests for :class:`_ClusterRepo` — cluster + cluster_member persistence.
+
+Verifies the boundary translations between the algo value object
+(:class:`everalgo.clustering.Cluster`) and the two-table storage shape:
+
+- centroid ``np.ndarray`` ↔ raw ``bytes``,
+- ``last_ts`` int ms-epoch stored verbatim (no datetime conversion),
+- ``preview`` ``list[str]`` ↔ JSON,
+- ``members`` ``list[str]`` ↔ ``cluster_member`` rows (forward + reverse).
+
+The repo is the only path that touches the storage; downstream cluster
+strategies must always see a fully-hydrated :class:`AlgoCluster` on read.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+from everalgo.clustering import Cluster as AlgoCluster
+from sqlmodel import SQLModel
+
+from everos.config import SqliteSettings
+from everos.core.persistence import (
+    MemoryRoot,
+    create_session_factory,
+    create_system_engine,
+)
+from everos.infra.persistence.sqlite.repos.cluster import (
+    _ClusterRepo,
+    mint_cluster_id,
+)
+
+
+@pytest.fixture
+async def repo(tmp_path: Path) -> _ClusterRepo:
+    mr = MemoryRoot(tmp_path)
+    mr.ensure()
+    engine = create_system_engine(mr.system_db, SqliteSettings())
+    factory = create_session_factory(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(SQLModel.metadata.create_all)
+    return _ClusterRepo(session_factory=factory)
+
+
+def _make_cluster(
+    *,
+    cluster_id: str,
+    centroid_vals: list[float],
+    members: list[str],
+    last_ts_ms: int = 1_700_000_000_000,
+    count: int = 1,
+    preview: list[str] | None = None,
+) -> AlgoCluster:
+    return AlgoCluster(
+        id=cluster_id,
+        centroid=np.array(centroid_vals, dtype=np.float32),
+        count=count,
+        last_ts=last_ts_ms,
+        preview=preview or [],
+        members=members,
+    )
+
+
+def test_mint_cluster_id_shape() -> None:
+    cid = mint_cluster_id()
+    assert cid.startswith("cl_")
+    assert len(cid) == 3 + 12  # ``cl_`` + 12 hex chars
+
+
+# ── round-trip ─────────────────────────────────────────────────────────
+
+
+async def test_upsert_then_list_round_trips_full_algo_cluster(
+    repo: _ClusterRepo,
+) -> None:
+    """Insert → list — every algo field survives storage."""
+    cluster = _make_cluster(
+        cluster_id="cl_aaa000000001",
+        centroid_vals=[0.25, -0.5, 0.75],
+        members=["mc_one", "mc_two"],
+        last_ts_ms=1_700_000_001_500,
+        count=2,
+        preview=["alice likes hiking", "alice plans tokyo"],
+    )
+    await repo.upsert_with_members(
+        cluster,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+
+    rows = await repo.list_for_owner("u_alice", "user_memory")
+    assert len(rows) == 1
+    got = rows[0]
+    assert got.id == "cl_aaa000000001"
+    assert got.count == 2
+    assert got.last_ts == 1_700_000_001_500
+    assert got.preview == ["alice likes hiking", "alice plans tokyo"]
+    assert got.members == ["mc_one", "mc_two"]
+    np.testing.assert_allclose(
+        np.asarray(got.centroid),
+        np.array([0.25, -0.5, 0.75], dtype=np.float32),
+    )
+
+
+async def test_list_for_owner_isolates_by_owner_and_kind(
+    repo: _ClusterRepo,
+) -> None:
+    """Different owner_id or different kind = separate buckets."""
+    alice = _make_cluster(
+        cluster_id="cl_alice00000001",
+        centroid_vals=[1.0, 0.0],
+        members=["mc_a"],
+    )
+    bob = _make_cluster(
+        cluster_id="cl_bob0000000001",
+        centroid_vals=[0.0, 1.0],
+        members=["mc_b"],
+    )
+    agent_case = _make_cluster(
+        cluster_id="cl_case0000001",
+        centroid_vals=[0.5, 0.5],
+        members=["ac_20260517_0001"],
+    )
+    await repo.upsert_with_members(
+        alice,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+    await repo.upsert_with_members(
+        bob,
+        owner_id="u_bob",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+    await repo.upsert_with_members(
+        agent_case,
+        owner_id="agent_42",
+        owner_type="agent",
+        kind="agent_case",
+        member_type="case",
+    )
+
+    alice_rows = await repo.list_for_owner("u_alice", "user_memory")
+    bob_rows = await repo.list_for_owner("u_bob", "user_memory")
+    agent_rows = await repo.list_for_owner("agent_42", "agent_case")
+    assert [r.id for r in alice_rows] == ["cl_alice00000001"]
+    assert [r.id for r in bob_rows] == ["cl_bob0000000001"]
+    assert [r.id for r in agent_rows] == ["cl_case0000001"]
+
+
+# ── upsert (idempotency + members merge) ────────────────────────────────
+
+
+async def test_upsert_appends_new_members_and_overwrites_scalar_fields(
+    repo: _ClusterRepo,
+) -> None:
+    """A second upsert with new members appends; centroid / count / preview replace."""
+    initial = _make_cluster(
+        cluster_id="cl_xxxxxxxxxxx1",
+        centroid_vals=[1.0, 0.0],
+        members=["mc_one"],
+        count=1,
+        preview=["first sample"],
+    )
+    await repo.upsert_with_members(
+        initial,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+
+    # Merge: same cluster_id, count up, member list grew, centroid shifted.
+    updated = _make_cluster(
+        cluster_id="cl_xxxxxxxxxxx1",
+        centroid_vals=[0.5, 0.5],
+        members=["mc_one", "mc_two"],
+        count=2,
+        preview=["first sample", "second sample"],
+        last_ts_ms=1_700_000_002_000,
+    )
+    await repo.upsert_with_members(
+        updated,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+
+    rows = await repo.list_for_owner("u_alice", "user_memory")
+    assert len(rows) == 1
+    got = rows[0]
+    assert got.count == 2
+    assert got.members == ["mc_one", "mc_two"]
+    assert got.preview == ["first sample", "second sample"]
+    np.testing.assert_allclose(
+        np.asarray(got.centroid),
+        np.array([0.5, 0.5], dtype=np.float32),
+    )
+
+
+async def test_upsert_is_idempotent_under_retry(repo: _ClusterRepo) -> None:
+    """OME at-least-once retry: same upsert twice → state unchanged, no duplicates."""
+    cluster = _make_cluster(
+        cluster_id="cl_idempot00001",
+        centroid_vals=[0.1, 0.9],
+        members=["mc_one", "mc_two"],
+        count=2,
+    )
+    await repo.upsert_with_members(
+        cluster,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+    await repo.upsert_with_members(
+        cluster,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+    rows = await repo.list_for_owner("u_alice", "user_memory")
+    assert len(rows) == 1
+    assert rows[0].members == ["mc_one", "mc_two"]
+
+
+async def test_upsert_rejects_unset_cluster_id(repo: _ClusterRepo) -> None:
+    """Algo's ``Cluster.id`` is caller-supplied — None is a programming error."""
+    cluster = AlgoCluster(
+        id=None,
+        centroid=np.array([1.0], dtype=np.float32),
+        count=1,
+        last_ts=1_700_000_000_000,
+        preview=[],
+        members=["mc_one"],
+    )
+    with pytest.raises(ValueError, match="cluster_id"):
+        await repo.upsert_with_members(
+            cluster,
+            owner_id="u_alice",
+            owner_type="user",
+            kind="user_memory",
+            member_type="memcell",
+        )
+
+
+# ── reverse lookup ──────────────────────────────────────────────────────
+
+
+async def test_find_cluster_id_for_member_reverse_lookup(
+    repo: _ClusterRepo,
+) -> None:
+    """``(member_type, member_id) → cluster_id`` index works both ways across kinds."""
+    user_cluster = _make_cluster(
+        cluster_id="cl_user0000001",
+        centroid_vals=[1.0, 0.0],
+        members=["mc_one"],
+    )
+    case_cluster = _make_cluster(
+        cluster_id="cl_case0000001",
+        centroid_vals=[0.0, 1.0],
+        members=["ac_20260517_0001"],
+    )
+    await repo.upsert_with_members(
+        user_cluster,
+        owner_id="u_alice",
+        owner_type="user",
+        kind="user_memory",
+        member_type="memcell",
+    )
+    await repo.upsert_with_members(
+        case_cluster,
+        owner_id="agent_42",
+        owner_type="agent",
+        kind="agent_case",
+        member_type="case",
+    )
+
+    assert (
+        await repo.find_cluster_id_for_member("memcell", "mc_one") == "cl_user0000001"
+    )
+    assert (
+        await repo.find_cluster_id_for_member("case", "ac_20260517_0001")
+        == "cl_case0000001"
+    )
+    # Type-discriminated: same id under wrong type misses.
+    assert await repo.find_cluster_id_for_member("case", "mc_one") is None
+    assert await repo.find_cluster_id_for_member("memcell", "ac_20260517_0001") is None
+    assert await repo.find_cluster_id_for_member("memcell", "mc_missing") is None
--- a/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py
+++ b/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py
@ -0,0 +1,508 @@
+"""Tests for :class:`_MdChangeStateRepo` — cascade work-queue persistence.
+
+Builds a fresh tmp-file SQLite engine per test (the in-memory ``sqlite``
+driver can't share schema across concurrent connections), wires a
+private repo instance to its session factory, then exercises every
+public method against the live database — no mocks, no in-memory
+shortcuts.
+
+Covers the unit-test matrix from
+``16_cascade_impl_design.md`` §14 for this commit:
+
+- ``upsert`` — LSN monotonic across the same path, retry_count resets.
+- ``claim_one`` — atomic; concurrent racers split 1 winner / N losers.
+- ``reset_retryable_to_pending`` — only ``retryable=TRUE`` rows move.
+
+Plus the rest of the repo surface (mark_done / mark_failed /
+queue_summary / list_failed / force_enqueue / claim_pending_batch).
+"""
+
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+import pytest
+from sqlmodel import SQLModel
+
+from everos.config import SqliteSettings
+from everos.core.persistence import (
+    MemoryRoot,
+    create_session_factory,
+    create_system_engine,
+)
+from everos.infra.persistence.sqlite.repos.md_change_state import (
+    _MdChangeStateRepo,
+)
+
+
+@pytest.fixture
+async def repo(tmp_path: Path) -> _MdChangeStateRepo:
+    """Per-test repo wired to a fresh tmp SQLite DB with schema applied."""
+    mr = MemoryRoot(tmp_path)
+    mr.ensure()
+    engine = create_system_engine(mr.system_db, SqliteSettings())
+    factory = create_session_factory(engine)
+    async with engine.begin() as conn:
+        await conn.run_sync(SQLModel.metadata.create_all)
+    return _MdChangeStateRepo(session_factory=factory)
+
+
+# ── upsert ──────────────────────────────────────────────────────────────
+
+
+async def test_upsert_assigns_monotonic_lsn(repo: _MdChangeStateRepo) -> None:
+    """Two distinct paths get strictly increasing LSNs."""
+    lsn_a = await repo.upsert(
+        "users/u/episodes/episode-2026-05-12.md",
+        kind="episode",
+        change_type="added",
+        mtime=1.0,
+    )
+    lsn_b = await repo.upsert(
+        "users/u/episodes/episode-2026-05-13.md",
+        kind="episode",
+        change_type="added",
+        mtime=2.0,
+    )
+    assert lsn_a == 1
+    assert lsn_b == 2
+
+
+async def test_upsert_same_path_bumps_lsn_and_resets_retry(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Re-enqueueing the same path bumps LSN and clears prior failure state."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    # Simulate a worker run that failed (retryable): claim then fail.
+    await repo.claim_one(path)
+    await repo.mark_failed(path, retryable=True, error="503", new_retry_count=3)
+
+    lsn_after = await repo.upsert(
+        path, kind="episode", change_type="modified", mtime=2.0
+    )
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.lsn == lsn_after
+    assert lsn_after > 1
+    # State reset back to pending; failure metadata cleared.
+    assert row.status == "pending"
+    assert row.retry_count == 0
+    assert row.error is None
+    assert row.retryable is None
+    # Re-enqueue refreshes change_type / mtime to the new event.
+    assert row.change_type == "modified"
+    assert row.mtime == 2.0
+
+
+# ── force_enqueue ───────────────────────────────────────────────────────
+
+
+async def test_force_enqueue_resurrects_done_row(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """`cascade sync --path` re-enqueues even a row that already landed."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    await repo.claim_one(path)
+    await repo.mark_done(path)
+
+    lsn = await repo.force_enqueue(path, "episode")
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.lsn == lsn
+    assert row.status == "pending"
+    assert row.change_type == "modified"
+
+
+# ── claim_one ───────────────────────────────────────────────────────────
+
+
+async def test_claim_one_returns_row_when_pending(
+    repo: _MdChangeStateRepo,
+) -> None:
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    row = await repo.claim_one(path)
+    assert row is not None
+    assert row.md_path == path
+    assert row.status == "processing"
+    assert row.last_attempt_at is not None
+
+
+async def test_claim_one_returns_none_when_already_processing(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Second claim of the same row returns None — claim is one-shot."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    first = await repo.claim_one(path)
+    assert first is not None
+    second = await repo.claim_one(path)
+    assert second is None
+
+
+async def test_claim_one_race_only_one_winner(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Three concurrent claims on the same row: exactly one wins."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    results = await asyncio.gather(
+        repo.claim_one(path),
+        repo.claim_one(path),
+        repo.claim_one(path),
+    )
+    winners = [r for r in results if r is not None]
+    assert len(winners) == 1
+
+
+# ── claim_pending_batch ─────────────────────────────────────────────────
+
+
+async def test_claim_pending_batch_returns_in_lsn_order(
+    repo: _MdChangeStateRepo,
+) -> None:
+    paths = [f"users/u/episodes/e-{i}.md" for i in range(3)]
+    for p in paths:
+        await repo.upsert(p, kind="episode", change_type="added", mtime=0.0)
+
+    batch = await repo.claim_pending_batch(limit=10)
+    assert [r.md_path for r in batch] == paths
+    assert all(r.status == "processing" for r in batch)
+
+
+async def test_claim_pending_batch_skips_already_claimed(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Already-processing rows are not re-claimed."""
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("b.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("a.md")
+
+    batch = await repo.claim_pending_batch(limit=10)
+    assert [r.md_path for r in batch] == ["b.md"]
+
+
+async def test_claim_pending_batch_zero_limit_returns_empty(
+    repo: _MdChangeStateRepo,
+) -> None:
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    assert await repo.claim_pending_batch(limit=0) == []
+
+
+# ── mark_done / mark_failed ─────────────────────────────────────────────
+
+
+async def test_mark_done_transitions_processing_to_done(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """``processing → done`` lands a clean terminal row (no error fields)."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    await repo.claim_one(path)
+    await repo.mark_done(path)
+
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.status == "done"
+    assert row.error is None
+    assert row.retryable is None
+
+
+async def test_mark_failed_records_retryable_flag(
+    repo: _MdChangeStateRepo,
+) -> None:
+    path = "users/u/episodes/episode-2026-05-12.md"
+    await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    await repo.claim_one(path)
+    await repo.mark_failed(
+        path, retryable=False, error="YAML parse: line 5", new_retry_count=0
+    )
+
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.status == "failed"
+    assert row.retryable is False
+    assert row.error == "YAML parse: line 5"
+    assert row.retry_count == 0
+
+
+# ── Race: re-enqueue during processing must win over stale mark_xxx ─────
+#
+# Reproduces the Bug A scenario:
+#   T0  watcher upsert       → status=pending,    lsn=1
+#   T1  worker claim_one     → status=processing, lsn=1
+#   T2  watcher upsert again → status=pending,    lsn=2  (on_conflict_do_update)
+#   T3  worker (stale claim) finishes handler
+#   T4  worker mark_xxx      → must no-op because status != processing
+#
+# Without the guard, T4 overwrites T2's pending and the worker never
+# re-processes the latest md state.
+
+
+async def test_mark_done_noop_when_row_reverted_to_pending(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Concurrent upsert during processing → mark_done must not overwrite it."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    # T0: watcher enqueues.
+    lsn_1 = await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    # T1: worker claims.
+    claimed = await repo.claim_one(path)
+    assert claimed is not None
+    # T2: watcher re-enqueues — row flipped back to pending with a fresh lsn.
+    lsn_2 = await repo.upsert(path, kind="episode", change_type="modified", mtime=2.0)
+    assert lsn_2 > lsn_1
+    # T4: stale mark_done — guard must make this a no-op.
+    await repo.mark_done(path)
+
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.status == "pending"  # not "done"
+    assert row.lsn == lsn_2  # upsert's lsn survives
+    assert row.change_type == "modified"  # upsert's payload survives
+    assert row.mtime == 2.0
+
+
+async def test_mark_failed_noop_when_row_reverted_to_pending(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Concurrent upsert during processing → mark_failed must not overwrite it."""
+    path = "users/u/episodes/episode-2026-05-12.md"
+    lsn_1 = await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    claimed = await repo.claim_one(path)
+    assert claimed is not None
+    lsn_2 = await repo.upsert(path, kind="episode", change_type="modified", mtime=2.0)
+    assert lsn_2 > lsn_1
+    await repo.mark_failed(path, retryable=True, error="503", new_retry_count=2)
+
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.status == "pending"  # not "failed"
+    assert row.lsn == lsn_2
+    assert row.error is None  # upsert cleared the error fields
+    assert row.retryable is None
+    assert row.retry_count == 0
+
+
+async def test_mark_done_concurrent_with_upsert_preserves_reenqueue(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """asyncio.gather(upsert, mark_done): final state never loses the upsert.
+
+    Two valid commit orderings:
+      * upsert first → mark_done sees status != processing → no-op
+        → final = pending(lsn=2)
+      * mark_done first → row=done(lsn=1) → upsert flips back to pending(lsn=2)
+        → final = pending(lsn=2)
+
+    Both orderings converge on the same invariant: the re-enqueue wins.
+    """
+    path = "users/u/episodes/episode-2026-05-12.md"
+    lsn_1 = await repo.upsert(path, kind="episode", change_type="added", mtime=1.0)
+    await repo.claim_one(path)
+
+    # Race the two writes. SQLite WAL serialises commits, so one is
+    # ordered before the other — but the test does not pin which.
+    await asyncio.gather(
+        repo.upsert(path, kind="episode", change_type="modified", mtime=2.0),
+        repo.mark_done(path),
+    )
+
+    row = await repo.get_by_id(path)
+    assert row is not None
+    assert row.status == "pending"
+    assert row.lsn > lsn_1
+    assert row.change_type == "modified"
+    assert row.mtime == 2.0
+
+
+# ── reset_retryable_to_pending ──────────────────────────────────────────
+
+
+async def test_reset_retryable_to_pending_moves_only_retryable(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """`cascade fix --apply` semantics: only retryable=TRUE rows move."""
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("b.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("c.md", kind="episode", change_type="added", mtime=0.0)
+
+    await repo.claim_one("a.md")
+    await repo.mark_failed("a.md", retryable=True, error="503", new_retry_count=3)
+    await repo.claim_one("b.md")
+    await repo.mark_failed("b.md", retryable=False, error="YAML", new_retry_count=0)
+    # c.md remains pending.
+
+    moved = await repo.reset_retryable_to_pending()
+    assert moved == 1
+
+    a = await repo.get_by_id("a.md")
+    b = await repo.get_by_id("b.md")
+    assert a is not None and a.status == "pending"
+    assert a.retry_count == 0
+    assert a.retryable is None
+    assert a.error is None
+    assert b is not None and b.status == "failed"
+    assert b.retryable is False
+
+
+async def test_reset_retryable_to_pending_zero_when_none_eligible(
+    repo: _MdChangeStateRepo,
+) -> None:
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("a.md")
+    await repo.mark_failed("a.md", retryable=False, error="YAML", new_retry_count=0)
+    assert await repo.reset_retryable_to_pending() == 0
+
+
+# ── list_failed ─────────────────────────────────────────────────────────
+
+
+async def test_list_failed_orders_by_lsn(repo: _MdChangeStateRepo) -> None:
+    for path in ("a.md", "b.md", "c.md"):
+        await repo.upsert(path, kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("a.md")
+    await repo.mark_failed("a.md", retryable=True, error="x", new_retry_count=3)
+    await repo.claim_one("c.md")
+    await repo.mark_failed("c.md", retryable=False, error="y", new_retry_count=0)
+
+    rows = await repo.list_failed()
+    assert [r.md_path for r in rows] == ["a.md", "c.md"]
+
+
+# ── queue_summary ───────────────────────────────────────────────────────
+
+
+async def test_queue_summary_aggregates_all_states(
+    repo: _MdChangeStateRepo,
+) -> None:
+    # Pending: 2 (one rolled through processing)
+    await repo.upsert("p1.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("p2.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("p2.md")  # → processing, still counts as pending.
+    # Done: 1 (full claim → mark_done path matches production flow).
+    await repo.upsert("d.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("d.md")
+    await repo.mark_done("d.md")
+    # Failed retryable: 1
+    await repo.upsert("fr.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("fr.md")
+    await repo.mark_failed("fr.md", retryable=True, error="503", new_retry_count=3)
+    # Failed permanent: 1
+    await repo.upsert("fp.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("fp.md")
+    await repo.mark_failed("fp.md", retryable=False, error="YAML", new_retry_count=0)
+
+    summary = await repo.queue_summary()
+    assert summary.pending == 2
+    assert summary.done == 1
+    assert summary.failed_retryable == 1
+    assert summary.failed_permanent == 1
+    # 5 upserts → max LSN 5; last_processed = max among done/failed.
+    assert summary.max_lsn == 5
+    assert summary.last_processed_lsn == 5
+
+
+async def test_queue_summary_empty_table(repo: _MdChangeStateRepo) -> None:
+    summary = await repo.queue_summary()
+    assert summary == _empty_summary()
+
+
+def _empty_summary() -> object:
+    from everos.infra.persistence.sqlite import QueueSummary
+
+    return QueueSummary(
+        pending=0,
+        done=0,
+        failed_retryable=0,
+        failed_permanent=0,
+        max_lsn=0,
+        last_processed_lsn=0,
+    )
+
+
+# ── recover_orphan_processing ───────────────────────────────────────────
+
+
+async def test_recover_orphan_processing_resets_stale_rows(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Crash recovery: every ``processing`` row goes back to ``pending``."""
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("b.md", kind="episode", change_type="added", mtime=0.0)
+    # Simulate a worker that claimed both but died before mark_done/failed.
+    await repo.claim_one("a.md")
+    await repo.claim_one("b.md")
+
+    moved = await repo.recover_orphan_processing()
+    assert moved == 2
+    a = await repo.get_by_id("a.md")
+    b = await repo.get_by_id("b.md")
+    assert a is not None and a.status == "pending"
+    assert b is not None and b.status == "pending"
+    # last_attempt_at cleared so the next claim records the new attempt.
+    assert a.last_attempt_at is None
+    assert b.last_attempt_at is None
+
+
+async def test_recover_orphan_processing_zero_when_clean(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """No rows in ``processing`` → returns 0, leaves the rest alone."""
+    await repo.upsert("a.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("a.md")
+    await repo.mark_done("a.md")
+    assert await repo.recover_orphan_processing() == 0
+    row = await repo.get_by_id("a.md")
+    assert row is not None
+    assert row.status == "done"
+
+
+async def test_recover_orphan_processing_only_touches_processing_rows(
+    repo: _MdChangeStateRepo,
+) -> None:
+    """Pending / done / failed rows are untouched."""
+    await repo.upsert("p.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("d.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("f.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.upsert("proc.md", kind="episode", change_type="added", mtime=0.0)
+    await repo.claim_one("d.md")
+    await repo.mark_done("d.md")
+    await repo.claim_one("f.md")
+    await repo.mark_failed("f.md", retryable=True, error="x", new_retry_count=1)
+    await repo.claim_one("proc.md")
+
+    moved = await repo.recover_orphan_processing()
+    assert moved == 1
+    p = await repo.get_by_id("p.md")
+    d = await repo.get_by_id("d.md")
+    f = await repo.get_by_id("f.md")
+    proc = await repo.get_by_id("proc.md")
+    assert p is not None and p.status == "pending"
+    assert d is not None and d.status == "done"
+    assert f is not None and f.status == "failed"
+    assert proc is not None and proc.status == "pending"
+
+
+# ── Partial indexes (smoke) ─────────────────────────────────────────────
+
+
+async def test_partial_indexes_are_created(repo: _MdChangeStateRepo) -> None:
+    """The three partial / mtime indexes from the schema land in sqlite_master."""
+    async with repo.session_factory() as s:
+        from sqlalchemy import text
+
+        result = await s.execute(
+            text("SELECT name FROM sqlite_master WHERE type='index'")
+        )
+        names = {row[0] for row in result.all()}
+    for expected in (
+        "idx_md_change_pending",
+        "idx_md_change_retryable",
+        "idx_md_change_mtime",
+        "idx_md_change_kind",
+    ):
+        assert expected in names, f"missing index {expected!r}; got {names!r}"
--- a/tests/unit/test_infra/test_sqlite/test_sqlite_manager.py
+++ b/tests/unit/test_infra/test_sqlite/test_sqlite_manager.py
@ -0,0 +1,53 @@
+"""SQLite manager singletons.
+
+Verifies ``get_engine`` / ``get_session_factory`` / ``dispose_engine``
+are idempotent and rebuild after dispose.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from everos.infra.persistence.sqlite import sqlite_manager
+
+
+@pytest.fixture(autouse=True)
+async def _reset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+    """Point the singleton at an isolated memory-root and reset module state."""
+    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
+    sqlite_manager._engine = None
+    sqlite_manager._session_factory = None
+    yield
+    await sqlite_manager.dispose_engine()
+
+
+async def test_get_engine_is_singleton(tmp_path: Path) -> None:
+    e1 = sqlite_manager.get_engine()
+    e2 = sqlite_manager.get_engine()
+    assert e1 is e2
+    # Engine points at the redirected memory root.
+    assert str(tmp_path) in str(e1.url)
+
+
+async def test_get_session_factory_is_singleton() -> None:
+    f1 = sqlite_manager.get_session_factory()
+    f2 = sqlite_manager.get_session_factory()
+    assert f1 is f2
+
+
+async def test_dispose_resets_state() -> None:
+    e1 = sqlite_manager.get_engine()
+    await sqlite_manager.dispose_engine()
+    assert sqlite_manager._engine is None
+    assert sqlite_manager._session_factory is None
+    e2 = sqlite_manager.get_engine()
+    assert e2 is not e1
+
+
+async def test_dispose_is_idempotent() -> None:
+    await sqlite_manager.dispose_engine()  # nothing built yet
+    sqlite_manager.get_engine()
+    await sqlite_manager.dispose_engine()
+    await sqlite_manager.dispose_engine()  # second call must not raise