chore: initialize EverOS 1.0.0
Markdown-first (md-first) memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state, and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting), enforced by import-linter.

Engineering surface:
- Coding conventions live in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and checks for OpenAPI drift.
This commit is contained in:
0
tests/unit/test_core/__init__.py
Normal file
0
tests/unit/test_core/__init__.py
Normal file
0
tests/unit/test_core/test_lifespan/__init__.py
Normal file
0
tests/unit/test_core/test_lifespan/__init__.py
Normal file
88
tests/unit/test_core/test_lifespan/test_factory.py
Normal file
88
tests/unit/test_core/test_lifespan/test_factory.py
Normal file
@ -0,0 +1,88 @@
|
||||
"""``build_lifespan`` — provider ordering, state storage, shutdown errors."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.lifespan.factory import build_lifespan
|
||||
|
||||
|
||||
class _RecordingProvider(LifespanProvider):
    """Test double that appends a ``start:<name>`` / ``stop:<name>`` marker per hook.

    The tests hand the same ``log`` list to several instances, so the resulting
    sequence shows the order in which the lifespan invoked each provider.
    """

    def __init__(
        self,
        name: str,
        order: int,
        log: list[str],
        *,
        returns: object | None = None,
        shutdown_raises: bool = False,
    ) -> None:
        super().__init__(name=name, order=order)
        # Event sink supplied by the test; mutated in place by both hooks.
        self._events = log
        # Value handed back from ``startup``.
        self._startup_result = returns
        # When true, ``shutdown`` raises after recording its marker.
        self._fail_on_shutdown = shutdown_raises

    async def startup(self, app: FastAPI) -> object | None:
        """Record the start marker and return the configured result."""
        marker = f"start:{self.name}"
        self._events.append(marker)
        return self._startup_result

    async def shutdown(self, app: FastAPI) -> None:
        """Record the stop marker, then raise ``RuntimeError`` if configured to."""
        marker = f"stop:{self.name}"
        self._events.append(marker)
        if not self._fail_on_shutdown:
            return
        raise RuntimeError(f"{self.name} shutdown boom")
|
||||
|
||||
|
||||
async def test_startup_runs_in_order_ascending() -> None:
    """Startup hooks fire by ascending ``order``, not by list position."""
    events: list[str] = []
    # Registered deliberately out of order: a=2, b=1, c=3.
    providers = [
        _RecordingProvider("a", order=2, log=events),
        _RecordingProvider("b", order=1, log=events),
        _RecordingProvider("c", order=3, log=events),
    ]

    application = FastAPI()
    lifespan = build_lifespan(providers)
    async with lifespan(application):
        pass

    # The first three entries are the startup markers.
    assert events[:3] == ["start:b", "start:a", "start:c"]
|
||||
|
||||
|
||||
async def test_shutdown_runs_in_reverse_order() -> None:
    """Shutdown hooks fire in the reverse of the startup sequence."""
    events: list[str] = []
    providers = [
        _RecordingProvider("a", order=1, log=events),
        _RecordingProvider("b", order=2, log=events),
    ]

    application = FastAPI()
    lifespan = build_lifespan(providers)
    async with lifespan(application):
        pass

    # Entries 0-1 are the two start markers; everything after is shutdown.
    shutdown_events = events[2:]
    assert shutdown_events == ["stop:b", "stop:a"]
|
||||
|
||||
|
||||
async def test_non_none_startup_result_stored_in_state() -> None:
    """A non-``None`` startup result is exposed under the provider's name."""
    marker = object()
    provider = _RecordingProvider("x", order=1, log=[], returns=marker)
    application = FastAPI()
    lifespan = build_lifespan([provider])
    async with lifespan(application):
        stored = application.state.lifespan_data["x"]
        assert stored is marker
|
||||
|
||||
|
||||
async def test_none_startup_result_not_stored() -> None:
    """A provider whose startup returns ``None`` leaves no ``lifespan_data`` entry."""
    provider = _RecordingProvider("nullone", order=1, log=[], returns=None)
    application = FastAPI()
    lifespan = build_lifespan([provider])
    async with lifespan(application):
        assert "nullone" not in application.state.lifespan_data
|
||||
|
||||
|
||||
async def test_shutdown_exception_swallowed_and_logged() -> None:
    """A raising shutdown hook must not prevent sibling providers from stopping."""
    events: list[str] = []
    healthy = _RecordingProvider("a", order=1, log=events)
    failing = _RecordingProvider("b", order=2, log=events, shutdown_raises=True)

    application = FastAPI()
    lifespan = build_lifespan([healthy, failing])
    # Leaving the context must not propagate b's RuntimeError.
    async with lifespan(application):
        pass

    # "a" still shut down even though "b" raised before it.
    assert events[-1] == "stop:a"
    # b's hook did run; its exception was swallowed.
    assert "stop:b" in events
|
||||
35
tests/unit/test_core/test_lifespan/test_metrics_lifespan.py
Normal file
35
tests/unit/test_core/test_lifespan/test_metrics_lifespan.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""``MetricsLifespanProvider`` — startup returns registry, shutdown logs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import FastAPI
|
||||
from prometheus_client import CollectorRegistry
|
||||
|
||||
from everos.core.lifespan.metrics_lifespan import MetricsLifespanProvider
|
||||
from everos.core.observability.metrics import (
|
||||
reset_metrics_registry,
|
||||
set_metrics_registry,
|
||||
)
|
||||
|
||||
|
||||
async def test_startup_returns_registry() -> None:
    """``startup`` hands back whichever registry is currently installed."""
    registry = CollectorRegistry()
    set_metrics_registry(registry)
    try:
        provider = MetricsLifespanProvider()
        returned = await provider.startup(FastAPI())
        assert returned is registry
    finally:
        # Always undo the override so later tests are unaffected.
        reset_metrics_registry()
|
||||
|
||||
|
||||
async def test_shutdown_is_noop() -> None:
    """Smoke test: ``shutdown`` completes without raising."""
    provider = MetricsLifespanProvider()
    application = FastAPI()
    await provider.shutdown(application)
|
||||
|
||||
|
||||
def test_provider_metadata() -> None:
    """The provider reports the name ``metrics`` and the ``order`` it was given."""
    provider = MetricsLifespanProvider(order=42)
    observed = (provider.name, provider.order)
    assert observed[0] == "metrics"
    assert observed[1] == 42
|
||||
0
tests/unit/test_core/test_middleware/__init__.py
Normal file
0
tests/unit/test_core/test_middleware/__init__.py
Normal file
106
tests/unit/test_core/test_middleware/test_global_exception.py
Normal file
106
tests/unit/test_core/test_middleware/test_global_exception.py
Normal file
@ -0,0 +1,106 @@
|
||||
"""``global_exception_handler`` — uniform error envelope per v1 API §1.
|
||||
|
||||
We mount the handler on a minimal FastAPI app with three error-emitting
|
||||
routes (HTTPException 4xx / 5xx, RequestValidationError, raw exception)
|
||||
and assert the envelope shape + status code each route produces.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from pydantic import BaseModel
|
||||
|
||||
from everos.core.middleware.global_exception import global_exception_handler
|
||||
|
||||
|
||||
class _Body(BaseModel):
    # Request body for the ``/validate`` route. The validation test posts JSON
    # without ``name`` to provoke a 422 response.
    name: str
|
||||
|
||||
|
||||
def _build_app() -> FastAPI:
    """Create a minimal app with the handler mounted and one route per error kind."""
    application = FastAPI()
    # The same handler is registered for all three exception families under test.
    for exc_type in (HTTPException, RequestValidationError, Exception):
        application.add_exception_handler(exc_type, global_exception_handler)

    @application.get("/raise-400")
    async def raise_400() -> None:
        # Client error raised explicitly as an HTTPException.
        raise HTTPException(status_code=400, detail="bad input")

    @application.get("/raise-500-http")
    async def raise_500_http() -> None:
        # Server error that still travels through HTTPException.
        raise HTTPException(status_code=503, detail="upstream dead")

    @application.get("/boom")
    async def boom() -> None:
        # Plain exception whose message must not reach the client.
        raise RuntimeError("hidden internals")

    @application.post("/validate")
    async def validate(_body: _Body) -> dict[str, str]:
        # Only reachable when the request body validates.
        return {"ok": "yes"}

    return application
|
||||
|
||||
|
||||
@pytest.fixture
async def client() -> AsyncIterator[AsyncClient]:
    """Yield an in-process HTTP client bound to a freshly built app."""
    # raise_app_exceptions=False keeps the transport from re-raising the
    # route's RuntimeError into the test, so the registered handler's 500
    # response can be inspected instead.
    asgi = ASGITransport(app=_build_app(), raise_app_exceptions=False)
    http = AsyncClient(transport=asgi, base_url="http://test")
    async with http as session:
        yield session
|
||||
|
||||
|
||||
def _assert_envelope(body: dict[str, object], *, code: str, path: str) -> None:
|
||||
"""Wiki §1 envelope: ``{request_id, error: {code, message, timestamp, path}}``."""
|
||||
assert isinstance(body["request_id"], str) and body["request_id"]
|
||||
error = body["error"]
|
||||
assert isinstance(error, dict)
|
||||
assert error["code"] == code
|
||||
assert isinstance(error["message"], str) and error["message"]
|
||||
assert isinstance(error["timestamp"], str) and "T" in error["timestamp"]
|
||||
assert error["path"] == path
|
||||
|
||||
|
||||
async def test_http_exception_4xx(client: AsyncClient) -> None:
    """A 4xx ``HTTPException`` yields ``HTTP_ERROR`` and keeps its detail as the message."""
    response = await client.get("/raise-400")
    assert response.status_code == 400
    payload = response.json()
    _assert_envelope(payload, code="HTTP_ERROR", path="/raise-400")
    message = payload["error"]["message"]
    assert message == "bad input"
|
||||
|
||||
|
||||
async def test_http_exception_5xx_uses_system_error(client: AsyncClient) -> None:
    """A 5xx raised via ``HTTPException`` still maps to ``SYSTEM_ERROR`` with a generic message."""
    response = await client.get("/raise-500-http")
    assert response.status_code == 503
    payload = response.json()
    _assert_envelope(payload, code="SYSTEM_ERROR", path="/raise-500-http")
    # The route's detail ("upstream dead") is replaced by the generic text.
    message = payload["error"]["message"]
    assert message == "Internal server error"
|
||||
|
||||
|
||||
async def test_unhandled_exception_5xx(client: AsyncClient) -> None:
    """An unhandled ``RuntimeError`` becomes a 500 ``SYSTEM_ERROR`` with details hidden."""
    response = await client.get("/boom")
    assert response.status_code == 500
    payload = response.json()
    _assert_envelope(payload, code="SYSTEM_ERROR", path="/boom")
    message = payload["error"]["message"]
    assert message == "Internal server error"
    # The exception text must not appear anywhere in the raw response.
    raw = response.text
    assert "hidden internals" not in raw
|
||||
|
||||
|
||||
async def test_validation_error_returns_422(client: AsyncClient) -> None:
    """A body that fails validation yields 422 with the field named in the message."""
    # The empty object omits the required ``name`` field.
    response = await client.post("/validate", json={})
    assert response.status_code == 422
    payload = response.json()
    _assert_envelope(payload, code="HTTP_ERROR", path="/validate")
    # The offending field name should appear somewhere in the message.
    lowered = payload["error"]["message"].lower()
    assert "name" in lowered
|
||||
148
tests/unit/test_core/test_middleware/test_profile.py
Normal file
148
tests/unit/test_core/test_middleware/test_profile.py
Normal file
@ -0,0 +1,148 @@
|
||||
"""``ProfileMiddleware`` — env gating, query-param gating, pyinstrument output."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from everos.core.middleware.profile import ProfileMiddleware, _profiling_enabled
|
||||
|
||||
|
||||
def _build_app() -> FastAPI:
    """Create an app with ``ProfileMiddleware`` installed and a single JSON route."""
    application = FastAPI()
    application.add_middleware(ProfileMiddleware)

    @application.get("/hello")
    async def hello() -> dict[str, str]:
        # Fixed payload the passthrough tests compare against.
        return {"ok": "yes"}

    return application
|
||||
|
||||
|
||||
@pytest.fixture
def disable_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Remove both profiling switches (current and legacy) from the environment."""
    for variable in ("PROFILING_ENABLED", "PROFILING"):
        # raising=False: the variable may legitimately be unset already.
        monkeypatch.delenv(variable, raising=False)
|
||||
|
||||
|
||||
@pytest.fixture
def enable_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Switch profiling on through ``PROFILING_ENABLED`` for the duration of a test."""
    variable, setting = "PROFILING_ENABLED", "true"
    monkeypatch.setenv(variable, setting)
|
||||
|
||||
|
||||
def test_profiling_enabled_truthy_variants(monkeypatch: pytest.MonkeyPatch) -> None:
    """Each accepted truthy spelling switches profiling on."""
    truthy_values = ("1", "true", "TRUE", "yes")
    for raw in truthy_values:
        monkeypatch.setenv("PROFILING_ENABLED", raw)
        enabled = _profiling_enabled()
        assert enabled is True
|
||||
|
||||
|
||||
def test_profiling_enabled_falsy_variants(monkeypatch: pytest.MonkeyPatch) -> None:
    """Falsy spellings, the empty string and unrecognised text all leave profiling off."""
    falsy_values = ("0", "false", "no", "", "anything-else")
    for raw in falsy_values:
        monkeypatch.setenv("PROFILING_ENABLED", raw)
        enabled = _profiling_enabled()
        assert enabled is False
|
||||
|
||||
|
||||
def test_profiling_falls_back_to_legacy_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``PROFILING_ENABLED`` unset, the legacy ``PROFILING`` variable is honoured."""
    # Clear the primary switch so only the legacy one can enable profiling.
    monkeypatch.delenv("PROFILING_ENABLED", raising=False)
    monkeypatch.setenv("PROFILING", "yes")
    enabled = _profiling_enabled()
    assert enabled is True
|
||||
|
||||
|
||||
@pytest.fixture
async def disabled_client(disable_env: None) -> AsyncIterator[AsyncClient]:
    """Yield a client for an app built while the profiling variables are unset."""
    asgi = ASGITransport(app=_build_app())
    http = AsyncClient(transport=asgi, base_url="http://test")
    async with http as session:
        yield session
|
||||
|
||||
|
||||
@pytest.fixture
async def enabled_client(enable_env: None) -> AsyncIterator[AsyncClient]:
    """Yield a client for an app built while ``PROFILING_ENABLED`` is ``true``."""
    asgi = ASGITransport(app=_build_app())
    http = AsyncClient(transport=asgi, base_url="http://test")
    async with http as session:
        yield session
|
||||
|
||||
|
||||
async def test_disabled_passthrough(disabled_client: AsyncClient) -> None:
    """With profiling disabled, ``?profile=true`` is ignored and the JSON body returned."""
    response = await disabled_client.get("/hello?profile=true")
    expected_payload = {"ok": "yes"}
    assert response.status_code == 200
    assert response.json() == expected_payload
|
||||
|
||||
|
||||
async def test_enabled_without_query_passthrough(enabled_client: AsyncClient) -> None:
    """Profiling enabled but no ``?profile=true`` on the request → normal response."""
    response = await enabled_client.get("/hello")
    expected_payload = {"ok": "yes"}
    assert response.status_code == 200
    assert response.json() == expected_payload
|
||||
|
||||
|
||||
async def test_enabled_with_query_returns_html(enabled_client: AsyncClient) -> None:
    """With ``?profile=true`` and pyinstrument available, response is HTML."""
    # Skip (rather than fail) when the optional profiler is not installed.
    pytest.importorskip("pyinstrument")

    resp = await enabled_client.get("/hello?profile=true")
    assert resp.status_code == 200
    assert "text/html" in resp.headers.get("content-type", "")
    # Accept either the pyinstrument marker or a generic HTML root element;
    # lower-case the body once for both checks.
    page = resp.text.lower()
    assert "pyinstrument" in page or "<html" in page
|
||||
|
||||
|
||||
async def test_enabled_with_query_returns_html_when_inner_raises(
    enabled_client: AsyncClient,
) -> None:
    """An exception inside the wrapped handler still produces the profile HTML.

    ``enabled_client`` is requested only for its environment setup; the test
    drives its own app because it needs a route that raises.
    """
    # Skip (rather than fail) when the optional profiler is not installed.
    pytest.importorskip("pyinstrument")

    # Tiny app whose only route raises, so the middleware sees a failing
    # handler while profiling is active.
    app = FastAPI()
    app.add_middleware(ProfileMiddleware)

    @app.get("/bang")
    async def bang() -> None:
        raise RuntimeError("inner exception")

    # raise_app_exceptions=False keeps the transport from re-raising into the test.
    transport = ASGITransport(app=app, raise_app_exceptions=False)
    async with AsyncClient(transport=transport, base_url="http://test") as c:
        resp = await c.get("/bang?profile=true")
        assert resp.status_code == 200
        assert "text/html" in resp.headers.get("content-type", "")
|
||||
|
||||
|
||||
async def test_enabled_without_pyinstrument(monkeypatch: pytest.MonkeyPatch) -> None:
    """If pyinstrument import fails, middleware degrades to passthrough."""
    import builtins

    monkeypatch.setenv("PROFILING_ENABLED", "true")
    original_import = builtins.__import__

    def fail_pyinstrument(name: str, *args: object, **kwargs: object) -> object:
        # Sabotage only the profiler import; delegate every other import.
        if name != "pyinstrument":
            return original_import(name, *args, **kwargs)
        raise ImportError("simulated")

    monkeypatch.setattr(builtins, "__import__", fail_pyinstrument)
    # NOTE(review): the middleware is expected to attempt the pyinstrument
    # import while being constructed. Starlette appears to build the middleware
    # stack lazily on the first request rather than inside ``add_middleware`` —
    # confirm. Either way the patched import is active for both steps below.
    application = _build_app()

    asgi = ASGITransport(app=application)
    async with AsyncClient(transport=asgi, base_url="http://test") as session:
        response = await session.get("/hello?profile=true")
        assert response.status_code == 200
        assert response.json() == {"ok": "yes"}
|
||||
162
tests/unit/test_core/test_middleware/test_prometheus.py
Normal file
162
tests/unit/test_core/test_middleware/test_prometheus.py
Normal file
@ -0,0 +1,162 @@
|
||||
"""``PrometheusMiddleware`` — increments counters / histograms, skips /metrics.
|
||||
|
||||
We isolate the test from the production registry by overriding it with a
|
||||
fresh :class:`prometheus_client.CollectorRegistry` for the duration of
|
||||
the test. The middleware was already imported with module-level Counter /
|
||||
Histogram bound to whatever the registry was at import time — those
|
||||
metric objects continue to record to the real registry. The test
|
||||
therefore reads via ``_http_requests_total`` directly rather than via
|
||||
``generate_metrics_response()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from everos.core.middleware import prometheus as prom_mod
|
||||
|
||||
|
||||
def _sample_value(metric: object, **labels: str) -> float:
|
||||
"""Read the current value of a labeled prometheus metric (test helper)."""
|
||||
labeled = metric.labels(**labels)._labeled # type: ignore[attr-defined]
|
||||
for sample in labeled.collect()[0].samples:
|
||||
if sample.name.endswith("_total"):
|
||||
return float(sample.value)
|
||||
return float("nan")
|
||||
|
||||
|
||||
def _histogram_count(metric: object, **labels: str) -> float:
|
||||
labeled = metric.labels(**labels)._labeled # type: ignore[attr-defined]
|
||||
for sample in labeled.collect()[0].samples:
|
||||
if sample.name.endswith("_count"):
|
||||
return float(sample.value)
|
||||
return float("nan")
|
||||
|
||||
|
||||
def _build_app() -> FastAPI:
    """Create an app with ``PrometheusMiddleware`` plus a static and a templated route."""
    application = FastAPI()
    application.add_middleware(prom_mod.PrometheusMiddleware)

    @application.get("/hello")
    async def hello() -> dict[str, str]:
        # Static path used by the counter and histogram tests.
        return {"ok": "yes"}

    @application.get("/users/{user_id}")
    async def get_user(user_id: str) -> dict[str, str]:
        # Templated path used by the path-normalisation test.
        return {"user": user_id}

    return application
|
||||
|
||||
|
||||
@pytest.fixture
async def client() -> AsyncIterator[AsyncClient]:
    """Yield an in-process HTTP client bound to a freshly built app."""
    asgi = ASGITransport(app=_build_app())
    http = AsyncClient(transport=asgi, base_url="http://test")
    async with http as session:
        yield session
|
||||
|
||||
|
||||
async def test_increments_counter_on_200(client: AsyncClient) -> None:
    """One successful GET bumps the request counter for its label set by exactly one."""
    label_set = {"method": "GET", "path": "/hello", "status": "200"}
    # Compare against a baseline: the counter is module-level, so its absolute
    # value depends on whatever ran earlier.
    baseline = _sample_value(prom_mod._http_requests_total, **label_set)
    response = await client.get("/hello")
    assert response.status_code == 200
    current = _sample_value(prom_mod._http_requests_total, **label_set)
    assert current == baseline + 1
|
||||
|
||||
|
||||
async def test_observes_duration_histogram(client: AsyncClient) -> None:
    """One GET adds exactly one observation to the duration histogram."""
    label_set = {"method": "GET", "path": "/hello"}
    baseline = _histogram_count(prom_mod._http_request_duration_seconds, **label_set)
    await client.get("/hello")
    current = _histogram_count(prom_mod._http_request_duration_seconds, **label_set)
    assert current == baseline + 1
|
||||
|
||||
|
||||
def test_skip_paths_constant_contains_known_endpoints() -> None:
    """The skip set is the contract, so membership is asserted directly.

    Calling ``.labels(path='/metrics')`` to probe the counter would create a
    zero-valued sample in the global registry, which then leaks into the
    exposition format that test_metrics_route inspects.
    """
    expected_skips = ("/metrics", "/health", "/healthz", "/favicon.ico")
    for endpoint in expected_skips:
        assert endpoint in prom_mod._SKIP_PATHS
|
||||
|
||||
|
||||
async def test_path_params_normalized(client: AsyncClient) -> None:
    """``/users/abc`` should record against the route template ``/users/{user_id}``."""
    # The label carries the route template, not the concrete URL.
    label_set = {"method": "GET", "path": "/users/{user_id}", "status": "200"}
    baseline = _sample_value(prom_mod._http_requests_total, **label_set)
    response = await client.get("/users/abc")
    assert response.status_code == 200
    current = _sample_value(prom_mod._http_requests_total, **label_set)
    assert current == baseline + 1
|
||||
|
||||
|
||||
# ── _normalize_path direct tests (defensive fallback branches) ─────────
|
||||
|
||||
|
||||
def test_normalize_path_uses_path_params_fallback() -> None:
    """Without a ``route`` in scope, path-param values are replaced by ``{name}``."""
    from types import SimpleNamespace

    from everos.core.middleware.prometheus import _normalize_path

    # Duck-typed request stand-in: empty scope, so only ``path_params`` can help.
    request_stub = SimpleNamespace(
        scope={},
        url=SimpleNamespace(path="/x/abc/y"),
        path_params={"id": "abc"},
    )
    normalized = _normalize_path(request_stub)  # type: ignore[arg-type]
    assert normalized == "/x/{id}/y"
|
||||
|
||||
|
||||
def test_normalize_path_unmatched_fallback() -> None:
    """Neither a route nor path params → the ``{unmatched}`` sentinel."""
    from types import SimpleNamespace

    from everos.core.middleware.prometheus import _normalize_path

    request_stub = SimpleNamespace(
        scope={},
        url=SimpleNamespace(path="/x"),
        path_params={},
    )
    normalized = _normalize_path(request_stub)  # type: ignore[arg-type]
    assert normalized == "{unmatched}"
|
||||
|
||||
|
||||
def test_normalize_path_non_dict_scope_falls_through() -> None:
    """Defensive case: a ``scope`` that is not a dict still yields ``{unmatched}``."""
    from types import SimpleNamespace

    from everos.core.middleware.prometheus import _normalize_path

    # A string scope cannot be queried for a route.
    request_stub = SimpleNamespace(
        scope="not-a-dict",
        url=SimpleNamespace(path="/x"),
        path_params={},
    )
    normalized = _normalize_path(request_stub)  # type: ignore[arg-type]
    assert normalized == "{unmatched}"
|
||||
0
tests/unit/test_core/test_observability/__init__.py
Normal file
0
tests/unit/test_core/test_observability/__init__.py
Normal file
74
tests/unit/test_core/test_observability/test_gauge.py
Normal file
74
tests/unit/test_core/test_observability/test_gauge.py
Normal file
@ -0,0 +1,74 @@
|
||||
"""``Gauge`` / ``LabeledGauge`` — set / inc / dec; with & without labels."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterator
|
||||
|
||||
import pytest
|
||||
from prometheus_client import CollectorRegistry
|
||||
|
||||
from everos.core.observability.metrics import (
|
||||
Gauge,
|
||||
reset_metrics_registry,
|
||||
set_metrics_registry,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def isolated_registry() -> Iterator[None]:
    """Give every test a throwaway registry so metric names can't clash with prod metrics."""
    scratch = CollectorRegistry()
    set_metrics_registry(scratch)
    yield
    # Teardown: undo the override installed above.
    reset_metrics_registry()
|
||||
|
||||
|
||||
def _value(gauge: Gauge, **labels: str) -> float:
|
||||
"""Read the gauge's current scalar value (helper for assertions)."""
|
||||
labeled = (
|
||||
gauge.labels(**labels)._labeled # type: ignore[attr-defined]
|
||||
if labels
|
||||
else gauge._gauge # type: ignore[attr-defined]
|
||||
)
|
||||
for sample in labeled.collect()[0].samples:
|
||||
if sample.name.endswith("_gauge") or "_" in sample.name:
|
||||
return float(sample.value)
|
||||
return float("nan")
|
||||
|
||||
|
||||
def test_unlabeled_set_inc_dec() -> None:
    """``set`` / ``inc`` / ``dec`` on an unlabeled gauge move one shared value."""
    depth = Gauge(name="queue_depth", description="rows pending")

    depth.set(10)
    assert _value(depth) == 10

    depth.inc(2)
    assert _value(depth) == 12

    # No argument: the expected value shows a decrement of one.
    depth.dec()
    assert _value(depth) == 11

    depth.dec(5)
    assert _value(depth) == 6
|
||||
|
||||
|
||||
def test_labeled_isolates_streams() -> None:
    """Each label value keeps its own independent gauge value."""
    cache = Gauge(name="cache_size", description="entries", labelnames=("region",))

    # Seed both regions, then move them in opposite directions.
    cache.labels(region="us").set(100)
    cache.labels(region="eu").set(50)
    cache.labels(region="us").inc(5)
    cache.labels(region="eu").dec(10)

    us_value = _value(cache, region="us")
    assert us_value == 105
    eu_value = _value(cache, region="eu")
    assert eu_value == 40
|
||||
|
||||
|
||||
def test_namespace_subsystem_unit_render_in_metric_name() -> None:
    """Namespace, subsystem, name and unit all appear in the underlying metric name."""
    parts = {
        "namespace": "everos",
        "subsystem": "cascade",
        "name": "depth",
        "unit": "rows",
    }
    gauge = Gauge(description="d", **parts)
    gauge.set(7)
    # Inspect the wrapped metric's private name.
    full_name = gauge._gauge._name  # type: ignore[attr-defined]
    for fragment in ("everos", "cascade", "depth", "rows"):
        assert fragment in full_name
|
||||
111
tests/unit/test_core/test_observability/test_logging_factory.py
Normal file
111
tests/unit/test_core/test_observability/test_logging_factory.py
Normal file
@ -0,0 +1,111 @@
|
||||
"""``configure_logging`` + ``get_logger`` smoke tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
import structlog
|
||||
|
||||
from everos.core.observability.logging.factory import configure_logging, get_logger
|
||||
|
||||
|
||||
def test_get_logger_returns_structlog_instance() -> None:
    """The returned logger exposes the ``info`` / ``warning`` / ``error`` methods."""
    logger = get_logger("test.module")
    # Interface check only; the concrete logger class is not asserted.
    for method_name in ("info", "warning", "error"):
        assert hasattr(logger, method_name)
|
||||
|
||||
|
||||
def _strip_ansi(text: str) -> str:
|
||||
"""Remove ANSI escape sequences so assertions are stable."""
|
||||
import re
|
||||
|
||||
return re.sub(r"\x1b\[[0-9;]*m", "", text)
|
||||
|
||||
|
||||
def test_configure_logging_accepts_known_levels() -> None:
    """Smoke test: each listed level name, in either case, configures without raising."""
    level_names = ("DEBUG", "INFO", "WARNING", "ERROR", "info", "warn")
    for level_name in level_names:
        configure_logging(level=level_name)
|
||||
|
||||
|
||||
def test_configure_logging_handles_unknown_level_silently() -> None:
    """Unknown level name silently falls back via ``getattr(logging, ..., INFO)``."""
    # Passing means no exception was raised; the effective level is not inspected.
    bogus_level = "NOPE"
    configure_logging(level=bogus_level)
|
||||
|
||||
|
||||
def test_configure_logging_emits_through_structlog(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """An ``info`` call lands on stdout with its event text and key=value pair."""
    configure_logging(level="INFO")
    get_logger("everos.test").info("hello", k="v")

    captured = capsys.readouterr()
    rendered = _strip_ansi(captured.out)
    assert "hello" in rendered
    # ConsoleRenderer renders key=value pairs (colour codes already stripped).
    assert "k=v" in rendered
|
||||
|
||||
|
||||
def test_configure_logging_demotes_noisy_http_loggers_to_warning(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """HTTP client loggers (httpx / httpcore / urllib3) are pinned at WARNING.

    This keeps each successful HTTP request from producing an INFO line.
    everos's own ``get_logger(...)`` calls remain unaffected.
    """
    import logging

    configure_logging(level="INFO")

    for name in ("httpx", "httpcore", "urllib3"):
        effective = logging.getLogger(name).level
        assert effective == logging.WARNING, (
            f"{name} logger must be pinned to WARNING, got "
            f"{logging.getLevelName(effective)}"
        )

    # Behavioural check: an INFO record from httpx must not reach stdout.
    noisy = logging.getLogger("httpx")
    noisy.info("HTTP Request: GET https://example.com 200 OK")
    rendered = _strip_ansi(capsys.readouterr().out)
    assert "HTTP Request" not in rendered
|
||||
|
||||
|
||||
def test_configure_logging_routes_stdlib_loggers_through_same_formatter(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """stdlib ``logging.getLogger(...)`` output shares the structlog formatter.

    uvicorn / fastapi / third-party libs must render with the same
    ``[level] event`` shape as everos's own structlog calls.

    This is the user-visible half of the foreign-log-integration setup.
    Without it, uvicorn's default ``LOGGING_CONFIG`` would (a) reinstall its
    own handlers and (b) print ``INFO:logger.name:message`` lines that look
    nothing like the structlog ConsoleRenderer output.
    """
    import logging

    configure_logging(level="INFO")
    foreign_logger = logging.getLogger("uvicorn.access")
    foreign_logger.info("foreign event")

    rendered = _strip_ansi(capsys.readouterr().out)
    assert "foreign event" in rendered
    # The default stdlib record prefix must not survive.
    assert "INFO:uvicorn.access" not in rendered
    # ConsoleRenderer marks the level in brackets; the structlog and stdlib
    # paths must produce the same shape.
    assert "[info" in rendered
|
||||
|
||||
|
||||
def test_get_logger_with_same_name_returns_equivalent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Two ``get_logger`` calls with the same name both yield usable loggers.

    Identity between the two results is deliberately not asserted — the
    structlog API does not guarantee it — so each logger is checked on its
    own against the same protocol surface.
    """
    configure_logging()
    first = get_logger("everos.cache.test")
    second = get_logger("everos.cache.test")

    bound_types = structlog.stdlib.BoundLogger | structlog.BoundLoggerBase
    # Check BOTH loggers: the earlier single ``or`` expression short-circuited
    # on the first logger and never looked at the second one when the first
    # was already a bound logger instance.
    for logger in (first, second):
        assert isinstance(logger, bound_types) or hasattr(logger, "info")
|
||||
0
tests/unit/test_core/test_persistence/__init__.py
Normal file
0
tests/unit/test_core/test_persistence/__init__.py
Normal file
@ -0,0 +1,104 @@
|
||||
"""Unit tests for the LanceDB async connection factory."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.config import LanceDBSettings
|
||||
from everos.core.persistence import MemoryRoot, open_lancedb_connection
|
||||
|
||||
|
||||
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Return a ``MemoryRoot`` over pytest's per-test temp dir, after calling ``ensure()``."""
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
|
||||
|
||||
|
||||
async def test_connect_creates_lancedb_dir(memory_root: MemoryRoot) -> None:
    """Opening a connection recreates the LanceDB directory when it is missing."""
    settings = LanceDBSettings()
    # Drop the directory first, so that its existence after the call can only
    # come from the factory.
    if memory_root.lancedb_dir.exists():
        memory_root.lancedb_dir.rmdir()
    assert not memory_root.lancedb_dir.exists()

    connection = await open_lancedb_connection(memory_root.lancedb_dir, settings)
    try:
        assert memory_root.lancedb_dir.is_dir()
        assert connection.is_open()
    finally:
        # AsyncConnection.close() is synchronous, hence no await.
        connection.close()
|
||||
|
||||
|
||||
async def test_empty_connection_lists_no_tables(memory_root: MemoryRoot) -> None:
    """A brand-new database reports an empty table list."""
    settings = LanceDBSettings()
    connection = await open_lancedb_connection(memory_root.lancedb_dir, settings)
    try:
        # list_tables() returns ListTablesResponse(tables, page_token).
        listing = await connection.list_tables()
        table_names = list(listing.tables)
        assert table_names == []
    finally:
        connection.close()
|
||||
|
||||
|
||||
async def test_read_consistency_seconds_translated_to_timedelta(
    memory_root: MemoryRoot,
) -> None:
    """Non-None read_consistency_seconds must be passed as a timedelta."""
    import datetime as dt

    settings = LanceDBSettings(read_consistency_seconds=5.0)
    connection = await open_lancedb_connection(memory_root.lancedb_dir, settings)
    try:
        # The interval echoed back by the connection should match the setting.
        # AsyncConnection.get_read_consistency_interval is async.
        echoed = await connection.get_read_consistency_interval()
        expected = dt.timedelta(seconds=5.0)
        assert echoed == expected
    finally:
        connection.close()
|
||||
|
||||
|
||||
async def test_default_consistency_is_none(memory_root: MemoryRoot) -> None:
    """With default settings the connection reports no consistency interval."""
    conn = await open_lancedb_connection(memory_root.lancedb_dir, LanceDBSettings())
    try:
        reported = await conn.get_read_consistency_interval()
        assert reported is None
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def test_index_cache_cap_is_plumbed_into_session(
    memory_root: MemoryRoot, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The factory must hand a ``Session`` to ``lancedb.connect_async``.

    ``lancedb.connect_async`` is wrapped with a recording pass-through and
    the test asserts that the ``session`` keyword it received is a
    ``lancedb.Session``. The configured cap itself is not read back from
    the Session here; only the wiring is checked.
    """
    import lancedb

    original_connect = lancedb.connect_async
    seen_sessions: list[object] = []

    async def recording_connect(*args, **kwargs):  # type: ignore[no-untyped-def]
        # Record what the factory passed, then defer to the real function.
        seen_sessions.append(kwargs.get("session"))
        return await original_connect(*args, **kwargs)

    monkeypatch.setattr(lancedb, "connect_async", recording_connect)

    conn = await open_lancedb_connection(
        memory_root.lancedb_dir,
        LanceDBSettings(index_cache_size_bytes=1024),
    )
    try:
        assert seen_sessions and isinstance(seen_sessions[-1], lancedb.Session)
    finally:
        conn.close()
|
||||
@ -0,0 +1,175 @@
|
||||
"""FTS-layer normalisation contract tests.
|
||||
|
||||
``BaseLanceTable.ensure_fts_indexes`` builds the LanceDB FTS index with
|
||||
the following configuration::
|
||||
|
||||
base_tokenizer="whitespace"
|
||||
lower_case=True
|
||||
stem=True
|
||||
remove_stop_words=False
|
||||
ascii_folding=True
|
||||
language="English" (tantivy default)
|
||||
|
||||
The app-layer ``JiebaTokenizer`` already handles segmentation +
|
||||
stopword filtering, so these FTS-layer settings act as a *belt-and-
|
||||
braces* layer of normalisation. These tests probe the FTS layer
|
||||
*directly* (bypassing jieba) to verify each setting actually behaves
|
||||
as the docstring claims:
|
||||
|
||||
- lower_case=True → query case-insensitive against the raw-cased text
|
||||
- stem=True → query for the word root hits inflected forms
|
||||
- remove_stop_words=False → FTS layer does NOT drop stop-words; the
|
||||
app-layer JiebaTokenizer is the single source of truth for
|
||||
stop-word filtering (English + Chinese)
|
||||
- ascii_folding=True → diacritics on Latin chars normalised (café → cafe)
|
||||
- CJK pass-through → no stemming applied to CJK
|
||||
|
||||
Tests build a fresh in-memory-ish LanceDB store under ``tmp_path``,
|
||||
declare a minimal schema with one ``body`` column, and inspect query
|
||||
hits against handcrafted rows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from pathlib import Path
|
||||
from typing import ClassVar
|
||||
|
||||
import lancedb
|
||||
import pytest
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable
|
||||
|
||||
|
||||
class _FtsSpec(BaseLanceTable):
    """Minimal schema with one BM25-indexed column for FTS-layer probes."""

    # Table name used by the ``fts_table`` fixture when creating the table.
    TABLE_NAME: ClassVar[str] = "fts_probe"
    # Columns declared for BM25 indexing; only ``body`` is indexed here.
    BM25_FIELDS: ClassVar[list[str]] = ["body"]

    # Row identifier; tests compare query hits by this value.
    id: str
    # Text column that the keyword queries in this module run against.
    body: str
|
||||
|
||||
|
||||
@pytest.fixture
async def fts_table(tmp_path: Path) -> AsyncIterator[AsyncTable]:
    """Yield an empty ``_FtsSpec`` table in a fresh store under ``tmp_path``.

    No FTS index is built here: each test inserts its rows first and then
    calls ``ensure_fts_indexes`` (via ``_seed_and_index``), because the
    index needs data to materialise sensibly.

    The connection is closed during fixture teardown so that a failing
    test does not leave the store open.
    """
    conn = await lancedb.connect_async(str(tmp_path / "lancedb"))
    try:
        yield await conn.create_table(_FtsSpec.TABLE_NAME, schema=_FtsSpec)
    finally:
        # AsyncConnection.close() is synchronous.
        conn.close()
|
||||
|
||||
|
||||
async def _seed_and_index(table: AsyncTable, rows: list[dict]) -> None:
    """Add ``rows`` to ``table`` and then run ``ensure_fts_indexes`` on it."""
    records = [_FtsSpec(**fields) for fields in rows]
    await table.add(records)
    await _FtsSpec.ensure_fts_indexes(table)
|
||||
|
||||
|
||||
async def _query_ids(table: AsyncTable, text: str) -> set[str]:
    """Return the ids of up to 10 rows matching ``text`` on the ``body`` column."""
    query = table.query().nearest_to_text(text, columns="body").limit(10)
    matches = await query.to_list()
    return {match["id"] for match in matches}
|
||||
|
||||
|
||||
# ── lower_case=True ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_lower_case_query_matches_uppercase_index(
    fts_table: AsyncTable,
) -> None:
    """A lower-case query ``hello`` finds the row stored as ``HELLO``."""
    corpus = [
        {"id": "1", "body": "HELLO world"},
        {"id": "2", "body": "GOODBYE world"},
    ]
    await _seed_and_index(fts_table, corpus)
    assert await _query_ids(fts_table, "hello") == {"1"}
|
||||
|
||||
|
||||
# ── stem=True ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_stem_query_root_matches_inflected_forms(
    fts_table: AsyncTable,
) -> None:
    """The root ``counsel`` matches rows with ``counseling`` and ``counseled``."""
    corpus = [
        {"id": "1", "body": "counseling session happened"},
        {"id": "2", "body": "counseled patient yesterday"},
        {"id": "3", "body": "unrelated content"},
    ]
    await _seed_and_index(fts_table, corpus)
    assert await _query_ids(fts_table, "counsel") == {"1", "2"}
|
||||
|
||||
|
||||
# ── remove_stop_words=False (app layer owns stop-words) ────────────────
|
||||
|
||||
|
||||
async def test_fts_layer_does_not_filter_stopwords(
    fts_table: AsyncTable,
) -> None:
    """The FTS index keeps English stop-words (``remove_stop_words=False``).

    Querying the bare stop-word ``the`` must still hit the row that
    contains it. In production :class:`JiebaTokenizer` strips such words
    before they get this far; the test talks to the FTS layer directly to
    observe its behaviour on its own.
    """
    corpus = [
        {"id": "1", "body": "the cat sat on the mat"},
        {"id": "2", "body": "unrelated body text"},
    ]
    await _seed_and_index(fts_table, corpus)
    assert await _query_ids(fts_table, "the") == {"1"}
|
||||
|
||||
|
||||
# ── ascii_folding=True ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_ascii_folding_strips_diacritics(fts_table: AsyncTable) -> None:
    """An unaccented query ``cafe`` matches the row stored as ``café``."""
    corpus = [
        {"id": "1", "body": "café latte"},
        {"id": "2", "body": "tea house"},
    ]
    await _seed_and_index(fts_table, corpus)
    assert await _query_ids(fts_table, "cafe") == {"1"}
|
||||
|
||||
|
||||
# ── CJK pass-through ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_cjk_terms_pass_through_untouched(fts_table: AsyncTable) -> None:
    """A whitespace-delimited CJK term is matched verbatim.

    With ``base_tokenizer="whitespace"`` CJK text is split on whitespace
    only. The app-layer ``JiebaTokenizer`` normally inserts those spaces
    before text reaches the index, so the bodies here are pre-spaced to
    stand in for it.
    """
    corpus = [
        {"id": "1", "body": "北京 天安门"},
        {"id": "2", "body": "上海 外滩"},
    ]
    await _seed_and_index(fts_table, corpus)
    assert await _query_ids(fts_table, "北京") == {"1"}
|
||||
@ -0,0 +1,649 @@
|
||||
"""Tests for :class:`LanceRepoBase` + :class:`LanceDailyLogRepoBase`.
|
||||
|
||||
Exercises the chassis-level query helpers shared by every business
|
||||
LanceDB repo: ``find_where`` / ``find_one_where`` / ``find_by_owner`` /
|
||||
``find_by_md_path`` (on :class:`LanceRepoBase`), and the daily-log
|
||||
slice ``find_by_owner_entry`` / ``find_by_session`` /
|
||||
``find_by_parent`` (on :class:`LanceDailyLogRepoBase`). Also covers
|
||||
``get_by_id`` + ``upsert`` so the chassis CRUD surface is end-to-end
|
||||
verified.
|
||||
|
||||
Uses a tmp LanceDB connection + a locally-defined daily-log-shaped
|
||||
table so the chassis can be exercised without depending on any
|
||||
specific business schema (episode / atomic_fact / …).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
from collections.abc import AsyncIterator
from pathlib import Path
from typing import ClassVar
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.config import LanceDBSettings
|
||||
from everos.core.persistence import (
|
||||
BaseLanceTable,
|
||||
MemoryRoot,
|
||||
Vector,
|
||||
open_lancedb_connection,
|
||||
)
|
||||
from everos.core.persistence.lancedb import (
|
||||
LanceDailyLogRepoBase,
|
||||
LanceRepoBase,
|
||||
)
|
||||
|
||||
|
||||
class _Note(BaseLanceTable):
    """Minimal daily-log-shaped table for chassis tests."""

    TABLE_NAME: ClassVar[str] = "_note"

    # Row key; ``_row`` builds it as "<owner>_<entry>".
    id: str
    owner_id: str
    # Scoping columns; the tests in this module leave them at "default".
    app_id: str = "default"
    project_id: str = "default"
    # Columns exercised by the daily-log lookups
    # (find_by_owner_entry / find_by_session / find_by_parent).
    entry_id: str
    session_id: str
    parent_type: str
    parent_id: str
    # Path of the backing markdown file; used by find_by_md_path and
    # delete_by_md_path.
    md_path: str
    text: str
    # Vector column declared with Vector(4); rows in this module supply
    # four floats.
    vector: Vector(4)  # type: ignore[valid-type]
|
||||
|
||||
|
||||
class _SearchNote(BaseLanceTable):
    """Schema with BM25_FIELDS declared — exercises FTS index setup."""

    TABLE_NAME: ClassVar[str] = "_search_note"
    # Only ``tokens`` is declared for BM25 indexing; ``text`` is not.
    BM25_FIELDS: ClassVar[list[str]] = ["tokens"]

    id: str
    text: str
    """Original surface form (display)."""

    tokens: str
    """Space-joined pre-tokenised text (BM25 index target)."""

    # Vector column declared with Vector(4); rows in this module supply
    # four numbers.
    vector: Vector(4)  # type: ignore[valid-type]
|
||||
|
||||
|
||||
class _NoteRepo(LanceDailyLogRepoBase[_Note]):
    # Concrete repo used throughout this module: the daily-log chassis
    # bound to the ``_Note`` schema, with no overrides of its own.
    schema = _Note
|
||||
|
||||
|
||||
def _row(
    *,
    owner: str,
    entry: str,
    session: str = "sess_a",
    parent_type: str = "memcell",
    parent_id: str = "mc_1",
    md_path: str | None = None,
    text: str = "x",
) -> _Note:
    """Build a ``_Note`` test row from keyword arguments.

    The row id is ``"<owner>_<entry>"``. When ``md_path`` is omitted (or
    empty) it defaults to ``users/<owner>/notes/<entry>.md``. Every row
    gets the same fixed 4-element vector.
    """
    resolved_path = md_path if md_path else f"users/{owner}/notes/{entry}.md"
    unit_vector = [1.0, 0.0, 0.0, 0.0]
    return _Note(
        id=f"{owner}_{entry}",
        owner_id=owner,
        entry_id=entry,
        session_id=session,
        parent_type=parent_type,
        parent_id=parent_id,
        md_path=resolved_path,
        text=text,
        vector=unit_vector,
    )
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_write_locks() -> None:
    """Clear ``LanceRepoBase``'s per-table write locks before every test.

    The repo base creates one ``asyncio.Lock`` per table name on demand
    and keeps it in a class-level dict. pytest-asyncio gives each test its
    own event loop, so a lock surviving from an earlier test would make
    the next one fail with "bound to a different event loop".
    """
    LanceRepoBase._reset_locks_for_tests()
|
||||
|
||||
|
||||
@pytest.fixture
async def repo(tmp_path: Path) -> AsyncIterator[_NoteRepo]:
    """Yield a ``_NoteRepo`` over a fresh ``_note`` table under ``tmp_path``.

    Opens a connection in the temp memory root, creates the table from the
    ``_Note`` schema and injects it into the repo. The connection is
    closed during fixture teardown so each test releases its store even
    when the test body fails.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_note", schema=_Note)
        yield _NoteRepo(table=table)
    finally:
        # AsyncConnection.close() is synchronous.
        conn.close()
|
||||
|
||||
|
||||
# ── add + get_by_id + count ──────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_add_and_count(repo: _NoteRepo) -> None:
    """Two added rows are reflected in ``count()``."""
    notes = [_row(owner="u1", entry=entry) for entry in ("ep_1", "ep_2")]
    await repo.add(notes)
    assert await repo.count() == 2
|
||||
|
||||
|
||||
async def test_get_by_id_returns_typed_instance(repo: _NoteRepo) -> None:
    """``get_by_id`` returns a ``_Note`` carrying the stored text."""
    await repo.add([_row(owner="u1", entry="ep_1", text="hello")])
    fetched = await repo.get_by_id("u1_ep_1")
    assert fetched is not None
    assert isinstance(fetched, _Note)
    assert fetched.text == "hello"
|
||||
|
||||
|
||||
async def test_get_by_id_returns_none_when_missing(repo: _NoteRepo) -> None:
    """An unknown id yields ``None``."""
    missing = await repo.get_by_id("ghost")
    assert missing is None
|
||||
|
||||
|
||||
# ── upsert ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_upsert_inserts_on_new(repo: _NoteRepo) -> None:
    """Upserting an id that does not exist yet inserts the row."""
    fresh = _row(owner="u1", entry="ep_1", text="v1")
    await repo.upsert([fresh])
    stored = await repo.get_by_id("u1_ep_1")
    assert stored is not None
    assert stored.text == "v1"
|
||||
|
||||
|
||||
async def test_upsert_updates_on_existing(repo: _NoteRepo) -> None:
    """Upserting an existing id replaces the row instead of appending."""
    await repo.add([_row(owner="u1", entry="ep_1", text="v1")])
    await repo.upsert([_row(owner="u1", entry="ep_1", text="v2")])
    stored = await repo.get_by_id("u1_ep_1")
    assert stored is not None
    assert stored.text == "v2"
    # Still a single row: the second write was an update.
    assert await repo.count() == 1
|
||||
|
||||
|
||||
# ── find_where / find_one_where ─────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_find_where_returns_typed_list(repo: _NoteRepo) -> None:
    """``find_where`` returns ``_Note`` instances for the matching rows only."""
    seeded = [("u1", "ep_1"), ("u1", "ep_2"), ("u2", "ep_3")]
    await repo.add([_row(owner=owner, entry=entry) for owner, entry in seeded])
    matches = await repo.find_where("owner_id = 'u1'")
    assert len(matches) == 2
    assert all(isinstance(match, _Note) for match in matches)
    assert {match.entry_id for match in matches} == {"ep_1", "ep_2"}
|
||||
|
||||
|
||||
async def test_find_one_where_returns_first_match(repo: _NoteRepo) -> None:
    """``find_one_where`` returns the row matching the predicate."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    found = await repo.find_one_where("entry_id = 'ep_1'")
    assert found is not None
    assert found.entry_id == "ep_1"
|
||||
|
||||
|
||||
async def test_find_one_where_returns_none(repo: _NoteRepo) -> None:
    """A predicate with no match yields ``None``."""
    found = await repo.find_one_where("entry_id = 'ghost'")
    assert found is None
|
||||
|
||||
|
||||
# ── find_where_paginated ────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_find_where_paginated_first_page(repo: _NoteRepo) -> None:
    """With 5 rows, page 1 of size 2 holds the top two entries, total is 5."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 6)]
    await repo.add(seeded)
    page, total = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=True,
        page=1,
        page_size=2,
    )
    assert total == 5
    entry_ids = [note.entry_id for note in page]
    assert entry_ids == ["ep_5", "ep_4"]
|
||||
|
||||
|
||||
async def test_find_where_paginated_last_page_partial(repo: _NoteRepo) -> None:
    """With 5 rows, page 3 of size 2 holds only the single trailing row."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 6)]
    await repo.add(seeded)
    page, total = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=True,
        page=3,
        page_size=2,
    )
    assert total == 5
    entry_ids = [note.entry_id for note in page]
    assert entry_ids == ["ep_1"]
|
||||
|
||||
|
||||
async def test_find_where_paginated_ascending_sort(repo: _NoteRepo) -> None:
    """``descending=False`` returns rows in ascending ``entry_id`` order."""
    seeded = [_row(owner="u1", entry=f"ep_{i}") for i in range(1, 4)]
    await repo.add(seeded)
    page, total = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        descending=False,
        page=1,
        page_size=10,
    )
    assert total == 3
    entry_ids = [note.entry_id for note in page]
    assert entry_ids == ["ep_1", "ep_2", "ep_3"]
|
||||
|
||||
|
||||
async def test_find_where_paginated_empty_predicate(repo: _NoteRepo) -> None:
    """A predicate matching no rows gives an empty page and ``total == 0``."""
    result = await repo.find_where_paginated(
        "owner_id = 'ghost'",
        sort_by="entry_id",
        page=1,
        page_size=20,
    )
    page, total = result
    assert page == []
    assert total == 0
|
||||
|
||||
|
||||
async def test_find_where_paginated_filters_by_owner(repo: _NoteRepo) -> None:
    """``total`` counts rows matching the predicate, not every row in the table."""
    seeded = [("u1", "ep_1"), ("u1", "ep_2"), ("u2", "ep_3")]
    await repo.add([_row(owner=owner, entry=entry) for owner, entry in seeded])
    page, total = await repo.find_where_paginated(
        "owner_id = 'u1'",
        sort_by="entry_id",
        page=1,
        page_size=10,
    )
    assert total == 2
    assert {note.entry_id for note in page} == {"ep_1", "ep_2"}
|
||||
|
||||
|
||||
async def test_find_where_paginated_truncates_above_max_fetch(
    repo: _NoteRepo,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """More matches than ``max_fetch`` → a warning plus a page from the prefix.

    ``total`` must still be the true number of matching rows, while the
    page itself is drawn from only the first ``max_fetch`` rows. structlog
    is routed through the stdlib root logger (see
    ``core/observability/logging/factory.py``), so the warning is asserted
    through the ordinary ``caplog`` fixture.
    """
    # The CLI entry point is what normally sets up the structlog → stdlib
    # bridge; unit tests skip it, so configure logging explicitly to let
    # ``caplog`` see the warning.
    from everos.core.observability.logging import configure_logging

    configure_logging(level="WARNING")

    seeded = [_row(owner="u1", entry=f"ep_{i:03d}") for i in range(1, 11)]
    await repo.add(seeded)
    with caplog.at_level("WARNING"):
        page, total = await repo.find_where_paginated(
            "owner_id = 'u1'",
            sort_by="entry_id",
            page=1,
            page_size=3,
            max_fetch=5,
        )
    # All ten rows match even though only five were fetched.
    assert total == 10
    assert len(page) == 3
    assert "find_where_paginated truncated" in caplog.text
|
||||
|
||||
|
||||
# ── 5-table shared: find_by_owner / find_by_md_path ─────────────────────
|
||||
|
||||
|
||||
async def test_find_by_owner(repo: _NoteRepo) -> None:
    """``find_by_owner`` returns only the rows of the requested owner."""
    seeded = [("u1", "ep_1"), ("u1", "ep_2"), ("u2", "ep_3")]
    await repo.add([_row(owner=owner, entry=entry) for owner, entry in seeded])
    owned = await repo.find_by_owner("u1")
    assert {note.entry_id for note in owned} == {"ep_1", "ep_2"}
|
||||
|
||||
|
||||
async def test_find_by_md_path_round_trip(repo: _NoteRepo) -> None:
    """A row stored with an explicit ``md_path`` is found by that path."""
    md_file = "users/u1/notes/ep_1.md"
    await repo.add([_row(owner="u1", entry="ep_1", md_path=md_file)])
    found = await repo.find_by_md_path(md_file)
    assert found is not None
    assert found.entry_id == "ep_1"
|
||||
|
||||
|
||||
async def test_find_by_md_path_returns_none_when_missing(repo: _NoteRepo) -> None:
    """An unknown markdown path yields ``None``."""
    found = await repo.find_by_md_path("users/u1/notes/ghost.md")
    assert found is None
|
||||
|
||||
|
||||
# ── daily-log: find_by_owner_entry / find_by_session / find_by_parent ───
|
||||
|
||||
|
||||
async def test_find_by_owner_entry(repo: _NoteRepo) -> None:
    """An (owner, entry) pair resolves to the stored row."""
    await repo.add([_row(owner="u1", entry="ep_7")])
    found = await repo.find_by_owner_entry("u1", "ep_7")
    assert found is not None
    assert found.entry_id == "ep_7"
|
||||
|
||||
|
||||
async def test_find_by_owner_entry_returns_none_when_missing(
    repo: _NoteRepo,
) -> None:
    """An unknown (owner, entry) pair yields ``None``."""
    found = await repo.find_by_owner_entry("u1", "ghost")
    assert found is None
|
||||
|
||||
|
||||
async def test_find_by_owner_entries_returns_only_matching_rows(
    repo: _NoteRepo,
) -> None:
    """The bulk lookup returns just the requested entries of the given owner."""
    seeded = [
        ("u1", "ep_1"),
        ("u1", "ep_2"),
        ("u1", "ep_3"),
        ("u2", "ep_1"),  # same entry id under another owner — must not leak
    ]
    await repo.add([_row(owner=owner, entry=entry) for owner, entry in seeded])
    found = await repo.find_by_owner_entries("u1", ["ep_1", "ep_3"])
    assert {note.entry_id for note in found} == {"ep_1", "ep_3"}
    assert all(note.owner_id == "u1" for note in found)
|
||||
|
||||
|
||||
async def test_find_by_owner_entries_empty_input_short_circuits(
    repo: _NoteRepo,
) -> None:
    """An empty id list returns ``[]`` even though the owner has rows."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    found = await repo.find_by_owner_entries("u1", [])
    assert found == []
|
||||
|
||||
|
||||
async def test_find_by_session(repo: _NoteRepo) -> None:
    """``find_by_session`` returns only the rows of the requested session."""
    seeded = [("ep_1", "sess_a"), ("ep_2", "sess_a"), ("ep_3", "sess_b")]
    await repo.add(
        [_row(owner="u1", entry=entry, session=session) for entry, session in seeded]
    )
    in_session = await repo.find_by_session("u1", "sess_a")
    assert {note.entry_id for note in in_session} == {"ep_1", "ep_2"}
|
||||
|
||||
|
||||
async def test_find_by_parent(repo: _NoteRepo) -> None:
    """``find_by_parent`` returns only rows under the given parent type and id."""
    seeded = [
        ("ep_1", "memcell", "mc_x"),
        ("ep_2", "memcell", "mc_x"),
        ("ep_3", "other", "mc_y"),
    ]
    await repo.add(
        [
            _row(owner="u1", entry=entry, parent_type=kind, parent_id=parent)
            for entry, kind, parent in seeded
        ]
    )
    children = await repo.find_by_parent("memcell", "mc_x")
    assert {note.entry_id for note in children} == {"ep_1", "ep_2"}
|
||||
|
||||
|
||||
# ── chassis fallback behaviour ──────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_table_lookup_not_implemented_when_no_override() -> None:
    """A repo built without ``table=`` and without a ``_table_lookup`` override
    raises ``NotImplementedError`` on first use."""

    class _BareRepo(LanceRepoBase[_Note]):
        schema = _Note

    unbound = _BareRepo()
    with pytest.raises(NotImplementedError, match="_table_lookup"):
        await unbound.count()
|
||||
|
||||
|
||||
async def test_table_name_derived_from_schema() -> None:
    """``table_name`` on a repo matches its schema's ``TABLE_NAME``."""

    class _R(LanceRepoBase[_Note]):
        schema = _Note

    derived = _R().table_name
    # "_note" is the value declared as _Note.TABLE_NAME.
    assert derived == "_note"
|
||||
|
||||
|
||||
# ── SQL-quote escape defence ────────────────────────────────────────────
|
||||
|
||||
|
||||
# ── BaseLanceTable.ensure_fts_indexes ───────────────────────────────────
|
||||
|
||||
|
||||
async def test_ensure_fts_indexes_creates_index(tmp_path: Path) -> None:
    """Declared ``BM25_FIELDS`` becomes an FTS index after ensure.

    The connection is released in a ``finally`` block so a failing
    assertion does not leave it open.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_search_note", schema=_SearchNote)
        # Seed one row before asking for the index to be built.
        await table.add(
            [
                _SearchNote(
                    id="1",
                    text="hello world",
                    tokens="hello world",
                    vector=[1, 0, 0, 0],
                )
            ]
        )

        await _SearchNote.ensure_fts_indexes(table)

        indices = await table.list_indices()
        # ``columns`` may be empty/None on an index entry, hence the ``or []``.
        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
        assert "tokens" in indexed_cols
    finally:
        # AsyncConnection.close() is synchronous.
        conn.close()
|
||||
|
||||
|
||||
async def test_ensure_fts_indexes_is_idempotent(tmp_path: Path) -> None:
    """Calling twice is safe — no error, no duplicate index.

    The index count after the second call must equal the count after the
    first. The connection is released in a ``finally`` block so a failing
    assertion does not leave it open.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table = await conn.create_table("_search_note", schema=_SearchNote)
        await table.add(
            [_SearchNote(id="1", text="hi", tokens="hi", vector=[1, 0, 0, 0])]
        )

        await _SearchNote.ensure_fts_indexes(table)
        first = await table.list_indices()
        await _SearchNote.ensure_fts_indexes(table)
        second = await table.list_indices()

        assert len(first) == len(second)
    finally:
        # AsyncConnection.close() is synchronous.
        conn.close()
|
||||
|
||||
|
||||
async def test_ensure_fts_indexes_noop_when_no_fields_declared(
    repo: _NoteRepo,
) -> None:
    """``ensure_fts_indexes`` on a schema without ``BM25_FIELDS`` does not raise."""
    table = await repo._table()
    # ``_Note`` declares no BM25_FIELDS, so there is nothing to index.
    await _Note.ensure_fts_indexes(table)
    indices = await table.list_indices()
    # Other index kinds may be present; the check is only that the call
    # path completed and produced something iterable.
    is_iterable = isinstance(indices, list) or hasattr(indices, "__iter__")
    assert is_iterable
|
||||
|
||||
|
||||
# ── SQL-quote escape defence ────────────────────────────────────────────
|
||||
|
||||
|
||||
# ── delete_by_md_path ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_delete_by_md_path_removes_matching_row(repo: _NoteRepo) -> None:
    """Deleting by path removes that path's row and reports the count."""
    doomed_path = "users/u1/notes/ep_1.md"
    doomed = _row(owner="u1", entry="ep_1", md_path=doomed_path)
    survivor = _row(owner="u1", entry="ep_2")
    await repo.add([doomed, survivor])

    removed = await repo.delete_by_md_path(doomed_path)

    assert removed == 1
    assert await repo.find_by_md_path(doomed_path) is None
    # The row stored under a different path is left in place.
    assert await repo.count() == 1
|
||||
|
||||
|
||||
async def test_delete_by_md_path_returns_zero_when_no_match(
    repo: _NoteRepo,
) -> None:
    """Deleting an unknown path reports 0 and leaves existing rows alone."""
    await repo.add([_row(owner="u1", entry="ep_1")])
    removed = await repo.delete_by_md_path("users/u1/notes/ghost.md")
    assert removed == 0
    assert await repo.count() == 1
|
||||
|
||||
|
||||
async def test_delete_by_md_path_removes_multiple_entries_one_file(
    repo: _NoteRepo,
) -> None:
    """Every row sharing one markdown path is removed in a single call."""
    shared_path = "users/u1/notes/episode-2026-05-12.md"
    same_file = [
        _row(owner="u1", entry=entry, md_path=shared_path)
        for entry in ("ep_1", "ep_2", "ep_3")
    ]
    # Stored under its own default path, so it must survive the delete.
    elsewhere = _row(owner="u2", entry="ep_4")
    await repo.add([*same_file, elsewhere])

    removed = await repo.delete_by_md_path(shared_path)

    assert removed == 3
    assert await repo.count() == 1
|
||||
|
||||
|
||||
async def test_delete_by_md_path_escapes_single_quotes(
    repo: _NoteRepo,
) -> None:
    """A single quote inside the path still deletes exactly the one row."""
    quoted_path = "users/u1/notes/it's.md"
    await repo.add([_row(owner="u1", entry="ep_1", md_path=quoted_path)])
    removed = await repo.delete_by_md_path(quoted_path)
    assert removed == 1
|
||||
|
||||
|
||||
# ── SQL-quote escape defence (kept) ─────────────────────────────────────
|
||||
|
||||
|
||||
async def test_get_by_id_escapes_single_quotes(repo: _NoteRepo) -> None:
    """A single quote inside the id does not stop ``get_by_id`` finding the row."""
    quoted_id = "u1_it's_fine"
    note = _Note(
        id=quoted_id,
        owner_id="u1",
        entry_id="it's_fine",
        session_id="s",
        parent_type="memcell",
        parent_id="mc_1",
        md_path="x",
        text="t",
        vector=[1.0, 0.0, 0.0, 0.0],
    )
    await repo.add([note])
    fetched = await repo.get_by_id(quoted_id)
    assert fetched is not None
    assert fetched.entry_id == "it's_fine"
|
||||
|
||||
|
||||
# ── Concurrency: per-table write lock ───────────────────────────────────
|
||||
|
||||
|
||||
async def test_concurrent_upsert_disjoint_ids_no_lost_update(
    repo: _NoteRepo,
) -> None:
    """Regression for Bug B: concurrent upserts of distinct ids all land.

    A cascade running ``asyncio.gather`` over rows of one kind used to
    race on ``merge_insert`` and lose a write (seen on ``user_profile``,
    pk = owner_id: two disjoint INSERTs left a single row in LanceDB).
    The per-table ``asyncio.Lock`` in :meth:`LanceRepoBase.upsert` has to
    serialise those writes so that no submitted row goes missing.
    """
    n = 16
    batch = [_row(owner=f"u_{i}", entry=f"ep_{i}") for i in range(n)]
    pending = [repo.upsert([note]) for note in batch]
    await asyncio.gather(*pending)
    assert await repo.count() == n
    for i in range(n):
        fetched = await repo.get_by_id(f"u_{i}_ep_{i}")
        assert fetched is not None, f"u_{i}_ep_{i} disappeared after concurrent upsert"
|
||||
|
||||
|
||||
async def test_concurrent_upsert_same_id_last_writer_wins(
    repo: _NoteRepo,
) -> None:
    """Two concurrent upserts of one pk leave exactly one row.

    Either text may win; what matters is that there is no duplicate row
    and the surviving row carries one of the two submitted texts.
    """
    contenders = [
        _row(owner="u1", entry="ep_1", text=label) for label in ("A", "B")
    ]
    await asyncio.gather(*[repo.upsert([note]) for note in contenders])
    assert await repo.count() == 1
    winner = await repo.get_by_id("u1_ep_1")
    assert winner is not None
    assert winner.text in {"A", "B"}
|
||||
|
||||
|
||||
async def test_read_not_blocked_by_write_lock(repo: _NoteRepo) -> None:
    """A read completes while the table's write lock is held.

    The test takes the table lock by hand and then performs a
    ``get_by_id`` under a 2-second timeout; the read must resolve rather
    than wait for the lock.
    """
    await repo.add([_row(owner="u1", entry="ep_1", text="seed")])
    table_lock = repo._write_lock(repo.table_name)
    async with table_lock:
        # A read that waited on this lock would hit the timeout instead.
        pending_read = repo.get_by_id("u1_ep_1")
        fetched = await asyncio.wait_for(pending_read, timeout=2.0)
    assert fetched is not None
    assert fetched.text == "seed"
|
||||
|
||||
|
||||
async def test_write_lock_is_per_table(tmp_path: Path) -> None:
    """Distinct tables share no lock — writes on table A do not stall
    writes on table B.

    Two repos are bound to differently named tables and the test asserts
    that the lock objects returned for their table names are not the same
    object. The connection is released in a ``finally`` block so a failing
    assertion does not leave it open.
    """

    class _OtherNote(BaseLanceTable):
        TABLE_NAME: ClassVar[str] = "_other_note"
        id: str
        owner_id: str
        entry_id: str
        session_id: str
        parent_type: str
        parent_id: str
        md_path: str
        text: str
        vector: Vector(4)  # type: ignore[valid-type]

    class _OtherRepo(LanceDailyLogRepoBase[_OtherNote]):
        schema = _OtherNote

    class _NoteARepo(LanceDailyLogRepoBase[_Note]):
        schema = _Note

        # Report the name the table was actually created under instead of
        # the schema's own TABLE_NAME.
        @property
        def table_name(self) -> str:
            return "_note_a"

    mr = MemoryRoot(tmp_path)
    mr.ensure()
    conn = await open_lancedb_connection(mr.lancedb_dir, LanceDBSettings())
    try:
        table_a = await conn.create_table("_note_a", schema=_Note)
        table_b = await conn.create_table(_OtherNote.TABLE_NAME, schema=_OtherNote)

        repo_a = _NoteARepo(table=table_a)
        repo_b = _OtherRepo(table=table_b)
        assert repo_a._write_lock(repo_a.table_name) is not repo_b._write_lock(
            repo_b.table_name
        )
    finally:
        # AsyncConnection.close() is synchronous.
        conn.close()
|
||||
@ -0,0 +1,82 @@
|
||||
"""LanceDB IO toolkit — typical workflow demo.
|
||||
|
||||
End-to-end story for how to author + use a LanceDB-backed table in everos:
|
||||
|
||||
1. Define a table schema by subclassing :class:`BaseLanceTable` and
|
||||
declaring a ``Vector(N)`` column for the embedding.
|
||||
2. ``open_lancedb_connection`` to get an :class:`AsyncConnection`.
|
||||
3. ``conn.create_table(name, schema=Cls)`` to create the table from
|
||||
the Pydantic schema.
|
||||
4. ``table.add(rows)`` to insert.
|
||||
5. ``table.query().nearest_to(vec).limit(k).to_list()`` for vector
|
||||
search (BM25 + scalar filter can chain in the same query).
|
||||
6. ``table.count_rows()`` for size.
|
||||
7. Mutate via :func:`touch` + :meth:`AsyncTable.update` (LanceDB has
|
||||
no SQL ``onupdate`` equivalent — the app must bump ``updated_at``).
|
||||
8. ``table.delete(predicate)`` to remove rows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from everos.config import LanceDBSettings
|
||||
from everos.core.persistence import (
|
||||
BaseLanceTable,
|
||||
MemoryRoot,
|
||||
Vector,
|
||||
open_lancedb_connection,
|
||||
)
|
||||
|
||||
|
||||
class _DemoNote(BaseLanceTable):
    """Demo table — used only by this test module."""

    # Free-text payload; the workflow test filters and deletes on this column.
    text: str
    # Embedding column; the workflow test inserts 4-element vectors.
    vector: Vector(4)  # 4-dim for the test fixture
|
||||
|
||||
|
||||
async def test_lancedb_typical_workflow(tmp_path: Path) -> None:
    """Exercise create → add → count → search → filter → delete → list.

    The connection is closed in a ``finally`` block so a failing assertion
    does not leave it open for the rest of the test session.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    settings = LanceDBSettings()

    # Open async connection rooted at <memory_root>/.index/lancedb/
    conn = await open_lancedb_connection(mr.lancedb_dir, settings)
    try:
        # Create the table from the BaseLanceTable schema.
        table = await conn.create_table("_demo_notes", schema=_DemoNote)

        # Insert rows (Pydantic instances; created_at / updated_at filled in
        # by BaseLanceTable's default_factory).
        rows = [
            _DemoNote(text="hello world", vector=[1.0, 0.0, 0.0, 0.0]),
            _DemoNote(text="goodbye cruel world", vector=[0.0, 1.0, 0.0, 0.0]),
            _DemoNote(text="welcome aboard", vector=[1.0, 0.5, 0.0, 0.0]),
        ]
        await table.add(rows)

        # Count.
        assert await table.count_rows() == 3

        # Vector search — nearest_to ranks rows by distance to the query.
        results = (
            await table.query().nearest_to([0.95, 0.05, 0.0, 0.0]).limit(2).to_list()
        )
        assert len(results) == 2
        # The closest row to [0.95, 0.05, 0, 0] is "hello world" [1, 0, 0, 0]
        # ahead of "welcome aboard" [1, 0.5, 0, 0].
        assert results[0]["text"] == "hello world"

        # Filter (scalar predicate). LanceDB SQL-like predicate string.
        only_hello = await table.query().where("text = 'hello world'").to_list()
        assert len(only_hello) == 1
        assert only_hello[0]["text"] == "hello world"

        # Delete by predicate.
        await table.delete("text = 'goodbye cruel world'")
        assert await table.count_rows() == 2

        # List tables on the connection.
        tables_response = await conn.list_tables()
        assert "_demo_notes" in list(tables_response.tables)
    finally:
        conn.close()
|
||||
96
tests/unit/test_core/test_persistence/test_locking.py
Normal file
96
tests/unit/test_core/test_persistence/test_locking.py
Normal file
@ -0,0 +1,96 @@
|
||||
"""Unit tests for memory_root_lock async context manager."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import multiprocessing
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import LockError, MemoryRoot, memory_root_lock
|
||||
|
||||
|
||||
async def test_lock_creates_anchor_file(tmp_path: Path) -> None:
    """While the lock is held, ``MemoryRoot.lock_file`` exists on disk."""
    root = MemoryRoot(tmp_path)
    async with memory_root_lock(root):
        anchor_present = root.lock_file.exists()
        assert anchor_present
|
||||
|
||||
|
||||
async def test_lock_acquire_release_acquire(tmp_path: Path) -> None:
    """Acquire, release, then acquire again from the same process."""
    root = MemoryRoot(tmp_path)
    # Two back-to-back acquisitions; the second must not be blocked by
    # anything the first one left behind.
    for _attempt in range(2):
        async with memory_root_lock(root):
            pass
|
||||
|
||||
|
||||
def _hold_lock(memory_root_path: str, ready: object, release: object) -> None:
    """Subprocess entry point: take the lock, signal, hold, then release.

    :func:`memory_root_lock` is an async context manager, so this helper
    drives its own event loop through :func:`anyio.run`.  ``ready.set()``
    is called once the lock is held; the lock is kept until
    ``release.wait(5)`` returns.
    """

    async def _acquire_and_wait() -> None:
        root = MemoryRoot(memory_root_path)
        async with memory_root_lock(root, blocking=True):
            ready.set()
            # Wait in a worker thread so the event loop is not blocked.
            await anyio.to_thread.run_sync(release.wait, 5)

    anyio.run(_acquire_and_wait)
|
||||
|
||||
|
||||
async def test_nonblocking_raises_when_held_by_other_process(tmp_path: Path) -> None:
    """Different process holding the lock → blocking=False raises LockError.

    A spawned subprocess takes the lock and signals ``ready``; the main
    process then attempts a non-blocking acquire and must get
    :class:`LockError`.
    """
    mr = MemoryRoot(tmp_path)
    ctx = multiprocessing.get_context("spawn")
    ready = ctx.Event()
    release = ctx.Event()
    proc = ctx.Process(target=_hold_lock, args=(str(mr.root), ready, release))
    proc.start()
    try:
        assert ready.wait(timeout=5), "subprocess failed to acquire lock"
        with pytest.raises(LockError):
            async with memory_root_lock(mr, blocking=False):
                pass
    finally:
        # Let the child leave its wait, then give it a moment to exit.
        release.set()
        proc.join(timeout=5)
        if proc.is_alive():
            proc.terminate()
            # Reap the terminated child so it does not linger as a zombie.
            proc.join(timeout=5)
|
||||
|
||||
|
||||
async def test_blocking_waits_for_release(tmp_path: Path) -> None:
    """Different process holding lock + main process blocking=True waits.

    A spawned subprocess holds the lock and a timer tells it to release
    after 0.2 s.  The main process must therefore spend a measurable time
    (asserted as >= 0.1 s) inside the blocking acquire.
    """
    import threading  # function-local: only this test needs it

    mr = MemoryRoot(tmp_path)
    ctx = multiprocessing.get_context("spawn")
    ready = ctx.Event()
    release = ctx.Event()
    proc = ctx.Process(target=_hold_lock, args=(str(mr.root), ready, release))
    proc.start()
    try:
        assert ready.wait(timeout=5), "subprocess failed to acquire lock"
        # Schedule the subprocess to release shortly; main process should
        # acquire the lock after that.
        release_started = time.monotonic()
        releaser = threading.Timer(0.2, release.set)
        releaser.daemon = True
        releaser.start()
        async with memory_root_lock(mr, blocking=True):
            elapsed = time.monotonic() - release_started
            # Should have waited at least roughly the delay.
            assert elapsed >= 0.1
    finally:
        release.set()
        proc.join(timeout=5)
        if proc.is_alive():
            proc.terminate()
            # Reap the terminated child so it does not linger as a zombie.
            proc.join(timeout=5)
|
||||
@ -0,0 +1,68 @@
|
||||
"""Tests for Frontmatter base classes (chassis layer)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
AgentScopedFrontmatter,
|
||||
BaseFrontmatter,
|
||||
UserScopedFrontmatter,
|
||||
)
|
||||
|
||||
|
||||
def test_base_requires_id_and_type() -> None:
    """Constructing ``BaseFrontmatter`` with no arguments fails validation."""
    expect_invalid = pytest.raises(ValidationError)
    with expect_invalid:
        BaseFrontmatter()  # type: ignore[call-arg]
|
||||
|
||||
|
||||
def test_base_default_schema_version_is_one() -> None:
    """``schema_version`` is 1 when the caller does not supply it."""
    frontmatter = BaseFrontmatter(id="x", type="t")
    version = frontmatter.schema_version
    assert version == 1
|
||||
|
||||
|
||||
def test_base_extra_fields_allowed() -> None:
    """Undeclared keyword fields are accepted and appear in ``model_dump``."""
    extras = {
        "md_sha256": "abc",  # L2
        "last_indexed_at": "2026-04-22T10:00:00Z",
        "custom_user_field": "anything",  # L4
    }
    frontmatter = BaseFrontmatter(id="x", type="t", **extras)
    as_dict = frontmatter.model_dump()
    assert as_dict["md_sha256"] == "abc"
    assert as_dict["custom_user_field"] == "anything"
|
||||
|
||||
|
||||
def test_user_scoped_track_default() -> None:
    """``UserScopedFrontmatter.track`` defaults to ``"user"``."""
    frontmatter = UserScopedFrontmatter(id="x", type="t", user_id="u_jason")
    track = frontmatter.track
    assert track == "user"
|
||||
|
||||
|
||||
def test_user_scoped_requires_user_id() -> None:
    """Omitting ``user_id`` fails validation."""
    expect_invalid = pytest.raises(ValidationError)
    with expect_invalid:
        UserScopedFrontmatter(id="x", type="t")  # type: ignore[call-arg]
|
||||
|
||||
|
||||
def test_agent_scoped_track_default() -> None:
    """``AgentScopedFrontmatter.track`` defaults to ``"agent"``."""
    frontmatter = AgentScopedFrontmatter(id="x", type="t", agent_id="agent_zhangsan")
    track = frontmatter.track
    assert track == "agent"
|
||||
|
||||
|
||||
def test_agent_scoped_requires_agent_id() -> None:
    """Omitting ``agent_id`` fails validation."""
    expect_invalid = pytest.raises(ValidationError)
    with expect_invalid:
        AgentScopedFrontmatter(id="x", type="t")  # type: ignore[call-arg]
|
||||
|
||||
|
||||
def test_track_literal_rejects_invalid_value() -> None:
    """A user-scoped schema refuses ``track="agent"``."""
    bad_fields = {"id": "x", "type": "t", "user_id": "u", "track": "agent"}
    with pytest.raises(ValidationError):
        UserScopedFrontmatter(**bad_fields)
|
||||
|
||||
|
||||
def test_scope_dir_classvars() -> None:
    """Each frontmatter class exposes the expected ``SCOPE_DIR`` value."""
    expectations = (
        (BaseFrontmatter, ""),  # scope-agnostic by default
        (UserScopedFrontmatter, "users"),
        (AgentScopedFrontmatter, "agents"),
    )
    for schema, scope_dir in expectations:
        assert schema.SCOPE_DIR == scope_dir
|
||||
@ -0,0 +1,94 @@
|
||||
"""Unit tests for entry marker parsing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.core.persistence import find_entry, split_entries
|
||||
|
||||
|
||||
def test_split_no_entries() -> None:
    """Text without entry markers yields an empty list."""
    markdown = "# heading\n\nbody."
    found = split_entries(markdown)
    assert found == []
|
||||
|
||||
|
||||
def test_split_single_entry() -> None:
    """One marked entry surrounded by other text is returned with its span."""
    body = (
        "preamble\n"
        "<!-- entry:abc123 -->\n"
        "content here\n"
        "<!-- /entry:abc123 -->\n"
        "trailing\n"
    )
    found = split_entries(body)
    assert len(found) == 1
    entry = found[0]
    assert entry.id == "abc123"
    assert entry.body == "content here"
    # The start/end offsets must cover both markers.
    span = body[entry.start : entry.end]
    assert span.startswith("<!-- entry:abc123 -->")
    assert span.endswith("<!-- /entry:abc123 -->")
|
||||
|
||||
|
||||
def test_split_multiple_entries() -> None:
    """Two consecutive entries come back in document order."""
    body = (
        "<!-- entry:e1 -->\nfirst\n<!-- /entry:e1 -->\n"
        "<!-- entry:e2 -->\nsecond\n<!-- /entry:e2 -->\n"
    )
    found = split_entries(body)
    ids = [entry.id for entry in found]
    assert ids == ["e1", "e2"]
    first, second = found[0], found[1]
    assert first.body == "first"
    assert second.body == "second"
|
||||
|
||||
|
||||
def test_split_unmatched_open() -> None:
    """An unclosed trailing entry is ignored; the closed one before it stays."""
    body = "<!-- entry:e1 -->\nok\n<!-- /entry:e1 -->\n<!-- entry:e2 -->\nno close\n"
    found = split_entries(body)
    ids = [entry.id for entry in found]
    assert ids == ["e1"]
|
||||
|
||||
|
||||
def test_split_mismatched_id() -> None:
    """A close marker with a different id does not terminate the entry."""
    body = "<!-- entry:e1 -->\ncontent\n<!-- /entry:other -->\n"
    found = split_entries(body)
    assert found == []
|
||||
|
||||
|
||||
def test_split_id_with_underscore_and_hyphen() -> None:
    """Ids containing ``_`` and ``-`` are recognised."""
    body = "<!-- entry:abc_def-123 -->\nx\n<!-- /entry:abc_def-123 -->\n"
    found = split_entries(body)
    assert len(found) == 1
    only = found[0]
    assert only.id == "abc_def-123"
|
||||
|
||||
|
||||
def test_split_offsets_consistent() -> None:
    """Slicing the source with ``start:end`` gives exactly marker-to-marker."""
    body = "before\n<!-- entry:e1 -->\nx\n<!-- /entry:e1 -->\nafter\n"
    entry = split_entries(body)[0]
    span = body[entry.start : entry.end]
    assert span == "<!-- entry:e1 -->\nx\n<!-- /entry:e1 -->"
|
||||
|
||||
|
||||
def test_find_entry_found() -> None:
    """``find_entry`` returns the entry whose id matches."""
    body = (
        "<!-- entry:a -->\nfirst\n<!-- /entry:a -->\n"
        "<!-- entry:b -->\nsecond\n<!-- /entry:b -->\n"
    )
    match = find_entry(body, "b")
    assert match is not None
    assert match.id == "b"
    assert match.body == "second"
|
||||
|
||||
|
||||
def test_find_entry_not_found() -> None:
    """Looking up an id that is not present returns ``None``."""
    body = "<!-- entry:a -->\nx\n<!-- /entry:a -->\n"
    match = find_entry(body, "missing")
    assert match is None
|
||||
|
||||
|
||||
def test_find_entry_open_without_close() -> None:
    """An entry with no close marker is not found."""
    body = "<!-- entry:a -->\nx\n"  # no close
    match = find_entry(body, "a")
    assert match is None
|
||||
|
||||
|
||||
def test_split_entry_body_no_internal_newline_stripping() -> None:
    """Blank lines inside an entry survive.

    Only the newline right after the open marker and the one right before
    the close marker are absent from ``body``.
    """
    source = "<!-- entry:e1 -->\nline1\n\nline3\n<!-- /entry:e1 -->\n"
    entry = split_entries(source)[0]
    assert entry.body == "line1\n\nline3"
|
||||
@ -0,0 +1,99 @@
|
||||
"""Tests for ``EntryId`` parse / format / next_for."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import EntryId
|
||||
|
||||
# ── format ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_format_pads_seq_to_eight_digits() -> None:
    """``format()`` zero-pads a small seq to eight digits."""
    day = dt.date(2026, 4, 22)
    entry_id = EntryId(prefix="umc", date=day, seq=1)
    rendered = entry_id.format()
    assert rendered == "umc_20260422_00000001"
|
||||
|
||||
|
||||
def test_format_pads_seq_at_99999999() -> None:
    """The largest eight-digit seq is emitted unchanged."""
    day = dt.date(2026, 4, 22)
    entry_id = EntryId(prefix="umc", date=day, seq=99_999_999)
    rendered = entry_id.format()
    assert rendered == "umc_20260422_99999999"
|
||||
|
||||
|
||||
def test_str_uses_format() -> None:
    """``str(entry_id)`` yields the formatted id string."""
    day = dt.date(2026, 1, 1)
    entry_id = EntryId(prefix="ep", date=day, seq=42)
    as_text = str(entry_id)
    assert as_text == "ep_20260101_00000042"
|
||||
|
||||
|
||||
# ── parse ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_parse_round_trip() -> None:
    """Parsing a canonical id exposes its parts and formats back unchanged."""
    raw = "umc_20260422_00000001"
    parsed = EntryId.parse(raw)
    assert parsed.prefix == "umc"
    expected_day = dt.date(2026, 4, 22)
    assert parsed.date == expected_day
    assert parsed.seq == 1
    assert parsed.format() == raw
|
||||
|
||||
|
||||
def test_parse_handles_seq_above_pad_width() -> None:
    """A nine-digit seq parses and is formatted without truncation."""
    raw = "umc_20260422_150000000"
    parsed = EntryId.parse(raw)
    assert parsed.seq == 150_000_000
    assert parsed.format() == "umc_20260422_150000000"
|
||||
|
||||
|
||||
def test_parse_accepts_legacy_four_digit_seq() -> None:
    """A four-digit seq string parses; formatting widens it to eight digits."""
    parsed = EntryId.parse("umc_20260422_0001")
    assert parsed.seq == 1
    # Re-formatting uses the eight-digit padding.
    rendered = parsed.format()
    assert rendered == "umc_20260422_00000001"
|
||||
|
||||
|
||||
def test_parse_accepts_legacy_three_digit_seq() -> None:
    """A three-digit seq string parses; formatting widens it to eight digits."""
    parsed = EntryId.parse("umc_20260422_001")
    assert parsed.seq == 1
    rendered = parsed.format()
    assert rendered == "umc_20260422_00000001"
|
||||
|
||||
|
||||
def test_parse_rejects_too_few_segments() -> None:
    """An id without a seq segment raises ``ValueError``."""
    incomplete = "umc_20260422"
    with pytest.raises(ValueError, match="invalid entry id format"):
        EntryId.parse(incomplete)
|
||||
|
||||
|
||||
def test_parse_rejects_invalid_date() -> None:
    """A non-numeric date segment raises ``ValueError``."""
    bad_date = "umc_2026XX22_00000001"
    with pytest.raises(ValueError, match="invalid date"):
        EntryId.parse(bad_date)
|
||||
|
||||
|
||||
def test_parse_rejects_non_numeric_seq() -> None:
    """A non-numeric seq segment raises ``ValueError``."""
    bad_seq = "umc_20260422_xxxx"
    with pytest.raises(ValueError, match="invalid seq"):
        EntryId.parse(bad_seq)
|
||||
|
||||
|
||||
def test_parse_rejects_empty_prefix() -> None:
    """An id that starts with ``_`` (empty prefix) raises ``ValueError``."""
    no_prefix = "_20260422_00000001"
    with pytest.raises(ValueError, match="empty prefix"):
        EntryId.parse(no_prefix)
|
||||
|
||||
|
||||
# ── next_for ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_next_for_seq_is_count_plus_one() -> None:
    """With two existing entries the next seq is 3."""
    day = dt.date(2026, 4, 22)
    upcoming = EntryId.next_for("umc", day, current_count=2)
    assert upcoming.seq == 3
    assert upcoming.format() == "umc_20260422_00000003"
|
||||
|
||||
|
||||
def test_next_for_starts_at_one_when_empty() -> None:
    """With no existing entries the next seq is 1."""
    day = dt.date(2026, 4, 22)
    upcoming = EntryId.next_for("umc", day, current_count=0)
    assert upcoming.seq == 1
|
||||
|
||||
|
||||
def test_next_for_rejects_negative_count() -> None:
    """A negative ``current_count`` raises ``ValueError``."""
    day = dt.date(2026, 4, 22)
    with pytest.raises(ValueError, match="must be >= 0"):
        EntryId.next_for("umc", day, current_count=-1)
|
||||
@ -0,0 +1,168 @@
|
||||
"""Unit tests for frontmatter parse / dump + path_glob chassis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import (
|
||||
AgentScopedFrontmatter,
|
||||
BaseFrontmatter,
|
||||
DailyLogPathMixin,
|
||||
SkillPathMixin,
|
||||
UserScopedFrontmatter,
|
||||
dump_frontmatter,
|
||||
parse_frontmatter,
|
||||
)
|
||||
|
||||
|
||||
def test_parse_no_frontmatter() -> None:
    """Text with no ``---`` block gives empty metadata and the text as body."""
    source = "# Just a heading\n\nbody."
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {}
    assert remainder == source
|
||||
|
||||
|
||||
def test_parse_empty_frontmatter() -> None:
    """An empty ``---``/``---`` block is consumed and yields empty metadata."""
    source = "---\n---\n# body\n"
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {}
    assert remainder == "# body\n"
|
||||
|
||||
|
||||
def test_parse_simple_frontmatter() -> None:
    """Scalar and flow-list values are parsed into a dict."""
    source = "---\ntitle: Hello\ntags: [a, b]\n---\n# body\n"
    metadata, remainder = parse_frontmatter(source)
    expected = {"title": "Hello", "tags": ["a", "b"]}
    assert metadata == expected
    assert remainder == "# body\n"
|
||||
|
||||
|
||||
def test_parse_nested_frontmatter() -> None:
    """A nested mapping is parsed into a nested dict."""
    source = "---\nuser:\n id: u_1\n name: Alice\n---\nbody"
    metadata, remainder = parse_frontmatter(source)
    expected = {"user": {"id": "u_1", "name": "Alice"}}
    assert metadata == expected
    assert remainder == "body"
|
||||
|
||||
|
||||
def test_parse_no_closing_delim() -> None:
    """Without a closing ``---`` the input is returned untouched as body."""
    source = "---\ntitle: Hello\n# body without closing\n"
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {}
    assert remainder == source
|
||||
|
||||
|
||||
def test_parse_non_mapping_yaml() -> None:
    """A YAML list in the block gives empty metadata and the original text."""
    source = "---\n- item1\n- item2\n---\nbody\n"
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {}
    assert remainder == source
|
||||
|
||||
|
||||
def test_parse_opening_delim_no_newline() -> None:
    """``---`` not followed by a newline is not treated as frontmatter."""
    source = "---this is not frontmatter"
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {}
    assert remainder == source
|
||||
|
||||
|
||||
def test_parse_unicode_values() -> None:
    """Non-ASCII values and body text come through unchanged."""
    source = "---\ntitle: 你好\n---\n世界"
    metadata, remainder = parse_frontmatter(source)
    assert metadata == {"title": "你好"}
    assert remainder == "世界"
|
||||
|
||||
|
||||
def test_dump_empty_mapping_returns_empty_string() -> None:
    """Dumping an empty mapping produces ``""``."""
    rendered = dump_frontmatter({})
    assert rendered == ""
|
||||
|
||||
|
||||
def test_dump_simple_mapping() -> None:
    """The dump is delimited by ``---`` lines and contains the key/value."""
    rendered = dump_frontmatter({"title": "Hello"})
    delimiter = "---\n"
    assert rendered.startswith(delimiter)
    assert rendered.endswith(delimiter)
    assert "title: Hello" in rendered
|
||||
|
||||
|
||||
def test_dump_preserves_key_order() -> None:
    """Keys are emitted in insertion order, not sorted."""
    rendered = dump_frontmatter({"z": 1, "a": 2, "m": 3})
    inner = rendered.strip("-\n")
    seen = []
    for line in inner.strip().splitlines():
        if ":" not in line:
            continue
        seen.append(line.split(":", 1)[0])
    assert seen == ["z", "a", "m"]
|
||||
|
||||
|
||||
def test_dump_unicode() -> None:
    """Non-ASCII values appear verbatim in the dump."""
    rendered = dump_frontmatter({"title": "你好"})
    assert "你好" in rendered  # allow_unicode keeps non-ASCII verbatim
|
||||
|
||||
|
||||
def test_round_trip() -> None:
    """Dump followed by parse recovers both the mapping and the body."""
    original_meta = {"title": "Hello", "tags": ["a", "b"], "nested": {"k": "v"}}
    original_body = "# Body\n\nLine.\n"
    document = dump_frontmatter(original_meta) + original_body
    recovered_meta, recovered_body = parse_frontmatter(document)
    assert recovered_meta == original_meta
    assert recovered_body == original_body
|
||||
|
||||
|
||||
# ── path_glob chassis ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_base_path_glob_raises_not_implemented() -> None:
    """A schema with no strategy mixin must surface a clear error.

    The error is expected to be ``NotImplementedError`` with a message
    matching ``path_glob``.
    """

    # Subclass that adds only a ``type`` field and no path-strategy mixin.
    class _PlainFm(BaseFrontmatter):
        type: Literal["_plain"] = "_plain"

    with pytest.raises(NotImplementedError, match="path_glob"):
        _PlainFm.path_glob()
|
||||
|
||||
|
||||
def test_daily_log_path_glob_user_scope() -> None:
    """Mixin builds ``*/*/users/*/<dir>/<prefix>-*.md`` from ClassVars."""

    # DIR_NAME and FILE_PREFIX supply the ``<dir>`` and ``<prefix>`` parts.
    class _UserDaily(DailyLogPathMixin, UserScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "demo"
        FILE_PREFIX: ClassVar[str] = "entry"
        type: Literal["_user_daily"] = "_user_daily"

    assert _UserDaily.path_glob() == "*/*/users/*/demo/entry-*.md"
|
||||
|
||||
|
||||
def test_daily_log_path_glob_agent_scope() -> None:
    """Same mixin under agent scope: the scope segment becomes ``agents``."""

    # DIR_NAME and FILE_PREFIX supply the directory and file-name prefix.
    class _AgentDaily(DailyLogPathMixin, AgentScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "cases"
        FILE_PREFIX: ClassVar[str] = "case"
        type: Literal["_agent_daily"] = "_agent_daily"

    assert _AgentDaily.path_glob() == "*/*/agents/*/cases/case-*.md"
|
||||
|
||||
|
||||
def test_skill_path_glob() -> None:
    """SkillPathMixin builds ``*/*/<scope>/*/<container>/<prefix>*/<main>``."""

    # The three ClassVars supply ``<container>``, ``<prefix>`` and ``<main>``.
    class _AgentSkill(SkillPathMixin, AgentScopedFrontmatter):
        SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
        SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
        SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
        type: Literal["_agent_skill"] = "_agent_skill"

    assert _AgentSkill.path_glob() == "*/*/agents/*/skills/skill_*/SKILL.md"
|
||||
|
||||
|
||||
def test_strategy_mixin_overrides_base_via_mro() -> None:
    """Strategy mixin placed first in the parent list wins over abstract base.

    Calling ``path_glob`` must return the mixin's concrete glob rather than
    raise the base class's ``NotImplementedError``.
    """

    class _Daily(DailyLogPathMixin, UserScopedFrontmatter):
        DIR_NAME: ClassVar[str] = "x"
        FILE_PREFIX: ClassVar[str] = "y"
        type: Literal["_daily_mro"] = "_daily_mro"

    # Reaching this line without an exception already shows the abstract
    # implementation is shadowed; the equality check pins the exact result.
    glob = _Daily.path_glob()
    assert isinstance(glob, str)
    assert glob == "*/*/users/*/x/y-*.md"
|
||||
@ -0,0 +1,66 @@
|
||||
"""Unit tests for MarkdownReader."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MarkdownReader
|
||||
|
||||
|
||||
def test_parse_text_with_frontmatter_and_entries() -> None:
    """Frontmatter, body, and a single entry are all extracted from text."""
    source = (
        "---\n"
        "title: Day Log\n"
        "date: 2026-04-22\n"
        "---\n"
        "# Header\n"
        "<!-- entry:e1 -->\n"
        "first entry\n"
        "<!-- /entry:e1 -->\n"
    )
    document = MarkdownReader.parse(source)
    # PyYAML auto-converts unquoted ISO dates to datetime.date.
    expected_meta = {
        "title": "Day Log",
        "date": datetime.date(2026, 4, 22),
    }
    assert document.frontmatter == expected_meta
    assert "# Header" in document.body
    assert len(document.entries) == 1
    first = document.entries[0]
    assert first.id == "e1"
    assert first.body == "first entry"
|
||||
|
||||
|
||||
def test_parse_no_frontmatter_no_entries() -> None:
    """Plain markdown yields empty metadata, the full body, and no entries."""
    source = "# Just a header\n\nbody.\n"
    document = MarkdownReader.parse(source)
    assert document.frontmatter == {}
    assert document.body == source
    assert document.entries == []
|
||||
|
||||
|
||||
def test_parse_only_frontmatter() -> None:
    """A document that is only frontmatter has an empty body and no entries."""
    source = "---\nkey: value\n---\n"
    document = MarkdownReader.parse(source)
    assert document.frontmatter == {"key": "value"}
    assert document.body == ""
    assert document.entries == []
|
||||
|
||||
|
||||
async def test_read_file(tmp_path: Path) -> None:
    """``MarkdownReader.read`` parses a file written to disk."""
    doc_path = tmp_path / "doc.md"
    content = "---\nk: v\n---\n<!-- entry:x -->\nbody\n<!-- /entry:x -->\n"
    doc_path.write_text(content, encoding="utf-8")
    document = await MarkdownReader.read(doc_path)
    assert document.frontmatter == {"k": "v"}
    first = document.entries[0]
    assert first.id == "x"
|
||||
|
||||
|
||||
async def test_read_unicode_file(tmp_path: Path) -> None:
    """A UTF-8 file with non-ASCII text is read back unchanged."""
    doc_path = tmp_path / "zh.md"
    content = "---\ntitle: 你好\n---\n世界\n"
    doc_path.write_text(content, encoding="utf-8")
    document = await MarkdownReader.read(doc_path)
    assert document.frontmatter == {"title": "你好"}
    assert document.body == "世界\n"
|
||||
@ -0,0 +1,214 @@
|
||||
"""Tests for the audit-form structured entry chassis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
StructuredEntry,
|
||||
parse_structured_entry,
|
||||
render_structured_entry,
|
||||
)
|
||||
|
||||
# ── render ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_render_with_header_inline_and_sections() -> None:
    """Header, inline fields, and sections all appear in the rendered text."""
    inline_fields = {
        "type": "episode",
        "user_id": "u_jason",
        "group_id": "sp_1",
    }
    rendered = render_structured_entry(
        header="ep_20260422_001",
        inline=inline_fields,
        sections={"Summary": "first line\nsecond line"},
    )
    assert rendered.startswith("## ep_20260422_001\n\n")
    assert "**type**: episode" in rendered
    assert "**user_id**: u_jason" in rendered
    assert "**group_id**: sp_1" in rendered
    assert "### Summary\nfirst line\nsecond line" in rendered
|
||||
|
||||
|
||||
def test_render_inline_only_no_header_no_sections() -> None:
    """With only inline fields the output is the bare ``**key**: value`` line."""
    rendered = render_structured_entry(inline={"k": "v"})
    expected = "**k**: v"
    assert rendered == expected
|
||||
|
||||
|
||||
def test_render_lists_use_bracket_notation() -> None:
    """Lists and tuples are rendered as ``[a, b]``."""
    inline_fields = {"participants": ["u_jason", "u_sarah"], "tags": ("a", "b")}
    rendered = render_structured_entry(inline=inline_fields)
    assert "**participants**: [u_jason, u_sarah]" in rendered
    assert "**tags**: [a, b]" in rendered
|
||||
|
||||
|
||||
def test_render_none_value_renders_empty() -> None:
    """A ``None`` value renders as an empty value after the key."""
    rendered = render_structured_entry(inline={"optional": None})
    expected = "**optional**: "
    assert rendered == expected
|
||||
|
||||
|
||||
def test_render_scalar_uses_str() -> None:
    """Ints, floats, and bools are rendered via their ``str`` form."""
    inline_fields = {"count": 3, "ratio": 0.5, "active": True}
    rendered = render_structured_entry(inline=inline_fields)
    assert "**count**: 3" in rendered
    assert "**ratio**: 0.5" in rendered
    assert "**active**: True" in rendered
|
||||
|
||||
|
||||
# ── parse ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_parse_full_round_trip() -> None:
    """Rendering then parsing recovers header, inline fields, and sections."""
    rendered = render_structured_entry(
        header="ep_001",
        inline={"type": "episode", "user_id": "u_jason"},
        sections={"Summary": "the summary", "Body": "the body"},
    )
    parsed = parse_structured_entry(rendered)
    assert parsed.header == "ep_001"
    assert parsed.inline == {"type": "episode", "user_id": "u_jason"}
    assert parsed.sections == {"Summary": "the summary", "Body": "the body"}
|
||||
|
||||
|
||||
def test_parse_no_header_yields_none() -> None:
    """Without an H2 line the parsed header is ``None``."""
    source = "**k**: v\n\n### Section\nbody"
    parsed = parse_structured_entry(source)
    assert parsed.header is None
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {"Section": "body"}
|
||||
|
||||
|
||||
def test_parse_no_inline() -> None:
    """Header plus section with no inline fields parses to an empty inline dict."""
    source = "## ep_001\n\n### Body\nonly section"
    parsed = parse_structured_entry(source)
    assert parsed.header == "ep_001"
    assert parsed.inline == {}
    assert parsed.sections == {"Body": "only section"}
|
||||
|
||||
|
||||
def test_parse_no_sections() -> None:
    """Header plus inline fields with no H3 parses to an empty sections dict."""
    source = "## ep_001\n\n**k**: v"
    parsed = parse_structured_entry(source)
    assert parsed.header == "ep_001"
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {}
|
||||
|
||||
|
||||
def test_parse_inline_value_with_colon_kept_verbatim() -> None:
    """Colons inside an inline value do not split it."""
    source = "**timestamp**: 2026-04-22T10:03:11Z"
    parsed = parse_structured_entry(source)
    expected = {"timestamp": "2026-04-22T10:03:11Z"}
    assert parsed.inline == expected
|
||||
|
||||
|
||||
def test_parse_list_value_kept_as_string() -> None:
    """Bracketed list text is returned as a plain string, not a list."""
    source = "**participants**: [u_jason, u_sarah]"
    parsed = parse_structured_entry(source)
    expected = {"participants": "[u_jason, u_sarah]"}
    assert parsed.inline == expected
|
||||
|
||||
|
||||
def test_parse_section_with_multiline_body() -> None:
    """All lines under an H3 are kept, newline-joined, as the section body."""
    source = "### Episode\nline 1\nline 2\nline 3"
    parsed = parse_structured_entry(source)
    expected = {"Episode": "line 1\nline 2\nline 3"}
    assert parsed.sections == expected
|
||||
|
||||
|
||||
def test_parse_section_titles_kept_verbatim() -> None:
    """Section titles are used as keys exactly as written (no re-casing)."""
    source = "### task_intent\ndoc text"
    parsed = parse_structured_entry(source)
    titles = parsed.sections
    assert "task_intent" in titles
|
||||
|
||||
|
||||
def test_parse_tolerates_stray_text_outside_blocks() -> None:
    """Prose lines that are neither header, inline field, nor section are dropped."""
    source = (
        "## ep_001\n\nrandom prose paragraph\n"
        "**k**: v\nmore stray text\n\n### Section\nbody"
    )
    parsed = parse_structured_entry(source)
    # Only the H2 line and the **key**: value line are captured from the
    # head; the surrounding prose lines do not show up anywhere.
    assert parsed.header == "ep_001"
    assert parsed.inline == {"k": "v"}
    assert parsed.sections == {"Section": "body"}
|
||||
|
||||
|
||||
def test_dataclass_immutable() -> None:
    """Assigning to a ``StructuredEntry`` attribute raises."""
    frozen = StructuredEntry(id="", body="", start=0, end=0, header="x")
    mutation_errors = (AttributeError, TypeError)
    with pytest.raises(mutation_errors):
        frozen.header = "y"  # type: ignore[misc]
|
||||
|
||||
|
||||
def test_structured_entry_inherits_entry() -> None:
    """A ``StructuredEntry`` is an ``Entry`` and keeps both id and header."""
    from everos.core.persistence.markdown import Entry

    fields = {
        "id": "ep_001",
        "body": "b",
        "start": 0,
        "end": 10,
        "header": "ep_001",
        "inline": {"k": "v"},
        "sections": {"S": "x"},
    }
    structured = StructuredEntry(**fields)
    assert isinstance(structured, Entry)
    assert structured.id == "ep_001"
    assert structured.header == "ep_001"
|
||||
|
||||
|
||||
def test_entry_as_structured_preserves_marker_context() -> None:
    """``as_structured`` keeps id/start/end and fills in the parsed fields."""
    from everos.core.persistence.markdown import Entry

    raw_body = "## ep_001\n\n**k**: v\n\n### Body\nthe body"
    plain = Entry(id="ep_001", body=raw_body, start=42, end=128)
    structured = plain.as_structured()
    assert isinstance(structured, StructuredEntry)
    # Marker context carried over from the plain entry.
    assert structured.id == "ep_001"
    assert structured.start == 42
    assert structured.end == 128
    # Fields parsed out of the body.
    assert structured.header == "ep_001"
    assert structured.inline == {"k": "v"}
    assert structured.sections == {"Body": "the body"}
|
||||
|
||||
|
||||
# ── round-trip with realistic Episode entry ─────────────────────────────
|
||||
|
||||
|
||||
def test_round_trip_episode_shape() -> None:
    """Render and re-parse an episode-shaped entry with many inline fields."""
    inline_fields = {
        "type": "episode",
        "user_id": "u_jason",
        "group_id": "sp_1",
        "session_id": "sess_abc123",
        "timestamp": "2026-04-22T10:03:11Z",
        "parent_type": "memcell",
        "parent_id": "mc_20260422_001",
        "participants": ["u_jason", "u_sarah"],
        "subject": "weekend planning",
    }
    section_bodies = {
        "Summary": "Jason and Sarah discussed weekend coffee plans.",
        "Episode": "At ten in the morning, while making coffee, Jason told Sarah...",
    }
    rendered = render_structured_entry(
        header="ep_20260422_001",
        inline=inline_fields,
        sections=section_bodies,
    )
    parsed = parse_structured_entry(rendered)
    assert parsed.header == "ep_20260422_001"
    # A list comes back as its bracketed string form.
    assert parsed.inline["participants"] == "[u_jason, u_sarah]"
    # A plain string value comes back unchanged.
    assert parsed.inline["session_id"] == "sess_abc123"
    assert parsed.sections == section_bodies
|
||||
@ -0,0 +1,87 @@
|
||||
"""Markdown IO toolkit — typical workflow demo.
|
||||
|
||||
Doubles as living documentation for how a caller assembles + reads a
|
||||
day-level markdown file with multiple ``<!-- entry:id -->`` records.
|
||||
|
||||
End-to-end story:
|
||||
1. Build a body that contains entry markers.
|
||||
2. Use ``MarkdownWriter.write_markdown`` to persist frontmatter + body
|
||||
atomically (tmp file + fsync + rename, all inside the target dir).
|
||||
3. Use ``MarkdownReader.read`` to parse the resulting file back into
|
||||
a ``ParsedMarkdown`` (frontmatter dict + raw body + list[Entry]).
|
||||
4. Verify each entry's id / body matches what was written.
|
||||
5. Look up a single entry by id with ``find_entry``.
|
||||
6. Round-trip: dump_frontmatter + body → parse_frontmatter recovers
|
||||
the original mapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import (
|
||||
MarkdownReader,
|
||||
MarkdownWriter,
|
||||
MemoryRoot,
|
||||
dump_frontmatter,
|
||||
find_entry,
|
||||
parse_frontmatter,
|
||||
)
|
||||
|
||||
|
||||
async def test_typical_day_log_write_then_read(tmp_path: Path) -> None:
    """Write a two-entry day log, read it back, and round-trip its frontmatter."""
    root = MemoryRoot(tmp_path)
    root.ensure()
    md_writer = MarkdownWriter(root)

    # Step 1: frontmatter plus a body with two entries (typical day-level
    # append log).
    meta = {
        "type": "episodic_day_log",
        "date": "2026-04-22",
        "user_id": "u_jason",
        "tags": ["meeting", "research"],
    }
    day_body = (
        "# Day log\n"
        "\n"
        "<!-- entry:ep_001 -->\n"
        "**Title**: Met Alice\n"
        "We discussed the new project layout.\n"
        "<!-- /entry:ep_001 -->\n"
        "\n"
        "<!-- entry:ep_002 -->\n"
        "**Title**: Read paper X\n"
        "Key idea: end-to-end async pipelines.\n"
        "<!-- /entry:ep_002 -->\n"
    )

    # Step 2: persist through the writer.
    day_file = root.users_dir() / "u_jason" / "episodic" / "2026-04-22.md"
    returned = await md_writer.write_markdown(
        day_file, frontmatter=meta, body=day_body
    )
    assert returned == day_file
    assert day_file.is_file()
    # The write must not leave a temp file next to the target.
    stray_tmp = list(day_file.parent.glob(f".{day_file.name}.tmp.*"))
    assert stray_tmp == []

    # Step 3: parse the file back into a ParsedMarkdown.
    doc = await MarkdownReader.read(day_file)

    # Step 4: frontmatter and entries match what was written.
    assert doc.frontmatter == meta
    entry_ids = [entry.id for entry in doc.entries]
    assert entry_ids == ["ep_001", "ep_002"]
    assert "Met Alice" in doc.entries[0].body
    assert "Read paper X" in doc.entries[1].body

    # Step 5: look a single entry up by id.
    hit = find_entry(doc.body, "ep_002")
    assert hit is not None
    assert "async pipelines" in hit.body

    # Step 6: dump_frontmatter + body parses back to the same pair.
    recovered_meta, recovered_body = parse_frontmatter(
        dump_frontmatter(meta) + day_body
    )
    assert recovered_meta == meta
    assert recovered_body == day_body
|
||||
@ -0,0 +1,229 @@
|
||||
"""Unit tests for MarkdownWriter (atomic write)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import (
|
||||
EntryId,
|
||||
MarkdownReader,
|
||||
MarkdownWriter,
|
||||
MemoryRoot,
|
||||
)
|
||||
|
||||
|
||||
def _make_writer(tmp_path: Path) -> MarkdownWriter:
    """Build a ``MarkdownWriter`` over a ``MemoryRoot`` located at *tmp_path*."""
    root = MemoryRoot(tmp_path)
    return MarkdownWriter(root)
|
||||
|
||||
|
||||
async def test_write_creates_file_with_content(tmp_path: Path) -> None:
    """``write`` returns the target path and stores the given text in it."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "users" / "u1" / "out.md"

    returned = await md_writer.write(destination, "hello\n")

    assert returned == destination
    assert destination.read_text(encoding="utf-8") == "hello\n"
|
||||
|
||||
|
||||
async def test_write_creates_parent_directories(tmp_path: Path) -> None:
    """``write`` succeeds when the target's parent directories do not exist yet."""
    md_writer = _make_writer(tmp_path)
    nested = tmp_path / "a" / "b" / "c" / "f.md"

    await md_writer.write(nested, "x")

    assert nested.is_file()
|
||||
|
||||
|
||||
async def test_write_overwrites_existing(tmp_path: Path) -> None:
    """``write`` replaces the content of a file that already exists."""
    md_writer = _make_writer(tmp_path)
    existing = tmp_path / "f.md"
    existing.write_text("old", encoding="utf-8")

    await md_writer.write(existing, "new")

    assert existing.read_text(encoding="utf-8") == "new"
|
||||
|
||||
|
||||
async def test_write_no_temp_file_left_after_success(tmp_path: Path) -> None:
    """A successful ``write`` leaves no ``.f.md.tmp.*`` file in the directory."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "f.md"

    await md_writer.write(destination, "ok")

    # Sync iterdir over a pytest tmp_path is fine in tests.
    names = [child.name for child in tmp_path.iterdir()]  # noqa: ASYNC240
    stray = [name for name in names if name.startswith(".f.md.tmp.")]
    assert stray == []
|
||||
|
||||
|
||||
async def test_write_cleans_up_temp_on_failure(tmp_path: Path) -> None:
    """If os.replace fails, the temp file should be cleaned up."""
    md_writer = _make_writer(tmp_path)
    destination = tmp_path / "f.md"

    # Make the rename step raise, and expect that error to reach the caller.
    failing_replace = patch(
        "everos.core.persistence.markdown.writer.os.replace",
        side_effect=OSError("simulated rename failure"),
    )
    with failing_replace, pytest.raises(OSError, match="simulated"):
        await md_writer.write(destination, "hello")

    # Neither a stray temp file nor the target may exist afterwards.
    # Sync iterdir over a pytest tmp_path is fine in tests.
    names = [child.name for child in tmp_path.iterdir()]  # noqa: ASYNC240
    stray = [name for name in names if name.startswith(".f.md.tmp.")]
    assert stray == []
    assert not destination.exists()
|
||||
|
||||
|
||||
async def test_write_markdown_assembles_frontmatter_and_body(tmp_path: Path) -> None:
    """``write_markdown`` emits a ``---`` frontmatter block followed by the body."""
    md_writer = _make_writer(tmp_path)
    doc_path = tmp_path / "doc.md"

    await md_writer.write_markdown(
        doc_path,
        frontmatter={"title": "Hello"},
        body="# Body\n",
    )

    content = doc_path.read_text(encoding="utf-8")
    assert content.startswith("---\n")
    assert "title: Hello" in content
    # Trailing newlines are not significant for this check.
    assert content.rstrip("\n").endswith("# Body")
|
||||
|
||||
|
||||
async def test_write_markdown_round_trip(tmp_path: Path) -> None:
    """A file produced by ``write_markdown`` parses back via ``MarkdownReader``."""
    md_writer = _make_writer(tmp_path)
    doc_path = tmp_path / "rt.md"

    await md_writer.write_markdown(
        doc_path,
        frontmatter={"k": "v", "n": 1},
        body="<!-- entry:x -->\ncontent\n<!-- /entry:x -->\n",
    )
    doc = await MarkdownReader.read(doc_path)

    assert doc.frontmatter == {"k": "v", "n": 1}
    assert len(doc.entries) == 1
    only_entry = doc.entries[0]
    assert only_entry.body == "content"
|
||||
|
||||
|
||||
async def test_write_markdown_no_frontmatter(tmp_path: Path) -> None:
    """Without frontmatter, ``write_markdown`` writes the body verbatim."""
    md_writer = _make_writer(tmp_path)
    plain_path = tmp_path / "plain.md"

    await md_writer.write_markdown(plain_path, body="just body\n")

    written = plain_path.read_text(encoding="utf-8")
    assert written == "just body\n"
|
||||
|
||||
|
||||
def test_memory_root_property_accessible(tmp_path: Path) -> None:
    """The writer exposes its ``MemoryRoot``, whose root is the resolved path."""
    exposed_root = _make_writer(tmp_path).memory_root
    assert exposed_root.root == tmp_path.resolve()
|
||||
|
||||
|
||||
# ── append_entry ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_append_entry_creates_file_when_missing(tmp_path: Path) -> None:
    """``append_entry`` on a missing path creates the file with one entry."""
    md_writer = _make_writer(tmp_path)
    daily_file = tmp_path / "memcells" / "memcell-2026-04-22.md"
    first_id = EntryId(prefix="umc", date=dt.date(2026, 4, 22), seq=1)

    returned = await md_writer.append_entry(
        daily_file,
        entry_body="hello world",
        entry_id=first_id,
        frontmatter_updates={
            "file_type": "memcell_daily",
            "entry_count": 1,
        },
    )
    assert returned == daily_file

    doc = await MarkdownReader.read(daily_file)
    assert doc.frontmatter == {"file_type": "memcell_daily", "entry_count": 1}
    assert len(doc.entries) == 1
    stored = doc.entries[0]
    assert stored.id == "umc_20260422_00000001"
    assert stored.body == "hello world"
|
||||
|
||||
|
||||
async def test_append_entry_appends_to_existing(tmp_path: Path) -> None:
    """A second ``append_entry`` adds its entry after the first one."""
    md_writer = _make_writer(tmp_path)
    log_path = tmp_path / "log.md"

    # Two appends in sequence; the sequence number doubles as entry_count.
    for seq, text in enumerate(("first", "second"), start=1):
        await md_writer.append_entry(
            log_path,
            entry_body=text,
            entry_id=EntryId(prefix="umc", date=dt.date(2026, 4, 22), seq=seq),
            frontmatter_updates={"entry_count": seq},
        )

    doc = await MarkdownReader.read(log_path)
    assert [entry.id for entry in doc.entries] == [
        "umc_20260422_00000001",
        "umc_20260422_00000002",
    ]
    assert [entry.body for entry in doc.entries] == ["first", "second"]
|
||||
|
||||
|
||||
async def test_append_entry_merges_frontmatter_shallow(tmp_path: Path) -> None:
    """Later ``frontmatter_updates`` overwrite matching keys and keep the rest."""
    md_writer = _make_writer(tmp_path)
    log_path = tmp_path / "log.md"
    day = dt.date(2026, 4, 22)

    initial_updates = {
        "file_type": "memcell_daily",
        "entry_count": 1,
        "last_appended_at": "2026-04-22T10:00:00Z",
    }
    await md_writer.append_entry(
        log_path,
        entry_body="b",
        entry_id=EntryId(prefix="umc", date=day, seq=1),
        frontmatter_updates=initial_updates,
    )

    # The second append overwrites entry_count + last_appended_at and does
    # not mention file_type, which must survive.
    later_updates = {
        "entry_count": 2,
        "last_appended_at": "2026-04-22T10:05:00Z",
    }
    await md_writer.append_entry(
        log_path,
        entry_body="b",
        entry_id=EntryId(prefix="umc", date=day, seq=2),
        frontmatter_updates=later_updates,
    )

    doc = await MarkdownReader.read(log_path)
    assert doc.frontmatter == {
        "file_type": "memcell_daily",
        "entry_count": 2,
        "last_appended_at": "2026-04-22T10:05:00Z",
    }
|
||||
|
||||
|
||||
async def test_append_entry_without_frontmatter_updates_keeps_existing(
    tmp_path: Path,
) -> None:
    """Appending without ``frontmatter_updates`` leaves the frontmatter as is."""
    md_writer = _make_writer(tmp_path)
    log_path = tmp_path / "log.md"

    # Seed a file that has frontmatter but an empty body.
    await md_writer.write_markdown(
        log_path, frontmatter={"file_type": "x", "n": 1}, body=""
    )
    new_id = EntryId(prefix="umc", date=dt.date(2026, 4, 22), seq=1)
    await md_writer.append_entry(log_path, entry_body="body", entry_id=new_id)

    doc = await MarkdownReader.read(log_path)
    assert doc.frontmatter == {"file_type": "x", "n": 1}
    assert len(doc.entries) == 1
|
||||
|
||||
|
||||
async def test_append_entry_round_trip_with_reader(tmp_path: Path) -> None:
    """Five sequential appends read back as five entries, in order."""
    md_writer = _make_writer(tmp_path)
    log_path = tmp_path / "log.md"
    day = dt.date(2026, 4, 22)

    for i in range(5):
        seq = i + 1
        await md_writer.append_entry(
            log_path,
            entry_body=f"content {i}",
            entry_id=EntryId(prefix="umc", date=day, seq=seq),
            frontmatter_updates={"entry_count": seq},
        )

    doc = await MarkdownReader.read(log_path)
    assert len(doc.entries) == 5
    assert doc.frontmatter["entry_count"] == 5
    for i, stored in enumerate(doc.entries):
        assert stored.id == f"umc_20260422_{i + 1:08d}"
        assert stored.body == f"content {i}"
|
||||
@ -0,0 +1,200 @@
|
||||
"""Regression tests for the MarkdownWriter read-modify-write race.
|
||||
|
||||
Before the per-path :class:`asyncio.Lock` was added, two concurrent tasks
|
||||
calling :meth:`MarkdownWriter.append_entry` against the same path would
|
||||
each load the file, append one entry block in memory, and write the
|
||||
merged file back — the second writer's read pre-dated the first
|
||||
writer's write, so it overwrote the first writer's append. Both
|
||||
``entry_count`` (frontmatter) and the entry block markers were lost in
|
||||
proportion to concurrency level.
|
||||
|
||||
These tests drive ``N`` concurrent appends against one ``(owner, date)``
|
||||
and assert that no entry is lost at any concurrency level. They cover
|
||||
both the single-entry ``append_entry`` path (taken by tests / external
|
||||
callers) and the batched ``append_entries`` path (taken by strategies
|
||||
after the per-owner batching migration).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import EntryId, MarkdownWriter, MemoryRoot
|
||||
from everos.infra.persistence.markdown.writers.atomic_fact_writer import (
|
||||
AtomicFactWriter,
|
||||
)
|
||||
|
||||
|
||||
def _scan_md(md_path: Path) -> tuple[int, int]:
    """Return ``(entry_tag_count, frontmatter_entry_count)``.

    ``entry_tag_count`` is the number of ``<!-- entry:af_`` opening markers
    found in the file. ``frontmatter_entry_count`` is the integer on the
    first line that starts with ``entry_count: ``, or ``-1`` when the file
    has no such line.
    """
    text = md_path.read_text(encoding="utf-8")
    # The marker is a fixed literal, so a substring count replaces the regex.
    tag_count = text.count("<!-- entry:af_")
    fm_match = re.search(r"^entry_count: (\d+)", text, re.MULTILINE)
    fm_count = int(fm_match.group(1)) if fm_match else -1
    return tag_count, fm_count
|
||||
|
||||
|
||||
async def _drive_concurrent_appends(
    writer: AtomicFactWriter,
    owner: str,
    n: int,
    concurrency: int,
) -> None:
    """Run ``n`` single-entry ``append_entry`` calls, at most ``concurrency`` at once."""
    gate = asyncio.Semaphore(concurrency)

    async def _append_one(idx: int) -> None:
        # Build the payload up front; only the append itself holds the gate.
        inline = {
            "owner_id": owner,
            "session_id": "race_test",
            "timestamp": "2026-05-18T00:00:00+00:00",
            "parent_type": "memcell",
            "parent_id": f"mc_{idx:04d}",
        }
        sections = {"Fact": f"fact-{idx:04d}"}
        async with gate:
            await writer.append_entry(owner, inline=inline, sections=sections)

    pending = [_append_one(i) for i in range(n)]
    await asyncio.gather(*pending)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("concurrency", [1, 2, 4, 8, 16])
async def test_append_entry_no_lost_updates_under_concurrency(
    tmp_path: Path, concurrency: int
) -> None:
    """``append_entry`` from N concurrent tasks must not drop any entry."""
    n = 30
    owner = "race_user"
    mem_root = MemoryRoot(root=tmp_path)
    fact_writer = AtomicFactWriter(root=mem_root)

    await _drive_concurrent_appends(fact_writer, owner, n, concurrency)

    # Every append targets the same owner, so exactly one file is expected.
    md_files = list((mem_root.users_dir() / owner).rglob("*.md"))
    assert len(md_files) == 1, f"expected 1 md file, got {md_files}"

    tag_count, fm_count = _scan_md(md_files[0])
    assert tag_count == n, (
        f"lost {n - tag_count} entries at concurrency={concurrency} "
        f"(tag_count={tag_count}, expected={n})"
    )
    assert fm_count == n, (
        f"frontmatter entry_count drift at concurrency={concurrency} "
        f"(fm_count={fm_count}, expected={n})"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("concurrency", [1, 2, 4, 8, 16])
async def test_append_entries_batch_no_lost_updates_under_concurrency(
    tmp_path: Path, concurrency: int
) -> None:
    """``append_entries`` (batched) from N concurrent tasks must not drop any
    entry."""
    owner = "race_user_batched"
    batches = 6
    items_per_batch = 5
    total = batches * items_per_batch
    mem_root = MemoryRoot(root=tmp_path)
    fact_writer = AtomicFactWriter(root=mem_root)
    gate = asyncio.Semaphore(concurrency)

    async def _one_batch(batch_idx: int) -> None:
        async with gate:
            # Each item is an (inline fields, sections) pair.
            items = [
                (
                    {
                        "owner_id": owner,
                        "session_id": "race_test",
                        "timestamp": "2026-05-18T00:00:00+00:00",
                        "parent_type": "memcell",
                        "parent_id": f"mc_b{batch_idx:02d}_i{i:02d}",
                    },
                    {"Fact": f"batched-fact-b{batch_idx:02d}-{i:02d}"},
                )
                for i in range(items_per_batch)
            ]
            await fact_writer.append_entries(owner, items)

    pending = [_one_batch(b) for b in range(batches)]
    await asyncio.gather(*pending)

    md_files = list((mem_root.users_dir() / owner).rglob("*.md"))
    assert len(md_files) == 1

    tag_count, fm_count = _scan_md(md_files[0])
    assert tag_count == total, (
        f"lost {total - tag_count} entries at concurrency={concurrency} "
        f"(tag_count={tag_count}, expected={total})"
    )
    assert fm_count == total, (
        f"frontmatter entry_count drift at concurrency={concurrency} "
        f"(fm_count={fm_count}, expected={total})"
    )
|
||||
|
||||
|
||||
async def test_lock_for_returns_same_lock_per_path(tmp_path: Path) -> None:
    """``lock_for`` is the keying primitive that BaseDailyWriter relies on
    to serialise its multi-step read-compute-write sequence; aliasing paths
    must collapse to one lock object."""
    md_writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    canonical = tmp_path / "foo" / "bar.md"
    same_again = tmp_path / "foo" / "bar.md"
    via_dotdot = tmp_path / "foo" / ".." / "foo" / "bar.md"

    base_lock = md_writer.lock_for(canonical)

    # Every spelling of the same file yields the identical Lock object.
    assert md_writer.lock_for(same_again) is base_lock
    assert md_writer.lock_for(via_dotdot) is base_lock

    # A different file gets a Lock of its own.
    sibling_lock = md_writer.lock_for(tmp_path / "foo" / "baz.md")
    assert sibling_lock is not base_lock
|
||||
|
||||
|
||||
async def test_append_entries_empty_is_noop(tmp_path: Path) -> None:
    """An empty batch returns the target and leaves it absent or entry-free."""
    md_writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    scratch = tmp_path / "scratch.md"

    returned = await md_writer.append_entries(scratch, [])
    assert returned == scratch

    # Both outcomes are accepted: no file at all, or a file that is empty
    # or holds only an empty frontmatter block.
    if not scratch.exists():
        return
    assert scratch.read_text(encoding="utf-8") in ("", "---\n---\n\n")
|
||||
|
||||
|
||||
async def test_markdown_writer_append_entry_delegates_to_batch(
    tmp_path: Path,
) -> None:
    """``append_entry`` is documented as a wrapper for ``append_entries`` —
    asserting they produce identical file contents protects callers from
    drift between the two paths."""
    # A real import replaces the former ``__import__("datetime")`` call; it is
    # kept local so this test does not rely on a module-level import.
    import datetime as dt

    writer = MarkdownWriter(MemoryRoot(root=tmp_path))
    eid = EntryId.next_for("af", dt.date(2026, 5, 18), 0)
    body = "**fact**: hello"

    path_a = tmp_path / "a.md"
    path_b = tmp_path / "b.md"

    # Same entry and frontmatter through the single-entry path ...
    await writer.append_entry(
        path_a,
        entry_body=body,
        entry_id=eid,
        frontmatter_updates={"id": "shared", "entry_count": 1},
    )
    # ... and through the batched path with a one-item batch.
    await writer.append_entries(
        path_b,
        [(body, eid)],
        frontmatter_updates={"id": "shared", "entry_count": 1},
    )

    assert path_a.read_text(encoding="utf-8") == path_b.read_text(encoding="utf-8")
|
||||
126
tests/unit/test_core/test_persistence/test_memory_root.py
Normal file
126
tests/unit/test_core/test_persistence/test_memory_root.py
Normal file
@ -0,0 +1,126 @@
|
||||
"""Unit tests for MemoryRoot path manager."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
|
||||
|
||||
def test_default_returns_home_everos(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``EVEROS_MEMORY__ROOT`` unset, the default root is ``~/.everos``."""
    # Isolate from any ambient EVEROS_MEMORY__ROOT (e.g. the session-scoped
    # search-corpus fixture sets it for the whole run); the autouse
    # _reset_settings_cache fixture clears the load_settings cache, so the
    # delenv takes effect for this assertion of the hard-coded default.
    monkeypatch.delenv("EVEROS_MEMORY__ROOT", raising=False)

    default_root = MemoryRoot.default()
    expected = (Path.home() / ".everos").resolve()
    assert default_root.root == expected
|
||||
|
||||
|
||||
def test_accepts_str_path(tmp_path: Path) -> None:
    """A ``str`` root is accepted and stored as the resolved path."""
    as_text = str(tmp_path)
    assert MemoryRoot(as_text).root == tmp_path.resolve()
|
||||
|
||||
|
||||
def test_accepts_pathlib_path(tmp_path: Path) -> None:
    """A ``Path`` root is accepted and stored as the resolved path."""
    from_path = MemoryRoot(tmp_path)
    assert from_path.root == tmp_path.resolve()
|
||||
|
||||
|
||||
def test_user_visible_dirs_default_scope(tmp_path: Path) -> None:
    """Without app/project arguments the dirs live under the default space."""
    mr = MemoryRoot(tmp_path)
    # Omitting app/project resolves to the default space; "default" lands as
    # the reserved ``default_app`` / ``default_project`` directory names.
    default_space = mr.root / "default_app" / "default_project"

    accessors = (
        (mr.agents_dir, "agents"),
        (mr.users_dir, "users"),
        (mr.knowledge_dir, "knowledge"),
    )
    for accessor, leaf in accessors:
        assert accessor() == default_space / leaf
|
||||
|
||||
|
||||
def test_user_visible_dirs_named_scope(tmp_path: Path) -> None:
    """An explicit app/project pair is used verbatim as directory names."""
    mr = MemoryRoot(tmp_path)
    # A non-default app/project maps to itself (no ``default_*`` rewrite).
    named_space = mr.root / "claude_code" / "oss"

    accessors = (
        (mr.agents_dir, "agents"),
        (mr.users_dir, "users"),
        (mr.knowledge_dir, "knowledge"),
    )
    for accessor, leaf in accessors:
        assert accessor("claude_code", "oss") == named_space / leaf
|
||||
|
||||
|
||||
def test_dotfile_paths(tmp_path: Path) -> None:
    """The dot-prefixed locations sit at fixed places under the root."""
    mr = MemoryRoot(tmp_path)
    index = tmp_path / ".index"
    sqlite = index / "sqlite"

    assert mr.index_dir == index
    assert mr.lancedb_dir == index / "lancedb"
    assert mr.sqlite_dir == sqlite
    assert mr.system_db == sqlite / "system.db"
    assert mr.lock_file == tmp_path / ".lock"
    assert mr.tmp_dir == tmp_path / ".tmp"
|
||||
|
||||
|
||||
def test_ensure_creates_required_dirs(tmp_path: Path) -> None:
    """``ensure`` creates the root and its dot-dirs, not the user-visible dirs."""
    mr = MemoryRoot(tmp_path / "fresh")
    mr.ensure()

    created = (mr.root, mr.index_dir, mr.sqlite_dir, mr.lancedb_dir, mr.tmp_dir)
    for directory in created:
        assert directory.is_dir()

    # User-visible dirs are NOT pre-created.
    for lazily_created in (mr.agents_dir(), mr.users_dir(), mr.knowledge_dir()):
        assert not lazily_created.exists()
|
||||
|
||||
|
||||
def test_ensure_is_idempotent(tmp_path: Path) -> None:
    """Calling ``ensure`` twice in a row must not raise."""
    mr = MemoryRoot(tmp_path)
    for _ in range(2):  # the second pass must not fail
        mr.ensure()
    assert mr.tmp_dir.is_dir()
|
||||
|
||||
|
||||
def test_ensure_materializes_ome_config_template(tmp_path: Path) -> None:
    """First ensure() drops a real ``ome.toml`` users can edit.

    Without this, ``pip install everos && everos server start`` produced
    a warning (``config_reload_failed: No such file``) because the OME
    config reloader had no file to point at. The template ships under
    ``src/everos/config/default_ome.toml`` and is byte-copied on first run.
    """
    mr = MemoryRoot(tmp_path)
    mr.ensure()
    assert mr.ome_config.is_file()

    # The materialised file must equal the shipped template byte for byte,
    # so a change to what users see on first run cannot slip in unnoticed.
    repo_root = Path(__file__).resolve().parents[4]
    shipped_template = repo_root / "src/everos/config/default_ome.toml"
    assert mr.ome_config.read_bytes() == shipped_template.read_bytes()
|
||||
|
||||
|
||||
def test_ensure_preserves_user_edited_ome_config(tmp_path: Path) -> None:
    """Second ensure() must not overwrite user edits.

    The template materialisation is an existence check, not a content
    sync — once the user has tweaked their overrides the file is theirs.
    """
    user_content = b"# user-edited\n[strategies.extract_foresight]\nenabled = false\n"
    mr = MemoryRoot(tmp_path)

    mr.ensure()
    mr.ome_config.write_bytes(user_content)
    mr.ensure()

    assert mr.ome_config.read_bytes() == user_content
|
||||
|
||||
|
||||
def test_frozen_dataclass_hashable(tmp_path: Path) -> None:
    """Two roots built from the same path are equal and hash alike."""
    first = MemoryRoot(tmp_path)
    second = MemoryRoot(tmp_path)

    assert first == second
    assert hash(first) == hash(second)
    # Equal + same hash means a set keeps only one of them.
    assert {first, second} == {first}
|
||||
|
||||
|
||||
def test_user_expansion(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """A root given as ``~/custom`` resolves to ``custom`` under the home dir."""
    fake_home = str(tmp_path)
    # Home expansion in the standard library reads HOME on POSIX but
    # USERPROFILE on Windows, so both are redirected to the temp dir.
    monkeypatch.setenv("HOME", fake_home)
    monkeypatch.setenv("USERPROFILE", fake_home)

    mr = MemoryRoot("~/custom")

    assert mr.root == (tmp_path / "custom").resolve()
|
||||
113
tests/unit/test_core/test_persistence/test_sqlite/test_engine.py
Normal file
113
tests/unit/test_core/test_persistence/test_sqlite/test_engine.py
Normal file
@ -0,0 +1,113 @@
|
||||
"""Unit tests for the SQLite async engine + PRAGMA listener.
|
||||
|
||||
Critical: verifies PRAGMAs are actually applied at the SQLite layer
|
||||
(not just declared in code). The whole reason for the listener is that
|
||||
PRAGMAs are per-connection and the SA pool reuses connections.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
session_scope,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
|
||||
|
||||
|
||||
async def test_engine_creates_db_file(memory_root: MemoryRoot) -> None:
    """Running a statement through the engine leaves the system DB file on disk."""
    db_path = memory_root.system_db
    engine = create_system_engine(db_path, SqliteSettings())
    session_factory = create_session_factory(engine)

    async with session_scope(session_factory) as session:
        await session.execute(text("SELECT 1"))
    await engine.dispose()

    assert memory_root.system_db.exists()
|
||||
|
||||
|
||||
async def test_pragmas_actually_applied_default_settings(
    memory_root: MemoryRoot,
) -> None:
    """Default PRAGMAs match what's in default.toml."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(engine)
    # PRAGMA name -> value SQLite reports back for the default settings.
    expected = {
        "journal_mode": "wal",
        # synchronous: 0=OFF 1=NORMAL 2=FULL 3=EXTRA
        "synchronous": 1,
        # foreign_keys: 1=ON 0=OFF
        "foreign_keys": 1,
        # temp_store: 0=DEFAULT 1=FILE 2=MEMORY
        "temp_store": 2,
        "busy_timeout": 5000,
        "journal_size_limit": 64 * 1024 * 1024,
        # cache_size: negative value = KB; positive = pages
        "cache_size": -2048,
    }
    try:
        async with session_scope(session_factory) as session:
            for pragma_name, reported in expected.items():
                assert _scalar(await _pragma(session, pragma_name)) == reported
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_pragmas_respect_custom_settings(memory_root: MemoryRoot) -> None:
    """Engine reflects non-default tunables.

    Every tunable passed to ``SqliteSettings`` below is read back through its
    PRAGMA, including ``journal_size_limit``, which was configured but not
    asserted before.
    """
    settings = SqliteSettings(
        journal_mode="DELETE",
        synchronous="FULL",
        foreign_keys=False,
        temp_store="FILE",
        busy_timeout_ms=10000,
        journal_size_limit_bytes=1024 * 1024,
        cache_size_kb=4096,
    )
    engine = create_system_engine(memory_root.system_db, settings)
    factory = create_session_factory(engine)
    try:
        async with session_scope(factory) as s:
            assert _scalar(await _pragma(s, "journal_mode")) == "delete"
            assert _scalar(await _pragma(s, "synchronous")) == 2  # FULL
            assert _scalar(await _pragma(s, "foreign_keys")) == 0
            assert _scalar(await _pragma(s, "temp_store")) == 1  # FILE
            assert _scalar(await _pragma(s, "busy_timeout")) == 10000
            assert _scalar(await _pragma(s, "journal_size_limit")) == 1024 * 1024
            # cache_size: a negative value is a size in KB.
            assert _scalar(await _pragma(s, "cache_size")) == -4096
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_pragmas_applied_on_each_new_connection(
    memory_root: MemoryRoot,
) -> None:
    """The listener fires on every new connection from the pool, not just once."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(engine)
    try:
        # Two independent sessions → at least two connection acquisitions
        # → both must see WAL mode.
        async with session_scope(session_factory) as first_session:
            first_mode = _scalar(await _pragma(first_session, "journal_mode"))
            assert first_mode == "wal"
        async with session_scope(session_factory) as second_session:
            second_mode = _scalar(await _pragma(second_session, "journal_mode"))
            assert second_mode == "wal"
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def _pragma(session, name: str):  # type: ignore[no-untyped-def]
    """Execute ``PRAGMA <name>`` on *session* and return the raw result."""
    statement = text(f"PRAGMA {name}")
    return await session.execute(statement)
|
||||
|
||||
|
||||
def _scalar(result):  # type: ignore[no-untyped-def]
    """Return the first column of the next row, or ``None`` if there is no row."""
    first_row = result.fetchone()
    if first_row is None:
        return None
    return first_row[0]
|
||||
@ -0,0 +1,126 @@
|
||||
"""ORM CRUD demo: full INSERT / SELECT / UPDATE / DELETE on a BaseTable.
|
||||
|
||||
Doubles as living documentation for how to author a SQLModel-backed
|
||||
business table inside the everos persistence stack:
|
||||
|
||||
1. Subclass ``BaseTable`` (gets ``created_at`` / ``updated_at`` for free).
|
||||
2. Build a session factory from a real engine.
|
||||
3. Use ``session_scope`` for the transaction lifecycle.
|
||||
4. Verify ``updated_at`` auto-bumps on UPDATE.
|
||||
|
||||
The local table name is prefixed with ``_`` so it cannot be confused with
|
||||
a real business table.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel, select
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
BaseTable,
|
||||
Field,
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
session_scope,
|
||||
)
|
||||
|
||||
|
||||
class _DemoNote(BaseTable, table=True):
    """Minimal note table that exists only for this test module."""

    # Leading underscore keeps the name apart from real business tables.
    __tablename__ = "_demo_notes"  # type: ignore[assignment]

    # Left as None on construction; the test expects it set after INSERT.
    id: int | None = Field(default=None, primary_key=True)
    body: str
    tags: str | None = Field(default=None)
|
||||
|
||||
|
||||
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` under ``tmp_path`` with ``ensure()`` already run."""
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
|
||||
|
||||
|
||||
async def test_orm_full_crud_lifecycle(memory_root: MemoryRoot) -> None:
    """Walk one ``_DemoNote`` row through INSERT, SELECT, UPDATE and DELETE."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    session_factory = create_session_factory(engine)
    try:
        # ── Schema ──────────────────────────────────────────────────────
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # ── INSERT ──────────────────────────────────────────────────────
        async with session_scope(session_factory) as session:
            created = _DemoNote(body="hello")
            session.add(created)
            await session.commit()
            await session.refresh(created)
            assert created.id is not None
            assert created.created_at is not None
            assert created.updated_at is not None
            # default_factory runs once per field, so the two timestamps
            # may differ by a few microseconds on INSERT. Order must hold.
            assert created.created_at <= created.updated_at
            note_id = created.id
            created_at_on_insert = created.created_at
            updated_at_on_insert = created.updated_at

        # The same by-id lookup is reused by every step below.
        by_id = select(_DemoNote).where(_DemoNote.id == note_id)

        # ── SELECT (single by id) ───────────────────────────────────────
        async with session_scope(session_factory) as session:
            fetched = (await session.execute(by_id)).scalars().first()
            assert fetched is not None
            assert fetched.body == "hello"

        # ── SELECT (filter + order) ─────────────────────────────────────
        async with session_scope(session_factory) as session:
            for extra_body in ("second", "third"):
                session.add(_DemoNote(body=extra_body))
            await session.commit()

        async with session_scope(session_factory) as session:
            ordered = select(_DemoNote).order_by(_DemoNote.id)
            notes = (await session.execute(ordered)).scalars().all()
            assert [note.body for note in notes] == ["hello", "second", "third"]

        # ── UPDATE (updated_at must not go backwards) ───────────────────
        # Sleep slightly so onupdate has a measurably newer timestamp
        # than the initial insert (timestamp resolution is fine but the
        # comparison should be ``>=`` to be robust on fast machines).
        await asyncio.sleep(0.01)
        async with session_scope(session_factory) as session:
            to_update = (await session.execute(by_id)).scalars().first()
            assert to_update is not None
            to_update.body = "hello world"
            to_update.tags = "demo"
            await session.commit()
            await session.refresh(to_update)
            assert to_update.body == "hello world"
            assert to_update.tags == "demo"
            assert to_update.updated_at >= updated_at_on_insert  # via onupdate
            assert to_update.created_at == created_at_on_insert  # unchanged

        # ── DELETE ──────────────────────────────────────────────────────
        async with session_scope(session_factory) as session:
            to_delete = (await session.execute(by_id)).scalars().first()
            assert to_delete is not None
            await session.delete(to_delete)
            await session.commit()

        async with session_scope(session_factory) as session:
            assert (await session.execute(by_id)).scalars().first() is None
            # The other rows survive.
            survivors = (await session.execute(select(_DemoNote))).scalars().all()
            assert {note.body for note in survivors} == {"second", "third"}
    finally:
        await engine.dispose()
|
||||
@ -0,0 +1,160 @@
|
||||
"""RepoBase CRUD demo + assertions.
|
||||
|
||||
Doubles as living documentation for how a service / memory layer caller
|
||||
uses the generic repository — no manual session handling. Exercises the
|
||||
explicit-factory constructor path; the lazy ``_factory_lookup`` hook is
|
||||
exercised indirectly via the lifespan + manager tests once business
|
||||
repos land under ``infra/.../repos/``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
BaseTable,
|
||||
Field,
|
||||
MemoryRoot,
|
||||
RepoBase,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
)
|
||||
|
||||
|
||||
class _DemoUser(BaseTable, table=True):
    """Throwaway user table that exists solely for the tests in this module.

    Only ``id``, ``name`` and ``active`` are declared here. The
    ``created_at`` / ``updated_at`` attributes that the tests read are not
    declared on this class, so they are inherited from ``BaseTable``.
    """

    __tablename__ = "_demo_users"  # type: ignore[assignment]

    # ``None`` at construction time; the tests assert it is populated on the
    # instance returned after the row has been added through the repository.
    id: int | None = Field(default=None, primary_key=True)
    name: str
    # Defaults to ``True`` when the caller does not pass ``active``.
    active: bool = Field(default=True)
|
||||
|
||||
|
||||
class _DemoUserRepo(RepoBase[_DemoUser]):
    """Repository over ``_DemoUser``; the object under test in this module.

    Defines no methods of its own: it only binds ``model``, so every
    operation the tests call (``add``, ``get_by_id``, ``find_where``, ...)
    is inherited from ``RepoBase``.
    """

    model = _DemoUser
|
||||
|
||||
|
||||
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` anchored at pytest's per-test ``tmp_path``.

    ``ensure()`` is invoked before the root is handed to the test.
    """
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
|
||||
|
||||
|
||||
async def _setup_repo(memory_root: MemoryRoot) -> tuple[_DemoUserRepo, object]:
    """Build the engine and session factory, create the schema, wrap in a repo.

    Args:
        memory_root: Root whose ``system_db`` is passed to
            ``create_system_engine``.

    Returns:
        ``(repo, engine)``. The caller owns the engine and is expected to
        ``await engine.dispose()`` once the test is finished with it.

    Raises:
        Whatever factory or schema creation raises. The engine is disposed
        before the error propagates so a failed setup does not leave it open.
    """
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    try:
        factory = create_session_factory(engine)
        async with engine.begin() as conn:
            # Creates every table registered on the shared SQLModel metadata.
            await conn.run_sync(SQLModel.metadata.create_all)
        repo = _DemoUserRepo(factory)
    except BaseException:
        # Callers only enter their own ``try/finally`` after this function
        # returns, so nothing else can dispose the engine if setup fails.
        await engine.dispose()
        raise
    return repo, engine
|
||||
|
||||
|
||||
async def test_repo_add_and_get(memory_root: MemoryRoot) -> None:
    """A row stored with ``add`` can be read back with ``get_by_id``."""
    repo, engine = await _setup_repo(memory_root)
    try:
        stored = await repo.add(_DemoUser(name="alice"))
        # The instance returned by ``add`` must carry both a primary key and
        # a creation timestamp.
        assert stored.id is not None
        assert stored.created_at is not None

        loaded = await repo.get_by_id(stored.id)
        assert loaded is not None
        assert loaded.name == "alice"
        # ``active`` was not passed above, so the field default is expected.
        assert loaded.active is True
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_repo_add_many_and_list_all(memory_root: MemoryRoot) -> None:
    """``add_many`` stores a batch that ``list_all`` and ``count`` then see."""
    repo, engine = await _setup_repo(memory_root)
    try:
        batch = [
            _DemoUser(name="alice"),
            _DemoUser(name="bob"),
            _DemoUser(name="carol", active=False),
        ]
        saved = await repo.add_many(batch)
        # Every returned instance must have been given a primary key.
        for user in saved:
            assert user.id is not None

        listed = await repo.list_all()
        # Compared as a set: the test does not depend on listing order.
        names = {user.name for user in listed}
        assert names == {"alice", "bob", "carol"}

        total = await repo.count()
        assert total == 3
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_repo_find_where_and_find_one(memory_root: MemoryRoot) -> None:
    """``find_where`` / ``find_one`` select rows by keyword arguments."""
    repo, engine = await _setup_repo(memory_root)
    try:
        seed = [
            _DemoUser(name="alice", active=True),
            _DemoUser(name="bob", active=False),
            _DemoUser(name="carol", active=True),
        ]
        await repo.add_many(seed)

        # Multi-row lookup: only the two active users should come back.
        active_users = await repo.find_where(active=True)
        active_names = {user.name for user in active_users}
        assert active_names == {"alice", "carol"}

        # Single-row lookup that matches.
        match = await repo.find_one(name="bob")
        assert match is not None
        assert match.active is False

        # Single-row lookup with no match yields ``None``.
        missing = await repo.find_one(name="no_such")
        assert missing is None
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_repo_update_bumps_updated_at(memory_root: MemoryRoot) -> None:
    """``update`` persists field changes and leaves ``created_at`` alone."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))
        updated_before, created_before = user.updated_at, user.created_at

        # Brief pause so the update happens at a later wall-clock time than
        # the insert.
        await asyncio.sleep(0.01)
        user.name = "alice2"
        user.active = False
        result = await repo.update(user)

        assert result.name == "alice2"
        assert result.active is False
        # ``>=`` rather than ``>``: this only proves the timestamp did not
        # move backwards, it does not fail if the value is unchanged.
        assert result.updated_at >= updated_before  # bumped
        assert result.created_at == created_before
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_repo_delete_by_instance(memory_root: MemoryRoot) -> None:
    """``delete`` removes the row for the instance it is given."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))
        rows_before = await repo.count()
        assert rows_before == 1

        await repo.delete(user)

        rows_after = await repo.count()
        assert rows_after == 0
        # The old primary key must no longer resolve to a row.
        gone = await repo.get_by_id(user.id)
        assert gone is None
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_repo_delete_by_id_returns_bool(memory_root: MemoryRoot) -> None:
    """``delete_by_id`` reports via its boolean result whether a row went."""
    repo, engine = await _setup_repo(memory_root)
    try:
        user = await repo.add(_DemoUser(name="alice"))

        first_attempt = await repo.delete_by_id(user.id)
        assert first_attempt is True

        second_attempt = await repo.delete_by_id(user.id)
        assert second_attempt is False  # already gone

        unknown_id = await repo.delete_by_id(99999)
        assert unknown_id is False  # never existed
    finally:
        await engine.dispose()
|
||||
@ -0,0 +1,78 @@
|
||||
"""Unit tests for session_scope semantics."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
from everos.core.persistence import (
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
session_scope,
|
||||
)
|
||||
|
||||
|
||||
class _Sample(SQLModel, table=True):
    """Minimal single-column table used only by the tests in this module."""

    # The tests query this table by name through raw SQL, so the literal
    # below must stay in sync with those statements.
    __tablename__ = "_sample_session_scope"  # type: ignore[assignment]

    id: int | None = Field(default=None, primary_key=True)
    note: str
|
||||
|
||||
|
||||
@pytest.fixture
def memory_root(tmp_path: Path) -> MemoryRoot:
    """Provide a ``MemoryRoot`` anchored at pytest's per-test ``tmp_path``.

    ``ensure()`` is invoked before the root is handed to the test.
    """
    root = MemoryRoot(tmp_path)
    root.ensure()
    return root
|
||||
|
||||
|
||||
async def test_session_scope_commits_on_success(memory_root: MemoryRoot) -> None:
    """A row committed in one ``session_scope`` is visible from a later one."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        # Write and commit in a first scope.
        async with session_scope(factory) as writer:
            writer.add(_Sample(note="hello"))
            await writer.commit()

        # Read back through a fresh scope using raw SQL.
        async with session_scope(factory) as reader:
            result = await reader.execute(
                text("SELECT note FROM _sample_session_scope")
            )
            first_row = result.fetchone()
            assert first_row is not None
            assert first_row[0] == "hello"
    finally:
        await engine.dispose()
|
||||
|
||||
|
||||
async def test_session_scope_rolls_back_on_exception(
    memory_root: MemoryRoot,
) -> None:
    """An exception raised inside ``session_scope`` discards pending rows."""
    engine = create_system_engine(memory_root.system_db, SqliteSettings())
    factory = create_session_factory(engine)
    try:
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)

        with pytest.raises(RuntimeError):
            async with session_scope(factory) as doomed:
                doomed.add(_Sample(note="should rollback"))
                # Deliberately no commit: the pending row must be dropped
                # when the exception leaves the scope.
                raise RuntimeError("boom")

        # A fresh scope must see an empty table.
        async with session_scope(factory) as reader:
            result = await reader.execute(
                text("SELECT COUNT(*) FROM _sample_session_scope")
            )
            count_row = result.fetchone()
            assert count_row is not None
            assert count_row[0] == 0
    finally:
        await engine.dispose()
|
||||
Reference in New Issue
Block a user