chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

@ -0,0 +1,157 @@
"""422 validation paths for ``POST /api/v1/memory/get``.
These are route-layer error tests — they exercise:
- DTO-layer rejections (page_size cap, empty owner_id, missing /
invalid memory_type, invalid sort_order, owner+memory_type mismatch)
- service-layer ``compile_filters_for_get`` rejections (unknown filter
field, malformed op shape)
No data is seeded; nothing reaches LanceDB. The full happy-path / data
e2e suite (with seeded rows and 200 assertions) lives in
``tests/integration/test_get_endpoint_e2e.py``.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from importlib import import_module
from pathlib import Path
import pytest
from httpx import ASGITransport, AsyncClient
from everos.config import load_settings
from everos.entrypoints.api.app import create_app
from everos.infra.persistence.lancedb import lancedb_manager
# ``everos.service.__init__`` re-exports ``get`` shadowing the
# submodule. Reach the real module via importlib so we can reset its
# ``_manager`` lazy singleton.
get_service_mod = import_module("everos.service.get")
@pytest.fixture
async def client(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a FastAPI app built with no lifespan.

    Setup points ``EVEROS_MEMORY__ROOT`` at the per-test ``tmp_path``, drops
    the cached settings, and resets the LanceDB manager plus the get-service
    ``_manager`` singleton so no state carries over from a previous test.

    Teardown disposes the LanceDB connection. The settings cache is cleared
    in a ``finally`` so that a failing dispose (or client shutdown) cannot
    leave settings that reference this test's temporary directory cached for
    later tests.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    load_settings.cache_clear()

    # Reset the module-level singletons this endpoint path touches.
    lancedb_manager._conn = None
    lancedb_manager._tables.clear()
    get_service_mod._manager = None

    app = create_app(lifespan_providers=[])
    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as c:
            yield c
        await lancedb_manager.dispose_connection()
    finally:
        # Always drop cached settings, even when the teardown above raised.
        load_settings.cache_clear()
# ── DTO-layer 422 ──────────────────────────────────────────────────────
async def test_page_size_above_cap_returns_422(client: AsyncClient) -> None:
    """A ``page_size`` of 200 exceeds the documented cap of 100 → 422."""
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "page_size": 200,
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
async def test_empty_user_id_returns_422(client: AsyncClient) -> None:
    """An empty ``user_id`` is rejected with 422 (field has ``min_length=1``)."""
    payload = {
        "user_id": "",
        "memory_type": "episode",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
async def test_missing_memory_type_returns_422(client: AsyncClient) -> None:
    """A body without the required ``memory_type`` field is rejected with 422."""
    payload = {"user_id": "u1"}
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
async def test_invalid_memory_type_value_returns_422(client: AsyncClient) -> None:
    """A ``memory_type`` value that is not one of the enum's kinds → 422."""
    payload = {
        "user_id": "u1",
        # ``atomic_fact`` is not accepted as a top-level memory kind.
        "memory_type": "atomic_fact",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
async def test_invalid_sort_order_returns_422(client: AsyncClient) -> None:
    """``sort_order`` is a strict Literal, so the uppercase ``DESC`` → 422."""
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "sort_order": "DESC",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
async def test_owner_memory_type_mismatch_returns_422(client: AsyncClient) -> None:
    """A user owner combined with ``memory_type="agent_case"`` → 422.

    The owner / memory-type mismatch is rejected as a validation error.
    """
    payload = {
        "user_id": "u1",
        "memory_type": "agent_case",
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
# ── service.compile_filters_for_get 422 ───────────────────────────────
async def test_unknown_filter_field_returns_422(client: AsyncClient) -> None:
    """A filter key outside ``ALLOWED_FIELDS`` is reported as 422.

    The response text must also mention ``unsupported``.
    """
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"random_attr": "boom"},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422
    assert "unsupported" in response.text
async def test_malformed_filter_in_op_returns_422(client: AsyncClient) -> None:
    """An ``in`` operator given a scalar instead of a list is reported as 422."""
    payload = {
        "user_id": "u1",
        "memory_type": "episode",
        "filters": {"session_id": {"in": "not_a_list"}},
    }
    response = await client.post("/api/v1/memory/get", json=payload)
    assert response.status_code == 422

View File

@ -0,0 +1,125 @@
"""``GET /metrics`` — Prometheus exposition + middleware integration.
Verifies three contracts of the metrics path:
1. The route renders ``prometheus_client``-parseable exposition format.
2. The ``PrometheusMiddleware`` actually bumps the per-route counter
on a real round-trip (verified via before/after delta to avoid
coupling to the global registry's cross-test accumulation).
3. The ``_SKIP_PATHS`` set (``/metrics``, ``/health``) is honoured —
those endpoints never appear in ``everos_http_requests_total``.
No lifespan / no LanceDB / no LLM needed — middleware lives at the ASGI
layer above any of that.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from pathlib import Path
import pytest
from httpx import ASGITransport, AsyncClient
from prometheus_client.parser import text_string_to_metric_families
from everos.config import load_settings
from everos.entrypoints.api.app import create_app
# ``prometheus_client.parser`` strips the ``_total`` counter suffix from
# the *family* name but leaves *sample* names intact.
_REQUESTS_FAMILY = "everos_http_requests"
_REQUESTS_TOTAL = "everos_http_requests_total"
@pytest.fixture
async def client(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client for an app built by ``create_app`` with no lifespan.

    ``EVEROS_MEMORY__ROOT`` is pointed at ``tmp_path`` and the settings cache
    is cleared both before the app is built and after the client is closed.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    load_settings.cache_clear()

    app = create_app(lifespan_providers=[])
    async with AsyncClient(
        transport=ASGITransport(app=app),
        base_url="http://test",
    ) as http_client:
        yield http_client

    load_settings.cache_clear()
# ── Helpers ────────────────────────────────────────────────────────────
def _counter_value(text: str, path: str, status: str) -> float:
    """Add up ``everos_http_requests_total`` samples for one path + status.

    Args:
        text: Prometheus exposition text to parse.
        path: Required value of each sample's ``path`` label.
        status: Required value of each sample's ``status`` label.

    Returns:
        The summed value of all matching samples; ``0.0`` when none match.
    """
    matched = 0.0
    for family in text_string_to_metric_families(text):
        if family.name == _REQUESTS_FAMILY:
            for sample in family.samples:
                # Only the ``_total`` samples of the family are counted.
                if sample.name == _REQUESTS_TOTAL and (
                    sample.labels.get("path") == path
                    and sample.labels.get("status") == status
                ):
                    matched += sample.value
    return matched
def _all_recorded_paths(text: str) -> set[str]:
    """Collect every ``path`` label seen on ``everos_http_requests_total``.

    Samples without a ``path`` label contribute the empty string.
    """
    return {
        sample.labels.get("path", "")
        for family in text_string_to_metric_families(text)
        if family.name == _REQUESTS_FAMILY
        for sample in family.samples
        if sample.name == _REQUESTS_TOTAL
    }
# ── Tests ──────────────────────────────────────────────────────────────
async def test_metrics_endpoint_renders_prometheus_format(
    client: AsyncClient,
) -> None:
    """``GET /metrics`` answers 200 with parseable Prometheus exposition text."""
    response = await client.get("/metrics")
    assert response.status_code == 200

    content_type = response.headers.get("content-type", "")
    assert "text/plain" in content_type

    # The whole body has to parse, and the request counter family must exist.
    family_names = set()
    for family in text_string_to_metric_families(response.text):
        family_names.add(family.name)
    assert _REQUESTS_FAMILY in family_names
async def test_metrics_counter_increments_on_request(client: AsyncClient) -> None:
    """A real route hit bumps ``everos_http_requests_total`` by exactly one.

    Uses a 422 to avoid needing LanceDB — Pydantic rejects the empty
    body before the route handler runs, but the middleware still sees
    a completed request/response with ``status=422``.

    The counter is compared as a before/after delta so the assertion does
    not depend on whatever earlier requests already recorded.
    """
    before_resp = await client.get("/metrics")
    before = _counter_value(before_resp.text, "/api/v1/memory/get", "422")

    bad = await client.post("/api/v1/memory/get", json={})
    assert bad.status_code == 422

    after_resp = await client.get("/metrics")
    after = _counter_value(after_resp.text, "/api/v1/memory/get", "422")

    # Separate the two readings so a failure message is legible
    # (previously they were concatenated, e.g. "0.01.0").
    assert after - before == 1.0, f"counter not bumped: {before} -> {after}"
async def test_metrics_skip_paths_not_recorded(client: AsyncClient) -> None:
    """Requests to ``/health`` and ``/metrics`` never show up in the counter."""
    # Touch both skipped endpoints first; had they been recorded, the dump
    # fetched afterwards would list them.
    for skipped in ("/health", "/metrics"):
        await client.get(skipped)

    dump = await client.get("/metrics")
    recorded = _all_recorded_paths(dump.text)
    assert "/metrics" not in recorded, recorded
    assert "/health" not in recorded, recorded

View File

@ -0,0 +1,133 @@
"""422 validation paths for ``POST /api/v1/memory/search``.
These exercise the request → DTO / route → service.compile_filters
error paths *without* needing any seeded data or external services
(no embedder / no LLM / no LanceDB rows). The full data-driven e2e
suite lives in ``tests/integration/test_search_endpoint_e2e.py``.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from importlib import import_module
from pathlib import Path
import pytest
from httpx import ASGITransport, AsyncClient
from everos.config import load_settings
from everos.entrypoints.api.app import create_app
from everos.infra.persistence.lancedb import lancedb_manager
search_service_mod = import_module("everos.service.search")
@pytest.fixture
async def client(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a FastAPI app built with no lifespan.

    Setup points ``EVEROS_MEMORY__ROOT`` at the per-test ``tmp_path``, drops
    the cached settings, and resets the LanceDB manager plus the search
    service's module-level singletons and their "resolved" flags so no state
    carries over from a previous test.

    Teardown disposes the LanceDB connection. The settings cache is cleared
    in a ``finally`` so that a failing dispose (or client shutdown) cannot
    leave settings that reference this test's temporary directory cached for
    later tests.
    """
    monkeypatch.setenv("EVEROS_MEMORY__ROOT", str(tmp_path))
    load_settings.cache_clear()

    lancedb_manager._conn = None
    lancedb_manager._tables.clear()

    # Cached collaborators go back to "unset" ...
    for attr in ("_manager", "_embedding", "_reranker", "_llm_client"):
        setattr(search_service_mod, attr, None)
    # ... and their resolution flags back to "not yet resolved".
    for attr in ("_embedding_resolved", "_rerank_resolved", "_llm_resolved"):
        setattr(search_service_mod, attr, False)

    app = create_app(lifespan_providers=[])
    transport = ASGITransport(app=app)
    try:
        async with AsyncClient(transport=transport, base_url="http://test") as c:
            yield c
        await lancedb_manager.dispose_connection()
    finally:
        # Always drop cached settings, even when the teardown above raised.
        load_settings.cache_clear()
def _body(**overrides) -> dict:
"""Minimal valid SearchRequest body; tests override one field to break it.
``method="keyword"`` is pinned because the SearchRequest DTO defaults
to HYBRID, which ``SearchManager._validate_components`` rejects when
no ``[embedding]`` provider is configured (the case in CI). Keyword
needs no embedder, so DTO / compile_filters validation paths fire
cleanly without external services — which is exactly what this file
is supposed to exercise.
"""
base = {
"user_id": "u1",
"query": "hello",
"method": "keyword",
}
base.update(overrides)
return base
# ── DTO-layer 422 ──────────────────────────────────────────────────────
async def test_empty_query_returns_422(client: AsyncClient) -> None:
    """An empty ``query`` is rejected with 422 (field has ``min_length=1``)."""
    payload = _body(query="")
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_empty_user_id_returns_422(client: AsyncClient) -> None:
    """An empty ``user_id`` is rejected with 422 (field has ``min_length=1``)."""
    payload = _body(user_id="")
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_both_user_and_agent_id_returns_422(client: AsyncClient) -> None:
    """Supplying ``agent_id`` on top of the default ``user_id`` → 422.

    The two owner fields are mutually exclusive (xor validator).
    """
    payload = _body(agent_id="agent_x")
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_invalid_method_returns_422(client: AsyncClient) -> None:
    """A ``method`` value that is not a SearchMethod member → 422."""
    payload = _body(method="bm42")
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_top_k_zero_returns_422(client: AsyncClient) -> None:
    """``top_k=0`` is rejected with 422; accepted values are -1 or 1..100."""
    payload = _body(top_k=0)
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_top_k_above_cap_returns_422(client: AsyncClient) -> None:
    """``top_k=101`` is one above the cap of 100 and is rejected with 422."""
    payload = _body(top_k=101)
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
async def test_radius_above_one_returns_422(client: AsyncClient) -> None:
    """A ``radius`` of 1.5 falls outside the allowed [0.0, 1.0] range → 422."""
    payload = _body(radius=1.5)
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
# ── service.compile_filters 422 ───────────────────────────────────────
async def test_unknown_filter_field_returns_422(client: AsyncClient) -> None:
    """A filter key outside ``ALLOWED_FIELDS`` is reported as 422.

    The response text must also mention ``unsupported``.
    """
    payload = _body(filters={"random_attr": "boom"})
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422
    assert "unsupported" in response.text
async def test_reserved_owner_id_in_filters_returns_422(client: AsyncClient) -> None:
    """``owner_id`` inside ``filters`` is rejected with 422.

    The key is reserved for the top level of the request and may not be
    passed through the filter map.
    """
    payload = _body(filters={"owner_id": "spoof"})
    response = await client.post("/api/v1/memory/search", json=payload)
    assert response.status_code == 422