chore: initialize EverOS 1.0.0

md-first memory extraction framework for AI agents.

Markdown is the single source of truth; SQLite holds state and LanceDB
provides the rebuildable vector + BM25 + scalar index. The codebase follows
a single-direction DDD layering (entrypoints -> service -> memory -> infra,
with component / core / config cross-cutting) enforced by import-linter.

Engineering surface:
- Coding conventions in .claude/rules/ (path-scoped) and workflows in
  .claude/skills/ (/commit, /new-branch, /pr).
- GitHub Actions CI runs make lint + test + integration; pre-commit mirrors
  the gates locally (ruff, hygiene hooks, gitlint commit-msg).
- Commit messages follow Conventional Commits, enforced by gitlint.
- make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
Elliot Chen
2026-06-05 22:35:51 +08:00
commit 518b8eca85
636 changed files with 160553 additions and 0 deletions

View File

View File

33
src/everos/core/errors.py Normal file
View File

@ -0,0 +1,33 @@
"""Cross-cutting domain errors surfaced to API callers.
These live in ``core`` so the ``memory`` layer can raise them and the
``entrypoints`` layer can catch them without crossing the layered import
boundary — ``any -> core`` is the only edge both share (entrypoints must
not import ``memory`` directly).
"""
from __future__ import annotations
class MultimodalError(Exception):
    """Root of the multimodal-parsing error family that is shown to callers.

    Every ``MultimodalError`` (including subclasses) is translated by the API
    layer into the aligned ``{error: {code, message}}`` envelope with HTTP
    status 415.
    """
class UnsupportedModalityError(MultimodalError):
    """The requested modality is one everalgo cannot process.

    Typical triggers are the video stub or an unknown type. This error wraps
    everalgo's ``NotImplementedError`` / dispatch ``ValueError`` so that the
    caller receives a stable, aligned error rather than a raw 500.
    """
class MultimodalNotEnabledError(MultimodalError):
    """The multimodal capability is not available in this deployment.

    Raised in two situations: the ``everos[multimodal]`` extra has not been
    installed, or a required system dependency is missing (LibreOffice, which
    is needed for Office documents).
    """

View File

@ -0,0 +1,27 @@
"""Application lifespan composition (chassis only).
This subpackage holds the *generic* lifespan machinery — the
:class:`LifespanProvider` ABC, :func:`build_lifespan` factory, and
chassis-level providers that are independent of any storage backend
(observability metrics, etc.). Concrete storage-backend providers
(SQLite / LanceDB) live next to the entrypoint that composes them
(see :mod:`everos.entrypoints.api.lifespans`) so ``core`` stays free
of concrete-backend imports.
External usage:
from everos.core.lifespan import (
LifespanProvider,
MetricsLifespanProvider,
build_lifespan,
)
"""
from .base import LifespanProvider as LifespanProvider
from .factory import build_lifespan as build_lifespan
from .metrics_lifespan import MetricsLifespanProvider as MetricsLifespanProvider
__all__ = [
"LifespanProvider",
"MetricsLifespanProvider",
"build_lifespan",
]

View File

@ -0,0 +1,30 @@
"""Lifespan provider abstract base.
A LifespanProvider is one unit of startup / shutdown work invoked by the
FastAPI lifespan factory. Providers are registered explicitly (no DI
auto-discovery) and executed in ``order`` ascending on startup, reverse
on shutdown.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
from fastapi import FastAPI
class LifespanProvider(ABC):
    """Abstract unit of work run at application startup and shutdown.

    Attributes are plain instance fields:

    * ``name`` — key under which the startup result is stored.
    * ``order`` — sort key; lower values start first and stop last.
    """

    def __init__(self, name: str, order: int = 0) -> None:
        """Record the provider's ``name`` and its ``order`` (default ``0``)."""
        self.name = name
        self.order = order

    @abstractmethod
    async def startup(self, app: FastAPI) -> Any:
        """Run startup work; the result lands in ``app.state.lifespan_data[name]``."""

    @abstractmethod
    async def shutdown(self, app: FastAPI) -> None:
        """Run teardown work; invoked in reverse order while the app shuts down."""

View File

@ -0,0 +1,57 @@
"""Lifespan composition factory.
Builds a FastAPI lifespan context manager from an explicit list of
LifespanProvider instances.
"""
from __future__ import annotations
from collections.abc import AsyncIterator, Callable, Sequence
from contextlib import asynccontextmanager
from fastapi import FastAPI
from everos.core.observability.logging import get_logger
from .base import LifespanProvider
logger = get_logger(__name__)
def build_lifespan(
    providers: Sequence[LifespanProvider],
) -> Callable[[FastAPI], AsyncIterator[None]]:
    """Compose providers into a FastAPI lifespan context manager.

    Providers are run in ``order`` ascending on startup and reverse on
    shutdown. A non-None return value from ``startup`` is stored under
    ``app.state.lifespan_data[provider.name]``.

    Only providers whose ``startup`` completed successfully are shut down.
    If a startup raises, the providers that had already started are torn
    down in reverse order and the original exception propagates; providers
    that never started (or whose startup failed) do not receive a
    ``shutdown`` call.

    Args:
        providers: Provider instances to compose; the input sequence is not
            mutated (a sorted copy is taken once, at build time).

    Returns:
        A callable accepting the ``FastAPI`` app and producing the async
        context manager FastAPI expects for its ``lifespan`` argument.
    """
    sorted_providers = sorted(providers, key=lambda p: p.order)

    @asynccontextmanager
    async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
        lifespan_data: dict[str, object] = {}
        # Tracks successful startups so teardown never touches a provider
        # that was not brought up.
        started: list[LifespanProvider] = []
        try:
            for provider in sorted_providers:
                logger.info(
                    "lifespan_provider_startup",
                    name=provider.name,
                    order=provider.order,
                )
                result = await provider.startup(app)
                started.append(provider)
                if result is not None:
                    lifespan_data[provider.name] = result
            app.state.lifespan_data = lifespan_data
            yield
        finally:
            for provider in reversed(started):
                # Best-effort teardown: one failing provider must not stop
                # the remaining providers from shutting down.
                try:
                    logger.info("lifespan_provider_shutdown", name=provider.name)
                    await provider.shutdown(app)
                except Exception:
                    logger.exception(
                        "lifespan_provider_shutdown_failed", name=provider.name
                    )

    return _lifespan

View File

@ -0,0 +1,36 @@
"""Metrics lifespan provider.
Confirms the metrics registry is ready and logs that the ``/metrics`` HTTP
endpoint is mounted on the main API. Kept as a placeholder to demonstrate
the lifespan pattern; replace or extend with a standalone metrics server
(e.g. ``prometheus_client.start_http_server`` on a separate port) if you
need to expose metrics on a dedicated socket.
"""
from __future__ import annotations
from typing import Any
from fastapi import FastAPI
from everos.core.observability.logging import get_logger
from everos.core.observability.metrics import get_metrics_registry
from .base import LifespanProvider
logger = get_logger(__name__)
class MetricsLifespanProvider(LifespanProvider):
    """Lifespan provider that touches the metrics registry and logs readiness."""

    def __init__(self, order: int = 5) -> None:
        """Register under the fixed name ``"metrics"`` with the given ``order``."""
        super().__init__(name="metrics", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Fetch the metrics registry, log that it is ready, and return it."""
        metrics_registry = get_metrics_registry()
        logger.info("metrics_registry_ready", endpoint="/metrics")
        return metrics_registry

    async def shutdown(self, app: FastAPI) -> None:
        """Log the shutdown event; there is nothing to release."""
        logger.info("metrics_lifespan_shutdown")

View File

@ -0,0 +1,31 @@
"""Cross-cutting HTTP middleware components.
External usage:
from everos.core.middleware import (
DEFAULT_CORS_ALLOW_CREDENTIALS,
DEFAULT_CORS_ALLOW_HEADERS,
DEFAULT_CORS_ALLOW_METHODS,
DEFAULT_CORS_ORIGINS,
ProfileMiddleware,
PrometheusMiddleware,
global_exception_handler,
)
"""
from .cors import DEFAULT_CORS_ALLOW_CREDENTIALS as DEFAULT_CORS_ALLOW_CREDENTIALS
from .cors import DEFAULT_CORS_ALLOW_HEADERS as DEFAULT_CORS_ALLOW_HEADERS
from .cors import DEFAULT_CORS_ALLOW_METHODS as DEFAULT_CORS_ALLOW_METHODS
from .cors import DEFAULT_CORS_ORIGINS as DEFAULT_CORS_ORIGINS
from .global_exception import global_exception_handler as global_exception_handler
from .profile import ProfileMiddleware as ProfileMiddleware
from .prometheus import PrometheusMiddleware as PrometheusMiddleware
__all__ = [
"DEFAULT_CORS_ALLOW_CREDENTIALS",
"DEFAULT_CORS_ALLOW_HEADERS",
"DEFAULT_CORS_ALLOW_METHODS",
"DEFAULT_CORS_ORIGINS",
"ProfileMiddleware",
"PrometheusMiddleware",
"global_exception_handler",
]

View File

@ -0,0 +1,12 @@
"""CORS configuration defaults.
The CORS middleware itself is FastAPI's stock ``CORSMiddleware``; this module
centralises the default policy values used by the application factory.
"""
from __future__ import annotations
# Default CORS policy values consumed by the application factory.
# NOTE(review): credentials are enabled together with wildcard origins,
# which is a very permissive combination — confirm this default is meant
# for production and not only for local development.
DEFAULT_CORS_ALLOW_CREDENTIALS: bool = True
# Wildcard: every request header is allowed.
DEFAULT_CORS_ALLOW_HEADERS: list[str] = ["*"]
# Wildcard: every HTTP method is allowed.
DEFAULT_CORS_ALLOW_METHODS: list[str] = ["*"]
# Wildcard: every origin is allowed.
DEFAULT_CORS_ORIGINS: list[str] = ["*"]

View File

@ -0,0 +1,143 @@
"""Global exception handler — uniform error envelope per v1 API brief §1.
Envelope shape (matches the v1 API brief §1 — ``request_id`` at the top
level alongside ``error``; the ``error`` object carries ``code`` /
``message`` plus ops-friendly ``timestamp`` / ``path`` for debugging)::
{
"request_id": "<32 lowercase hex chars — W3C trace_id format>",
"error": {
"code": "HTTP_ERROR" | "SYSTEM_ERROR",
"message": "<reason>",
"timestamp": "<ISO 8601 with tz>",
"path": "<request path>"
}
}
Rules:
- 4xx (DTO / business validation / HTTPException) → ``code="HTTP_ERROR"``
with the human-readable reason in ``message``.
- 5xx (unhandled exception) → ``code="SYSTEM_ERROR"`` with a fixed
``message="Internal server error"`` — internal exception details are
logged but never leak to the client.
- ``request_id`` is sourced from ``request.state.request_id`` (set by
upstream middleware); falls back to a freshly minted id when absent.
"""
from __future__ import annotations
from fastapi import HTTPException, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from starlette.status import (
HTTP_422_UNPROCESSABLE_ENTITY,
HTTP_500_INTERNAL_SERVER_ERROR,
)
from everos.component.utils.datetime import (
get_now_with_timezone,
to_iso_format,
)
from everos.core.observability.logging import get_logger
from everos.core.observability.tracing import gen_request_id
logger = get_logger(__name__)
_INTERNAL_ERROR_MESSAGE = "Internal server error"
def _request_id(request: Request) -> str:
"""Return the request_id set by middleware, or mint a fresh fallback."""
rid = getattr(request.state, "request_id", None)
if rid:
return str(rid)
return gen_request_id()
def _envelope(
    *,
    code: str,
    message: str,
    request: Request,
) -> dict[str, object]:
    """Assemble the canonical error envelope with a nested ``error`` object.

    The top level holds ``request_id``; the ``error`` object holds the
    contract fields ``code`` / ``message`` together with the ops-friendly
    ``timestamp`` (current time, ISO formatted) and ``path`` (request path).
    """
    request_id = _request_id(request)
    error_body: dict[str, object] = {
        "code": code,
        "message": message,
        "timestamp": to_iso_format(get_now_with_timezone()),
        "path": str(request.url.path),
    }
    return {"request_id": request_id, "error": error_body}
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Convert any exception into a uniform JSON error response.

    Mapping:

    * ``RequestValidationError`` -> 422, ``code="HTTP_ERROR"``; the message
      is built from the first validation error (``"<msg>: <loc>"``, with the
      leading ``body`` location element removed).
    * ``HTTPException`` (FastAPI's class or the Starlette base class it
      derives from) -> the exception's own status code. Status codes below
      500 use ``code="HTTP_ERROR"`` with ``str(exc.detail)`` as the message;
      500 and above use ``code="SYSTEM_ERROR"`` with the fixed internal
      error message. Headers attached to the exception are forwarded.
    * Anything else -> 500, ``code="SYSTEM_ERROR"`` with the fixed internal
      error message; details are logged, never returned to the client.

    Args:
        request: The request being handled; supplies path, method and id.
        exc: The exception raised while handling ``request``.

    Returns:
        A ``JSONResponse`` carrying the standard error envelope.
    """
    # Imported locally so this handler also recognises HTTPExceptions raised
    # by Starlette itself (e.g. router-level 404 / 405), which are instances
    # of the base class rather than of FastAPI's subclass.
    from starlette.exceptions import HTTPException as StarletteHTTPException

    path = str(request.url.path)
    method = request.method

    if isinstance(exc, RequestValidationError):
        errors = exc.errors()
        if errors:
            first = errors[0]
            loc = ".".join(str(p) for p in first.get("loc", []) if p != "body")
            msg = first.get("msg", "Validation error")
            message = f"{msg}: {loc}" if loc else msg
        else:
            message = "Request validation error"
        logger.warning("validation_error", method=method, path=path, message=message)
        return JSONResponse(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content=_envelope(code="HTTP_ERROR", message=message, request=request),
        )

    if isinstance(exc, (HTTPException, StarletteHTTPException)):
        logger.warning(
            "http_exception",
            method=method,
            path=path,
            status_code=exc.status_code,
            detail=exc.detail,
        )
        # Preserve protocol headers set on the exception (for example
        # ``WWW-Authenticate`` on 401 or ``Allow`` on 405).
        headers = getattr(exc, "headers", None)
        # 5xx routed through HTTPException is rare but valid; still honour
        # the SYSTEM_ERROR code so the envelope is consistent and the
        # detail does not leak to the client.
        if exc.status_code >= 500:
            return JSONResponse(
                status_code=exc.status_code,
                content=_envelope(
                    code="SYSTEM_ERROR",
                    message=_INTERNAL_ERROR_MESSAGE,
                    request=request,
                ),
                headers=headers,
            )
        return JSONResponse(
            status_code=exc.status_code,
            content=_envelope(
                code="HTTP_ERROR",
                message=str(exc.detail),
                request=request,
            ),
            headers=headers,
        )

    logger.error(
        "unhandled_exception",
        method=method,
        path=path,
        exception_type=type(exc).__name__,
        exc_info=True,
    )
    return JSONResponse(
        status_code=HTTP_500_INTERNAL_SERVER_ERROR,
        content=_envelope(
            code="SYSTEM_ERROR",
            message=_INTERNAL_ERROR_MESSAGE,
            request=request,
        ),
    )

View File

@ -0,0 +1,69 @@
"""Performance profiling middleware (HTML report via pyinstrument).
Triggered with ``?profile=true`` query parameter when ``PROFILING_ENABLED=true``
is set. Gracefully no-ops if pyinstrument is not installed.
"""
from __future__ import annotations
import os
from collections.abc import Awaitable, Callable
from fastapi import Request
from fastapi.responses import HTMLResponse
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
from starlette.types import ASGIApp
from everos.core.observability.logging import get_logger
logger = get_logger(__name__)
_TRUTHY = frozenset({"1", "true", "yes"})
def _profiling_enabled() -> bool:
"""Read ``PROFILING_ENABLED`` env var (1 / true / yes => enabled)."""
raw = os.getenv("PROFILING_ENABLED", os.getenv("PROFILING", "false")).lower()
return raw in _TRUTHY
class ProfileMiddleware(BaseHTTPMiddleware):
    """Returns a pyinstrument HTML report when ``?profile=true`` is set.

    The middleware is a pass-through unless profiling is enabled through the
    environment *and* pyinstrument can be imported. For a profiled request
    the downstream response is discarded and the HTML report is returned
    with status 200 instead.
    """

    def __init__(self, app: ASGIApp) -> None:
        """Read the enable flag once and probe for pyinstrument.

        Args:
            app: The downstream ASGI application.
        """
        super().__init__(app)
        self._enabled = _profiling_enabled()
        self._available = False
        if self._enabled:
            try:
                import pyinstrument  # noqa: F401

                self._available = True
                logger.info("profiling_middleware_enabled")
            except ImportError:
                # Profiling was requested but cannot work; degrade to no-op.
                logger.warning("profiling_requested_but_pyinstrument_missing")
                self._enabled = False

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Profile the request when asked to, otherwise pass it through.

        Args:
            request: Incoming request; ``profile`` query parameter opts in.
            call_next: Callable that invokes the downstream application.

        Returns:
            The downstream response for non-profiled requests, or an
            ``HTMLResponse`` with the pyinstrument report (status 200) for
            profiled ones — also when the downstream handler raised an
            ``Exception``, which is logged rather than propagated.
        """
        if not self._enabled or not self._available:
            return await call_next(request)
        if request.query_params.get("profile", "").lower() not in _TRUTHY:
            return await call_next(request)

        from pyinstrument import Profiler

        profiler = Profiler()
        profiler.start()
        logger.info("profile_started", method=request.method, path=request.url.path)
        try:
            await call_next(request)
        except Exception:
            logger.exception("profile_request_failed")
        finally:
            # Always stop the profiler, including when a BaseException such
            # as task cancellation escapes, so it is never left running.
            profiler.stop()
        return HTMLResponse(content=profiler.output_html(), status_code=200)

View File

@ -0,0 +1,84 @@
"""Prometheus HTTP metrics middleware.
Auto-instruments incoming HTTP requests with a request counter and a
duration histogram. Mounted via ``app.add_middleware(PrometheusMiddleware)``.
Skips internal endpoints (``/metrics``, ``/health``, etc.) so they do not
inflate cardinality or pollute their own statistics.
"""
from __future__ import annotations
import time
from collections.abc import Awaitable, Callable
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from everos.core.observability.logging import get_logger
from everos.core.observability.metrics import Counter, Histogram, HistogramBuckets
logger = get_logger(__name__)
# Request counter, labelled by HTTP method, normalised path and status code.
_http_requests_total = Counter(
    name="http_requests_total",
    description="Total number of HTTP requests handled.",
    labelnames=("method", "path", "status"),
    namespace="everos",
)
# Request latency histogram (seconds), labelled by HTTP method and path.
_http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    description="HTTP request duration in seconds.",
    labelnames=("method", "path"),
    namespace="everos",
    buckets=HistogramBuckets.DEFAULT,
)
# Exact request paths the middleware forwards without recording metrics.
_SKIP_PATHS = frozenset({"/metrics", "/health", "/healthz", "/favicon.ico"})
def _normalize_path(request: Request) -> str:
"""Resolve the route template (e.g. ``/users/{user_id}``) for stable labels."""
scope = getattr(request, "scope", {})
route = scope.get("route") if isinstance(scope, dict) else None
if route is not None and hasattr(route, "path"):
return route.path
if request.path_params:
path = request.url.path
for name, value in request.path_params.items():
if str(value) in path:
path = path.replace(str(value), f"{{{name}}}")
return path
return "{unmatched}"
class PrometheusMiddleware(BaseHTTPMiddleware):
    """Feeds ``http_requests_total`` and ``http_request_duration_seconds``."""

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Time the downstream call and record one counter + histogram sample.

        Requests whose path is listed in ``_SKIP_PATHS`` are forwarded
        untouched. When the downstream call raises, the sample is still
        recorded, with status ``"500"``, and the exception propagates.
        """
        if request.url.path in _SKIP_PATHS:
            return await call_next(request)

        http_method = request.method
        # Stays "500" unless a response is actually produced.
        status_label = "500"
        started_at = time.perf_counter()
        try:
            downstream = await call_next(request)
            status_label = str(downstream.status_code)
            return downstream
        finally:
            elapsed = time.perf_counter() - started_at
            path_label = _normalize_path(request)
            _http_requests_total.labels(
                method=http_method, path=path_label, status=status_label
            ).inc()
            _http_request_duration_seconds.labels(
                method=http_method, path=path_label
            ).observe(elapsed)

View File

@ -0,0 +1,13 @@
"""structlog-based logging factory.
External usage:
from everos.core.observability.logging import get_logger, configure_logging
logger = get_logger(__name__)
logger.info("event_name", key=value)
"""
from .factory import configure_logging as configure_logging
from .factory import get_logger as get_logger
__all__ = ["configure_logging", "get_logger"]

View File

@ -0,0 +1,117 @@
"""structlog logger factory.
Provides ``get_logger(__name__)`` for module-level logger acquisition.
``configure_logging()`` is called once at process startup (run.py / lifespan)
to set up the structlog processor chain and route stdlib logging through
the same formatter so output stays uniform regardless of the caller.
The configuration follows structlog's official "Foreign Log Integration"
recipe: a single ``ProcessorFormatter`` renders both everos's own
``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
That way all three of the previously divergent prefixes — ``INFO:``,
``[warning ]``, plus the unconfigured no-prefix output — collapse to
one ``[level] event key=value`` shape.
Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
crate and emit straight to stderr without going through Python; this
module cannot reach them. Control their level with ``RUST_LOG`` env.
"""
from __future__ import annotations
import logging
import sys
from typing import Any
import structlog
def get_logger(name: str) -> Any:
    """Fetch the structlog logger for ``name`` (normally a module's ``__name__``)."""
    bound_logger = structlog.get_logger(name)
    return bound_logger
def configure_logging(level: str = "INFO") -> None:
    """Configure structlog and stdlib logging once at process startup.

    After this call:

    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
      message flows through the same ``ProcessorFormatter``, so output
      format is identical regardless of which logging API the caller used.
    * The root logger has exactly one handler installed by this function:
      a ``StreamHandler`` pointing at ``sys.stdout``. Handlers left by an
      earlier ``configure_logging`` call (recognised by their
      ``ProcessorFormatter``) are dropped, so repeated calls do not
      duplicate output. Handlers attached by other parties are kept.
    * The ``httpx`` / ``httpcore`` / ``urllib3`` loggers are raised to
      ``WARNING``.

    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
    on the server entry point — without it, uvicorn re-installs its own
    handlers on every startup and overrides what we set here.

    Args:
        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``).
            Names that do not resolve to an integer level on the ``logging``
            module silently fall back to ``INFO``.
    """
    # ``getattr`` alone can return upper-case module attributes that are not
    # levels (``logging.BASIC_FORMAT`` is a format string), so only integer
    # results are accepted.
    resolved = getattr(logging, level.upper(), None)
    log_level = resolved if isinstance(resolved, int) else logging.INFO

    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # structlog's own loggers feed into stdlib's logging, so the root
    # logger handler decides where output lands and how it's rendered.
    structlog.configure(
        processors=[
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # The single formatter shared by both pipelines:
    #   * structlog events arrive already wrapped via ``wrap_for_formatter``;
    #   * foreign records (stdlib LogRecord) get pushed through
    #     ``foreign_pre_chain`` so they pick up the same level / timestamp
    #     fields before hitting ``ConsoleRenderer``.
    formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            structlog.dev.ConsoleRenderer(),
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Drop any handler we installed on a previous ``configure_logging``
    # call (identified by formatter type) so repeated invocations don't
    # produce duplicate output, but keep handlers other parties have
    # attached — pytest's caplog handler in particular has to survive,
    # otherwise tests using the ``caplog`` fixture can't see records
    # that flow through structlog.
    root = logging.getLogger()
    root.handlers = [
        h
        for h in root.handlers
        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
    ]
    root.addHandler(handler)
    root.setLevel(log_level)

    # Third-party HTTP clients log every successful request at INFO level —
    # `httpx` is the worst offender (one line per call, called once per
    # LLM / embedding / rerank request). A single LoCoMo conv run easily
    # produces a thousand such lines, drowning everos's own events. They
    # are useful for debugging API failures, but failures already surface
    # via exceptions + status codes — so demote the success path to WARNING
    # and let real errors still come through.
    for noisy in ("httpx", "httpcore", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

View File

@ -0,0 +1,34 @@
"""Prometheus-style metrics primitives + registry.
External usage:
from everos.core.observability.metrics import (
Counter, Gauge, Histogram, HistogramBuckets,
get_metrics_registry, generate_metrics_response,
)
"""
from .counter import Counter as Counter
from .counter import LabeledCounter as LabeledCounter
from .gauge import Gauge as Gauge
from .gauge import LabeledGauge as LabeledGauge
from .histogram import Histogram as Histogram
from .histogram import HistogramBuckets as HistogramBuckets
from .histogram import LabeledHistogram as LabeledHistogram
from .registry import generate_metrics_response as generate_metrics_response
from .registry import get_metrics_registry as get_metrics_registry
from .registry import reset_metrics_registry as reset_metrics_registry
from .registry import set_metrics_registry as set_metrics_registry
__all__ = [
"Counter",
"Gauge",
"Histogram",
"HistogramBuckets",
"LabeledCounter",
"LabeledGauge",
"LabeledHistogram",
"generate_metrics_response",
"get_metrics_registry",
"reset_metrics_registry",
"set_metrics_registry",
]

View File

@ -0,0 +1,50 @@
"""Counter wrapper around ``prometheus_client.Counter``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Counter as PromCounter
from .registry import get_metrics_registry
class Counter:
    """Counter that only ever goes up (request totals, error counts)."""

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying ``prometheus_client`` counter.

        The counter is registered on the registry returned by
        ``get_metrics_registry()`` at construction time.
        """
        target_registry = get_metrics_registry()
        self._counter = PromCounter(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=target_registry,
        )
        self._labelnames = tuple(labelnames)

    def labels(self, **labels: str) -> LabeledCounter:
        """Return the child counter for the given label values."""
        child = self._counter.labels(**labels)
        return LabeledCounter(child)

    def inc(self, amount: float = 1.0) -> None:
        """Increase the unlabelled counter by ``amount`` (default ``1.0``)."""
        self._counter.inc(amount)
class LabeledCounter:
    """A counter bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Keep a reference to the already-labelled child object."""
        self._labeled = labeled

    def inc(self, amount: float = 1.0) -> None:
        """Forward the increment (default ``1.0``) to the labelled child."""
        child = self._labeled
        child.inc(amount)

View File

@ -0,0 +1,66 @@
"""Gauge wrapper around ``prometheus_client.Gauge``.
Async auto-refresh is intentionally not included in v0.1; subclass
:class:`Gauge` and call :meth:`set` from your own scheduling logic when
needed.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Gauge as PromGauge
from .registry import get_metrics_registry
class Gauge:
    """Point-in-time numeric reading (queue depth, cache size)."""

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying ``prometheus_client`` gauge.

        The gauge is registered on the registry returned by
        ``get_metrics_registry()`` at construction time.
        """
        target_registry = get_metrics_registry()
        self._gauge = PromGauge(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=target_registry,
        )

    def labels(self, **labels: str) -> LabeledGauge:
        """Return the child gauge for the given label values."""
        child = self._gauge.labels(**labels)
        return LabeledGauge(child)

    def set(self, value: float) -> None:
        """Overwrite the gauge with ``value``."""
        self._gauge.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.dec(amount)
class LabeledGauge:
    """A gauge bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Keep a reference to the already-labelled child object."""
        self._labeled = labeled

    def set(self, value: float) -> None:
        """Forward ``set(value)`` to the labelled child."""
        child = self._labeled
        child.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Forward the increment (default ``1.0``) to the labelled child."""
        child = self._labeled
        child.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Forward the decrement (default ``1.0``) to the labelled child."""
        child = self._labeled
        child.dec(amount)

View File

@ -0,0 +1,102 @@
"""Histogram wrapper around ``prometheus_client.Histogram``."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from prometheus_client import Histogram as PromHistogram
from .registry import get_metrics_registry
class HistogramBuckets:
    """Named bucket boundaries (upper bounds, ascending) for common workloads."""

    # General-purpose spread from 5 ms up to 10 s.
    DEFAULT: tuple[float, ...] = (
        0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
    )
    # Sub-second operations: 1 ms up to 0.5 s.
    FAST: tuple[float, ...] = (0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5)
    # Outbound calls: 10 ms up to 30 s.
    API_CALL: tuple[float, ...] = (
        0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0,
    )
    # Long-running jobs: 0.1 s up to 60 s.
    BATCH: tuple[float, ...] = (0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0)
    # Database round-trips: 1 ms up to 5 s.
    DATABASE: tuple[float, ...] = (
        0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0,
    )
class Histogram:
    """Bucketed distribution of observations (latency, sizes)."""

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
    ) -> None:
        """Create the underlying ``prometheus_client`` histogram.

        ``buckets`` is converted to a tuple before being handed over; the
        histogram is registered on the registry returned by
        ``get_metrics_registry()`` at construction time.
        """
        bucket_bounds = tuple(buckets)
        target_registry = get_metrics_registry()
        self._histogram = PromHistogram(
            name=name,
            documentation=description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            buckets=bucket_bounds,
            registry=target_registry,
        )

    def labels(self, **labels: str) -> LabeledHistogram:
        """Return the child histogram for the given label values."""
        child = self._histogram.labels(**labels)
        return LabeledHistogram(child)

    def observe(self, amount: float) -> None:
        """Record one observation of ``amount``."""
        self._histogram.observe(amount)

    def time(self) -> Any:
        """Return whatever the wrapped histogram's ``time()`` returns."""
        return self._histogram.time()
class LabeledHistogram:
    """A histogram bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Keep a reference to the already-labelled child object."""
        self._labeled = labeled

    def observe(self, amount: float) -> None:
        """Forward one observation to the labelled child."""
        child = self._labeled
        child.observe(amount)

    def time(self) -> Any:
        """Return whatever the labelled child's ``time()`` returns."""
        child = self._labeled
        return child.time()

View File

@ -0,0 +1,35 @@
"""Prometheus metrics registry singleton."""
from __future__ import annotations
from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
_registry: CollectorRegistry | None = None
def get_metrics_registry() -> CollectorRegistry:
    """Return the process-wide metrics registry.

    When no override is in place the module falls back to (and remembers)
    ``prometheus_client.REGISTRY``.
    """
    global _registry
    _registry = REGISTRY if _registry is None else _registry
    return _registry
def set_metrics_registry(registry: CollectorRegistry) -> None:
    """Install ``registry`` as the global registry (primarily a test hook)."""
    global _registry
    _registry = registry
def generate_metrics_response() -> bytes:
    """Serialise the current registry to Prometheus exposition-format bytes."""
    active_registry = get_metrics_registry()
    return generate_latest(active_registry)
def reset_metrics_registry() -> None:
    """Clear any registry override (primarily a test hook).

    The next ``get_metrics_registry()`` call falls back to the default again.
    """
    global _registry
    _registry = None

View File

@ -0,0 +1,32 @@
"""Tracing utilities — W3C-compatible request id generation.
External usage::
from everos.core.observability.tracing import gen_request_id
"""
from __future__ import annotations
from uuid import uuid4
def gen_request_id() -> str:
    """Mint a request id in the W3C trace-context ``trace_id`` format.

    The id is a random UUID4 rendered as 32 lowercase hex characters
    (128 bits, no prefix, no dashes) — the same shape as an OpenTelemetry
    trace identifier. Routes and services that need a fresh request id
    (because upstream middleware did not inject one) should use this helper
    instead of inventing their own uuid / prefix scheme, which keeps the id
    layer compatible with OpenTelemetry exporters and standard APM tooling.

    Example::

        >>> rid = gen_request_id()
        >>> len(rid)
        32
    """
    random_bits = uuid4().int
    return f"{random_bits:032x}"
__all__ = ["gen_request_id"]

View File

@ -0,0 +1,106 @@
"""Persistence primitives.
Read/write toolkit for markdown files, async wrappers around the SQLite
system DB and LanceDB index, plus a memory-root path manager. Higher
layers (``memory``, ``infra``) layer business semantics on top of these
building blocks; this subpackage knows nothing about Entry / MemCell /
Episode or any other business model.
External usage:
from everos.core.persistence import (
# Path manager + lock
MemoryRoot, memory_root_lock, LockError,
# Markdown IO toolkit
MarkdownReader, MarkdownWriter, ParsedMarkdown, Entry,
parse_frontmatter, dump_frontmatter, split_entries, find_entry,
# Frontmatter schema chassis
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
DailyLogPathMixin, SkillPathMixin,
# Async SQLite (SQLModel / SA 2.0)
create_system_engine, create_session_factory, session_scope,
SQLModel, Field, Relationship, BaseTable, RepoBase,
# Async LanceDB
open_lancedb_connection, LanceModel, Vector, BaseLanceTable, touch,
LanceRepoBase,
)
"""
from .lancedb import BaseLanceTable as BaseLanceTable
from .lancedb import LanceModel as LanceModel
from .lancedb import LanceRepoBase as LanceRepoBase
from .lancedb import Vector as Vector
from .lancedb import open_lancedb_connection as open_lancedb_connection
from .lancedb import touch as touch
from .locking import LockError as LockError
from .locking import memory_root_lock as memory_root_lock
from .markdown import AgentScopedFrontmatter as AgentScopedFrontmatter
from .markdown import BaseFrontmatter as BaseFrontmatter
from .markdown import DailyLogPathMixin as DailyLogPathMixin
from .markdown import Entry as Entry
from .markdown import EntryId as EntryId
from .markdown import MarkdownReader as MarkdownReader
from .markdown import MarkdownWriter as MarkdownWriter
from .markdown import ParsedMarkdown as ParsedMarkdown
from .markdown import SkillPathMixin as SkillPathMixin
from .markdown import StructuredEntry as StructuredEntry
from .markdown import UserScopedFrontmatter as UserScopedFrontmatter
from .markdown import dump_frontmatter as dump_frontmatter
from .markdown import find_entry as find_entry
from .markdown import parse_frontmatter as parse_frontmatter
from .markdown import parse_structured_entry as parse_structured_entry
from .markdown import render_structured_entry as render_structured_entry
from .markdown import split_entries as split_entries
from .memory_root import MemoryRoot as MemoryRoot
from .memory_root import app_dir_name as app_dir_name
from .memory_root import app_id_from_dir as app_id_from_dir
from .memory_root import project_dir_name as project_dir_name
from .memory_root import project_id_from_dir as project_id_from_dir
from .sqlite import BaseTable as BaseTable
from .sqlite import Field as Field
from .sqlite import Relationship as Relationship
from .sqlite import RepoBase as RepoBase
from .sqlite import SQLModel as SQLModel
from .sqlite import create_session_factory as create_session_factory
from .sqlite import create_system_engine as create_system_engine
from .sqlite import session_scope as session_scope
__all__ = [
"AgentScopedFrontmatter",
"BaseFrontmatter",
"BaseLanceTable",
"BaseTable",
"DailyLogPathMixin",
"Entry",
"EntryId",
"Field",
"LanceModel",
"LanceRepoBase",
"LockError",
"MarkdownReader",
"MarkdownWriter",
"MemoryRoot",
"ParsedMarkdown",
"Relationship",
"RepoBase",
"SkillPathMixin",
"StructuredEntry",
"SQLModel",
"UserScopedFrontmatter",
"Vector",
"app_dir_name",
"app_id_from_dir",
"create_session_factory",
"create_system_engine",
"dump_frontmatter",
"find_entry",
"memory_root_lock",
"project_dir_name",
"project_id_from_dir",
"open_lancedb_connection",
"parse_frontmatter",
"parse_structured_entry",
"render_structured_entry",
"session_scope",
"split_entries",
"touch",
]

View File

@ -0,0 +1,34 @@
"""LanceDB async persistence.
External usage (connection):
from everos.core.persistence.lancedb import open_lancedb_connection
External usage (ORM model basics — re-exported from lancedb.pydantic):
from everos.core.persistence.lancedb import (
LanceModel, Vector, BaseLanceTable, touch,
)
External usage (generic CRUD repository bases):
from everos.core.persistence.lancedb import (
LanceRepoBase, LanceDailyLogRepoBase,
)
"""
# Re-export the LanceDB-flavoured Pydantic primitives so business code has a
# single canonical entry point for table schemas.
from lancedb.pydantic import LanceModel as LanceModel
from lancedb.pydantic import Vector as Vector
from .base import BaseLanceTable as BaseLanceTable
from .base import touch as touch
from .connection import open_lancedb_connection as open_lancedb_connection
from .repository import LanceDailyLogRepoBase as LanceDailyLogRepoBase
from .repository import LanceRepoBase as LanceRepoBase
# Public surface of the LanceDB persistence package; one entry per
# re-export import above (third-party primitives + local chassis).
__all__ = [
    "BaseLanceTable",
    "LanceDailyLogRepoBase",
    "LanceModel",
    "LanceRepoBase",
    "Vector",
    "open_lancedb_connection",
    "touch",
]

View File

@ -0,0 +1,158 @@
"""Common LanceDB base for everos tables.
:class:`BaseLanceTable` adds ``created_at`` / ``updated_at`` columns and
the :attr:`BM25_FIELDS` declaration + :meth:`ensure_fts_indexes`
classmethod so each schema owns *both* its column shape **and** its
BM25 index spec — repos stay focused on queries.
Note:
LanceDB has no SQL ``onupdate`` equivalent — the application must
explicitly set ``updated_at = get_utc_now()`` before calling
:meth:`AsyncTable.update` / :meth:`AsyncTable.merge_insert`. The
convenience :func:`touch` helper does this in one call.
**Every datetime column automatically carries ``tz=UTC`` in the
Arrow schema.** LanceDB's Pydantic→PyArrow converter does not
understand ``typing.Annotated`` metadata, so :data:`UtcDatetime`
cannot be used as the field type annotation. Instead,
:meth:`BaseLanceTable.to_arrow_schema` walks the inferred schema
and rewrites every ``timestamp[us]`` (naive) column to
``timestamp[us, tz=UTC]``. PyArrow then auto-``astimezone(UTC)``
aware inputs on write **and** returns aware UTC datetimes on read
— no per-table configuration, no caller-side ``ensure_utc``.
Subclasses just declare ``datetime`` fields normally::
class Episode(BaseLanceTable):
timestamp: dt.datetime
"""
from __future__ import annotations
import datetime as dt
from typing import ClassVar
import pyarrow as pa
from lancedb import AsyncTable
from lancedb.index import FTS
from lancedb.pydantic import LanceModel
from pydantic import Field
from everos.component.utils.datetime import get_utc_now
class BaseLanceTable(LanceModel):
"""Pydantic / LanceDB base with ``created_at`` / ``updated_at`` and
schema-level LanceDB metadata (``TABLE_NAME`` / ``BM25_FIELDS``).
The schema is the single source of truth for everything LanceDB
needs to materialise the table: column shape, table name, vector
dim (declared per-subclass), and which columns carry an FTS index.
Repos read these ClassVars; they do not duplicate them.
"""
TABLE_NAME: ClassVar[str] = ""
"""LanceDB table name. Business schemas must override (e.g.
``"episode"``). Left empty on chassis / test schemas that construct
their table inline."""
BM25_FIELDS: ClassVar[list[str]] = []
"""Columns to build LanceDB FTS (BM25) indexes on.
Each declared column must already exist as a ``str`` (or
``str | None``) field on the schema. Tokens are assumed to be
**app-layer pre-tokenised** (space-joined); the FTS index uses
``base_tokenizer="whitespace"`` so segmentation is owned by the
app layer (:class:`JiebaTokenizer`). The same boundary owns stop-
word filtering (English + Chinese); FTS-side ``remove_stop_words``
is OFF. FTS *does* keep lightweight English-aware normalisation
(``lower_case`` / ``stem`` / ``ascii_folding``) as a belt-and-
braces layer on the same English tokens that survive jieba.
See ``17_lancedb_tables_design.md`` §2.4.1 and
:meth:`ensure_fts_indexes` below for the exact knobs."""
created_at: dt.datetime = Field(default_factory=get_utc_now)
updated_at: dt.datetime = Field(default_factory=get_utc_now)
@classmethod
def to_arrow_schema(cls) -> pa.Schema:
    """Return the Arrow schema with every timestamp column forced to UTC.

    ``LanceModel.to_arrow_schema()`` infers Arrow types from the Pydantic
    field annotations and emits naive ``timestamp[us]`` for every
    :class:`datetime.datetime` column. This override rewrites **every**
    top-level timestamp column to ``timestamp[us, tz=UTC]`` so that:

    * **on write** — PyArrow ``astimezone(UTC)``-s aware input
      automatically before serialising the i64 epoch micros.
    * **on read** — PyArrow returns aware UTC datetimes.

    The rewrite also **overrides any non-UTC tz** a subclass might have
    declared explicitly: project convention is that storage is always
    UTC, and mixed-tz columns would violate the two-zone discipline
    (see ``docs/datetime.md``).

    Only the column *type* is replaced. The field name, nullability and
    any field-level metadata are carried over via ``Field.with_type``,
    and schema-level metadata from the inferred schema is preserved.

    Returns:
        A new :class:`pyarrow.Schema`; non-timestamp fields are passed
        through untouched.
    """
    inferred = super().to_arrow_schema()
    utc_micros = pa.timestamp("us", tz="UTC")
    return pa.schema(
        [
            # with_type() keeps name / nullable / metadata; only the
            # Arrow type changes.
            f.with_type(utc_micros) if pa.types.is_timestamp(f.type) else f
            for f in inferred
        ],
        # Re-attach schema-level metadata that a bare pa.schema([...])
        # would otherwise discard.
        metadata=inferred.metadata,
    )
@classmethod
async def ensure_fts_indexes(cls, table: AsyncTable) -> None:
    """Build an FTS (BM25) index for each :attr:`BM25_FIELDS` column lacking one.

    Safe to call on every startup: the table's current indexes are listed
    once, and any declared column that already appears in *some* index is
    skipped. A schema with an empty :attr:`BM25_FIELDS` is a no-op.

    The FTS configuration is fixed and follows the "app layer
    pre-tokenises, LanceDB normalises" convention for multilingual
    mixed content:

    - ``base_tokenizer="whitespace"`` — tokens arrive space-joined from
      the app-layer tokenizer, so FTS only splits on spaces.
    - ``lower_case=True`` — case-folding (no effect on CJK).
    - ``stem=True`` — English stemming; CJK tokens pass through.
    - ``remove_stop_words=False`` — stop-word removal is owned by the
      app layer (:class:`JiebaTokenizer`); leaving it off here avoids
      double filtering and a split source of truth.
    - ``ascii_folding=True`` — strips Latin diacritics (é→e).
    - ``with_position=True`` — keeps positions so phrase queries work.

    Args:
        table: Open async table the indexes are created on.

    Subclasses normally only declare :attr:`BM25_FIELDS`; overriding
    this method should not be necessary.
    """
    declared = cls.BM25_FIELDS
    if not declared:
        return

    # Snapshot of columns covered by any existing index, taken once
    # before any index is created.
    covered: set[str] = set()
    for existing in await table.list_indices():
        covered.update(existing.columns or [])

    for column in declared:
        if column not in covered:
            await table.create_index(
                column=column,
                config=FTS(
                    with_position=True,
                    base_tokenizer="whitespace",
                    lower_case=True,
                    stem=True,
                    remove_stop_words=False,
                    ascii_folding=True,
                ),
            )
def touch(record: BaseLanceTable) -> BaseLanceTable:
    """Stamp ``record.updated_at`` with the current UTC time.

    LanceDB has no ``onupdate`` hook, so callers refresh the column
    themselves before an update / merge. The same record object is
    returned (mutated in place) so the call can be chained.
    """
    now = get_utc_now()
    record.updated_at = now
    return record

View File

@ -0,0 +1,68 @@
"""Async LanceDB connection factory.
LanceDB does not live inside the SQLAlchemy ecosystem; it has its own
``connect_async`` returning :class:`lancedb.AsyncConnection`. This module
is a thin wrapper that:
1. ensures the lancedb root directory exists
2. converts ``LanceDBSettings.read_consistency_seconds`` into the
:class:`datetime.timedelta` value LanceDB expects
3. installs a capped :class:`lancedb.Session` so the global index
cache cannot grow unbounded and exhaust file descriptors
(see :attr:`LanceDBSettings.index_cache_size_bytes` for the
full rationale)
"""
from __future__ import annotations
import datetime as dt
from pathlib import Path
import lancedb
from lancedb import AsyncConnection
from everos.config import LanceDBSettings
async def open_lancedb_connection(
    lancedb_dir: Path,
    lancedb_settings: LanceDBSettings,
) -> AsyncConnection:
    """Create the LanceDB root if needed and open an async connection to it.

    Args:
        lancedb_dir: Filesystem location of the LanceDB root (typically
            ``MemoryRoot.lancedb_dir``). Missing directories are created.
        lancedb_settings: Connection tunables. ``read_consistency_seconds``
            becomes the ``read_consistency_interval`` timedelta (``None``
            is passed through unchanged) and ``index_cache_size_bytes``
            caps the session's index cache.

    Returns:
        An :class:`AsyncConnection` ready for table operations.
    """
    # A plain blocking mkdir is fine here: it is a fast syscall that only
    # does real work on the first connect, so anyio.Path / aiofiles would
    # be overkill.
    lancedb_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240

    seconds = lancedb_settings.read_consistency_seconds
    interval: dt.timedelta | None = (
        None if seconds is None else dt.timedelta(seconds=seconds)
    )

    # Cap the index cache: each cached reader holds the file descriptors
    # of an opened ``_indices/<uuid>/...`` directory, so an unbounded
    # cache makes a long-running daemon's FD count grow until ``EMFILE``.
    # With a cap the readers are LRU-evicted instead. The metadata cache
    # stays at the lancedb default (unbounded) on purpose — it only holds
    # parsed in-memory manifests, creates no FD pressure, and a cap would
    # just cause thrashing. ``LanceDBSettings`` documents how the default
    # size was measured.
    capped_session = lancedb.Session(
        index_cache_size_bytes=lancedb_settings.index_cache_size_bytes,
        metadata_cache_size_bytes=None,
    )

    connection = await lancedb.connect_async(
        str(lancedb_dir),
        read_consistency_interval=interval,
        session=capped_session,
    )
    return connection

View File

@ -0,0 +1,530 @@
"""Generic CRUD repository for LanceDB-backed tables.
``LanceRepoBase`` mirrors the SQLite ``RepoBase`` shape: a pure generic
CRUD helper that knows nothing about a storage runtime. Concrete repos
either pass an :class:`AsyncTable` explicitly (typical in tests) or
override :meth:`_table_lookup` to pull the cached table from their
storage manager (typical in
:mod:`everos.infra.persistence.lancedb.repos`).
"""
from __future__ import annotations
import asyncio
import datetime as dt
from collections.abc import Sequence
from typing import Any, ClassVar
from lancedb import AsyncTable
from everos.core.observability.logging import get_logger
from .base import BaseLanceTable
logger = get_logger(__name__)
def _q(value: str) -> str:
"""Escape single quotes for a LanceDB SQL-like ``where`` predicate.
LanceDB has no parameterised query API; predicates are strings.
Doubling the quote (``'`` → ``''``) is the SQL-standard way to keep
a literal single quote inside a single-quoted string. everos's PK
convention (``<owner_id>_<entry_id>``) never carries quotes — this
is defensive.
"""
return value.replace("'", "''")
class LanceRepoBase[T: BaseLanceTable]:
"""Generic CRUD repository for one LanceDB table.
Subclass and bind to a schema. Two ways to provide the table:
1. **Explicit (tests / DI)** — pass it to ``__init__``::
repo = EpisodeRepo(table)
2. **Lazy hook (production singletons)** — override
:meth:`_table_lookup` so the repo can be instantiated as a
module-level singleton with no live connection yet::
class _EpisodeRepo(LanceRepoBase[Episode]):
schema = Episode
async def _table_lookup(self):
from everos.infra.persistence.lancedb.lancedb_manager import (
get_table,
)
return await get_table(self.schema.TABLE_NAME, self.schema)
episode_repo = _EpisodeRepo()
await episode_repo.add([Episode(text=..., vector=[...])])
The LanceDB table name lives on the schema (``BaseLanceTable.TABLE_NAME``)
so every LanceDB-side metadatum — column shape, table name,
vector dim, BM25 index spec — sits in one place. ``table_name``
here is a thin pass-through; subclasses normally do **not**
override it.
Write paths (``add`` / ``upsert`` / ``delete`` / ``delete_by_md_path`` /
``rebuild_indexes``) are serialised by a per-``table_name``
:class:`asyncio.Lock`. LanceDB's
``merge_insert`` is a read-modify-write at the storage layer with no
application-visible OCC contract — two concurrent calls against the
same table can race on the version manifest and lose updates even
when the row sets are disjoint (observed: cascade worker
``asyncio.gather`` over a batch of ``user_profile`` rows where one
write disappears). Serialising on the table name closes that window;
reads stay unlocked so search QPS is not impacted by writers.
Locks live in a class-level dict keyed by table name and are never
evicted (mirrors :mod:`everos.memory.strategies._partition_locks`
on bpo-28427 — a lock with pending waiters must outlive any dict
entry that points to it).
"""
schema: type[T]
_table_locks: ClassVar[dict[str, asyncio.Lock]] = {}
"""Per-table-name write lock pool (process-wide, lazily populated)."""
@property
def table_name(self) -> str:
"""LanceDB table name, resolved from :attr:`schema.TABLE_NAME`."""
return self.schema.TABLE_NAME
@classmethod
def _write_lock(cls, table_name: str) -> asyncio.Lock:
"""Return the write lock for ``table_name``; create on first use.
``dict.setdefault`` is atomic under single-threaded asyncio (no
``await`` between check and insert), so no meta-lock is needed.
"""
return cls._table_locks.setdefault(table_name, asyncio.Lock())
@classmethod
def _reset_locks_for_tests(cls) -> None:
"""Test-only: drop the write-lock pool.
``asyncio.Lock`` binds to the current event loop on first
``acquire()``; pytest-asyncio creates a fresh loop per test, so
a module-level lock surviving across tests fails with "bound to
a different event loop". The production cascade worker runs on
one loop forever and does not need this hook. Mirrors
:func:`everos.memory.strategies._partition_locks._reset_for_tests`.
"""
cls._table_locks.clear()
def __init__(self, table: AsyncTable | None = None) -> None:
"""Bind to a table directly; if ``None``, defer to ``_table_lookup``."""
self._table_override = table
async def _table_lookup(self) -> AsyncTable:
"""Resolve the table on first use. Override in subclass.
``LanceRepoBase`` itself has no idea where the runtime singleton
lives. The default raises so a missing override is loud rather
than silently broken.
"""
raise NotImplementedError(
f"{type(self).__name__}: pass table= to __init__ "
"or override _table_lookup() to wire the storage manager."
)
async def _table(self) -> AsyncTable:
if self._table_override is not None:
return self._table_override
return await self._table_lookup()
# ── Create ─────────────────────────────────────────────────────────────
async def add(self, records: Sequence[T]) -> None:
    """Append ``records`` to the table while holding the per-table write lock."""
    tbl = await self._table()
    lock = self._write_lock(self.table_name)
    async with lock:
        batch = list(records)
        await tbl.add(batch)
# ── Upsert ─────────────────────────────────────────────────────────────
async def upsert(
    self,
    records: Sequence[T],
    *,
    by: str = "id",
) -> None:
    """Insert-or-replace ``records``, matching existing rows on column ``by``.

    Built on LanceDB's ``merge_insert(on=...)`` builder and behaves like
    ``INSERT ... ON CONFLICT(by) DO UPDATE``: a row whose key already
    exists is overwritten in full, any other row is inserted. The call
    runs under the per-table write lock.

    Cascade relies on this when reconciling md → LanceDB — the first
    sighting of an entry inserts it, a later md edit updates the same row.

    Args:
        records: Rows to write.
        by: Key column used for matching (default ``"id"``).
    """
    tbl = await self._table()
    async with self._write_lock(self.table_name):
        merge = tbl.merge_insert(by)
        merge = merge.when_matched_update_all()
        merge = merge.when_not_matched_insert_all()
        await merge.execute(list(records))
# ── Maintenance ────────────────────────────────────────────────────────
async def optimize(self, *, cleanup_older_than: dt.timedelta | None = None) -> None:
    """Compact table fragments and fold new data into the FTS / vector indexes.

    Background: ``merge_insert`` writes new rows into a fresh fragment,
    while the FTS (BM25) index created by :meth:`ensure_fts_indexes`
    only covers the fragments that existed when it was built. Rows
    written afterwards can therefore be **invisible to BM25 queries**
    until an ``optimize()`` run merges their fragments into the index
    segment the query engine reads.

    Observed symptom (verified on LoCoMo conv0): after steady-state
    cascade ingest, ``nearest_to_text("any_common_word")`` returned 0
    hits although every row contained the token — the new fragments
    had simply not been indexed yet.

    Scheduling is the caller's job. Cascade drives this through a
    per-kind throttle + trailing-edge scheduler
    (``CascadeWorker._schedule_optimize``): at most one run per ~1s
    window per kind, decoupled from the drain loop, plus a 60s
    heartbeat sweep as a safety net. Each optimised fragment costs an
    O(N) data rewrite, which is why the throttle exists.

    NOTE(review): unlike :meth:`rebuild_indexes`, this call does not
    take the per-table write lock — confirm that is intentional.

    Args:
        cleanup_older_than: When given, files that belong to dataset
            versions older than this interval are also physically
            pruned. With ``None`` (default) only compaction happens, so
            historical manifests, replaced data fragments and stale
            index UUID files stay on disk indefinitely and the file
            count (and scan-time FD usage) keeps growing. Cascade
            passes a value on a slower beat
            (``CascadeWorker._optimize_prune_interval``) to keep the
            hot drain path cheap. Pruning does *not* shrink **active**
            index internals (FTS ``part_N`` count or vector index UUID
            count); those only collapse via ``drop_index`` +
            ``create_index``, which this method does not do.
    """
    tbl = await self._table()
    await tbl.optimize(cleanup_older_than=cleanup_older_than)
async def rebuild_indexes(self) -> None:
    """Drop all indexes on the table, then re-create the schema's FTS indexes.

    **Why this exists** — it works around a gap in the upstream Python
    API. Lance's Rust ``OptimizeOptions`` offers ``num_indices_to_merge``
    (default 1), which bounds the number of active index UUIDs per
    column: with ``Some(1)`` each ``optimize_indices()`` call merges
    its delta into the base and the count stays at 1. The application
    layer cannot use it, for two reasons:

    1. ``lancedb.AsyncTable.optimize()`` does **not expose** the
       parameter (verified on lancedb main 2026-05-28); only
       ``cleanup_since_ms`` and ``delete_unverified`` reach Rust.
    2. Even through ``pylance`` directly, the merge is buggy on
       ``lance crate 4.0`` (embedded by lancedb 0.30.2) —
       ``num_indices_to_merge=1`` has no effect. The fix landed in
       ``lance 7.x``, but ``pylance 7.x`` cannot collapse indexes on a
       ``lance 4.0``-format dataset (verified by experiment).

    Consequently there is **no application-level path** in the current
    stack to bound active index UUID growth: each ``optimize()`` adds
    one more UUID (vector) / one more ``part_N`` (FTS).

    The workaround is to drop every existing index and rebuild from
    the schema's ``ensure_fts_indexes`` contract. That is an **O(N)
    full retrain** but cheap in practice (~0.3s for 50k rows × 2 FTS
    columns on local SSD); while it runs LanceDB transparently falls
    back to brute-force scans, so queries and writes stay available.
    The whole drop + rebuild sequence holds the per-table write lock.

    NOTE(review): only FTS indexes are re-created here. Any non-FTS
    index that existed before the call is dropped and not restored by
    this method — confirm that is the intended outcome.

    **Cadence** — :class:`CascadeWorker` calls this on a slow loop
    (default 12h per kind). The frequency is limited by rebuild cost,
    not correctness; 12h is a conservative choice that keeps file/UUID
    counts well below any FD ceiling under steady-state ingest.

    **When to remove** — once lancedb exposes ``num_indices_to_merge``
    on the async Python API **and** the embedded ``lance crate`` ships
    a working merge, delete this method and use
    ``optimize(num_indices_to_merge=1)`` in the regular ``optimize()``
    path. Tracking issues / context:

    - https://github.com/lancedb/lancedb/issues/2193
    - https://github.com/lancedb/lancedb/issues/3177
    - https://github.com/lance-format/lance/pull/6711 (partial fix
      in lance v7.0.0)
    - https://docs.rs/lancedb/latest/lancedb/table/struct.OptimizeOptions.html
    """
    tbl = await self._table()
    async with self._write_lock(self.table_name):
        current_indexes = await tbl.list_indices()
        for index in current_indexes:
            await tbl.drop_index(index.name)
        await self.schema.ensure_fts_indexes(tbl)
# ── Read ───────────────────────────────────────────────────────────────
async def count(self) -> int:
    """Return the number of rows currently in the table."""
    tbl = await self._table()
    row_total = await tbl.count_rows()
    return row_total
async def get_by_id(
    self,
    id_value: str,
    *,
    id_field: str = "id",
) -> T | None:
    """Look up a single row by its scalar key.

    Issues the scalar filter ``<id_field> = '<id_value>'`` with a limit
    of one. Single quotes in ``id_value`` are doubled so they cannot
    break the predicate; ``id_field`` is interpolated verbatim and is
    expected to be a trusted column name.

    Args:
        id_value: Key value to match.
        id_field: Column holding the key (default ``"id"``).

    Returns:
        The validated schema instance, or ``None`` when nothing matches.
    """
    tbl = await self._table()
    predicate = f"{id_field} = '{_q(id_value)}'"
    matches = await tbl.query().where(predicate).limit(1).to_list()
    if matches:
        return self.schema.model_validate(matches[0])
    return None
async def find_where(
    self,
    where: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Run a pure scalar filter and return *typed* schema instances.

    Counterpart of :meth:`search` that validates each row into ``T``
    instead of returning raw LanceDB dicts. No vector ANN is involved;
    use :meth:`search` when ``_distance`` or an ANN + filter mix is
    needed.

    Args:
        where: SQL-like scalar predicate.
        limit: Maximum number of rows to return.
    """
    tbl = await self._table()
    raw_rows = await tbl.query().where(where).limit(limit).to_list()
    typed: list[T] = []
    for raw in raw_rows:
        typed.append(self.schema.model_validate(raw))
    return typed
async def find_one_where(self, where: str) -> T | None:
    """Return the first row matching ``where`` via :meth:`find_where`, or ``None``."""
    matches = await self.find_where(where, limit=1)
    if not matches:
        return None
    return matches[0]
async def find_where_paginated(
    self,
    where: str,
    *,
    sort_by: str,
    descending: bool = True,
    page: int = 1,
    page_size: int = 20,
    max_fetch: int = 20000,
) -> tuple[list[T], int]:
    """Scalar query with in-memory sorting and page slicing.

    LanceDB offers no native ``ORDER BY``, so the method counts the
    rows matching ``where``, pulls at most ``max_fetch`` of them as an
    Arrow table, sorts that table on ``sort_by`` and slices out the
    requested page. The predicate's *true* match count is returned
    with the page so callers can render pagination controls without
    issuing a second query.

    NOTE(review): ``page`` and ``page_size`` are not validated; a
    ``page`` below 1 yields a negative slice offset that is handed
    straight to Arrow — callers should pass sane values.

    Args:
        where: SQL-like scalar predicate. Required — this method never
            performs an implicit full-table scan.
        sort_by: Column to order the fetched rows by.
        descending: ``True`` (default) sorts descending (newest first
            for time columns); ``False`` sorts ascending.
        page: 1-indexed page number.
        page_size: Rows per page.
        max_fetch: Upper bound on rows pulled before sorting. If the
            predicate matches more rows than this, the sort runs over
            an *arbitrary* prefix and the page is only approximately
            correct; a warning is logged so the truncation is visible.

    Returns:
        ``(rows, total)`` where ``rows`` is the typed page and
        ``total`` is ``count_rows(filter=where)`` — independent of
        ``max_fetch``.
    """
    tbl = await self._table()
    total = await tbl.count_rows(filter=where)
    if total > max_fetch:
        logger.warning(
            "find_where_paginated truncated",
            extra={
                "table": self.table_name,
                "where": where,
                "total": total,
                "max_fetch": max_fetch,
            },
        )

    fetched = await tbl.query().where(where).limit(max_fetch).to_arrow()
    direction = "descending" if descending else "ascending"
    ordered = fetched.sort_by([(sort_by, direction)])

    start = (page - 1) * page_size
    window = ordered.slice(start, page_size).to_pylist()
    typed_page = [self.schema.model_validate(raw) for raw in window]
    return typed_page, total
async def find_by_owner(
    self,
    owner_id: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Return up to ``limit`` rows whose ``owner_id`` column equals ``owner_id``.

    The five business tables all carry this column, so the query lives
    on the generic base.
    """
    predicate = f"owner_id = '{_q(owner_id)}'"
    return await self.find_where(predicate, limit=limit)
async def find_by_md_path(self, md_path: str) -> T | None:
    """Map an md file path back to its row (used by cascade on md edits).

    Returns the first row whose ``md_path`` matches, or ``None``.
    """
    predicate = f"md_path = '{_q(md_path)}'"
    return await self.find_one_where(predicate)
async def search(
    self,
    *,
    vector: Sequence[float] | None = None,
    where: str | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Query with an optional vector ANN step and an optional scalar filter.

    Args:
        vector: Embedding whose nearest rows are wanted. ``None`` skips
            the ANN step entirely.
        where: SQL-like predicate (e.g. ``"tags = 'meeting'"``);
            ``None`` applies no filter.
        limit: Maximum number of rows.

    Returns:
        Raw LanceDB row dicts. The fields follow ``schema``, and
        ``_distance`` is present when ``vector`` was supplied.
    """
    tbl = await self._table()
    query = tbl.query()
    if vector is not None:
        query = query.nearest_to(list(vector))
    if where is not None:
        query = query.where(where)
    hits = await query.limit(limit).to_list()
    return hits
# ── Delete ─────────────────────────────────────────────────────────────
async def delete(self, predicate: str) -> None:
    """Remove every row matching the SQL-like ``predicate`` (under the write lock).

    The predicate is passed to LanceDB verbatim; callers are responsible
    for escaping any values they embed in it.
    """
    tbl = await self._table()
    lock = self._write_lock(self.table_name)
    async with lock:
        await tbl.delete(predicate)
async def delete_by_md_path(self, md_path: str) -> int:
    """Remove all rows for one md file and report how many were deleted.

    Used by the cascade handler when an md file disappears from disk,
    or when reverse-reconcile finds an orphaned LanceDB row. Single
    quotes in ``md_path`` are doubled defensively before the value is
    embedded in the predicate. Runs under the per-table write lock.

    Returns:
        The delete result's ``num_deleted_rows`` as an ``int``.
    """
    tbl = await self._table()
    predicate = f"md_path = '{_q(md_path)}'"
    async with self._write_lock(self.table_name):
        outcome = await tbl.delete(predicate)
    return int(outcome.num_deleted_rows)
class LanceDailyLogRepoBase[T: BaseLanceTable](LanceRepoBase[T]):
"""LanceRepoBase + queries unique to daily-log tables.
Daily-log tables (``episode`` / ``atomic_fact`` / ``foresight`` /
``agent_case``) share a fixed schema slice: ``entry_id`` (md seq
id), ``session_id`` (conversation scope), and ``parent_type`` /
``parent_id`` (record lineage). The queries below compose those
columns; ``agent_skill`` is *not* a daily-log (it is a named
single-file entity) and uses :class:`LanceRepoBase` directly.
"""
async def find_by_owner_entry(
    self,
    owner_id: str,
    entry_id: str,
    *,
    app_id: str = "default",
    project_id: str = "default",
) -> T | None:
    """Point-query one row by ``(app, project, owner_id, entry_id)``.

    An ``entry_id`` is unique only inside its (app, project, owner)
    scope — the same ``ac_<date>_<seq>`` can recur in another space —
    so all scope columns are part of the predicate to rule out a
    cross-space hit.

    Returns:
        The matching row, or ``None``.
    """
    clauses = (
        f"owner_id = '{_q(owner_id)}'",
        f"entry_id = '{_q(entry_id)}'",
        f"app_id = '{_q(app_id)}'",
        f"project_id = '{_q(project_id)}'",
    )
    return await self.find_one_where(" AND ".join(clauses))
async def find_by_owner_entries(
    self,
    owner_id: str,
    entry_ids: Sequence[str],
    *,
    app_id: str = "default",
    project_id: str = "default",
) -> list[T]:
    """Bulk variant of the point query: ``entry_id IN (...)`` within one scope.

    An empty ``entry_ids`` returns ``[]`` immediately, because LanceDB
    rejects an empty ``IN ()`` tuple. The query limit is
    ``len(entry_ids)``: inside one (app, project, owner) scope each id
    can match at most one row.
    """
    if not entry_ids:
        return []

    id_list = ", ".join([f"'{_q(one_id)}'" for one_id in entry_ids])
    clauses = (
        f"owner_id = '{_q(owner_id)}'",
        f"entry_id IN ({id_list})",
        f"app_id = '{_q(app_id)}'",
        f"project_id = '{_q(project_id)}'",
    )
    return await self.find_where(" AND ".join(clauses), limit=len(entry_ids))
async def find_by_session(
    self,
    owner_id: str,
    session_id: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Return up to ``limit`` rows of conversation ``session_id`` owned by ``owner_id``."""
    clauses = (
        f"owner_id = '{_q(owner_id)}'",
        f"session_id = '{_q(session_id)}'",
    )
    return await self.find_where(" AND ".join(clauses), limit=limit)
async def find_by_parent(
    self,
    parent_type: str,
    parent_id: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Return up to ``limit`` rows whose lineage is ``(parent_type, parent_id)``."""
    clauses = (
        f"parent_type = '{_q(parent_type)}'",
        f"parent_id = '{_q(parent_id)}'",
    )
    return await self.find_where(" AND ".join(clauses), limit=limit)

View File

@ -0,0 +1,76 @@
"""Process-wide exclusive lock on a memory-root.
Uses ``fcntl.flock`` (POSIX advisory locking, available on Linux + macOS;
Windows is not supported — see project README on platform scope). The
public surface is an :func:`contextlib.asynccontextmanager` so callers
use ``async with memory_root_lock(mr):``; the underlying syscalls have
no async equivalent so they run in a worker thread via
:func:`anyio.to_thread.run_sync`.
"""
from __future__ import annotations
import fcntl
import os
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
import anyio
from .memory_root import MemoryRoot
class LockError(RuntimeError):
    """Signals that a non-blocking attempt to take the memory-root lock failed.

    Raised by :func:`memory_root_lock` when ``blocking=False`` and the
    lock is already held elsewhere.
    """
@asynccontextmanager
async def memory_root_lock(
    memory_root: MemoryRoot,
    *,
    blocking: bool = True,
) -> AsyncIterator[None]:
    """Acquire an exclusive process lock on the memory-root.

    Async context manager: the lock is held for the duration of the
    ``async with`` body and released (and its fd closed) on exit, even
    when the body raises or the surrounding scope is cancelled.

    Args:
        memory_root: The memory-root to lock. The lock anchor file
            (``<root>/.lock``) is created on first use.
        blocking: If ``True`` (default), wait until the lock is free. If
            ``False``, raise :class:`LockError` immediately when another
            process holds it.

    Raises:
        LockError: When ``blocking=False`` and the lock is already held.
    """
    await anyio.Path(memory_root.root).mkdir(parents=True, exist_ok=True)
    lock_path = memory_root.lock_file

    # Open the anchor file (create on first use). The fd, not the path, is
    # what fcntl operates on. ``os.open`` is microsecond-fast but offloaded
    # for consistency with the rest of the lock acquisition flow.
    fd = await anyio.to_thread.run_sync(
        lambda: os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
    )

    flags = fcntl.LOCK_EX
    if not blocking:
        flags |= fcntl.LOCK_NB

    # From here on the fd is owned by this function: a single ``finally``
    # closes it on *every* exit path (LockError, any other OSError from
    # flock, an exception from the body, cancellation), so it cannot leak
    # and cannot be closed twice.
    locked = False
    try:
        try:
            await anyio.to_thread.run_sync(fcntl.flock, fd, flags)
        except BlockingIOError as exc:
            raise LockError(
                f"another process already holds the memory-root lock at {lock_path}"
            ) from exc
        locked = True
        yield
    finally:
        # Shield the cleanup: AnyIO cancellation is level-triggered, so in
        # an already-cancelled scope every un-shielded ``await`` raises
        # immediately — the unlock / close would never run and the lock
        # would stay held until process exit.
        with anyio.CancelScope(shield=True):
            try:
                if locked:
                    await anyio.to_thread.run_sync(fcntl.flock, fd, fcntl.LOCK_UN)
            finally:
                await anyio.to_thread.run_sync(os.close, fd)

View File

@ -0,0 +1,62 @@
"""Markdown file IO toolkit.
Atomic write + YAML frontmatter parse/dump + entry marker parse +
audit-form structured-entry parsing. Knows nothing about business
models (no MemCell / Episode); the :class:`Entry` here is a
*marker-delimited* span within a markdown body, not a business record.
External usage (IO + parse):
from everos.core.persistence.markdown import (
Entry, EntryId, StructuredEntry,
MarkdownReader, MarkdownWriter, ParsedMarkdown,
parse_frontmatter, dump_frontmatter,
split_entries, find_entry,
parse_structured_entry, render_structured_entry,
)
External usage (frontmatter schema chassis):
from everos.core.persistence.markdown import (
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
)
"""
from .entries import Entry as Entry
from .entries import EntryId as EntryId
from .entries import StructuredEntry as StructuredEntry
from .entries import find_entry as find_entry
from .entries import parse_structured_entry as parse_structured_entry
from .entries import render_structured_entry as render_structured_entry
from .entries import split_entries as split_entries
from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
from .frontmatter import BaseFrontmatter as BaseFrontmatter
from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
from .frontmatter import ProfilePathMixin as ProfilePathMixin
from .frontmatter import SkillPathMixin as SkillPathMixin
from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
from .frontmatter import dump_frontmatter as dump_frontmatter
from .frontmatter import parse_frontmatter as parse_frontmatter
from .parsed import ParsedMarkdown as ParsedMarkdown
from .reader import MarkdownReader as MarkdownReader
from .writer import MarkdownWriter as MarkdownWriter
# Public surface of the markdown toolkit; one entry per re-export import
# above (entries, frontmatter chassis, parsed container, reader, writer).
__all__ = [
    "AgentScopedFrontmatter",
    "BaseFrontmatter",
    "DailyLogPathMixin",
    "Entry",
    "EntryId",
    "MarkdownReader",
    "MarkdownWriter",
    "ParsedMarkdown",
    "ProfilePathMixin",
    "SkillPathMixin",
    "StructuredEntry",
    "UserScopedFrontmatter",
    "dump_frontmatter",
    "find_entry",
    "parse_frontmatter",
    "parse_structured_entry",
    "render_structured_entry",
    "split_entries",
]

View File

@ -0,0 +1,368 @@
"""Markdown entries — id format, marker spans, and audit-form parsing.
Three closely-related entry concepts live together here so a reader
sees the whole entry surface in one file:
1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
stamped into each daily-log entry's open / close markers. Carries
the prefix declared by the frontmatter schema, the date bucket, and
the in-file zero-padded sequence.
2. :class:`Entry` — a marker-delimited span inside a markdown body::
<!-- entry:abc123 -->
...content...
<!-- /entry:abc123 -->
:func:`split_entries` and :func:`find_entry` locate these spans
without interpreting the inner content. Higher layers (writers,
cascade) parse it per record type.
3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
audit-form body fields (header / inline / sections). Built either
from a raw body string via :func:`parse_structured_entry` or from
an existing :class:`Entry` via :meth:`Entry.as_structured`.
Audit-form layout::
## <header> ← optional H2 (usually entry id, for grep)
**key**: value ← inline fields, one per line
**key2**: value2
### Section Title ← section fields: H3 + free-form text
body content...
### Another Section
more content...
The audit chassis is intentionally **type-agnostic** — every field
round-trips as a string. Inline values are stringified on render
(lists become ``[a, b, c]``, scalars use ``str()``); on parse
everything is the raw text after the colon. Section titles are kept
verbatim. This keeps parsing tolerant of stray fields, wrapped
strings, and manually-typed timestamps; the strong-typed model lives
in business writers + the SQLite/LanceDB indexes.
Cross-user uniqueness is handled at the database layer via a composite
``<user_id>_<entry_id>`` field; it is *not* encoded into the
:class:`EntryId` string itself.
"""
from __future__ import annotations
import datetime as _dt
import re
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Self
# ── EntryId — structured id for marker stamping ─────────────────────────
_DATE_FMT = "%Y%m%d"
_SEQ_DIGITS = 8
"""Minimum zero-padding for the in-file seq.
8 digits keeps lexicographic order == numeric order up to 10**8
entries per file (per user, per day). ``format()`` is "at least 8"
larger seqs emit more digits without truncation. ``parse`` is
permissive: shorter (legacy 4-digit) and longer seq strings both
parse cleanly; format normalises to >= 8 digits on round-trip.
"""
@dataclass(frozen=True, slots=True)
class EntryId:
"""Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
prefix: str
date: _dt.date
seq: int
def format(self) -> str:
"""Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
return (
f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
)
def __str__(self) -> str: # noqa: D401
return self.format()
@classmethod
def parse(cls, s: str) -> Self:
"""Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
possible) is preserved as-is.
"""
parts = s.rsplit("_", 2)
if len(parts) != 3:
raise ValueError(f"invalid entry id format: {s!r}")
prefix, date_str, seq_str = parts
if not prefix:
raise ValueError(f"empty prefix in entry id: {s!r}")
try:
d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
except ValueError as exc:
raise ValueError(f"invalid date in entry id: {s!r}") from exc
try:
seq = int(seq_str)
except ValueError as exc:
raise ValueError(f"invalid seq in entry id: {s!r}") from exc
if seq < 0:
raise ValueError(f"negative seq in entry id: {s!r}")
return cls(prefix=prefix, date=d, seq=seq)
@classmethod
def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
"""Build the id for the next entry given the file's current count.
``current_count`` is the value of ``frontmatter.entry_count``
*before* this append. The new id gets ``seq = current_count + 1``.
"""
if current_count < 0:
raise ValueError(f"current_count must be >= 0, got {current_count}")
return cls(prefix=prefix, date=date, seq=current_count + 1)
# ── Entry — marker-delimited span inside a body ─────────────────────────
# Filename / URL-safe id alphabet for the marker: ASCII letters, digits,
# underscore and hyphen only.
_ID_PATTERN = r"[A-Za-z0-9_-]+"
# Open marker ``<!-- entry:<id> -->``; group 1 captures the id. The spacing
# is literal: exactly one space after ``<!--`` and one before ``-->``.
_OPEN_RE = re.compile(rf"<!-- entry:({_ID_PATTERN}) -->")
@dataclass(frozen=True)
class Entry:
    """A single ``<!-- entry:id -->`` … ``<!-- /entry:id -->`` span of a body.

    Attributes:
        id: The id text carried by the open marker.
        body: Text between the two markers, minus one leading and one
            trailing newline.
        start: Offset in the source body where the open marker begins.
        end: Offset in the source body just after the close marker.
    """

    id: str
    body: str
    start: int
    end: int

    def as_structured(self) -> StructuredEntry:
        """Return this entry with its body parsed as audit-form data.

        ``id`` / ``body`` / ``start`` / ``end`` carry over unchanged; the
        parsed ``header`` / ``inline`` / ``sections`` are added.
        """
        structured = parse_structured_entry(self.body, _origin=self)
        return structured
def split_entries(body: str) -> list[Entry]:
    """Collect every marker-delimited entry in ``body``, in document order.

    Scanning stops at the first open marker that has no matching close
    marker after it; that partial entry and anything beyond it are not
    returned. No stricter validation is performed here.
    """
    found: list[Entry] = []
    cursor = 0
    while (opener := _OPEN_RE.search(body, cursor)) is not None:
        marker_id = opener.group(1)
        closer = _close_re_for(marker_id).search(body, opener.end())
        if closer is None:
            # Unterminated entry — give up on the rest of the body.
            break
        inner = body[opener.end() : closer.start()]
        found.append(
            Entry(
                id=marker_id,
                body=_strip_one_newline(inner),
                start=opener.start(),
                end=closer.end(),
            )
        )
        # Resume right after this entry's close marker.
        cursor = closer.end()
    return found
def find_entry(body: str, entry_id: str) -> Entry | None:
    """Return the first entry whose marker id equals ``entry_id``.

    Returns ``None`` when there is no open marker for that id, or when the
    first such open marker has no close marker after it.
    """
    opener = re.search(rf"<!-- entry:{re.escape(entry_id)} -->", body)
    if opener is None:
        return None
    closer = _close_re_for(entry_id).search(body, opener.end())
    if closer is None:
        return None
    inner = body[opener.end() : closer.start()]
    return Entry(
        id=entry_id,
        body=_strip_one_newline(inner),
        start=opener.start(),
        end=closer.end(),
    )
def _close_re_for(entry_id: str) -> re.Pattern[str]:
"""Build the close-marker regex for a specific id."""
return re.compile(rf"<!-- /entry:{re.escape(entry_id)} -->")
def _strip_one_newline(text: str) -> str:
"""Strip one leading and one trailing newline (typical formatter padding)."""
if text.startswith("\r\n"):
text = text[2:]
elif text.startswith("\n"):
text = text[1:]
if text.endswith("\r\n"):
text = text[:-2]
elif text.endswith("\n"):
text = text[:-1]
return text
# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
# H2 line: ``## <header>``.
_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
# Inline field: ``**key**: value``. Anchored to line start so a stray
# ``**emphasis**`` mid-paragraph isn't mistaken for a field.
_INLINE_RE = re.compile(
r"^\*\*(?P<key>[^*\n]+?)\*\*:\s*(?P<value>.*?)\s*$",
re.MULTILINE,
)
# H3 line: ``### Title``.
_H3_RE = re.compile(r"^###\s+(.+?)\s*$", re.MULTILINE)
@dataclass(frozen=True)
class StructuredEntry(Entry):
    """An :class:`Entry` plus the audit-form fields parsed out of its body.

    ``id`` / ``body`` / ``start`` / ``end`` come from :class:`Entry`. All
    parsed values are plain strings; callers coerce types themselves.

    Attributes:
        header: Text of the ``## <header>`` line, or ``None`` when absent.
        inline: ``**key**: value`` pairs keyed by ``key``.
        sections: ``### Title`` blocks keyed by title, values are the
            section text.
    """

    header: str | None = None
    inline: dict[str, str] = field(default_factory=dict)
    sections: dict[str, str] = field(default_factory=dict)
def render_structured_entry(
    *,
    header: str | None = None,
    inline: Mapping[str, object] | None = None,
    sections: Mapping[str, str] | None = None,
) -> str:
    """Build the audit-form text for one entry body.

    Args:
        header: When truthy, emitted first as ``## <header>`` followed by a
            blank line.
        inline: Emitted as one ``**key**: value`` line per item, in mapping
            order; values go through ``_render_value``.
        sections: Emitted as a blank line, ``### <title>``, then the section
            text with trailing whitespace removed. Titles are used verbatim.

    Returns:
        The pieces joined with ``"\\n"``; no newline is appended at the end.
    """
    out: list[str] = []
    if header:
        out += [f"## {header}", ""]
    out.extend(
        f"**{key}**: {_render_value(value)}" for key, value in (inline or {}).items()
    )
    for title, text in (sections or {}).items():
        out += ["", f"### {title}", text.rstrip()]
    return "\n".join(out)
def parse_structured_entry(
    body: str, *, _origin: Entry | None = None
) -> StructuredEntry:
    """Split an audit-form body into header, inline fields and sections.

    Everything is returned as strings; no type coercion happens here.

    The text before the first ``### Title`` line is the preamble: the
    first ``## <header>`` line in it becomes ``header`` (``None`` if there
    is none) and every ``**key**: value`` line in it becomes an ``inline``
    item. Text after each ``### Title`` line, up to the next one, becomes
    that section's value — including any field-looking or stray lines it
    contains. A repeated key or title keeps the last occurrence.

    Args:
        body: Audit-form text.
        _origin: Entry that ``body`` came from. When given, its ``id`` /
            ``body`` / ``start`` / ``end`` are copied to the result;
            otherwise they are ``""`` / ``body`` / ``0`` / ``len(body)``.

    Returns:
        The populated :class:`StructuredEntry`.
    """
    # One capture group => split yields [preamble, title, text, title, text, ...].
    pieces = _H3_RE.split(body.strip("\n"))
    preamble = pieces[0]

    section_map: dict[str, str] = {}
    for raw_title, raw_text in zip(pieces[1::2], pieces[2::2]):
        section_map[raw_title.strip()] = raw_text.strip("\n").rstrip()

    h2_match = _H2_RE.search(preamble)
    title_line: str | None = h2_match.group(1).strip() if h2_match else None

    inline_map: dict[str, str] = {}
    for found in _INLINE_RE.finditer(preamble):
        inline_map[found.group("key").strip()] = found.group("value").strip()

    if _origin is None:
        ident, raw, span_start, span_end = "", body, 0, len(body)
    else:
        ident, raw = _origin.id, _origin.body
        span_start, span_end = _origin.start, _origin.end

    return StructuredEntry(
        id=ident,
        body=raw,
        start=span_start,
        end=span_end,
        header=title_line,
        inline=inline_map,
        sections=section_map,
    )
def _render_value(value: object) -> str:
"""Stringify an inline value the audit-friendly way."""
if value is None:
return ""
if isinstance(value, list | tuple):
return "[" + ", ".join(str(item) for item in value) + "]"
return str(value)

View File

@ -0,0 +1,300 @@
"""Frontmatter — YAML block parse / dump + L1 schema chassis.
Frontmatter is the leading ``---``-delimited YAML block at the top of
a markdown document::
---
title: Hello
tags: [a, b]
---
# Body starts here
Two complementary surfaces live here:
1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
``sort_keys=False`` so caller-controlled key order is preserved).
2. The L1 chassis classes — :class:`BaseFrontmatter`,
:class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
which fix the *absolute-readonly* fields (``id`` / ``type`` /
``schema_version``) plus scope (``user_id`` / ``agent_id`` +
``track``). Every business frontmatter schema in
``infra/persistence/markdown/mds/`` subclasses one of these.
Concrete business schemas (``UserMemcellDailyFrontmatter``,
``SkillFrontmatter``, …) live in ``infra``; they add per-record
business fields plus the path-resolution metadata daily-log writers
need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
"""
from __future__ import annotations
from collections.abc import Mapping
from typing import Any, ClassVar, Literal
import yaml
from pydantic import BaseModel, ConfigDict
# ── YAML helpers ────────────────────────────────────────────────────────
# Fence line that opens and closes the frontmatter block; used both when
# parsing and when dumping.
_DELIM = "---"
def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Parse a leading ``---\\n...\\n---\\n`` YAML block.

    Returns:
        (meta, remainder): ``meta`` is the parsed YAML mapping (empty dict
        if no frontmatter is present, or it is malformed or not a mapping).
        ``remainder`` is everything after the closing delimiter line and
        the single newline that follows it; the body is otherwise
        returned as-is.

    Notes:
        - If the document does not start with ``---`` followed by a
          newline, returns ``({}, text)`` unchanged.
        - If a closing ``---`` line is not found, returns ``({}, text)``.
        - If the YAML block is empty (``---\\n---\\n``), returns
          ``({}, remainder)``.
        - If the YAML block cannot be parsed, returns ``({}, text)`` — the
          broken block stays in the returned text so nothing is lost.
        - If the parsed YAML is not a mapping (e.g. a scalar or a list),
          returns ``({}, text)`` — frontmatter must be a mapping.
    """
    if not text.startswith(_DELIM):
        return {}, text
    # Skip the opening "---" and the newline that must follow it.
    rest = text[len(_DELIM) :]
    if rest.startswith("\r\n"):
        rest = rest[2:]
    elif rest.startswith("\n"):
        rest = rest[1:]
    else:
        # Opening "---" not followed by a newline → not a valid frontmatter.
        return {}, text
    closing_idx = _find_closing_delim(rest)
    if closing_idx is None:
        return {}, text
    yaml_block = rest[:closing_idx]
    remainder = rest[closing_idx + len(_DELIM) :]
    # Drop the newline that follows the closing delimiter, if any.
    if remainder.startswith("\r\n"):
        remainder = remainder[2:]
    elif remainder.startswith("\n"):
        remainder = remainder[1:]
    try:
        parsed: Any = yaml.safe_load(yaml_block) if yaml_block.strip() else {}
    except yaml.YAMLError:
        # Malformed YAML is part of the documented contract: treat the
        # document as having no frontmatter instead of raising.
        return {}, text
    if parsed is None:
        parsed = {}
    if not isinstance(parsed, dict):
        return {}, text
    return parsed, remainder
def dump_frontmatter(meta: Mapping[str, Any]) -> str:
    """Serialise ``meta`` as a ``---\\n<yaml>---\\n`` frontmatter block.

    A falsy (empty) mapping produces ``""`` with no delimiters. Keys are
    emitted in the caller's order (``sort_keys=False``), in block style,
    with non-ASCII text written as-is.
    """
    if not meta:
        return ""
    payload = yaml.safe_dump(
        dict(meta),
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    return "".join((_DELIM, "\n", payload, _DELIM, "\n"))
def _find_closing_delim(text: str) -> int | None:
    """Locate the first line of ``text`` that consists solely of ``---``.

    Lines are separated by ``\\n``; trailing ``\\r`` characters on a line
    are ignored for the comparison. Returns the offset of the line's first
    character, or ``None`` when no line matches.
    """
    start = 0
    total = len(text)
    while start < total:
        newline = text.find("\n", start)
        # The last line has no terminating newline and runs to the end.
        stop = total if newline == -1 else newline
        if text[start:stop].rstrip("\r") == _DELIM:
            return start
        start = stop + 1
    return None
# ── L1 schema chassis ───────────────────────────────────────────────────
class BaseFrontmatter(BaseModel):
    """Fields that every markdown frontmatter carries (the L1 chassis).

    ``id`` / ``type`` / ``schema_version`` form the *absolute-readonly*
    tier of the EverOS Markdown First spec: they identify the record
    across markdown ↔ LanceDB and must not be rewritten by a human edit.

    Scope is added by ``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``;
    per-record business fields are added by further subclasses.
    """

    # Top-level directory under the memory-root that holds this kind.
    # Scope mixins set this to ``"users"`` / ``"agents"``. Scope-agnostic
    # schemas (rare) leave it empty; consumers that need to resolve a path
    # (writers, layout reverse-lookup) must reject schemas with an empty
    # ``SCOPE_DIR``.
    SCOPE_DIR: ClassVar[str] = ""

    id: str
    type: str
    schema_version: int = 1

    # Unknown keys are accepted so L2 system-managed metadata
    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can travel on the
    # same model without every subclass redeclaring it.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def path_glob(cls) -> str:
        """Return an ``fnmatch``-style glob, relative to the memory-root,
        that covers every markdown file this schema describes.

        The cascade kind registry walks each kind's ``path_glob()`` to
        enumerate eligible files, so path patterns are declared on the
        schema rather than hard-coded in cascade.

        This base version is a stub. Subclasses override it — typically by
        listing :class:`DailyLogPathMixin` or :class:`SkillPathMixin`
        *before* the scope class so the mixin's version wins in the MRO.

        Raises:
            NotImplementedError: Always, when not overridden.
        """
        message = (
            f"{cls.__name__} must declare path_glob() "
            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
        )
        raise NotImplementedError(message)
class DailyLogPathMixin:
    """Glob strategy for daily-log markdown files.

    Layout: ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.

    ``SCOPE_DIR`` is expected to come from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``); the concrete
    schema declares ``DIR_NAME`` and ``FILE_PREFIX`` itself.

    List **this mixin first** in the bases so ``path_glob()`` resolves here
    rather than to :meth:`BaseFrontmatter.path_glob`'s
    ``NotImplementedError`` stub::

        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
            DIR_NAME: ClassVar[str] = "episodes"
            FILE_PREFIX: ClassVar[str] = "episode"
            ...
    """

    DIR_NAME: ClassVar[str]
    FILE_PREFIX: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project>
        # prefix in front of every user-visible dir. The scanner's
        # ``root.glob`` is anchored at the root, so without them nothing
        # matches; the watcher's right-anchored ``PurePosixPath.match``
        # agrees on the same shape.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.DIR_NAME,
            f"{cls.FILE_PREFIX}-*.md",
        )
        return "/".join(segments)
class SkillPathMixin:
    """Glob strategy for skill-directory markdown files.

    Layout: ``<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/
    <SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``. The glob matches
    each skill's main file only; sibling ``references/*.md`` and
    ``scripts/*`` are not covered (they accompany the main file, and the
    cascade daemon rebuilds the index column by concatenation — see
    :class:`AgentSkillFrontmatter`'s docstring).

    List **this mixin first** in the bases so ``path_glob()`` resolves here::

        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
            ...
    """

    SKILLS_CONTAINER_NAME: ClassVar[str]
    SKILL_DIR_PREFIX: ClassVar[str]
    SKILL_MAIN_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project> prefix.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.SKILLS_CONTAINER_NAME,
            f"{cls.SKILL_DIR_PREFIX}*",
            cls.SKILL_MAIN_FILENAME,
        )
        return "/".join(segments)
class ProfilePathMixin:
    """Glob strategy for single-file profile markdown.

    Layout: ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>`` — one fixed-name
    file directly inside the owner's directory, with no intermediate
    ``<dir>/`` segment (unlike daily-logs) and no per-name subdirectory
    (unlike skills).

    ``SCOPE_DIR`` is expected to come from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``); the concrete
    schema declares ``PROFILE_FILENAME``.

    List **this mixin first** in the bases so ``path_glob()`` resolves here::

        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
            PROFILE_FILENAME: ClassVar[str] = "user.md"
            ...
    """

    PROFILE_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        # The two leading ``*`` segments stand for the <app>/<project> prefix.
        segments = ("*", "*", cls.SCOPE_DIR, "*", cls.PROFILE_FILENAME)
        return "/".join(segments)
class UserScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one user (``track`` is ``"user"``).

    Only the *file-level* scope is stored here: ``user_id``, which the file
    path already expresses. Business attributes such as ``group_id`` belong
    in each entry's structured body — see :class:`StructuredEntry` in
    :mod:`.entries`.
    """

    # Files of this kind live under ``users/``.
    SCOPE_DIR: ClassVar[str] = "users"

    user_id: str
    track: Literal["user"] = "user"
class AgentScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one agent (``track`` is ``"agent"``).

    The scope-vs-business split matches :class:`UserScopedFrontmatter`:
    ``agent_id`` is the file-level scope, while ``group_id`` and similar
    attributes travel on each entry rather than on the file frontmatter.
    """

    # Files of this kind live under ``agents/``.
    SCOPE_DIR: ClassVar[str] = "agents"

    agent_id: str
    track: Literal["agent"] = "agent"

View File

@ -0,0 +1,31 @@
"""Parsed-markdown data type.
The output shape of :class:`MarkdownReader` is held here, separate
from the reader implementation: callers that only consume parse
results don't need to import the reader machinery, and downstream
modules (writer, business readers) can produce :class:`ParsedMarkdown`
without going through ``MarkdownReader.read`` if they already hold
the pieces.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from .entries import Entry
@dataclass(frozen=True)
class ParsedMarkdown:
    """The pieces of a markdown document once it has been parsed.

    Attributes:
        frontmatter: The YAML mapping from the frontmatter block; an empty
            dict when the document has none.
        body: Everything after the frontmatter block. Entry markers are
            still present in this text.
        entries: The marker-delimited entries found in ``body``; defaults
            to a fresh empty list.
    """

    frontmatter: dict[str, Any]
    body: str
    entries: list[Entry] = field(default_factory=list)

View File

@ -0,0 +1,42 @@
"""Markdown file reader.
Loads a markdown document and splits it into:
1. ``frontmatter`` — parsed YAML (empty dict if absent)
2. ``body`` — raw text after the closing ``---`` delimiter
3. ``entries`` — marker-delimited spans inside ``body``
The reader is purely parsing; it does not validate frontmatter shape,
entry content, or cross-references. Higher layers add business-aware
checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
``parse`` is sync (pure in-memory string processing). ``read`` is async
and uses :class:`anyio.Path` so file I/O does not block the event loop.
"""
from __future__ import annotations
from pathlib import Path
import anyio
from .entries import split_entries
from .frontmatter import parse_frontmatter
from .parsed import ParsedMarkdown
class MarkdownReader:
    """Turns markdown text or files into :class:`ParsedMarkdown` values."""

    @staticmethod
    def parse(text: str) -> ParsedMarkdown:
        """Split in-memory ``text`` into frontmatter, body and entries (no IO)."""
        frontmatter, remainder = parse_frontmatter(text)
        return ParsedMarkdown(
            frontmatter=frontmatter,
            body=remainder,
            entries=split_entries(remainder),
        )

    @staticmethod
    async def read(path: Path) -> ParsedMarkdown:
        """Load ``path`` as UTF-8 text and parse it."""
        source = anyio.Path(path)
        content = await source.read_text(encoding="utf-8")
        return MarkdownReader.parse(content)

View File

@ -0,0 +1,269 @@
"""Markdown file writer with atomic write semantics.
Atomicity is provided by writing to a same-directory temp file
(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
the target. Keeping the temp file in the same directory guarantees the
rename is on the same filesystem (POSIX rename is atomic only within a
single fs).
All public methods are async. File I/O (``read_text`` / ``write_text``
/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
In-process per-path locking
---------------------------
:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
the whole file (load frontmatter+body, merge an entry block, atomic
write the result). The atomic write itself is safe, but the read→write
window crosses ``await`` points. Concurrent asyncio tasks targeting the
same path would otherwise lose-update each other (both read N entries,
both produce N+1, second write overwrites the first → 1 entry lost).
To prevent this, an in-process per-path :class:`asyncio.Lock` is held
across the entire read-modify-write sequence. Lock objects are created
lazily by :meth:`MarkdownWriter.lock_for` and live on the writer instance
(not class-level), so a lock is never older than the writer that owns
it — this avoids the "Lock bound to different loop" failure mode that
surfaces when pytest-asyncio rebuilds the loop between tests but
module-level writer singletons leak Lock objects across boundaries.
Process-level coordination (multi-process writers against the same
memory-root) remains the job of
:func:`everos.core.persistence.locking.memory_root_lock`, which uses
``fcntl.flock``. The two locks compose: per-path async lock serialises
tasks within one process, ``memory_root_lock`` serialises processes
against each other.
"""
from __future__ import annotations
import asyncio
import contextlib
import os
import uuid
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any
import anyio
from ..memory_root import MemoryRoot
from .entries import EntryId
from .frontmatter import dump_frontmatter
from .reader import MarkdownReader
class MarkdownWriter:
    """Atomic writer for markdown files inside a memory-root.

    The ``memory_root`` reference is held to enable future enforcement that
    targets stay within the configured root; current writes do not depend on
    it for the rename itself (same-dir temp file).
    """

    def __init__(self, memory_root: MemoryRoot) -> None:
        self._memory_root = memory_root
        # Per-path async lock registry, filled lazily by ``lock_for`` through
        # ``dict.setdefault`` so that callers racing on the first use of a
        # path always end up sharing one Lock object.
        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
        # must outlive any task awaiting it; ref-counted GC would race with
        # those waiters. See Python bpo-28427 for the WeakValueDictionary
        # multithreading hazard that bites the weak-ref approach.
        self._path_locks: dict[Path, asyncio.Lock] = {}

    @property
    def memory_root(self) -> MemoryRoot:
        """The memory-root this writer was constructed with."""
        return self._memory_root

    def lock_for(self, path: Path) -> asyncio.Lock:
        """Return the per-path lock; create on first use.

        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
        can serialise their own multi-step ``read → compute → write``
        sequences against this writer's single-step ``append`` paths.
        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
        re-acquisition of the same lock from within an already-locked
        critical section (``asyncio.Lock`` is *not* reentrant).
        """
        # Resolve to an absolute canonical path so aliases (relative vs.
        # absolute, symlinks) share the same lock object.
        key = Path(path).resolve()
        lock = self._path_locks.get(key)
        if lock is None:
            # ``setdefault`` keeps whichever Lock was registered first, so a
            # concurrent first use can never replace a Lock that another
            # caller has already been handed.
            lock = self._path_locks.setdefault(key, asyncio.Lock())
        return lock

    async def write(self, path: Path, content: str) -> Path:
        """Atomically write ``content`` to ``path``.

        Steps:
            1. ``mkdir -p`` the parent directory.
            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
            3. ``flush`` + ``fsync`` the temp file.
            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).

        If any step fails — or the calling task is cancelled — the staging
        file is removed on a best-effort basis and the original exception
        is re-raised.

        Returns:
            ``path`` as a :class:`Path` (as given, not resolved).
        """
        target = Path(path)
        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
        try:
            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
            await anyio.to_thread.run_sync(os.replace, tmp, target)
        except BaseException:
            # ``BaseException`` rather than ``Exception``: cancellation is
            # not an ``Exception`` and would otherwise leave the staging
            # file behind. The shield lets the cleanup itself run even
            # though the surrounding scope is already cancelled.
            with anyio.CancelScope(shield=True):
                await _unlink_quiet(tmp)
            raise
        return target

    async def write_markdown(
        self,
        path: Path,
        *,
        frontmatter: Mapping[str, Any] | None = None,
        body: str = "",
    ) -> Path:
        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.

        A ``None`` or empty ``frontmatter`` writes ``body`` with no
        frontmatter block.
        """
        head = dump_frontmatter(frontmatter or {})
        return await self.write(path, head + body)

    async def append_entry(
        self,
        path: Path,
        *,
        entry_body: str,
        entry_id: EntryId,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append a single entry block to a markdown file, merging frontmatter.

        Convenience wrapper around :meth:`append_entries` for single-entry
        callers. See that method for full semantics.

        Args:
            path: Target markdown file. Created if missing.
            entry_body: Content between the open and close markers.
                A newline is added after the open marker and before the
                close marker automatically.
            entry_id: The id to stamp on this entry. The caller normally
                builds it with :meth:`EntryId.next_for`.
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter (later wins). ``None`` skips the merge.

        Returns:
            ``path`` as a :class:`Path` (as given, not resolved).
        """
        return await self.append_entries(
            path,
            [(entry_body, entry_id)],
            frontmatter_updates=frontmatter_updates,
        )

    async def append_entries(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append ``N`` entry blocks in a single locked read-modify-write cycle.

        Compared with calling :meth:`append_entry` ``N`` times, this:

        * Performs one file read + one file write instead of ``N`` of each
          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
          already holds many entries).
        * Holds the per-path lock for one short critical section instead of
          ``N`` separate acquisitions.
        * Updates ``frontmatter`` once (no intermediate ``entry_count``
          flapping).

        The caller assigns and supplies all :class:`EntryId` values. The
        order in ``entries`` is the order the blocks land in the file.

        Args:
            path: Target markdown file. Created if missing.
            entries: ``(entry_body, entry_id)`` pairs to append, in order.
                An empty sequence is allowed; the file is still rewritten,
                with any frontmatter updates applied.
            frontmatter_updates: Mapping shallow-merged into the existing
                frontmatter once (later wins).

        Returns:
            ``path`` as a :class:`Path` (as given, not resolved).
        """
        target = Path(path)
        async with self.lock_for(target):
            return await self._append_entries_unlocked(
                target,
                entries,
                frontmatter_updates=frontmatter_updates,
            )

    async def _append_entries_unlocked(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Same as :meth:`append_entries` but assumes the caller already
        holds :meth:`lock_for` ``(path)``.

        For use by higher-level writers that perform a multi-step
        ``read → compute eid → write`` sequence and need to keep the lock
        held across the read and the write. Public ``append_entries`` /
        ``append_entry`` always wrap this with the lock.

        This method never acquires the lock itself, because
        ``asyncio.Lock`` is not reentrant. Calling it without already
        holding the lock breaks the safety contract.
        """
        target = Path(path)
        # 1. Load existing markdown (or initialise empty).
        if await anyio.Path(target).is_file():
            parsed = await MarkdownReader.read(target)
            meta: dict[str, Any] = dict(parsed.frontmatter)
            body = parsed.body
        else:
            meta = {}
            body = ""
        # 2. Shallow-merge frontmatter updates.
        if frontmatter_updates:
            meta.update(frontmatter_updates)
        # 3. Append all entry blocks in order.
        if entries:
            # Make sure the first new marker starts on its own line.
            if body and not body.endswith("\n"):
                body += "\n"
            appended_blocks: list[str] = []
            for entry_body, entry_id in entries:
                eid_str = entry_id.format()
                appended_blocks.append(
                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
                    f"<!-- /entry:{eid_str} -->\n"
                )
            body = body + "".join(appended_blocks)
        # 4. Atomic write.
        return await self.write_markdown(target, frontmatter=meta, body=body)
def _write_and_fsync(tmp: Path, content: str) -> None:
"""Sync helper: write + fsync the staging file. Offloaded to a thread."""
with open(tmp, "w", encoding="utf-8") as fh:
fh.write(content)
fh.flush()
os.fsync(fh.fileno())
async def _unlink_quiet(tmp: Path) -> None:
    """Remove ``tmp`` if present, ignoring any ``OSError``.

    Used on failure paths so that the original exception is the one the
    caller sees.
    """
    try:
        await anyio.Path(tmp).unlink(missing_ok=True)
    except OSError:
        pass

View File

@ -0,0 +1,243 @@
"""memory-root path manager.
Single root directory holding all persisted memory:
User-visible (no dot prefix, edited by humans / agents):
agents/ per-agent records
users/ per-user records
knowledge/ global shared knowledge
System-managed (dotfile prefix, hidden by default in ls / Finder):
.index/ derived indexes (rebuildable from markdown)
sqlite/ system.db (+ WAL/SHM), ome.db, ome.aps.db
lancedb/ LanceDB tables
.tmp/ atomic-write staging directory
.lock single-process lock anchor (created on demand by
``memory_root_lock``)
User-editable (at the root):
ome.toml OME strategy overrides (hot-reloaded)
The cascade queue, LSN watermark, and change audit all live in
``system.db`` (table ``md_change_state``), not in separate dotfiles.
The default location and tunables come from :class:`everos.config.Settings`
(loaded from ``config/default.toml`` + ``EVEROS_*`` environment variables);
:meth:`MemoryRoot.default` resolves the configured path.
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
# ── app / project directory-name convention ──────────────────────────────────
#
# A memory root is partitioned by ``<app>/<project>`` *before* the user-visible
# scope dirs (``agents`` / ``users`` / ``knowledge``), so memory for different
# (app, project) pairs never shares a directory. The reserved id ``"default"``
# materialises as ``default_app`` / ``default_project`` on disk (rather than a
# bare ``default``) so a default space is visually distinct from a user-named
# directory; every other id maps to itself.
#
# The mapping is symmetric: the cascade path parser reverses it (see
# :func:`app_id_from_dir`) to recover the ids from an on-disk path. The write
# side (here) and the read side (cascade) MUST stay in lockstep, or rebuilt
# rows carry app/project that disagree with what was written. ``default_app`` /
# ``default_project`` are therefore reserved directory names.
# Reserved app / project id that denotes the default space.
_DEFAULT_SCOPE_ID = "default"
# On-disk directory names the reserved id maps to (one per partition level).
_DEFAULT_APP_DIR = "default_app"
_DEFAULT_PROJECT_DIR = "default_project"
# Path to the shipped OME override template; copied to ``<root>/ome.toml`` on
# first ``ensure()`` so users have a real file to edit instead of having to
# create one from scratch. ``parents[2]`` is the ``src/everos/`` package root
# (memory_root.py sits at ``core/persistence/memory_root.py``).
_OME_TEMPLATE_PATH = Path(__file__).parents[2] / "config" / "default_ome.toml"
def app_dir_name(app_id: str) -> str:
    """Return the directory name used on disk for ``app_id``.

    The reserved default id maps to the dedicated default-app directory name;
    any other id is returned unchanged. Note that an id equal to the reserved
    directory name itself is also returned unchanged, so it is not
    distinguishable from the default on the way back.
    """
    if app_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_APP_DIR
    return app_id
def project_dir_name(project_id: str) -> str:
    """Return the directory name used on disk for ``project_id``.

    The reserved default id maps to the dedicated default-project directory
    name; any other id is returned unchanged.
    """
    if project_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_PROJECT_DIR
    return project_id
def app_id_from_dir(dir_name: str) -> str:
    """Recover the ``app_id`` from an on-disk directory name.

    Inverse of :func:`app_dir_name`: the default-app directory name yields the
    reserved default id, every other name is its own id.
    """
    if dir_name == _DEFAULT_APP_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
def project_id_from_dir(dir_name: str) -> str:
    """Recover the ``project_id`` from an on-disk directory name.

    Inverse of :func:`project_dir_name`: the default-project directory name
    yields the reserved default id, every other name is its own id.
    """
    if dir_name == _DEFAULT_PROJECT_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
@dataclass(frozen=True, init=False)
class MemoryRoot:
    """Resolve every well-known location beneath one memory-root directory.

    The only stored field is ``root``, always held as an absolute, resolved
    ``Path`` (with ``~`` expanded), so instances built from different
    spellings of the same directory compare and hash equal.

    ``init=False`` together with the hand-written ``__init__`` lets callers
    pass either ``str`` or ``Path`` while the field stays typed as ``Path``;
    the stdlib dataclass machinery offers no converter hook for that, and a
    generated ``__init__`` would make type checkers reject ``MemoryRoot(s)``
    for a ``str`` ``s``.
    """

    root: Path

    def __init__(self, root: Path | str) -> None:
        """Normalise ``root`` and store it on the frozen instance."""
        normalised = Path(root).expanduser().resolve()
        # A frozen dataclass rejects ordinary attribute assignment, so the
        # field is installed through ``object.__setattr__``.
        object.__setattr__(self, "root", normalised)

    @classmethod
    def default(cls) -> MemoryRoot:
        """Build the instance for the configured memory root.

        The path is read from ``load_settings().memory.root``. Per the module
        notes the effective default lives in ``config/default.toml``
        (``[memory] root``) and ``EVEROS_MEMORY__ROOT`` overrides it.
        """
        # Imported lazily so that importing this module stays dependency-free.
        from everos.config import load_settings

        settings = load_settings()
        return cls(settings.memory.root)

    # ── User-visible (partitioned by app / project) ──────────────────────────
    #
    # The scope directories live under ``<root>/<app>/<project>/``, so these
    # accessors take ``(app_id, project_id)`` per call instead of reading
    # instance state. Both arguments default to ``"default"`` so callers that
    # carry no scope yet still land in the default space.

    def agents_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """Return ``<root>/<app>/<project>/agents/`` (per-agent records)."""
        scope_root = self.root / app_dir_name(app_id) / project_dir_name(project_id)
        return scope_root / "agents"

    def users_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """Return ``<root>/<app>/<project>/users/`` (per-user records)."""
        scope_root = self.root / app_dir_name(app_id) / project_dir_name(project_id)
        return scope_root / "users"

    def knowledge_dir(
        self, app_id: str = "default", project_id: str = "default"
    ) -> Path:
        """Return ``<root>/<app>/<project>/knowledge/`` (shared knowledge)."""
        scope_root = self.root / app_dir_name(app_id) / project_dir_name(project_id)
        return scope_root / "knowledge"

    # ── System-managed (dotfiles) ───────────────────────────────────────────

    @property
    def index_dir(self) -> Path:
        """``<root>/.index/``: root of the derived indexes."""
        return self.root.joinpath(".index")

    @property
    def lancedb_dir(self) -> Path:
        """``<root>/.index/lancedb/``: root of the LanceDB tables."""
        return self.index_dir.joinpath("lancedb")

    @property
    def sqlite_dir(self) -> Path:
        """``<root>/.index/sqlite/``: directory of the SQLite databases.

        Sibling of :attr:`lancedb_dir`; ``system.db`` and its WAL-mode
        sidecars (``-wal`` / ``-shm``) are placed here.
        """
        return self.index_dir.joinpath("sqlite")

    @property
    def system_db(self) -> Path:
        """``<root>/.index/sqlite/system.db``: the system SQLite database.

        Described by the module as holding system state, audit log, task
        queue, LSN watermark and other metadata.
        """
        return self.sqlite_dir.joinpath("system.db")

    @property
    def ome_db(self) -> Path:
        """``<root>/.index/sqlite/ome.db``: Offline Memory Engine state.

        Counterpart of :attr:`system_db` for the engine's own data (run
        records, counter store, idle store).
        """
        return self.sqlite_dir.joinpath("ome.db")

    @property
    def ome_aps_db(self) -> Path:
        """``<root>/.index/sqlite/ome.aps.db``: APScheduler jobstore file.

        Kept apart from :attr:`ome_db` by design so the scheduler's
        synchronous SQLAlchemy writer and the engine's async aiosqlite writer
        do not compete for one SQLite file lock.
        """
        return self.sqlite_dir.joinpath("ome.aps.db")

    @property
    def ome_config(self) -> Path:
        """``<root>/ome.toml``: user-editable OME strategy overrides.

        The file toggles strategies or adjusts per-strategy knobs
        (max_retries, gate, cron …). It is documented as hot-reloaded by the
        engine within roughly two seconds; that watcher lives outside this
        class.

        Example disabling foresight and user-profile extraction::

            [strategies.extract_foresight]
            enabled = false

            [strategies.extract_user_profile]
            enabled = false
        """
        return self.root.joinpath("ome.toml")

    @property
    def lock_file(self) -> Path:
        """``<root>/.lock``: anchor file for the single-process lock."""
        return self.root.joinpath(".lock")

    @property
    def tmp_dir(self) -> Path:
        """``<root>/.tmp/``: scratch space for batch / multi-step writes.

        Note:
            Per the design notes, ``MarkdownWriter`` does *not* stage its
            atomic single-file writes here; it uses a temp file next to the
            target so the final rename stays on one filesystem. This
            directory is for callers needing scratch space that belongs to
            no single target directory.
        """
        return self.root.joinpath(".tmp")

    # ── Operations ──────────────────────────────────────────────────────────

    def ensure(self) -> None:
        """Create the root plus the directories the runtime itself needs.

        Created (idempotently, parents included)::

            <root>/
            <root>/.index/
            <root>/.index/sqlite/
            <root>/.index/lancedb/
            <root>/.tmp/

        The user-visible ``agents/`` / ``users/`` / ``knowledge/`` trees are
        deliberately left alone; nothing here creates them.

        Finally, ``<root>/ome.toml`` is seeded from the shipped template when
        the file does not exist yet.
        """
        required = (
            self.root,
            self.index_dir,
            self.sqlite_dir,
            self.lancedb_dir,
            self.tmp_dir,
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)

        # Existence is the only test, so a file the user already edited is
        # never overwritten.
        override_file = self.ome_config
        if override_file.exists():
            return
        override_file.write_bytes(_OME_TEMPLATE_PATH.read_bytes())

View File

@ -0,0 +1,42 @@
"""SQLite async persistence (SQLModel + SQLAlchemy 2.0 + aiosqlite).
External usage (engine + sessions):
from everos.core.persistence.sqlite import (
create_system_engine, create_session_factory, session_scope,
)
External usage (ORM model basics — re-exported from sqlmodel):
from everos.core.persistence.sqlite import (
SQLModel, Field, Relationship, BaseTable,
)
External usage (generic CRUD repository base):
from everos.core.persistence.sqlite import RepoBase
The ``system_db`` is the everos
``<memory_root>/.index/sqlite/system.db`` SQLite file holding system
state, audit log, task queue, LSN watermark, and other metadata.
"""
# Re-export key sqlmodel symbols so business code has a single canonical
# entry point (``everos.core.persistence.sqlite``) for ORM authoring.
from sqlmodel import Field as Field
from sqlmodel import Relationship as Relationship
from sqlmodel import SQLModel as SQLModel
from .base import BaseTable as BaseTable
from .engine import create_system_engine as create_system_engine
from .repository import RepoBase as RepoBase
from .session import create_session_factory as create_session_factory
from .session import session_scope as session_scope
# Public names of this package: the sqlmodel re-exports plus the local engine,
# session, table-base and repository helpers imported above. Entries are in
# ASCII sort order (capitalised names first).
__all__ = [
    "BaseTable",
    "Field",
    "Relationship",
    "RepoBase",
    "SQLModel",
    "create_session_factory",
    "create_system_engine",
    "session_scope",
]

View File

@ -0,0 +1,112 @@
"""Common SQLModel base for everos tables.
:class:`BaseTable` adds ``created_at`` / ``updated_at`` columns. The
``updated_at`` column auto-refreshes on UPDATE through SA's ``onupdate``
hook (no explicit assignment needed in business code).
The **two-zone storage-UTC discipline** is enforced by a SQLAlchemy
:class:`TypeDecorator` (:class:`UtcDateTimeColumn`) used as the SQL
column type for every datetime field:
* **on write** — ``process_bind_param`` converts every datetime to
aware UTC before SQLAlchemy emits the bound parameter. This covers
*every* SQLAlchemy write path uniformly:
- ORM ``session.add()`` / ``session.merge()`` (unit-of-work flush)
- Core ``session.execute(insert(...).values(...))``
- Core ``session.execute(update(...).values(...))``
- Bulk ``bulk_insert_mappings`` / ``bulk_save_objects``
- Raw SQL whose bound parameters are explicitly typed with this column
type (untyped ``text()`` parameters bypass type processing)
Reaching into the column type is the only place SQLAlchemy guarantees
*every* write path passes through. Mapper events (``before_insert`` /
``before_update``) only fire on the ORM unit-of-work path and would
silently miss Core statements — which :mod:`everos.infra.persistence
.sqlite.repos.md_change_state` uses heavily.
* **on read** — ``process_result_value`` re-attaches ``tzinfo=UTC`` to
every naive datetime returned from SQLite (which has no native tz
storage and always returns naive). Callers therefore never observe a
naive datetime regardless of which read API they use.
Subclass with ``table=True`` to declare a real SQLite table::
from sqlmodel import Field
class Sender(BaseTable, table=True):
id: int | None = Field(default=None, primary_key=True)
name: str
"""
from __future__ import annotations
import datetime as _dt
from typing import Any
from sqlalchemy import DateTime
from sqlalchemy import types as sa_types
from sqlmodel import Field, SQLModel
from everos.component.utils.datetime import UtcDatetime, ensure_utc, get_utc_now
class UtcDateTimeColumn(sa_types.TypeDecorator[_dt.datetime]):
    """Column type that keeps datetimes in UTC across the storage boundary.

    How it works:

    * ``impl = DateTime``: the underlying SQL type is the dialect's regular
      DateTime.
    * ``process_bind_param`` (write side): every ``datetime`` is handed to
      :func:`ensure_utc` before it is bound. That helper is documented as
      converting aware values to UTC and treating naive values as already
      UTC. ``None`` and non-datetime values are forwarded untouched.
    * ``process_result_value`` (read side): a naive ``datetime`` gets
      ``tzinfo=UTC`` attached; aware values, ``None`` and anything else are
      returned as they came.

    ``cache_ok = True`` tells SQLAlchemy that statements using this type are
    safe to cache; the type carries no per-instance state.
    """

    impl = DateTime
    cache_ok = True

    def process_bind_param(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        """Normalise an outgoing value to UTC before it is bound."""
        if isinstance(value, _dt.datetime):
            return ensure_utc(value)
        # ``None`` and unexpected types pass straight through.
        return value

    def process_result_value(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        """Attach ``tzinfo=UTC`` to naive datetimes read from the database."""
        is_naive = isinstance(value, _dt.datetime) and value.tzinfo is None
        if is_naive:
            return value.replace(tzinfo=_dt.UTC)
        return value
class BaseTable(SQLModel):
    """Mixin providing ``created_at`` / ``updated_at`` columns.

    Both fields default to :func:`get_utc_now` when a model instance is
    created without explicit values.

    ``updated_at`` additionally passes ``onupdate=get_utc_now`` to the
    SQLAlchemy column, so SQLAlchemy refreshes it on UPDATE — do not set it
    manually unless overriding intentionally.

    Both columns use :class:`UtcDateTimeColumn` as the SQL column type, so
    the UTC normalisation happens in the column type's bind / result hooks
    rather than in business code.
    """

    # Set once at creation time; there is no ``onupdate`` hook on this column.
    created_at: UtcDatetime = Field(
        default_factory=get_utc_now,
        sa_type=UtcDateTimeColumn,
    )
    # Same default as ``created_at``, plus the ``onupdate`` hook that bumps it.
    updated_at: UtcDatetime = Field(
        default_factory=get_utc_now,
        sa_type=UtcDateTimeColumn,
        sa_column_kwargs={"onupdate": get_utc_now},
    )

View File

@ -0,0 +1,74 @@
"""Async SQLAlchemy engine factory + per-connection PRAGMA listener.
The engine connects through ``aiosqlite`` (SA URL ``sqlite+aiosqlite://``).
PRAGMAs are *per-connection* — they must be re-applied every time the
SA pool opens a new connection. We attach a ``connect`` event listener on
the engine's underlying sync engine for that purpose.
"""
from __future__ import annotations
from pathlib import Path
from sqlalchemy import event
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from everos.config import SqliteSettings
def create_system_engine(
    db_path: Path,
    sqlite_settings: SqliteSettings,
    *,
    echo: bool = False,
) -> AsyncEngine:
    """Build the async SQLAlchemy engine for an everos SQLite database file.

    The parent directory of ``db_path`` is created when missing, the engine
    is opened through the ``sqlite+aiosqlite`` driver, and a ``connect``
    listener is attached so the configured PRAGMAs are applied to every new
    connection. ``MemoryRoot.system_db`` is the conventional ``db_path``.

    Args:
        db_path: Filesystem path of the database file.
        sqlite_settings: PRAGMA tunables (journal_mode, synchronous,
            foreign_keys, temp_store, busy_timeout, journal_size_limit,
            cache_size).
        echo: When ``True``, SQLAlchemy logs every statement (development).

    Returns:
        An :class:`AsyncEngine` ready for use with :class:`AsyncSession`.
    """
    parent_dir = db_path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)

    # ``sqlite:///`` followed by a relative path stays relative; an absolute
    # POSIX path starts with ``/`` itself, which produces the four-slash
    # absolute form.
    dsn = f"sqlite+aiosqlite:///{db_path}"
    async_engine = create_async_engine(dsn, echo=echo, future=True)
    _register_pragma_listener(async_engine, sqlite_settings)
    return async_engine
def _register_pragma_listener(
    engine: AsyncEngine,
    sqlite_settings: SqliteSettings,
) -> None:
    """Apply the configured PRAGMAs to every connection the engine opens.

    PRAGMAs are per-connection, so they are issued from a ``connect``
    listener registered on the async engine's underlying sync engine.

    Args:
        engine: Async engine whose ``sync_engine`` receives the listener.
        sqlite_settings: Source of the PRAGMA values.
    """

    def _apply_pragmas(dbapi_connection, _connection_record) -> None:  # type: ignore[no-untyped-def]
        cur = dbapi_connection.cursor()
        try:
            run = cur.execute
            run(f"PRAGMA journal_mode={sqlite_settings.journal_mode}")
            run(f"PRAGMA synchronous={sqlite_settings.synchronous}")
            run(
                f"PRAGMA foreign_keys={'ON' if sqlite_settings.foreign_keys else 'OFF'}"
            )
            run(f"PRAGMA temp_store={sqlite_settings.temp_store}")
            run(f"PRAGMA busy_timeout={sqlite_settings.busy_timeout_ms}")
            run(
                f"PRAGMA journal_size_limit={sqlite_settings.journal_size_limit_bytes}"
            )
            # cache_size: a negative value is a size in KB, a positive one a
            # page count; the leading ``-`` selects the KB form.
            run(f"PRAGMA cache_size=-{sqlite_settings.cache_size_kb}")
        finally:
            # Release the cursor even when one of the PRAGMAs fails.
            cur.close()

    event.listen(engine.sync_engine, "connect", _apply_pragmas)

View File

@ -0,0 +1,166 @@
"""Generic CRUD repository for SQLModel-backed tables.
``RepoBase`` is a pure generic CRUD helper that sits alongside
:class:`BaseTable`. It knows nothing about a storage runtime — concrete
repos either pass ``session_factory`` explicitly (typical in tests) or
override :meth:`_factory_lookup` to pull the singleton from their
storage manager (typical in :mod:`everos.infra.persistence.sqlite.repos`).
Each method opens its own ``session_scope`` (auto rollback on exception,
session closed at end). For multi-step transactional work, use the
session factory directly via :attr:`session_factory`.
"""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlmodel import SQLModel, select
from .session import session_scope
class RepoBase[T: SQLModel]:
"""Generic CRUD repository for one SQLModel table.
Subclass and bind to a model. Two ways to provide the session factory:
1. **Explicit (tests / DI)** — pass it to ``__init__``::
repo = SenderRepo(session_factory)
2. **Lazy hook (production singletons)** — override
:meth:`_factory_lookup` so the repo can be instantiated as a
module-level singleton with no factory bound yet::
class _SenderRepo(RepoBase[Sender]):
model = Sender
def _factory_lookup(self):
from everos.infra.persistence.sqlite.sqlite_manager import (
get_session_factory,
)
return get_session_factory()
sender_repo = _SenderRepo()
await sender_repo.add(Sender(name="alice"))
"""
model: type[T]
def __init__(
self,
session_factory: async_sessionmaker[AsyncSession] | None = None,
) -> None:
"""Bind to a session factory; if ``None``, defer to ``_factory_lookup``."""
self._factory_override = session_factory
def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
"""Resolve a session factory on first use. Override in subclass.
``RepoBase`` itself has no idea where the runtime singleton lives
— that knowledge belongs to the infra subclass. The default raises
so a missing override is loud rather than silently broken.
"""
raise NotImplementedError(
f"{type(self).__name__}: pass session_factory= to __init__ "
"or override _factory_lookup() to wire the storage manager."
)
@property
def _factory(self) -> async_sessionmaker[AsyncSession]:
if self._factory_override is not None:
return self._factory_override
return self._factory_lookup()
@property
def session_factory(self) -> async_sessionmaker[AsyncSession]:
"""Underlying session factory (for multi-step transactions)."""
return self._factory
# ── Create ─────────────────────────────────────────────────────────────
async def add(self, instance: T) -> T:
"""Insert one row, commit, refresh, return the instance."""
async with session_scope(self._factory) as s:
s.add(instance)
await s.commit()
await s.refresh(instance)
return instance
async def add_many(self, instances: Sequence[T]) -> list[T]:
"""Insert many rows in one transaction."""
items = list(instances)
async with session_scope(self._factory) as s:
s.add_all(items)
await s.commit()
for inst in items:
await s.refresh(inst)
return items
# ── Read ───────────────────────────────────────────────────────────────
async def get_by_id(self, id_value: Any) -> T | None:
"""Get a row by primary key. Returns ``None`` if not found."""
async with session_scope(self._factory) as s:
return await s.get(self.model, id_value)
async def list_all(self) -> list[T]:
"""Return all rows (no filter, no order)."""
async with session_scope(self._factory) as s:
stmt = select(self.model)
return list((await s.execute(stmt)).scalars().all())
async def find_where(self, **filters: Any) -> list[T]:
"""Equality-only filtering, e.g. ``find_where(name="alice", active=True)``."""
async with session_scope(self._factory) as s:
stmt = select(self.model).filter_by(**filters)
return list((await s.execute(stmt)).scalars().all())
async def find_one(self, **filters: Any) -> T | None:
"""First row matching ``filters`` (no ordering); ``None`` if not found."""
async with session_scope(self._factory) as s:
stmt = select(self.model).filter_by(**filters).limit(1)
return (await s.execute(stmt)).scalars().first()
async def count(self) -> int:
"""Total row count (no filter)."""
async with session_scope(self._factory) as s:
stmt = select(func.count()).select_from(self.model)
return int((await s.execute(stmt)).scalar_one())
# ── Update ─────────────────────────────────────────────────────────────
async def update(self, instance: T) -> T:
"""Persist changes on an instance whose primary key already exists.
Uses ``session.merge`` so detached / fresh-from-Pydantic instances
are reattached. ``BaseTable.updated_at`` auto-bumps via SA's
``onupdate`` hook.
"""
async with session_scope(self._factory) as s:
merged = await s.merge(instance)
await s.commit()
await s.refresh(merged)
return merged
# ── Delete ─────────────────────────────────────────────────────────────
async def delete(self, instance: T) -> None:
"""Delete by instance (primary key must be set)."""
async with session_scope(self._factory) as s:
merged = await s.merge(instance)
await s.delete(merged)
await s.commit()
async def delete_by_id(self, id_value: Any) -> bool:
"""Delete by primary key. Returns ``True`` if a row was removed."""
async with session_scope(self._factory) as s:
instance = await s.get(self.model, id_value)
if instance is None:
return False
await s.delete(instance)
await s.commit()
return True

View File

@ -0,0 +1,45 @@
"""Async session factory + session scope context manager."""
from __future__ import annotations
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Return an :class:`async_sessionmaker` producing sessions on ``engine``.

    Sessions are created with ``expire_on_commit=False`` so attributes of
    loaded instances stay readable after a commit, the usual arrangement
    for async SQLAlchemy code.

    Args:
        engine: Async engine the sessions are bound to.
    """
    factory = async_sessionmaker(
        bind=engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    return factory
@asynccontextmanager
async def session_scope(
    session_factory: async_sessionmaker[AsyncSession],
) -> AsyncIterator[AsyncSession]:
    """Yield an :class:`AsyncSession` inside a try/rollback/close block.

    The session is rolled back when the ``async with`` body raises an
    ``Exception``, and the exception is then re-raised. Exceptions that do
    not derive from ``Exception`` are not caught by this handler. Leaving
    the block exits the session's own context manager in every case.
    Callers are responsible for calling ``await session.commit()`` on
    success; nothing here commits.

    Args:
        session_factory: Factory called once to create the session.

    Yields:
        The session created by ``session_factory()``.

    Usage:
        factory = create_session_factory(engine)
        async with session_scope(factory) as session:
            session.add(some_record)
            await session.commit()
    """
    async with session_factory() as session:
        try:
            yield session
        except Exception:
            # Discard uncommitted work, then let the original error propagate.
            await session.rollback()
            raise