chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
35
src/everos/README.md
Normal file
35
src/everos/README.md
Normal file
@ -0,0 +1,35 @@
|
||||
# `everos` package
|
||||
|
||||
Source layout for the `everos` Python package. This README is a quick
|
||||
orientation; full architectural detail lives elsewhere.
|
||||
|
||||
## Layout
|
||||
|
||||
```
|
||||
everos/
|
||||
├── entrypoints/ Presentation: cli + api
|
||||
├── service/ Application: use case orchestration
|
||||
├── memory/ Domain: extract + search + cascade + prompt_slots + models
|
||||
├── infra/ Infrastructure: persistence/{markdown, sqlite, lancedb}
|
||||
├── component/ Cross-cutting providers: llm / embedding / rerank / config / utils
|
||||
├── core/ Runtime base: observability / lifespan / context
|
||||
└── config/ Data: Settings + default.toml + prompt_slots templates
|
||||
```
|
||||
|
||||
Each subpackage has a top-level `__init__.py` describing its responsibility
|
||||
and public API.
|
||||
|
||||
## Dependency rule
|
||||
|
||||
```
|
||||
entrypoints → service → memory → infra
|
||||
↓
|
||||
component / core / config
|
||||
```
|
||||
|
||||
Single-direction; enforced by `import-linter` in CI.
|
||||
|
||||
## Further reading
|
||||
|
||||
- Architecture: [../../docs/architecture.md](../../docs/architecture.md)
|
||||
- Coding rules (auto-loaded by Claude Code): [../../.claude/rules/](../../.claude/rules/)
|
||||
11
src/everos/__init__.py
Normal file
11
src/everos/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
"""everos — md-first memory extraction framework."""
|
||||
|
||||
from importlib.metadata import PackageNotFoundError
|
||||
from importlib.metadata import version as _pkg_version
|
||||
|
||||
# Resolve the installed distribution version once, at import time.
#
# The sentinel is assigned first so the name always exists: an editable
# install without dist-info, or a source tree that was never installed, has
# no package metadata, and importing ``everos`` must not crash because of it.
__version__ = "0.0.0+unknown"
try:
    __version__ = _pkg_version("everos")
except PackageNotFoundError:
    # No metadata available — keep the sentinel assigned above.
    pass
|
||||
4
src/everos/component/__init__.py
Normal file
4
src/everos/component/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
"""Cross-cutting components.
|
||||
|
||||
Technical capabilities used by every layer; depend on no business layer.
|
||||
"""
|
||||
14
src/everos/component/config/__init__.py
Normal file
14
src/everos/component/config/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
"""Config processing capability.
|
||||
|
||||
YAML loader for category-organised config trees (PromptSlot templates,
|
||||
etc.). Distinct from :mod:`everos.config` (configuration *data* + Settings
|
||||
schema, which uses TOML for the Pydantic Settings file) — this subpackage
|
||||
holds *capability* (how to load), the other holds *data* (what to load).
|
||||
|
||||
External usage:
|
||||
from everos.component.config import YamlConfigLoader
|
||||
"""
|
||||
|
||||
from .loader import YamlConfigLoader as YamlConfigLoader
|
||||
|
||||
__all__ = ["YamlConfigLoader"]
|
||||
146
src/everos/component/config/loader.py
Normal file
146
src/everos/component/config/loader.py
Normal file
@ -0,0 +1,146 @@
|
||||
"""YAML config loader for category-organised file trees.
|
||||
|
||||
Concept: a project keeps several *categories* of YAML config files under
|
||||
their own subdirectories — for example PromptSlot templates under
|
||||
``config/prompt_slots/<name>.yaml``. The loader:
|
||||
|
||||
1. registers a category → subdirectory mapping
|
||||
2. resolves ``find(category, name)`` to ``<root>/<subdir>/<name>.yaml``
|
||||
3. caches parsed contents until ``refresh`` is called
|
||||
|
||||
Uses ``yaml.safe_load`` (no arbitrary tags) — PyYAML is already a project
|
||||
dependency for markdown frontmatter, so no extra cost.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
class YamlConfigLoader:
    """Load YAML files organised by category subdirectories.

    Usage:
        loader = YamlConfigLoader(root=Path("src/everos/config"))
        loader.register_category("prompt_slots")
        # → reads <root>/prompt_slots/episode.yaml
        meta = loader.find("prompt_slots", "episode")
        names = loader.list("prompt_slots")
        loader.refresh()  # next find() re-reads from disk

    Cache semantics:
        * ``find`` parses the file on first access and caches the dict.
        * ``refresh()`` empties the entire cache.
        * ``refresh(category)`` empties one category's entries.
        * ``refresh(category, name)`` empties a single entry.
        * Re-registering a category under a *different* subdirectory
          empties that category's entries, so ``find`` never serves data
          parsed from the previous directory.
    """

    def __init__(
        self,
        root: Path,
        categories: Mapping[str, str | None] | None = None,
    ) -> None:
        """Create a loader rooted at ``root``.

        Args:
            root: Base directory containing the category subdirectories.
            categories: Optional pre-registered category map (``name → subdir``).
                When ``subdir`` is ``None`` the category name is used as-is.
        """
        self._root = Path(root)
        self._subdirs: dict[str, str] = {}
        self._cache: dict[tuple[str, str], dict[str, Any]] = {}

        if categories:
            for name, subdir in categories.items():
                self.register_category(name, subdir)

    # ── Category management ────────────────────────────────────────────────

    def register_category(self, name: str, subdir: str | None = None) -> None:
        """Register a category. ``subdir`` defaults to ``name``.

        Registering an already-known category again is allowed. If the
        subdirectory changes, every cached entry of that category is
        dropped because it was parsed from the old location.
        """
        resolved = subdir if subdir is not None else name
        previous = self._subdirs.get(name)
        self._subdirs[name] = resolved
        if previous is not None and previous != resolved:
            # Cache keys are (category, name) and carry no path information,
            # so a remapped category would otherwise keep returning stale data.
            self.refresh(name)

    def categories(self) -> list[str]:
        """Return registered category names (sorted)."""
        return sorted(self._subdirs)

    # ── Lookup ─────────────────────────────────────────────────────────────

    def find(self, category: str, name: str) -> dict[str, Any]:
        """Load ``<root>/<subdir>/<name>.yaml`` for ``category``.

        The parsed mapping is cached and the *same* dict object is returned
        on later calls until the entry is invalidated via ``refresh``. An
        empty YAML document is returned as ``{}``.

        Raises:
            KeyError: if ``category`` was not registered.
            FileNotFoundError: if the yaml file does not exist.
            TypeError: if the parsed YAML is not a mapping.
        """
        cache_key = (category, name)
        if cache_key in self._cache:
            return self._cache[cache_key]

        path = self._path_for(category, name)
        if not path.is_file():
            raise FileNotFoundError(f"yaml not found: {path}")

        with path.open("r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
        if data is None:
            # ``safe_load`` yields None for an empty document.
            data = {}
        if not isinstance(data, dict):
            raise TypeError(
                f"yaml top-level must be a mapping, got {type(data).__name__}: {path}"
            )
        self._cache[cache_key] = data
        return data

    def list(self, category: str) -> list[str]:
        """Return sorted yaml stems available in ``category`` (no extension).

        Returns an empty list when the category directory does not exist.

        Raises:
            KeyError: if ``category`` was not registered.
        """
        directory = self._dir_for(category)
        if not directory.is_dir():
            return []
        return sorted(p.stem for p in directory.glob("*.yaml"))

    # ── Cache control ──────────────────────────────────────────────────────

    def refresh(
        self,
        category: str | None = None,
        name: str | None = None,
    ) -> None:
        """Invalidate cached entries.

        - ``refresh()`` → drop every cached entry
        - ``refresh(category)`` → drop everything in ``category``
        - ``refresh(category, name)`` → drop a single entry

        ``name`` is ignored when ``category`` is ``None``: the whole cache
        is cleared.
        """
        if category is None:
            self._cache.clear()
            return
        if name is not None:
            self._cache.pop((category, name), None)
            return
        self._cache = {
            (cat, n): v for (cat, n), v in self._cache.items() if cat != category
        }

    # ── Internals ──────────────────────────────────────────────────────────

    def _dir_for(self, category: str) -> Path:
        """Return the directory registered for ``category`` (KeyError if unknown)."""
        try:
            subdir = self._subdirs[category]
        except KeyError as exc:
            raise KeyError(
                f"category not registered: {category!r}; known: {sorted(self._subdirs)}"
            ) from exc
        return self._root / subdir

    def _path_for(self, category: str, name: str) -> Path:
        """Return the yaml path for ``name`` inside ``category``."""
        return self._dir_for(category) / f"{name}.yaml"
|
||||
33
src/everos/component/embedding/__init__.py
Normal file
33
src/everos/component/embedding/__init__.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""Embedding provider adapters (one provider per file).
|
||||
|
||||
|
||||
Public surface:
|
||||
|
||||
- :class:`EmbeddingProvider` — Protocol every provider satisfies.
|
||||
- :class:`EmbeddingError` — provider-side failure.
|
||||
- :class:`OpenAIEmbeddingProvider` — concrete provider for any
|
||||
OpenAI-protocol embeddings endpoint (DeepInfra, vLLM, OpenAI, …).
|
||||
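- :func:`build_embedding_provider` — settings-driven factory.
- :func:`get_embedder` — process-wide lazy singleton accessor.
- :class:`EmbeddingNotConfiguredError` — raised by :func:`get_embedder` when
required embedding settings are missing.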
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.component.embedding import build_embedding_provider
|
||||
provider = build_embedding_provider(settings.embedding)
|
||||
vec = await provider.embed("hello")
|
||||
"""
|
||||
|
||||
from .accessor import EmbeddingNotConfiguredError as EmbeddingNotConfiguredError
|
||||
from .accessor import get_embedder as get_embedder
|
||||
from .factory import build_embedding_provider as build_embedding_provider
|
||||
from .openai_provider import OpenAIEmbeddingProvider as OpenAIEmbeddingProvider
|
||||
from .protocol import EmbeddingError as EmbeddingError
|
||||
from .protocol import EmbeddingProvider as EmbeddingProvider
|
||||
|
||||
__all__ = [
|
||||
"EmbeddingError",
|
||||
"EmbeddingNotConfiguredError",
|
||||
"EmbeddingProvider",
|
||||
"OpenAIEmbeddingProvider",
|
||||
"build_embedding_provider",
|
||||
"get_embedder",
|
||||
]
|
||||
48
src/everos/component/embedding/accessor.py
Normal file
48
src/everos/component/embedding/accessor.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""Process-wide embedding provider accessor.
|
||||
|
||||
Lazy singleton mirror of :func:`everos.component.llm.get_llm_client`:
|
||||
first call reads settings and builds the OpenAI-protocol embedding
|
||||
client; subsequent calls return the cached instance. Strategies and
|
||||
other components that need a process-wide embedder import this rather
|
||||
than threading the provider through their constructors.
|
||||
|
||||
Raises :class:`EmbeddingNotConfiguredError` when credentials are missing
|
||||
so misconfiguration surfaces at the call site (or at app startup via a
|
||||
lifespan provider) instead of silently degrading.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
from .factory import build_embedding_provider
|
||||
from .protocol import EmbeddingProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class EmbeddingNotConfiguredError(RuntimeError):
    """Signal that the embedding settings are incomplete.

    Raised when ``settings.embedding`` lacks ``model``/``api_key``/``base_url``.
    """
|
||||
|
||||
|
||||
# Process-wide cache slot; filled by the first successful ``get_embedder`` call.
_embedder: EmbeddingProvider | None = None


def get_embedder() -> EmbeddingProvider:
    """Return the process-wide :class:`EmbeddingProvider`, building it on first use.

    The provider is built from ``load_settings().embedding`` and then cached
    in the module-level slot; later calls return that same instance.

    Raises:
        EmbeddingNotConfiguredError: When required settings fields are
            unset. See :func:`build_embedding_provider` for the exact
            keys.
    """
    global _embedder
    if _embedder is None:
        embedding_settings = load_settings().embedding
        try:
            _embedder = build_embedding_provider(embedding_settings)
        except ValueError as exc:
            # The factory reports missing fields as ValueError; re-raise as the
            # accessor's own error type while keeping the original as the cause.
            raise EmbeddingNotConfiguredError(str(exc)) from exc
        logger.info("embedder_built")
    return _embedder
|
||||
56
src/everos/component/embedding/factory.py
Normal file
56
src/everos/component/embedding/factory.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""Factory for building an embedding provider from :class:`EmbeddingSettings`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.config import EmbeddingSettings
|
||||
|
||||
from .openai_provider import OpenAIEmbeddingProvider
|
||||
from .protocol import EmbeddingProvider
|
||||
|
||||
# Vector dim for the LanceDB index column — see ``17_lancedb_tables_design.md``.
_DEFAULT_DIM = 1024


def build_embedding_provider(
    settings: EmbeddingSettings,
    *,
    dim: int = _DEFAULT_DIM,
) -> EmbeddingProvider:
    """Build an OpenAI-compatible embedding provider from settings.

    The secret ``api_key`` is unwrapped here, so the provider receives a
    plain string. An empty secret is treated the same as a missing one, so
    a blank ``EVEROS_EMBEDDING__API_KEY=`` fails fast here instead of at
    the first request.

    Args:
        settings: The :class:`EmbeddingSettings` slice from
            :func:`everos.config.load_settings`.
        dim: Target vector dimension; defaults to 1024 to match the
            LanceDB ``vector`` column shape.

    Returns:
        An :class:`EmbeddingProvider` ready to call ``embed`` /
        ``embed_batch``.

    Raises:
        ValueError: If ``model``, ``api_key`` or ``base_url`` is unset
            (or empty).
    """
    if not settings.model:
        raise ValueError(
            "Embedding model is not configured "
            "(set EVEROS_EMBEDDING__MODEL or [embedding] model in user toml)"
        )
    api_key = (
        settings.api_key.get_secret_value() if settings.api_key is not None else None
    )
    if not api_key:
        raise ValueError(
            "Embedding api_key is not configured (set EVEROS_EMBEDDING__API_KEY)"
        )
    if not settings.base_url:
        raise ValueError(
            "Embedding base_url is not configured (set EVEROS_EMBEDDING__BASE_URL)"
        )
    return OpenAIEmbeddingProvider(
        model=settings.model,
        api_key=api_key,
        base_url=settings.base_url,
        dim=dim,
        timeout=settings.timeout_seconds,
        max_retries=settings.max_retries,
        batch_size=settings.batch_size,
        max_concurrent=settings.max_concurrent,
    )
|
||||
98
src/everos/component/embedding/openai_provider.py
Normal file
98
src/everos/component/embedding/openai_provider.py
Normal file
@ -0,0 +1,98 @@
|
||||
"""OpenAI-compatible embedding provider.
|
||||
|
||||
Wraps :class:`openai.AsyncOpenAI` so any OpenAI-protocol endpoint
|
||||
(DeepInfra, OpenAI, Together, Fireworks, …) works without per-provider
|
||||
forks. Self-hosted vLLM also exposes the same shape; the only quirk it
|
||||
imposes is that the ``dimensions`` request parameter is ignored — we
|
||||
truncate client-side to ``dim`` so callers always see the declared
|
||||
shape regardless of backend.
|
||||
|
||||
Concurrency model:
|
||||
|
||||
- ``embed_batch`` splits the inputs into chunks of ``batch_size``.
|
||||
- An :class:`asyncio.Semaphore` capped at ``max_concurrent`` bounds
|
||||
in-flight requests; remaining chunks queue and start as slots free.
|
||||
- Retries / timeouts come from the openai SDK (``max_retries``,
|
||||
``timeout`` constructor args).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Sequence
|
||||
|
||||
import openai
|
||||
|
||||
from .protocol import EmbeddingError
|
||||
|
||||
|
||||
class OpenAIEmbeddingProvider:
    """OpenAI-compatible embedding provider with batching + concurrency.

    Args:
        model: Embedding model id (e.g. ``"Qwen/Qwen3-Embedding-4B"``).
        api_key: Bearer credential as a plain ``str``.
        base_url: OpenAI-protocol endpoint
            (e.g. ``"https://api.deepinfra.com/v1/openai"``).
        dim: Target vector dimension. Vectors longer than this are
            truncated client-side (matches the LanceDB column shape —
            see ``17_lancedb_tables_design.md``).
        timeout: Per-request timeout, seconds.
        max_retries: Retry budget exposed via the openai SDK.
        batch_size: How many inputs per ``/embeddings`` call.
        max_concurrent: Cap on in-flight chunked requests.

    Raises:
        ValueError: If ``dim``, ``batch_size`` or ``max_concurrent`` is
            smaller than 1.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        dim: int = 1024,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        # Fail fast on sizes that would otherwise misbehave silently:
        # a negative batch_size makes ``range`` empty (embed_batch returns []
        # for non-empty input), zero makes ``range`` raise, and a semaphore
        # of 0 never admits a request.
        if dim < 1:
            raise ValueError(f"dim must be >= 1, got {dim}")
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

        self.dim = dim
        self._model = model
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url=base_url,
            timeout=timeout,
            max_retries=max_retries,
        )

    async def embed(self, text: str) -> list[float]:
        """Embed a single string."""
        vectors = await self._embed_chunk([text])
        return vectors[0]

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed many strings, preserving input order.

        Inputs are split into chunks of ``batch_size``; chunks are issued
        concurrently, bounded by the semaphore.
        """
        if not texts:
            return []
        chunks = [
            list(texts[i : i + self._batch_size])
            for i in range(0, len(texts), self._batch_size)
        ]
        results = await asyncio.gather(*(self._embed_chunk(chunk) for chunk in chunks))
        # gather preserves order across awaitables, and each chunk preserves
        # its internal order — so flattening yields the input order back.
        return [vec for chunk in results for vec in chunk]

    async def _embed_chunk(self, chunk: list[str]) -> list[list[float]]:
        """One ``/embeddings`` call, semaphore-guarded.

        Raises:
            EmbeddingError: If the SDK call fails, or if the endpoint
                returns a different number of vectors than inputs sent.
        """
        async with self._semaphore:
            try:
                response = await self._client.embeddings.create(
                    model=self._model,
                    input=chunk,
                )
            except openai.OpenAIError as exc:
                raise EmbeddingError(str(exc)) from exc
            items = response.data
            if len(items) != len(chunk):
                # A short or long response would shift every later vector
                # onto the wrong input once chunks are flattened.
                raise EmbeddingError(
                    f"embedding endpoint returned {len(items)} vectors "
                    f"for {len(chunk)} inputs"
                )
            # Items are taken in response order; truncate each to ``dim``.
            return [list(item.embedding[: self.dim]) for item in items]
|
||||
48
src/everos/component/embedding/protocol.py
Normal file
48
src/everos/component/embedding/protocol.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""Embedding provider protocol.
|
||||
|
||||
|
||||
The cascade worker / retrieval pipeline depend on a single small
|
||||
contract: turn a string (or list of strings) into a fixed-dimension
|
||||
vector. Whether the backend is OpenAI, vLLM, DeepInfra, Ollama, or a
|
||||
local model is the provider's business — the contract is invariant.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
|
||||
class EmbeddingError(Exception):
    """Provider-side embedding failure.

    Providers raise this with ``raise ... from exc`` so the upstream SDK
    exception stays reachable through ``__cause__`` (PEP 3134) and
    diagnostic loggers keep the original error chain.
    """
|
||||
|
||||
|
||||
@runtime_checkable
class EmbeddingProvider(Protocol):
    """Structural contract for async embedding backends.

    ``dim`` is the post-truncation vector dimension every embed call
    returns. A backend that cannot truncate natively is expected to
    truncate client-side so callers always see the declared shape.
    """

    # Declared (post-truncation) length of every returned vector.
    dim: int

    async def embed(self, text: str) -> list[float]:
        """Embed one string and return a ``[dim]`` vector."""
        ...

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed several strings, returning vectors in input order.

        Implementations chunk by ``batch_size`` and bound in-flight
        requests by ``max_concurrent`` (both from settings). Failures are
        reported as :class:`EmbeddingError`, which the worker classifies
        as retryable or unrecoverable per HTTP-status mapping.
        """
        ...
|
||||
45
src/everos/component/llm/__init__.py
Normal file
45
src/everos/component/llm/__init__.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""LLM provider adapters (one provider per file, mem0-style).
|
||||
|
||||
Public surface:
|
||||
|
||||
- :class:`LLMClient` — Protocol every provider satisfies (re-exported
|
||||
from :mod:`everalgo.llm`; same shape so everos providers can be
|
||||
handed to everalgo operators).
|
||||
- :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage` — chat
|
||||
payload types (re-exported from :mod:`everalgo.llm`).
|
||||
- :class:`LLMError` — provider-side failure (re-exported).
|
||||
- :class:`LLMNotConfiguredError` — raised when credentials are missing.
|
||||
- :class:`OpenAIProvider` — concrete provider wrapping
|
||||
``openai.AsyncOpenAI`` against any OpenAI-compatible endpoint.
|
||||
- :func:`build_llm_provider` — settings-driven factory.
|
||||
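- :func:`get_llm_client` — process-wide lazy singleton accessor.
- :func:`get_multimodal_llm_client` — lazy singleton accessor for the
separate ``[multimodal]`` endpoint.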
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.component.llm import build_llm_provider, LLMClient
|
||||
provider = build_llm_provider(settings.llm)
|
||||
"""
|
||||
|
||||
from .client import LLMNotConfiguredError as LLMNotConfiguredError
|
||||
from .client import get_llm_client as get_llm_client
|
||||
from .client import get_multimodal_llm_client as get_multimodal_llm_client
|
||||
from .factory import build_llm_provider as build_llm_provider
|
||||
from .openai_provider import OpenAIProvider as OpenAIProvider
|
||||
from .protocol import ChatMessage as ChatMessage
|
||||
from .protocol import ChatResponse as ChatResponse
|
||||
from .protocol import LLMClient as LLMClient
|
||||
from .protocol import LLMError as LLMError
|
||||
from .protocol import Usage as Usage
|
||||
|
||||
__all__ = [
|
||||
"ChatMessage",
|
||||
"ChatResponse",
|
||||
"LLMClient",
|
||||
"LLMError",
|
||||
"LLMNotConfiguredError",
|
||||
"OpenAIProvider",
|
||||
"Usage",
|
||||
"build_llm_provider",
|
||||
"get_llm_client",
|
||||
"get_multimodal_llm_client",
|
||||
]
|
||||
89
src/everos/component/llm/client.py
Normal file
89
src/everos/component/llm/client.py
Normal file
@ -0,0 +1,89 @@
|
||||
"""Process-wide LLM client accessor.
|
||||
|
||||
Lazy singleton — first call reads settings and builds the algo LLM
|
||||
client; subsequent calls return the cached instance. Raises
|
||||
:class:`LLMNotConfiguredError` when no credentials are present so
|
||||
misconfiguration surfaces at app startup (via the LLM lifespan
|
||||
provider) instead of silently failing per-request downstream.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everalgo.llm import build_client
|
||||
from everalgo.llm.config import LLMConfig
|
||||
from everalgo.llm.protocols import LLMClient
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class LLMNotConfiguredError(RuntimeError):
    """Signal that LLM credentials are incomplete.

    Raised by the accessors in this module when the relevant settings
    section (``settings.llm`` or ``settings.multimodal``) is missing
    ``api_key`` or ``base_url``.
    """
|
||||
|
||||
|
||||
# Process-wide cache slots, one per accessor; filled on first successful build.
_llm_client: LLMClient | None = None
_multimodal_client: LLMClient | None = None


def get_llm_client() -> LLMClient:
    """Return the process-wide algo LLM client, building it on first use.

    Reads ``load_settings().llm``, builds the client through everalgo's
    ``build_client`` and caches it in the module-level slot.

    Raises:
        LLMNotConfiguredError: When ``settings.llm.api_key`` or
            ``settings.llm.base_url`` is unset.
    """
    global _llm_client
    if _llm_client is None:
        llm_cfg = load_settings().llm
        secret = llm_cfg.api_key
        api_key = None if secret is None else secret.get_secret_value()
        # Both values must be present and non-empty.
        if not (api_key and llm_cfg.base_url):
            raise LLMNotConfiguredError(
                "LLM is required; set EVEROS_LLM__API_KEY + EVEROS_LLM__BASE_URL"
            )
        config = LLMConfig(
            model=llm_cfg.model,
            api_key=api_key,
            base_url=llm_cfg.base_url,
        )
        _llm_client = build_client(config)
        logger.info("llm_client_built", model=llm_cfg.model)
    return _llm_client
|
||||
|
||||
|
||||
def get_multimodal_llm_client() -> LLMClient:
    """Return the process-wide multimodal LLM client (for everalgo.parser).

    Built on first use from the flat ``[multimodal]`` config, which is
    kept separate from the main ``[llm]`` section so parsing can target a
    vision/audio-capable endpoint. Later calls return the cached instance.

    Raises:
        LLMNotConfiguredError: When ``settings.multimodal.api_key`` or
            ``settings.multimodal.base_url`` is unset.
    """
    global _multimodal_client
    if _multimodal_client is None:
        cfg = load_settings().multimodal
        secret = cfg.api_key
        api_key = None if secret is None else secret.get_secret_value()
        # Both values must be present and non-empty.
        if not (api_key and cfg.base_url):
            raise LLMNotConfiguredError(
                "Multimodal LLM is required for parsing; set "
                "EVEROS_MULTIMODAL__API_KEY + EVEROS_MULTIMODAL__BASE_URL"
            )
        config = LLMConfig(
            model=cfg.model,
            api_key=api_key,
            base_url=cfg.base_url,
        )
        _multimodal_client = build_client(config)
        logger.info("multimodal_llm_client_built", model=cfg.model)
    return _multimodal_client
|
||||
45
src/everos/component/llm/factory.py
Normal file
45
src/everos/component/llm/factory.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""Factory for building an LLM provider from :class:`LLMSettings`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.config import LLMSettings
|
||||
|
||||
from .openai_provider import OpenAIProvider
|
||||
from .protocol import LLMClient
|
||||
|
||||
|
||||
def build_llm_provider(settings: LLMSettings) -> LLMClient:
    """Build an OpenAI-compatible LLM provider from settings.

    Unwraps :class:`pydantic.SecretStr` here so downstream callers never
    touch the raw key directly. Fails fast if either ``api_key`` or
    ``base_url`` is missing — caller is expected to set them via
    ``.env`` / user toml / programmatic init before calling. An empty
    secret (e.g. a blank ``EVEROS_LLM__API_KEY=``) counts as missing,
    matching the check in ``get_llm_client``.

    Args:
        settings: The :class:`LLMSettings` slice from
            :func:`everos.config.load_settings`.

    Returns:
        A provider that structurally satisfies
        :class:`everalgo.llm.LLMClient` and can be passed to everalgo
        operators via ``llm=``.

    Raises:
        ValueError: If ``api_key`` or ``base_url`` is unset (or empty).
    """
    api_key = (
        settings.api_key.get_secret_value() if settings.api_key is not None else None
    )
    if not api_key:
        raise ValueError(
            "LLM api_key is not configured "
            "(set EVEROS_LLM__API_KEY or [llm] api_key in user toml)"
        )
    if not settings.base_url:
        raise ValueError(
            "LLM base_url is not configured "
            "(set EVEROS_LLM__BASE_URL or [llm] base_url in user toml)"
        )
    return OpenAIProvider(
        model=settings.model,
        api_key=api_key,
        base_url=settings.base_url,
    )
|
||||
114
src/everos/component/llm/openai_provider.py
Normal file
114
src/everos/component/llm/openai_provider.py
Normal file
@ -0,0 +1,114 @@
|
||||
"""OpenAI-compatible LLM provider for everos.
|
||||
|
||||
Implements the :class:`everalgo.llm.LLMClient` structural contract by
|
||||
wrapping :class:`openai.AsyncOpenAI` — the same backbone everalgo's own
|
||||
``OpenAICompatClient`` uses, but defined here in everos so the
|
||||
provider can be constructed from :class:`everos.config.LLMSettings`
|
||||
and handed to everalgo extractors via the ``llm=`` per-call parameter.
|
||||
|
||||
Keeps the provider lean (matches the everalgo minimum-viable shape):
|
||||
no multi-key rotation, no scenario-level routing, no token-usage
|
||||
collector — those are deployment concerns layered on top.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, Literal
|
||||
|
||||
import openai
|
||||
|
||||
from .protocol import ChatMessage, ChatResponse, LLMError, Usage
|
||||
|
||||
|
||||
class OpenAIProvider:
    """Thin async wrapper over ``openai.AsyncOpenAI``.

    Structurally satisfies :class:`everalgo.llm.LLMClient` (PEP 544);
    instances can be passed directly to everalgo operators that accept
    ``llm: LLMClient | None``.

    Args:
        model: Default model id (override per-call with ``model=`` on
            :meth:`chat`).
        api_key: Bearer credential. Pass as plain ``str`` — settings
            should unwrap :class:`pydantic.SecretStr` at the factory
            boundary.
        base_url: OpenAI-compatible endpoint (e.g.
            ``"https://openrouter.ai/api/v1"``).
        timeout: Per-request timeout in seconds.
        temperature: Default sampling temperature (overridable per call).
        max_tokens: Default max-tokens cap (overridable per call).
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str | None = None,
        timeout: float = 60.0,
        temperature: float = 0.0,
        max_tokens: int | None = None,
    ) -> None:
        self._model = model
        self._temperature = temperature
        self._max_tokens = max_tokens
        self._client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url=base_url,
            timeout=timeout,
        )

    async def chat(
        self,
        messages: list[ChatMessage],
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        response_format: Mapping[str, Any] | None = None,
        **extra: Any,
    ) -> ChatResponse:
        """Send a chat completion request and return the parsed response.

        Per-call ``model`` / ``temperature`` / ``max_tokens`` override the
        constructor defaults. ``extra`` is merged into the request last, so
        it can add or override any request field.

        Raises:
            LLMError: If the SDK call fails, or if the endpoint returns a
                completion with no choices.
        """
        request: dict[str, Any] = {
            "model": model or self._model,
            "messages": [m.model_dump() for m in messages],
            "temperature": (
                temperature if temperature is not None else self._temperature
            ),
        }
        effective_max = max_tokens if max_tokens is not None else self._max_tokens
        if effective_max is not None:
            request["max_tokens"] = effective_max
        if response_format is not None:
            request["response_format"] = dict(response_format)
        request.update(extra)

        try:
            completion = await self._client.chat.completions.create(**request)
        except openai.OpenAIError as exc:
            raise LLMError(str(exc)) from exc

        if not completion.choices:
            # Surface an empty ``choices`` list through the provider's own
            # error type instead of a bare IndexError from ``choices[0]``.
            raise LLMError("chat completion returned no choices")

        choice = completion.choices[0]
        usage: Usage | None = None
        if completion.usage is not None:
            usage = Usage(
                prompt_tokens=completion.usage.prompt_tokens,
                completion_tokens=completion.usage.completion_tokens,
            )
        return ChatResponse(
            # ``content`` may be None on the SDK object; normalise to "".
            content=choice.message.content or "",
            model=completion.model,
            usage=usage,
            finish_reason=_normalise_finish_reason(choice.finish_reason),
            raw=None,
        )
|
||||
|
||||
|
||||
def _normalise_finish_reason(
|
||||
value: str | None,
|
||||
) -> Literal["stop", "length", "content_filter"] | None:
|
||||
if value in ("stop", "length", "content_filter"):
|
||||
return value # type: ignore[return-value]
|
||||
return None
|
||||
39
src/everos/component/llm/protocol.py
Normal file
39
src/everos/component/llm/protocol.py
Normal file
@ -0,0 +1,39 @@
|
||||
"""LLM client protocol re-export.
|
||||
|
||||
The structural contract every everos LLM provider satisfies is the same
|
||||
:class:`everalgo.llm.LLMClient` Protocol — everos providers must be
|
||||
pass-through-compatible with the everalgo extractors that accept an
|
||||
``llm=`` parameter. Re-exporting the type here keeps the import path
|
||||
stable (``everos.component.llm``) even if the everalgo namespace
|
||||
shifts later.
|
||||
|
||||
The :class:`ChatMessage` / :class:`ChatResponse` / :class:`Usage`
|
||||
shapes are likewise re-exported so callers can build / inspect chat
|
||||
payloads without reaching into the everalgo package directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everalgo.llm import (
|
||||
ChatMessage as ChatMessage,
|
||||
)
|
||||
from everalgo.llm import (
|
||||
ChatResponse as ChatResponse,
|
||||
)
|
||||
from everalgo.llm import (
|
||||
LLMClient as LLMClient,
|
||||
)
|
||||
from everalgo.llm import (
|
||||
LLMError as LLMError,
|
||||
)
|
||||
from everalgo.llm import (
|
||||
Usage as Usage,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ChatMessage",
|
||||
"ChatResponse",
|
||||
"LLMClient",
|
||||
"LLMError",
|
||||
"Usage",
|
||||
]
|
||||
34
src/everos/component/rerank/__init__.py
Normal file
34
src/everos/component/rerank/__init__.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""Rerank provider adapters (one provider per file).
|
||||
|
||||
Public surface:
|
||||
|
||||
- :class:`RerankProvider` — Protocol every provider satisfies.
|
||||
- :class:`RerankResult` / :class:`RerankError` — value type + error.
|
||||
- :class:`DeepInfraRerankProvider` — DeepInfra inference-API rerank.
|
||||
- :class:`VllmRerankProvider` — OpenAI-compat ``/v1/rerank`` (vLLM,
|
||||
self-hosted, other compatible servers).
|
||||
- :func:`build_rerank_provider` — settings-driven factory that picks
|
||||
the concrete provider via ``settings.rerank.provider``.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.component.rerank import build_rerank_provider
|
||||
provider = build_rerank_provider(settings.rerank)
|
||||
scored = await provider.rerank("how to file a claim", documents)
|
||||
"""
|
||||
|
||||
from .deepinfra_provider import DeepInfraRerankProvider as DeepInfraRerankProvider
|
||||
from .factory import build_rerank_provider as build_rerank_provider
|
||||
from .protocol import RerankError as RerankError
|
||||
from .protocol import RerankProvider as RerankProvider
|
||||
from .protocol import RerankResult as RerankResult
|
||||
from .vllm_provider import VllmRerankProvider as VllmRerankProvider
|
||||
|
||||
__all__ = [
|
||||
"DeepInfraRerankProvider",
|
||||
"RerankError",
|
||||
"RerankProvider",
|
||||
"RerankResult",
|
||||
"VllmRerankProvider",
|
||||
"build_rerank_provider",
|
||||
]
|
||||
196
src/everos/component/rerank/deepinfra_provider.py
Normal file
196
src/everos/component/rerank/deepinfra_provider.py
Normal file
@ -0,0 +1,196 @@
|
||||
"""DeepInfra inference-API rerank provider.
|
||||
|
||||
DeepInfra exposes reranker models (e.g. ``Qwen/Qwen3-Reranker-4B``) at::
|
||||
|
||||
POST {base_url}/{model}
|
||||
Authorization: Bearer <api_key>
|
||||
Content-Type: application/json
|
||||
|
||||
The request shape is the inference-API convention used across DeepInfra
|
||||
reranker / classifier models::
|
||||
|
||||
{
|
||||
"queries": ["<query>"],
|
||||
"documents": ["<doc 1>", "<doc 2>", ...]
|
||||
}
|
||||
|
||||
The response carries one ``scores`` array per query::
|
||||
|
||||
{
|
||||
"scores": [[0.12, 0.87, 0.43, ...]],
|
||||
"request_id": "...",
|
||||
"inference_status": {...}
|
||||
}
|
||||
|
||||
We submit one query at a time (matches the :class:`RerankProvider`
|
||||
contract) and unwrap the inner score list. Documents longer than the
|
||||
model's input window are silently truncated server-side.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .protocol import RerankError, RerankResult
|
||||
|
||||
# Qwen3-Reranker chat template. The DeepInfra inference API treats the reranker
|
||||
# as a yes/no generator, so the prompt scaffolding must be supplied client-side
|
||||
# (verbatim mirror of the EverCore benchmark's reranker client). Without it the
|
||||
# model scores raw text off-template and returns uncalibrated relevance.
|
||||
_QWEN3_PREFIX = (
|
||||
"<|im_start|>system\n"
|
||||
"Judge whether the Document meets the requirements based on the Query and "
|
||||
'the Instruct provided. Note that the answer can only be "yes" or "no".'
|
||||
"<|im_end|>\n<|im_start|>user\n"
|
||||
)
|
||||
_QWEN3_SUFFIX = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
|
||||
_DEFAULT_RERANK_INSTRUCTION = (
|
||||
"Given a question and a passage, determine if the passage contains "
|
||||
"information relevant to answering the question."
|
||||
)
|
||||
|
||||
|
||||
def _format_qwen3_inputs(
|
||||
query: str, documents: list[str], instruction: str | None
|
||||
) -> tuple[str, list[str]]:
|
||||
"""Wrap query + documents in the Qwen3-Reranker chat template."""
|
||||
instr = instruction or _DEFAULT_RERANK_INSTRUCTION
|
||||
formatted_query = f"{_QWEN3_PREFIX}<Instruct>: {instr}\n<Query>: {query}\n"
|
||||
formatted_docs = [f"<Document>: {doc}{_QWEN3_SUFFIX}" for doc in documents]
|
||||
return formatted_query, formatted_docs
|
||||
|
||||
|
||||
class DeepInfraRerankProvider:
    """Rerank provider for the DeepInfra inference API.

    Document lists longer than ``batch_size`` are split into chunks that are
    scored concurrently (bounded by ``max_concurrent``) and merged back using
    each document's position in the original input.

    Args:
        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
        api_key: Bearer credential as plain ``str``.
        base_url: Inference endpoint root
            (e.g. ``"https://api.deepinfra.com/v1/inference"``). The
            ``/{model}`` suffix is appended at construction time.
        timeout: Per-request timeout, seconds.
        max_retries: Soft retry count on transport errors / 5xx / 429.
        batch_size: Cap on documents per request (large doc lists are
            split, scores merged in input order). Must be at least 1.
        max_concurrent: Cap on in-flight requests across all batches.
            Must be at least 1.

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrent`` is below 1.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        # A zero step crashes ``range()`` and a negative one yields no chunks
        # at all (an empty result for a non-empty input), so reject both here.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        # ``Semaphore(0)`` can never be acquired: every rerank would hang.
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")
        self._model = model
        self._api_key = api_key
        self._url = f"{base_url.rstrip('/')}/{model}"
        self._timeout = timeout
        self._max_retries = max_retries
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score every document against ``query``; return sorted desc.

        Args:
            query: The search query.
            documents: Passage texts to score.
            instruction: Task instruction folded into the Qwen3 prompt;
                ``None`` selects the module default.

        Returns:
            One :class:`RerankResult` per input document, highest score
            first. ``index`` refers to the position in ``documents``.

        Raises:
            RerankError: If any chunk request fails or returns a malformed
                body.
        """
        if not documents:
            return []

        # Keep each chunk's offset so per-chunk positions can be mapped back
        # to positions in the caller's list.
        chunks: list[tuple[int, list[str]]] = [
            (offset, list(documents[offset : offset + self._batch_size]))
            for offset in range(0, len(documents), self._batch_size)
        ]
        chunk_scores = await asyncio.gather(
            *(self._score_chunk(query, docs, instruction) for _, docs in chunks)
        )
        scored: list[RerankResult] = []
        for (offset, _), scores in zip(chunks, chunk_scores, strict=True):
            scored.extend(
                RerankResult(index=offset + i, score=score)
                for i, score in enumerate(scores)
            )
        scored.sort(key=lambda r: r.score, reverse=True)
        return scored

    async def _score_chunk(
        self, query: str, documents: list[str], instruction: str | None
    ) -> list[float]:
        """POST one chunk and return its scores in document order.

        Transport errors, 5xx and 429 responses are retried up to
        ``max_retries`` times; any other non-200 status fails immediately.

        Raises:
            RerankError: On exhausted retries, a non-retryable status, or a
                200 response whose body is not valid JSON / lacks scores.
        """
        formatted_query, formatted_docs = _format_qwen3_inputs(
            query, documents, instruction
        )
        payload: dict[str, Any] = {
            "queries": [formatted_query],
            "documents": formatted_docs,
        }
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        async with self._semaphore:
            for attempt in range(self._max_retries + 1):
                try:
                    async with httpx.AsyncClient(timeout=self._timeout) as client:
                        response = await client.post(
                            self._url, json=payload, headers=headers
                        )
                except httpx.HTTPError as exc:
                    if attempt == self._max_retries:
                        raise RerankError(
                            f"DeepInfra rerank transport failure: {exc}"
                        ) from exc
                    continue

                if response.status_code == 200:
                    # ``Response.json()`` raises a ValueError subclass on a
                    # non-JSON body; surface it as the provider error type.
                    try:
                        body = response.json()
                    except ValueError as exc:
                        raise RerankError(
                            "DeepInfra rerank returned a non-JSON body: "
                            f"{response.text[:200]}"
                        ) from exc
                    return _extract_scores(body, len(documents))

                # Retry on 5xx / 429 only; surface 4xx immediately.
                retryable = response.status_code >= 500 or response.status_code == 429
                if retryable and attempt < self._max_retries:
                    continue
                raise RerankError(
                    f"DeepInfra rerank HTTP {response.status_code}: "
                    f"{response.text[:200]}"
                )

        # Only reachable when ``max_retries`` is negative (loop never runs).
        raise RerankError(
            f"DeepInfra rerank exhausted retries ({self._max_retries})"
        )
|
||||
|
||||
|
||||
def _extract_scores(body: dict[str, Any], expected_len: int) -> list[float]:
|
||||
"""Unwrap ``scores`` from the DeepInfra response body.
|
||||
|
||||
Inference API returns ``scores`` as either:
|
||||
|
||||
- ``[[s1, s2, ...]]`` — one score row per query (current single-query
|
||||
shape); take row 0.
|
||||
- ``[s1, s2, ...]`` — flat list (fallback for providers that drop
|
||||
the outer list when only one query is sent).
|
||||
"""
|
||||
raw = body.get("scores")
|
||||
if not isinstance(raw, list):
|
||||
raise RerankError(f"DeepInfra rerank response missing scores: {body!r}")
|
||||
row = raw[0] if raw and isinstance(raw[0], list) else raw
|
||||
if len(row) != expected_len:
|
||||
raise RerankError(
|
||||
f"DeepInfra rerank returned {len(row)} scores, expected {expected_len}"
|
||||
)
|
||||
return [float(s) for s in row]
|
||||
74
src/everos/component/rerank/factory.py
Normal file
74
src/everos/component/rerank/factory.py
Normal file
@ -0,0 +1,74 @@
|
||||
"""Factory for building a rerank provider from :class:`RerankSettings`.
|
||||
|
||||
The ``provider`` field on :class:`RerankSettings` selects which concrete
|
||||
implementation to build:
|
||||
|
||||
- ``"deepinfra"`` → :class:`DeepInfraRerankProvider`
|
||||
- ``"vllm"`` → :class:`VllmRerankProvider`
|
||||
|
||||
Adding a new provider = one match arm here + one new file under
|
||||
:mod:`everos.component.rerank`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.config import RerankSettings
|
||||
|
||||
from .deepinfra_provider import DeepInfraRerankProvider
|
||||
from .protocol import RerankProvider
|
||||
from .vllm_provider import VllmRerankProvider
|
||||
|
||||
|
||||
def build_rerank_provider(settings: RerankSettings) -> RerankProvider:
    """Instantiate the rerank provider selected by ``settings.provider``.

    Args:
        settings: The :class:`RerankSettings` slice from
            :func:`everos.config.load_settings`.

    Returns:
        A :class:`RerankProvider` ready to call ``rerank``.

    Raises:
        ValueError: If ``model`` or ``base_url`` is unset, if ``provider``
            is ``"deepinfra"`` without an ``api_key``, or if ``provider``
            does not match a known implementation. For ``"vllm"`` the
            ``api_key`` is optional and defaults to an empty string.
    """
    if not settings.model:
        raise ValueError(
            "Rerank model is not configured "
            "(set EVEROS_RERANK__MODEL or [rerank] model in user toml)"
        )
    if not settings.base_url:
        raise ValueError(
            "Rerank base_url is not configured (set EVEROS_RERANK__BASE_URL)"
        )

    secret = settings.api_key
    api_key = secret.get_secret_value() if secret else ""
    provider = settings.provider

    provider_cls: type[DeepInfraRerankProvider] | type[VllmRerankProvider]
    if provider == "deepinfra":
        # Hosted DeepInfra always needs a credential.
        if not api_key:
            raise ValueError(
                "DeepInfra rerank api_key is not configured "
                "(set EVEROS_RERANK__API_KEY)"
            )
        provider_cls = DeepInfraRerankProvider
    elif provider == "vllm":
        provider_cls = VllmRerankProvider
    else:
        raise ValueError(f"unknown rerank provider: {provider!r}")

    # Both providers take the same keyword-only constructor arguments.
    return provider_cls(
        model=settings.model,
        api_key=api_key,
        base_url=settings.base_url,
        timeout=settings.timeout_seconds,
        max_retries=settings.max_retries,
        batch_size=settings.batch_size,
        max_concurrent=settings.max_concurrent,
    )
|
||||
62
src/everos/component/rerank/protocol.py
Normal file
62
src/everos/component/rerank/protocol.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""Rerank provider protocol.
|
||||
|
||||
The contract every rerank provider satisfies: given a query and a list
|
||||
of candidate documents, return a re-ordered list of ``(index, score)``
|
||||
pairs (highest relevance first). The provider does **not** filter —
|
||||
that's the caller's job (e.g. drop scores below a threshold, take
|
||||
``top_k``). Returning every input pair keeps the contract stable
|
||||
across providers whose backends may not natively support ``top_n``.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import NamedTuple, Protocol, runtime_checkable
|
||||
|
||||
|
||||
class RerankError(Exception):
    """Error raised when a rerank provider call fails for any reason."""
|
||||
|
||||
|
||||
class RerankResult(NamedTuple):
    """Score assigned to a single input document by a rerank call.

    Fields:
        index: Position of the document in the *input* list, which lets the
            caller map the result back to the original document text.
        score: Provider-defined relevance; a higher value means more relevant.
    """

    index: int
    score: float
|
||||
|
||||
|
||||
@runtime_checkable
class RerankProvider(Protocol):
    """Structural contract for asynchronous rerank providers."""

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score ``documents`` against ``query`` and return them re-ordered.

        Args:
            query: The search query.
            documents: Passage texts to score against ``query``.
            instruction: Task instruction for instruction-tuned rerankers
                (e.g. Qwen3-Reranker). A provider that wraps the model's chat
                template folds it into the prompt; a provider backed by a
                dedicated rerank endpoint that templates server-side may
                ignore it. ``None`` defers to the provider's default.

        Returns:
            One :class:`RerankResult` per input document, ordered by
            ``score`` descending, so the list length equals
            ``len(documents)``.
        """
|
||||
173
src/everos/component/rerank/vllm_provider.py
Normal file
173
src/everos/component/rerank/vllm_provider.py
Normal file
@ -0,0 +1,173 @@
|
||||
"""vLLM rerank provider.
|
||||
|
||||
Self-deployed vLLM (and other OpenAI-compatible rerank servers) expose
|
||||
the OpenAI-style rerank endpoint::
|
||||
|
||||
POST {base_url}/rerank
|
||||
Authorization: Bearer <api_key> # optional for self-hosted ("EMPTY")
|
||||
Content-Type: application/json
|
||||
|
||||
Request body::
|
||||
|
||||
{
|
||||
"model": "<model>",
|
||||
"query": "<query>",
|
||||
"documents": ["<doc 1>", "<doc 2>", ...]
|
||||
}
|
||||
|
||||
Response body::
|
||||
|
||||
{
|
||||
"results": [
|
||||
{"index": 0, "relevance_score": 0.87},
|
||||
{"index": 1, "relevance_score": 0.43},
|
||||
...
|
||||
],
|
||||
"id": "...",
|
||||
...
|
||||
}
|
||||
|
||||
We pass documents through as-is — the endpoint is expected to apply any
|
||||
prompt-template formatting required by the underlying reranker. Output
|
||||
ordering may already be score-descending; we sort defensively to honour
|
||||
the :class:`RerankProvider` contract regardless of server behaviour.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .protocol import RerankError, RerankResult
|
||||
|
||||
|
||||
class VllmRerankProvider:
    """Rerank provider for vLLM / OpenAI-compat ``/v1/rerank`` endpoints.

    Args:
        model: Reranker model id (e.g. ``"Qwen/Qwen3-Reranker-4B"``).
        api_key: Bearer credential. Pass ``""`` (empty string) for
            self-hosted endpoints that don't require auth — the
            ``Authorization`` header is omitted in that case.
        base_url: API root that *contains* the ``/v1`` prefix
            (e.g. ``"http://localhost:8000/v1"``). The ``/rerank``
            suffix is appended at construction time.
        timeout: Per-request timeout, seconds.
        max_retries: Soft retry count on transport errors / 5xx / 429.
        batch_size: Cap on documents per request. Must be at least 1.
        max_concurrent: Cap on in-flight requests across all batches.
            Must be at least 1.

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrent`` is below 1.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str,
        timeout: float = 30.0,
        max_retries: int = 3,
        batch_size: int = 10,
        max_concurrent: int = 5,
    ) -> None:
        # A zero step crashes ``range()`` and a negative one yields no chunks
        # at all (an empty result for a non-empty input), so reject both here.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        # ``Semaphore(0)`` can never be acquired: every rerank would hang.
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")
        self._model = model
        self._api_key = api_key
        self._url = f"{base_url.rstrip('/')}/rerank"
        self._timeout = timeout
        self._max_retries = max_retries
        self._batch_size = batch_size
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def rerank(
        self,
        query: str,
        documents: Sequence[str],
        *,
        instruction: str | None = None,
    ) -> list[RerankResult]:
        """Score every document against ``query``; return sorted desc.

        ``instruction`` is accepted for protocol parity but not transmitted:
        the OpenAI-compatible ``/rerank`` endpoint applies the reranker's chat
        template (including any task instruction) server-side, so unlike the
        DeepInfra completion-style API there is no client-side template to fill.

        Raises:
            RerankError: If any chunk request fails or returns a malformed
                body.
        """
        if not documents:
            return []

        # Keep each chunk's offset so chunk-local indices returned by the
        # server can be translated to positions in the caller's list.
        chunks: list[tuple[int, list[str]]] = [
            (offset, list(documents[offset : offset + self._batch_size]))
            for offset in range(0, len(documents), self._batch_size)
        ]
        chunk_results = await asyncio.gather(
            *(self._score_chunk(query, docs) for _, docs in chunks)
        )
        scored: list[RerankResult] = []
        for (offset, _), partial in zip(chunks, chunk_results, strict=True):
            scored.extend(
                RerankResult(index=offset + r.index, score=r.score) for r in partial
            )
        # Sort defensively: the server's ordering is not relied upon.
        scored.sort(key=lambda r: r.score, reverse=True)
        return scored

    async def _score_chunk(
        self, query: str, documents: list[str]
    ) -> list[RerankResult]:
        """POST one chunk and return its results with chunk-local indices.

        Transport errors, 5xx and 429 responses are retried up to
        ``max_retries`` times; any other non-200 status fails immediately.

        Raises:
            RerankError: On exhausted retries, a non-retryable status, a 200
                response that is not valid JSON / lacks results, or a result
                index outside the submitted chunk.
        """
        payload: dict[str, Any] = {
            "model": self._model,
            "query": query,
            "documents": documents,
        }
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"

        async with self._semaphore:
            for attempt in range(self._max_retries + 1):
                try:
                    async with httpx.AsyncClient(timeout=self._timeout) as client:
                        response = await client.post(
                            self._url, json=payload, headers=headers
                        )
                except httpx.HTTPError as exc:
                    if attempt == self._max_retries:
                        raise RerankError(
                            f"vLLM rerank transport failure: {exc}"
                        ) from exc
                    continue

                if response.status_code == 200:
                    # ``Response.json()`` raises a ValueError subclass on a
                    # non-JSON body; surface it as the provider error type.
                    try:
                        body = response.json()
                    except ValueError as exc:
                        raise RerankError(
                            "vLLM rerank returned a non-JSON body: "
                            f"{response.text[:200]}"
                        ) from exc
                    results = _parse_rerank_results(body)
                    # The caller adds the chunk offset to each index, so an
                    # index outside this chunk would point at a document
                    # from a different chunk (or past the end of the input).
                    for item in results:
                        if not 0 <= item.index < len(documents):
                            raise RerankError(
                                f"vLLM rerank returned out-of-range index "
                                f"{item.index} for {len(documents)} documents"
                            )
                    return results

                # Retry on 5xx / 429 only; surface 4xx immediately.
                retryable = response.status_code >= 500 or response.status_code == 429
                if retryable and attempt < self._max_retries:
                    continue
                raise RerankError(
                    f"vLLM rerank HTTP {response.status_code}: {response.text[:200]}"
                )

        # Only reachable when ``max_retries`` is negative (loop never runs).
        raise RerankError(f"vLLM rerank exhausted retries ({self._max_retries})")
|
||||
|
||||
|
||||
def _parse_rerank_results(body: dict[str, Any]) -> list[RerankResult]:
|
||||
items = body.get("results")
|
||||
if not isinstance(items, list):
|
||||
raise RerankError(f"vLLM rerank response missing results: {body!r}")
|
||||
parsed: list[RerankResult] = []
|
||||
for item in items:
|
||||
try:
|
||||
parsed.append(
|
||||
RerankResult(
|
||||
index=int(item["index"]),
|
||||
score=float(item["relevance_score"]),
|
||||
)
|
||||
)
|
||||
except (KeyError, TypeError, ValueError) as exc:
|
||||
raise RerankError(f"malformed rerank result entry: {item!r}") from exc
|
||||
return parsed
|
||||
24
src/everos/component/tokenizer/__init__.py
Normal file
24
src/everos/component/tokenizer/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Tokenizer provider — sync app-layer tokenisation for BM25 indexing.
|
||||
|
||||
Public surface:
|
||||
|
||||
- :class:`Tokenizer` — Protocol every provider satisfies.
|
||||
- :class:`JiebaTokenizer` — default jieba-backed implementation.
|
||||
- :func:`build_tokenizer` — factory returning the configured tokenizer.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
tk = build_tokenizer()
|
||||
tokens = tk.tokenize("hello 世界") # ['hello', '世界']
|
||||
"""
|
||||
|
||||
from .factory import build_tokenizer as build_tokenizer
|
||||
from .jieba_provider import JiebaTokenizer as JiebaTokenizer
|
||||
from .protocol import Tokenizer as Tokenizer
|
||||
|
||||
__all__ = [
|
||||
"JiebaTokenizer",
|
||||
"Tokenizer",
|
||||
"build_tokenizer",
|
||||
]
|
||||
17
src/everos/component/tokenizer/factory.py
Normal file
17
src/everos/component/tokenizer/factory.py
Normal file
@ -0,0 +1,17 @@
|
||||
"""Factory for the cascade-time tokenizer.
|
||||
|
||||
Single implementation today (``JiebaTokenizer``). Lifting this into a
|
||||
factory keeps callers (cascade handler) decoupled from the concrete
|
||||
choice, so swapping to char-bigram / hf tokenizer later is a one-file
|
||||
change — see ``17_lancedb_tables_design.md`` §2.4.1.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .jieba_provider import JiebaTokenizer
|
||||
from .protocol import Tokenizer
|
||||
|
||||
|
||||
def build_tokenizer() -> Tokenizer:
    """Return a new instance of the default tokenizer, ``JiebaTokenizer``."""
    tokenizer: Tokenizer = JiebaTokenizer()
    return tokenizer
|
||||
141
src/everos/component/tokenizer/jieba_provider.py
Normal file
141
src/everos/component/tokenizer/jieba_provider.py
Normal file
@ -0,0 +1,141 @@
|
||||
"""Jieba-based tokenizer — covers CJK + English mixed content.
|
||||
|
||||
Uses ``jieba.cut_for_search`` (search-mode segmentation: yields both the
|
||||
greedy max-match segment and its finer sub-segments for compound CJK
|
||||
words). This is the same mode the legacy enterprise keyword-search path uses on
|
||||
the query side — keeping cascade write and search query symmetric is
|
||||
the hard contract for BM25 recall to work.
|
||||
|
||||
After segmentation we drop:
|
||||
|
||||
* whitespace / empty tokens (so the join-on-space output stays clean),
|
||||
* tokens shorter than ``min_token_length`` (default 2 — same threshold
|
||||
enterprise's ``filter_stopwords(min_length=2)`` uses; single-char
|
||||
fragments mostly hurt BM25 precision),
|
||||
* tokens in a small bilingual stopword set (Chinese function words +
|
||||
English articles / prepositions / aux verbs).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Final
|
||||
|
||||
import jieba
|
||||
|
||||
# Small bilingual stopword set. Intentionally tight (not a full
# Chinese stopword list) so the behaviour is predictable; callers
# tuning recall can subclass / extend.
_DEFAULT_STOPWORDS: Final[frozenset[str]] = frozenset(
    {
        # English — articles / prepositions / aux verbs that dominate BM25
        # idf-noise but add no recall value.
        "the",
        "a",
        "an",
        "and",
        "or",
        "but",
        "if",
        "of",
        "to",
        "in",
        "on",
        "at",
        "by",
        "for",
        "with",
        "as",
        "is",
        "are",
        "was",
        "were",
        "be",
        "been",
        "being",
        "do",
        "does",
        "did",
        "has",
        "have",
        "had",
        "this",
        "that",
        "these",
        "those",
        "it",
        "its",
        # Chinese — function words / particles. ``cut_for_search`` emits
        # these as single-char tokens anyway, and the min_length=2 floor
        # would drop most; listing them explicitly makes the intent clear
        # and is a no-op when min_length filtering also kicks in.
        "的",
        "了",
        "和",
        "是",
        "在",
        "我",
        "你",
        "他",
        "她",
        "它",
        "也",
        "都",
        "就",
        "还",
        "或",
        "及",
        "与",
        "对",
        "把",
        "被",
        "有",
        "没",
        "不",
        "啊",
        "吗",
        "呢",
        "吧",
        "哦",
    }
)

_DEFAULT_MIN_TOKEN_LENGTH: Final[int] = 2


class JiebaTokenizer:
    """Tokenizer that calls into ``jieba.cut_for_search`` and filters.

    Args:
        min_token_length: Tokens shorter than this (after stripping and
            lower-casing) are dropped.
        extra_stopwords: Additional stopwords merged with the default set.
            They are stripped and lower-cased the same way tokens are, so a
            mixed-case entry such as ``"Foo"`` filters the token ``"foo"``.
    """

    def __init__(
        self,
        *,
        min_token_length: int = _DEFAULT_MIN_TOKEN_LENGTH,
        extra_stopwords: frozenset[str] | None = None,
    ) -> None:
        # Touching ``jieba.initialize()`` here would force eager dict load
        # at import time and balloon test-collection latency. ``jieba.cut*``
        # lazy-loads on first call instead.
        self._min_len = min_token_length
        if extra_stopwords:
            # Tokens are compared after ``strip().lower()``; normalise the
            # extras identically, otherwise non-lowercase entries never match.
            normalised = frozenset(word.strip().lower() for word in extra_stopwords)
            self._stopwords = _DEFAULT_STOPWORDS | normalised
        else:
            self._stopwords = _DEFAULT_STOPWORDS

    def tokenize(self, text: str) -> list[str]:
        """Return the filtered, lower-cased search-mode tokens of ``text``.

        Empty input yields an empty list without calling jieba. Tokens that
        are empty after stripping, shorter than the minimum length, or in the
        stopword set are dropped; the remaining order follows jieba's output.
        """
        if not text:
            return []
        out: list[str] = []
        for raw in jieba.cut_for_search(text):
            tok = raw.strip().lower()
            # ``strip()`` already turns whitespace-only tokens into "".
            if not tok or len(tok) < self._min_len or tok in self._stopwords:
                continue
            out.append(tok)
        return out

    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
        """Tokenise each string in ``texts``, preserving input order."""
        return [self.tokenize(t) for t in texts]
|
||||
37
src/everos/component/tokenizer/protocol.py
Normal file
37
src/everos/component/tokenizer/protocol.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""Tokenizer protocol.
|
||||
|
||||
App-layer tokenisation gates every BM25-indexed field in LanceDB
|
||||
(``17_lancedb_tables_design.md`` §2.4.1): the source surface form lives
|
||||
in ``<field>`` while the space-joined token stream lives in
|
||||
``<field>_tokens``, and the FTS index reads only the latter using a
|
||||
whitespace tokenizer. Keeping the tokenizer decision in the app layer
|
||||
means it can swap (jieba → unigram → hf) without re-indexing or
|
||||
touching LanceDB schemas.
|
||||
|
||||
The protocol is sync — every concrete tokenizer in scope today (jieba,
|
||||
char-bigram, regex word-split) is CPU-bound with no IO, so an async
|
||||
wrapper would just shuffle work onto the event loop. If a future GPU
|
||||
or remote tokenizer needs IO it should add an async method explicitly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
class Tokenizer(Protocol):
    """Synchronous tokenizer contract consumed by the cascade handler."""

    def tokenize(self, text: str) -> list[str]:
        """Split ``text`` into its ordered list of tokens.

        Empty and whitespace-only tokens must be dropped by the
        implementation, so that joining the result with single spaces never
        produces adjacent spaces.
        """

    def tokenize_batch(self, texts: Sequence[str]) -> list[list[str]]:
        """Tokenise every string in ``texts``; results keep the input order."""
|
||||
22
src/everos/component/utils/__init__.py
Normal file
22
src/everos/component/utils/__init__.py
Normal file
@ -0,0 +1,22 @@
|
||||
"""Common utilities (datetime, tokenization, etc.).
|
||||
|
||||
Public API:
|
||||
from everos.component.utils.datetime import (
|
||||
UtcDatetime,
|
||||
ensure_utc,
|
||||
from_iso_format,
|
||||
from_timestamp,
|
||||
get_now_with_timezone,
|
||||
get_utc_now,
|
||||
to_date_str,
|
||||
to_display_tz,
|
||||
to_iso_format,
|
||||
to_timestamp_ms,
|
||||
today_with_timezone,
|
||||
)
|
||||
from everos.component.utils.tokenize import (
|
||||
tokens_for_index,
|
||||
tokens_for_query,
|
||||
join_tokens,
|
||||
)
|
||||
"""
|
||||
263
src/everos/component/utils/datetime.py
Normal file
263
src/everos/component/utils/datetime.py
Normal file
@ -0,0 +1,263 @@
|
||||
"""Timezone-aware datetime helpers.
|
||||
|
||||
EverOS follows a **two-zone discipline**:
|
||||
|
||||
* **Storage** (SQLite + LanceDB) is always UTC. Use :func:`get_utc_now`
|
||||
for any ``default_factory`` / write-path timestamp; if you accept a
|
||||
``datetime`` from a caller, normalise with :func:`ensure_utc` before
|
||||
it crosses the persistence boundary.
|
||||
* **Display** (markdown frontmatter, HTTP API response, date buckets for
|
||||
daily-log filenames) uses the configured "display timezone" from
|
||||
:attr:`everos.config.MemorySettings.timezone` (``EVEROS_MEMORY__TIMEZONE``).
|
||||
Use :func:`get_now_with_timezone` / :func:`today_with_timezone` /
|
||||
:func:`to_display_tz` here.
|
||||
|
||||
The display timezone also serves as the **fallback timezone for naive
|
||||
input**: if a caller hands us a string / datetime without offset (e.g.
|
||||
a hand-written ISO timestamp), :func:`from_iso_format` attaches the
|
||||
display timezone before further processing — that matches a human's
|
||||
intuition ("if I didn't say a zone, you should assume my zone").
|
||||
|
||||
Never call :func:`datetime.datetime.now` /
|
||||
:func:`datetime.datetime.utcnow` directly — see
|
||||
:doc:`.claude/rules/datetime-handling`.
|
||||
|
||||
Cache invalidation in tests::
|
||||
|
||||
load_settings.cache_clear()
|
||||
_display_tz.cache_clear()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from functools import cache
|
||||
from typing import Annotated
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from pydantic import AfterValidator
|
||||
|
||||
_MS_THRESHOLD = 1e12 # ts >= this is treated as milliseconds
|
||||
|
||||
|
||||
@cache
def _display_tz() -> _dt.tzinfo:
    """Return the configured **display timezone**; the result is cached.

    The zone name comes from :attr:`everos.config.MemorySettings.timezone`.
    That field is validated with :class:`zoneinfo.ZoneInfo` when settings
    load, so the name is already known to be valid here. The zone is used for:

    1. ISO output rendered in markdown / API responses.
    2. The fallback zone attached to naive-input datetimes.

    Storage is **not** affected by it — see :func:`get_utc_now`.
    """
    # Imported lazily so pydantic-settings is not pulled in at module load.
    from everos.config import load_settings

    zone_name = load_settings().memory.timezone
    return ZoneInfo(zone_name)
|
||||
|
||||
|
||||
def get_utc_now() -> _dt.datetime:
    """Return "now" as a timezone-aware datetime in UTC.

    This is the clock for every **storage** write-path: SQLite
    ``default_factory`` values, LanceDB rows, OME event ``ts`` and any
    other internal "when did this happen" record. Because it ignores the
    display timezone, changing ``EVEROS_MEMORY__TIMEZONE`` on a deployment
    does not misalign rows that were already written.

    Display-side code should call :func:`get_now_with_timezone`, or render
    a stored value through :func:`to_display_tz`.
    """
    utc = _dt.timezone.utc
    return _dt.datetime.now(utc)
|
||||
|
||||
|
||||
def get_now_with_timezone() -> _dt.datetime:
    """Return "now" expressed in the configured **display timezone**.

    Intended for **display** write-paths only: markdown frontmatter values,
    daily-log date buckets, and anywhere a human reads the literal string.
    The result carries the display offset, so ``.isoformat()`` yields
    something like ``2026-05-29T14:00:00+08:00``.

    Storage / internal "when did this happen" timestamps must use
    :func:`get_utc_now` so the display timezone never leaks into
    persisted rows.
    """
    zone = _display_tz()
    return _dt.datetime.now(zone)
|
||||
|
||||
|
||||
def today_with_timezone() -> _dt.date:
    """Return the current calendar date in the **display timezone**.

    The single entry point for *date buckets* (e.g. daily-log file
    boundaries): it wraps ``get_now_with_timezone().date()`` so every
    caller derives "today" from the same timezone rules.
    """
    current = get_now_with_timezone()
    return current.date()
|
||||
|
||||
|
||||
def ensure_utc(d: _dt.datetime | None) -> _dt.datetime | None:
|
||||
"""Normalise any datetime to UTC at the **storage boundary**.
|
||||
|
||||
Semantics:
|
||||
|
||||
* ``None`` → ``None`` (nullable-column convenience: lets callers
|
||||
pipe ``ensure_utc(row.last_attempt_at)`` without an outer guard).
|
||||
* Aware input → ``astimezone(UTC)``.
|
||||
* **Naive input → assume UTC** (attach ``tzinfo=UTC``); no
|
||||
display-tz fallback.
|
||||
|
||||
Why naive→UTC rather than naive→display→UTC? Every caller of this
|
||||
function sits at the storage boundary, and the dominant naive
|
||||
source is SQLite reads: SQLAlchemy strips tz on write so what
|
||||
comes back is a naive value whose bytes are UTC. Treating those
|
||||
naive reads as display-tz would drift by the configured offset on
|
||||
every round trip — exactly the bug Q2 prevents.
|
||||
|
||||
Caller-supplied datetimes that may genuinely be naive in display
|
||||
tz (e.g. ISO strings from HTTP request bodies that omitted the
|
||||
offset) should be funnelled through :func:`from_iso_format` first,
|
||||
which encodes the "if you didn't say a zone, assume your zone"
|
||||
rule. The aware result then passes through ``ensure_utc`` as a
|
||||
pure ``astimezone(UTC)``.
|
||||
|
||||
Use the :data:`UtcDatetime` ``Annotated`` type to apply this
|
||||
automatically on Pydantic model fields.
|
||||
"""
|
||||
if d is None:
|
||||
return None
|
||||
if d.tzinfo is None:
|
||||
return d.replace(tzinfo=_dt.UTC)
|
||||
return d.astimezone(_dt.UTC)
|
||||
|
||||
|
||||
def to_display_tz(d: _dt.datetime | None) -> _dt.datetime | None:
    """Express a datetime in the configured **display timezone**.

    This is the **response render boundary**: datetimes leaving the system
    in an API response or markdown body go through here so the user sees
    their wall-clock time with the matching ``+HH:MM`` offset.

    * ``None`` is returned unchanged (nullable-column convenience).
    * A naive datetime is taken to already be display-tz local time (the
      fallback rule); the zone is attached without shifting the fields.
    * An aware datetime is converted with ``astimezone(...)``.
    """
    if d is None:
        return None
    zone = _display_tz()
    if d.tzinfo is not None:
        return d.astimezone(zone)
    return d.replace(tzinfo=zone)
|
||||
|
||||
|
||||
UtcDatetime = Annotated[
    _dt.datetime,
    AfterValidator(ensure_utc),
]
"""``datetime`` annotation for Pydantic that normalises values to UTC.

Use it on any SQLModel / Pydantic ``datetime`` field backed by a storage
column. INSERT defaults and values read back both run through
:func:`ensure_utc`, which neutralises SQLite's tz-stripping: rows are
written as UTC and come back UTC-aware.

Usage::

    from everos.component.utils.datetime import UtcDatetime, get_utc_now

    class MyRow(BaseTable, table=True):
        happened_at: UtcDatetime = Field(default_factory=get_utc_now)
"""
|
||||
|
||||
|
||||
def from_timestamp(ts: int | float) -> _dt.datetime:
    """Convert a Unix timestamp to a timezone-aware datetime.

    The unit is auto-detected: values ``>= 1e12`` are read as milliseconds,
    anything smaller as seconds. The result is expressed in the display
    timezone.
    """
    if ts >= _MS_THRESHOLD:
        seconds = ts / 1000.0
    else:
        seconds = float(ts)
    return _dt.datetime.fromtimestamp(seconds, tz=_display_tz())
|
||||
|
||||
|
||||
def from_iso_format(value: _dt.datetime | int | float | str) -> _dt.datetime:
|
||||
"""Parse a value into a timezone-aware datetime (strict).
|
||||
|
||||
Accepted inputs:
|
||||
* ``datetime`` — naive values get the default timezone attached.
|
||||
* ``int`` / ``float`` — Unix timestamp (auto-detect seconds vs ms).
|
||||
* ``str`` — ISO-8601, including ``"Z"`` suffix for UTC.
|
||||
|
||||
Raises:
|
||||
TypeError: On unsupported input type.
|
||||
ValueError: On malformed string / negative timestamp.
|
||||
"""
|
||||
if isinstance(value, _dt.datetime):
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=_display_tz())
|
||||
return value
|
||||
if isinstance(value, bool): # bool is an int subclass — reject explicitly
|
||||
raise TypeError("from_iso_format does not accept bool")
|
||||
if isinstance(value, int | float):
|
||||
return from_timestamp(value)
|
||||
if isinstance(value, str):
|
||||
s = value.strip()
|
||||
# Python's fromisoformat accepts "+HH:MM" but not the "Z" suffix; map it.
|
||||
if s.endswith("Z"):
|
||||
s = s[:-1] + "+00:00"
|
||||
parsed = _dt.datetime.fromisoformat(s)
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=_display_tz())
|
||||
return parsed
|
||||
raise TypeError(
|
||||
f"from_iso_format: unsupported type {type(value).__name__}; "
|
||||
"expected datetime / int / float / str"
|
||||
)
|
||||
|
||||
|
||||
def to_iso_format(
|
||||
value: _dt.datetime | int | float | str | None,
|
||||
) -> str | None:
|
||||
"""Render a value as an ISO-8601 string (timezone-aware).
|
||||
|
||||
Accepted inputs:
|
||||
* ``None`` — returns ``None`` (nullable column convenience).
|
||||
* ``datetime`` — rendered as-is (must already be tz-aware).
|
||||
* ``int`` / ``float`` — interpreted via :func:`from_timestamp`.
|
||||
* ``str`` — re-validated through :func:`from_iso_format`.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, _dt.datetime):
|
||||
return value.isoformat()
|
||||
if isinstance(value, bool): # bool is an int subclass
|
||||
raise TypeError("to_iso_format does not accept bool")
|
||||
if isinstance(value, int | float):
|
||||
return from_timestamp(value).isoformat()
|
||||
if isinstance(value, str):
|
||||
if not value:
|
||||
return None
|
||||
return from_iso_format(value).isoformat()
|
||||
raise TypeError(
|
||||
f"to_iso_format: unsupported type {type(value).__name__}; "
|
||||
"expected datetime / int / float / str / None"
|
||||
)
|
||||
|
||||
|
||||
def to_date_str(d: _dt.datetime | None) -> str | None:
    """Format the date part of a datetime as ``YYYY-MM-DD``.

    ``None`` is passed through for nullable database columns. If the value
    is already a :class:`datetime.date`, call ``d.isoformat()`` directly
    instead of using this helper.
    """
    return None if d is None else d.date().isoformat()
|
||||
|
||||
|
||||
def to_timestamp_ms(d: _dt.datetime) -> int:
    """Convert a datetime to a Unix timestamp in whole milliseconds.

    The result is computed with exact ``timedelta`` integer arithmetic.
    Going through ``d.timestamp() * 1000`` instead passes through a float
    and can land just below an exact millisecond, which truncation then
    turns into an off-by-one result.

    Sub-millisecond precision is truncated toward zero. A naive datetime
    is interpreted as system local time, the same interpretation
    ``datetime.timestamp()`` applies.
    """
    # ``astimezone()`` without arguments presumes a naive value is local time.
    aware = d.astimezone() if d.utcoffset() is None else d
    elapsed = aware - _dt.datetime(1970, 1, 1, tzinfo=_dt.timezone.utc)
    one_ms = _dt.timedelta(milliseconds=1)
    if elapsed < _dt.timedelta(0):
        # Floor division rounds toward -inf; negate twice to truncate toward 0.
        return -(-elapsed // one_ms)
    return elapsed // one_ms
|
||||
37
src/everos/config/__init__.py
Normal file
37
src/everos/config/__init__.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""Configuration data and Settings schema.
|
||||
|
||||
Public API:
|
||||
from everos.config import (
|
||||
Settings, MemorySettings, SqliteSettings, LanceDBSettings,
|
||||
LLMSettings, MultimodalSettings, EmbeddingSettings, RerankSettings,
|
||||
BoundaryDetectionSettings,
|
||||
load_settings,
|
||||
)
|
||||
|
||||
Distinct from ``everos.component.config`` (which is a *capability* —
|
||||
loader / merger / env reader).
|
||||
"""
|
||||
|
||||
from .settings import BoundaryDetectionSettings as BoundaryDetectionSettings
|
||||
from .settings import EmbeddingSettings as EmbeddingSettings
|
||||
from .settings import LanceDBSettings as LanceDBSettings
|
||||
from .settings import LLMSettings as LLMSettings
|
||||
from .settings import MemorySettings as MemorySettings
|
||||
from .settings import MultimodalSettings as MultimodalSettings
|
||||
from .settings import RerankSettings as RerankSettings
|
||||
from .settings import Settings as Settings
|
||||
from .settings import SqliteSettings as SqliteSettings
|
||||
from .settings import load_settings as load_settings
|
||||
|
||||
__all__ = [
|
||||
"BoundaryDetectionSettings",
|
||||
"EmbeddingSettings",
|
||||
"LLMSettings",
|
||||
"LanceDBSettings",
|
||||
"MemorySettings",
|
||||
"MultimodalSettings",
|
||||
"RerankSettings",
|
||||
"Settings",
|
||||
"SqliteSettings",
|
||||
"load_settings",
|
||||
]
|
||||
137
src/everos/config/default.toml
Normal file
137
src/everos/config/default.toml
Normal file
@ -0,0 +1,137 @@
|
||||
# everos default configuration.
|
||||
#
|
||||
# Lookup order (later overrides earlier):
|
||||
# 1. This file (shipped defaults; lowest priority)
|
||||
# 2. ~/.everos/config.toml — user-level overrides (optional;
|
||||
# path is overridable via EVEROS_CONFIG_FILE)
|
||||
# 3. .env file in the working directory
|
||||
# 4. Environment variables — EVEROS_<SECTION>__<KEY>
|
||||
# e.g. EVEROS_SQLITE__BUSY_TIMEOUT_MS=10000
|
||||
# 5. Programmatic init args (highest priority)
|
||||
#
|
||||
# `null` (omitted in TOML) means "use the Pydantic default declared in code".
|
||||
|
||||
[memory]
|
||||
# memory-root is the single directory holding all persisted memory.
|
||||
# `~` is expanded; the path is resolved when MemoryRoot is constructed.
|
||||
root = "~/.everos"
|
||||
# Effective timezone for date buckets and timestamps. Drives
|
||||
# component.utils.datetime; this is the SOLE source — OS `TZ` is not
|
||||
# read. Override via `EVEROS_MEMORY__TIMEZONE` env var if needed.
|
||||
timezone = "UTC"
|
||||
|
||||
[api]
|
||||
# HTTP server bind for ``everos server start``. Default ``127.0.0.1``
|
||||
# keeps the API on loopback only — EverOS ships no built-in auth (see
|
||||
# SECURITY.md threat model). Only set ``host = "0.0.0.0"`` after you
|
||||
# have placed your own gateway / auth layer in front of the server.
|
||||
# Override via EVEROS_API__HOST and EVEROS_API__PORT.
|
||||
host = "127.0.0.1"
|
||||
port = 8000
|
||||
|
||||
[sqlite]
|
||||
# PRAGMA journal_mode — WAL is the recommended high-concurrency mode.
|
||||
journal_mode = "WAL"
|
||||
# PRAGMA synchronous — NORMAL is safe under WAL and ~2x faster than FULL.
|
||||
synchronous = "NORMAL"
|
||||
# PRAGMA foreign_keys — must be explicitly enabled per connection.
|
||||
foreign_keys = true
|
||||
# PRAGMA temp_store — MEMORY keeps query intermediates in RAM (no IO impact
|
||||
# on durability — only affects sort/group/temp-table calculation buffers).
|
||||
temp_store = "MEMORY"
|
||||
# PRAGMA busy_timeout — milliseconds to wait on a locked DB before erroring.
|
||||
busy_timeout_ms = 5000
|
||||
# PRAGMA journal_size_limit — cap WAL/journal at ~64 MB.
|
||||
journal_size_limit_bytes = 67108864
|
||||
# PRAGMA cache_size — KB of page cache (per connection).
|
||||
cache_size_kb = 2048
|
||||
|
||||
[lancedb]
|
||||
# Read consistency interval in seconds.
|
||||
# omitted / null -> no consistency check (fastest reads)
|
||||
# 0 -> strict (every read checks updates)
|
||||
# >0 -> eventual (interval seconds between checks)
|
||||
# Uncomment to override:
|
||||
# read_consistency_seconds = 5.0
|
||||
|
||||
[llm]
|
||||
# Provider-agnostic OpenAI-protocol client config. Override via env:
|
||||
# EVEROS_LLM__MODEL, EVEROS_LLM__API_KEY, EVEROS_LLM__BASE_URL
|
||||
# Or via a ``.env`` file in the working directory (auto-loaded).
|
||||
model = "gpt-4o-mini"
|
||||
# api_key = ""
|
||||
# base_url = ""
|
||||
|
||||
[multimodal]
|
||||
# Independent LLM for multimodal parsing (everalgo-parser); must accept
|
||||
# image / pdf / audio image_url parts. Override via env:
|
||||
# EVEROS_MULTIMODAL__MODEL, EVEROS_MULTIMODAL__API_KEY, EVEROS_MULTIMODAL__BASE_URL
|
||||
model = "google/gemini-3-flash-preview"
|
||||
max_concurrency = 4
|
||||
# api_key = ""
|
||||
# base_url = ""
|
||||
# file:// content-item support (read locally by EverOS, not everalgo).
|
||||
# file_uri_allow_dirs: empty = allow any readable file (local-first default);
|
||||
# list base dirs to confine reads when the API is exposed.
|
||||
# file_uri_allow_dirs = ["/srv/uploads"]
|
||||
# file_uri_max_bytes = 52428800 # 50 MiB cap per file:// asset
|
||||
|
||||
[embedding]
|
||||
# OpenAI-compatible embedding endpoint. Override via env:
|
||||
# EVEROS_EMBEDDING__MODEL, EVEROS_EMBEDDING__API_KEY, EVEROS_EMBEDDING__BASE_URL
|
||||
# model / api_key / base_url have no shipped defaults — must be set
|
||||
# (env or user toml) before the embedding capability is used.
|
||||
# model = "Qwen/Qwen3-Embedding-4B"
|
||||
# api_key = ""
|
||||
# base_url = "https://api.example.com/v1"
|
||||
timeout_seconds = 30.0
|
||||
max_retries = 3
|
||||
batch_size = 10
|
||||
max_concurrent = 5
|
||||
|
||||
[rerank]
|
||||
# Rerank provider. Override via env:
|
||||
# EVEROS_RERANK__PROVIDER, EVEROS_RERANK__MODEL, EVEROS_RERANK__API_KEY,
|
||||
# EVEROS_RERANK__BASE_URL
|
||||
# `provider` picks the request-shape:
|
||||
# - "deepinfra" -> POST {base_url}/{model} (DeepInfra inference API)
|
||||
# - "vllm" -> POST {base_url}/rerank (OpenAI-compat rerank endpoint)
|
||||
provider = "deepinfra"
|
||||
# model = "Qwen/Qwen3-Reranker-4B"
|
||||
# api_key = ""
|
||||
# base_url = "https://api.deepinfra.com/v1/inference"
|
||||
timeout_seconds = 30.0
|
||||
max_retries = 3
|
||||
batch_size = 10
|
||||
max_concurrent = 5
|
||||
|
||||
[boundary_detection]
|
||||
# Passed through to ``everalgo.BoundaryDetector.adetect``.
|
||||
hard_token_limit = 65536
|
||||
hard_msg_limit = 500
|
||||
|
||||
[search]
|
||||
# Vector retrieval strategy when SearchMethod.VECTOR is selected.
|
||||
# "maxsim_atomic" (default): ANN over atomic_fact.vector (pool=top_k*20),
|
||||
# max-pool the per-fact cosine by parent memcell, then reverse-resolve
|
||||
# to episode rows. MaxSim over atomic facts; +0.6pp over the legacy
|
||||
# episode-vector path on LoCoMo, at the cost of one extra LanceDB scan.
|
||||
# "episode": single-vector ANN over episode.vector (legacy path).
|
||||
# Override via EVEROS_SEARCH__VECTOR_STRATEGY.
|
||||
vector_strategy = "maxsim_atomic"
|
||||
|
||||
[memorize]
|
||||
# Conversation mode. Selects the boundary detector and which pipelines run:
|
||||
# "chat" -> BoundaryDetector + user_memory only
|
||||
# "agent" -> AgentBoundaryDetector + user_memory + agent_memory
|
||||
# A single service process serves one mode at a time; switching mode
|
||||
# requires a restart. Override via EVEROS_MEMORIZE__MODE.
|
||||
mode = "agent"
|
||||
|
||||
# Maximum wall-clock for one memorize() invocation while holding the
|
||||
# per-session lock. On timeout the outer asyncio.timeout cancels the call
|
||||
# and the lock auto-releases so subsequent concurrent /add on the same
|
||||
# session aren't deadlocked. Covers boundary LLM + memcell writes +
|
||||
# synchronous portion of pipeline dispatch.
|
||||
# Override via EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS.
|
||||
session_lock_timeout_seconds = 360.0
|
||||
64
src/everos/config/default_ome.toml
Normal file
64
src/everos/config/default_ome.toml
Normal file
@ -0,0 +1,64 @@
|
||||
# everos OME (Offline Memory Engine) — per-strategy overrides.
|
||||
#
|
||||
# This file is materialised at ``<memory-root>/ome.toml`` by
|
||||
# ``MemoryRoot.ensure()`` on first server start. Edit it to toggle
|
||||
# individual strategies or tweak their gate / retry / cron without
|
||||
# restarting the server; the engine watches this file and hot-reloads
|
||||
# changes within ~2 seconds. Re-running ``ensure()`` will NOT overwrite
|
||||
# your edits — the file is only materialised when absent.
|
||||
#
|
||||
# Overrides are partial: only the keys you set replace the in-code
|
||||
# defaults; omitted keys keep each strategy's coded value. Unknown
|
||||
# keys (typos) raise StartupValidationError, so you cannot silently
|
||||
# misconfigure a strategy.
|
||||
#
|
||||
# Per-strategy schema (StrategyOverride):
|
||||
# enabled = bool # disable a strategy entirely
|
||||
# max_retries = int >= 0 # re-fire on failure
|
||||
# cron = str # replace the @cron(...) trigger
|
||||
# idle_seconds = int > 0 # replace @idle(...) idle window
|
||||
# scan_interval_seconds = int > 0 # paired with idle_seconds; must be <= idle/2
|
||||
# [strategies.<name>.gate] # only valid on @counter-gated strategies
|
||||
# threshold = int > 0 # counter trigger threshold
|
||||
# cooldown_seconds = int >= 0 # min seconds between fires
|
||||
# event_field = str # dispatch field for counter increment
|
||||
|
||||
# ── User-memory pipeline ────────────────────────────────────────────────
|
||||
|
||||
# Atomic fact extraction (runs per memcell). Disable to skip fact mining.
|
||||
# [strategies.extract_atomic_facts]
|
||||
# enabled = true
|
||||
|
||||
# Foresight extraction (runs per memcell). Heavy LLM call — common to
|
||||
# disable in evaluation / benchmark runs.
|
||||
# [strategies.extract_foresight]
|
||||
# enabled = false
|
||||
|
||||
# Profile clustering trigger (counter-gated; fires once N user memcells
|
||||
# accumulate). Lower the threshold to cluster more aggressively.
|
||||
# [strategies.trigger_profile_clustering]
|
||||
# enabled = true
|
||||
# [strategies.trigger_profile_clustering.gate]
|
||||
# threshold = 5
|
||||
|
||||
# User-profile extraction (runs after clustering trigger fires). Common
|
||||
# to disable in evaluation runs where ground-truth profiles aren't measured.
|
||||
# [strategies.extract_user_profile]
|
||||
# enabled = false
|
||||
|
||||
# ── Agent-memory pipeline ───────────────────────────────────────────────
|
||||
|
||||
# Agent case extraction (runs per agent memcell). One per tool call cycle.
|
||||
# [strategies.extract_agent_case]
|
||||
# enabled = true
|
||||
|
||||
# Skill clustering trigger (counter-gated; fires once N agent cases
|
||||
# accumulate per agent).
|
||||
# [strategies.trigger_skill_clustering]
|
||||
# enabled = true
|
||||
# [strategies.trigger_skill_clustering.gate]
|
||||
# threshold = 5
|
||||
|
||||
# Agent skill extraction (runs after skill clustering trigger fires).
|
||||
# [strategies.extract_agent_skill]
|
||||
# enabled = true
|
||||
0
src/everos/config/prompt_slots/.gitkeep
Normal file
0
src/everos/config/prompt_slots/.gitkeep
Normal file
0
src/everos/config/prompt_slots/__init__.py
Normal file
0
src/everos/config/prompt_slots/__init__.py
Normal file
20
src/everos/config/prompt_slots/boundary_detection.yaml
Normal file
20
src/everos/config/prompt_slots/boundary_detection.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
# Custom prompt slot for BoundaryDetector.adetect.
|
||||
#
|
||||
# Default behaviour
|
||||
# Leave this slot disabled (``enabled: false``). The pipeline will pass
|
||||
# ``prompt=None`` through to algo, which falls back to the everalgo
|
||||
# bundled default prompt — see:
|
||||
# ~/everalgo/packages/everalgo-boundary/src/everalgo/boundary/prompts/en/chat.py
|
||||
# (constant ``CHAT_BOUNDARY_DETECT_PROMPT_EN``)
|
||||
#
|
||||
# To customise
|
||||
# 1. Read the algo default at the path above; note the required
|
||||
# placeholders ``{messages}`` and ``{token_count}``.
|
||||
# 2. Replace the ``template`` body below with your prompt.
|
||||
# 3. Flip ``enabled`` to ``true``.
|
||||
#
|
||||
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
|
||||
# ``prompt=None`` and the algo default is used (zero override cost).
|
||||
|
||||
enabled: false
|
||||
template: ""
|
||||
23
src/everos/config/prompt_slots/episode_extract.yaml
Normal file
23
src/everos/config/prompt_slots/episode_extract.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
# Custom prompt slot for EpisodeExtractor.aextract.
|
||||
#
|
||||
# Default behaviour
|
||||
# Leave this slot disabled (``enabled: false``). The pipeline will pass
|
||||
# ``prompt=None`` through to algo, which falls back to the everalgo
|
||||
# bundled default prompt — see:
|
||||
# everalgo/user_memory/prompts/en/episode.py
|
||||
# (the pipeline calls ``aextract`` with ``sender_id=None``, so the
|
||||
# whole-memcell ``EPISODE_GENERATION_PROMPT`` is used, not the
|
||||
# per-user ``USER_EPISODE_GENERATION_PROMPT``)
|
||||
#
|
||||
# To customise
|
||||
# 1. Read the algo default at the path above; note the required
|
||||
# placeholders ``{conversation_start_time}``, ``{conversation}`` and
|
||||
# ``{custom_instructions}``.
|
||||
# 2. Replace the ``template`` body below with your prompt.
|
||||
# 3. Flip ``enabled`` to ``true``.
|
||||
#
|
||||
# When ``enabled: false`` or ``template`` is empty, the pipeline sends
|
||||
# ``prompt=None`` and the algo default is used (zero override cost).
|
||||
|
||||
enabled: false
|
||||
template: ""
|
||||
403
src/everos/config/settings.py
Normal file
403
src/everos/config/settings.py
Normal file
@ -0,0 +1,403 @@
|
||||
"""Application settings.
|
||||
|
||||
Loaded by :func:`load_settings`. Source priority (later wins):
|
||||
|
||||
1. ``config/default.toml`` (shipped values; lowest priority)
|
||||
2. ``~/.everos/config.toml`` (user-level overrides; optional)
|
||||
3. ``.env`` file in the working directory (secrets / machine-specific)
|
||||
4. ``EVEROS_<SECTION>__<KEY>`` environment variables
|
||||
5. Init args passed programmatically (highest priority)
|
||||
|
||||
The user-level toml path defaults to ``~/.everos/config.toml``. Override
|
||||
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
|
||||
optional — if it does not exist, the source is silently skipped.
|
||||
|
||||
The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
|
||||
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.
|
||||
|
||||
``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
|
||||
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
|
||||
call. Tests that mutate environment variables must call
|
||||
``load_settings.cache_clear()`` after the mutation to invalidate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from functools import cache
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr, field_validator
|
||||
from pydantic_settings import (
|
||||
BaseSettings,
|
||||
PydanticBaseSettingsSource,
|
||||
SettingsConfigDict,
|
||||
TomlConfigSettingsSource,
|
||||
)
|
||||
|
||||
# Shipped defaults: the ``default.toml`` that sits next to this module.
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
|
||||
# Name of the environment variable that overrides the user-level TOML path.
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
|
||||
# User-level override file used when the environment variable is not set.
# ``expanduser()`` runs once, when this module is imported.
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()
|
||||
|
||||
|
||||
def _resolve_user_toml_path() -> Path:
    """Return the path of the user-level ``config.toml``.

    ``EVEROS_CONFIG_FILE`` wins when it is set to a non-empty value (with
    ``~`` expanded); otherwise the default ``~/.everos/config.toml`` is
    used.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()
|
||||
|
||||
|
||||
class MemorySettings(BaseModel):
    """memory-root configuration."""

    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject any name that ``ZoneInfo`` cannot load.

        Raises:
            ValueError: If constructing ``ZoneInfo(v)`` fails.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError covers keys that resolve to something unreadable in
            # the tz database (e.g. a directory such as "America"), so those
            # also surface as the uniform "invalid timezone" error.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v
|
||||
|
||||
|
||||
class ApiSettings(BaseModel):
    """Bind address and port for the HTTP API server.

    The default ``host = "127.0.0.1"`` keeps the server on loopback only.
    This follows the threat model in ``SECURITY.md``: EverOS ships **no
    built-in authentication**, so binding to a routable interface
    (``0.0.0.0`` etc.) without your own gateway / auth layer in front is
    unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = "127.0.0.1"
    port: int = Field(8000, ge=1, le=65535)
|
||||
|
||||
|
||||
class SqliteSettings(BaseModel):
    """SQLite PRAGMA values applied to every newly opened connection."""

    journal_mode: Literal[
        "WAL", "DELETE", "MEMORY", "OFF", "TRUNCATE", "PERSIST"
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = "NORMAL"
    foreign_keys: bool = True
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = "MEMORY"
    busy_timeout_ms: int = Field(5000, ge=0)
    journal_size_limit_bytes: int = Field(64 * 1024 * 1024, ge=0)
    cache_size_kb: int = Field(2048, ge=0)
|
||||
|
||||
|
||||
class LLMSettings(BaseModel):
    """Configuration for the main LLM client.

    The service layer reads these values when it lazily builds the LLM
    client that is handed to the algo extractors. Field names are
    provider-agnostic: the project speaks the OpenAI API protocol, so any
    OpenAI-compatible endpoint can be selected through ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = "gpt-4o-mini"
    api_key: SecretStr | None = None
    base_url: str | None = None
|
||||
|
||||
|
||||
class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # Bounded below like the sibling ``max_concurrent`` knobs on the
    # embedding / rerank settings, so a zero or negative value fails at
    # settings load instead of reaching the parser.
    max_concurrency: int = Field(default=4, ge=1)

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = []
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    file_uri_max_bytes: int = 50 * 1024 * 1024
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""
|
||||
|
||||
|
||||
class EmbeddingSettings(BaseModel):
    """Configuration for the embedding client.

    Targets an OpenAI-compatible embedding endpoint. ``model``,
    ``api_key`` and ``base_url`` have no defaults here and are required at
    runtime once the embedding capability is enabled; the remaining
    runtime knobs (``timeout`` etc.) come with sensible defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
|
||||
|
||||
|
||||
class RerankSettings(BaseModel):
    """Configuration for the rerank client.

    LLM and embedding clients share one OpenAI-compatible shape, but
    rerank API schemas differ per provider: DeepInfra uses
    ``POST {base_url}/{model}`` with a custom body, while vLLM uses
    ``POST {base_url}/rerank`` with ``{model, query, documents}``.
    ``provider`` selects which client implementation the factory builds.

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm"] = "deepinfra"
    model: str | None = None
    api_key: SecretStr | None = None
    base_url: str | None = None
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)
|
||||
|
||||
|
||||
class BoundaryDetectionSettings(BaseModel):
    """Hard token / message limits forwarded to the ``everalgo`` BoundaryDetector."""

    hard_token_limit: int = Field(65536, ge=1)
    hard_msg_limit: int = Field(500, ge=1)
|
||||
|
||||
|
||||
class MemorizeSettings(BaseModel):
    """Configuration for the memorize use case.

    ``mode`` chooses the boundary detector and the set of pipelines that
    get dispatched. One service process serves a single mode; changing it
    requires a restart.

    - ``"chat"`` -> ``everalgo.user_memory.BoundaryDetector``; only the
      user-memory pipeline runs.
    - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector``; both
      the user-memory and agent-memory pipelines run.

    ``session_lock_timeout_seconds`` bounds how long a single
    ``memorize()`` invocation may hold the per-session lock. The window
    covers the boundary LLM call, memcell DB writes and the synchronous
    portion of pipeline dispatch. It keeps a stuck LLM from deadlocking
    later concurrent calls on the same session_id: on timeout the outer
    ``asyncio.timeout`` cancels the task and the lock auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = "agent"
    session_lock_timeout_seconds: float = Field(360.0, gt=0)
|
||||
|
||||
|
||||
class SearchSettings(BaseModel):
    """Search-pipeline policy knobs.

    ``vector_strategy`` selects the read path taken by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. MaxSim over atomic facts; trades one extra LanceDB
      scan for finer-grained semantic match on long episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). The legacy path; kept
      so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    # Only the two literal values are accepted; anything else fails
    # validation.
    vector_strategy: Literal["episode", "maxsim_atomic"] = "maxsim_atomic"
|
||||
|
||||
|
||||
class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    ``read_consistency_seconds``:
        ``None`` (omitted) → no consistency check (highest performance).
        ``0`` → strict consistency (every read).
        ``>0`` → eventual (interval between checks).
        Negative values are rejected at validation time.

    ``index_cache_size_bytes``:
        Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
        in lance crate). Each cached entry is one opened FTS / vector /
        scalar index reader and **holds the file descriptors of its on-disk
        ``_indices/<uuid>/...`` files**. Negative values are rejected at
        validation time.

        LanceDB's own default is ``None`` (unbounded), which on a long-
        running daemon means every new index UUID created by an
        ``optimize()`` call adds a fresh reader to the cache, and its
        FDs are never released — they leak monotonically until
        ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
        take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
        of 256 / Linux's 1024.

        Setting a byte cap turns the cache into a real LRU: when it
        exceeds the cap, the oldest readers are dropped, Rust ``Drop``
        runs ``close(fd)``, and the FD pressure resolves itself.

        Cap → steady-state FD upper bound (measured under 30 add+optimize
        cycles with the real ``Episode`` schema and 100-query stress):

        ===========  =================  ====================
        cap          FD upper bound     query latency (100q)
        ===========  =================  ====================
        ``2 MB``     ~45                ~5 ms
        ``4 MB``     ~52                ~3 ms
        ``8 MB``     ~140               ~2.4 ms
        ``16 MB``    ~290               ~2.3 ms  ← default
        ``32 MB``    ~630               ~1.4 ms
        ``unbound``  >960 (leaks)       ~1.3 ms
        ===========  =================  ====================

        EverOS's measured steady-state working set after a 12 h
        ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
        (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
        ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
        yet-evicted readers, while the FD ceiling (~290) stays well below
        common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
        Linux default 1024 is fine out of the box).

        Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
        working set is much larger (heavier table count or much wider
        indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
        boxes).

        Note: the *metadata* cache (``metadata_cache_size_bytes``) is
        **not** exposed — experiment showed it caches in-memory parsed
        manifests / fragment stats with zero impact on FD count; leaving
        it unbounded (lancedb default) is fine.
    """

    # ``ge=0`` mirrors the range checks used by the sibling settings models:
    # a negative interval has no meaning in the None / 0 / >0 contract above.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # 16 MiB default (see the table above); a negative byte cap is rejected.
    # NOTE(review): 0 is still accepted — confirm LanceDB treats it as
    # "cache nothing" rather than as an error.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Top-level application settings.

    Aggregates one nested settings model per subsystem. Values are resolved
    from the layered sources assembled in :meth:`settings_customise_sources`.
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML layers between env / dotenv and the secret store.

        Precedence, highest first (pydantic-settings lets earlier sources win):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) joins the
        chain only when the file is present on disk; the packaged default
        toml (``model_config["toml_file"]``) is always included.
        """
        layered: list[PydanticBaseSettingsSource] = [
            init_settings,
            env_settings,
            dotenv_settings,
        ]

        user_toml = _resolve_user_toml_path()
        if user_toml.is_file():
            layered.append(TomlConfigSettingsSource(settings_cls, toml_file=user_toml))

        # Packaged defaults sit beneath the user file; secrets come last.
        layered += [TomlConfigSettingsSource(settings_cls), file_secret_settings]
        return tuple(layered)
|
||||
|
||||
|
||||
@cache
def load_settings() -> Settings:
    """Build the process-wide :class:`Settings` instance (cached).

    ``Settings()`` resolves values from every source registered in
    ``Settings.settings_customise_sources`` — environment variables, the
    ``.env`` file, the optional user-level toml, the packaged default toml
    and the secrets source.

    Cached at the module level — every caller sees the same instance until
    something explicitly clears the cache (``load_settings.cache_clear()``).
    Tests that monkeypatch environment variables must call
    ``cache_clear`` after each mutation to pick the new env up.

    Returns:
        The shared ``Settings`` object.
    """
    return Settings()
|
||||
0
src/everos/core/__init__.py
Normal file
0
src/everos/core/__init__.py
Normal file
0
src/everos/core/context/__init__.py
Normal file
0
src/everos/core/context/__init__.py
Normal file
33
src/everos/core/errors.py
Normal file
33
src/everos/core/errors.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""Cross-cutting domain errors surfaced to API callers.
|
||||
|
||||
These live in ``core`` so the ``memory`` layer can raise them and the
|
||||
``entrypoints`` layer can catch them without crossing the layered import
|
||||
boundary — ``any -> core`` is the only edge both share (entrypoints must
|
||||
not import ``memory`` directly).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class MultimodalError(Exception):
    """Base for multimodal-parsing errors meant to reach the caller.

    The API layer maps any ``MultimodalError`` to an aligned
    ``{error: {code, message}}`` envelope (HTTP 415).

    Catching this base class also catches every subclass defined in this
    module.
    """
|
||||
|
||||
|
||||
class UnsupportedModalityError(MultimodalError):
    """everalgo cannot handle this modality (e.g. video stub, unknown type).

    Wraps everalgo's ``NotImplementedError`` / dispatch ``ValueError`` so the
    caller gets a stable, aligned error instead of a raw 500.

    Subclass of :class:`MultimodalError`, so handlers registered for the base
    class receive it as well.
    """
|
||||
|
||||
|
||||
class MultimodalNotEnabledError(MultimodalError):
    """Multimodal capability is not ready.

    Raised when the ``everos[multimodal]`` extra is not installed, or when a
    required system dependency (LibreOffice for Office documents) is absent.

    Subclass of :class:`MultimodalError`, so handlers registered for the base
    class receive it as well.
    """
|
||||
27
src/everos/core/lifespan/__init__.py
Normal file
27
src/everos/core/lifespan/__init__.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""Application lifespan composition (chassis only).
|
||||
|
||||
This subpackage holds the *generic* lifespan machinery — the
|
||||
:class:`LifespanProvider` ABC, :func:`build_lifespan` factory, and
|
||||
chassis-level providers that are independent of any storage backend
|
||||
(observability metrics, etc.). Concrete storage-backend providers
|
||||
(SQLite / LanceDB) live next to the entrypoint that composes them
|
||||
(see :mod:`everos.entrypoints.api.lifespans`) so ``core`` stays free
|
||||
of concrete-backend imports.
|
||||
|
||||
External usage:
|
||||
from everos.core.lifespan import (
|
||||
LifespanProvider,
|
||||
MetricsLifespanProvider,
|
||||
build_lifespan,
|
||||
)
|
||||
"""
|
||||
|
||||
from .base import LifespanProvider as LifespanProvider
|
||||
from .factory import build_lifespan as build_lifespan
|
||||
from .metrics_lifespan import MetricsLifespanProvider as MetricsLifespanProvider
|
||||
|
||||
__all__ = [
|
||||
"LifespanProvider",
|
||||
"MetricsLifespanProvider",
|
||||
"build_lifespan",
|
||||
]
|
||||
30
src/everos/core/lifespan/base.py
Normal file
30
src/everos/core/lifespan/base.py
Normal file
@ -0,0 +1,30 @@
|
||||
"""Lifespan provider abstract base.
|
||||
|
||||
A LifespanProvider is one unit of startup / shutdown work invoked by the
|
||||
FastAPI lifespan factory. Providers are registered explicitly (no DI
|
||||
auto-discovery) and executed in ``order`` ascending on startup, reverse
|
||||
on shutdown.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
|
||||
class LifespanProvider(ABC):
    """One unit of startup / shutdown work.

    Subclasses must implement both :meth:`startup` and :meth:`shutdown`.
    """

    def __init__(self, name: str, order: int = 0) -> None:
        """Record the provider's identity and position.

        Args:
            name: Key under which the startup result is stored in
                ``app.state.lifespan_data``.
            order: Sort key; lower values start earlier and shut down later.
        """
        self.name = name
        self.order = order

    @abstractmethod
    async def startup(self, app: FastAPI) -> Any:
        """Startup hook; return value is stored on ``app.state.lifespan_data[name]``."""

    @abstractmethod
    async def shutdown(self, app: FastAPI) -> None:
        """Shutdown hook; called in reverse order during application teardown."""
|
||||
57
src/everos/core/lifespan/factory.py
Normal file
57
src/everos/core/lifespan/factory.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""Lifespan composition factory.
|
||||
|
||||
Builds a FastAPI lifespan context manager from an explicit list of
|
||||
LifespanProvider instances.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator, Callable, Sequence
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
from .base import LifespanProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def build_lifespan(
    providers: Sequence[LifespanProvider],
) -> Callable[[FastAPI], AsyncIterator[None]]:
    """Compose providers into a FastAPI lifespan context manager.

    Providers are run in ``order`` ascending on startup and reverse on
    shutdown (``sorted`` is stable, so equal ``order`` values keep their
    registration order). A non-None return value from ``startup`` is stored
    under ``app.state.lifespan_data[provider.name]``.

    Only providers whose ``startup`` completed are shut down. If a startup
    raises, the providers started before it are torn down in reverse order
    and the exception propagates; providers that never started (including
    the one that failed) do not receive a ``shutdown`` call.

    Args:
        providers: Explicit list of providers to compose.

    Returns:
        A callable suitable for ``FastAPI(lifespan=...)``.
    """
    sorted_providers = sorted(providers, key=lambda p: p.order)

    @asynccontextmanager
    async def _lifespan(app: FastAPI) -> AsyncIterator[None]:
        lifespan_data: dict[str, object] = {}
        # Tracks which providers actually came up, so teardown never touches
        # a provider whose startup was skipped or failed part-way.
        started: list[LifespanProvider] = []
        try:
            for provider in sorted_providers:
                logger.info(
                    "lifespan_provider_startup",
                    name=provider.name,
                    order=provider.order,
                )
                result = await provider.startup(app)
                started.append(provider)
                if result is not None:
                    lifespan_data[provider.name] = result
            app.state.lifespan_data = lifespan_data
            yield
        finally:
            for provider in reversed(started):
                try:
                    logger.info("lifespan_provider_shutdown", name=provider.name)
                    await provider.shutdown(app)
                except Exception:
                    # Best effort: one failing shutdown must not prevent the
                    # remaining providers from releasing their resources.
                    logger.exception(
                        "lifespan_provider_shutdown_failed", name=provider.name
                    )

    return _lifespan
|
||||
36
src/everos/core/lifespan/metrics_lifespan.py
Normal file
36
src/everos/core/lifespan/metrics_lifespan.py
Normal file
@ -0,0 +1,36 @@
|
||||
"""Metrics lifespan provider.
|
||||
|
||||
Confirms the metrics registry is ready and logs that the ``/metrics`` HTTP
|
||||
endpoint is mounted on the main API. Kept as a placeholder to demonstrate
|
||||
the lifespan pattern; replace or extend with a standalone metrics server
|
||||
(e.g. ``prometheus_client.start_http_server`` on a separate port) if you
|
||||
need to expose metrics on a dedicated socket.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.observability.metrics import get_metrics_registry
|
||||
|
||||
from .base import LifespanProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class MetricsLifespanProvider(LifespanProvider):
    """No-op startup that warms the metrics registry and logs readiness."""

    def __init__(self, order: int = 5) -> None:
        """Register under the fixed name ``"metrics"``.

        Args:
            order: Position in the startup sequence; defaults to 5.
        """
        super().__init__(name="metrics", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Fetch the metrics registry, log readiness and return the registry.

        ``app`` is not used.
        """
        registry = get_metrics_registry()
        logger.info("metrics_registry_ready", endpoint="/metrics")
        return registry

    async def shutdown(self, app: FastAPI) -> None:
        """Log the shutdown event; nothing is released here."""
        logger.info("metrics_lifespan_shutdown")
|
||||
31
src/everos/core/middleware/__init__.py
Normal file
31
src/everos/core/middleware/__init__.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""Cross-cutting HTTP middleware components.
|
||||
|
||||
External usage:
|
||||
from everos.core.middleware import (
|
||||
DEFAULT_CORS_ALLOW_CREDENTIALS,
|
||||
DEFAULT_CORS_ALLOW_HEADERS,
|
||||
DEFAULT_CORS_ALLOW_METHODS,
|
||||
DEFAULT_CORS_ORIGINS,
|
||||
ProfileMiddleware,
|
||||
PrometheusMiddleware,
|
||||
global_exception_handler,
|
||||
)
|
||||
"""
|
||||
|
||||
from .cors import DEFAULT_CORS_ALLOW_CREDENTIALS as DEFAULT_CORS_ALLOW_CREDENTIALS
|
||||
from .cors import DEFAULT_CORS_ALLOW_HEADERS as DEFAULT_CORS_ALLOW_HEADERS
|
||||
from .cors import DEFAULT_CORS_ALLOW_METHODS as DEFAULT_CORS_ALLOW_METHODS
|
||||
from .cors import DEFAULT_CORS_ORIGINS as DEFAULT_CORS_ORIGINS
|
||||
from .global_exception import global_exception_handler as global_exception_handler
|
||||
from .profile import ProfileMiddleware as ProfileMiddleware
|
||||
from .prometheus import PrometheusMiddleware as PrometheusMiddleware
|
||||
|
||||
__all__ = [
|
||||
"DEFAULT_CORS_ALLOW_CREDENTIALS",
|
||||
"DEFAULT_CORS_ALLOW_HEADERS",
|
||||
"DEFAULT_CORS_ALLOW_METHODS",
|
||||
"DEFAULT_CORS_ORIGINS",
|
||||
"ProfileMiddleware",
|
||||
"PrometheusMiddleware",
|
||||
"global_exception_handler",
|
||||
]
|
||||
12
src/everos/core/middleware/cors.py
Normal file
12
src/everos/core/middleware/cors.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""CORS configuration defaults.
|
||||
|
||||
The CORS middleware itself is FastAPI's stock ``CORSMiddleware``; this module
|
||||
centralises the default policy values used by the application factory.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Default CORS policy values consumed by the application factory.
# NOTE(review): wildcard origins together with ``allow_credentials=True`` is a
# very permissive combination — confirm that deployments reachable from
# untrusted origins override these defaults.
DEFAULT_CORS_ALLOW_CREDENTIALS: bool = True
# "*" = every request header is allowed.
DEFAULT_CORS_ALLOW_HEADERS: list[str] = ["*"]
# "*" = every HTTP method is allowed.
DEFAULT_CORS_ALLOW_METHODS: list[str] = ["*"]
# "*" = every origin is allowed.
DEFAULT_CORS_ORIGINS: list[str] = ["*"]
|
||||
143
src/everos/core/middleware/global_exception.py
Normal file
143
src/everos/core/middleware/global_exception.py
Normal file
@ -0,0 +1,143 @@
|
||||
"""Global exception handler — uniform error envelope per v1 API brief §1.
|
||||
|
||||
Envelope shape (matches the v1 API brief §1 — ``request_id`` at the top
|
||||
level alongside ``error``; the ``error`` object carries ``code`` /
|
||||
``message`` plus ops-friendly ``timestamp`` / ``path`` for debugging)::
|
||||
|
||||
{
|
||||
"request_id": "<32 lowercase hex chars — W3C trace_id format>",
|
||||
"error": {
|
||||
"code": "HTTP_ERROR" | "SYSTEM_ERROR",
|
||||
"message": "<reason>",
|
||||
"timestamp": "<ISO 8601 with tz>",
|
||||
"path": "<request path>"
|
||||
}
|
||||
}
|
||||
|
||||
Rules:
|
||||
- 4xx (DTO / business validation / HTTPException) → ``code="HTTP_ERROR"``
|
||||
with the human-readable reason in ``message``.
|
||||
- 5xx (unhandled exception) → ``code="SYSTEM_ERROR"`` with a fixed
|
||||
``message="Internal server error"`` — internal exception details are
|
||||
logged but never leak to the client.
|
||||
- ``request_id`` is sourced from ``request.state.request_id`` (set by
|
||||
upstream middleware); falls back to a freshly minted id when absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.status import (
|
||||
HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
get_now_with_timezone,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.observability.tracing import gen_request_id
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_INTERNAL_ERROR_MESSAGE = "Internal server error"
|
||||
|
||||
|
||||
def _request_id(request: Request) -> str:
    """Resolve the id used to correlate this request in the error envelope.

    Prefers a truthy ``request.state.request_id`` (stringified); when it is
    missing or falsy a new id is generated with ``gen_request_id()``.
    """
    existing = getattr(request.state, "request_id", None)
    return str(existing) if existing else gen_request_id()
|
||||
|
||||
|
||||
def _envelope(
    *,
    code: str,
    message: str,
    request: Request,
) -> dict[str, object]:
    """Assemble the uniform error payload returned to API clients.

    Shape: ``request_id`` sits at the top level next to a nested ``error``
    object holding the contract fields (``code`` / ``message``) and the
    ops-friendly ``timestamp`` / ``path``.

    Args:
        code: Error code placed in ``error.code``.
        message: Reason placed in ``error.message``.
        request: Request the error belongs to; supplies the id and path.

    Returns:
        The JSON-serialisable envelope dictionary.
    """
    request_id = _request_id(request)
    error_body: dict[str, object] = {
        "code": code,
        "message": message,
        "timestamp": to_iso_format(get_now_with_timezone()),
        "path": str(request.url.path),
    }
    return {"request_id": request_id, "error": error_body}
|
||||
|
||||
|
||||
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Convert any exception into a uniform JSON error response.

    Mapping:
        - ``RequestValidationError`` → 422, ``code="HTTP_ERROR"``; the
          message is built from the first validation error (its ``msg`` plus
          the dotted location with the ``body`` segment removed).
        - ``HTTPException`` → the exception's own status code. Statuses
          ``>= 500`` get ``code="SYSTEM_ERROR"`` with the fixed internal
          message; everything else gets ``code="HTTP_ERROR"`` with
          ``str(exc.detail)``. Headers attached to the exception
          (``WWW-Authenticate``, ``Retry-After``, ...) are forwarded on the
          response.
        - Anything else → 500, ``code="SYSTEM_ERROR"``; the exception is
          logged with its traceback and never echoed to the client.

    Args:
        request: The request being processed when the error occurred.
        exc: The exception to translate.

    Returns:
        A ``JSONResponse`` carrying the error envelope.
    """
    path = str(request.url.path)
    method = request.method

    if isinstance(exc, RequestValidationError):
        errors = exc.errors()
        if errors:
            first = errors[0]
            loc = ".".join(str(p) for p in first.get("loc", []) if p != "body")
            msg = first.get("msg", "Validation error")
            message = f"{msg}: {loc}" if loc else msg
        else:
            message = "Request validation error"

        logger.warning("validation_error", method=method, path=path, message=message)
        return JSONResponse(
            status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            content=_envelope(code="HTTP_ERROR", message=message, request=request),
        )

    if isinstance(exc, HTTPException):
        logger.warning(
            "http_exception",
            method=method,
            path=path,
            status_code=exc.status_code,
            detail=exc.detail,
        )
        # Keep protocol headers the raiser attached; dropping them breaks
        # e.g. 401 challenges and 429 / 503 retry hints.
        headers = getattr(exc, "headers", None)
        # 5xx routed through HTTPException is rare but valid; still honour
        # the SYSTEM_ERROR code so the envelope is consistent.
        if exc.status_code >= 500:
            return JSONResponse(
                status_code=exc.status_code,
                content=_envelope(
                    code="SYSTEM_ERROR",
                    message=_INTERNAL_ERROR_MESSAGE,
                    request=request,
                ),
                headers=headers,
            )
        return JSONResponse(
            status_code=exc.status_code,
            content=_envelope(
                code="HTTP_ERROR",
                message=str(exc.detail),
                request=request,
            ),
            headers=headers,
        )

    logger.error(
        "unhandled_exception",
        method=method,
        path=path,
        exception_type=type(exc).__name__,
        exc_info=True,
    )
    return JSONResponse(
        status_code=HTTP_500_INTERNAL_SERVER_ERROR,
        content=_envelope(
            code="SYSTEM_ERROR",
            message=_INTERNAL_ERROR_MESSAGE,
            request=request,
        ),
    )
|
||||
69
src/everos/core/middleware/profile.py
Normal file
69
src/everos/core/middleware/profile.py
Normal file
@ -0,0 +1,69 @@
|
||||
"""Performance profiling middleware (HTML report via pyinstrument).
|
||||
|
||||
Triggered with ``?profile=true`` query parameter when ``PROFILING_ENABLED=true``
|
||||
is set. Gracefully no-ops if pyinstrument is not installed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
from fastapi import Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.responses import Response
|
||||
from starlette.types import ASGIApp
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_TRUTHY = frozenset({"1", "true", "yes"})
|
||||
|
||||
|
||||
def _profiling_enabled() -> bool:
    """Tell whether profiling was switched on through the environment.

    ``PROFILING_ENABLED`` takes precedence; when it is unset the value of
    ``PROFILING`` is used, and when both are unset the answer is ``False``.
    The comparison is case-insensitive against ``1`` / ``true`` / ``yes``.
    """
    fallback = os.getenv("PROFILING", "false")
    flag = os.getenv("PROFILING_ENABLED", fallback)
    return flag.lower() in _TRUTHY
|
||||
|
||||
|
||||
class ProfileMiddleware(BaseHTTPMiddleware):
    """Returns a pyinstrument HTML report when ``?profile=true`` is set.

    The middleware is inert unless profiling is enabled through the
    environment *and* pyinstrument can be imported; both are checked once,
    at construction time.
    """

    def __init__(self, app: ASGIApp) -> None:
        """Wrap ``app`` and probe for pyinstrument when profiling is enabled."""
        super().__init__(app)
        self._enabled = _profiling_enabled()
        self._available = False
        if self._enabled:
            try:
                import pyinstrument  # noqa: F401

                self._available = True
                logger.info("profiling_middleware_enabled")
            except ImportError:
                logger.warning("profiling_requested_but_pyinstrument_missing")
                self._enabled = False

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Profile the downstream handler when the request opts in.

        Requests without a truthy ``profile`` query parameter pass straight
        through. For profiled requests the downstream response is discarded
        and the HTML report is returned with status 200, even when the
        handler raised (the failure is logged).
        """
        if not self._enabled or not self._available:
            return await call_next(request)

        if request.query_params.get("profile", "").lower() not in _TRUTHY:
            return await call_next(request)

        from pyinstrument import Profiler

        profiler = Profiler()
        profiler.start()
        logger.info("profile_started", method=request.method, path=request.url.path)
        try:
            await call_next(request)
        except Exception:
            logger.exception("profile_request_failed")
        finally:
            # Must run on cancellation too (CancelledError is not an
            # ``Exception``); otherwise the profiler is left running.
            profiler.stop()
        return HTMLResponse(content=profiler.output_html(), status_code=200)
|
||||
84
src/everos/core/middleware/prometheus.py
Normal file
84
src/everos/core/middleware/prometheus.py
Normal file
@ -0,0 +1,84 @@
|
||||
"""Prometheus HTTP metrics middleware.
|
||||
|
||||
Auto-instruments incoming HTTP requests with a request counter and a
|
||||
duration histogram. Mounted via ``app.add_middleware(PrometheusMiddleware)``.
|
||||
|
||||
Skips internal endpoints (``/metrics``, ``/health``, etc.) so they do not
|
||||
inflate cardinality or pollute their own statistics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.observability.metrics import Counter, Histogram, HistogramBuckets
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# Request counter, labelled by HTTP method, normalised path and status code.
_http_requests_total = Counter(
    name="http_requests_total",
    description="Total number of HTTP requests handled.",
    labelnames=("method", "path", "status"),
    namespace="everos",
)

# Latency histogram, labelled by HTTP method and normalised path. Status is
# deliberately not a label here, unlike the counter above.
_http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    description="HTTP request duration in seconds.",
    labelnames=("method", "path"),
    namespace="everos",
    buckets=HistogramBuckets.DEFAULT,
)

# Exact request paths that bypass instrumentation entirely.
_SKIP_PATHS = frozenset({"/metrics", "/health", "/healthz", "/favicon.ico"})
|
||||
|
||||
|
||||
def _normalize_path(request: Request) -> str:
    """Resolve the route template (e.g. ``/users/{user_id}``) for stable labels.

    Resolution order:
        1. ``scope["route"].path`` when the router recorded the matched route.
        2. Otherwise, when path params are present, rebuild a template from
           the concrete URL: each param value is replaced once by
           ``{name}``, preferring a whole path segment (bounded by ``/`` or
           the string edges) and falling back to the first substring match
           for templates such as ``/files/{name}.json``.
        3. ``"{unmatched}"`` when nothing matched, so unknown URLs collapse
           into a single label value.

    Replacing one occurrence per parameter keeps unrelated segments intact
    (``/users/1/items/11`` → ``/users/{user_id}/items/11``); empty values
    are skipped because substituting an empty string would inject the
    placeholder between every character.
    """
    import re  # local import: only the fallback branch needs it

    scope = getattr(request, "scope", {})
    route = scope.get("route") if isinstance(scope, dict) else None
    if route is not None and hasattr(route, "path"):
        return route.path

    if request.path_params:
        path = request.url.path
        for name, value in request.path_params.items():
            text = str(value)
            if not text:
                continue
            placeholder = "{" + name + "}"
            # Whole-segment match first: preceded and followed by "/" or an
            # edge of the string. A callable replacement avoids backslash
            # interpretation in the placeholder.
            pattern = rf"(?<![^/]){re.escape(text)}(?![^/])"
            path, hits = re.subn(pattern, lambda _m, _p=placeholder: _p, path, count=1)
            if not hits:
                path = path.replace(text, placeholder, 1)
        return path

    return "{unmatched}"
|
||||
|
||||
|
||||
class PrometheusMiddleware(BaseHTTPMiddleware):
    """Records ``http_requests_total`` and ``http_request_duration_seconds``."""

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Time the downstream call and record one sample per request.

        Paths listed in ``_SKIP_PATHS`` are forwarded untouched. When the
        downstream call raises, the sample is still recorded with status
        ``"500"`` and the exception propagates.
        """
        if request.url.path in _SKIP_PATHS:
            return await call_next(request)

        http_method = request.method
        began = time.perf_counter()
        # Pessimistic default: stays "500" unless a response is produced.
        status_label = "500"
        try:
            reply = await call_next(request)
            status_label = str(reply.status_code)
            return reply
        finally:
            elapsed = time.perf_counter() - began
            # Resolved after the call, once routing has populated the scope.
            route_label = _normalize_path(request)
            _http_requests_total.labels(
                method=http_method, path=route_label, status=status_label
            ).inc()
            _http_request_duration_seconds.labels(
                method=http_method, path=route_label
            ).observe(elapsed)
|
||||
0
src/everos/core/observability/__init__.py
Normal file
0
src/everos/core/observability/__init__.py
Normal file
13
src/everos/core/observability/logging/__init__.py
Normal file
13
src/everos/core/observability/logging/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""structlog-based logging factory.
|
||||
|
||||
External usage:
|
||||
from everos.core.observability.logging import get_logger, configure_logging
|
||||
|
||||
logger = get_logger(__name__)
|
||||
logger.info("event_name", key=value)
|
||||
"""
|
||||
|
||||
from .factory import configure_logging as configure_logging
|
||||
from .factory import get_logger as get_logger
|
||||
|
||||
__all__ = ["configure_logging", "get_logger"]
|
||||
117
src/everos/core/observability/logging/factory.py
Normal file
117
src/everos/core/observability/logging/factory.py
Normal file
@ -0,0 +1,117 @@
|
||||
"""structlog logger factory.
|
||||
|
||||
Provides ``get_logger(__name__)`` for module-level logger acquisition.
|
||||
``configure_logging()`` is called once at process startup (run.py / lifespan)
|
||||
to set up the structlog processor chain and route stdlib logging through
|
||||
the same formatter so output stays uniform regardless of the caller.
|
||||
|
||||
The configuration follows structlog's official "Foreign Log Integration"
|
||||
recipe: a single ``ProcessorFormatter`` renders both everos's own
|
||||
``get_logger(...)`` calls and any stdlib ``logging.getLogger(...)`` call
|
||||
made by third-party libraries (uvicorn, fastapi, httpx, openai, ...).
|
||||
That way all three of the previously divergent prefixes — ``INFO:``,
|
||||
``[warning ]``, plus the unconfigured no-prefix output — collapse to
|
||||
one ``[level] event key=value`` shape.
|
||||
|
||||
Rust-side loggers (LanceDB / Lance / Arrow) live in the Rust ``log``
|
||||
crate and emit straight to stderr without going through Python; this
|
||||
module cannot reach them. Control their level with ``RUST_LOG`` env.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
|
||||
|
||||
def get_logger(name: str) -> Any:
    """Return a structlog logger bound to the given module name.

    Thin wrapper over ``structlog.get_logger``.

    Args:
        name: Logger name, conventionally the caller's ``__name__``.
    """
    return structlog.get_logger(name)
|
||||
|
||||
|
||||
def configure_logging(level: str = "INFO") -> None:
    """Configure structlog and stdlib logging once at process startup.

    After this call:

    * Every ``structlog.get_logger(...)`` and ``logging.getLogger(...)``
      message flows through the same ``ProcessorFormatter``, so output
      format is identical regardless of which logging API the caller used.
    * The root logger gains one ``StreamHandler`` pointing at
      ``sys.stdout``. Handlers installed by an earlier call to this
      function (recognised by their ``ProcessorFormatter``) are dropped
      first, so repeated calls do not duplicate output; handlers attached
      by other parties (pytest's ``caplog``, for instance) are kept.
    * The ``httpx`` / ``httpcore`` / ``urllib3`` loggers are raised to
      ``WARNING``.

    The ``uvicorn.run(..., log_config=None)`` flag is the matching half
    on the server entry point — without it, uvicorn re-installs its own
    handlers on every startup and overrides what we set here.

    Args:
        level: Log level name (``DEBUG`` / ``INFO`` / ``WARNING`` / ``ERROR``),
            case-insensitive; surrounding whitespace is ignored. Names that
            do not resolve to an integer level constant on the ``logging``
            module silently fall back to ``INFO``.
    """
    # ``getattr`` alone is not enough: some upper-case attributes of the
    # logging module (e.g. ``BASIC_FORMAT``) are not integer levels.
    resolved = getattr(logging, level.strip().upper(), logging.INFO)
    log_level = resolved if isinstance(resolved, int) else logging.INFO

    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # structlog's own loggers feed into stdlib's logging, so the root
    # logger handler decides where output lands and how it's rendered.
    structlog.configure(
        processors=[
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # The single formatter shared by both pipelines:
    #   * structlog events arrive already wrapped via ``wrap_for_formatter``;
    #   * foreign records (stdlib LogRecord) get pushed through
    #     ``foreign_pre_chain`` so they pick up the same level / timestamp
    #     fields before hitting ``ConsoleRenderer``.
    formatter = structlog.stdlib.ProcessorFormatter(
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            structlog.dev.ConsoleRenderer(),
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Drop any handler we installed on a previous ``configure_logging``
    # call (identified by formatter type) so repeated invocations don't
    # produce duplicate output, but keep handlers other parties have
    # attached — pytest's caplog handler in particular has to survive,
    # otherwise tests using the ``caplog`` fixture can't see records
    # that flow through structlog.
    root = logging.getLogger()
    root.handlers = [
        h
        for h in root.handlers
        if not isinstance(h.formatter, structlog.stdlib.ProcessorFormatter)
    ]
    root.addHandler(handler)
    root.setLevel(log_level)

    # Third-party HTTP clients log every successful request at INFO level —
    # `httpx` is the worst offender (one line per call, called once per
    # LLM / embedding / rerank request). A single LoCoMo conv run easily
    # produces a thousand such lines, drowning everos's own events. They
    # are useful for debugging API failures, but failures already surface
    # via exceptions + status codes — so demote the success path to WARNING
    # and let real errors still come through.
    for noisy in ("httpx", "httpcore", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.WARNING)
|
||||
34
src/everos/core/observability/metrics/__init__.py
Normal file
34
src/everos/core/observability/metrics/__init__.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""Prometheus-style metrics primitives + registry.
|
||||
|
||||
External usage:
|
||||
from everos.core.observability.metrics import (
|
||||
Counter, Gauge, Histogram, HistogramBuckets,
|
||||
get_metrics_registry, generate_metrics_response,
|
||||
)
|
||||
"""
|
||||
|
||||
from .counter import Counter as Counter
|
||||
from .counter import LabeledCounter as LabeledCounter
|
||||
from .gauge import Gauge as Gauge
|
||||
from .gauge import LabeledGauge as LabeledGauge
|
||||
from .histogram import Histogram as Histogram
|
||||
from .histogram import HistogramBuckets as HistogramBuckets
|
||||
from .histogram import LabeledHistogram as LabeledHistogram
|
||||
from .registry import generate_metrics_response as generate_metrics_response
|
||||
from .registry import get_metrics_registry as get_metrics_registry
|
||||
from .registry import reset_metrics_registry as reset_metrics_registry
|
||||
from .registry import set_metrics_registry as set_metrics_registry
|
||||
|
||||
__all__ = [
|
||||
"Counter",
|
||||
"Gauge",
|
||||
"Histogram",
|
||||
"HistogramBuckets",
|
||||
"LabeledCounter",
|
||||
"LabeledGauge",
|
||||
"LabeledHistogram",
|
||||
"generate_metrics_response",
|
||||
"get_metrics_registry",
|
||||
"reset_metrics_registry",
|
||||
"set_metrics_registry",
|
||||
]
|
||||
50
src/everos/core/observability/metrics/counter.py
Normal file
50
src/everos/core/observability/metrics/counter.py
Normal file
@ -0,0 +1,50 @@
|
||||
"""Counter wrapper around ``prometheus_client.Counter``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from prometheus_client import Counter as PromCounter
|
||||
|
||||
from .registry import get_metrics_registry
|
||||
|
||||
|
||||
class Counter:
    """Wrapper around ``prometheus_client.Counter``.

    Models a monotonically-increasing value (totals, error counts). The
    underlying collector is registered on the registry returned by
    :func:`get_metrics_registry` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying Prometheus counter.

        Args:
            name: Metric name, forwarded unchanged.
            description: Help text; passed as the ``documentation`` argument.
            labelnames: Label names declared for this metric.
            namespace: Optional prefix component, forwarded unchanged.
            subsystem: Optional prefix component, forwarded unchanged.
            unit: Optional unit, forwarded unchanged.
        """
        # Resolve the registry first so the constructor call below reads as
        # plain argument forwarding.
        registry = get_metrics_registry()
        self._counter = PromCounter(
            name,
            description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=registry,
        )
        # Snapshot of the declared label names, kept on the wrapper.
        self._labelnames = tuple(labelnames)

    def labels(self, **labels: str) -> LabeledCounter:
        """Return the child counter for the given label values."""
        child = self._counter.labels(**labels)
        return LabeledCounter(child)

    def inc(self, amount: float = 1.0) -> None:
        """Increase the counter by ``amount`` (default ``1.0``)."""
        self._counter.inc(amount)
|
||||
|
||||
|
||||
class LabeledCounter:
    """Counter child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child (as returned by ``Counter.labels``).

        Args:
            labeled: Object exposing ``inc(amount)``; typed ``Any`` because
                this wrapper relies on nothing else.
        """
        self._labeled = labeled

    def inc(self, amount: float = 1.0) -> None:
        """Increase the labelled series by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.inc(amount)
|
||||
66
src/everos/core/observability/metrics/gauge.py
Normal file
66
src/everos/core/observability/metrics/gauge.py
Normal file
@ -0,0 +1,66 @@
|
||||
"""Gauge wrapper around ``prometheus_client.Gauge``.
|
||||
|
||||
Async auto-refresh is intentionally not included in v0.1; subclass
|
||||
:class:`Gauge` and call :meth:`set` from your own scheduling logic when
|
||||
needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from prometheus_client import Gauge as PromGauge
|
||||
|
||||
from .registry import get_metrics_registry
|
||||
|
||||
|
||||
class Gauge:
    """Wrapper around ``prometheus_client.Gauge``.

    Models an instantaneous numeric value that can go up and down (queue
    depth, cache size). The underlying collector is registered on the
    registry returned by :func:`get_metrics_registry` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
    ) -> None:
        """Create the underlying Prometheus gauge.

        Args:
            name: Metric name, forwarded unchanged.
            description: Help text; passed as the ``documentation`` argument.
            labelnames: Label names declared for this metric.
            namespace: Optional prefix component, forwarded unchanged.
            subsystem: Optional prefix component, forwarded unchanged.
            unit: Optional unit, forwarded unchanged.
        """
        registry = get_metrics_registry()
        self._gauge = PromGauge(
            name,
            description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            registry=registry,
        )

    def labels(self, **labels: str) -> LabeledGauge:
        """Return the child gauge for the given label values."""
        child = self._gauge.labels(**labels)
        return LabeledGauge(child)

    def set(self, value: float) -> None:
        """Overwrite the gauge with ``value``."""
        self._gauge.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the gauge by ``amount`` (default ``1.0``)."""
        self._gauge.dec(amount)
|
||||
|
||||
|
||||
class LabeledGauge:
    """Gauge child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child (as returned by ``Gauge.labels``).

        Args:
            labeled: Object exposing ``set`` / ``inc`` / ``dec``; typed
                ``Any`` because this wrapper relies on nothing else.
        """
        self._labeled = labeled

    def set(self, value: float) -> None:
        """Overwrite the labelled series with ``value``."""
        child = self._labeled
        child.set(value)

    def inc(self, amount: float = 1.0) -> None:
        """Raise the labelled series by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.inc(amount)

    def dec(self, amount: float = 1.0) -> None:
        """Lower the labelled series by ``amount`` (default ``1.0``)."""
        child = self._labeled
        child.dec(amount)
|
||||
102
src/everos/core/observability/metrics/histogram.py
Normal file
102
src/everos/core/observability/metrics/histogram.py
Normal file
@ -0,0 +1,102 @@
|
||||
"""Histogram wrapper around ``prometheus_client.Histogram``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from prometheus_client import Histogram as PromHistogram
|
||||
|
||||
from .registry import get_metrics_registry
|
||||
|
||||
|
||||
class HistogramBuckets:
    """Named bucket presets to pass as ``Histogram(buckets=...)``.

    Each preset is an ascending tuple of bucket boundaries.
    NOTE(review): the values look like durations in seconds (the usual
    Prometheus latency convention) — confirm before reusing them for sizes.
    """

    # General-purpose preset; also the default for ``Histogram``.
    DEFAULT: tuple[float, ...] = (
        0.005,
        0.01,
        0.025,
        0.05,
        0.1,
        0.25,
        0.5,
        1.0,
        2.5,
        5.0,
        10.0,
    )

    # Fine-grained preset topping out at 0.5.
    FAST: tuple[float, ...] = (
        0.001,
        0.005,
        0.01,
        0.025,
        0.05,
        0.1,
        0.25,
        0.5,
    )

    # Wider preset reaching 30.0, for outbound API calls.
    API_CALL: tuple[float, ...] = (
        0.01,
        0.05,
        0.1,
        0.25,
        0.5,
        1.0,
        2.0,
        5.0,
        10.0,
        30.0,
    )

    # Coarse preset reaching 60.0, for batch work.
    BATCH: tuple[float, ...] = (
        0.1,
        0.5,
        1.0,
        5.0,
        10.0,
        30.0,
        60.0,
    )

    # Preset starting at 0.001 and reaching 5.0, for database operations.
    DATABASE: tuple[float, ...] = (
        0.001,
        0.005,
        0.01,
        0.025,
        0.05,
        0.1,
        0.25,
        0.5,
        1.0,
        2.5,
        5.0,
    )
|
||||
|
||||
|
||||
class Histogram:
    """Wrapper around ``prometheus_client.Histogram``.

    Records the distribution of observed values (latency, sizes). The
    underlying collector is registered on the registry returned by
    :func:`get_metrics_registry` at construction time.
    """

    def __init__(
        self,
        name: str,
        description: str,
        labelnames: Sequence[str] = (),
        namespace: str = "",
        subsystem: str = "",
        unit: str = "",
        buckets: Sequence[float] = HistogramBuckets.DEFAULT,
    ) -> None:
        """Create the underlying Prometheus histogram.

        Args:
            name: Metric name, forwarded unchanged.
            description: Help text; passed as the ``documentation`` argument.
            labelnames: Label names declared for this metric.
            namespace: Optional prefix component, forwarded unchanged.
            subsystem: Optional prefix component, forwarded unchanged.
            unit: Optional unit, forwarded unchanged.
            buckets: Bucket boundaries; see :class:`HistogramBuckets` for
                ready-made presets.
        """
        # Freeze the boundaries into a tuple before handing them over.
        bucket_bounds = tuple(buckets)
        registry = get_metrics_registry()
        self._histogram = PromHistogram(
            name,
            description,
            labelnames=labelnames,
            namespace=namespace,
            subsystem=subsystem,
            unit=unit,
            buckets=bucket_bounds,
            registry=registry,
        )

    def labels(self, **labels: str) -> LabeledHistogram:
        """Return the child histogram for the given label values."""
        child = self._histogram.labels(**labels)
        return LabeledHistogram(child)

    def observe(self, amount: float) -> None:
        """Record one observation of ``amount``."""
        self._histogram.observe(amount)

    def time(self) -> Any:
        """Return the underlying histogram's ``time()`` helper.

        In ``prometheus_client`` this is a timer usable as a context
        manager or decorator; it is passed through untouched.
        """
        timer = self._histogram.time()
        return timer
|
||||
|
||||
|
||||
class LabeledHistogram:
    """Histogram child bound to one concrete set of label values."""

    def __init__(self, labeled: Any) -> None:
        """Wrap the labelled child (as returned by ``Histogram.labels``).

        Args:
            labeled: Object exposing ``observe`` / ``time``; typed ``Any``
                because this wrapper relies on nothing else.
        """
        self._labeled = labeled

    def observe(self, amount: float) -> None:
        """Record one observation of ``amount`` on the labelled series."""
        child = self._labeled
        child.observe(amount)

    def time(self) -> Any:
        """Return the labelled child's ``time()`` helper, untouched."""
        timer = self._labeled.time()
        return timer
|
||||
35
src/everos/core/observability/metrics/registry.py
Normal file
35
src/everos/core/observability/metrics/registry.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""Prometheus metrics registry singleton."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
|
||||
|
||||
_registry: CollectorRegistry | None = None
|
||||
|
||||
|
||||
def get_metrics_registry() -> CollectorRegistry:
    """Return the process-wide metrics registry.

    If no override has been installed via :func:`set_metrics_registry`,
    ``prometheus_client.REGISTRY`` is adopted (and remembered) on first
    call.
    """
    global _registry
    # Fast path: an override, or the previously adopted default, is in place.
    if _registry is not None:
        return _registry
    _registry = REGISTRY
    return _registry
|
||||
|
||||
|
||||
def set_metrics_registry(registry: CollectorRegistry) -> None:
    """Install ``registry`` as the global registry (mainly for tests).

    Only affects lookups made after this call; metric objects that were
    already constructed keep the registry they were created with.
    """
    global _registry
    _registry = registry
|
||||
|
||||
|
||||
def generate_metrics_response() -> bytes:
    """Serialise the current registry to Prometheus exposition format.

    Returns:
        The rendered payload as bytes, suitable as a ``/metrics`` body.
    """
    registry = get_metrics_registry()
    return generate_latest(registry)
|
||||
|
||||
|
||||
def reset_metrics_registry() -> None:
    """Clear any registry override (mainly for tests).

    After this call the next :func:`get_metrics_registry` falls back to
    the default registry again.
    """
    global _registry
    _registry = None
|
||||
32
src/everos/core/observability/tracing/__init__.py
Normal file
32
src/everos/core/observability/tracing/__init__.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""Tracing utilities — W3C-compatible request id generation.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.core.observability.tracing import gen_request_id
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
def gen_request_id() -> str:
    """Mint a fresh request id in W3C trace-context ``trace_id`` format.

    The id is the hex form of a random UUID4: 32 lowercase hex characters
    (128 bits) with no prefix or dashes, i.e. the same shape as a W3C
    ``trace_id`` / OpenTelemetry trace identifier.

    Routes and services that need a new request id (because upstream
    middleware did not inject one) should call this helper instead of
    inventing their own uuid / prefix scheme, so ids stay compatible with
    OpenTelemetry exporters and standard APM tooling.

    Example::

        >>> rid = gen_request_id()
        >>> len(rid)
        32
    """
    request_id = uuid4().hex
    return request_id
|
||||
|
||||
|
||||
__all__ = ["gen_request_id"]
|
||||
106
src/everos/core/persistence/__init__.py
Normal file
106
src/everos/core/persistence/__init__.py
Normal file
@ -0,0 +1,106 @@
|
||||
"""Persistence primitives.
|
||||
|
||||
Read/write toolkit for markdown files, async wrappers around the SQLite
|
||||
system DB and LanceDB index, plus a memory-root path manager. Higher
|
||||
layers (``memory``, ``infra``) layer business semantics on top of these
|
||||
building blocks; this subpackage knows nothing about Entry / MemCell /
|
||||
Episode or any other business model.
|
||||
|
||||
External usage:
|
||||
from everos.core.persistence import (
|
||||
# Path manager + lock
|
||||
MemoryRoot, memory_root_lock, LockError,
|
||||
# Markdown IO toolkit
|
||||
MarkdownReader, MarkdownWriter, ParsedMarkdown, Entry,
|
||||
parse_frontmatter, dump_frontmatter, split_entries, find_entry,
|
||||
# Frontmatter schema chassis
|
||||
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
|
||||
DailyLogPathMixin, SkillPathMixin,
|
||||
# Async SQLite (SQLModel / SA 2.0)
|
||||
create_system_engine, create_session_factory, session_scope,
|
||||
SQLModel, Field, Relationship, BaseTable, RepoBase,
|
||||
# Async LanceDB
|
||||
open_lancedb_connection, LanceModel, Vector, BaseLanceTable, touch,
|
||||
LanceRepoBase,
|
||||
)
|
||||
"""
|
||||
|
||||
from .lancedb import BaseLanceTable as BaseLanceTable
|
||||
from .lancedb import LanceModel as LanceModel
|
||||
from .lancedb import LanceRepoBase as LanceRepoBase
|
||||
from .lancedb import Vector as Vector
|
||||
from .lancedb import open_lancedb_connection as open_lancedb_connection
|
||||
from .lancedb import touch as touch
|
||||
from .locking import LockError as LockError
|
||||
from .locking import memory_root_lock as memory_root_lock
|
||||
from .markdown import AgentScopedFrontmatter as AgentScopedFrontmatter
|
||||
from .markdown import BaseFrontmatter as BaseFrontmatter
|
||||
from .markdown import DailyLogPathMixin as DailyLogPathMixin
|
||||
from .markdown import Entry as Entry
|
||||
from .markdown import EntryId as EntryId
|
||||
from .markdown import MarkdownReader as MarkdownReader
|
||||
from .markdown import MarkdownWriter as MarkdownWriter
|
||||
from .markdown import ParsedMarkdown as ParsedMarkdown
|
||||
from .markdown import SkillPathMixin as SkillPathMixin
|
||||
from .markdown import StructuredEntry as StructuredEntry
|
||||
from .markdown import UserScopedFrontmatter as UserScopedFrontmatter
|
||||
from .markdown import dump_frontmatter as dump_frontmatter
|
||||
from .markdown import find_entry as find_entry
|
||||
from .markdown import parse_frontmatter as parse_frontmatter
|
||||
from .markdown import parse_structured_entry as parse_structured_entry
|
||||
from .markdown import render_structured_entry as render_structured_entry
|
||||
from .markdown import split_entries as split_entries
|
||||
from .memory_root import MemoryRoot as MemoryRoot
|
||||
from .memory_root import app_dir_name as app_dir_name
|
||||
from .memory_root import app_id_from_dir as app_id_from_dir
|
||||
from .memory_root import project_dir_name as project_dir_name
|
||||
from .memory_root import project_id_from_dir as project_id_from_dir
|
||||
from .sqlite import BaseTable as BaseTable
|
||||
from .sqlite import Field as Field
|
||||
from .sqlite import Relationship as Relationship
|
||||
from .sqlite import RepoBase as RepoBase
|
||||
from .sqlite import SQLModel as SQLModel
|
||||
from .sqlite import create_session_factory as create_session_factory
|
||||
from .sqlite import create_system_engine as create_system_engine
|
||||
from .sqlite import session_scope as session_scope
|
||||
|
||||
__all__ = [
|
||||
"AgentScopedFrontmatter",
|
||||
"BaseFrontmatter",
|
||||
"BaseLanceTable",
|
||||
"BaseTable",
|
||||
"DailyLogPathMixin",
|
||||
"Entry",
|
||||
"EntryId",
|
||||
"Field",
|
||||
"LanceModel",
|
||||
"LanceRepoBase",
|
||||
"LockError",
|
||||
"MarkdownReader",
|
||||
"MarkdownWriter",
|
||||
"MemoryRoot",
|
||||
"ParsedMarkdown",
|
||||
"Relationship",
|
||||
"RepoBase",
|
||||
"SkillPathMixin",
|
||||
"StructuredEntry",
|
||||
"SQLModel",
|
||||
"UserScopedFrontmatter",
|
||||
"Vector",
|
||||
"app_dir_name",
|
||||
"app_id_from_dir",
|
||||
"create_session_factory",
|
||||
"create_system_engine",
|
||||
"dump_frontmatter",
|
||||
"find_entry",
|
||||
"memory_root_lock",
|
||||
"project_dir_name",
|
||||
"project_id_from_dir",
|
||||
"open_lancedb_connection",
|
||||
"parse_frontmatter",
|
||||
"parse_structured_entry",
|
||||
"render_structured_entry",
|
||||
"session_scope",
|
||||
"split_entries",
|
||||
"touch",
|
||||
]
|
||||
34
src/everos/core/persistence/lancedb/__init__.py
Normal file
34
src/everos/core/persistence/lancedb/__init__.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""LanceDB async persistence.
|
||||
|
||||
External usage (connection):
|
||||
from everos.core.persistence.lancedb import open_lancedb_connection
|
||||
|
||||
External usage (schema basics — ``LanceModel`` / ``Vector`` are re-exported
from ``lancedb.pydantic``; ``BaseLanceTable`` / ``touch`` are defined here):
|
||||
from everos.core.persistence.lancedb import (
|
||||
LanceModel, Vector, BaseLanceTable, touch,
|
||||
)
|
||||
|
||||
External usage (generic CRUD repository base):
|
||||
from everos.core.persistence.lancedb import LanceRepoBase
|
||||
"""
|
||||
|
||||
# Re-export the LanceDB-flavoured Pydantic primitives so business code has a
|
||||
# single canonical entry point for table schemas.
|
||||
from lancedb.pydantic import LanceModel as LanceModel
|
||||
from lancedb.pydantic import Vector as Vector
|
||||
|
||||
from .base import BaseLanceTable as BaseLanceTable
|
||||
from .base import touch as touch
|
||||
from .connection import open_lancedb_connection as open_lancedb_connection
|
||||
from .repository import LanceDailyLogRepoBase as LanceDailyLogRepoBase
|
||||
from .repository import LanceRepoBase as LanceRepoBase
|
||||
|
||||
__all__ = [
|
||||
"BaseLanceTable",
|
||||
"LanceDailyLogRepoBase",
|
||||
"LanceModel",
|
||||
"LanceRepoBase",
|
||||
"Vector",
|
||||
"open_lancedb_connection",
|
||||
"touch",
|
||||
]
|
||||
158
src/everos/core/persistence/lancedb/base.py
Normal file
158
src/everos/core/persistence/lancedb/base.py
Normal file
@ -0,0 +1,158 @@
|
||||
"""Common LanceDB base for everos tables.
|
||||
|
||||
:class:`BaseLanceTable` adds ``created_at`` / ``updated_at`` columns and
|
||||
the :attr:`BM25_FIELDS` declaration + :meth:`ensure_fts_indexes`
|
||||
classmethod so each schema owns *both* its column shape **and** its
|
||||
BM25 index spec — repos stay focused on queries.
|
||||
|
||||
Note:
|
||||
LanceDB has no SQL ``onupdate`` equivalent — the application must
|
||||
explicitly set ``updated_at = get_utc_now()`` before calling
|
||||
:meth:`AsyncTable.update` / :meth:`AsyncTable.merge_insert`. The
|
||||
convenience :func:`touch` helper does this in one call.
|
||||
|
||||
**Every datetime column automatically carries ``tz=UTC`` in the
|
||||
Arrow schema.** LanceDB's Pydantic→PyArrow converter does not
|
||||
understand ``typing.Annotated`` metadata, so :data:`UtcDatetime`
|
||||
cannot be used as the field type annotation. Instead,
|
||||
:meth:`BaseLanceTable.to_arrow_schema` walks the inferred schema
|
||||
and rewrites every ``timestamp[us]`` (naive) column to
|
||||
``timestamp[us, tz=UTC]``. PyArrow then auto-``astimezone(UTC)``
|
||||
aware inputs on write **and** returns aware UTC datetimes on read
|
||||
— no per-table configuration, no caller-side ``ensure_utc``.
|
||||
|
||||
Subclasses just declare ``datetime`` fields normally::
|
||||
|
||||
class Episode(BaseLanceTable):
|
||||
timestamp: dt.datetime
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import ClassVar
|
||||
|
||||
import pyarrow as pa
|
||||
from lancedb import AsyncTable
|
||||
from lancedb.index import FTS
|
||||
from lancedb.pydantic import LanceModel
|
||||
from pydantic import Field
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
|
||||
|
||||
class BaseLanceTable(LanceModel):
    """Pydantic / LanceDB base with ``created_at`` / ``updated_at`` and
    schema-level LanceDB metadata (``TABLE_NAME`` / ``BM25_FIELDS``).

    The schema is the single source of truth for everything LanceDB
    needs to materialise the table: column shape, table name, vector
    dim (declared per-subclass), and which columns carry an FTS index.
    Repos read these ClassVars; they do not duplicate them.
    """

    TABLE_NAME: ClassVar[str] = ""
    """LanceDB table name. Business schemas must override (e.g.
    ``"episode"``). Left empty on chassis / test schemas that construct
    their table inline."""

    BM25_FIELDS: ClassVar[list[str]] = []
    """Columns to build LanceDB FTS (BM25) indexes on.

    Each declared column must already exist as a ``str`` (or
    ``str | None``) field on the schema. Tokens are assumed to be
    **app-layer pre-tokenised** (space-joined); the FTS index uses
    ``base_tokenizer="whitespace"`` so segmentation is owned by the
    app layer (:class:`JiebaTokenizer`). The same boundary owns stop-
    word filtering (English + Chinese); FTS-side ``remove_stop_words``
    is OFF. FTS *does* keep lightweight English-aware normalisation
    (``lower_case`` / ``stem`` / ``ascii_folding``) as a belt-and-
    braces layer on the same English tokens that survive jieba.
    See ``17_lancedb_tables_design.md`` §2.4.1 and
    :meth:`ensure_fts_indexes` below for the exact knobs."""

    # Both stamps default to "now" at construction; ``updated_at`` must be
    # refreshed by the application before updates (see ``touch``).
    created_at: dt.datetime = Field(default_factory=get_utc_now)
    updated_at: dt.datetime = Field(default_factory=get_utc_now)

    @classmethod
    def to_arrow_schema(cls) -> pa.Schema:
        """Patch the default Arrow schema: force every timestamp to ``tz=UTC``.

        The base ``LanceModel.to_arrow_schema()`` infers Arrow types from
        Pydantic field annotations and emits naive ``timestamp[us]`` for
        every :class:`datetime.datetime` column. We rewrite **every**
        timestamp column to ``timestamp[us, tz=UTC]``:

        * **on write** — PyArrow ``astimezone(UTC)``-s aware input
          automatically before serialising the i64 epoch micros.
        * **on read** — PyArrow returns aware UTC datetimes.

        Zero per-table configuration. The rewrite also **overrides any
        non-UTC tz** a subclass might have declared explicitly, because
        project convention is: storage is always UTC. Mixed-tz columns
        would violate the two-zone discipline (see
        ``docs/datetime.md``); enforcing UTC at the schema level closes
        that loophole.

        Only the *type* of timestamp fields is replaced: field name,
        nullability and field-level metadata are carried over, and the
        schema-level metadata produced by the base implementation is
        preserved rather than dropped.

        Returns:
            The inferred Arrow schema with all top-level timestamp columns
            typed as ``timestamp[us, tz=UTC]``.
        """
        base = super().to_arrow_schema()
        utc_micros = pa.timestamp("us", tz="UTC")
        # ``with_type`` swaps only the type, so nullability and any
        # field-level metadata survive the rewrite.
        fields = [
            f.with_type(utc_micros) if pa.types.is_timestamp(f.type) else f
            for f in base
        ]
        # Re-attach schema-level metadata; rebuilding the schema from bare
        # fields would otherwise silently discard it.
        return pa.schema(fields, metadata=base.metadata)

    @classmethod
    async def ensure_fts_indexes(cls, table: AsyncTable) -> None:
        """Create FTS indexes on every column in :attr:`BM25_FIELDS`.

        Idempotent: columns that already have an index are skipped, so
        this is safe to call on every startup. The FTS config is fixed
        to the app-layer pre-tokenisation + LanceDB normalisation
        convention (designed for **multilingual mixed content**):

        - ``base_tokenizer="whitespace"`` — split on the spaces our
          app-layer tokenizer provider already inserted between tokens.
        - ``lower_case=True`` — Unicode-aware case-fold (English A→a;
          no-op on CJK characters).
        - ``stem=True`` — Porter / Snowball English stemmer per
          ``language="English"`` (tantivy default). CJK tokens have no
          stemmer and pass through untouched.
        - ``remove_stop_words=False`` — **stop-word removal is owned by
          the app-layer** (:class:`JiebaTokenizer`), which already drops
          both Chinese and English stop-words before tokens reach the
          FTS index. Keeping FTS-side filtering off avoids double-
          filtering and a divided source of truth.
        - ``ascii_folding=True`` — strips diacritics (é→e) on Latin
          characters; no-op on CJK.
        - ``with_position=True`` — enables phrase queries.

        Subclasses normally do not need to override this — declaring
        :attr:`BM25_FIELDS` is enough.

        Args:
            table: Open async table whose indexes are inspected / created.
        """
        if not cls.BM25_FIELDS:
            return
        indices = await table.list_indices()
        # NOTE(review): this collects columns covered by *any* index, not
        # only FTS ones, so a BM25 column that already carries another
        # index type would be skipped here — confirm that is intended.
        indexed_cols = {col for idx in indices for col in (idx.columns or [])}
        for column in cls.BM25_FIELDS:
            if column in indexed_cols:
                continue
            await table.create_index(
                column=column,
                config=FTS(
                    with_position=True,
                    base_tokenizer="whitespace",
                    lower_case=True,
                    stem=True,
                    remove_stop_words=False,
                    ascii_folding=True,
                ),
            )
|
||||
|
||||
|
||||
def touch(record: BaseLanceTable) -> BaseLanceTable:
    """Stamp ``record.updated_at`` with the current UTC time.

    The record is mutated in place and the same instance is returned, so
    the call can be chained inline before an update / merge-insert.
    """
    now = get_utc_now()
    record.updated_at = now
    return record
|
||||
68
src/everos/core/persistence/lancedb/connection.py
Normal file
68
src/everos/core/persistence/lancedb/connection.py
Normal file
@ -0,0 +1,68 @@
|
||||
"""Async LanceDB connection factory.
|
||||
|
||||
LanceDB does not live inside the SQLAlchemy ecosystem; it has its own
|
||||
``connect_async`` returning :class:`lancedb.AsyncConnection`. This module
|
||||
is a thin wrapper that:
|
||||
|
||||
1. ensures the lancedb root directory exists
|
||||
2. converts ``LanceDBSettings.read_consistency_seconds`` into the
|
||||
:class:`datetime.timedelta` value LanceDB expects
|
||||
3. installs a capped :class:`lancedb.Session` so the global index
|
||||
cache cannot grow unbounded and exhaust file descriptors
|
||||
(see :attr:`LanceDBSettings.index_cache_size_bytes` for the
|
||||
full rationale)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from pathlib import Path
|
||||
|
||||
import lancedb
|
||||
from lancedb import AsyncConnection
|
||||
|
||||
from everos.config import LanceDBSettings
|
||||
|
||||
|
||||
async def open_lancedb_connection(
    lancedb_dir: Path,
    lancedb_settings: LanceDBSettings,
) -> AsyncConnection:
    """Open an async LanceDB connection rooted at ``lancedb_dir``.

    Args:
        lancedb_dir: Filesystem path of the LanceDB root (typically
            ``MemoryRoot.lancedb_dir``). Created, with parents, if missing.
        lancedb_settings: Tunables. ``read_consistency_seconds`` becomes
            the connection's read-consistency interval (``None`` is passed
            through as "no interval"); ``index_cache_size_bytes`` caps the
            session's index cache.

    Returns:
        An :class:`AsyncConnection` ready for table operations.
    """
    # mkdir is a microsecond-fast syscall and only does work on the first
    # connect; not worth pulling in anyio.Path / aiofiles for it.
    lancedb_dir.mkdir(parents=True, exist_ok=True)  # noqa: ASYNC240

    # LanceDB wants a timedelta (or None), the settings carry plain seconds.
    seconds = lancedb_settings.read_consistency_seconds
    interval: dt.timedelta | None = (
        None if seconds is None else dt.timedelta(seconds=seconds)
    )

    # Cap the index cache so its readers (each one holds the FDs of an
    # opened ``_indices/<uuid>/...`` directory) are LRU-evicted instead of
    # leaking; uncapped, a long-running daemon's FD count grows
    # monotonically until ``EMFILE``. The metadata cache deliberately stays
    # at the lancedb default (unbounded): it holds parsed in-memory
    # manifests with zero FD pressure, and a cap there would just thrash.
    # See ``LanceDBSettings`` for the measurement behind the default size.
    session = lancedb.Session(
        index_cache_size_bytes=lancedb_settings.index_cache_size_bytes,
        metadata_cache_size_bytes=None,
    )

    connection = await lancedb.connect_async(
        str(lancedb_dir),
        read_consistency_interval=interval,
        session=session,
    )
    return connection
|
||||
530
src/everos/core/persistence/lancedb/repository.py
Normal file
530
src/everos/core/persistence/lancedb/repository.py
Normal file
@ -0,0 +1,530 @@
|
||||
"""Generic CRUD repository for LanceDB-backed tables.
|
||||
|
||||
``LanceRepoBase`` mirrors the SQLite ``RepoBase`` shape: a pure generic
|
||||
CRUD helper that knows nothing about a storage runtime. Concrete repos
|
||||
either pass an :class:`AsyncTable` explicitly (typical in tests) or
|
||||
override :meth:`_table_lookup` to pull the cached table from their
|
||||
storage manager (typical in
|
||||
:mod:`everos.infra.persistence.lancedb.repos`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as dt
|
||||
from collections.abc import Sequence
|
||||
from typing import Any, ClassVar
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
from .base import BaseLanceTable
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _q(value: str) -> str:
|
||||
"""Escape single quotes for a LanceDB SQL-like ``where`` predicate.
|
||||
|
||||
LanceDB has no parameterised query API; predicates are strings.
|
||||
Doubling the quote (``'`` → ``''``) is the SQL-standard way to keep
|
||||
a literal single quote inside a single-quoted string. everos's PK
|
||||
convention (``<owner_id>_<entry_id>``) never carries quotes — this
|
||||
is defensive.
|
||||
"""
|
||||
return value.replace("'", "''")
|
||||
|
||||
|
||||
class LanceRepoBase[T: BaseLanceTable]:
|
||||
"""Generic CRUD repository for one LanceDB table.
|
||||
|
||||
Subclass and bind to a schema. Two ways to provide the table:
|
||||
|
||||
1. **Explicit (tests / DI)** — pass it to ``__init__``::
|
||||
|
||||
repo = EpisodeRepo(table)
|
||||
|
||||
2. **Lazy hook (production singletons)** — override
|
||||
:meth:`_table_lookup` so the repo can be instantiated as a
|
||||
module-level singleton with no live connection yet::
|
||||
|
||||
class _EpisodeRepo(LanceRepoBase[Episode]):
|
||||
schema = Episode
|
||||
|
||||
async def _table_lookup(self):
|
||||
from everos.infra.persistence.lancedb.lancedb_manager import (
|
||||
get_table,
|
||||
)
|
||||
return await get_table(self.schema.TABLE_NAME, self.schema)
|
||||
|
||||
episode_repo = _EpisodeRepo()
|
||||
await episode_repo.add([Episode(text=..., vector=[...])])
|
||||
|
||||
The LanceDB table name lives on the schema (``BaseLanceTable.TABLE_NAME``)
|
||||
so every LanceDB-side metadatum — column shape, table name,
|
||||
vector dim, BM25 index spec — sits in one place. ``table_name``
|
||||
here is a thin pass-through; subclasses normally do **not**
|
||||
override it.
|
||||
|
||||
Write paths (``add`` / ``upsert`` / ``delete`` / ``delete_by_md_path``)
|
||||
are serialised by a per-``table_name`` :class:`asyncio.Lock`. LanceDB's
|
||||
``merge_insert`` is a read-modify-write at the storage layer with no
|
||||
application-visible OCC contract — two concurrent calls against the
|
||||
same table can race on the version manifest and lose updates even
|
||||
when the row sets are disjoint (observed: cascade worker
|
||||
``asyncio.gather`` over a batch of ``user_profile`` rows where one
|
||||
write disappears). Serialising on the table name closes that window;
|
||||
reads stay unlocked so search QPS is not impacted by writers.
|
||||
|
||||
Locks live in a class-level dict keyed by table name and are never
|
||||
evicted (mirrors :mod:`everos.memory.strategies._partition_locks`
|
||||
on bpo-28427 — a lock with pending waiters must outlive any dict
|
||||
entry that points to it).
|
||||
"""
|
||||
|
||||
schema: type[T]
|
||||
|
||||
_table_locks: ClassVar[dict[str, asyncio.Lock]] = {}
|
||||
"""Per-table-name write lock pool (process-wide, lazily populated)."""
|
||||
|
||||
@property
|
||||
def table_name(self) -> str:
|
||||
"""LanceDB table name, resolved from :attr:`schema.TABLE_NAME`."""
|
||||
return self.schema.TABLE_NAME
|
||||
|
||||
@classmethod
|
||||
def _write_lock(cls, table_name: str) -> asyncio.Lock:
|
||||
"""Return the write lock for ``table_name``; create on first use.
|
||||
|
||||
``dict.setdefault`` is atomic under single-threaded asyncio (no
|
||||
``await`` between check and insert), so no meta-lock is needed.
|
||||
"""
|
||||
return cls._table_locks.setdefault(table_name, asyncio.Lock())
|
||||
|
||||
@classmethod
|
||||
def _reset_locks_for_tests(cls) -> None:
|
||||
"""Test-only: drop the write-lock pool.
|
||||
|
||||
``asyncio.Lock`` binds to the current event loop on first
|
||||
``acquire()``; pytest-asyncio creates a fresh loop per test, so
|
||||
a module-level lock surviving across tests fails with "bound to
|
||||
a different event loop". The production cascade worker runs on
|
||||
one loop forever and does not need this hook. Mirrors
|
||||
:func:`everos.memory.strategies._partition_locks._reset_for_tests`.
|
||||
"""
|
||||
cls._table_locks.clear()
|
||||
|
||||
def __init__(self, table: AsyncTable | None = None) -> None:
|
||||
"""Bind to a table directly; if ``None``, defer to ``_table_lookup``."""
|
||||
self._table_override = table
|
||||
|
||||
async def _table_lookup(self) -> AsyncTable:
|
||||
"""Resolve the table on first use. Override in subclass.
|
||||
|
||||
``LanceRepoBase`` itself has no idea where the runtime singleton
|
||||
lives. The default raises so a missing override is loud rather
|
||||
than silently broken.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{type(self).__name__}: pass table= to __init__ "
|
||||
"or override _table_lookup() to wire the storage manager."
|
||||
)
|
||||
|
||||
async def _table(self) -> AsyncTable:
|
||||
if self._table_override is not None:
|
||||
return self._table_override
|
||||
return await self._table_lookup()
|
||||
|
||||
# ── Create ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def add(self, records: Sequence[T]) -> None:
    """Append ``records`` to the table while holding the table write lock.

    Args:
        records: Rows to insert; copied into a ``list`` before being
            handed to the table.
    """
    tbl = await self._table()
    lock = self._write_lock(self.table_name)
    async with lock:
        await tbl.add(list(records))
|
||||
|
||||
# ── Upsert ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def upsert(
    self,
    records: Sequence[T],
    *,
    by: str = "id",
) -> None:
    """Insert-or-replace ``records`` using column ``by`` as the match key.

    Built on LanceDB's fluent ``merge_insert(on=...)`` API and behaves
    like ``INSERT ... ON CONFLICT(by) DO UPDATE``: a row whose key
    already exists is overwritten in full, any other row is inserted.

    The cascade relies on this while reconciling md → LanceDB — a newly
    seen entry becomes a new row, an entry edited in md overwrites its
    existing row.

    Args:
        records: Rows to merge into the table.
        by: Key column used for matching (default ``"id"``).
    """
    tbl = await self._table()
    async with self._write_lock(self.table_name):
        # Assemble the merge step by step, then run it under the lock.
        merge = tbl.merge_insert(by)
        merge = merge.when_matched_update_all()
        merge = merge.when_not_matched_insert_all()
        await merge.execute(list(records))
|
||||
|
||||
# ── Maintenance ────────────────────────────────────────────────────────
|
||||
|
||||
async def optimize(self, *, cleanup_older_than: dt.timedelta | None = None) -> None:
    """Compact table fragments and fold new data into the FTS / vector indexes.

    Background: ``merge_insert`` lands new rows in a fresh fragment,
    while the FTS (BM25) index created by :meth:`ensure_fts_indexes`
    only knows the fragments that existed when it was built. Rows
    written afterwards can therefore be **invisible to BM25 queries**
    until ``optimize()`` merges their fragments into the index segment
    the query engine reads.

    Observed symptom (verified on LoCoMo conv0): after steady-state
    cascade ingest, ``nearest_to_text("any_common_word")`` returned 0
    hits although every row contained the token, because the new
    fragments had not been indexed yet.

    Scheduling: the cascade calls this through a per-kind throttle with
    a trailing-edge scheduler (``CascadeWorker._schedule_optimize``) —
    at most one run per ~1s window per kind, decoupled from the drain
    loop, plus a 60s heartbeat sweep as a safety net. Each optimized
    fragment costs an O(N) data rewrite; the throttle is what bounds
    that cost under sustained writes.

    Args:
        cleanup_older_than: If given, additionally prune (physically
            delete) files of dataset versions older than this interval.
            With the default ``None`` only compaction happens, so old
            manifests, replaced data fragments and stale index UUID
            files stay on disk indefinitely and the file count (and FD
            usage at scan time) grows without bound. The cascade passes
            a value on a slower beat
            (``CascadeWorker._optimize_prune_interval``) to keep the hot
            drain path cheap. Pruning does *not* shrink **active** index
            internals (FTS ``part_N`` count, vector index UUID count);
            those only collapse through ``drop_index + create_index``,
            which this method does not perform.
    """
    tbl = await self._table()
    await tbl.optimize(cleanup_older_than=cleanup_older_than)
|
||||
|
||||
async def rebuild_indexes(self) -> None:
    """Drop all indexes on the table, then rebuild via ``ensure_fts_indexes``.

    **Why this exists** — it works around a gap in the upstream Python
    API. Lance's Rust ``OptimizeOptions`` offers ``num_indices_to_merge``
    (default 1), which caps the number of active index UUIDs per column:
    with ``Some(1)`` each ``optimize_indices()`` call merges its delta
    into the base and the active UUID count stays at 1. Two things
    prevent using it from the application layer:

    1. ``lancedb.AsyncTable.optimize()`` does **not expose** the
       parameter (verified on lancedb main 2026-05-28); only
       ``cleanup_since_ms`` and ``delete_unverified`` are forwarded to
       Rust.
    2. Going to Lance directly through ``pylance`` does not help either:
       the merge is buggy on ``lance crate 4.0`` (embedded by lancedb
       0.30.2) — ``num_indices_to_merge=1`` has no effect. The fix
       landed in ``lance 7.x``, but ``pylance 7.x`` can not collapse
       indexes on a ``lance 4.0``-format dataset (verified by
       experiment).

    Consequently the current stack has **no application-level path** to
    bound active index UUID growth: every ``optimize()`` adds one new
    UUID (vector) / one new ``part_N`` (FTS).

    The workaround here removes every existing index and rebuilds from
    the schema's ``ensure_fts_indexes`` contract. It is an **O(N) full
    retrain** but cheap in practice (~0.3s for 50k rows × 2 FTS columns
    on local SSD); meanwhile LanceDB transparently falls back to a
    brute-force scan, so queries and writes remain available.

    NOTE(review): only ``ensure_fts_indexes`` runs after the drop.
    Whether it also restores non-FTS (vector / scalar) indexes is not
    visible from this method — confirm against the schema class.

    **Cadence** — :class:`CascadeWorker` runs this on a slow loop
    (default 12h per kind). The interval is driven by rebuild cost, not
    correctness — daily would also be fine; 12h is a conservative choice
    that keeps file/UUID counts well under any FD ceiling during
    steady-state ingest.

    **When to remove** — as soon as lancedb exposes
    ``num_indices_to_merge`` on the async Python API **and** the
    embedded ``lance crate`` ships a working merge, delete this method
    and call ``optimize(num_indices_to_merge=1)`` from the regular
    ``optimize()`` path. Tracking issues / context:

    - https://github.com/lancedb/lancedb/issues/2193
    - https://github.com/lancedb/lancedb/issues/3177
    - https://github.com/lance-format/lance/pull/6711 (partial fix
      in lance v7.0.0)
    - https://docs.rs/lancedb/latest/lancedb/table/struct.OptimizeOptions.html
    """
    tbl = await self._table()
    async with self._write_lock(self.table_name):
        # Drop everything first, then rebuild in the same critical section
        # so writers never interleave with a half-rebuilt index set.
        existing = await tbl.list_indices()
        for index in existing:
            await tbl.drop_index(index.name)
        await self.schema.ensure_fts_indexes(tbl)
|
||||
|
||||
# ── Read ───────────────────────────────────────────────────────────────
|
||||
|
||||
async def count(self) -> int:
    """Return the number of rows currently in the table."""
    tbl = await self._table()
    total = await tbl.count_rows()
    return total
|
||||
|
||||
async def get_by_id(
    self,
    id_value: str,
    *,
    id_field: str = "id",
) -> T | None:
    """Look up a single row by its scalar primary key.

    Runs the LanceDB scalar filter ``<id_field> = '<id_value>'`` with a
    limit of one. ``id_value`` goes through ``_q`` so embedded single
    quotes are doubled and cannot break the SQL-like predicate. The
    everos PK convention ``<owner_id>_<entry_id>`` never contains
    quotes, so this escaping is purely defensive. ``id_field`` is
    interpolated verbatim and must be a trusted column name.

    Args:
        id_value: Key value to match.
        id_field: Column holding the key (default ``"id"``).

    Returns:
        The validated schema instance, or ``None`` when no row matches.
    """
    tbl = await self._table()
    predicate = f"{id_field} = '{_q(id_value)}'"
    matches = await tbl.query().where(predicate).limit(1).to_list()
    return self.schema.model_validate(matches[0]) if matches else None
|
||||
|
||||
async def find_where(
    self,
    where: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Run a pure scalar filter and return *typed* schema instances.

    Unlike :meth:`search`, which returns raw LanceDB row dicts, every
    row is passed through ``schema.model_validate``. No vector ANN is
    involved; reach for :meth:`search` when ``_distance`` is needed or
    ANN should be combined with filters.

    Args:
        where: SQL-like scalar predicate.
        limit: Maximum number of rows to return.
    """
    tbl = await self._table()
    query = tbl.query().where(where).limit(limit)
    raw_rows = await query.to_list()
    validate = self.schema.model_validate
    return [validate(row) for row in raw_rows]
|
||||
|
||||
async def find_one_where(self, where: str) -> T | None:
    """Return the first row matching ``where``, or ``None`` if none does.

    Thin single-row wrapper around :meth:`find_where` with ``limit=1``.
    """
    matches = await self.find_where(where, limit=1)
    if not matches:
        return None
    return matches[0]
|
||||
|
||||
async def find_where_paginated(
    self,
    where: str,
    *,
    sort_by: str,
    descending: bool = True,
    page: int = 1,
    page_size: int = 20,
    max_fetch: int = 20000,
) -> tuple[list[T], int]:
    """Paginated scalar query with in-memory sort.

    LanceDB has no native ``ORDER BY``. Up to ``max_fetch`` rows
    matching ``where`` are fetched, the resulting Arrow table is sorted
    by ``sort_by``, and the ``page`` × ``page_size`` window is sliced
    out. The predicate's *true* row count is returned alongside the page
    so callers can render pagination controls without a second query.

    Args:
        where: SQL-like scalar predicate. Required (no implicit
            full-table scan from ``find_where_paginated``).
        sort_by: Column name to sort the result set by.
        descending: ``True`` (default) → newest first; ``False`` →
            ascending.
        page: 1-indexed page number; must be ``>= 1``.
        page_size: Rows per page; must be ``>= 0`` (``0`` yields an
            empty page but still reports ``total``).
        max_fetch: Cap on rows pulled before the in-memory sort. When
            the predicate matches more rows than this cap the page is
            sorted over an *arbitrary* prefix and the page contents are
            only approximately correct — a warning is logged so the
            truncation is visible. Pages past the cap come back empty
            even though ``total`` reports more rows.

    Returns:
        ``(rows, total)`` — ``rows`` is the typed page, ``total`` is
        ``count_rows(filter=where)`` (the predicate's true match count,
        regardless of ``max_fetch``).

    Raises:
        ValueError: If ``page < 1`` or ``page_size < 0``.
    """
    # Reject bad paging arguments before touching the table: a
    # non-positive page yields a negative slice offset, which otherwise
    # only fails inside pyarrow after two round trips to the table.
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")
    if page_size < 0:
        raise ValueError(f"page_size must be >= 0, got {page_size}")

    table = await self._table()
    total = await table.count_rows(filter=where)
    if total > max_fetch:
        logger.warning(
            "find_where_paginated truncated",
            extra={
                "table": self.table_name,
                "where": where,
                "total": total,
                "max_fetch": max_fetch,
            },
        )
    arrow_tbl = await table.query().where(where).limit(max_fetch).to_arrow()
    order = "descending" if descending else "ascending"
    arrow_tbl = arrow_tbl.sort_by([(sort_by, order)])
    offset = (page - 1) * page_size
    page_rows = arrow_tbl.slice(offset, page_size)
    return (
        [self.schema.model_validate(r) for r in page_rows.to_pylist()],
        total,
    )
|
||||
|
||||
async def find_by_owner(
    self,
    owner_id: str,
    *,
    limit: int = 100,
) -> list[T]:
    """Return rows whose ``owner_id`` column equals ``owner_id``.

    The ``owner_id`` column is shared by the 5 business tables.

    Args:
        owner_id: Owner to filter on (quote-escaped via ``_q``).
        limit: Maximum number of rows to return.
    """
    predicate = f"owner_id = '{_q(owner_id)}'"
    return await self.find_where(predicate, limit=limit)
|
||||
|
||||
async def find_by_md_path(self, md_path: str) -> T | None:
    """Map an md path back to its row (used by cascade on md edits).

    Returns:
        The first row whose ``md_path`` matches, or ``None``.
    """
    predicate = f"md_path = '{_q(md_path)}'"
    return await self.find_one_where(predicate)
|
||||
|
||||
async def search(
    self,
    *,
    vector: Sequence[float] | None = None,
    where: str | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Hybrid search combining optional vector ANN with a scalar predicate.

    Args:
        vector: Query embedding for nearest-neighbour search; ``None``
            skips the ANN step.
        where: SQL-like predicate (e.g. ``"tags = 'meeting'"``);
            ``None`` applies no filter.
        limit: Maximum number of rows to return.

    Returns:
        Raw LanceDB row dicts — the fields follow ``schema``, and
        ``_distance`` is added when ``vector`` is supplied.
    """
    tbl = await self._table()
    query = tbl.query()
    # Each optional clause is layered on only when requested.
    if vector is not None:
        query = query.nearest_to(list(vector))
    if where is not None:
        query = query.where(where)
    query = query.limit(limit)
    return await query.to_list()
|
||||
|
||||
# ── Delete ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def delete(self, predicate: str) -> None:
    """Remove every row matching the SQL-like ``predicate``.

    Runs under the table write lock. ``predicate`` is passed through
    unchanged, so callers are responsible for escaping values.
    """
    tbl = await self._table()
    lock = self._write_lock(self.table_name)
    async with lock:
        await tbl.delete(predicate)
|
||||
|
||||
async def delete_by_md_path(self, md_path: str) -> int:
    """Remove all rows whose ``md_path`` equals the given path.

    Used by the cascade handler when an md file disappears from disk,
    or when reverse-reconcile finds an orphaned LanceDB row. Single
    quotes in ``md_path`` are doubled defensively before interpolation.

    Returns:
        Number of rows deleted, as reported by the delete result.
    """
    tbl = await self._table()
    predicate = f"md_path = '{_q(md_path)}'"
    async with self._write_lock(self.table_name):
        outcome = await tbl.delete(predicate)
    return int(outcome.num_deleted_rows)
|
||||
|
||||
|
||||
class LanceDailyLogRepoBase[T: BaseLanceTable](LanceRepoBase[T]):
|
||||
"""LanceRepoBase + queries unique to daily-log tables.
|
||||
|
||||
Daily-log tables (``episode`` / ``atomic_fact`` / ``foresight`` /
|
||||
``agent_case``) share a fixed schema slice: ``entry_id`` (md seq
|
||||
id), ``session_id`` (conversation scope), and ``parent_type`` /
|
||||
``parent_id`` (record lineage). The queries below compose those
|
||||
columns; ``agent_skill`` is *not* a daily-log (it is a named
|
||||
single-file entity) and uses :class:`LanceRepoBase` directly.
|
||||
"""
|
||||
|
||||
async def find_by_owner_entry(
|
||||
self,
|
||||
owner_id: str,
|
||||
entry_id: str,
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> T | None:
|
||||
"""Single point-query by ``(app, project, owner_id, entry_id)``.
|
||||
|
||||
``entry_id`` is only unique within a (app, project, owner) scope —
|
||||
the same ``ac_<date>_<seq>`` recurs in another space — so the
|
||||
scope segments are part of the predicate to avoid a cross-space hit.
|
||||
"""
|
||||
return await self.find_one_where(
|
||||
f"owner_id = '{_q(owner_id)}' AND entry_id = '{_q(entry_id)}' "
|
||||
f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'"
|
||||
)
|
||||
|
||||
async def find_by_owner_entries(
|
||||
self,
|
||||
owner_id: str,
|
||||
entry_ids: Sequence[str],
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> list[T]:
|
||||
"""Bulk point-query by ``(app, project, owner_id, entry_id IN ...)``.
|
||||
|
||||
Empty ``entry_ids`` short-circuits to ``[]`` rather than emit a
|
||||
``WHERE entry_id IN ()`` predicate (LanceDB rejects empty
|
||||
tuples). The query's ``limit`` is bound to ``len(entry_ids)``
|
||||
because at most one row per id can exist under one (app, project,
|
||||
owner) scope.
|
||||
"""
|
||||
if not entry_ids:
|
||||
return []
|
||||
quoted = ", ".join(f"'{_q(eid)}'" for eid in entry_ids)
|
||||
return await self.find_where(
|
||||
f"owner_id = '{_q(owner_id)}' AND entry_id IN ({quoted}) "
|
||||
f"AND app_id = '{_q(app_id)}' AND project_id = '{_q(project_id)}'",
|
||||
limit=len(entry_ids),
|
||||
)
|
||||
|
||||
async def find_by_session(
|
||||
self,
|
||||
owner_id: str,
|
||||
session_id: str,
|
||||
*,
|
||||
limit: int = 100,
|
||||
) -> list[T]:
|
||||
"""Every row in one conversation ``session_id`` under ``owner_id``."""
|
||||
return await self.find_where(
|
||||
f"owner_id = '{_q(owner_id)}' AND session_id = '{_q(session_id)}'",
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def find_by_parent(
|
||||
self,
|
||||
parent_type: str,
|
||||
parent_id: str,
|
||||
*,
|
||||
limit: int = 100,
|
||||
) -> list[T]:
|
||||
"""Every row whose parent matches ``(parent_type, parent_id)``."""
|
||||
return await self.find_where(
|
||||
f"parent_type = '{_q(parent_type)}' AND parent_id = '{_q(parent_id)}'",
|
||||
limit=limit,
|
||||
)
|
||||
76
src/everos/core/persistence/locking.py
Normal file
76
src/everos/core/persistence/locking.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""Process-wide exclusive lock on a memory-root.
|
||||
|
||||
Uses ``fcntl.flock`` (POSIX advisory locking, available on Linux + macOS;
|
||||
Windows is not supported — see project README on platform scope). The
|
||||
public surface is an :func:`contextlib.asynccontextmanager` so callers
|
||||
use ``async with memory_root_lock(mr):``; the underlying syscalls have
|
||||
no async equivalent so they run in a worker thread via
|
||||
:func:`anyio.to_thread.run_sync`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fcntl
|
||||
import os
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import anyio
|
||||
|
||||
from .memory_root import MemoryRoot
|
||||
|
||||
|
||||
class LockError(RuntimeError):
    """Failure to take the memory-root lock in non-blocking mode.

    Raised by :func:`memory_root_lock` when ``blocking=False`` and the
    lock is already held elsewhere.
    """
|
||||
|
||||
|
||||
@asynccontextmanager
async def memory_root_lock(
    memory_root: MemoryRoot,
    *,
    blocking: bool = True,
) -> AsyncIterator[None]:
    """Acquire an exclusive process lock on the memory-root.

    The lock is an ``fcntl.flock`` on the anchor file
    ``memory_root.lock_file``; it is released and the descriptor closed
    when the ``async with`` body exits, including on error or
    cancellation.

    Args:
        memory_root: The memory-root to lock. The root directory and the
            lock anchor file are created on first use.
        blocking: If ``True`` (default), wait until the lock is free. If
            ``False``, raise :class:`LockError` immediately when another
            process holds it.

    Raises:
        LockError: When ``blocking=False`` and the lock is already held.
        OSError: Any other failure reported by ``open`` / ``flock`` is
            propagated after the descriptor has been closed.
    """
    await anyio.Path(memory_root.root).mkdir(parents=True, exist_ok=True)
    lock_path = memory_root.lock_file

    # Open the anchor file (create on first use). The fd, not the path, is
    # what fcntl operates on. ``os.open`` is microsecond-fast but offloaded
    # for consistency with the rest of the lock acquisition flow.
    fd = await anyio.to_thread.run_sync(
        lambda: os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o644)
    )

    flags = fcntl.LOCK_EX
    if not blocking:
        flags |= fcntl.LOCK_NB

    try:
        await anyio.to_thread.run_sync(fcntl.flock, fd, flags)
    except BaseException as exc:
        # Whatever went wrong (contention, another OSError, cancellation),
        # the fd must not leak. Closing it also drops a lock the worker
        # thread may have obtained just before a cancellation surfaced.
        # The shield keeps this cleanup await from being cancelled again.
        with anyio.CancelScope(shield=True):
            await anyio.to_thread.run_sync(os.close, fd)
        if isinstance(exc, BlockingIOError):
            raise LockError(
                f"another process already holds the memory-root lock at {lock_path}"
            ) from exc
        raise

    # Lock acquired — release + close strictly on exit. The failure path
    # above already closed its fd and re-raised, so it never reaches this
    # block (no double-close).
    try:
        yield
    finally:
        # anyio cancellation is level-triggered: once the surrounding scope
        # is cancelled, every unshielded await is cancelled again, which
        # would skip both the unlock and the close. Shield the cleanup.
        with anyio.CancelScope(shield=True):
            try:
                await anyio.to_thread.run_sync(fcntl.flock, fd, fcntl.LOCK_UN)
            finally:
                await anyio.to_thread.run_sync(os.close, fd)
|
||||
62
src/everos/core/persistence/markdown/__init__.py
Normal file
62
src/everos/core/persistence/markdown/__init__.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""Markdown file IO toolkit.
|
||||
|
||||
Atomic write + YAML frontmatter parse/dump + entry marker parse +
|
||||
audit-form structured-entry parsing. Knows nothing about business
|
||||
models (no MemCell / Episode); the :class:`Entry` here is a
|
||||
*marker-delimited* span within a markdown body, not a business record.
|
||||
|
||||
External usage (IO + parse):
|
||||
from everos.core.persistence.markdown import (
|
||||
Entry, EntryId, StructuredEntry,
|
||||
MarkdownReader, MarkdownWriter, ParsedMarkdown,
|
||||
parse_frontmatter, dump_frontmatter,
|
||||
split_entries, find_entry,
|
||||
parse_structured_entry, render_structured_entry,
|
||||
)
|
||||
|
||||
External usage (frontmatter schema chassis):
|
||||
from everos.core.persistence.markdown import (
|
||||
BaseFrontmatter, UserScopedFrontmatter, AgentScopedFrontmatter,
|
||||
DailyLogPathMixin, SkillPathMixin, ProfilePathMixin,
|
||||
)
|
||||
"""
|
||||
|
||||
from .entries import Entry as Entry
|
||||
from .entries import EntryId as EntryId
|
||||
from .entries import StructuredEntry as StructuredEntry
|
||||
from .entries import find_entry as find_entry
|
||||
from .entries import parse_structured_entry as parse_structured_entry
|
||||
from .entries import render_structured_entry as render_structured_entry
|
||||
from .entries import split_entries as split_entries
|
||||
from .frontmatter import AgentScopedFrontmatter as AgentScopedFrontmatter
|
||||
from .frontmatter import BaseFrontmatter as BaseFrontmatter
|
||||
from .frontmatter import DailyLogPathMixin as DailyLogPathMixin
|
||||
from .frontmatter import ProfilePathMixin as ProfilePathMixin
|
||||
from .frontmatter import SkillPathMixin as SkillPathMixin
|
||||
from .frontmatter import UserScopedFrontmatter as UserScopedFrontmatter
|
||||
from .frontmatter import dump_frontmatter as dump_frontmatter
|
||||
from .frontmatter import parse_frontmatter as parse_frontmatter
|
||||
from .parsed import ParsedMarkdown as ParsedMarkdown
|
||||
from .reader import MarkdownReader as MarkdownReader
|
||||
from .writer import MarkdownWriter as MarkdownWriter
|
||||
|
||||
__all__ = [
|
||||
"AgentScopedFrontmatter",
|
||||
"BaseFrontmatter",
|
||||
"DailyLogPathMixin",
|
||||
"Entry",
|
||||
"EntryId",
|
||||
"MarkdownReader",
|
||||
"MarkdownWriter",
|
||||
"ParsedMarkdown",
|
||||
"ProfilePathMixin",
|
||||
"SkillPathMixin",
|
||||
"StructuredEntry",
|
||||
"UserScopedFrontmatter",
|
||||
"dump_frontmatter",
|
||||
"find_entry",
|
||||
"parse_frontmatter",
|
||||
"parse_structured_entry",
|
||||
"render_structured_entry",
|
||||
"split_entries",
|
||||
]
|
||||
368
src/everos/core/persistence/markdown/entries.py
Normal file
368
src/everos/core/persistence/markdown/entries.py
Normal file
@ -0,0 +1,368 @@
|
||||
"""Markdown entries — id format, marker spans, and audit-form parsing.
|
||||
|
||||
Three closely-related entry concepts live together here so a reader
|
||||
sees the whole entry surface in one file:
|
||||
|
||||
1. :class:`EntryId` — the ``<prefix>_<YYYYMMDD>_<NNNN>`` structured id
|
||||
stamped into each daily-log entry's open / close markers. Carries
|
||||
the prefix declared by the frontmatter schema, the date bucket, and
|
||||
the in-file zero-padded sequence.
|
||||
|
||||
2. :class:`Entry` — a marker-delimited span inside a markdown body::
|
||||
|
||||
<!-- entry:abc123 -->
|
||||
...content...
|
||||
<!-- /entry:abc123 -->
|
||||
|
||||
:func:`split_entries` and :func:`find_entry` locate these spans
|
||||
without interpreting the inner content. Higher layers (writers,
|
||||
cascade) parse it per record type.
|
||||
|
||||
3. :class:`StructuredEntry` — :class:`Entry` extended with the parsed
|
||||
audit-form body fields (header / inline / sections). Built either
|
||||
from a raw body string via :func:`parse_structured_entry` or from
|
||||
an existing :class:`Entry` via :meth:`Entry.as_structured`.
|
||||
|
||||
Audit-form layout::
|
||||
|
||||
## <header> ← optional H2 (usually entry id, for grep)
|
||||
|
||||
**key**: value ← inline fields, one per line
|
||||
**key2**: value2
|
||||
|
||||
### Section Title ← section fields: H3 + free-form text
|
||||
body content...
|
||||
|
||||
### Another Section
|
||||
more content...
|
||||
|
||||
The audit chassis is intentionally **type-agnostic** — every field
|
||||
round-trips as a string. Inline values are stringified on render
|
||||
(lists become ``[a, b, c]``, scalars use ``str()``); on parse
|
||||
everything is the raw text after the colon. Section titles are kept
|
||||
verbatim. This keeps parsing tolerant of stray fields, wrapped
|
||||
strings, and manually-typed timestamps; the strong-typed model lives
|
||||
in business writers + the SQLite/LanceDB indexes.
|
||||
|
||||
Cross-user uniqueness is handled at the database layer via a composite
|
||||
``<user_id>_<entry_id>`` field; it is *not* encoded into the
|
||||
:class:`EntryId` string itself.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
import re
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Self
|
||||
|
||||
# ── EntryId — structured id for marker stamping ─────────────────────────
|
||||
|
||||
_DATE_FMT = "%Y%m%d"
|
||||
_SEQ_DIGITS = 8
|
||||
"""Minimum zero-padding for the in-file seq.
|
||||
|
||||
8 digits keeps lexicographic order == numeric order up to 10**8
|
||||
entries per file (per user, per day). ``format()`` is "at least 8" —
|
||||
larger seqs emit more digits without truncation. ``parse`` is
|
||||
permissive: shorter (legacy 4-digit) and longer seq strings both
|
||||
parse cleanly; format normalises to >= 8 digits on round-trip.
|
||||
"""
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class EntryId:
|
||||
"""Parsed components of an entry id (``<prefix>_<YYYYMMDD>_<NNNN>``)."""
|
||||
|
||||
prefix: str
|
||||
date: _dt.date
|
||||
seq: int
|
||||
|
||||
def format(self) -> str:
|
||||
"""Render as ``<prefix>_<YYYYMMDD>_<NNNN>``."""
|
||||
return (
|
||||
f"{self.prefix}_{self.date.strftime(_DATE_FMT)}_{self.seq:0{_SEQ_DIGITS}d}"
|
||||
)
|
||||
|
||||
def __str__(self) -> str: # noqa: D401
|
||||
return self.format()
|
||||
|
||||
@classmethod
|
||||
def parse(cls, s: str) -> Self:
|
||||
"""Parse ``<prefix>_<YYYYMMDD>_<NNNN>``.
|
||||
|
||||
Uses ``rsplit("_", 2)`` so a multi-segment prefix (rare, but
|
||||
possible) is preserved as-is.
|
||||
"""
|
||||
parts = s.rsplit("_", 2)
|
||||
if len(parts) != 3:
|
||||
raise ValueError(f"invalid entry id format: {s!r}")
|
||||
prefix, date_str, seq_str = parts
|
||||
if not prefix:
|
||||
raise ValueError(f"empty prefix in entry id: {s!r}")
|
||||
try:
|
||||
d = _dt.datetime.strptime(date_str, _DATE_FMT).date()
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"invalid date in entry id: {s!r}") from exc
|
||||
try:
|
||||
seq = int(seq_str)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"invalid seq in entry id: {s!r}") from exc
|
||||
if seq < 0:
|
||||
raise ValueError(f"negative seq in entry id: {s!r}")
|
||||
return cls(prefix=prefix, date=d, seq=seq)
|
||||
|
||||
@classmethod
|
||||
def next_for(cls, prefix: str, date: _dt.date, current_count: int) -> Self:
|
||||
"""Build the id for the next entry given the file's current count.
|
||||
|
||||
``current_count`` is the value of ``frontmatter.entry_count``
|
||||
*before* this append. The new id gets ``seq = current_count + 1``.
|
||||
"""
|
||||
if current_count < 0:
|
||||
raise ValueError(f"current_count must be >= 0, got {current_count}")
|
||||
return cls(prefix=prefix, date=date, seq=current_count + 1)
|
||||
|
||||
|
||||
# ── Entry — marker-delimited span inside a body ─────────────────────────
|
||||
|
||||
# Characters allowed in a marker id: safe in filenames and URLs alike.
_ID_PATTERN = r"[A-Za-z0-9_-]+"
# Opening marker ``<!-- entry:<id> -->``; group 1 captures the id.
_OPEN_RE = re.compile(
    rf"<!-- entry:({_ID_PATTERN}) -->",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Entry:
    """A single ``<!-- entry:id -->`` … ``<!-- /entry:id -->`` span.

    Attributes:
        id: Text captured between ``entry:`` and ``-->`` in the opening
            marker.
        body: Everything between the two markers, minus one leading and
            one trailing newline (the padding a formatter typically
            emits).
        start: Index in the source body where the opening
            ``<!-- entry:id -->`` begins.
        end: Index in the source body immediately after the closing
            ``<!-- /entry:id -->``.
    """

    id: str
    body: str
    start: int
    end: int

    def as_structured(self) -> StructuredEntry:
        """Return this entry with its body parsed as audit-form data.

        Delegates to :func:`parse_structured_entry`, handing over this
        instance as the origin so ``id`` / ``body`` / ``start`` / ``end``
        carry over and the parsed ``header`` / ``inline`` / ``sections``
        are layered on top.
        """
        source_text = self.body
        return parse_structured_entry(source_text, _origin=self)
|
||||
|
||||
|
||||
def split_entries(body: str) -> list[Entry]:
    """Collect every complete entry span in ``body``, in document order.

    The scan walks from one opening marker to its matching closing
    marker and resumes right after it. An opening marker without a
    matching close ends the scan at that point — the partial entry and
    anything after it are not returned. Callers that need strict
    validation should add their own check.
    """
    found: list[Entry] = []
    cursor = 0
    while (opener := _OPEN_RE.search(body, cursor)) is not None:
        marker_id = opener.group(1)
        closer = _close_re_for(marker_id).search(body, opener.end())
        if closer is None:
            # Unterminated entry: give up on the rest of the body.
            break
        inner = body[opener.end() : closer.start()]
        found.append(
            Entry(
                id=marker_id,
                body=_strip_one_newline(inner),
                start=opener.start(),
                end=closer.end(),
            )
        )
        cursor = closer.end()
    return found
|
||||
|
||||
|
||||
def find_entry(body: str, entry_id: str) -> Entry | None:
    """Locate the first complete entry whose marker id is ``entry_id``.

    Returns:
        The :class:`Entry`, or ``None`` when the opening marker is
        absent or no closing marker follows it.
    """
    opener = re.search(rf"<!-- entry:{re.escape(entry_id)} -->", body)
    if opener is None:
        return None
    closer = _close_re_for(entry_id).search(body, opener.end())
    if closer is None:
        return None
    inner = body[opener.end() : closer.start()]
    return Entry(
        id=entry_id,
        body=_strip_one_newline(inner),
        start=opener.start(),
        end=closer.end(),
    )
|
||||
|
||||
|
||||
def _close_re_for(entry_id: str) -> re.Pattern[str]:
    """Compile the regex matching the closing marker of ``entry_id``.

    The id is escaped, so it is matched literally.
    """
    literal_id = re.escape(entry_id)
    pattern = rf"<!-- /entry:{literal_id} -->"
    return re.compile(pattern)
|
||||
|
||||
|
||||
def _strip_one_newline(text: str) -> str:
    """Drop at most one newline from each end of ``text``.

    Both ``\\r\\n`` and ``\\n`` count as a newline; the two-character
    form is tried first so a CRLF is removed as a unit. Further
    newlines are left in place.
    """
    newlines = ("\r\n", "\n")
    for nl in newlines:
        if text.startswith(nl):
            text = text[len(nl) :]
            break
    for nl in newlines:
        if text.endswith(nl):
            text = text[: -len(nl)]
            break
    return text
|
||||
|
||||
|
||||
# ── StructuredEntry — Entry + parsed audit-form fields ──────────────────
|
||||
|
||||
# NOTE: after ``##`` / ``###`` / ``**key**:`` the patterns use
# ``[^\S\n]`` (whitespace other than a newline) instead of ``\s``. With
# ``\s`` the separator could run across the line break, so an empty
# inline value (``**key**:`` — what a ``None`` value renders to) captured
# the *next* line as its value, and an empty heading captured the
# following line as its title.

# H2 line: ``## <header>``.
_H2_RE = re.compile(r"^##[^\S\n]+(.+?)\s*$", re.MULTILINE)
# Inline field: ``**key**: value``. Anchored to line start so a stray
# ``**emphasis**`` mid-paragraph isn't mistaken for a field. The value
# may be empty.
_INLINE_RE = re.compile(
    r"^\*\*(?P<key>[^*\n]+?)\*\*:[^\S\n]*(?P<value>.*?)\s*$",
    re.MULTILINE,
)
# H3 line: ``### Title``.
_H3_RE = re.compile(r"^###[^\S\n]+(.+?)\s*$", re.MULTILINE)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StructuredEntry(Entry):
    """An :class:`Entry` plus the audit-form fields parsed from its body.

    ``id`` / ``body`` / ``start`` / ``end`` come from :class:`Entry`;
    they are zeroed when the instance is built from a bare body string
    that has no marker context. On top of those, three parsed views of
    the body are exposed:

    - ``header``: text of the optional H2 line, ``None`` when absent.
    - ``inline``: mapping built from the ``**key**: value`` lines.
    - ``sections``: mapping of ``### Title`` headings to section text.

    All audit-form values are plain strings. Converting them to richer
    types is left to the caller (the strongly-typed model lives in the
    writer / index).
    """

    header: str | None = None
    inline: dict[str, str] = field(default_factory=dict)
    sections: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
def render_structured_entry(
    *,
    header: str | None = None,
    inline: Mapping[str, object] | None = None,
    sections: Mapping[str, str] | None = None,
) -> str:
    """Build the text of an audit-form entry body.

    Args:
        header: Optional text for a leading ``## <header>`` line, followed
            by one blank line. Skipped when falsy. Typically the entry id —
            redundant with the marker but handy for plain-text grep.
        inline: ``{key: value}`` pairs, each written as ``**key**: value``
            in mapping order. Values go through ``_render_value``:
            ``list`` / ``tuple`` become ``[a, b, c]``, ``None`` becomes the
            empty string, anything else is ``str()``-ed.
        sections: ``{title: body}`` pairs, each written as a blank line,
            ``### <title>`` (verbatim), then the body with trailing
            whitespace removed.

    Returns:
        The rendered lines joined with ``\\n`` and no trailing newline
        added; markers and surrounding newlines are the caller's job
        (typically :meth:`MarkdownWriter.append_entry`).
    """
    rendered: list[str] = [f"## {header}", ""] if header else []

    rendered.extend(
        f"**{key}**: {_render_value(value)}" for key, value in (inline or {}).items()
    )

    for title, text in (sections or {}).items():
        rendered += ["", f"### {title}", text.rstrip()]

    return "\n".join(rendered)
|
||||
|
||||
|
||||
def parse_structured_entry(
    body: str, *, _origin: Entry | None = None
) -> StructuredEntry:
    """Parse an audit-form entry body into a :class:`StructuredEntry`.

    Nothing is type-coerced: header, inline values and section bodies are
    all returned as strings.

    The parser tolerates:

    - a missing H2 line (``header`` is then ``None``);
    - inline-looking lines anywhere in the body — only those *before the
      first H3* are collected into ``inline``;
    - extra whitespace and stray lines, which simply stay inside the body
      of whichever section encloses them.

    Args:
        body: Raw entry text (the content between the entry markers).
        _origin: When supplied (as :meth:`Entry.as_structured` does), its
            ``id`` / ``body`` / ``start`` / ``end`` are copied onto the
            result. Otherwise those fields are ``""`` / ``body`` / ``0`` /
            ``len(body)``.

    Returns:
        The populated :class:`StructuredEntry`.
    """
    # Splitting on a one-group pattern yields
    # ``[head, title1, content1, title2, content2, ...]``.
    head, *rest = _H3_RE.split(body.strip("\n"))
    sections = {
        title.strip(): content.strip("\n").rstrip()
        for title, content in zip(rest[0::2], rest[1::2])
    }

    header_match = _H2_RE.search(head)
    header = header_match.group(1).strip() if header_match else None

    inline = {
        found.group("key").strip(): found.group("value").strip()
        for found in _INLINE_RE.finditer(head)
    }

    # Identity/span fields come from the originating Entry when there is
    # one; otherwise fall back to values derived from the raw body.
    if _origin is None:
        ident, raw, span_start, span_end = "", body, 0, len(body)
    else:
        ident, raw = _origin.id, _origin.body
        span_start, span_end = _origin.start, _origin.end

    return StructuredEntry(
        id=ident,
        body=raw,
        start=span_start,
        end=span_end,
        header=header,
        inline=inline,
        sections=sections,
    )
|
||||
|
||||
|
||||
def _render_value(value: object) -> str:
|
||||
"""Stringify an inline value the audit-friendly way."""
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, list | tuple):
|
||||
return "[" + ", ".join(str(item) for item in value) + "]"
|
||||
return str(value)
|
||||
300
src/everos/core/persistence/markdown/frontmatter.py
Normal file
300
src/everos/core/persistence/markdown/frontmatter.py
Normal file
@ -0,0 +1,300 @@
|
||||
"""Frontmatter — YAML block parse / dump + L1 schema chassis.
|
||||
|
||||
Frontmatter is the leading ``---``-delimited YAML block at the top of
|
||||
a markdown document::
|
||||
|
||||
---
|
||||
title: Hello
|
||||
tags: [a, b]
|
||||
---
|
||||
# Body starts here
|
||||
|
||||
Two complementary surfaces live here:
|
||||
|
||||
1. :func:`parse_frontmatter` / :func:`dump_frontmatter` — schema-free
|
||||
YAML helpers (``yaml.safe_load`` / ``yaml.safe_dump``,
|
||||
``sort_keys=False`` so caller-controlled key order is preserved).
|
||||
|
||||
2. The L1 chassis classes — :class:`BaseFrontmatter`,
|
||||
:class:`UserScopedFrontmatter`, :class:`AgentScopedFrontmatter` —
|
||||
which fix the *absolute-readonly* fields (``id`` / ``type`` /
|
||||
``schema_version``) plus scope (``user_id`` / ``agent_id`` +
|
||||
``track``). Every business frontmatter schema in
|
||||
``infra/persistence/markdown/mds/`` subclasses one of these.
|
||||
|
||||
Concrete business schemas (``UserMemcellDailyFrontmatter``,
|
||||
``SkillFrontmatter``, …) live in ``infra``; they add per-record
|
||||
business fields plus the path-resolution metadata daily-log writers
|
||||
need (``ENTRY_ID_PREFIX`` / ``DIR_NAME`` / ``FILE_PREFIX``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, ClassVar, Literal
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
# ── YAML helpers ────────────────────────────────────────────────────────
|
||||
|
||||
# Delimiter line that opens and closes a frontmatter block.
_DELIM = "---"
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Parse a leading ``---\\n...\\n---\\n`` YAML block.

    Args:
        text: Full document text, possibly starting with a frontmatter block.

    Returns:
        (meta, remainder): ``meta`` is the parsed YAML mapping (empty dict
        if no frontmatter present, malformed, or non-mapping). ``remainder``
        is everything after the closing delimiter line — including the body's
        leading content as-is.

    Notes:
        - If the document does not start with ``---``, returns ``({}, text)``
          unchanged.
        - If the opening ``---`` is not immediately followed by a newline,
          returns ``({}, text)``.
        - If a closing ``---`` line is not found, returns ``({}, text)``.
        - If the YAML block is empty (``---\\n---\\n``), returns
          ``({}, remainder)``.
        - If the block is not valid YAML, returns ``({}, text)`` instead of
          letting the parser error escape.
        - If the parsed YAML is not a mapping (e.g. a scalar or a list),
          returns ``({}, text)`` — frontmatter must be a mapping.
    """
    if not text.startswith(_DELIM):
        return {}, text

    # Skip the opening "---" and the newline that must follow it.
    rest = text[len(_DELIM) :]
    if rest.startswith("\r\n"):
        rest = rest[2:]
    elif rest.startswith("\n"):
        rest = rest[1:]
    else:
        # Opening "---" not followed by a newline → not a valid frontmatter.
        return {}, text

    closing_idx = _find_closing_delim(rest)
    if closing_idx is None:
        return {}, text

    yaml_block = rest[:closing_idx]
    remainder = rest[closing_idx + len(_DELIM) :]
    # Drop the newline that follows the closing delimiter, if any.
    if remainder.startswith("\r\n"):
        remainder = remainder[2:]
    elif remainder.startswith("\n"):
        remainder = remainder[1:]

    try:
        parsed: Any = yaml.safe_load(yaml_block) if yaml_block.strip() else {}
    except yaml.YAMLError:
        # Broken YAML is "malformed frontmatter": honour the documented
        # contract and hand the document back untouched rather than raising.
        return {}, text
    if parsed is None:
        # A block holding only comments / whitespace loads as ``None``.
        parsed = {}
    if not isinstance(parsed, dict):
        return {}, text
    return parsed, remainder
|
||||
|
||||
|
||||
def dump_frontmatter(meta: Mapping[str, Any]) -> str:
    """Serialise ``meta`` as a ``---\\n<yaml>\\n---\\n`` frontmatter block.

    Args:
        meta: Mapping to serialise. Key order is kept as supplied
            (``sort_keys=False``); the YAML is block-style and non-ASCII
            characters are written unescaped.

    Returns:
        The delimited block, or the empty string (no delimiters at all)
        when ``meta`` is empty.
    """
    if not meta:
        return ""
    payload = yaml.safe_dump(
        dict(meta),
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    # ``payload`` is concatenated directly before the closing delimiter.
    return "".join((_DELIM, "\n", payload, _DELIM, "\n"))
|
||||
|
||||
|
||||
def _find_closing_delim(text: str) -> int | None:
    """Locate the first line of ``text`` that consists solely of ``---``.

    Lines are delimited by ``\\n`` (or the string boundaries); trailing
    ``\\r`` characters are ignored when comparing, so CRLF input works.

    Returns:
        Offset of the first character of the matching line, or ``None``
        when no line matches.
    """
    cursor = 0
    total = len(text)
    while cursor < total:
        newline_at = text.find("\n", cursor)
        # The final line may have no terminating newline.
        line_end = total if newline_at == -1 else newline_at
        if text[cursor:line_end].rstrip("\r") == _DELIM:
            return cursor
        if newline_at == -1:
            break
        cursor = newline_at + 1
    return None
|
||||
|
||||
|
||||
# ── L1 schema chassis ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class BaseFrontmatter(BaseModel):
    """Fields every markdown frontmatter block must carry (L1).

    ``id`` / ``type`` / ``schema_version`` form the *absolute-readonly*
    tier of the EverOS Markdown First spec: they identify a record across
    markdown ↔ LanceDB and must never be rewritten by a human edit.

    Scope (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``) and
    per-record business fields are layered on by subclasses.
    """

    SCOPE_DIR: ClassVar[str] = ""
    """Name of the top-level directory (under the memory-root) for this kind.

    The scope classes set it to ``"users"`` / ``"agents"``. Scope-agnostic
    schemas (rare) keep the empty default; anything that has to resolve a
    path (writers, layout reverse-lookup) must reject a schema whose
    ``SCOPE_DIR`` is empty.
    """

    id: str
    type: str
    schema_version: int = 1

    # Extra keys are accepted so that L2 system-managed metadata
    # (``md_sha256``, ``last_indexed_at``, ``lsn``, …) can travel on the
    # same model without each subclass having to redeclare it.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def path_glob(cls) -> str:
        """Return an ``fnmatch``-style glob, relative to the memory-root,
        that covers every markdown file described by this schema.

        The cascade kind registry relies on this: the scanner walks each
        kind's ``path_glob()`` to enumerate eligible files, so path
        patterns are not hard-coded in cascade and the schema stays the
        single source of truth for writer path resolution and scanner
        enumeration alike.

        This base version is a stub. Subclasses must override it —
        usually by listing :class:`DailyLogPathMixin` or
        :class:`SkillPathMixin` *before* the scope class in their bases so
        the MRO picks the mixin's implementation.

        Raises:
            NotImplementedError: Always, on the base implementation.
        """
        message = (
            f"{cls.__name__} must declare path_glob() "
            f"(mix in DailyLogPathMixin / SkillPathMixin, or override directly)"
        )
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class DailyLogPathMixin:
    """Path strategy for daily-log markdown files.

    Below the ``<app>/<project>`` prefix, a file lives at
    ``<SCOPE_DIR>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.
    ``SCOPE_DIR`` is expected from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``); ``DIR_NAME``
    and ``FILE_PREFIX`` must be declared by the concrete subclass.

    List **this mixin first** among the bases so the MRO resolves
    ``path_glob()`` to the implementation here rather than to the
    ``NotImplementedError`` stub on :meth:`BaseFrontmatter.path_glob`::

        class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
            DIR_NAME: ClassVar[str] = "episodes"
            FILE_PREFIX: ClassVar[str] = "episode"
            ...
    """

    DIR_NAME: ClassVar[str]
    FILE_PREFIX: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the glob covering every daily-log file of this kind."""
        # The two leading ``*`` segments stand for the <app>/<project> scope
        # prefix in front of every user-visible dir. The scanner's
        # ``root.glob`` is anchored at root, so the prefix is mandatory
        # (nothing matches without it); the watcher's right-anchored
        # ``PurePosixPath.match`` agrees on the same shape.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.DIR_NAME,
            f"{cls.FILE_PREFIX}-*.md",
        )
        return "/".join(segments)
|
||||
|
||||
|
||||
class SkillPathMixin:
    """Path strategy for skill directories.

    Below the ``<app>/<project>`` prefix, each skill's main file lives at
    ``<SCOPE_DIR>/<scope_id>/<SKILLS_CONTAINER_NAME>/
    <SKILL_DIR_PREFIX><skill_name>/<SKILL_MAIN_FILENAME>``. The glob covers
    only that main file; sibling ``references/*.md`` and ``scripts/*`` are
    excluded (they ride alongside the main file and the cascade daemon
    rebuilds the index column by concatenation, see
    :class:`AgentSkillFrontmatter`'s docstring).

    List **this mixin first** among the bases so the MRO resolves
    ``path_glob()`` here::

        class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
            SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
            SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
            SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
            ...
    """

    SKILLS_CONTAINER_NAME: ClassVar[str]
    SKILL_DIR_PREFIX: ClassVar[str]
    SKILL_MAIN_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the glob covering the main file of every skill."""
        # The two leading ``*`` segments stand for the <app>/<project>
        # scope prefix.
        segments = (
            "*",
            "*",
            cls.SCOPE_DIR,
            "*",
            cls.SKILLS_CONTAINER_NAME,
            f"{cls.SKILL_DIR_PREFIX}*",
            cls.SKILL_MAIN_FILENAME,
        )
        return "/".join(segments)
|
||||
|
||||
|
||||
class ProfilePathMixin:
    """Path strategy for single-file profile markdown.

    Below the ``<app>/<project>`` prefix, a profile lives at
    ``<SCOPE_DIR>/<scope_id>/<PROFILE_FILENAME>``: one fixed-name file
    directly inside the owner's directory. Unlike daily-logs there is no
    intermediate ``<dir>/`` segment, and unlike skills there is no
    per-name subdirectory. ``SCOPE_DIR`` is expected from a scope class
    (``UserScopedFrontmatter`` / ``AgentScopedFrontmatter``);
    ``PROFILE_FILENAME`` must be declared by the concrete subclass.

    List **this mixin first** among the bases so the MRO resolves
    ``path_glob()`` here::

        class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
            PROFILE_FILENAME: ClassVar[str] = "user.md"
            ...
    """

    PROFILE_FILENAME: ClassVar[str]
    SCOPE_DIR: ClassVar[str]

    @classmethod
    def path_glob(cls) -> str:
        """Return the glob covering every profile file of this kind."""
        # The two leading ``*`` segments stand for the <app>/<project>
        # scope prefix.
        return "/".join(("*", "*", cls.SCOPE_DIR, "*", cls.PROFILE_FILENAME))
|
||||
|
||||
|
||||
class UserScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one user (``track`` = ``"user"``).

    Only the *file-level* scope is stored here — ``user_id``, which the
    file's path already expresses. Business attributes such as
    ``group_id`` belong to each entry's structured body instead; see
    :class:`StructuredEntry` in :mod:`.entries`.
    """

    SCOPE_DIR: ClassVar[str] = "users"

    user_id: str
    track: Literal["user"] = "user"
|
||||
|
||||
|
||||
class AgentScopedFrontmatter(BaseFrontmatter):
    """Frontmatter for records owned by one agent (``track`` = ``"agent"``).

    The scope-vs-business split mirrors :class:`UserScopedFrontmatter`:
    ``agent_id`` is the file-level scope, while ``group_id`` and similar
    attributes are carried by each entry rather than by the file
    frontmatter.
    """

    SCOPE_DIR: ClassVar[str] = "agents"

    agent_id: str
    track: Literal["agent"] = "agent"
|
||||
31
src/everos/core/persistence/markdown/parsed.py
Normal file
31
src/everos/core/persistence/markdown/parsed.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""Parsed-markdown data type.
|
||||
|
||||
The output shape of :class:`MarkdownReader` is held here, separate
|
||||
from the reader implementation: callers that only consume parse
|
||||
results don't need to import the reader machinery, and downstream
|
||||
modules (writer, business readers) can produce :class:`ParsedMarkdown`
|
||||
without going through ``MarkdownReader.read`` if they already hold
|
||||
the pieces.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from .entries import Entry
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ParsedMarkdown:
    """Immutable result of parsing one markdown document.

    Attributes:
        frontmatter: The YAML mapping from the leading frontmatter block;
            an empty dict when the document has none.
        body: The text following the frontmatter block. Entry markers and
            their content are still present in it.
        entries: The marker-delimited entries found inside ``body``.
            Defaults to a fresh empty list per instance.
    """

    frontmatter: dict[str, Any]
    body: str
    entries: list[Entry] = field(default_factory=list)
|
||||
42
src/everos/core/persistence/markdown/reader.py
Normal file
42
src/everos/core/persistence/markdown/reader.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""Markdown file reader.
|
||||
|
||||
Loads a markdown document and splits it into:
|
||||
|
||||
1. ``frontmatter`` — parsed YAML (empty dict if absent)
|
||||
2. ``body`` — raw text after the closing ``---`` delimiter
|
||||
3. ``entries`` — marker-delimited spans inside ``body``
|
||||
|
||||
The reader is purely parsing; it does not validate frontmatter shape,
|
||||
entry content, or cross-references. Higher layers add business-aware
|
||||
checks. The :class:`ParsedMarkdown` data type lives in :mod:`.parsed`.
|
||||
|
||||
``parse`` is sync (pure in-memory string processing). ``read`` is async
|
||||
and uses :class:`anyio.Path` so file I/O does not block the event loop.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
|
||||
from .entries import split_entries
|
||||
from .frontmatter import parse_frontmatter
|
||||
from .parsed import ParsedMarkdown
|
||||
|
||||
|
||||
class MarkdownReader:
    """Turn markdown text or files into :class:`ParsedMarkdown` values."""

    @staticmethod
    def parse(text: str) -> ParsedMarkdown:
        """Parse text that is already in memory; performs no I/O.

        The frontmatter is split off first, then the entries are
        discovered inside the remaining body.
        """
        frontmatter, remainder = parse_frontmatter(text)
        return ParsedMarkdown(
            frontmatter=frontmatter,
            body=remainder,
            entries=split_entries(remainder),
        )

    @staticmethod
    async def read(path: Path) -> ParsedMarkdown:
        """Load ``path`` as UTF-8 text via :class:`anyio.Path` and parse it."""
        source = anyio.Path(path)
        return MarkdownReader.parse(await source.read_text(encoding="utf-8"))
|
||||
269
src/everos/core/persistence/markdown/writer.py
Normal file
269
src/everos/core/persistence/markdown/writer.py
Normal file
@ -0,0 +1,269 @@
|
||||
"""Markdown file writer with atomic write semantics.
|
||||
|
||||
Atomicity is provided by writing to a same-directory temp file
|
||||
(``.<name>.tmp.<uuid>``) and using :func:`os.replace` to rename it onto
|
||||
the target. Keeping the temp file in the same directory guarantees the
|
||||
rename is on the same filesystem (POSIX rename is atomic only within a
|
||||
single fs).
|
||||
|
||||
All public methods are async. File I/O (``read_text`` / ``write_text``
|
||||
/ ``mkdir``) goes through :class:`anyio.Path`; the few syscalls without
|
||||
a native async equivalent (``os.fsync`` / ``os.replace`` / ``unlink``
|
||||
in the cleanup path) are offloaded via :func:`anyio.to_thread.run_sync`.
|
||||
|
||||
In-process per-path locking
|
||||
---------------------------
|
||||
:meth:`append_entry` / :meth:`append_entries` are read-modify-write of
|
||||
the whole file (load frontmatter+body, merge an entry block, atomic
|
||||
write the result). The atomic write itself is safe, but the read→write
|
||||
window crosses ``await`` points. Concurrent asyncio tasks targeting the
|
||||
same path would otherwise lose-update each other (both read N entries,
|
||||
both produce N+1, second write overwrites the first → 1 entry lost).
|
||||
|
||||
To prevent this, an in-process per-path :class:`asyncio.Lock` is held
|
||||
across the entire read-modify-write sequence. Lock objects live on the
|
||||
writer instance (not class-level) so they bind to the event loop active
|
||||
when the writer was constructed — this avoids the
|
||||
"Lock bound to different loop" failure mode that surfaces when
|
||||
pytest-asyncio rebuilds the loop between tests but module-level writer
|
||||
singletons leak Lock objects across boundaries.
|
||||
|
||||
Process-level coordination (multi-process writers against the same
|
||||
memory-root) remains the job of
|
||||
:func:`everos.core.persistence.locking.memory_root_lock`, which uses
|
||||
``fcntl.flock``. The two locks compose: per-path async lock serialises
|
||||
tasks within one process, ``memory_root_lock`` serialises processes
|
||||
against each other.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import uuid
|
||||
from collections.abc import Mapping, Sequence
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
|
||||
from ..memory_root import MemoryRoot
|
||||
from .entries import EntryId
|
||||
from .frontmatter import dump_frontmatter
|
||||
from .reader import MarkdownReader
|
||||
|
||||
|
||||
class MarkdownWriter:
    """Atomic writer for markdown files inside a memory-root.

    The ``memory_root`` reference is held to enable future enforcement that
    targets stay within the configured root; current writes do not depend on
    it for the rename itself (same-dir temp file).
    """

    def __init__(self, memory_root: MemoryRoot) -> None:
        self._memory_root = memory_root
        # Per-path async lock registry. ``setdefault`` is GIL-atomic, so
        # concurrent callers race only on the dict insert (resolved by
        # ``setdefault`` returning the existing value), not on the Lock.
        # Plain dict (not WeakValueDictionary): a Lock with pending waiters
        # must outlive any task awaiting it; ref-counted GC would race with
        # those waiters. See Python bpo-28427 for the WeakValueDictionary
        # multithreading hazard that bites the weak-ref approach.
        self._path_locks: dict[Path, asyncio.Lock] = {}

    @property
    def memory_root(self) -> MemoryRoot:
        """The memory-root this writer was constructed with."""
        return self._memory_root

    def lock_for(self, path: Path) -> asyncio.Lock:
        """Return the per-path lock; create on first use.

        Public so that higher-level writers (e.g. :class:`BaseDailyWriter`)
        can serialise their own multi-step ``read → compute → write``
        sequences against this writer's single-step ``append`` paths.
        Pair with :meth:`_append_entries_unlocked` to avoid reentrant
        re-acquisition of the same lock from within an already-locked
        critical section (``asyncio.Lock`` is *not* reentrant).
        """
        # Resolve to an absolute canonical path so aliases (relative vs.
        # absolute, symlinks) share the same lock object.
        key = Path(path).resolve()
        lock = self._path_locks.get(key)
        if lock is None:
            # ``setdefault`` makes the insert a single atomic dict operation:
            # if another caller registered a lock for ``key`` in the meantime
            # that lock is returned and the fresh one is discarded.
            lock = self._path_locks.setdefault(key, asyncio.Lock())
        return lock

    async def write(self, path: Path, content: str) -> Path:
        """Atomically write ``content`` to ``path``.

        Steps:
            1. ``mkdir -p`` the parent directory.
            2. Write to ``<parent>/.<name>.tmp.<uuid>``.
            3. ``flush`` + ``fsync`` the temp file.
            4. ``os.replace`` the temp file onto ``path`` (atomic on POSIX).

        If step 2–4 fails *or the task is cancelled* part-way, the staging
        file is removed (best effort) before the error propagates, so an
        interrupted write does not leave ``.tmp`` litter next to the target.

        Returns:
            The target as a :class:`~pathlib.Path`.
        """
        target = Path(path)
        await anyio.Path(target.parent).mkdir(parents=True, exist_ok=True)
        tmp = target.parent / f".{target.name}.tmp.{uuid.uuid4().hex}"
        try:
            await anyio.to_thread.run_sync(_write_and_fsync, tmp, content)
            await anyio.to_thread.run_sync(os.replace, tmp, target)
        except BaseException:
            # ``BaseException`` (not ``Exception``) so that cancellation —
            # which can land between the write and the rename — also
            # triggers cleanup. The shield lets the unlink run even though
            # the enclosing scope is already cancelled.
            with anyio.CancelScope(shield=True):
                await _unlink_quiet(tmp)
            raise
        return target

    async def write_markdown(
        self,
        path: Path,
        *,
        frontmatter: Mapping[str, Any] | None = None,
        body: str = "",
    ) -> Path:
        """Assemble ``frontmatter`` + ``body`` then atomic-write to ``path``.

        A missing or empty ``frontmatter`` produces no frontmatter block.
        """
        head = dump_frontmatter(frontmatter or {})
        return await self.write(path, head + body)

    async def append_entry(
        self,
        path: Path,
        *,
        entry_body: str,
        entry_id: EntryId,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append a single entry block to a markdown file, merging frontmatter.

        Convenience wrapper around :meth:`append_entries` for single-entry
        callers. See that method for full semantics.

        Args:
            path: Target markdown file. Created if missing.
            entry_body: Content between the open and close markers.
                One leading and trailing newline are added automatically.
            entry_id: The id to stamp on this entry. The caller normally
                builds it with :meth:`EntryId.next_for`.
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter (later wins). ``None`` skips the merge.

        Returns:
            The target as a :class:`~pathlib.Path`.
        """
        return await self.append_entries(
            path,
            [(entry_body, entry_id)],
            frontmatter_updates=frontmatter_updates,
        )

    async def append_entries(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Append ``N`` entry blocks in a single locked read-modify-write cycle.

        Compared with calling :meth:`append_entry` ``N`` times, this:

        * Performs one file read + one file write instead of ``N`` of each
          (IO complexity drops from ``O(N²)`` to ``O(N)`` when the file
          already holds many entries).
        * Holds the per-path lock for one short critical section instead of
          ``N`` separate acquisitions.
        * Updates ``frontmatter`` once at the end (no intermediate
          ``entry_count`` flapping).

        The caller assigns and supplies all :class:`EntryId` values — see
        :meth:`append_entry` for the rationale. The order in ``entries`` is
        the order the blocks land in the file.

        Args:
            path: Target markdown file. Created if missing.
            entries: ``(entry_body, entry_id)`` pairs to append, in order.
                Empty sequence is allowed; the file is still rewritten, so
                any supplied frontmatter updates are applied.
            frontmatter_updates: Mapping shallow-merged into existing
                frontmatter once after all entries are appended.

        Returns:
            The target as a :class:`~pathlib.Path`.
        """
        target = Path(path)
        async with self.lock_for(target):
            return await self._append_entries_unlocked(
                target,
                entries,
                frontmatter_updates=frontmatter_updates,
            )

    async def _append_entries_unlocked(
        self,
        path: Path,
        entries: Sequence[tuple[str, EntryId]],
        *,
        frontmatter_updates: Mapping[str, Any] | None = None,
    ) -> Path:
        """Same as :meth:`append_entries` but assumes the caller already
        holds :meth:`lock_for` ``(path)``.

        For use by higher-level writers that perform a multi-step
        ``read → compute eid → write`` sequence and need to keep the lock
        held across the read and the write. Public ``append_entries`` /
        ``append_entry`` always wrap this with the lock.

        This method never acquires the lock itself (``asyncio.Lock`` is not
        reentrant, so it could not do so from inside a locked section);
        calling it without holding the lock breaks the safety contract.
        """
        target = Path(path)

        # 1. Load existing markdown (or initialise empty).
        if await anyio.Path(target).is_file():
            parsed = await MarkdownReader.read(target)
            meta: dict[str, Any] = dict(parsed.frontmatter)
            body = parsed.body
        else:
            meta = {}
            body = ""

        # 2. Shallow-merge frontmatter updates (later wins).
        if frontmatter_updates:
            meta.update(frontmatter_updates)

        # 3. Append all entry blocks in order.
        if entries:
            # Make sure the first open marker starts on its own line.
            if body and not body.endswith("\n"):
                body += "\n"
            appended_blocks: list[str] = []
            for entry_body, entry_id in entries:
                eid_str = entry_id.format()
                appended_blocks.append(
                    f"<!-- entry:{eid_str} -->\n{entry_body}\n"
                    f"<!-- /entry:{eid_str} -->\n"
                )
            body = body + "".join(appended_blocks)

        # 4. Atomic write.
        return await self.write_markdown(target, frontmatter=meta, body=body)
|
||||
|
||||
|
||||
def _write_and_fsync(tmp: Path, content: str) -> None:
|
||||
"""Sync helper: write + fsync the staging file. Offloaded to a thread."""
|
||||
with open(tmp, "w", encoding="utf-8") as fh:
|
||||
fh.write(content)
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
|
||||
|
||||
async def _unlink_quiet(tmp: Path) -> None:
    """Delete ``tmp`` if it exists, ignoring any ``OSError``.

    Used on the failure path of a write, so an error while cleaning up
    must not replace the exception that triggered the cleanup.
    """
    staged = anyio.Path(tmp)
    with contextlib.suppress(OSError):
        await staged.unlink(missing_ok=True)
|
||||
243
src/everos/core/persistence/memory_root.py
Normal file
243
src/everos/core/persistence/memory_root.py
Normal file
@ -0,0 +1,243 @@
|
||||
"""memory-root path manager.
|
||||
|
||||
Single root directory holding all persisted memory:
|
||||
|
||||
User-visible (no dot prefix, edited by humans / agents):
|
||||
agents/ per-agent records
|
||||
users/ per-user records
|
||||
knowledge/ global shared knowledge
|
||||
|
||||
System-managed (dotfile prefix, hidden by default in ls / Finder):
|
||||
.index/ derived indexes (rebuildable from markdown)
|
||||
sqlite/ system.db (+ WAL/SHM), ome.db, ome.aps.db
|
||||
lancedb/ LanceDB tables
|
||||
.tmp/ atomic-write staging directory
|
||||
.lock single-process lock anchor (created on demand by
|
||||
``memory_root_lock``)
|
||||
|
||||
User-editable (at the root):
|
||||
ome.toml OME strategy overrides (hot-reloaded)
|
||||
|
||||
The cascade queue, LSN watermark, and change audit all live in
|
||||
``system.db`` (table ``md_change_state``), not in separate dotfiles.
|
||||
|
||||
The default location and tunables come from :class:`everos.config.Settings`
|
||||
(loaded from ``config/default.toml`` + ``EVEROS_*`` environment variables);
|
||||
:meth:`MemoryRoot.default` resolves the configured path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
# ── app / project directory-name convention ──────────────────────────────────
|
||||
#
|
||||
# A memory root is partitioned by ``<app>/<project>`` *before* the user-visible
|
||||
# scope dirs (``agents`` / ``users`` / ``knowledge``), so memory for different
|
||||
# (app, project) pairs never shares a directory. The reserved id ``"default"``
|
||||
# materialises as ``default_app`` / ``default_project`` on disk (rather than a
|
||||
# bare ``default``) so a default space is visually distinct from a user-named
|
||||
# directory; every other id maps to itself.
|
||||
#
|
||||
# The mapping is symmetric: the cascade path parser reverses it (see
|
||||
# :func:`app_id_from_dir`) to recover the ids from an on-disk path. The write
|
||||
# side (here) and the read side (cascade) MUST stay in lockstep, or rebuilt
|
||||
# rows carry app/project that disagree with what was written. ``default_app`` /
|
||||
# ``default_project`` are therefore reserved directory names.
|
||||
# Reserved id that selects the default app / project space.
_DEFAULT_SCOPE_ID = "default"
# On-disk directory name substituted for the reserved app id.
_DEFAULT_APP_DIR = "default_app"
# On-disk directory name substituted for the reserved project id.
_DEFAULT_PROJECT_DIR = "default_project"

# Path to the shipped OME override template; copied to ``<root>/ome.toml`` on
# first ``ensure()`` so users have a real file to edit instead of having to
# create one from scratch. ``parents[2]`` is the ``src/everos/`` package root
# (memory_root.py sits at ``core/persistence/memory_root.py``).
_OME_TEMPLATE_PATH = Path(__file__).parents[2] / "config" / "default_ome.toml"
|
||||
|
||||
|
||||
def app_dir_name(app_id: str) -> str:
    """Return the directory name under which ``app_id`` is stored on disk.

    The reserved id ``"default"`` is written as ``default_app``; every other
    id is used verbatim. No validation happens here, so an id spelled
    ``default_app`` lands in the same directory as the default app.
    """
    if app_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_APP_DIR
    return app_id
|
||||
|
||||
|
||||
def project_dir_name(project_id: str) -> str:
    """Return the directory name under which ``project_id`` is stored on disk.

    The reserved id ``"default"`` is written as ``default_project``; every
    other id is used verbatim.
    """
    if project_id == _DEFAULT_SCOPE_ID:
        return _DEFAULT_PROJECT_DIR
    return project_id
|
||||
|
||||
|
||||
def app_id_from_dir(dir_name: str) -> str:
    """Recover the ``app_id`` from an on-disk directory name.

    Reverses :func:`app_dir_name`: ``default_app`` becomes ``"default"`` and
    any other directory name is returned unchanged.
    """
    if dir_name == _DEFAULT_APP_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
|
||||
|
||||
|
||||
def project_id_from_dir(dir_name: str) -> str:
    """Recover the ``project_id`` from an on-disk directory name.

    Reverses :func:`project_dir_name`: ``default_project`` becomes
    ``"default"`` and any other directory name is returned unchanged.
    """
    if dir_name == _DEFAULT_PROJECT_DIR:
        return _DEFAULT_SCOPE_ID
    return dir_name
|
||||
|
||||
|
||||
@dataclass(frozen=True, init=False)
class MemoryRoot:
    """Immutable locator for every well-known path under one memory root.

    Any path-like value (``str`` or ``Path``) is accepted and stored as an
    absolute, user-expanded, resolved ``Path``; two instances created from
    different spellings of the same location therefore compare and hash
    equal. The dataclass is declared with ``init=False`` and ships its own
    ``__init__`` because the accepted input type (``Path | str``) is wider
    than the stored field type (``Path``), and stdlib dataclasses offer no
    converter hook to bridge the two.
    """

    root: Path

    def __init__(self, root: Path | str) -> None:
        normalised = Path(root).expanduser().resolve()
        # Frozen dataclasses reject ``self.root = ...``; writing through
        # ``object.__setattr__`` is the standard way to initialise one.
        object.__setattr__(self, "root", normalised)

    @classmethod
    def default(cls) -> MemoryRoot:
        """Build the instance for the root configured in the settings.

        Reads ``memory.root`` from the object returned by
        :func:`everos.config.load_settings`.
        """
        # Imported here rather than at module top so that importing this
        # module does not pull in the configuration package.
        from everos.config import load_settings

        settings = load_settings()
        return cls(settings.memory.root)

    # ── User-visible (partitioned by app / project) ──────────────────────────
    #
    # The scope directories live below ``<root>/<app>/<project>/``. The ids
    # are per-call arguments, not instance state, and both fall back to
    # ``"default"`` for callers that do not carry a scope yet.

    def _scope_dir(self, app_id: str, project_id: str, leaf: str) -> Path:
        """Join ``<root>/<app>/<project>/<leaf>`` using the dir-name mapping."""
        return self.root / app_dir_name(app_id) / project_dir_name(project_id) / leaf

    def agents_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """``<root>/<app>/<project>/agents/`` — per-agent records."""
        return self._scope_dir(app_id, project_id, "agents")

    def users_dir(self, app_id: str = "default", project_id: str = "default") -> Path:
        """``<root>/<app>/<project>/users/`` — per-user records."""
        return self._scope_dir(app_id, project_id, "users")

    def knowledge_dir(
        self, app_id: str = "default", project_id: str = "default"
    ) -> Path:
        """``<root>/<app>/<project>/knowledge/`` — shared knowledge."""
        return self._scope_dir(app_id, project_id, "knowledge")

    # ── System-managed (dotfiles) ───────────────────────────────────────────

    @property
    def index_dir(self) -> Path:
        """``<root>/.index/`` — parent of all derived index data."""
        return self.root.joinpath(".index")

    @property
    def lancedb_dir(self) -> Path:
        """``<root>/.index/lancedb/`` — directory holding the LanceDB tables."""
        return self.index_dir.joinpath("lancedb")

    @property
    def sqlite_dir(self) -> Path:
        """``<root>/.index/sqlite/`` — directory holding the SQLite files.

        Sibling of :attr:`lancedb_dir`; ``system.db`` and any sidecar files
        SQLite creates next to it live here.
        """
        return self.index_dir.joinpath("sqlite")

    @property
    def system_db(self) -> Path:
        """``<root>/.index/sqlite/system.db`` — the system SQLite database.

        Described by the project as holding system state, audit log, task
        queue, LSN watermark, and other metadata.
        """
        return self.sqlite_dir.joinpath("system.db")

    @property
    def ome_db(self) -> Path:
        """``<root>/.index/sqlite/ome.db`` — Offline Memory Engine database.

        Described by the project as backing the engine's own state (run
        records, counter store, idle store). Sibling of :attr:`system_db`.
        """
        return self.sqlite_dir.joinpath("ome.db")

    @property
    def ome_aps_db(self) -> Path:
        """``<root>/.index/sqlite/ome.aps.db`` — APScheduler jobstore file.

        Kept separate from :attr:`ome_db`; the stated rationale is that the
        scheduler's synchronous SQLAlchemy writer and the engine's async
        aiosqlite writer should never compete for one SQLite file lock.
        """
        return self.sqlite_dir.joinpath("ome.aps.db")

    @property
    def ome_config(self) -> Path:
        """``<root>/ome.toml`` — user-editable OME strategy overrides.

        The file is meant for switching strategies on or off and adjusting
        per-strategy knobs (max_retries, gate, cron …). According to the
        original notes the engine watches it and hot-reloads changes within
        roughly two seconds; that behaviour is implemented outside this
        class.

        Example that disables foresight and user-profile extraction::

            [strategies.extract_foresight]
            enabled = false

            [strategies.extract_user_profile]
            enabled = false
        """
        return self.root.joinpath("ome.toml")

    @property
    def lock_file(self) -> Path:
        """``<root>/.lock`` — anchor file for the single-process lock."""
        return self.root.joinpath(".lock")

    @property
    def tmp_dir(self) -> Path:
        """``<root>/.tmp/`` — scratch space for batch / multi-step writes.

        Note:
            Per the original notes, ``MarkdownWriter`` does *not* stage its
            atomic single-file writes here; it uses a temp file next to the
            target so the rename stays on one filesystem. This directory is
            for callers needing scratch space outside any target directory.
        """
        return self.root.joinpath(".tmp")

    # ── Operations ──────────────────────────────────────────────────────────

    def ensure(self) -> None:
        """Create the directories the runtime needs and seed ``ome.toml``.

        The user-visible directories (``agents/`` / ``users/`` /
        ``knowledge/``) are intentionally left alone; only these are made,
        in this order::

            <root>/
            <root>/.index/
            <root>/.index/sqlite/
            <root>/.index/lancedb/
            <root>/.tmp/

        Afterwards the shipped OME template is copied to ``<root>/ome.toml``
        if, and only if, no such file exists yet.
        """
        required_dirs = (
            self.root,
            self.index_dir,
            self.sqlite_dir,
            self.lancedb_dir,
            self.tmp_dir,
        )
        for directory in required_dirs:
            directory.mkdir(parents=True, exist_ok=True)

        # Only the file's existence is checked, so a file the user already
        # edited is never overwritten.
        config_path = self.ome_config
        if config_path.exists():
            return
        config_path.write_bytes(_OME_TEMPLATE_PATH.read_bytes())
|
||||
42
src/everos/core/persistence/sqlite/__init__.py
Normal file
42
src/everos/core/persistence/sqlite/__init__.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""SQLite async persistence (SQLModel + SQLAlchemy 2.0 + aiosqlite).
|
||||
|
||||
External usage (engine + sessions):
|
||||
from everos.core.persistence.sqlite import (
|
||||
create_system_engine, create_session_factory, session_scope,
|
||||
)
|
||||
|
||||
External usage (ORM model basics — re-exported from sqlmodel):
|
||||
from everos.core.persistence.sqlite import (
|
||||
SQLModel, Field, Relationship, BaseTable,
|
||||
)
|
||||
|
||||
External usage (generic CRUD repository base):
|
||||
from everos.core.persistence.sqlite import RepoBase
|
||||
|
||||
The ``system_db`` is the everos
|
||||
``<memory_root>/.index/sqlite/system.db`` SQLite file holding system
|
||||
state, audit log, task queue, LSN watermark, and other metadata.
|
||||
"""
|
||||
|
||||
# Re-export key sqlmodel symbols so business code has a single canonical
|
||||
# entry point (``everos.core.persistence.sqlite``) for ORM authoring.
|
||||
from sqlmodel import Field as Field
|
||||
from sqlmodel import Relationship as Relationship
|
||||
from sqlmodel import SQLModel as SQLModel
|
||||
|
||||
from .base import BaseTable as BaseTable
|
||||
from .engine import create_system_engine as create_system_engine
|
||||
from .repository import RepoBase as RepoBase
|
||||
from .session import create_session_factory as create_session_factory
|
||||
from .session import session_scope as session_scope
|
||||
|
||||
__all__ = [
|
||||
"BaseTable",
|
||||
"Field",
|
||||
"Relationship",
|
||||
"RepoBase",
|
||||
"SQLModel",
|
||||
"create_session_factory",
|
||||
"create_system_engine",
|
||||
"session_scope",
|
||||
]
|
||||
112
src/everos/core/persistence/sqlite/base.py
Normal file
112
src/everos/core/persistence/sqlite/base.py
Normal file
@ -0,0 +1,112 @@
|
||||
"""Common SQLModel base for everos tables.
|
||||
|
||||
:class:`BaseTable` adds ``created_at`` / ``updated_at`` columns. The
|
||||
``updated_at`` column auto-refreshes on UPDATE through SA's ``onupdate``
|
||||
hook (no explicit assignment needed in business code).
|
||||
|
||||
The **two-zone storage-UTC discipline** is enforced by a SQLAlchemy
|
||||
:class:`TypeDecorator` (:class:`UtcDateTimeColumn`) used as the SQL
|
||||
column type for every datetime field:
|
||||
|
||||
* **on write** — ``process_bind_param`` converts every datetime to
|
||||
aware UTC before SQLAlchemy emits the bound parameter. This covers
|
||||
*every* SQLAlchemy write path uniformly:
|
||||
|
||||
- ORM ``session.add()`` / ``session.merge()`` (unit-of-work flush)
|
||||
- Core ``session.execute(insert(...).values(...))``
|
||||
- Core ``session.execute(update(...).values(...))``
|
||||
- Bulk ``bulk_insert_mappings`` / ``bulk_save_objects``
|
||||
- Raw SQL whose bound parameters are explicitly typed with this column type
|
||||
|
||||
Reaching into the column type is the only place SQLAlchemy guarantees
|
||||
*every* write path passes through. Mapper events (``before_insert`` /
|
||||
``before_update``) only fire on the ORM unit-of-work path and would
|
||||
silently miss Core statements — which :mod:`everos.infra.persistence
|
||||
.sqlite.repos.md_change_state` uses heavily.
|
||||
|
||||
* **on read** — ``process_result_value`` re-attaches ``tzinfo=UTC`` to
|
||||
every naive datetime returned from SQLite (which has no native tz
|
||||
storage and always returns naive). Callers therefore never observe a
|
||||
naive datetime regardless of which read API they use.
|
||||
|
||||
Subclass with ``table=True`` to declare a real SQLite table::
|
||||
|
||||
from sqlmodel import Field
|
||||
|
||||
class Sender(BaseTable, table=True):
|
||||
id: int | None = Field(default=None, primary_key=True)
|
||||
name: str
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime
|
||||
from sqlalchemy import types as sa_types
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime, ensure_utc, get_utc_now
|
||||
|
||||
|
||||
class UtcDateTimeColumn(sa_types.TypeDecorator[_dt.datetime]):
    """Column type that normalises datetimes at the bind / result boundary.

    How it works:

    * ``impl = DateTime`` — storage is delegated to the dialect's regular
      ``DateTime`` type.
    * ``process_bind_param`` — write hook. Every ``datetime`` is passed
      through :func:`ensure_utc` (documented by the project as converting
      aware values to UTC and treating naive values as already UTC);
      ``None`` and non-datetime values are forwarded unchanged.
    * ``process_result_value`` — read hook. A naive ``datetime`` gets
      ``tzinfo=UTC`` attached; anything else (``None``, aware datetimes) is
      returned as received.

    ``cache_ok = True`` tells SQLAlchemy that statements using this type
    may be cached; the type carries no per-instance state.
    """

    impl = DateTime
    cache_ok = True

    def process_bind_param(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        # Only real datetimes are normalised; ``None`` and anything else go
        # to the underlying type exactly as supplied.
        if isinstance(value, _dt.datetime):
            return ensure_utc(value)
        return value

    def process_result_value(
        self, value: _dt.datetime | None, _dialect: Any
    ) -> _dt.datetime | None:
        # A naive value coming back from the driver is labelled as UTC; the
        # wall-clock fields are left untouched (``replace``, not a shift).
        if isinstance(value, _dt.datetime) and value.tzinfo is None:
            return value.replace(tzinfo=_dt.UTC)
        return value
|
||||
|
||||
|
||||
class BaseTable(SQLModel):
    """Base model contributing ``created_at`` and ``updated_at`` columns.

    Each field defaults to :func:`get_utc_now` when an instance is created
    without an explicit value. ``updated_at`` additionally registers
    :func:`get_utc_now` as the column's ``onupdate`` callable, so SQLAlchemy
    refreshes it on UPDATE; assign it by hand only to override that
    deliberately.

    Both fields are mapped with :class:`UtcDateTimeColumn`, so the UTC
    normalisation happens in the column type's bind / result hooks rather
    than in model code.
    """

    created_at: UtcDatetime = Field(
        sa_type=UtcDateTimeColumn,
        default_factory=get_utc_now,
    )
    updated_at: UtcDatetime = Field(
        sa_type=UtcDateTimeColumn,
        default_factory=get_utc_now,
        sa_column_kwargs={"onupdate": get_utc_now},
    )
|
||||
74
src/everos/core/persistence/sqlite/engine.py
Normal file
74
src/everos/core/persistence/sqlite/engine.py
Normal file
@ -0,0 +1,74 @@
|
||||
"""Async SQLAlchemy engine factory + per-connection PRAGMA listener.
|
||||
|
||||
The engine connects through ``aiosqlite`` (SA URL ``sqlite+aiosqlite://``).
|
||||
PRAGMAs are *per-connection* — they must be re-applied every time the
|
||||
SA pool opens a new connection. We attach a ``connect`` event listener on
|
||||
the engine's underlying sync engine for that purpose.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
||||
|
||||
from everos.config import SqliteSettings
|
||||
|
||||
|
||||
def create_system_engine(
    db_path: Path,
    sqlite_settings: SqliteSettings,
    *,
    echo: bool = False,
) -> AsyncEngine:
    """Create an async SQLAlchemy engine for the everos system DB.

    ``MemoryRoot.system_db`` is the conventional path for ``db_path``.

    Args:
        db_path: Filesystem path to the SQLite file. Its parent directory is
            created if missing.
        sqlite_settings: Tunables applied as PRAGMAs on every new connection
            (journal_mode, synchronous, foreign_keys, temp_store,
            busy_timeout, journal_size_limit, cache_size).
        echo: When ``True``, SQLAlchemy logs every statement (development).

    Returns:
        An :class:`AsyncEngine` using the ``sqlite+aiosqlite`` driver, with
        the PRAGMA listener already registered.
    """
    # Function-scope import, so this block does not depend on a change to
    # the module's import section.
    from sqlalchemy.engine import URL

    db_path.parent.mkdir(parents=True, exist_ok=True)

    # Build the URL from components instead of interpolating the path into
    # a string: in string form a ``?`` inside the path is parsed as the
    # start of the URL query and the database filename is cut short. For
    # ordinary paths the rendered URL is the same as before (three slashes
    # for a relative path, four for an absolute one).
    url = URL.create("sqlite+aiosqlite", database=str(db_path))
    engine = create_async_engine(url, echo=echo, future=True)

    _register_pragma_listener(engine, sqlite_settings)
    return engine
|
||||
|
||||
|
||||
def _register_pragma_listener(
    engine: AsyncEngine,
    sqlite_settings: SqliteSettings,
) -> None:
    """Register a ``connect`` hook that applies the configured PRAGMAs.

    PRAGMAs are scoped to a single connection, so the hook is attached to
    the async engine's underlying sync engine and runs for each connection
    the pool opens.

    Args:
        engine: Engine whose new connections should receive the PRAGMAs.
        sqlite_settings: Source of the PRAGMA values.
    """

    def _apply_pragmas(dbapi_connection, _connection_record) -> None:  # type: ignore[no-untyped-def]
        statements = (
            f"PRAGMA journal_mode={sqlite_settings.journal_mode}",
            f"PRAGMA synchronous={sqlite_settings.synchronous}",
            f"PRAGMA foreign_keys={'ON' if sqlite_settings.foreign_keys else 'OFF'}",
            f"PRAGMA temp_store={sqlite_settings.temp_store}",
            f"PRAGMA busy_timeout={sqlite_settings.busy_timeout_ms}",
            f"PRAGMA journal_size_limit={sqlite_settings.journal_size_limit_bytes}",
            # cache_size: negative = KB, positive = pages.
            f"PRAGMA cache_size=-{sqlite_settings.cache_size_kb}",
        )
        pragma_cursor = dbapi_connection.cursor()
        try:
            for statement in statements:
                pragma_cursor.execute(statement)
        finally:
            # Close the cursor even when one of the PRAGMAs fails.
            pragma_cursor.close()

    event.listen(engine.sync_engine, "connect", _apply_pragmas)
|
||||
166
src/everos/core/persistence/sqlite/repository.py
Normal file
166
src/everos/core/persistence/sqlite/repository.py
Normal file
@ -0,0 +1,166 @@
|
||||
"""Generic CRUD repository for SQLModel-backed tables.
|
||||
|
||||
``RepoBase`` is a pure generic CRUD helper that sits alongside
|
||||
:class:`BaseTable`. It knows nothing about a storage runtime — concrete
|
||||
repos either pass ``session_factory`` explicitly (typical in tests) or
|
||||
override :meth:`_factory_lookup` to pull the singleton from their
|
||||
storage manager (typical in :mod:`everos.infra.persistence.sqlite.repos`).
|
||||
|
||||
Each method opens its own ``session_scope`` (auto rollback on exception,
|
||||
session closed at end). For multi-step transactional work, use the
|
||||
session factory directly via :attr:`session_factory`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
from sqlmodel import SQLModel, select
|
||||
|
||||
from .session import session_scope
|
||||
|
||||
|
||||
class RepoBase[T: SQLModel]:
|
||||
"""Generic CRUD repository for one SQLModel table.
|
||||
|
||||
Subclass and bind to a model. Two ways to provide the session factory:
|
||||
|
||||
1. **Explicit (tests / DI)** — pass it to ``__init__``::
|
||||
|
||||
repo = SenderRepo(session_factory)
|
||||
|
||||
2. **Lazy hook (production singletons)** — override
|
||||
:meth:`_factory_lookup` so the repo can be instantiated as a
|
||||
module-level singleton with no factory bound yet::
|
||||
|
||||
class _SenderRepo(RepoBase[Sender]):
|
||||
model = Sender
|
||||
def _factory_lookup(self):
|
||||
from everos.infra.persistence.sqlite.sqlite_manager import (
|
||||
get_session_factory,
|
||||
)
|
||||
return get_session_factory()
|
||||
|
||||
sender_repo = _SenderRepo()
|
||||
await sender_repo.add(Sender(name="alice"))
|
||||
"""
|
||||
|
||||
model: type[T]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
session_factory: async_sessionmaker[AsyncSession] | None = None,
|
||||
) -> None:
|
||||
"""Bind to a session factory; if ``None``, defer to ``_factory_lookup``."""
|
||||
self._factory_override = session_factory
|
||||
|
||||
def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
|
||||
"""Resolve a session factory on first use. Override in subclass.
|
||||
|
||||
``RepoBase`` itself has no idea where the runtime singleton lives
|
||||
— that knowledge belongs to the infra subclass. The default raises
|
||||
so a missing override is loud rather than silently broken.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{type(self).__name__}: pass session_factory= to __init__ "
|
||||
"or override _factory_lookup() to wire the storage manager."
|
||||
)
|
||||
|
||||
@property
|
||||
def _factory(self) -> async_sessionmaker[AsyncSession]:
|
||||
if self._factory_override is not None:
|
||||
return self._factory_override
|
||||
return self._factory_lookup()
|
||||
|
||||
@property
|
||||
def session_factory(self) -> async_sessionmaker[AsyncSession]:
|
||||
"""Underlying session factory (for multi-step transactions)."""
|
||||
return self._factory
|
||||
|
||||
# ── Create ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def add(self, instance: T) -> T:
|
||||
"""Insert one row, commit, refresh, return the instance."""
|
||||
async with session_scope(self._factory) as s:
|
||||
s.add(instance)
|
||||
await s.commit()
|
||||
await s.refresh(instance)
|
||||
return instance
|
||||
|
||||
async def add_many(self, instances: Sequence[T]) -> list[T]:
|
||||
"""Insert many rows in one transaction."""
|
||||
items = list(instances)
|
||||
async with session_scope(self._factory) as s:
|
||||
s.add_all(items)
|
||||
await s.commit()
|
||||
for inst in items:
|
||||
await s.refresh(inst)
|
||||
return items
|
||||
|
||||
# ── Read ───────────────────────────────────────────────────────────────
|
||||
|
||||
async def get_by_id(self, id_value: Any) -> T | None:
|
||||
"""Get a row by primary key. Returns ``None`` if not found."""
|
||||
async with session_scope(self._factory) as s:
|
||||
return await s.get(self.model, id_value)
|
||||
|
||||
async def list_all(self) -> list[T]:
|
||||
"""Return all rows (no filter, no order)."""
|
||||
async with session_scope(self._factory) as s:
|
||||
stmt = select(self.model)
|
||||
return list((await s.execute(stmt)).scalars().all())
|
||||
|
||||
async def find_where(self, **filters: Any) -> list[T]:
|
||||
"""Equality-only filtering, e.g. ``find_where(name="alice", active=True)``."""
|
||||
async with session_scope(self._factory) as s:
|
||||
stmt = select(self.model).filter_by(**filters)
|
||||
return list((await s.execute(stmt)).scalars().all())
|
||||
|
||||
async def find_one(self, **filters: Any) -> T | None:
|
||||
"""First row matching ``filters`` (no ordering); ``None`` if not found."""
|
||||
async with session_scope(self._factory) as s:
|
||||
stmt = select(self.model).filter_by(**filters).limit(1)
|
||||
return (await s.execute(stmt)).scalars().first()
|
||||
|
||||
async def count(self) -> int:
|
||||
"""Total row count (no filter)."""
|
||||
async with session_scope(self._factory) as s:
|
||||
stmt = select(func.count()).select_from(self.model)
|
||||
return int((await s.execute(stmt)).scalar_one())
|
||||
|
||||
# ── Update ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def update(self, instance: T) -> T:
|
||||
"""Persist changes on an instance whose primary key already exists.
|
||||
|
||||
Uses ``session.merge`` so detached / fresh-from-Pydantic instances
|
||||
are reattached. ``BaseTable.updated_at`` auto-bumps via SA's
|
||||
``onupdate`` hook.
|
||||
"""
|
||||
async with session_scope(self._factory) as s:
|
||||
merged = await s.merge(instance)
|
||||
await s.commit()
|
||||
await s.refresh(merged)
|
||||
return merged
|
||||
|
||||
# ── Delete ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def delete(self, instance: T) -> None:
|
||||
"""Delete by instance (primary key must be set)."""
|
||||
async with session_scope(self._factory) as s:
|
||||
merged = await s.merge(instance)
|
||||
await s.delete(merged)
|
||||
await s.commit()
|
||||
|
||||
async def delete_by_id(self, id_value: Any) -> bool:
|
||||
"""Delete by primary key. Returns ``True`` if a row was removed."""
|
||||
async with session_scope(self._factory) as s:
|
||||
instance = await s.get(self.model, id_value)
|
||||
if instance is None:
|
||||
return False
|
||||
await s.delete(instance)
|
||||
await s.commit()
|
||||
return True
|
||||
45
src/everos/core/persistence/sqlite/session.py
Normal file
45
src/everos/core/persistence/sqlite/session.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""Async session factory + session scope context manager."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
|
||||
|
||||
|
||||
def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Return an :class:`async_sessionmaker` that creates sessions on ``engine``.

    Sessions are created with ``expire_on_commit=False`` so attributes of
    loaded instances stay readable after a commit instead of being expired.
    """
    factory: async_sessionmaker[AsyncSession] = async_sessionmaker(
        engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    return factory
|
||||
|
||||
|
||||
@asynccontextmanager
async def session_scope(
    session_factory: async_sessionmaker[AsyncSession],
) -> AsyncIterator[AsyncSession]:
    """Open a session, yield it, and roll back if the body raises.

    The session comes from ``session_factory()`` and is used as an async
    context manager, so it is released when the scope ends. When the
    ``async with`` body raises an ``Exception`` the session is rolled back
    and the exception is re-raised. Nothing is committed here; the caller
    must ``await session.commit()`` itself.

    Usage:
        factory = create_session_factory(engine)
        async with session_scope(factory) as session:
            session.add(some_record)
            await session.commit()
    """
    async with session_factory() as db_session:
        try:
            yield db_session
        except Exception:
            # Undo pending work first, then let the original error propagate.
            await db_session.rollback()
            raise
|
||||
5
src/everos/entrypoints/__init__.py
Normal file
5
src/everos/entrypoints/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Presentation layer.
|
||||
|
||||
Translates external requests (CLI / HTTP) into service-layer calls.
|
||||
Contains no business logic.
|
||||
"""
|
||||
11
src/everos/entrypoints/api/__init__.py
Normal file
11
src/everos/entrypoints/api/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
"""HTTP REST entry point (FastAPI), routed by resource.
|
||||
|
||||
External usage:
|
||||
from everos.entrypoints.api import create_app
|
||||
|
||||
app = create_app()
|
||||
"""
|
||||
|
||||
from .app import create_app as create_app
|
||||
|
||||
__all__ = ["create_app"]
|
||||
124
src/everos/entrypoints/api/app.py
Normal file
124
src/everos/entrypoints/api/app.py
Normal file
@ -0,0 +1,124 @@
|
||||
"""FastAPI application factory.
|
||||
|
||||
Wires CORS + the project's middleware stack + global exception handler +
|
||||
lifespan, and registers the route modules (``health``, ``metrics``,
``memorize``, ``search``, ``get``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from everos.core.lifespan import (
|
||||
LifespanProvider,
|
||||
MetricsLifespanProvider,
|
||||
build_lifespan,
|
||||
)
|
||||
from everos.core.middleware import (
|
||||
DEFAULT_CORS_ALLOW_CREDENTIALS,
|
||||
DEFAULT_CORS_ALLOW_HEADERS,
|
||||
DEFAULT_CORS_ALLOW_METHODS,
|
||||
DEFAULT_CORS_ORIGINS,
|
||||
ProfileMiddleware,
|
||||
PrometheusMiddleware,
|
||||
global_exception_handler,
|
||||
)
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
from .lifespans import (
|
||||
CascadeLifespanProvider,
|
||||
LanceDBLifespanProvider,
|
||||
LLMLifespanProvider,
|
||||
OmeLifespanProvider,
|
||||
SqliteLifespanProvider,
|
||||
)
|
||||
from .routes import (
|
||||
get,
|
||||
health,
|
||||
memorize,
|
||||
metrics,
|
||||
search,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _docs_enabled() -> bool:
    """Tell whether /docs, /redoc and /openapi.json should be served.

    True only when the ``ENV`` environment variable equals ``DEV``
    (compared case-insensitively); an unset variable counts as ``prod``.
    """
    env_name = os.environ.get("ENV", "prod")
    return env_name.upper() == "DEV"
|
||||
|
||||
|
||||
def create_app(
    *,
    cors_origins: list[str] | None = None,
    cors_allow_credentials: bool = DEFAULT_CORS_ALLOW_CREDENTIALS,
    cors_allow_methods: list[str] | None = None,
    cors_allow_headers: list[str] | None = None,
    lifespan_providers: list[LifespanProvider] | None = None,
) -> FastAPI:
    """Build the FastAPI application instance.

    For every list argument ``None`` means "use the default", while an
    explicit list — including an empty one — is passed through unchanged.

    Args:
        cors_origins: Allowed CORS origins; ``None`` selects
            ``DEFAULT_CORS_ORIGINS``.
        cors_allow_credentials: Whether CORS responses allow credentials
            (default: ``DEFAULT_CORS_ALLOW_CREDENTIALS``).
        cors_allow_methods: Allowed CORS methods; ``None`` selects
            ``DEFAULT_CORS_ALLOW_METHODS``.
        cors_allow_headers: Allowed CORS headers; ``None`` selects
            ``DEFAULT_CORS_ALLOW_HEADERS``.
        lifespan_providers: Providers composed into the app lifespan;
            ``None`` selects ``[MetricsLifespanProvider(),
            LLMLifespanProvider(), SqliteLifespanProvider(),
            LanceDBLifespanProvider(), CascadeLifespanProvider(),
            OmeLifespanProvider()]``.

    Returns:
        FastAPI: Configured application instance.
    """
    enable_docs = _docs_enabled()

    if lifespan_providers is None:
        lifespan_providers = [
            MetricsLifespanProvider(),
            LLMLifespanProvider(),
            SqliteLifespanProvider(),
            LanceDBLifespanProvider(),
            CascadeLifespanProvider(),
            OmeLifespanProvider(),
        ]

    # Resolve the CORS defaults with ``is None`` rather than ``or``: with
    # ``or`` an explicit empty list (a caller locking CORS down) was
    # silently replaced by the default value.
    if cors_origins is None:
        cors_origins = DEFAULT_CORS_ORIGINS
    if cors_allow_methods is None:
        cors_allow_methods = DEFAULT_CORS_ALLOW_METHODS
    if cors_allow_headers is None:
        cors_allow_headers = DEFAULT_CORS_ALLOW_HEADERS

    app = FastAPI(
        title="everos",
        version="0.1.0",
        description="md-first memory extraction framework",
        lifespan=build_lifespan(lifespan_providers),
        # The interactive docs and the schema endpoint are exposed in dev only.
        docs_url="/docs" if enable_docs else None,
        redoc_url="/redoc" if enable_docs else None,
        openapi_url="/openapi.json" if enable_docs else None,
    )

    # Exception handlers: HTTPException, validation errors, plus a fallback.
    app.add_exception_handler(HTTPException, global_exception_handler)
    app.add_exception_handler(RequestValidationError, global_exception_handler)
    app.add_exception_handler(Exception, global_exception_handler)

    # Middleware order: earlier `add_middleware` calls become inner, later ones outer.
    # CORS innermost (matches base_app.py legacy pattern).
    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_origins,
        allow_credentials=cors_allow_credentials,
        allow_methods=cors_allow_methods,
        allow_headers=cors_allow_headers,
    )
    app.add_middleware(PrometheusMiddleware)
    app.add_middleware(ProfileMiddleware)

    # Routes.
    app.include_router(health.router)
    app.include_router(metrics.router)
    app.include_router(memorize.router)
    app.include_router(search.router)
    app.include_router(get.router)

    logger.info("app_created", docs_enabled=enable_docs)
    return app
|
||||
35
src/everos/entrypoints/api/lifespans/__init__.py
Normal file
35
src/everos/entrypoints/api/lifespans/__init__.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""HTTP API lifespan providers.
|
||||
|
||||
Concrete :class:`everos.core.lifespan.LifespanProvider` implementations
|
||||
for the storage + chassis backends this entrypoint composes. They live next to
|
||||
``app.py`` because they are *application-bootstrap* details, not
|
||||
generic chassis: a different deployment mode (CLI, embedded, batch
|
||||
worker) may compose a different set of providers.
|
||||
|
||||
Putting these here also keeps ``core.lifespan`` free of concrete-
|
||||
backend imports — the chassis stays portable.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.entrypoints.api.lifespans import (
|
||||
LLMLifespanProvider,
|
||||
SqliteLifespanProvider,
|
||||
LanceDBLifespanProvider,
|
||||
CascadeLifespanProvider,
|
||||
OmeLifespanProvider,
|
||||
)
|
||||
"""
|
||||
|
||||
from .cascade import CascadeLifespanProvider as CascadeLifespanProvider
|
||||
from .lancedb import LanceDBLifespanProvider as LanceDBLifespanProvider
|
||||
from .llm import LLMLifespanProvider as LLMLifespanProvider
|
||||
from .ome import OmeLifespanProvider as OmeLifespanProvider
|
||||
from .sqlite import SqliteLifespanProvider as SqliteLifespanProvider
|
||||
|
||||
__all__ = [
|
||||
"CascadeLifespanProvider",
|
||||
"LLMLifespanProvider",
|
||||
"LanceDBLifespanProvider",
|
||||
"OmeLifespanProvider",
|
||||
"SqliteLifespanProvider",
|
||||
]
|
||||
55
src/everos/entrypoints/api/lifespans/cascade.py
Normal file
55
src/everos/entrypoints/api/lifespans/cascade.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""Cascade lifespan provider — starts/stops :class:`CascadeOrchestrator`.
|
||||
|
||||
Ordered after SqliteLifespan + LanceDBLifespan: the orchestrator
|
||||
depends on both stores being ready before its watcher / scanner /
|
||||
worker tasks can take the first row.
|
||||
|
||||
Construction reads the live :class:`Settings` to build the embedding +
|
||||
tokenizer providers. If either is misconfigured the lifespan fails
|
||||
fast — the daemon would be useless without them anyway.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.component.embedding import build_embedding_provider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.config import load_settings
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.memory.cascade import CascadeOrchestrator
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class CascadeLifespanProvider(LifespanProvider):
    """Own the :class:`CascadeOrchestrator` for the lifetime of the app.

    Registered under the name ``"cascade"`` with a default order of 12.
    """

    def __init__(self, order: int = 12) -> None:
        super().__init__(name="cascade", order=order)
        # Set by ``startup``; reset to ``None`` once ``shutdown`` has stopped it.
        self._orchestrator: CascadeOrchestrator | None = None

    async def startup(self, app: FastAPI) -> Any:
        """Build the orchestrator from live settings, start it, and return it."""
        cfg = load_settings()
        root = MemoryRoot.default()
        root.ensure()

        orchestrator = CascadeOrchestrator(
            memory_root=root,
            embedder=build_embedding_provider(cfg.embedding),
            tokenizer=build_tokenizer(),
        )
        # Remember the instance before starting it so ``shutdown`` can still
        # stop it if ``start`` raises part-way through.
        self._orchestrator = orchestrator
        await orchestrator.start()
        logger.info("cascade_lifespan_ready")
        return orchestrator

    async def shutdown(self, app: FastAPI) -> None:
        """Stop the orchestrator created by ``startup``; no-op if there is none."""
        orchestrator = self._orchestrator
        if orchestrator is None:
            return
        await orchestrator.stop()
        self._orchestrator = None
|
||||
55
src/everos/entrypoints/api/lifespans/lancedb.py
Normal file
55
src/everos/entrypoints/api/lifespans/lancedb.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""LanceDB lifespan provider (HTTP API entrypoint).
|
||||
|
||||
Startup:
|
||||
Open the connection via ``get_connection`` (lazy, idempotent).
|
||||
Importing :mod:`everos.infra.persistence.lancedb` also triggers the
|
||||
side-effect import of ``tables`` so business schemas are loaded
|
||||
(future: preflight registration).
|
||||
|
||||
Shutdown:
|
||||
Close the connection (also clears the table cache).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.persistence.lancedb import (
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
get_connection,
|
||||
verify_business_schemas,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class LanceDBLifespanProvider(LifespanProvider):
    """Own the LanceDB connection (and its table cache) for the app lifecycle.

    ``startup`` performs, in order:

    1. ``get_connection`` — lazily open the async connection.
    2. ``verify_business_schemas`` — fail loudly when an on-disk table's
       columns have drifted from the current Pydantic schema. LanceDB has
       no online migration; because cascade is rebuildable from md, the
       documented recovery is ``rm -rf ~/.everos/.index/lancedb``.
    3. ``ensure_business_indexes`` — idempotent FTS index creation.

    ``shutdown`` disposes the connection.
    """

    def __init__(self, order: int = 11) -> None:
        super().__init__(name="lancedb", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Open the connection, run the schema/index checks, return the connection."""
        connection = await get_connection()
        # Schema verification must run before index creation (steps 2 and 3).
        for step in (verify_business_schemas, ensure_business_indexes):
            await step()
        logger.info("lancedb_ready", uri=connection.uri)
        return connection

    async def shutdown(self, app: FastAPI) -> None:
        """Dispose the LanceDB connection."""
        await dispose_connection()
|
||||
36
src/everos/entrypoints/api/lifespans/llm.py
Normal file
36
src/everos/entrypoints/api/lifespans/llm.py
Normal file
@ -0,0 +1,36 @@
|
||||
"""LLM lifespan provider — eagerly resolves the LLM singleton at startup.
|
||||
|
||||
The framework's core value (memory extraction) is meaningless without
|
||||
an LLM, so misconfiguration must surface as a startup failure instead
|
||||
of N silent skips per request downstream. Ordered before the storage
|
||||
stack so we fail before paying to bring sqlite / lancedb / cascade up.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.component.llm import get_llm_client
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class LLMLifespanProvider(LifespanProvider):
    """Resolve the LLM client eagerly so misconfiguration fails at startup.

    Registered under the name ``"llm"`` with a default order of 8.
    """

    def __init__(self, order: int = 8) -> None:
        super().__init__(name="llm", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Resolve and return the LLM client; any error from it propagates."""
        llm = get_llm_client()
        logger.info("llm_lifespan_ready")
        return llm

    async def shutdown(self, app: FastAPI) -> None:
        """Nothing to release.

        The client is stateless (algo facade over ``openai.AsyncOpenAI``),
        so there is nothing to tear down.
        """
|
||||
39
src/everos/entrypoints/api/lifespans/ome.py
Normal file
39
src/everos/entrypoints/api/lifespans/ome.py
Normal file
@ -0,0 +1,39 @@
|
||||
"""OME engine lifespan provider (HTTP API entrypoint).
|
||||
|
||||
Startup: build the singleton engine via service.memorize._get_engine
|
||||
(which also registers strategies) and start it.
|
||||
|
||||
Shutdown: stop the engine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.observability.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class OmeLifespanProvider(LifespanProvider):
    """Manage the OfflineEngine lifecycle for the FastAPI app.

    The engine is obtained from ``everos.service.memorize._get_engine`` at
    startup and remembered on the provider, so ``shutdown`` stops exactly
    the instance this provider started and never has to resolve (and
    thereby possibly construct) an engine during teardown.
    """

    def __init__(self, order: int = 50) -> None:
        super().__init__(name="ome", order=order)
        # Engine handed out by ``startup``; ``None`` until then and again
        # after a completed ``shutdown``.
        self._engine: Any | None = None

    async def startup(self, app: FastAPI) -> Any:
        """Resolve the service-level engine, start it, and return it."""
        # Imported by name at call time rather than at module import.
        svc = importlib.import_module("everos.service.memorize")
        engine = svc._get_engine()  # noqa: SLF001 — service-internal accessor
        # Track the engine before starting it so a partially failed start
        # can still be stopped by ``shutdown``.
        self._engine = engine
        await engine.start()
        logger.info("ome_engine_started")
        return engine

    async def shutdown(self, app: FastAPI) -> None:
        """Stop the engine started by ``startup``; no-op if startup never ran."""
        engine = self._engine
        if engine is None:
            return
        await engine.stop()
        self._engine = None
        logger.info("ome_engine_stopped")
|
||||
45
src/everos/entrypoints/api/lifespans/sqlite.py
Normal file
45
src/everos/entrypoints/api/lifespans/sqlite.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""SQLite system-DB lifespan provider (HTTP API entrypoint).
|
||||
|
||||
Startup:
|
||||
1. Build the engine via ``get_engine`` (lazy, idempotent). Importing
|
||||
:mod:`everos.infra.persistence.sqlite` also triggers the side-
|
||||
effect import of ``tables`` so every business SQLModel registers
|
||||
itself in ``SQLModel.metadata``.
|
||||
2. ``SQLModel.metadata.create_all`` so every registered table exists.
|
||||
|
||||
Shutdown:
|
||||
Dispose the engine + connection pool.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.core.lifespan import LifespanProvider
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.persistence.sqlite import dispose_engine, get_engine
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SqliteLifespanProvider(LifespanProvider):
    """Own the SQLite system-DB engine and its schema for the app lifecycle.

    Registered under the name ``"sqlite"`` with a default order of 10.
    """

    def __init__(self, order: int = 10) -> None:
        super().__init__(name="sqlite", order=order)

    async def startup(self, app: FastAPI) -> Any:
        """Create every registered table and return the engine."""
        metadata = SQLModel.metadata
        db_engine = get_engine()
        # ``create_all`` is a synchronous SQLAlchemy call, hence ``run_sync``.
        async with db_engine.begin() as connection:
            await connection.run_sync(metadata.create_all)
        table_count = len(metadata.tables)
        logger.info("sqlite_schema_ready", tables=table_count)
        return db_engine

    async def shutdown(self, app: FastAPI) -> None:
        """Dispose the engine."""
        await dispose_engine()
|
||||
5
src/everos/entrypoints/api/routes/__init__.py
Normal file
5
src/everos/entrypoints/api/routes/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""HTTP route modules.
|
||||
|
||||
Each module here exposes an ``APIRouter`` named ``router`` registered by
|
||||
:func:`everos.entrypoints.api.app.create_app` via ``app.include_router``.
|
||||
"""
|
||||
26
src/everos/entrypoints/api/routes/get.py
Normal file
26
src/everos/entrypoints/api/routes/get.py
Normal file
@ -0,0 +1,26 @@
|
||||
"""POST /api/v1/memory/get — paginated listing endpoint.
|
||||
|
||||
Thin adapter: validate the request DTO, dispatch to the service layer,
|
||||
return the envelope verbatim. ``request_id`` is generated inside the
|
||||
:class:`GetManager`; we trust the value on the way out.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from everos.memory.get import GetRequest, GetResponse
|
||||
from everos.memory.search import FilterError
|
||||
from everos.service import get as get_service
|
||||
|
||||
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
|
||||
|
||||
|
||||
@router.post("/get", response_model=GetResponse)
async def post_get(req: GetRequest) -> GetResponse:
    """Paginated listing over the requested ``memory_type``."""
    # Only the service call can raise ``FilterError``; the success path
    # returns the service envelope untouched.
    try:
        envelope = await get_service(req)
    except FilterError as exc:
        # Filter-DSL violations surface as 422 with the compile message.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return envelope
|
||||
13
src/everos/entrypoints/api/routes/health.py
Normal file
13
src/everos/entrypoints/api/routes/health.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""Health check route."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter(tags=["health"])
|
||||
|
||||
|
||||
@router.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe — returns ``{"status": "ok"}`` with HTTP 200."""
    # Static payload: the handler touches no backend.
    payload: dict[str, str] = {"status": "ok"}
    return payload
|
||||
195
src/everos/entrypoints/api/routes/memorize.py
Normal file
195
src/everos/entrypoints/api/routes/memorize.py
Normal file
@ -0,0 +1,195 @@
|
||||
"""POST /api/v1/memory/add and /api/v1/memory/flush.
|
||||
|
||||
DTOs follow the v1 API brief (01_v1_api_brief.md §2 / §3). Routes are
|
||||
thin adapters: validate the DTO, dump to dict, hand to service. No
|
||||
business logic lives here.
|
||||
|
||||
``/flush`` is OSS-only (the cloud edition decides boundary timing
|
||||
server-side and does not expose this endpoint).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated, Any, Literal
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import AfterValidator, BaseModel, ConfigDict, Field
|
||||
|
||||
from everos.core.errors import MultimodalError
|
||||
from everos.core.observability.tracing import gen_request_id
|
||||
from everos.service import memorize
|
||||
|
||||
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
|
||||
|
||||
|
||||
# ── Path-safe identifier ────────────────────────────────────────────────────
|
||||
# ``app_id`` / ``project_id`` become directory segments under the memory
|
||||
# root, so they must reject ``.`` and ``..`` (path traversal). The basic
|
||||
# character whitelist is enforced via ``pattern`` (pydantic_core uses the
|
||||
# Rust regex engine, which does NOT support lookaround), and the two
|
||||
# reserved tokens are filtered out with a follow-up ``AfterValidator``.
|
||||
_PATH_SAFE_CHARSET = r"^[a-zA-Z0-9_.-]+$"
_PATH_TRAVERSAL_TOKENS = frozenset({".", ".."})


def _reject_path_traversal(value: str) -> str:
    """Return ``value`` unchanged unless it is exactly ``.`` or ``..``.

    Only the two reserved tokens are rejected here; the character
    whitelist is a separate check (``_PATH_SAFE_CHARSET``).

    Raises:
        ValueError: if ``value`` is one of the reserved traversal tokens.
    """
    if value not in _PATH_TRAVERSAL_TOKENS:
        return value
    raise ValueError("'.' and '..' are reserved (path traversal)")
|
||||
|
||||
|
||||
PathSafeId = Annotated[str, AfterValidator(_reject_path_traversal)]
|
||||
|
||||
|
||||
# DTOs ────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class ToolFunctionDTO(BaseModel):
    # Function part of a tool call: the function name plus its arguments.
    # Referenced by ``ToolCallDTO.function``.
    name: str
    arguments: str  # JSON string per OpenAI Chat Completions spec
|
||||
|
||||
|
||||
class ToolCallDTO(BaseModel):
    # One tool call attached to a message (see ``MessageItemDTO.tool_calls``).
    id: str
    # Defaults to "function"; declared as a free-form ``str``, not a Literal.
    type: str = "function"
    function: ToolFunctionDTO
|
||||
|
||||
|
||||
class ContentItemDTO(BaseModel):
    """Content piece (v1 API brief appendix A)."""

    # ``type`` is the only required field; every payload field below is
    # optional, and the model does not validate which ones accompany a type.
    type: Literal["text", "image", "audio", "doc", "pdf", "html", "email"]
    text: str | None = None
    uri: str | None = None
    base64: str | None = None
    ext: str | None = None
    name: str | None = None
    extras: dict[str, Any] | None = None

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
class MessageItemDTO(BaseModel):
    # One message inside an ``/add`` request.
    sender_id: str = Field(..., min_length=1)  # required, non-empty
    sender_name: str | None = None
    role: Literal["user", "assistant", "tool"]
    # Required and strictly positive; the unit contract is stated in the
    # field description below.
    timestamp: int = Field(
        ...,
        gt=0,
        description=(
            "Message event time as Unix epoch in **milliseconds** "
            "(v1 API contract; the algo layer auto-detects sec vs ms "
            "for backward compat but the contract is ms)."
        ),
    )
    # Either a plain string or a list of structured content pieces.
    content: str | list[ContentItemDTO]
    tool_calls: list[ToolCallDTO] | None = None
    tool_call_id: str | None = None
|
||||
|
||||
|
||||
class MemorizeAddRequest(BaseModel):
    # Request body of POST /api/v1/memory/add.
    session_id: str = Field(..., min_length=1, max_length=128)
    # ``app_id`` / ``project_id`` default to "default", are 1-128 characters
    # from the ``_PATH_SAFE_CHARSET`` whitelist, and go through the
    # ``PathSafeId`` validator that rejects "." and "..".
    app_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    project_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    # Between 1 and 500 messages per request.
    messages: list[MessageItemDTO] = Field(..., min_length=1, max_length=500)
|
||||
|
||||
|
||||
class AddResponseData(BaseModel):
    # ``data`` payload of the ``/add`` response envelope; both fields are
    # copied from the service result by ``add_memory``.
    message_count: int
    status: Literal["accumulated", "extracted"]
|
||||
|
||||
|
||||
class MemorizeFlushRequest(BaseModel):
    # Request body of POST /api/v1/memory/flush: the same identifying fields
    # (and constraints) as ``MemorizeAddRequest``, without ``messages``.
    session_id: str = Field(..., min_length=1, max_length=128)
    app_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
    project_id: PathSafeId = Field(
        default="default",
        min_length=1,
        max_length=128,
        pattern=_PATH_SAFE_CHARSET,
    )
|
||||
|
||||
|
||||
class FlushResponseData(BaseModel):
    # ``data`` payload of the ``/flush`` response envelope. ``flush_memory``
    # reports "extracted" only when the service result says so; every other
    # service status becomes "no_extraction".
    status: Literal["extracted", "no_extraction"]
|
||||
|
||||
|
||||
class SuccessEnvelope[T](BaseModel):
    """200 wrapper: ``request_id`` sits at the top level, not inside ``data``."""

    # Generic over the payload type ``T`` (PEP 695 type-parameter syntax).
    request_id: str
    data: T
|
||||
|
||||
|
||||
# Route ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/add")
async def add_memory(
    req: Annotated[MemorizeAddRequest, ...],
    request: Request,
) -> SuccessEnvelope[AddResponseData]:
    """Add messages into the user-memory + agent-memory pipelines."""
    # Use the id already placed on ``request.state`` when present; otherwise
    # mint one locally.
    rid = getattr(request.state, "request_id", None)
    if not rid:
        rid = _gen_request_id()

    # The service takes a plain dict, not the DTO.
    payload = req.model_dump()
    try:
        outcome = await memorize(payload)
    except MultimodalError as exc:
        # Multimodal failures are reported as 415 with the error message.
        raise HTTPException(status_code=415, detail=str(exc)) from exc

    data = AddResponseData(
        message_count=outcome.message_count,
        status=outcome.status,
    )
    return SuccessEnvelope(request_id=rid, data=data)
|
||||
|
||||
|
||||
@router.post("/flush")
async def flush_memory(
    req: Annotated[MemorizeFlushRequest, ...],
    request: Request,
) -> SuccessEnvelope[FlushResponseData]:
    """Force boundary detection over the current ``session_id`` buffer.

    [OSS-only] — cloud edition decides boundary timing server-side and
    does not expose this endpoint.
    """
    rid = getattr(request.state, "request_id", None)
    if not rid:
        rid = _gen_request_id()

    # A flush is a memorize call with no new messages and ``is_final=True``.
    flush_payload = {
        "session_id": req.session_id,
        "app_id": req.app_id,
        "project_id": req.project_id,
        "messages": [],
    }
    outcome = await memorize(flush_payload, is_final=True)

    # service's ``accumulated`` = nothing to flush (buffer was empty);
    # ``extracted`` = at least one cell carved out.
    status: Literal["extracted", "no_extraction"]
    if outcome.status == "extracted":
        status = "extracted"
    else:
        status = "no_extraction"

    return SuccessEnvelope(
        request_id=rid,
        data=FlushResponseData(status=status),
    )
|
||||
|
||||
|
||||
def _gen_request_id() -> str:
    """Fallback request id when no middleware set one."""
    # Thin local seam over the tracing helper so both routes share one
    # fallback source.
    fallback_id = gen_request_id()
    return fallback_id
|
||||
20
src/everos/entrypoints/api/routes/metrics.py
Normal file
20
src/everos/entrypoints/api/routes/metrics.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""Prometheus metrics route."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import Response
|
||||
from prometheus_client import CONTENT_TYPE_LATEST
|
||||
|
||||
from everos.core.observability.metrics import generate_metrics_response
|
||||
|
||||
router = APIRouter(tags=["metrics"])
|
||||
|
||||
|
||||
@router.get("/metrics")
async def metrics() -> Response:
    """Render the current Prometheus registry in exposition format."""
    # Render first, then wrap with the Prometheus content type.
    exposition = generate_metrics_response()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)
|
||||
27
src/everos/entrypoints/api/routes/search.py
Normal file
27
src/everos/entrypoints/api/routes/search.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""POST /api/v1/memory/search — hybrid retrieval endpoint.
|
||||
|
||||
Thin adapter: validate the request DTO, dispatch to the service layer,
|
||||
return the envelope verbatim. ``request_id`` is generated inside the
|
||||
:class:`SearchManager` (uniform for OSS + cloud); we trust that value
|
||||
on the way out.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from everos.memory.search import FilterError, SearchRequest, SearchResponse
|
||||
from everos.service import search
|
||||
|
||||
router = APIRouter(prefix="/api/v1/memory", tags=["memory"])
|
||||
|
||||
|
||||
@router.post("/search", response_model=SearchResponse)
async def post_search(req: SearchRequest) -> SearchResponse:
    """Hybrid retrieval across the configured memory backends."""
    # Only the service call can raise ``FilterError``; the success path
    # returns the service envelope untouched.
    try:
        envelope = await search(req)
    except FilterError as exc:
        # Filter-DSL violations surface as 422 with the compile message
        # (mirrors /get's contract).
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return envelope
|
||||
5
src/everos/entrypoints/cli/__init__.py
Normal file
5
src/everos/entrypoints/cli/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Command line entry point.
|
||||
|
||||
Contract-first design, JSON output by default, ``--describe`` machine-readable
|
||||
mode, granular exit codes.
|
||||
"""
|
||||
5
src/everos/entrypoints/cli/commands/__init__.py
Normal file
5
src/everos/entrypoints/cli/commands/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""CLI subcommand modules.
|
||||
|
||||
Each module here exposes a ``app: typer.Typer`` instance which is mounted
|
||||
as a subcommand group by :mod:`everos.entrypoints.cli.main`.
|
||||
"""
|
||||
267
src/everos/entrypoints/cli/commands/cascade.py
Normal file
267
src/everos/entrypoints/cli/commands/cascade.py
Normal file
@ -0,0 +1,267 @@
|
||||
"""``everos cascade`` subcommand group.
|
||||
|
||||
Three one-shot operations on the cascade subsystem, all run in-process
|
||||
without standing up the FastAPI app:
|
||||
|
||||
- ``cascade sync [PATH]`` — flush the work queue. With ``PATH`` the
|
||||
command first force-enqueues that single file (used after a manual
|
||||
md edit when waiting for the watcher is impractical), then drains.
|
||||
- ``cascade status`` — print the queue + LSN summary that the daemon
|
||||
sees right now.
|
||||
- ``cascade fix`` — list every ``failed`` row. With ``--apply``, also
|
||||
reset ``retryable=TRUE`` rows back to ``pending`` and drain the
|
||||
worker once so the retry actually runs before the command returns.
|
||||
|
||||
CLI is in-process (12 doc §7.1 + 16 doc §9.2): it constructs the same
|
||||
:class:`CascadeOrchestrator` as the daemon but only calls
|
||||
``sync_once`` / ``drain_once`` / ``queue_summary``. No watcher /
|
||||
scanner background task is started.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
import typer
|
||||
from sqlmodel import SQLModel
|
||||
|
||||
from everos.component.embedding import build_embedding_provider
|
||||
from everos.component.tokenizer import build_tokenizer
|
||||
from everos.component.utils.datetime import to_display_tz
|
||||
from everos.config import load_settings
|
||||
from everos.core.persistence import MemoryRoot
|
||||
from everos.infra.persistence.lancedb import (
|
||||
dispose_connection,
|
||||
ensure_business_indexes,
|
||||
get_connection,
|
||||
verify_business_schemas,
|
||||
)
|
||||
from everos.infra.persistence.sqlite import (
|
||||
dispose_engine,
|
||||
get_engine,
|
||||
md_change_state_repo,
|
||||
)
|
||||
from everos.memory.cascade import CascadeOrchestrator, match_kind
|
||||
|
||||
app = typer.Typer(
|
||||
name="cascade",
|
||||
help="Inspect and operate the md → LanceDB sync queue",
|
||||
no_args_is_help=True,
|
||||
)
|
||||
|
||||
|
||||
# ── shared runtime context ───────────────────────────────────────────────
|
||||
|
||||
|
||||
@asynccontextmanager
async def _runtime():  # type: ignore[no-untyped-def]
    """Stand up sqlite + lancedb the same way the API lifespan would.

    Setup order: create the SQLite schema, open the LanceDB connection,
    verify the on-disk LanceDB schemas, then ensure the business indexes.
    The stores are reached through the same lazy accessors
    (``get_engine`` / ``get_connection``) that the API lifespan providers
    use.

    Teardown disposes the LanceDB connection and then the SQLite engine.
    It runs for every resource that was successfully acquired, including
    when a later setup step (for example schema verification) raises, so
    a failed command does not leave an open engine or connection behind.

    NOTE(review): safety against a server process running concurrently on
    the same memory root is not established by this function — confirm.
    """
    engine = get_engine()
    try:
        async with engine.begin() as conn:
            await conn.run_sync(SQLModel.metadata.create_all)
        await get_connection()
        try:
            await verify_business_schemas()
            await ensure_business_indexes()
            yield
        finally:
            # Only reached once the connection has been opened.
            await dispose_connection()
    finally:
        await dispose_engine()
|
||||
|
||||
|
||||
def _build_orchestrator() -> CascadeOrchestrator:
    """Construct (but do not start) a :class:`CascadeOrchestrator`.

    Reads the live settings for the embedding provider and makes sure the
    default memory root exists before handing it to the orchestrator.
    """
    cfg = load_settings()
    root = MemoryRoot.default()
    root.ensure()
    return CascadeOrchestrator(
        memory_root=root,
        embedder=build_embedding_provider(cfg.embedding),
        tokenizer=build_tokenizer(),
    )
|
||||
|
||||
|
||||
# ── sync ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.command("sync")
def sync(
    path: Annotated[
        Path | None,
        typer.Argument(
            help="Optional md path to force-enqueue before draining. "
            "If omitted, only the existing queue is drained.",
        ),
    ] = None,
) -> None:
    """Drain the cascade queue (and optionally re-enqueue a path first)."""

    async def _run() -> None:
        async with _runtime():
            orchestrator = _build_orchestrator()

            if path is not None:
                # The state table is keyed by memory-root-relative paths.
                rel_path = _resolve_relative(path)
                kind = match_kind(rel_path)
                if kind is None:
                    message = (
                        f"error: path does not match any registered cascade "
                        f"kind: {rel_path}"
                    )
                    typer.echo(message, err=True)
                    raise typer.Exit(code=1)
                await md_change_state_repo.force_enqueue(rel_path, kind.name)
                typer.echo(f"force-enqueued {rel_path} (kind={kind.name})")

            drained = await orchestrator.sync_once()
            typer.echo(f"sync complete — processed {drained} row(s)")

    asyncio.run(_run())
|
||||
|
||||
|
||||
# ── status ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.command("status")
def status() -> None:
    """Print the queue / LSN summary."""

    async def _run() -> None:
        async with _runtime():
            summary = await md_change_state_repo.queue_summary()

            # Hints are appended only when the matching counter is non-zero.
            retry_hint = (
                " (eligible for `cascade fix --apply`)"
                if summary.failed_retryable
                else ""
            )
            permanent_hint = (
                " (fix md and re-save to recover)"
                if summary.failed_permanent
                else ""
            )
            # Clamp at zero so the reported lag is never negative.
            lag = max(0, summary.max_lsn - summary.last_processed_lsn)

            report = (
                "queue:",
                f" pending: {summary.pending}",
                f" done: {summary.done}",
                f" failed (retryable=TRUE): {summary.failed_retryable}{retry_hint}",
                f" failed (retryable=FALSE): {summary.failed_permanent}"
                f"{permanent_hint}",
                "lsn:",
                f" max: {summary.max_lsn}",
                f" last_processed: {summary.last_processed_lsn}",
                f" lag: {lag}",
            )
            for line in report:
                typer.echo(line)

    asyncio.run(_run())
|
||||
|
||||
|
||||
# ── fix ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.command("fix")
def fix(
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Re-enqueue every `retryable=TRUE` row and drain the worker.",
        ),
    ] = False,
) -> None:
    """List failed rows (default) or re-enqueue retryable ones (``--apply``)."""

    async def _run() -> None:
        async with _runtime():
            failed = await md_change_state_repo.list_failed()
            if not failed:
                typer.echo("no failed rows")
                return

            permanent_rows = [row for row in failed if not row.retryable]

            if apply:
                moved = await md_change_state_repo.reset_retryable_to_pending()
                typer.echo(f"re-enqueued {moved} retryable row(s)")
                if moved:
                    # Drain once so the retry has actually run before the
                    # command returns.
                    drained = await _build_orchestrator().drain_once()
                    typer.echo(f"[worker] processed {drained} row(s) on drain")
                if permanent_rows:
                    typer.echo(
                        f"{len(permanent_rows)} retryable=FALSE row(s) left untouched:"
                    )
                    for row in permanent_rows:
                        typer.echo(f" {row.md_path}")
                return

            # Report-only mode: print the table, then say what to do next.
            _print_failed_table(failed)
            permanent = len(permanent_rows)
            retryable = len(failed) - permanent
            typer.echo("")
            if retryable:
                typer.echo(
                    f"run `everos cascade fix --apply` to re-enqueue "
                    f"the {retryable} retryable row(s)."
                )
            if permanent:
                typer.echo(
                    f"the {permanent} retryable=FALSE row(s) require "
                    "editing the md and re-saving."
                )

    asyncio.run(_run())
|
||||
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _resolve_relative(p: Path) -> str:
    """Translate an absolute / relative path arg into the memory-root rel form.

    The state table stores paths relative to memory root, so the CLI
    must match that convention before calling :meth:`force_enqueue`.

    Both the argument and the memory root are expanded and resolved
    before comparison. Resolving only the argument would make every path
    look "outside the root" whenever the root is reached through a
    symlink or a non-normalised spelling.

    Args:
        p: Path as typed by the user (absolute, relative, or ``~``-based).

    Returns:
        The POSIX-style path of ``p`` relative to the memory root.

    Raises:
        typer.BadParameter: if ``p`` does not live under the memory root.
    """
    memory_root = MemoryRoot.default()
    absolute = p.expanduser().resolve()
    resolved_root = Path(memory_root.root).expanduser().resolve()
    try:
        rel = absolute.relative_to(resolved_root)
    except ValueError as exc:
        raise typer.BadParameter(
            f"path {p!s} is not under memory root {memory_root.root!s}"
        ) from exc
    return rel.as_posix()
|
||||
|
||||
|
||||
def _print_failed_table(rows: list) -> None:  # type: ignore[type-arg]
    """Echo the failed rows as a left-aligned, column-padded text table.

    Every cell is rendered to text first and each column is sized to the
    widest of its header and its cells. Sizing ``retries`` and
    ``last_attempt`` from the header alone lets a long timestamp push the
    ``error`` column out of alignment. An empty ``rows`` list prints just
    the count line and the header row.

    Args:
        rows: Failed-row records exposing ``md_path``, ``retryable``,
            ``retry_count``, ``last_attempt_at`` and ``error``.
    """
    headers = ("md_path", "retryable", "retries", "last_attempt", "error")
    cells = [
        (
            r.md_path,
            "TRUE" if r.retryable else "FALSE",
            str(r.retry_count),
            # Timestamps are converted to the display timezone for output.
            to_display_tz(r.last_attempt_at).isoformat()
            if r.last_attempt_at
            else "",
            r.error or "",
        )
        for r in rows
    ]
    widths = [
        max([len(header), *(len(row[col]) for row in cells)])
        for col, header in enumerate(headers)
    ]
    fmt = " ".join(f"{{:<{w}}}" for w in widths)
    typer.echo(f"{len(rows)} failed row(s):\n")
    typer.echo(fmt.format(*headers))
    for row in cells:
        typer.echo(fmt.format(*row))
|
||||
183
src/everos/entrypoints/cli/commands/init_cmd.py
Normal file
183
src/everos/entrypoints/cli/commands/init_cmd.py
Normal file
@ -0,0 +1,183 @@
|
||||
"""``everos init`` — generate a starter ``.env`` from the packaged template.
|
||||
|
||||
The ``env.template`` ships inside the wheel as package data at
|
||||
``everos/templates/env.template``. ``init`` reads it via
|
||||
:mod:`importlib.resources`, so the command works identically for pip-
|
||||
installed users and source-tree users (the file is the single source
|
||||
of truth).
|
||||
|
||||
Subcommand mounted as ``everos init`` (top-level leaf command — not a
|
||||
Typer group), to match the idiomatic ``alembic init`` / ``django-admin
|
||||
startproject`` shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from importlib import resources
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
|
||||
_TEMPLATE_PACKAGE = "everos.templates"
_TEMPLATE_NAME = "env.template"

_log = logging.getLogger("everos.cli.init")


def _read_template() -> str:
    """Return the packaged ``env.template`` as UTF-8 text.

    The file is looked up through :mod:`importlib.resources` inside the
    ``everos.templates`` package.

    Raises:
        RuntimeError: if the template package or the template file cannot
            be found; the original lookup error is chained as the cause.
    """
    # NOTE(review): the error text below points at a hatch ``force-include``
    # entry; confirm this matches how pyproject.toml actually ships the file.
    try:
        template = resources.files(_TEMPLATE_PACKAGE).joinpath(_TEMPLATE_NAME)
        text = template.read_text(encoding="utf-8")
    except (FileNotFoundError, ModuleNotFoundError) as exc:
        raise RuntimeError(
            f"packaged template {_TEMPLATE_NAME!r} not found under "
            f"{_TEMPLATE_PACKAGE!r}; the wheel is missing its "
            "force-include entry (see pyproject.toml "
            "[tool.hatch.build.targets.wheel.force-include])."
        ) from exc
    return text
|
||||
|
||||
|
||||
def _xdg_default_path() -> Path:
    """Return ``$XDG_CONFIG_HOME/everos/.env`` (base defaults to ``~/.config``)."""
    config_home = os.environ.get("XDG_CONFIG_HOME")
    if not config_home:
        # An unset and an empty variable are treated the same way.
        config_home = "~/.config"
    return Path(config_home).expanduser().joinpath("everos", ".env")
|
||||
|
||||
|
||||
def _atomic_write(target: Path, content: str, mode: int = 0o600) -> None:
    """Atomically place ``content`` at ``target`` with permission ``mode``.

    The text is first written (UTF-8) to a temporary file created next to
    ``target``; that file is chmod-ed and then moved over ``target`` with
    ``os.replace``. Missing parent directories are created.

    Args:
        target: Final location of the file.
        content: Text to write.
        mode: Permission bits applied to the temporary file before the move.

    Raises:
        Exception: Whatever the write / chmod / replace step raised; the
            temporary file is removed (best effort) before re-raising.
    """
    directory = target.parent
    directory.mkdir(parents=True, exist_ok=True)
    # Same directory as the target so that ``os.replace`` is a plain rename.
    handle, staging = tempfile.mkstemp(dir=directory, prefix=f"{target.name}.")
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            stream.write(content)
        # Permissions are set while the file still carries its temporary name.
        os.chmod(staging, mode)
        os.replace(staging, target)
    except Exception:
        # Do not leave the half-written staging file behind.
        with contextlib.suppress(OSError):
            os.unlink(staging)
        raise
|
||||
|
||||
|
||||
def register(parent: typer.Typer) -> None:
    """Register ``init`` as a leaf command on ``parent``."""

    def _fail(message: str, code: int) -> typer.Exit:
        """Print ``message`` in red on stderr and build the matching exit."""
        typer.secho(message, fg=typer.colors.RED, err=True)
        return typer.Exit(code=code)

    @parent.command("init")
    def init(
        to: str | None = typer.Option(
            None,
            "--to",
            help=(
                "Target path for the .env file (default: ./.env). "
                "Parent directories are created if needed."
            ),
        ),
        force: bool = typer.Option(
            False,
            "--force",
            help="Overwrite an existing file at the target path.",
        ),
        print_: bool = typer.Option(
            False,
            "--print",
            help="Print the template to stdout instead of writing to disk.",
        ),
        xdg: bool = typer.Option(
            False,
            "--xdg",
            help=(
                "Shortcut for --to=${XDG_CONFIG_HOME:-~/.config}/everos/.env "
                "(mutually exclusive with --to)."
            ),
        ),
    ) -> None:
        """Generate a starter ``.env`` from the packaged template.

        Common flows::

            everos init # writes ./.env
            everos init --xdg # writes ~/.config/everos/.env
            everos init --to /etc/foo.env --force
            everos init --print > custom.env

        Exit codes:

        - 0 — written successfully (or printed to stdout).
        - 1 — target file already exists and ``--force`` was not given.
        - 2 — packaged template missing (wheel build problem).
        - 3 — write failed (permissions / disk full / parent unwritable).
        """
        # Conflicting flags are a usage error; note that this path shares
        # exit code 2 with the "template missing" failure below.
        if xdg and to is not None:
            raise _fail("error: --xdg and --to are mutually exclusive", 2)

        try:
            body = _read_template()
        except RuntimeError as exc:
            raise _fail(f"error: {exc}", 2) from exc

        # ``--print`` never touches the filesystem.
        if print_:
            sys.stdout.write(body)
            return

        # Destination priority: --xdg, then --to, then ./.env.
        if xdg:
            destination = _xdg_default_path()
        elif to is None:
            destination = Path.cwd() / ".env"
        else:
            destination = Path(to).expanduser().resolve()

        if destination.exists() and not force:
            raise _fail(
                f"error: {destination} already exists; pass --force to overwrite",
                1,
            )

        try:
            _atomic_write(destination, body)
        except OSError as exc:
            raise _fail(f"error: failed to write {destination}: {exc}", 3) from exc

        # Success banner followed by the next-step hints (all on stdout).
        kilobytes = destination.stat().st_size / 1024
        typer.secho(
            f"✓ wrote {destination} ({kilobytes:.1f} KB)", fg=typer.colors.GREEN
        )
        for hint in (
            "Next steps:",
            " 1. Edit the file and fill in the API keys (see comments inside).",
            " 2. Run `everos server start`.",
            "Docs: https://github.com/evermind/everos/blob/master/QUICKSTART.md",
        ):
            typer.echo(hint)
|
||||
161
src/everos/entrypoints/cli/commands/server.py
Normal file
161
src/everos/entrypoints/cli/commands/server.py
Normal file
@ -0,0 +1,161 @@
|
||||
"""``everos server`` subcommand group.
|
||||
|
||||
Provides ``everos server start`` to run the HTTP API via uvicorn. CLI
|
||||
parses arguments, configures structured logging, then hands off to
|
||||
uvicorn pointing at :func:`everos.entrypoints.api.app.create_app` as a
|
||||
factory.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
import uvicorn
|
||||
|
||||
app = typer.Typer(
|
||||
name="server",
|
||||
help="Run / manage the HTTP API server",
|
||||
no_args_is_help=True,
|
||||
)
|
||||
|
||||
|
||||
def _resolve_env_file(explicit: str | None) -> Path | None:
    """Return the first existing ``.env`` file on the search path.

    Candidates are probed in this order and the first regular file wins:

    1. ``explicit`` (``~`` expanded) — only when a non-empty value is given.
    2. ``./.env`` in the current working directory.
    3. ``${XDG_CONFIG_HOME:-~/.config}/everos/.env``.
    4. ``~/.everos/.env``.

    An ``explicit`` path that is not an existing file is not an error: the
    search simply continues with the remaining layers.

    Returns:
        The winning path, or ``None`` when no candidate is a regular file.
    """
    search_path: list[Path] = [Path(explicit).expanduser()] if explicit else []
    search_path.append(Path.cwd() / ".env")
    config_home = os.environ.get("XDG_CONFIG_HOME") or "~/.config"
    search_path += [
        Path(config_home).expanduser() / "everos" / ".env",
        Path("~/.everos/.env").expanduser(),
    ]

    for candidate in search_path:
        try:
            found = candidate.is_file()
        except OSError:
            # An unreadable candidate must not abort the search — move on
            # to the next layer.
            continue
        if found:
            return candidate
    return None
|
||||
|
||||
|
||||
def _load_env_file(path: str | None) -> Path | None:
    """Load environment variables from the resolved ``.env`` file.

    The file is located with ``_resolve_env_file`` and loaded with
    ``load_dotenv(..., override=False)``.

    Args:
        path: Optional explicit dotenv path (the ``--env-file`` value).

    Returns:
        The path that was actually loaded, or ``None`` when nothing was
        loaded — either because no ``.env`` exists along the search path or
        because python-dotenv is not importable. A ``.env`` is optional: the
        user may rely entirely on inherited process env (e.g. container /
        CI secret injection).
    """
    resolved = _resolve_env_file(path)
    if resolved is None:
        return None
    # Only the import is guarded: an ImportError raised while *running*
    # load_dotenv would be a different problem and must not be hidden.
    try:
        from dotenv import load_dotenv
    except ImportError:
        # python-dotenv is in our deps; tolerate its absence, but say so and
        # do not report the file as loaded when it was not.
        logging.getLogger("everos.cli.server").warning(
            "python-dotenv is not installed; env file %s was NOT loaded",
            resolved,
        )
        return None
    load_dotenv(resolved, override=False)
    return resolved
|
||||
|
||||
|
||||
@app.command("start")
def start(
    host: str | None = typer.Option(
        None,
        "--host",
        help="Bind host (env: EVEROS_API__HOST, default: 127.0.0.1)",
    ),
    port: int | None = typer.Option(
        None,
        "--port",
        help="Bind port (env: EVEROS_API__PORT, default: 8000)",
    ),
    env_file: str | None = typer.Option(
        None,
        "--env-file",
        help=(
            "Path to a dotenv file (highest priority). When omitted, "
            "the server searches: ./.env → ${XDG_CONFIG_HOME:-~/.config}"
            "/everos/.env → ~/.everos/.env. Run `everos init` to create one."
        ),
    ),
    reload: bool = typer.Option(
        False,
        "--reload",
        help="Reload on source changes (development)",
    ),
    log_level: str | None = typer.Option(
        None,
        "--log-level",
        help="Log level (env: EVEROS_LOG_LEVEL, default: INFO)",
    ),
) -> None:
    """Start the HTTP API server."""
    # Flow: load .env -> load settings -> resolve host / port / log level
    # (CLI flag wins over settings / env) -> configure logging -> run uvicorn.
    # Exits with status 1 when uvicorn raises OSError / RuntimeError.
    loaded_env = _load_env_file(env_file)

    # Load settings AFTER .env is in place so EVEROS_API__HOST and
    # EVEROS_API__PORT (and any other env override) are honored.
    from everos.config import load_settings

    settings = load_settings()

    host_resolved = host or settings.api.host
    port_resolved = port if port is not None else settings.api.port
    # ``or "INFO"`` (rather than a getenv default) so that a variable that is
    # set but empty also falls back to INFO instead of producing "".
    log_level_resolved = (
        log_level or os.getenv("EVEROS_LOG_LEVEL") or "INFO"
    ).upper()

    from everos.core.observability.logging import configure_logging

    configure_logging(level=log_level_resolved)

    bootstrap_logger = logging.getLogger("everos.cli.server")
    if loaded_env is not None:
        bootstrap_logger.info("loaded env file: %s", loaded_env)
    else:
        bootstrap_logger.info(
            "no .env found along the search path; relying on inherited env vars "
            "(run `everos init` to generate one)"
        )
    bootstrap_logger.info("starting everos on %s:%d", host_resolved, port_resolved)
    # Both the IPv4 and the IPv6 wildcard expose the API on every interface.
    if host_resolved in ("0.0.0.0", "::"):
        bootstrap_logger.warning(
            "binding to %s exposes the API on all interfaces; EverOS "
            "ships no built-in auth — see SECURITY.md",
            host_resolved,
        )

    try:
        uvicorn.run(
            "everos.entrypoints.api.app:create_app",
            host=host_resolved,
            port=port_resolved,
            reload=reload,
            factory=True,
            log_level=log_level_resolved.lower(),
            # ``configure_logging()`` above already installed the root
            # handler + structlog ProcessorFormatter. ``log_config=None``
            # stops uvicorn from running its own ``dictConfig`` over
            # ours; otherwise uvicorn / fastapi messages revert to the
            # ``INFO:`` no-structlog format on every restart.
            log_config=None,
        )
    except KeyboardInterrupt:
        bootstrap_logger.info("interrupted; shutting down")
    except (OSError, RuntimeError) as exc:
        bootstrap_logger.error("startup failed: %s", exc)
        sys.exit(1)
|
||||
34
src/everos/entrypoints/cli/main.py
Normal file
34
src/everos/entrypoints/cli/main.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""everos CLI root entry point.
|
||||
|
||||
Exposed as the ``everos`` console script in ``pyproject.toml``. Subcommand
|
||||
groups live under :mod:`everos.entrypoints.cli.commands` and are registered
|
||||
here.
|
||||
|
||||
CLI subcommands run **in-process** — they call into the service layer
|
||||
directly rather than through the HTTP API. The HTTP API and CLI are two
|
||||
sibling surfaces over the same service layer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typer
|
||||
|
||||
from .commands import cascade, init_cmd, server
|
||||
|
||||
# Root Typer application; running it without arguments prints the help
# screen and shell-completion options are not added.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="everos — md-first memory extraction framework",
    name="everos",
)

# Subcommand groups: ``everos server …`` and ``everos cascade …``.
app.add_typer(server.app, name="server")
app.add_typer(cascade.app, name="cascade")

# ``init`` attaches itself as a plain leaf command rather than a group,
# mirroring the ``alembic init`` / ``django-admin startproject`` shape.
init_cmd.register(app)


if __name__ == "__main__":
    app()
|
||||
5
src/everos/infra/__init__.py
Normal file
5
src/everos/infra/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Infrastructure layer.
|
||||
|
||||
Adapts to external storage and persists domain models. Contains no
|
||||
business rules.
|
||||
"""
|
||||
63
src/everos/infra/ome/__init__.py
Normal file
63
src/everos/infra/ome/__init__.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""Async offline strategy scheduling chassis.
|
||||
|
||||
Provides decorator-based strategy registration, event-driven triggers
|
||||
(Cron/Idle/Manual), and gate-based concurrency control.
|
||||
"""
|
||||
|
||||
from everos.infra.ome.config import OMEConfig as OMEConfig
|
||||
from everos.infra.ome.context import StrategyContext as StrategyContext
|
||||
from everos.infra.ome.decorator import offline_strategy as offline_strategy
|
||||
from everos.infra.ome.engine import OfflineEngine as OfflineEngine
|
||||
from everos.infra.ome.events import BaseEvent as BaseEvent
|
||||
from everos.infra.ome.events import CronTick as CronTick
|
||||
from everos.infra.ome.events import IdleTick as IdleTick
|
||||
from everos.infra.ome.events import ManualTick as ManualTick
|
||||
from everos.infra.ome.exceptions import (
|
||||
EmitNotDeclaredError as EmitNotDeclaredError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
EngineCallFromStrategyError as EngineCallFromStrategyError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
EngineLockHeldError as EngineLockHeldError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import OMEError as OMEError
|
||||
from everos.infra.ome.exceptions import (
|
||||
StartupValidationError as StartupValidationError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
StrategyContractError as StrategyContractError,
|
||||
)
|
||||
from everos.infra.ome.gates import Counter as Counter
|
||||
from everos.infra.ome.records import RunRecord as RunRecord
|
||||
from everos.infra.ome.records import RunStatus as RunStatus
|
||||
from everos.infra.ome.records import StrategyRouteInfo as StrategyRouteInfo
|
||||
from everos.infra.ome.triggers import Cron as Cron
|
||||
from everos.infra.ome.triggers import Idle as Idle
|
||||
from everos.infra.ome.triggers import Immediate as Immediate
|
||||
from everos.infra.ome.triggers import Trigger as Trigger
|
||||
|
||||
__all__ = [
|
||||
"BaseEvent",
|
||||
"Counter",
|
||||
"Cron",
|
||||
"CronTick",
|
||||
"EmitNotDeclaredError",
|
||||
"EngineCallFromStrategyError",
|
||||
"EngineLockHeldError",
|
||||
"Idle",
|
||||
"IdleTick",
|
||||
"Immediate",
|
||||
"ManualTick",
|
||||
"OfflineEngine",
|
||||
"OMEConfig",
|
||||
"OMEError",
|
||||
"RunRecord",
|
||||
"RunStatus",
|
||||
"StartupValidationError",
|
||||
"StrategyContext",
|
||||
"StrategyContractError",
|
||||
"StrategyRouteInfo",
|
||||
"Trigger",
|
||||
"offline_strategy",
|
||||
]
|
||||
1
src/everos/infra/ome/_background/__init__.py
Normal file
1
src/everos/infra/ome/_background/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Internal: background loops (idle scan / config reload / crash recovery)."""
|
||||
254
src/everos/infra/ome/_background/config_reloader.py
Normal file
254
src/everos/infra/ome/_background/config_reloader.py
Normal file
@ -0,0 +1,254 @@
|
||||
"""Config hot-reload — watchfiles + tomllib + shallow merge.
|
||||
|
||||
Hot-updatable fields: enabled / max_retries / gate / cron / idle_seconds /
|
||||
scan_interval_seconds. Trigger type swap (Immediate ↔ Cron ↔ Idle),
|
||||
event subscription (Immediate.on / Idle.on), and Idle.event_field
|
||||
remain immutable — these define strategy routing and changing them
|
||||
requires a code change and redeploy.
|
||||
|
||||
Per-strategy two-phase commit: enabled is applied independently for
|
||||
emergency-stop semantics; max_retries / gate / trigger parameters
|
||||
form one atomic group that fully rolls back on any failure inside it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import tomllib
|
||||
from contextlib import suppress
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pydantic import ValidationError
|
||||
from watchfiles import awatch
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._dispatch.registry import StrategyRegistry
|
||||
from everos.infra.ome.config import StrategyOverride, TomlRoot
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.gates import Counter
|
||||
from everos.infra.ome.triggers import Cron, Idle, Trigger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from everos.infra.ome.engine import OfflineEngine
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _SkipAtomicGroupError(Exception):
    """Signal that one strategy's non-``enabled`` override group is skipped.

    Internal sentinel: it aborts the atomic group of a single strategy and
    leaves every other strategy unaffected.
    """
|
||||
|
||||
|
||||
def _apply_enabled(
    meta: StrategyMeta,
    override: StrategyOverride,
    name: str,
    registry: StrategyRegistry,
) -> StrategyMeta:
    """Step 1: apply the ``enabled`` override on its own.

    Returns ``meta`` untouched when the override does not set ``enabled`` or
    sets it to the current value. Otherwise a copy with the new flag is
    stored in ``registry`` under ``name`` and returned.
    """
    requested = override.enabled
    unchanged = requested is None or requested == meta.enabled
    if unchanged:
        return meta
    toggled = replace(meta, enabled=requested)
    registry.replace(name, toggled)
    return toggled
|
||||
|
||||
|
||||
def _build_atomic_meta(
    meta: StrategyMeta,
    override: StrategyOverride,
) -> tuple[StrategyMeta, Trigger]:
    """Step 2 (pure compute): derive ``(new_meta, new_trigger)``.

    Fields are processed in the order ``max_retries``, ``gate``, ``cron``,
    ``idle_seconds`` / ``scan_interval_seconds``. Nothing is written to the
    registry or the engine here.

    Returns:
        The updated metadata and the trigger it carries. When no trigger
        field was overridden the returned trigger is ``meta.trigger`` itself.

    Raises:
        _SkipAtomicGroupError: A gate is introduced without a threshold, or
            a ``cron`` / ``idle_*`` override targets a strategy whose trigger
            is of a different type.
    """
    current_trigger = meta.trigger
    staged = meta
    trigger: Trigger = current_trigger

    if override.max_retries is not None:
        staged = replace(staged, max_retries=override.max_retries)

    gate_patch = override.gate
    if gate_patch is not None:
        # A brand-new gate must state its threshold explicitly — silently
        # defaulting to 1 would mean "fire on every event", which is almost
        # certainly not what the user intended.
        if meta.gate is None and gate_patch.threshold is None:
            raise _SkipAtomicGroupError(
                "introducing a gate requires explicit threshold"
            )
        gate_fields = {} if meta.gate is None else meta.gate.model_dump()
        # Only explicitly-set, non-None override fields win over the current gate.
        gate_fields.update(
            (field, value)
            for field, value in gate_patch.model_dump(exclude_unset=True).items()
            if value is not None
        )
        staged = replace(staged, gate=Counter(**gate_fields))

    if override.cron is not None:
        if not isinstance(current_trigger, Cron):
            raise _SkipAtomicGroupError(
                f"cron given on non-Cron strategy "
                f"(actual: {type(current_trigger).__name__})"
            )
        trigger = Cron(expr=override.cron)

    idle_updates: dict[str, int] = {}
    if override.idle_seconds is not None:
        idle_updates["idle_seconds"] = override.idle_seconds
    if override.scan_interval_seconds is not None:
        idle_updates["scan_interval_seconds"] = override.scan_interval_seconds
    if idle_updates:
        if not isinstance(current_trigger, Idle):
            raise _SkipAtomicGroupError(
                f"idle_* given on non-Idle strategy "
                f"(actual: {type(current_trigger).__name__})"
            )
        # model_validate (not model_copy) re-runs the Idle validators on the
        # merged dict; model_copy(update=...) would skip them and let an
        # invalid event_field reach the registry.
        trigger = Idle.model_validate({**current_trigger.model_dump(), **idle_updates})

    if trigger is not current_trigger:
        staged = replace(staged, trigger=trigger)

    return staged, trigger
|
||||
|
||||
|
||||
def _needs_aps_reschedule(old_trigger: Trigger, new_trigger: Trigger) -> bool:
    """Tell whether a scheduler-driving field differs between the triggers.

    Only two transitions count: Cron → Cron with a different ``expr`` and
    Idle → Idle with a different ``scan_interval_seconds``. The very same
    object, or any other combination, yields ``False``.
    """
    if new_trigger is old_trigger:
        return False
    for kind, field in ((Cron, "expr"), (Idle, "scan_interval_seconds")):
        if isinstance(new_trigger, kind) and isinstance(old_trigger, kind):
            return getattr(new_trigger, field) != getattr(old_trigger, field)
    return False
|
||||
|
||||
|
||||
def _maybe_reschedule_aps(
    engine: OfflineEngine, name: str, new_trigger: Trigger
) -> None:
    """Forward the trigger's scheduling field to the engine.

    A ``Cron`` trigger passes its ``expr`` to ``engine.reschedule_cron_job``;
    an ``Idle`` trigger passes its ``scan_interval_seconds`` to
    ``engine.reschedule_idle_job``. Any other trigger type is ignored.
    """
    if isinstance(new_trigger, Cron):
        engine.reschedule_cron_job(name, new_trigger.expr)
        return
    if isinstance(new_trigger, Idle):
        interval = new_trigger.scan_interval_seconds
        engine.reschedule_idle_job(name, scan_interval_seconds=interval)
|
||||
|
||||
|
||||
def _apply_one_strategy(
    name: str,
    override: StrategyOverride,
    registry: StrategyRegistry,
    engine: OfflineEngine,
) -> None:
    """Apply one strategy's override: ``enabled`` first, then the rest.

    The ``enabled`` flag is committed before anything else. The remaining
    fields are then computed, pushed to the scheduler when a scheduling
    field changed, and finally stored in the registry. Any exception in
    that second phase is logged as ``strategy_atomic_group_skipped`` and
    swallowed so the caller's loop keeps going.
    """
    current = _apply_enabled(registry.get(name), override, name, registry)

    # NOTE(review): the scheduler is updated before the registry write; if
    # ``registry.replace`` can fail, nothing visible here undoes the
    # reschedule — confirm that is acceptable.
    try:
        candidate, trigger = _build_atomic_meta(current, override)
        if _needs_aps_reschedule(current.trigger, trigger):
            _maybe_reschedule_aps(engine, name, trigger)
        registry.replace(name, candidate)
    except Exception as exc:  # noqa: BLE001
        # User-fixable config error (typo / type mismatch / APS runtime
        # failure) — skip this strategy's group, never the whole reload.
        logger.warning(
            "strategy_atomic_group_skipped",
            strategy_name=name,
            error_type=type(exc).__name__,
            exc_info=True,
        )
|
||||
|
||||
|
||||
def apply_overrides(
    registry: StrategyRegistry,
    root: TomlRoot,
    engine: OfflineEngine,
) -> None:
    """Merge the overrides in ``root`` into the registered strategies.

    Each entry of ``root.strategies`` whose name is registered is handed to
    ``_apply_one_strategy``, which works in two steps:

    Step 1 (enabled): applied independently — an emergency stop must never
        be blocked by a typo in another field.
    Step 2 (max_retries / gate / trigger params): applied as one group; a
        failure inside it is logged and the group is skipped for that
        strategy.

    Names that are not registered are reported with a
    ``config_override_unknown_strategy`` warning and otherwise ignored.
    """
    registered = {entry.name for entry in registry.all()}
    for strategy_name, strategy_override in root.strategies.items():
        if strategy_name in registered:
            _apply_one_strategy(strategy_name, strategy_override, registry, engine)
        else:
            logger.warning(
                "config_override_unknown_strategy", strategy_name=strategy_name
            )
|
||||
|
||||
|
||||
class ConfigReloader:
|
||||
"""Watch a TOML file and apply overrides on change."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
config_path: Path,
|
||||
registry: StrategyRegistry,
|
||||
engine: OfflineEngine,
|
||||
debounce_ms: int = 1600,
|
||||
) -> None:
|
||||
self._path = config_path
|
||||
self._registry = registry
|
||||
self._engine = engine
|
||||
self._debounce_ms = debounce_ms
|
||||
self._task: asyncio.Task[None] | None = None
|
||||
|
||||
def start(self) -> None:
|
||||
"""Fire-and-forget the watch loop. Idempotent: raises on double-start."""
|
||||
if self._path is None:
|
||||
return
|
||||
if self._task is not None and not self._task.done():
|
||||
raise RuntimeError("ConfigReloader already started")
|
||||
self._task = asyncio.create_task(self._loop())
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Cancel the watch task and await it; safe to call multiple times."""
|
||||
if self._task is not None:
|
||||
self._task.cancel()
|
||||
with suppress(asyncio.CancelledError):
|
||||
await self._task
|
||||
self._task = None
|
||||
|
||||
async def _loop(self) -> None:
|
||||
"""Initial load + per-FS-change reload; survives single-iteration failures."""
|
||||
try:
|
||||
await self._load_once()
|
||||
except Exception: # noqa: BLE001
|
||||
logger.exception("config_reload_iteration_failed")
|
||||
async for _changes in awatch(self._path, debounce=self._debounce_ms):
|
||||
try:
|
||||
await self._load_once()
|
||||
except Exception: # noqa: BLE001
|
||||
logger.exception("config_reload_iteration_failed")
|
||||
|
||||
async def _load_once(self) -> None:
|
||||
"""Read TOML off the loop, parse + validate, apply overrides."""
|
||||
|
||||
def _read_and_parse() -> TomlRoot:
|
||||
with open(self._path, "rb") as f:
|
||||
content = f.read()
|
||||
parsed = tomllib.loads(content.decode("utf-8"))
|
||||
return TomlRoot.model_validate(parsed)
|
||||
|
||||
try:
|
||||
root = await asyncio.to_thread(_read_and_parse)
|
||||
except (OSError, tomllib.TOMLDecodeError, ValidationError) as e:
|
||||
logger.warning(
|
||||
"config_reload_failed",
|
||||
error_type=type(e).__name__,
|
||||
error=str(e),
|
||||
path=str(self._path),
|
||||
)
|
||||
return
|
||||
apply_overrides(self._registry, root, self._engine)
|
||||
logger.info("config_reloaded", path=str(self._path))
|
||||
79
src/everos/infra/ome/_background/crash_recovery.py
Normal file
79
src/everos/infra/ome/_background/crash_recovery.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""Startup crash recovery — stale RUNNING rows → CRASHED + re-enqueue.
|
||||
|
||||
Runs once at engine.start() before normal dispatching begins. Rows
|
||||
whose started_at is older than ``timeout_seconds`` are marked CRASHED
|
||||
and re-enqueued with a fresh run_id reusing the original event payload.
|
||||
Fresher RUNNING rows are skipped — APScheduler's own jobstore may have
|
||||
already reattached them.
|
||||
|
||||
At-most-once: ``mark_crashed`` and ``add_job`` are not atomic. If
|
||||
``add_job`` fails after ``mark_crashed``, the row stays CRASHED and
|
||||
the event is lost. Strategies needing at-least-once must add their own
|
||||
retry / monitor layer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._stores.run_record import RunRecordStore
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def scan_and_resume(
    *,
    run_record_store: RunRecordStore,
    timeout_seconds: int,
    add_job: Callable[[str, str, str, str, int], Awaitable[None]],
) -> None:
    """Mark stale RUNNING rows as CRASHED and re-enqueue each of them.

    A row is stale when its ``started_at`` is older than ``timeout_seconds``
    relative to the scan time; fresher rows are left alone. Every stale row
    is first marked CRASHED and then re-submitted under a fresh run id via
    ``add_job``, called positionally as
    ``(strategy_name, run_id, event_topic, event_payload, max_retries)``.

    The two steps are not atomic: a failing ``add_job`` is logged and the
    row stays CRASHED (at-most-once, see the module docstring).

    Raises:
        ValueError: If ``timeout_seconds`` is not positive.
    """
    if timeout_seconds <= 0:
        raise ValueError(f"timeout_seconds must be > 0, got {timeout_seconds}")

    scan_time = get_utc_now()
    stale_before = scan_time - timedelta(seconds=timeout_seconds)

    for record in await run_record_store.find_running():
        is_fresh = record.started_at >= stale_before
        if is_fresh:
            continue

        await run_record_store.mark_crashed(
            run_id=record.run_id,
            finished_at=scan_time,
            error="crash recovery: marked CRASHED after start scan",
        )

        replacement_id = uuid4().hex
        # Fields shared by the success and the failure log line.
        log_fields = {
            "strategy_name": record.strategy_name,
            "event_topic": record.event_topic,
            "old_run_id": record.run_id,
        }
        try:
            await add_job(
                record.strategy_name,
                replacement_id,
                record.event_topic,
                record.event_payload,
                record.max_retries_snapshot,
            )
            logger.info(
                "crash_recovery_resumed", **log_fields, new_run_id=replacement_id
            )
        except Exception:  # noqa: BLE001
            # One failed re-enqueue must not stop recovery of the other rows.
            logger.exception("crash_recovery_resume_failed", **log_fields)
|
||||
60
src/everos/infra/ome/_background/idle_scanner.py
Normal file
60
src/everos/infra/ome/_background/idle_scanner.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""IdleScanner — periodic scan of idle_store, emits IdleTick for overdue buckets."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._stores.idle import IdleStore
|
||||
from everos.infra.ome.events import BaseEvent, IdleTick
|
||||
from everos.infra.ome.triggers import Idle
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class IdleScanner:
    """Emit an ``IdleTick`` for every overdue bucket of one strategy."""

    def __init__(
        self,
        *,
        strategy_name: str,
        trigger: Idle,
        idle_store: IdleStore,
        emit: Callable[[BaseEvent], Awaitable[None]],
    ) -> None:
        """Keep the strategy name, its Idle trigger, the store and the emitter."""
        self._emit = emit
        self._idle_store = idle_store
        self._trigger = trigger
        self._name = strategy_name

    async def scan_once(self, *, now: datetime | None = None) -> None:
        """Run one scan: query overdue buckets, emit one tick per bucket.

        Args:
            now: Reference time for the scan; the current UTC time is used
                when omitted.

        A failure while emitting for one bucket is logged as
        ``idle_emit_failed`` and does not prevent the remaining buckets from
        being notified in the same round.
        """
        if now is None:
            now = get_utc_now()
        overdue_buckets = await self._idle_store.scan_idle(
            self._name,
            idle_seconds=self._trigger.idle_seconds,
            now=now,
        )
        for bucket_key in overdue_buckets:
            try:
                tick = IdleTick(
                    strategy_name=self._name,
                    bucket_key=bucket_key,
                    idle_seconds=self._trigger.idle_seconds,
                )
                await self._emit(tick)
            except Exception:  # noqa: BLE001
                logger.exception(
                    "idle_emit_failed",
                    strategy_name=self._name,
                    bucket_key=bucket_key,
                )
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user