chore: initialize EverOS 1.0.0
md-first memory extraction framework for AI agents. Markdown is the single source of truth; SQLite holds state and LanceDB provides the rebuildable vector + BM25 + scalar index. The codebase follows a single-direction DDD layering (entrypoints -> service -> memory -> infra, with component / core / config cross-cutting) enforced by import-linter. Engineering surface: - Coding conventions in .claude/rules/ (path-scoped) and workflows in .claude/skills/ (/commit, /new-branch, /pr). - GitHub Actions CI runs make lint + test + integration; pre-commit mirrors the gates locally (ruff, hygiene hooks, gitlint commit-msg). - Commit messages follow Conventional Commits, enforced by gitlint. - make lint also enforces datetime two-zone discipline and OpenAPI drift.
This commit is contained in:
5
src/everos/infra/__init__.py
Normal file
5
src/everos/infra/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Infrastructure layer.
|
||||
|
||||
Adapts to external storage and persists domain models. Contains no
|
||||
business rules.
|
||||
"""
|
||||
63
src/everos/infra/ome/__init__.py
Normal file
63
src/everos/infra/ome/__init__.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""Async offline strategy scheduling chassis.
|
||||
|
||||
Provides decorator-based strategy registration, event-driven triggers
|
||||
(Cron/Idle/Manual), and gate-based concurrency control.
|
||||
"""
|
||||
|
||||
from everos.infra.ome.config import OMEConfig as OMEConfig
|
||||
from everos.infra.ome.context import StrategyContext as StrategyContext
|
||||
from everos.infra.ome.decorator import offline_strategy as offline_strategy
|
||||
from everos.infra.ome.engine import OfflineEngine as OfflineEngine
|
||||
from everos.infra.ome.events import BaseEvent as BaseEvent
|
||||
from everos.infra.ome.events import CronTick as CronTick
|
||||
from everos.infra.ome.events import IdleTick as IdleTick
|
||||
from everos.infra.ome.events import ManualTick as ManualTick
|
||||
from everos.infra.ome.exceptions import (
|
||||
EmitNotDeclaredError as EmitNotDeclaredError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
EngineCallFromStrategyError as EngineCallFromStrategyError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
EngineLockHeldError as EngineLockHeldError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import OMEError as OMEError
|
||||
from everos.infra.ome.exceptions import (
|
||||
StartupValidationError as StartupValidationError,
|
||||
)
|
||||
from everos.infra.ome.exceptions import (
|
||||
StrategyContractError as StrategyContractError,
|
||||
)
|
||||
from everos.infra.ome.gates import Counter as Counter
|
||||
from everos.infra.ome.records import RunRecord as RunRecord
|
||||
from everos.infra.ome.records import RunStatus as RunStatus
|
||||
from everos.infra.ome.records import StrategyRouteInfo as StrategyRouteInfo
|
||||
from everos.infra.ome.triggers import Cron as Cron
|
||||
from everos.infra.ome.triggers import Idle as Idle
|
||||
from everos.infra.ome.triggers import Immediate as Immediate
|
||||
from everos.infra.ome.triggers import Trigger as Trigger
|
||||
|
||||
# Public API of the OME package. Every name listed here is re-exported by the
# explicit ``from ... import X as X`` statements earlier in this module.
__all__ = [
    "BaseEvent",
    "Counter",
    "Cron",
    "CronTick",
    "EmitNotDeclaredError",
    "EngineCallFromStrategyError",
    "EngineLockHeldError",
    "Idle",
    "IdleTick",
    "Immediate",
    "ManualTick",
    "OfflineEngine",
    "OMEConfig",
    "OMEError",
    "RunRecord",
    "RunStatus",
    "StartupValidationError",
    "StrategyContext",
    "StrategyContractError",
    "StrategyRouteInfo",
    "Trigger",
    "offline_strategy",
]
|
||||
1
src/everos/infra/ome/_background/__init__.py
Normal file
1
src/everos/infra/ome/_background/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Internal: background loops (idle scan / config reload / crash recovery)."""
|
||||
254
src/everos/infra/ome/_background/config_reloader.py
Normal file
254
src/everos/infra/ome/_background/config_reloader.py
Normal file
@ -0,0 +1,254 @@
|
||||
"""Config hot-reload — watchfiles + tomllib + shallow merge.
|
||||
|
||||
Hot-updatable fields: enabled / max_retries / gate / cron / idle_seconds /
|
||||
scan_interval_seconds. Trigger type swap (Immediate ↔ Cron ↔ Idle),
|
||||
event subscription (Immediate.on / Idle.on), and Idle.event_field
|
||||
remain immutable — these define strategy routing and changing them
|
||||
requires a code change and redeploy.
|
||||
|
||||
Per-strategy two-phase commit: enabled is applied independently for
|
||||
emergency-stop semantics; max_retries / gate / trigger parameters
|
||||
form one atomic group that fully rolls back on any failure inside it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import tomllib
|
||||
from contextlib import suppress
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pydantic import ValidationError
|
||||
from watchfiles import awatch
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._dispatch.registry import StrategyRegistry
|
||||
from everos.infra.ome.config import StrategyOverride, TomlRoot
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.gates import Counter
|
||||
from everos.infra.ome.triggers import Cron, Idle, Trigger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from everos.infra.ome.engine import OfflineEngine
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _SkipAtomicGroupError(Exception):
    """Signal that one strategy's non-``enabled`` override group is unusable.

    Private to this module: raised while computing the atomic group
    (max_retries / gate / trigger parameters) so that only that strategy's
    group is abandoned, leaving every other strategy unaffected.
    """
|
||||
|
||||
|
||||
def _apply_enabled(
    meta: StrategyMeta,
    override: StrategyOverride,
    name: str,
    registry: StrategyRegistry,
) -> StrategyMeta:
    """Step 1: commit the ``enabled`` flag on its own, ahead of every other field.

    Returns the meta now in effect: the original object when the override
    leaves ``enabled`` unset or unchanged, otherwise the updated copy that
    has just been written to ``registry`` under ``name``.
    """
    requested = override.enabled
    if requested is not None and requested != meta.enabled:
        meta = replace(meta, enabled=requested)
        registry.replace(name, meta)
    return meta
|
||||
|
||||
|
||||
def _build_atomic_meta(
    meta: StrategyMeta,
    override: StrategyOverride,
) -> tuple[StrategyMeta, Trigger]:
    """Step 2, computation only: derive ``(new_meta, new_trigger)`` from ``meta``.

    Nothing is written to the registry or the engine here; the caller
    decides whether to commit the returned pair.

    Raises:
        _SkipAtomicGroupError: A gate is introduced without a threshold, or
            a trigger parameter is supplied for a trigger of another type.
    """
    current_trigger = meta.trigger
    candidate = meta
    trigger: Trigger = current_trigger

    if override.max_retries is not None:
        candidate = replace(candidate, max_retries=override.max_retries)

    gate_patch = override.gate
    if gate_patch is not None:
        # A brand-new gate must state its threshold explicitly: silently
        # defaulting to 1 would mean "fire on every event", which is almost
        # certainly not what the user intended.
        if meta.gate is None and gate_patch.threshold is None:
            raise _SkipAtomicGroupError(
                "introducing a gate requires explicit threshold"
            )
        merged = {} if meta.gate is None else meta.gate.model_dump()
        merged.update(
            {
                key: value
                for key, value in gate_patch.model_dump(exclude_unset=True).items()
                if value is not None
            }
        )
        candidate = replace(candidate, gate=Counter(**merged))

    if override.cron is not None:
        if not isinstance(current_trigger, Cron):
            raise _SkipAtomicGroupError(
                f"cron given on non-Cron strategy "
                f"(actual: {type(current_trigger).__name__})"
            )
        trigger = Cron(expr=override.cron)

    idle_patch: dict[str, int] = {
        field: value
        for field, value in (
            ("idle_seconds", override.idle_seconds),
            ("scan_interval_seconds", override.scan_interval_seconds),
        )
        if value is not None
    }
    if idle_patch:
        if not isinstance(current_trigger, Idle):
            raise _SkipAtomicGroupError(
                f"idle_* given on non-Idle strategy "
                f"(actual: {type(current_trigger).__name__})"
            )
        # Validate the merged dict instead of model_copy(update=...): only
        # model_validate re-runs Idle._validate_event_field, so an invalid
        # event_field cannot slip through to the registry.
        trigger = Idle.model_validate(current_trigger.model_dump() | idle_patch)

    if trigger is not current_trigger:
        candidate = replace(candidate, trigger=trigger)

    return candidate, trigger
|
||||
|
||||
|
||||
def _needs_aps_reschedule(old_trigger: Trigger, new_trigger: Trigger) -> bool:
    """Tell whether a scheduler-driving field differs between the two triggers.

    Only a Cron ``expr`` or an Idle ``scan_interval_seconds`` counts; the
    same object on both sides, or triggers of differing types, yield ``False``.
    """
    if new_trigger is old_trigger:
        return False
    for trigger_type, driving_field in (
        (Cron, "expr"),
        (Idle, "scan_interval_seconds"),
    ):
        if isinstance(new_trigger, trigger_type) and isinstance(
            old_trigger, trigger_type
        ):
            return getattr(new_trigger, driving_field) != getattr(
                old_trigger, driving_field
            )
    return False
|
||||
|
||||
|
||||
def _maybe_reschedule_aps(
|
||||
engine: OfflineEngine, name: str, new_trigger: Trigger
|
||||
) -> None:
|
||||
"""Push the new trigger's APS-relevant fields to the scheduler."""
|
||||
if isinstance(new_trigger, Cron):
|
||||
engine.reschedule_cron_job(name, new_trigger.expr)
|
||||
elif isinstance(new_trigger, Idle):
|
||||
engine.reschedule_idle_job(
|
||||
name, scan_interval_seconds=new_trigger.scan_interval_seconds
|
||||
)
|
||||
|
||||
|
||||
def _apply_one_strategy(
    name: str,
    override: StrategyOverride,
    registry: StrategyRegistry,
    engine: OfflineEngine,
) -> None:
    """Apply one strategy's override in two phases: ``enabled``, then the rest.

    The ``enabled`` flag is committed first and unconditionally. The
    remaining fields are computed, pushed to the scheduler when needed, and
    only then written to the registry; any exception along the way is
    logged and leaves the registry entry as it was after phase one.
    """
    current = _apply_enabled(registry.get(name), override, name, registry)

    try:
        updated, trigger = _build_atomic_meta(current, override)
        reschedule = _needs_aps_reschedule(current.trigger, trigger)
        if reschedule:
            _maybe_reschedule_aps(engine, name, trigger)
        # Registry write comes last so a failed reschedule never leaves a
        # half-applied group behind.
        registry.replace(name, updated)
    except Exception as exc:  # noqa: BLE001
        # A user-fixable config problem (typo / type mismatch / scheduler
        # runtime failure): skip this strategy's atomic group and let the
        # caller carry on with the remaining strategies.
        logger.warning(
            "strategy_atomic_group_skipped",
            strategy_name=name,
            error_type=type(exc).__name__,
            exc_info=True,
        )
|
||||
|
||||
|
||||
def apply_overrides(
    registry: StrategyRegistry,
    root: TomlRoot,
    engine: OfflineEngine,
) -> None:
    """Merge the per-strategy overrides of ``root`` into ``registry``.

    Overrides naming a strategy that is not registered are logged and
    ignored. For every known strategy the work happens in two phases:

      Phase 1 (enabled): applied by itself, so an emergency stop is never
        held up by a mistake in some other field.
      Phase 2 (max_retries / gate / trigger params): applied as a single
        atomic group. Any failure (type mismatch, invalid cron, scheduler
        reschedule error, ...) leaves the whole group at its prior values.
    """
    registered = frozenset(entry.name for entry in registry.all())
    for name, override in root.strategies.items():
        if name in registered:
            _apply_one_strategy(name, override, registry, engine)
        else:
            logger.warning("config_override_unknown_strategy", strategy_name=name)
|
||||
|
||||
|
||||
class ConfigReloader:
|
||||
"""Watch a TOML file and apply overrides on change."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
config_path: Path,
|
||||
registry: StrategyRegistry,
|
||||
engine: OfflineEngine,
|
||||
debounce_ms: int = 1600,
|
||||
) -> None:
|
||||
self._path = config_path
|
||||
self._registry = registry
|
||||
self._engine = engine
|
||||
self._debounce_ms = debounce_ms
|
||||
self._task: asyncio.Task[None] | None = None
|
||||
|
||||
def start(self) -> None:
|
||||
"""Fire-and-forget the watch loop. Idempotent: raises on double-start."""
|
||||
if self._path is None:
|
||||
return
|
||||
if self._task is not None and not self._task.done():
|
||||
raise RuntimeError("ConfigReloader already started")
|
||||
self._task = asyncio.create_task(self._loop())
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Cancel the watch task and await it; safe to call multiple times."""
|
||||
if self._task is not None:
|
||||
self._task.cancel()
|
||||
with suppress(asyncio.CancelledError):
|
||||
await self._task
|
||||
self._task = None
|
||||
|
||||
async def _loop(self) -> None:
|
||||
"""Initial load + per-FS-change reload; survives single-iteration failures."""
|
||||
try:
|
||||
await self._load_once()
|
||||
except Exception: # noqa: BLE001
|
||||
logger.exception("config_reload_iteration_failed")
|
||||
async for _changes in awatch(self._path, debounce=self._debounce_ms):
|
||||
try:
|
||||
await self._load_once()
|
||||
except Exception: # noqa: BLE001
|
||||
logger.exception("config_reload_iteration_failed")
|
||||
|
||||
async def _load_once(self) -> None:
|
||||
"""Read TOML off the loop, parse + validate, apply overrides."""
|
||||
|
||||
def _read_and_parse() -> TomlRoot:
|
||||
with open(self._path, "rb") as f:
|
||||
content = f.read()
|
||||
parsed = tomllib.loads(content.decode("utf-8"))
|
||||
return TomlRoot.model_validate(parsed)
|
||||
|
||||
try:
|
||||
root = await asyncio.to_thread(_read_and_parse)
|
||||
except (OSError, tomllib.TOMLDecodeError, ValidationError) as e:
|
||||
logger.warning(
|
||||
"config_reload_failed",
|
||||
error_type=type(e).__name__,
|
||||
error=str(e),
|
||||
path=str(self._path),
|
||||
)
|
||||
return
|
||||
apply_overrides(self._registry, root, self._engine)
|
||||
logger.info("config_reloaded", path=str(self._path))
|
||||
79
src/everos/infra/ome/_background/crash_recovery.py
Normal file
79
src/everos/infra/ome/_background/crash_recovery.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""Startup crash recovery — stale RUNNING rows → CRASHED + re-enqueue.
|
||||
|
||||
Runs once at engine.start() before normal dispatching begins. Rows
|
||||
whose started_at is older than ``timeout_seconds`` are marked CRASHED
|
||||
and re-enqueued with a fresh run_id reusing the original event payload.
|
||||
Fresher RUNNING rows are skipped — APScheduler's own jobstore may have
|
||||
already reattached them.
|
||||
|
||||
At-most-once: ``mark_crashed`` and ``add_job`` are not atomic. If
|
||||
``add_job`` fails after ``mark_crashed``, the row stays CRASHED and
|
||||
the event is lost. Strategies needing at-least-once must add their own
|
||||
retry / monitor layer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._stores.run_record import RunRecordStore
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def scan_and_resume(
    *,
    run_record_store: RunRecordStore,
    timeout_seconds: int,
    add_job: Callable[[str, str, str, str, int], Awaitable[None]],
) -> None:
    """Mark stale RUNNING rows as CRASHED and hand each one back to ``add_job``.

    A row is stale when its ``started_at`` lies more than
    ``timeout_seconds`` before the scan time; fresher rows are left alone.
    Each stale row is re-enqueued under a freshly generated run id.
    ``mark_crashed`` and ``add_job`` are separate steps: when ``add_job``
    fails the failure is logged and the row stays CRASHED (at-most-once).

    ``add_job`` is called with positional args
    ``(strategy_name, run_id, event_topic, event_payload, max_retries)``.

    Raises:
        ValueError: If ``timeout_seconds`` is not positive.
    """
    if timeout_seconds <= 0:
        raise ValueError(f"timeout_seconds must be > 0, got {timeout_seconds}")

    scan_time = get_utc_now()
    stale_before = scan_time - timedelta(seconds=timeout_seconds)

    for record in await run_record_store.find_running():
        if record.started_at >= stale_before:
            continue

        await run_record_store.mark_crashed(
            run_id=record.run_id,
            finished_at=scan_time,
            error="crash recovery: marked CRASHED after start scan",
        )
        replacement_id = uuid4().hex
        try:
            job_args = (
                record.strategy_name,
                replacement_id,
                record.event_topic,
                record.event_payload,
                record.max_retries_snapshot,
            )
            await add_job(*job_args)
            logger.info(
                "crash_recovery_resumed",
                strategy_name=record.strategy_name,
                event_topic=record.event_topic,
                old_run_id=record.run_id,
                new_run_id=replacement_id,
            )
        except Exception:  # noqa: BLE001
            logger.exception(
                "crash_recovery_resume_failed",
                strategy_name=record.strategy_name,
                event_topic=record.event_topic,
                old_run_id=record.run_id,
            )
|
||||
60
src/everos/infra/ome/_background/idle_scanner.py
Normal file
60
src/everos/infra/ome/_background/idle_scanner.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""IdleScanner — periodic scan of idle_store, emits IdleTick for overdue buckets."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import datetime
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._stores.idle import IdleStore
|
||||
from everos.infra.ome.events import BaseEvent, IdleTick
|
||||
from everos.infra.ome.triggers import Idle
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class IdleScanner:
    """Ask the idle store for overdue buckets and emit one IdleTick per bucket."""

    def __init__(
        self,
        *,
        strategy_name: str,
        trigger: Idle,
        idle_store: IdleStore,
        emit: Callable[[BaseEvent], Awaitable[None]],
    ) -> None:
        """Keep the strategy name, its Idle trigger, the store, and the emitter."""
        self._name = strategy_name
        self._trigger = trigger
        self._idle_store = idle_store
        self._emit = emit

    async def scan_once(self, *, now: datetime | None = None) -> None:
        """Run one scan and emit an IdleTick for every overdue bucket.

        ``now`` defaults to the current UTC time. A failure while emitting
        for one bucket (e.g. dispatch hitting a transient DB lock) is
        logged and does not stop the remaining buckets from being notified
        in the same round.
        """
        if now is None:
            now = get_utc_now()
        idle_seconds = self._trigger.idle_seconds
        overdue_buckets = await self._idle_store.scan_idle(
            self._name,
            idle_seconds=idle_seconds,
            now=now,
        )
        for bucket_key in overdue_buckets:
            try:
                tick = IdleTick(
                    strategy_name=self._name,
                    bucket_key=bucket_key,
                    idle_seconds=self._trigger.idle_seconds,
                )
                await self._emit(tick)
            except Exception:  # noqa: BLE001
                logger.exception(
                    "idle_emit_failed",
                    strategy_name=self._name,
                    bucket_key=bucket_key,
                )
|
||||
1
src/everos/infra/ome/_dispatch/__init__.py
Normal file
1
src/everos/infra/ome/_dispatch/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Internal: event dispatch core (registry / dispatcher / runner)."""
|
||||
23
src/everos/infra/ome/_dispatch/_state.py
Normal file
23
src/everos/infra/ome/_dispatch/_state.py
Normal file
@ -0,0 +1,23 @@
|
||||
"""ContextVar shared between Runner and OfflineEngine.
|
||||
|
||||
Python copies ContextVar values into child tasks at
|
||||
``asyncio.create_task`` (by design, for trace propagation), so
|
||||
``@_refuse_inside_strategy`` reliably catches only *same-task* calls.
|
||||
Never attach it to APS callback methods (``dispatch_run`` /
|
||||
``run_idle_scan``) — cascade emits would misfire.
|
||||
``test_engine_chain_emit_through_ctx`` is the regression.
|
||||
|
||||
TODO: consider a ``sys._getframe`` walk for a ``Runner.run`` frame instead; that approach would be leak-proof.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextvars import ContextVar
|
||||
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
|
||||
# Context-local marker for the strategy currently being executed. The default
# of ``None`` stands for "no strategy frame is active".
_CURRENT_STRATEGY: ContextVar[StrategyMeta | None] = ContextVar(
    "current_strategy", default=None
)
"""Set by ``Runner.run`` around ``meta.func(event, ctx)``; read by
``@_refuse_inside_strategy``. ``None`` = not inside a strategy frame."""
|
||||
205
src/everos/infra/ome/_dispatch/dispatcher.py
Normal file
205
src/everos/infra/ome/_dispatch/dispatcher.py
Normal file
@ -0,0 +1,205 @@
|
||||
"""EventDispatcher — routing layer applying the three OME gates.
|
||||
|
||||
For each dispatched event, every candidate strategy is run through three
|
||||
gates in order:
|
||||
|
||||
1. ``enabled`` — strategy may be hot-disabled via config
|
||||
2. ``applies_to`` — per-strategy predicate over the event payload
|
||||
3. ``Counter`` — N-of-M rate/threshold gate against
|
||||
:class:`CounterStore`
|
||||
|
||||
:meth:`dispatch` is the read-write entry point — passing the counter
|
||||
gate increments the counter and returns ``(meta, run_id)`` pairs to
|
||||
enqueue. :meth:`inspect` is its dry-run twin — same gates, no counter
|
||||
mutation; returns one :class:`StrategyRouteInfo` per matched strategy
|
||||
including a snapshot of the counter so debug callers can see why a
|
||||
strategy will or won't fire.
|
||||
|
||||
By design ``inspect`` does not accept ``force_enabled`` /
|
||||
``strategy_filter``: those are runtime overrides for the routing side
|
||||
(``trigger_manual``), not properties a debugger should second-guess.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from uuid import uuid4
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._dispatch.registry import StrategyRegistry
|
||||
from everos.infra.ome._stores.counter import CounterStore
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
from everos.infra.ome.records import CounterProgress, StrategyRouteInfo
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class EventDispatcher:
    """Route one event through the ``enabled / applies_to / Counter`` gates."""

    def __init__(
        self,
        *,
        registry: StrategyRegistry,
        counter_store: CounterStore,
    ) -> None:
        """Keep the registry used for lookups and the store backing the counter gate."""
        self._registry = registry
        self._counter_store = counter_store

    async def dispatch(
        self,
        event: BaseEvent,
        *,
        force_enabled: bool = False,
        strategy_filter: str | None = None,
    ) -> list[tuple[StrategyMeta, str]]:
        """Apply every gate and return the ``(meta, run_id)`` pairs to enqueue.

        Args:
            event: The event being routed.
            force_enabled: Skip the ``meta.enabled`` check. ``applies_to``
                and the counter gate are still evaluated. Used by manual
                triggers with ``force=True``.
            strategy_filter: Consider only the strategy with this name,
                whether or not it subscribes to ``type(event)``. Manual
                triggers use this to target a strategy with a
                caller-supplied event. Raises ``KeyError`` if the name is
                not registered.

        An exception raised by one strategy's ``applies_to`` callable is
        caught, logged, and counted as ``False`` for that strategy only, so
        sibling strategies still dispatch. Framework errors (e.g.
        CounterStore I/O) propagate.
        """
        if strategy_filter is None:
            candidates: list[StrategyMeta] = list(
                self._registry.lookup_by_event(type(event))
            )
        else:
            candidates = [self._registry.get(strategy_filter)]

        accepted: list[tuple[StrategyMeta, str]] = []
        for meta in candidates:
            if (
                not _routes_to(event, meta)
                or not (force_enabled or meta.enabled)
                or not _safe_applies(meta, event)
            ):
                continue
            gate = meta.gate
            if gate is not None:
                bucket = _bucket_key(event, gate.event_field)
                fired, _ = await self._counter_store.incr_and_check(
                    meta.name,
                    bucket,
                    threshold=gate.threshold,
                    cooldown_seconds=gate.cooldown_seconds,
                )
                if not fired:
                    continue
            accepted.append((meta, uuid4().hex))
        return accepted

    async def inspect(self, event: BaseEvent) -> list[StrategyRouteInfo]:
        """Report how :meth:`dispatch` would route ``event``, without counting it.

        One :class:`StrategyRouteInfo` is produced per strategy the event
        routes to, carrying a pass flag for each gate. When the counter
        gate is reached, the stored count is read via ``get_progress`` and
        the report shows it as it would be after this event (stored + 1).
        A faulty ``applies_to`` callable is handled as in :meth:`dispatch`:
        logged, with ``applies_to_pass=False`` for that strategy.
        """
        reports: list[StrategyRouteInfo] = []
        for meta in self._registry.lookup_by_event(type(event)):
            if not _routes_to(event, meta):
                continue

            is_enabled = bool(meta.enabled)
            # applies_to is only evaluated for enabled strategies.
            applies = is_enabled and _safe_applies(meta, event)
            gate = meta.gate
            snapshot: CounterProgress | None = None

            if not applies:
                passes_counter = False
            elif gate is None:
                passes_counter = True
            else:
                bucket = _bucket_key(event, gate.event_field)
                stored = await self._counter_store.get_progress(
                    meta.name,
                    bucket,
                )
                projected = stored + 1
                snapshot = CounterProgress(
                    current=projected, threshold=gate.threshold
                )
                passes_counter = projected >= gate.threshold

            reports.append(
                StrategyRouteInfo(
                    strategy_name=meta.name,
                    enabled_pass=is_enabled,
                    applies_to_pass=applies,
                    counter_pass=passes_counter,
                    counter_progress=snapshot,
                )
            )
        return reports
|
||||
|
||||
|
||||
def _routes_to(event: BaseEvent, meta: StrategyMeta) -> bool:
    """Decide whether ``event`` is meant for the strategy described by ``meta``.

    Events carrying a non-``None`` ``strategy_name`` (the engine-emitted
    Cron / Idle / Manual ticks) are addressed to exactly that strategy, so
    two strategies listening on the same tick class cannot cross-fire.
    Events without the attribute fan out to every candidate.
    """
    addressee = getattr(event, "strategy_name", None)
    if addressee is None:
        return True
    return addressee == meta.name
|
||||
|
||||
|
||||
def _safe_applies(meta: StrategyMeta, event: BaseEvent) -> bool:
    """Run the ``applies_to`` check for ``meta`` without letting it raise.

    Any exception from the check is logged with ``strategy_name`` and
    ``event_topic`` context and turned into ``False``, so one buggy
    predicate cannot take down the fan-out for the whole event.
    """
    try:
        verdict = _applies(meta.applies_to, event)
    except Exception:  # noqa: BLE001
        logger.exception(
            "applies_to_callable_raised",
            strategy_name=meta.name,
            event_topic=type(event).topic(),
        )
        verdict = False
    return verdict
|
||||
|
||||
|
||||
def _applies(
    spec: str | Callable[[BaseEvent], bool] | None,
    event: BaseEvent,
) -> bool:
    """Evaluate an ``applies_to`` spec against ``event``.

    * callable — called with the event; the result is bool-cast
    * ``None`` — always applies
    * str — names an event attribute whose value is bool-cast; a missing
      attribute or a falsy value (``""``, ``0``, ``None``, empty
      containers) means the strategy does NOT apply

    Exceptions from a user callable are not handled here; they propagate
    to the caller.
    """
    if callable(spec):
        return bool(spec(event))
    if spec is None:
        return True
    field_value = getattr(event, spec, None)
    return bool(field_value)
|
||||
|
||||
|
||||
def _bucket_key(event: BaseEvent, field: str | None) -> str:
    """Derive the counter bucket key for ``event``.

    With ``field=None`` the gate is un-bucketed and every event shares the
    ``"__all__"`` bucket. Otherwise the key is ``str()`` of the named event
    attribute; an attribute that is missing or ``None`` maps to
    ``"__none__"``, so a mistyped field name never silently merges into
    ``"__all__"``.
    """
    if field is None:
        return "__all__"
    raw = getattr(event, field, None)
    if raw is None:
        return "__none__"
    return str(raw)
|
||||
152
src/everos/infra/ome/_dispatch/registry.py
Normal file
152
src/everos/infra/ome/_dispatch/registry.py
Normal file
@ -0,0 +1,152 @@
|
||||
"""StrategyRegistry — registration + DAG cycle detection.
|
||||
|
||||
Mutated at startup via :meth:`register` / :meth:`validate`, and at
|
||||
runtime via :meth:`replace` (config hot-reload). Cycle detection is a
|
||||
Kahn-style topological pass on the event-flow DAG implied by
|
||||
``trigger.on`` (incoming) and ``emits`` (outgoing).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict, deque
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.events import BaseEvent, CronTick, IdleTick
|
||||
from everos.infra.ome.exceptions import StartupValidationError
|
||||
from everos.infra.ome.triggers import Cron, Idle, Immediate, Trigger
|
||||
|
||||
|
||||
class StrategyRegistry:
|
||||
"""Startup-time registry for offline strategies with cycle detection."""
|
||||
|
||||
def __init__(self) -> None:
    """Create an empty registry."""
    # Maps strategy name -> its StrategyMeta.
    self._strategies: dict[str, StrategyMeta] = {}
|
||||
|
||||
def register(self, func: Callable[..., Any]) -> None:
    """Add a decorated strategy function to the registry.

    The strategy's metadata is read from ``func._ome_strategy_meta`` and
    stored under its ``name``.

    Raises:
        StartupValidationError: If ``func`` carries no ``StrategyMeta``
            (i.e. it was not decorated with ``@offline_strategy``), or a
            strategy with the same name is already registered.
    """
    candidate = getattr(func, "_ome_strategy_meta", None)
    if isinstance(candidate, StrategyMeta):
        if candidate.name in self._strategies:
            raise StartupValidationError(
                f"register: duplicate strategy name {candidate.name!r}"
            )
        self._strategies[candidate.name] = candidate
        return
    label = getattr(func, "__name__", repr(func))
    raise StartupValidationError(
        f"register: {label} is not decorated with @offline_strategy"
    )
|
||||
|
||||
def replace(self, name: str, new_meta: StrategyMeta) -> None:
|
||||
"""Swap an already-registered strategy's meta in place (hot-reload entry).
|
||||
|
||||
Cycle / gate validation is **not** re-run; callers (currently
|
||||
:func:`apply_overrides`) must only feed metas where the
|
||||
DAG-shaping fields (``trigger.on``, ``emits``, trigger type)
|
||||
match the original. Raises ``KeyError`` if ``name`` is not yet
|
||||
registered.
|
||||
"""
|
||||
if name not in self._strategies:
|
||||
raise KeyError(name)
|
||||
self._strategies[name] = new_meta
|
||||
|
||||
def get(self, name: str) -> StrategyMeta:
    """Return meta by name (raises ``KeyError`` if absent)."""
    # Plain dict indexing: an unknown name surfaces as the dict's KeyError.
    return self._strategies[name]
|
||||
|
||||
def all(self) -> list[StrategyMeta]:
|
||||
"""Return a snapshot list of every registered strategy."""
|
||||
return list(self._strategies.values())
|
||||
|
||||
def lookup_by_event(self, event_cls: type[BaseEvent]) -> list[StrategyMeta]:
|
||||
"""Return strategies that may receive an event of ``event_cls``.
|
||||
|
||||
Resolution:
|
||||
* ``Immediate`` strategy listening on the class → match
|
||||
* ``CronTick`` → all Cron strategies (narrowed later by name)
|
||||
* ``IdleTick`` → all Idle strategies (narrowed later by name)
|
||||
|
||||
Engine-emitted ticks carry a ``strategy_name`` field; dispatcher
|
||||
narrows the returned set to the single target via ``_routes_to``.
|
||||
"""
|
||||
out: list[StrategyMeta] = []
|
||||
for m in self._strategies.values():
|
||||
if (
|
||||
(isinstance(m.trigger, Immediate) and event_cls in m.trigger.on)
|
||||
or (isinstance(m.trigger, Cron) and event_cls is CronTick)
|
||||
or (isinstance(m.trigger, Idle) and event_cls is IdleTick)
|
||||
):
|
||||
out.append(m)
|
||||
return out
|
||||
|
||||
def validate(self) -> None:
|
||||
"""Validate the strategy DAG for cycles and gate field existence."""
|
||||
self._validate_no_cycles()
|
||||
self._validate_gate_event_fields()
|
||||
|
||||
def _validate_no_cycles(self) -> None:
|
||||
"""Kahn topological sort over the event-flow DAG.
|
||||
|
||||
Edge ``s_a → s_b`` exists iff ``s_a.emits`` intersects
|
||||
``s_b.trigger.on``.
|
||||
"""
|
||||
adj: dict[str, set[str]] = defaultdict(set)
|
||||
indeg: dict[str, int] = dict.fromkeys(self._strategies, 0)
|
||||
|
||||
for src in self._strategies.values():
|
||||
for ev in src.emits:
|
||||
for dst in self._strategies.values():
|
||||
if (
|
||||
isinstance(dst.trigger, Immediate)
|
||||
and ev in dst.trigger.on
|
||||
and dst.name not in adj[src.name]
|
||||
):
|
||||
adj[src.name].add(dst.name)
|
||||
indeg[dst.name] += 1
|
||||
|
||||
queue = deque(n for n, d in indeg.items() if d == 0)
|
||||
visited = 0
|
||||
while queue:
|
||||
n = queue.popleft()
|
||||
visited += 1
|
||||
for nbr in adj[n]:
|
||||
indeg[nbr] -= 1
|
||||
if indeg[nbr] == 0:
|
||||
queue.append(nbr)
|
||||
|
||||
if visited < len(self._strategies):
|
||||
raise StartupValidationError("cycle detected in strategy DAG")
|
||||
|
||||
def _validate_gate_event_fields(self) -> None:
|
||||
"""Reject any ``gate.event_field`` missing from a receivable event class.
|
||||
|
||||
Without this check a typo silently collapses every event into one
|
||||
shared bucket and the rate gate stops segmenting.
|
||||
"""
|
||||
for meta in self._strategies.values():
|
||||
if meta.gate is None or meta.gate.event_field is None:
|
||||
continue
|
||||
field = meta.gate.event_field
|
||||
for ev_cls in _event_classes_for_trigger(meta.trigger):
|
||||
if field not in ev_cls.model_fields: # type: ignore[operator] # Pydantic model_fields → dict via @deprecated_instance_property (pydantic/main.py:277)
|
||||
raise StartupValidationError(
|
||||
f"strategy {meta.name!r}: gate.event_field {field!r} "
|
||||
f"not found in {ev_cls.__name__} fields "
|
||||
f"(available: {list(ev_cls.model_fields)})" # type: ignore[arg-type] # same as above
|
||||
)
|
||||
|
||||
|
||||
def _event_classes_for_trigger(trigger: Trigger) -> list[type[BaseEvent]]:
    """List the event classes delivered to a strategy using ``trigger``.

    ``Immediate`` yields a copy of its ``on`` collection; ``Cron`` and
    ``Idle`` each yield their single engine tick class. Any other trigger
    type raises ``NotImplementedError``.
    """
    if isinstance(trigger, Immediate):
        return [*trigger.on]
    for trigger_cls, tick_cls in ((Cron, CronTick), (Idle, IdleTick)):
        if isinstance(trigger, trigger_cls):
            return [tick_cls]
    raise NotImplementedError(f"unknown trigger type: {type(trigger).__name__}")
|
||||
247
src/everos/infra/ome/_dispatch/runner.py
Normal file
247
src/everos/infra/ome/_dispatch/runner.py
Normal file
@ -0,0 +1,247 @@
|
||||
"""Runner — single-strategy execution with attempt-level retry + DLQ.
|
||||
|
||||
Acquires ``engine_sem`` (FIFO), drives the per-attempt RunRecord state
|
||||
machine (``RUNNING → SUCCESS / FAILED / DEAD_LETTER``), and fires
|
||||
``on_dead_letter`` after exhausted retries or contract violations.
|
||||
|
||||
Per attempt, binds ``strategy_name`` / ``run_id`` / ``attempt`` into
|
||||
``structlog.contextvars`` (so every log record carries those fields
|
||||
automatically) and sets ``_CURRENT_STRATEGY`` ContextVar around
|
||||
``meta.func`` (so ``engine.emit`` can refuse direct calls from inside
|
||||
a strategy — strategies emit via ``ctx.emit``).
|
||||
|
||||
**Idempotency contract**: if ``mark_success`` / ``mark_failed`` /
|
||||
``mark_dead_letter`` fails after the strategy body returned, the
|
||||
``RUNNING`` row stays and crash recovery on next start will treat the
|
||||
run as crashed and re-enqueue the same event. Strategy bodies must
|
||||
therefore be safe to re-execute with the same payload.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import traceback
|
||||
from collections.abc import Awaitable, Callable
|
||||
from uuid import uuid4
|
||||
|
||||
from structlog.contextvars import bound_contextvars
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._dispatch._state import _CURRENT_STRATEGY
|
||||
from everos.infra.ome._stores.run_record import RunRecordStore
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
from everos.infra.ome.exceptions import EmitNotDeclaredError, StrategyContractError
|
||||
from everos.infra.ome.records import RunRecord
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _RunCtx:
    """Context object passed as ``ctx`` to ``meta.func(event, ctx)``.

    Exposes ``run_id``, a logger obtained via ``get_logger("ome.strategy")``,
    and ``emit``, which only forwards events whose exact type appears in
    the strategy's declared ``emits`` set.
    """

    def __init__(
        self,
        *,
        run_id: str,
        strategy_name: str,
        emit_hook: Callable[[BaseEvent], Awaitable[None]],
        declared_emits: frozenset[type[BaseEvent]],
    ) -> None:
        self._strategy_name = strategy_name
        self._declared = declared_emits
        self._emit_hook = emit_hook
        self.run_id = run_id
        self.logger = get_logger("ome.strategy")

    async def emit(self, event: BaseEvent) -> None:
        """Forward ``event`` to the emit hook if its type was declared.

        Raises:
            EmitNotDeclaredError: ``type(event)`` is not in the declared
                emits (exact type match; subclasses are not accepted).
        """
        if type(event) in self._declared:
            await self._emit_hook(event)
            return
        raise EmitNotDeclaredError(
            strategy=self._strategy_name,
            event=event,
        )
|
||||
|
||||
|
||||
class Runner:
    """Execute a single strategy invocation, retrying until a terminal state."""

    def __init__(
        self,
        *,
        run_record_store: RunRecordStore,
        engine_sem: asyncio.Semaphore,
        emit_hook: Callable[[BaseEvent], Awaitable[None]],
        on_dead_letter: Callable[[RunRecord], None] | None = None,
    ) -> None:
        self._on_dead_letter = on_dead_letter
        self._emit_hook = emit_hook
        self._sem = engine_sem
        self._rec = run_record_store

    async def run(
        self,
        meta: StrategyMeta,
        event: BaseEvent,
        *,
        run_id: str,
        max_retries_snapshot: int,
    ) -> None:
        """Call ``meta.func(event, ctx)``, retrying up to ``max_retries_snapshot`` times.

        ``engine_sem`` is held across the whole retry chain, so the
        concurrency cap covers every attempt. The first attempt uses the
        caller's ``run_id``; each later attempt gets a new ``uuid4`` hex
        id so every try has its own run record.

        Raises:
            ValueError: ``max_retries_snapshot`` is negative.
        """
        if max_retries_snapshot < 0:
            raise ValueError(
                f"max_retries_snapshot must be >= 0, got {max_retries_snapshot}"
            )

        async with self._sem:
            # Topic and payload are computed once and reused by every attempt.
            topic = type(event).topic()
            payload = event.model_dump_json()

            for attempt in range(max_retries_snapshot + 1):
                attempt_run_id = run_id if attempt == 0 else uuid4().hex
                finished = await self._run_one_attempt(
                    meta=meta,
                    event=event,
                    current_run_id=attempt_run_id,
                    attempt=attempt,
                    event_topic=topic,
                    event_payload=payload,
                    max_retries_snapshot=max_retries_snapshot,
                )
                if finished:
                    break

    async def _run_one_attempt(
        self,
        *,
        meta: StrategyMeta,
        event: BaseEvent,
        current_run_id: str,
        attempt: int,
        event_topic: str,
        event_payload: str,
        max_retries_snapshot: int,
    ) -> bool:
        """Execute one attempt and report whether the retry loop must stop.

        Returns ``True`` when the attempt ended in SUCCESS or DEAD_LETTER,
        or when the RUNNING row could not be written. Returns ``False``
        when the attempt was marked FAILED and another attempt remains.
        """
        ctx = _RunCtx(
            run_id=current_run_id,
            strategy_name=meta.name,
            emit_hook=self._emit_hook,
            declared_emits=meta.emits,
        )
        with bound_contextvars(  # type: ignore[arg-type]  # structlog typed as Generator; @contextmanager wraps at runtime (structlog/contextvars.py:170)
            strategy_name=meta.name,
            run_id=current_run_id,
            attempt=attempt,
        ):
            started = await self._record_start(
                run_id=current_run_id,
                strategy_name=meta.name,
                attempt=attempt,
                event_topic=event_topic,
                event_payload=event_payload,
                max_retries_snapshot=max_retries_snapshot,
            )
            if not started:
                return True  # mark_running failed; abort run, no DB row exists

            try:
                # Expose the running strategy only while its body executes.
                token = _CURRENT_STRATEGY.set(meta)
                try:
                    await meta.func(event, ctx)
                finally:
                    _CURRENT_STRATEGY.reset(token)
            except StrategyContractError as exc:
                # Contract violations are never retried.
                await self._terminate_dead_letter(current_run_id, _format_error(exc))
                return True
            except Exception as exc:  # noqa: BLE001
                failure = _format_error(exc)
                if attempt >= max_retries_snapshot:
                    await self._terminate_dead_letter(current_run_id, failure)
                    return True
                await self._rec.mark_failed(
                    run_id=current_run_id,
                    finished_at=get_utc_now(),
                    error=failure,
                )
                return False  # caller will retry

            # Reached only when the strategy body returned without raising.
            await self._rec.mark_success(
                run_id=current_run_id,
                finished_at=get_utc_now(),
            )
            return True

    async def _record_start(
        self,
        *,
        run_id: str,
        strategy_name: str,
        attempt: int,
        event_topic: str,
        event_payload: str,
        max_retries_snapshot: int,
    ) -> bool:
        """Write the RUNNING row for this attempt; ``False`` if the write fails.

        On a failed write (DB lock, disk full, ...) the caller stops the
        retry loop. With no RUNNING row, crash recovery cannot rediscover
        the run, so it is lost; the exception logged here is the only
        audit trail.
        """
        try:
            await self._rec.mark_running(
                run_id=run_id,
                strategy_name=strategy_name,
                attempt=attempt,
                event_topic=event_topic,
                event_payload=event_payload,
                max_retries_snapshot=max_retries_snapshot,
            )
        except Exception:  # noqa: BLE001
            logger.exception(
                "mark_running_failed",
                run_id=run_id,
                strategy_name=strategy_name,
                attempt=attempt,
            )
            return False
        else:
            return True

    async def _terminate_dead_letter(self, run_id: str, error: str) -> None:
        """Persist DEAD_LETTER for ``run_id``, then notify ``on_dead_letter``."""
        finished_at = get_utc_now()
        await self._rec.mark_dead_letter(
            run_id=run_id,
            finished_at=finished_at,
            error=error,
        )
        await self._fire_dead_letter_callback(run_id)

    async def _fire_dead_letter_callback(self, run_id: str) -> None:
        """Invoke ``on_dead_letter`` with the stored record, if both exist.

        Exceptions raised by the callback are logged and swallowed.
        """
        if self._on_dead_letter is None:
            return
        record = await self._rec.get(run_id)
        if record is not None:
            try:
                self._on_dead_letter(record)
            except Exception:  # noqa: BLE001
                logger.exception("on_dead_letter_failed")
|
||||
|
||||
|
||||
def _format_error(e: BaseException) -> str:
    """Format an exception as ``"<Type>: <message>"`` plus its full traceback.

    The traceback is rendered from ``e`` itself (``e.__traceback__``,
    including chained causes) rather than from the exception currently
    being handled. The result is therefore correct when this is called
    outside an ``except`` block or while a different exception is active.
    When ``e`` is the exception being handled, the output is the same as
    ``traceback.format_exc()`` would give.

    Args:
        e: The exception to describe.

    Returns:
        A header line followed by the formatted traceback text.
    """
    rendered = "".join(traceback.format_exception(type(e), e, e.__traceback__))
    return f"{type(e).__name__}: {e}\n{rendered}"
|
||||
1
src/everos/infra/ome/_stores/__init__.py
Normal file
1
src/everos/infra/ome/_stores/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Internal: SQLite-backed state stores (counter / idle / run_record)."""
|
||||
107
src/everos/infra/ome/_stores/counter.py
Normal file
107
src/everos/infra/ome/_stores/counter.py
Normal file
@ -0,0 +1,107 @@
|
||||
"""CounterStore — persistent (strategy_name, bucket_key) → counter rows.
|
||||
|
||||
Backs the ``Counter`` gate in OME's dispatch pipeline: each call to
|
||||
:meth:`CounterStore.incr_and_check` atomically increments the bucket's
|
||||
counter and reports whether the strategy should fire this time.
|
||||
|
||||
Pass semantics:
|
||||
- ``counter >= threshold`` AND cooldown elapsed → ``passed=True``
|
||||
- On pass, the row's counter resets to 0 and ``last_passed_ts``
|
||||
advances to ``now``; the next pass needs a fresh accumulation.
|
||||
- ``cooldown_seconds=0`` disables the cooldown gate (threshold alone).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
from_iso_format,
|
||||
get_utc_now,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.infra.ome._stores.storage import OMEStorage
|
||||
|
||||
|
||||
class CounterStore:
    """Persistent per-bucket counters behind the ``Counter`` gate."""

    def __init__(self, storage: OMEStorage) -> None:
        self._storage = storage

    async def incr_and_check(
        self,
        strategy_name: str,
        bucket_key: str,
        *,
        threshold: int,
        cooldown_seconds: int,
    ) -> tuple[bool, int]:
        """Bump the counter for ``(strategy_name, bucket_key)`` and test the gate.

        The read, the decision and the upsert all run inside one
        ``self._storage.transaction()`` block.

        Args:
            strategy_name: Strategy owning the counter.
            bucket_key: Bucket value derived from the event field (or
                ``"__all__"`` for an unbucketed gate).
            threshold: The gate passes once the incremented counter is
                ``>=`` this value.
            cooldown_seconds: Minimum seconds that must have elapsed since
                the previous pass; ``0`` switches the cooldown check off.

        Returns:
            ``(passed, counter)`` where ``counter`` is the incremented
            value seen by the check, i.e. before the reset applied on a
            pass. ``threshold`` is never substituted for it, so a caller
            seeing ``counter > threshold`` knows the gate was over-armed
            (e.g. the threshold was lowered by hot reload after the
            counter had already accumulated past the new value).
        """
        now = get_utc_now()
        async with self._storage.transaction() as conn:
            cursor = await conn.execute(
                "SELECT counter, last_passed_ts FROM counter_store "
                "WHERE strategy_name = ? AND bucket_key = ?",
                (strategy_name, bucket_key),
            )
            existing = await cursor.fetchone()
            if existing:
                previous_count = existing[0]
                last_passed = from_iso_format(existing[1]) if existing[1] else None
            else:
                previous_count = 0
                last_passed = None
            counter = previous_count + 1

            if cooldown_seconds == 0 or last_passed is None:
                cooldown_ok = True
            else:
                cooldown_ok = now - last_passed >= timedelta(seconds=cooldown_seconds)
            passed = counter >= threshold and cooldown_ok

            # A pass resets the counter and stamps the pass time; otherwise
            # the incremented counter is stored and the old stamp is kept.
            if passed:
                stored_counter = 0
                stored_ts = to_iso_format(now)
            elif last_passed:
                stored_counter = counter
                stored_ts = to_iso_format(last_passed)
            else:
                stored_counter = counter
                stored_ts = None

            await conn.execute(
                "INSERT INTO counter_store (strategy_name, bucket_key, "
                "counter, last_passed_ts) "
                "VALUES (?, ?, ?, ?) "
                "ON CONFLICT(strategy_name, bucket_key) DO UPDATE SET "
                "counter = excluded.counter, "
                "last_passed_ts = excluded.last_passed_ts",
                (strategy_name, bucket_key, stored_counter, stored_ts),
            )
        return passed, counter

    async def get_progress(self, strategy_name: str, bucket_key: str) -> int:
        """Read the stored counter for a bucket without changing it.

        Returns ``0`` when the bucket has no row. Used by the dispatcher's
        inspect mode to report progress without mutating state.
        """
        async with self._storage.connect() as conn:
            cursor = await conn.execute(
                "SELECT counter FROM counter_store "
                "WHERE strategy_name = ? AND bucket_key = ?",
                (strategy_name, bucket_key),
            )
            found = await cursor.fetchone()
        if found:
            return found[0]
        return 0
|
||||
64
src/everos/infra/ome/_stores/idle.py
Normal file
64
src/everos/infra/ome/_stores/idle.py
Normal file
@ -0,0 +1,64 @@
|
||||
"""IdleStore — last_activity_ts rows backing the Idle trigger.
|
||||
|
||||
All writes pass through ``to_iso_format`` over a tz-aware datetime, so
|
||||
``last_activity_ts`` is a fixed-format ISO 8601 string whose
|
||||
lexicographic order matches temporal order — :meth:`scan_idle` relies
|
||||
on this to keep the column un-wrapped in its predicate so SQLite can
|
||||
use ``idx_idle_scan``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from everos.component.utils.datetime import from_iso_format, to_iso_format
|
||||
from everos.infra.ome._stores.storage import OMEStorage
|
||||
|
||||
|
||||
class IdleStore:
    """Per-bucket ``last_activity_ts`` rows consulted by the ``Idle`` trigger."""

    def __init__(self, storage: OMEStorage) -> None:
        self._storage = storage

    async def touch(self, strategy_name: str, bucket_key: str, *, at: datetime) -> None:
        """Record ``at`` as the latest activity for ``(strategy_name, bucket_key)``.

        Inserts the row, or overwrites ``last_activity_ts`` when it exists.
        """
        async with self._storage.connect() as conn:
            params = (strategy_name, bucket_key, to_iso_format(at))
            await conn.execute(
                "INSERT INTO idle_store "
                "(strategy_name, bucket_key, last_activity_ts) "
                "VALUES (?, ?, ?) "
                "ON CONFLICT(strategy_name, bucket_key) DO UPDATE SET "
                "last_activity_ts = excluded.last_activity_ts",
                params,
            )
            await conn.commit()

    async def scan_idle(
        self, strategy_name: str, *, idle_seconds: int, now: datetime
    ) -> list[str]:
        """List bucket keys whose last activity is at or before the idle cutoff.

        The cutoff is ``now - idle_seconds``; results are ordered oldest
        activity first.
        """
        # The cutoff is compared as a string on the right-hand side, so the
        # indexed column itself is never wrapped in a function call.
        cutoff = to_iso_format(now - timedelta(seconds=idle_seconds))
        async with self._storage.connect() as conn:
            cursor = await conn.execute(
                "SELECT bucket_key FROM idle_store "
                "WHERE strategy_name = ? AND last_activity_ts <= ? "
                "ORDER BY last_activity_ts ASC",
                (strategy_name, cutoff),
            )
            matches = await cursor.fetchall()
        return [match[0] for match in matches]

    async def get_last_activity(
        self, strategy_name: str, bucket_key: str
    ) -> datetime | None:
        """Fetch the stored activity timestamp, or ``None`` when no row exists."""
        async with self._storage.connect() as conn:
            cursor = await conn.execute(
                "SELECT last_activity_ts FROM idle_store "
                "WHERE strategy_name = ? AND bucket_key = ?",
                (strategy_name, bucket_key),
            )
            found = await cursor.fetchone()
        if found:
            return from_iso_format(found[0])
        return None
|
||||
168
src/everos/infra/ome/_stores/run_record.py
Normal file
168
src/everos/infra/ome/_stores/run_record.py
Normal file
@ -0,0 +1,168 @@
|
||||
"""RunRecord persistence — state machine writes + same-transaction ring-buffer trim.
|
||||
|
||||
State machine (one row per ``run_id``):
|
||||
RUNNING → SUCCESS / FAILED / DEAD_LETTER / CRASHED
|
||||
|
||||
Every :meth:`RunRecordStore.mark_running` INSERT runs inside one
|
||||
``BEGIN IMMEDIATE`` transaction with a paired DELETE that keeps only
|
||||
the newest ``max_records_per_strategy`` rows for that strategy. Bound
|
||||
is enforced atomically — no background sweeper, no transient
|
||||
over-budget state.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
from_iso_format,
|
||||
get_utc_now,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.infra.ome._stores.storage import OMEStorage
|
||||
from everos.infra.ome.records import RunRecord, RunStatus
|
||||
|
||||
|
||||
class RunRecordStore:
    """Reads and writes ``RunRecord`` rows in the ``run_record`` table."""

    def __init__(self, storage: OMEStorage, max_records_per_strategy: int) -> None:
        self._storage = storage
        self._max = max_records_per_strategy

    async def mark_running(
        self,
        *,
        run_id: str,
        strategy_name: str,
        attempt: int,
        event_topic: str,
        event_payload: str,
        max_retries_snapshot: int,
    ) -> None:
        """Insert a RUNNING row, then trim the strategy's history.

        Both statements run inside one ``self._storage.transaction()``
        block. The trim keeps only the newest ``max_records_per_strategy``
        rows for the strategy, ordered by ``started_at``.
        """
        async with self._storage.transaction() as conn:
            new_row = (
                run_id,
                strategy_name,
                RunStatus.RUNNING.value,
                attempt,
                to_iso_format(get_utc_now()),
                event_topic,
                event_payload,
                max_retries_snapshot,
            )
            await conn.execute(
                "INSERT INTO run_record "
                "(run_id, strategy_name, status, attempt, started_at, "
                " event_topic, event_payload, max_retries_snapshot) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                new_row,
            )
            trim_args = (strategy_name, strategy_name, self._max)
            await conn.execute(
                "DELETE FROM run_record "
                "WHERE strategy_name = ? AND run_id NOT IN ("
                " SELECT run_id FROM run_record WHERE strategy_name = ? "
                " ORDER BY started_at DESC LIMIT ?)",
                trim_args,
            )

    async def mark_success(self, *, run_id: str, finished_at: datetime) -> None:
        """Set the row's status to SUCCESS and clear ``error``."""
        await self._update_status(run_id, RunStatus.SUCCESS, finished_at, None)

    async def mark_failed(
        self, *, run_id: str, finished_at: datetime, error: str
    ) -> None:
        """Set the row's status to FAILED (a retry is still pending)."""
        await self._update_status(run_id, RunStatus.FAILED, finished_at, error)

    async def mark_dead_letter(
        self, *, run_id: str, finished_at: datetime, error: str
    ) -> None:
        """Set the row's status to DEAD_LETTER (retries exhausted or non-retryable)."""
        await self._update_status(run_id, RunStatus.DEAD_LETTER, finished_at, error)

    async def mark_crashed(
        self, *, run_id: str, finished_at: datetime, error: str
    ) -> None:
        """Set the row's status to CRASHED (used by the crash-recovery sweep)."""
        await self._update_status(run_id, RunStatus.CRASHED, finished_at, error)

    async def _update_status(
        self,
        run_id: str,
        status: RunStatus,
        finished_at: datetime,
        error: str | None,
    ) -> None:
        """Write ``status``, ``finished_at`` and ``error`` for ``run_id`` and commit."""
        values = (status.value, to_iso_format(finished_at), error, run_id)
        async with self._storage.connect() as conn:
            await conn.execute(
                "UPDATE run_record "
                "SET status = ?, finished_at = ?, error = ? "
                "WHERE run_id = ?",
                values,
            )
            await conn.commit()

    async def get(self, run_id: str) -> RunRecord | None:
        """Load one record by ``run_id``; ``None`` when there is no such row."""
        async with self._storage.connect() as conn:
            cursor = await conn.execute(
                _SELECT_COLUMNS + " WHERE run_id = ?",
                (run_id,),
            )
            found = await cursor.fetchone()
        if found:
            return _row_to_record(found)
        return None

    async def list_runs(
        self,
        *,
        strategy_name: str,
        status: RunStatus | None = None,
        limit: int = 100,
    ) -> list[RunRecord]:
        """List a strategy's records newest first, optionally filtered by status."""
        conditions = ["strategy_name = ?"]
        args: list[Any] = [strategy_name]
        if status is not None:
            conditions.append("status = ?")
            args.append(status.value)
        args.append(limit)
        sql = (
            _SELECT_COLUMNS
            + " WHERE "
            + " AND ".join(conditions)
            + " ORDER BY started_at DESC LIMIT ?"
        )
        async with self._storage.connect() as conn:
            cursor = await conn.execute(sql, args)
            fetched = await cursor.fetchall()
        return [_row_to_record(item) for item in fetched]

    async def find_running(self) -> list[RunRecord]:
        """List every row whose status is still RUNNING (crash recovery at start())."""
        async with self._storage.connect() as conn:
            cursor = await conn.execute(
                _SELECT_COLUMNS + " WHERE status = ?",
                (RunStatus.RUNNING.value,),
            )
            fetched = await cursor.fetchall()
        return [_row_to_record(item) for item in fetched]
|
||||
|
||||
|
||||
# Shared ``SELECT ... FROM run_record`` prefix used by every read query in
# this module; callers append their own WHERE / ORDER BY clause. The column
# order is positional: ``_row_to_record`` reads the result by index.
_SELECT_COLUMNS = (
    "SELECT run_id, strategy_name, status, attempt, started_at, finished_at, "
    " error, event_topic, event_payload, max_retries_snapshot "
    "FROM run_record"
)
|
||||
|
||||
|
||||
def _row_to_record(row: tuple) -> RunRecord:
    """Build a ``RunRecord`` from a row shaped like ``_SELECT_COLUMNS``.

    Columns are read by position. ``status`` is converted to ``RunStatus``,
    ``started_at`` is parsed from its ISO string, and ``finished_at`` is
    parsed only when the stored value is non-empty (otherwise ``None``).
    """
    status = RunStatus(row[2])
    started_at = from_iso_format(row[4])
    finished_raw = row[5]
    finished_at = from_iso_format(finished_raw) if finished_raw else None
    return RunRecord(
        run_id=row[0],
        strategy_name=row[1],
        status=status,
        attempt=row[3],
        started_at=started_at,
        finished_at=finished_at,
        error=row[6],
        event_topic=row[7],
        event_payload=row[8],
        max_retries_snapshot=row[9],
    )
|
||||
115
src/everos/infra/ome/_stores/storage.py
Normal file
115
src/everos/infra/ome/_stores/storage.py
Normal file
@ -0,0 +1,115 @@
|
||||
"""OME SQLite storage — schema initialization + connection factory.
|
||||
|
||||
Single file (default ``MemoryRoot.default().ome_db`` ≡
|
||||
``<memory-root>/.index/sqlite/ome.db``). Holds 3 OME-managed tables
|
||||
(counter_store / idle_store / run_record); APS jobstore table is created
|
||||
by APScheduler itself when its SQLAlchemyJobStore connects.
|
||||
|
||||
PRAGMA scopes (see https://www.sqlite.org/pragma.html):
|
||||
- ``journal_mode=WAL`` is file-level — persisted in the db header,
|
||||
applied once in :meth:`OMEStorage.init`.
|
||||
- ``synchronous=NORMAL``, ``cache_size=-65536``, ``busy_timeout=5000``
|
||||
are connection-level and reset on every new connection, so they are
|
||||
re-applied inside :meth:`OMEStorage.connect` (which is why
|
||||
``connect`` is an ``@asynccontextmanager`` rather than a passthrough).
|
||||
This mirrors SQLAlchemy's canonical ``@event.listens_for(Engine,
|
||||
"connect")`` pattern for SQLite — aiosqlite exposes no equivalent
|
||||
hook. ``busy_timeout=5000`` matters because the APS jobstore writes
|
||||
its own table in the same db file; without it, WAL writer-vs-writer
|
||||
contention surfaces as ``SQLITE_BUSY`` instead of brief backoff.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import aiosqlite
|
||||
|
||||
# DDL for the three OME-managed tables and their indexes. Every statement
# uses IF NOT EXISTS, so the script can be re-run against an existing file.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS counter_store (
    strategy_name TEXT NOT NULL,
    bucket_key TEXT NOT NULL,
    counter INTEGER NOT NULL DEFAULT 0,
    last_passed_ts TIMESTAMP,
    PRIMARY KEY (strategy_name, bucket_key)
);

CREATE TABLE IF NOT EXISTS idle_store (
    strategy_name TEXT NOT NULL,
    bucket_key TEXT NOT NULL,
    last_activity_ts TIMESTAMP NOT NULL,
    PRIMARY KEY (strategy_name, bucket_key)
);
CREATE INDEX IF NOT EXISTS idx_idle_scan
    ON idle_store (strategy_name, last_activity_ts);

CREATE TABLE IF NOT EXISTS run_record (
    run_id TEXT PRIMARY KEY,
    strategy_name TEXT NOT NULL,
    status TEXT NOT NULL,
    attempt INTEGER NOT NULL DEFAULT 0,
    started_at TIMESTAMP NOT NULL,
    finished_at TIMESTAMP,
    error TEXT,
    event_topic TEXT NOT NULL,
    event_payload TEXT NOT NULL,
    max_retries_snapshot INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_run_strategy_started
    ON run_record (strategy_name, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_run_status_started
    ON run_record (status, started_at DESC);
"""

# Executed once by ``OMEStorage.init`` before the schema script.
_INIT_PRAGMAS = ("PRAGMA journal_mode=WAL",)
# Executed on every connection opened by ``OMEStorage.connect``.
_CONN_PRAGMAS = (
    "PRAGMA synchronous=NORMAL",
    "PRAGMA cache_size=-65536",
    "PRAGMA busy_timeout=5000",
)
|
||||
|
||||
|
||||
class OMEStorage:
    """Opens connections to the OME SQLite file and bootstraps its schema."""

    def __init__(self, db_path: Path) -> None:
        self.db_path = db_path

    async def init(self) -> None:
        """Prepare the database file for use.

        Creates missing parent directories, runs ``_INIT_PRAGMAS``,
        executes the ``_SCHEMA`` script, then commits.
        """
        parent_dir = self.db_path.parent
        parent_dir.mkdir(parents=True, exist_ok=True)
        async with aiosqlite.connect(self.db_path) as conn:
            for statement in _INIT_PRAGMAS:
                await conn.execute(statement)
            await conn.executescript(_SCHEMA)
            await conn.commit()

    @asynccontextmanager
    async def connect(self) -> AsyncIterator[aiosqlite.Connection]:
        """Open a connection, run ``_CONN_PRAGMAS`` on it, and yield it.

        The connection is closed when the ``async with`` block exits.
        """
        async with aiosqlite.connect(self.db_path) as conn:
            for statement in _CONN_PRAGMAS:
                await conn.execute(statement)
            yield conn

    @asynccontextmanager
    async def transaction(self) -> AsyncIterator[aiosqlite.Connection]:
        """Yield a connection wrapped in a ``BEGIN IMMEDIATE`` transaction.

        The transaction is committed when the body finishes normally. If
        the body, or the commit itself, raises an ``Exception``, it is
        rolled back and the exception is re-raised. ``IMMEDIATE`` (not
        ``DEFERRED``) takes the write lock up front, so a
        read-modify-write body cannot lose to a competing writer between
        its SELECT and its UPDATE.
        """
        async with self.connect() as conn:
            try:
                await conn.execute("BEGIN IMMEDIATE")
                yield conn
                # Commit stays inside the try so a failed commit also rolls back.
                await conn.commit()
            except Exception:
                await conn.rollback()
                raise
|
||||
157
src/everos/infra/ome/config.py
Normal file
157
src/everos/infra/ome/config.py
Normal file
@ -0,0 +1,157 @@
|
||||
"""OMEConfig (engine-level) + TomlRoot (per-strategy override schema).
|
||||
|
||||
All models forbid extra keys so configuration typos surface at startup
|
||||
as StartupValidationError instead of being silently ignored.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Self
|
||||
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
||||
|
||||
from everos.core.persistence.memory_root import MemoryRoot
|
||||
|
||||
|
||||
def _default_jobstore_path() -> Path:
    """Return ``ome_db`` from the default ``MemoryRoot``.

    Resolved at call time, so it can serve as a ``default_factory``.
    """
    root = MemoryRoot.default()
    return root.ome_db
|
||||
|
||||
|
||||
class CounterOverride(BaseModel):
    """TOML override for a strategy's Counter gate (per-key None means keep).

    Every field is optional and defaults to ``None``. How a ``None`` key is
    merged with the existing gate is decided by the consumer of this model,
    which is not part of this class.
    """

    # Unknown keys are rejected instead of being silently ignored.
    model_config = ConfigDict(extra="forbid")

    # Must be strictly positive when provided.
    threshold: Annotated[int, Field(gt=0)] | None = None
    # Zero is allowed; negative values are rejected.
    cooldown_seconds: Annotated[int, Field(ge=0)] | None = None
    # Must be a non-empty string when provided.
    event_field: Annotated[str, Field(min_length=1)] | None = None
|
||||
|
||||
|
||||
class StrategyOverride(BaseModel):
    """Per-strategy TOML overrides for the decorator's parameters.

    Every field is optional; unknown keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    enabled: bool | None = None
    max_retries: Annotated[int, Field(ge=0)] | None = None
    gate: CounterOverride | None = None
    cron: str | None = None
    idle_seconds: Annotated[int, Field(gt=0)] | None = None
    scan_interval_seconds: Annotated[int, Field(gt=0)] | None = None

    @field_validator("cron")
    @classmethod
    def _validate_crontab(cls, v: str | None) -> str | None:
        """Check that a provided ``cron`` string parses as a crontab expression.

        The parsed trigger is discarded; the original string is returned.
        """
        if v is None:
            return v
        CronTrigger.from_crontab(v)
        return v

    @model_validator(mode="after")
    def _check_idle_pair_consistency(self) -> Self:
        """Require ``scan_interval_seconds <= idle_seconds // 2`` when both are set."""
        idle = self.idle_seconds
        scan = self.scan_interval_seconds
        # A one-sided override is merged with the existing meta downstream,
        # so the pair can only be cross-checked when both values are present.
        if idle is None or scan is None:
            return self
        ceiling = idle // 2
        if scan > ceiling:
            raise ValueError(
                "StrategyOverride: scan_interval_seconds "
                f"({scan}) must be <= idle_seconds // 2 "
                f"({ceiling})"
            )
        return self
|
||||
|
||||
|
||||
class TomlRoot(BaseModel):
    """Root schema of ``ome.toml``.

    Holds a mapping from strategy name to its override; unknown top-level
    keys are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Empty when the file declares no strategies.
    strategies: dict[str, StrategyOverride] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class OMEConfig(BaseModel):
    """Engine-wide settings read by ``OfflineEngine``.

    Unknown keys are rejected. ``aps_jobstore_path`` is filled in after
    validation when the caller leaves it unset.
    """

    model_config = ConfigDict(extra="forbid")

    # --- storage locations -------------------------------------------------
    jobstore_path: Path = Field(
        default_factory=_default_jobstore_path,
        description="SQLite DB path holding OME's own state (run records, "
        "counter store, idle store). Defaults to "
        "``MemoryRoot.default().ome_db`` (``<memory-root>/.index/sqlite/ome.db``).",
    )
    aps_jobstore_path: Path | None = Field(
        default=None,
        description="SQLite DB path holding the APScheduler jobstore. Kept "
        "in a separate file from ``jobstore_path`` so APS's sync SQLAlchemy "
        "writer never contends with OME's async aiosqlite writer for the "
        "same SQLite file lock. When unset, defaults to a sibling "
        "``<stem>.aps.db`` next to ``jobstore_path``.",
    )

    # --- run limits --------------------------------------------------------
    max_concurrent_runs: Annotated[
        int,
        Field(
            gt=0,
            description="Engine-wide cap on concurrent strategy invocations "
            "(asyncio.Semaphore in Runner).",
        ),
    ] = 20
    max_retries: Annotated[
        int,
        Field(
            ge=0,
            description="Default retry budget per run, overridable via "
            "@offline_strategy(max_retries=...) or StrategyOverride.max_retries. "
            "0 disables retries.",
        ),
    ] = 1
    max_records_per_strategy: Annotated[
        int,
        Field(
            gt=0,
            description="Per-strategy RunRecord ring-buffer size; oldest "
            "entries are pruned on insert.",
        ),
    ] = 1000
    crash_recovery_timeout_seconds: Annotated[
        int,
        Field(
            gt=0,
            description="A run lingering in RUNNING longer than this is "
            "treated as crashed, marked CRASHED, and re-enqueued with a "
            "fresh run_id.",
        ),
    ] = 1800

    # --- TOML overrides / hot reload ----------------------------------------
    config_path: Path | None = Field(
        default=None,
        description="Path to ome.toml for per-strategy overrides. None "
        "disables TOML-driven hot reload.",
    )
    config_watch: bool = Field(
        default=True,
        description="When true and config_path is set, watch the file for "
        "edits and apply overrides at runtime.",
    )
    config_watch_debounce_ms: Annotated[
        int,
        Field(
            gt=0,
            description="Debounce window collapsing bursts of filesystem "
            "events (e.g. editor saves) into one reload.",
        ),
    ] = 1600

    @model_validator(mode="after")
    def _derive_aps_jobstore_path(self) -> Self:
        """Default ``aps_jobstore_path`` to ``<stem>.aps.db`` beside ``jobstore_path``.

        Deriving it from ``jobstore_path`` (rather than a global default)
        means a caller that only overrides ``jobstore_path`` — e.g. a test
        using a temp directory — still gets an isolated APS database.
        """
        if self.aps_jobstore_path is not None:
            return self
        primary = self.jobstore_path
        self.aps_jobstore_path = primary.with_name(primary.stem + ".aps.db")
        return self
|
||||
33
src/everos/infra/ome/context.py
Normal file
33
src/everos/infra/ome/context.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""StrategyContext Protocol — injected as second arg to every strategy.
|
||||
|
||||
Strategies access run-local state through `run_id` and `logger`, and
|
||||
chain-emit follow-up events via `emit(event)`. Business IO is NOT mediated
|
||||
by this Protocol — strategies directly import their persistence adapters
|
||||
(memory → infra is allowed under the project's DDD layering).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
from structlog.types import FilteringBoundLogger
|
||||
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
|
||||
|
||||
class StrategyContext(Protocol):
    """Structural type of the context object passed to each strategy call.

    Members:

    - ``run_id``: id of the current RunRecord, as a string.
    - ``logger``: a structlog logger. ``strategy_name`` / ``run_id`` /
      ``attempt`` are auto-injected into every log record during the
      call, so a strategy gets those fields even when it logs through a
      different logger.
    - ``emit(event)``: chain-emit a follow-up event. The event type must
      be listed in the decorator's ``emits=[...]``; otherwise
      EmitNotDeclaredError is raised.
    """

    run_id: str
    logger: FilteringBoundLogger

    async def emit(self, event: BaseEvent) -> None:
        ...
|
||||
69
src/everos/infra/ome/decorator.py
Normal file
69
src/everos/infra/ome/decorator.py
Normal file
@ -0,0 +1,69 @@
|
||||
"""@offline_strategy decorator — attaches StrategyMeta to the function.
|
||||
|
||||
Decorator is side-effect-free; engine collects via explicit
|
||||
`engine.register(func)`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
from everos.infra.ome.context import StrategyContext
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
from everos.infra.ome.gates import Counter
|
||||
from everos.infra.ome.triggers import Trigger
|
||||
|
||||
type AppliesTo = str | Callable[[BaseEvent], bool] | None
|
||||
type StrategyFn = Callable[[BaseEvent, StrategyContext], Awaitable[None]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StrategyMeta:
    """Immutable record of one strategy's decorator arguments.

    Built by ``offline_strategy`` at decoration time and stored on the
    decorated function; consumed by ``engine.register()``.
    """

    # Strategy name as given to the decorator.
    name: str
    trigger: Trigger
    # Event classes the strategy declared it may emit.
    emits: frozenset[type[BaseEvent]]
    applies_to: AppliesTo
    gate: Counter | None
    # None means no per-strategy retry budget was given.
    max_retries: int | None
    enabled: bool
    # The decorated coroutine function itself.
    func: StrategyFn
|
||||
|
||||
|
||||
def offline_strategy(
    *,
    name: str,
    trigger: Trigger,
    emits: list[type[BaseEvent]],
    applies_to: AppliesTo = None,
    gate: Counter | None = None,
    max_retries: int | None = None,
    enabled: bool = True,
) -> Callable[[StrategyFn], StrategyFn]:
    """Build a decorator that tags an async function as an OME strategy.

    The decorator stores a ``StrategyMeta`` on the function as
    ``_ome_strategy_meta`` and returns the same function object; it does
    not register anything with an engine.

    Raises:
        ValueError: ``name`` is empty or whitespace-only (raised when the
            decorator factory is called).
        TypeError: the decorated object is not a coroutine function
            (raised when the decorator is applied).
    """
    if not (name and name.strip()):
        raise ValueError("offline_strategy: name must be a non-empty string")

    def wrap(func: StrategyFn) -> StrategyFn:
        if inspect.iscoroutinefunction(func):
            func._ome_strategy_meta = StrategyMeta(  # type: ignore[attr-defined]
                name=name,
                trigger=trigger,
                emits=frozenset(emits),
                applies_to=applies_to,
                gate=gate,
                max_retries=max_retries,
                enabled=enabled,
                func=func,
            )
            return func
        raise TypeError(
            f"offline_strategy: {func.__name__} must be async (coroutine function)"
        )

    return wrap
|
||||
797
src/everos/infra/ome/engine.py
Normal file
797
src/everos/infra/ome/engine.py
Normal file
@ -0,0 +1,797 @@
|
||||
"""OfflineEngine — OME runtime and scheduler.
|
||||
|
||||
Manages strategy registration, start-stop lifecycle, event dispatch, and
|
||||
scheduling of Cron and Idle triggers via APScheduler. Enforces single-engine
|
||||
guard via portalocker for concurrent access safety.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import inspect
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
import portalocker
|
||||
from apscheduler.executors.asyncio import AsyncIOExecutor
|
||||
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome._background.config_reloader import ConfigReloader
|
||||
from everos.infra.ome._background.crash_recovery import scan_and_resume
|
||||
from everos.infra.ome._background.idle_scanner import IdleScanner
|
||||
from everos.infra.ome._dispatch._state import _CURRENT_STRATEGY
|
||||
from everos.infra.ome._dispatch.dispatcher import EventDispatcher
|
||||
from everos.infra.ome._dispatch.registry import StrategyRegistry
|
||||
from everos.infra.ome._dispatch.runner import Runner
|
||||
from everos.infra.ome._stores.counter import CounterStore
|
||||
from everos.infra.ome._stores.idle import IdleStore
|
||||
from everos.infra.ome._stores.run_record import RunRecordStore
|
||||
from everos.infra.ome._stores.storage import OMEStorage
|
||||
from everos.infra.ome.config import OMEConfig
|
||||
from everos.infra.ome.decorator import StrategyMeta
|
||||
from everos.infra.ome.events import BaseEvent, CronTick, ManualTick, resolve_topic
|
||||
from everos.infra.ome.exceptions import (
|
||||
EngineCallFromStrategyError,
|
||||
EngineLockHeldError,
|
||||
OMEError,
|
||||
)
|
||||
from everos.infra.ome.records import RunRecord, RunStatus, StrategyRouteInfo
|
||||
from everos.infra.ome.triggers import Cron, Idle
|
||||
|
||||
logger = get_logger(__name__)

# Engine id -> live engine. The module-level APS callbacks below receive
# only the id and use this mapping to find their engine.
_ENGINES: dict[str, OfflineEngine] = {}
|
||||
|
||||
|
||||
def _refuse_inside_strategy(method: Any) -> Any:
    """Wrap an engine method so it raises when invoked from a strategy.

    Before delegating, the wrapper reads ``_CURRENT_STRATEGY``; if a
    strategy is current it raises :class:`EngineCallFromStrategyError`
    naming that strategy and the wrapped method. Strategies are meant to
    reach the engine only through the ``(event, ctx)`` arguments, since
    ``ctx.emit`` is what enforces the declared ``emits=[...]`` contract.

    Coroutine functions get an async wrapper; everything else gets a sync
    wrapper.
    """

    def _reject_if_in_strategy() -> None:
        active = _CURRENT_STRATEGY.get()
        if active is not None:
            raise EngineCallFromStrategyError(
                strategy=active.name, method=method.__name__
            )

    if not inspect.iscoroutinefunction(method):

        @functools.wraps(method)
        def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
            _reject_if_in_strategy()
            return method(self, *args, **kwargs)

        return sync_wrapper

    @functools.wraps(method)
    async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
        _reject_if_in_strategy()
        return await method(self, *args, **kwargs)

    return async_wrapper
|
||||
|
||||
|
||||
async def _runner_entry(
    engine_id: str,
    strategy_name: str,
    run_id: str,
    event_topic: str,
    event_payload: str,
    max_retries_snapshot: int,
) -> None:
    """APS job target for one strategy run.

    Kept at module level and fed plain-value arguments so the job can be
    pickled into the APS jobstore (no closures or bound methods). Resolves
    the engine by ``engine_id`` and forwards everything else to
    :meth:`OfflineEngine.dispatch_run`. If no engine is registered under
    that id, logs ``no_engine_for_runner`` and returns.
    """
    if (engine := _ENGINES.get(engine_id)) is not None:
        await engine.dispatch_run(
            strategy_name=strategy_name,
            run_id=run_id,
            event_topic=event_topic,
            event_payload=event_payload,
            max_retries_snapshot=max_retries_snapshot,
        )
        return
    logger.error(
        "no_engine_for_runner",
        engine_id=engine_id,
        run_id=run_id,
    )
|
||||
|
||||
|
||||
async def _cron_entry(engine_id: str, strategy_name: str) -> None:
    """APS job target for Cron triggers.

    Resolves the engine by ``engine_id`` and emits a ``CronTick`` for
    ``strategy_name`` through the engine's public ``emit``. If no engine
    is registered under that id, logs ``no_engine_for_cron`` and returns.
    """
    if (engine := _ENGINES.get(engine_id)) is not None:
        await engine.emit(CronTick(strategy_name=strategy_name))
        return
    logger.error(
        "no_engine_for_cron",
        engine_id=engine_id,
        strategy_name=strategy_name,
    )
|
||||
|
||||
|
||||
async def _idle_entry(engine_id: str, strategy_name: str) -> None:
    """APS job target for Idle interval scans.

    Resolves the engine by ``engine_id`` and calls
    :meth:`OfflineEngine.run_idle_scan` for ``strategy_name``. If no
    engine is registered under that id, logs ``no_engine_for_idle`` and
    returns.
    """
    if (engine := _ENGINES.get(engine_id)) is not None:
        await engine.run_idle_scan(strategy_name)
        return
    logger.error(
        "no_engine_for_idle",
        engine_id=engine_id,
        strategy_name=strategy_name,
    )
|
||||
|
||||
|
||||
class OfflineEngine:
|
||||
"""Offline Memory Engine — orchestrates strategy registration, scheduling,
|
||||
and event dispatch.
|
||||
|
||||
Lifecycle::
|
||||
|
||||
engine = OfflineEngine(config=cfg)
|
||||
engine.register(my_strategy) # before start()
|
||||
engine.on_dead_letter(cb) # before start()
|
||||
await engine.start() # acquires file lock, boots scheduler
|
||||
await engine.emit(SomeEvent(...)) # fan out through dispatcher
|
||||
await engine.stop() # graceful shutdown
|
||||
|
||||
Single-process invariant: a file lock on
|
||||
``<jobstore_path>.lock`` guarantees at most one engine per jobstore
|
||||
at any time (cross-process safe via ``portalocker``).
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    *,
    config: OMEConfig,
) -> None:
    """Create an unstarted engine bound to ``config``.

    Only the registry and the storage handle are built here; every other
    collaborator is left as ``None`` until :meth:`start` wires it up.
    """
    self._config = config
    self._registry = StrategyRegistry()
    self._storage = OMEStorage(db_path=config.jobstore_path)
    self._lock_handle: Any = None
    self._started = False
    self._on_dead_letter: Callable[[RunRecord], None] | None = None

    # Collaborators populated by start().
    self._counter_store: CounterStore | None = None
    self._run_record_store: RunRecordStore | None = None
    self._dispatcher: EventDispatcher | None = None
    self._runner: Runner | None = None
    self._engine_sem: asyncio.Semaphore | None = None
    self._idle_store: IdleStore | None = None
    self._engine_id = uuid4().hex
    self._scheduler: AsyncIOScheduler | None = None
    self._config_reloader: ConfigReloader | None = None

    # Number of strategy runs currently in flight. It goes up when a run
    # is handed to APS (so emit-then-wait callers immediately see a
    # non-zero count) and comes back down in dispatch_run's finally.
    # stop() / drain() rely on it because APS 3.x
    # AsyncIOExecutor.shutdown(wait=True) does not wait for async
    # coroutines (see apscheduler/executors/asyncio.py:24).
    self._active_runs = 0
    self._idle_event: asyncio.Event | None = None
|
||||
|
||||
def register(self, func: Callable[..., Any]) -> None:
    """Add a function decorated with :func:`offline_strategy` to the registry.

    Only allowed before :meth:`start`: once the engine is running the
    scheduler has already snapshotted the strategy set for Cron / Idle
    job creation.

    Raises:
        OMEError: the engine has already been started.
    """
    if not self._started:
        self._registry.register(func)
        return
    raise OMEError("register: cannot register after start()")
|
||||
|
||||
@_refuse_inside_strategy
def reschedule_cron_job(self, name: str, expr: str) -> None:
    """Point the ``cron::<name>`` APS job at a new crontab expression.

    APS ``reschedule_job`` is atomic: on success pending invocations are
    recomputed against the new trigger; on failure it raises and APS
    state is unchanged, so a caller can roll back any registry mutation
    it paired with this call.

    Raises:
        OMEError: the engine has no scheduler (not started).
    """
    scheduler = self._scheduler
    if scheduler is None:
        raise OMEError("reschedule_cron_job: engine not started")
    new_trigger = CronTrigger.from_crontab(expr)
    scheduler.reschedule_job(job_id=f"cron::{name}", trigger=new_trigger)
|
||||
|
||||
@_refuse_inside_strategy
def reschedule_idle_job(self, name: str, scan_interval_seconds: int) -> None:
    """Point the ``idle::<name>`` APS scan job at a new interval.

    Raises:
        OMEError: the engine has no scheduler (not started).
    """
    scheduler = self._scheduler
    if scheduler is None:
        raise OMEError("reschedule_idle_job: engine not started")
    new_trigger = IntervalTrigger(seconds=scan_interval_seconds)
    scheduler.reschedule_job(job_id=f"idle::{name}", trigger=new_trigger)
|
||||
|
||||
def on_dead_letter(self, callback: Callable[[RunRecord], None]) -> None:
    """Install the callback invoked after a run is marked DEAD_LETTER.

    Must be called before start(). After start() the call is ignored
    and a warning is logged, because the Runner has already captured
    the callback. Repeated calls before start() overwrite each other:
    only the last callback is kept, nothing is chained.
    """
    if not self._started:
        self._on_dead_letter = callback
        return
    logger.warning("on_dead_letter_after_start_ignored")
|
||||
|
||||
async def start(self) -> None:
    """Boot the engine.

    Steps, in order: initialise storage, take the jobstore file lock,
    validate the strategy registry, build the late-bound components,
    launch APScheduler, publish this engine in :data:`_ENGINES`, run
    crash recovery, register Cron / Idle jobs, and start the config
    reloader.

    Idempotent: a second call while running is a no-op. If any step
    after the lock is taken fails — or the calling task is cancelled
    while it awaits — everything already set up is rolled back via
    :meth:`_rollback_partial_start` and the original exception is
    re-raised, so a retry starts from a clean state.
    """
    if self._started:
        return
    await self._storage.init()
    self._acquire_lock()
    try:
        self._registry.validate()
        self._init_components()
        # The engine starts out idle: nothing has been enqueued yet.
        self._idle_event = asyncio.Event()
        self._idle_event.set()
        self._launch_scheduler()
        # Publish before recovery so recovered jobs can look the engine up.
        _ENGINES[self._engine_id] = self
        await self._run_crash_recovery()
        self._register_scheduled_jobs()
        self._start_config_reloader()
        self._started = True
    except (Exception, asyncio.CancelledError):
        # CancelledError derives from BaseException, so ``except
        # Exception`` alone would skip the rollback when start() is
        # cancelled mid-boot and leave the lock and scheduler behind.
        await self._rollback_partial_start()
        raise
|
||||
|
||||
def _init_components(self) -> None:
    """Build the stores, dispatcher, semaphore and runner.

    Invoked from :meth:`start` once the file lock is held and registry
    validation has passed.
    """
    storage = self._storage
    cfg = self._config

    counter_store = CounterStore(storage=storage)
    self._counter_store = counter_store

    run_records = RunRecordStore(
        storage=storage,
        max_records_per_strategy=cfg.max_records_per_strategy,
    )
    self._run_record_store = run_records

    self._dispatcher = EventDispatcher(
        registry=self._registry,
        counter_store=counter_store,
    )

    # Engine-wide concurrency cap shared with the runner.
    sem = asyncio.Semaphore(cfg.max_concurrent_runs)
    self._engine_sem = sem

    self._runner = Runner(
        run_record_store=run_records,
        engine_sem=sem,
        # ctx.emit goes through the internal dispatch path, not emit().
        emit_hook=self._dispatch_event,
        on_dead_letter=self._on_dead_letter,
    )
    self._idle_store = IdleStore(storage=storage)
|
||||
|
||||
def _launch_scheduler(self) -> None:
    """Create the AsyncIOScheduler on its SQLite jobstore and start it.

    The APS jobstore uses its own file (``aps_jobstore_path``) rather
    than sharing ``jobstore_path``. Sharing one ``ome.db`` had APS's sync
    SQLAlchemy writer and OME's async aiosqlite writer racing for the
    same file lock, which showed up as flaky
    ``SQLITE_BUSY: database is locked`` during concurrent dispatch.
    """
    aps_store = SQLAlchemyJobStore(
        url=f"sqlite:///{self._config.aps_jobstore_path}",
    )
    scheduler = AsyncIOScheduler(
        jobstores={"default": aps_store},
        executors={"default": AsyncIOExecutor()},
    )
    # Assign before start() so a failed start is still visible to rollback.
    self._scheduler = scheduler
    scheduler.start()
|
||||
|
||||
async def _run_crash_recovery(self) -> None:
    """Hand stale RUNNING rows to :func:`scan_and_resume`.

    Rows whose ``started_at`` is older than
    ``crash_recovery_timeout_seconds`` are treated as crashes from a
    previous engine session: they are marked CRASHED and re-added to APS
    with a fresh ``run_id``, reusing the original event payload.
    Re-enqueueing goes through :meth:`_enqueue_recovery_job`.
    """
    stale_after = self._config.crash_recovery_timeout_seconds
    await scan_and_resume(
        run_record_store=self._run_record_store,
        timeout_seconds=stale_after,
        add_job=self._enqueue_recovery_job,
    )
|
||||
|
||||
async def _enqueue_recovery_job(
    self,
    name: str,
    run_id: str,
    event_topic: str,
    event_payload: str,
    max_retries: int,
) -> None:
    """Schedule one immediate APS job for a re-enqueued crashed run.

    Passed to :func:`scan_and_resume` as its ``add_job`` callback. The
    in-flight counter is bumped before ``add_job`` and rolled back if
    ``add_job`` raises, mirroring :meth:`_enqueue_run`: the job reaches
    :meth:`dispatch_run` like any other run, which does the matching
    decrement.
    """
    job_args = [
        self._engine_id,
        name,
        run_id,
        event_topic,
        event_payload,
        max_retries,
    ]
    self._on_run_enqueued()
    try:
        self._scheduler.add_job(
            _runner_entry,
            trigger="date",
            run_date=get_utc_now(),
            args=job_args,
            id=run_id,
            replace_existing=False,
            misfire_grace_time=None,  # type: ignore[arg-type]  # APS accepts None ("no expiry"); stub omits it (apscheduler/job.py:213)
        )
    except Exception:
        # The job never reached APS, so nothing will decrement for it.
        self._on_run_completed()
        raise
|
||||
|
||||
def _register_scheduled_jobs(self) -> None:
    """Create one recurring APS job per Cron- or Idle-triggered strategy.

    Job ids are ``cron::<name>`` / ``idle::<name>`` and an existing job
    with the same id is replaced. Strategies with any other trigger get
    no scheduled job; they fire only when their event is dispatched.
    """
    for meta in self._registry.all():
        trig = meta.trigger
        if isinstance(trig, Cron):
            entry = _cron_entry
            aps_trigger = CronTrigger.from_crontab(trig.expr)
            prefix = "cron"
        elif isinstance(trig, Idle):
            entry = _idle_entry
            aps_trigger = IntervalTrigger(seconds=trig.scan_interval_seconds)
            prefix = "idle"
        else:
            continue
        self._scheduler.add_job(
            entry,
            trigger=aps_trigger,
            args=[self._engine_id, meta.name],
            id=f"{prefix}::{meta.name}",
            replace_existing=True,
        )
|
||||
|
||||
def _start_config_reloader(self) -> None:
    """Start a :class:`ConfigReloader` when watching is enabled.

    Does nothing unless ``config_watch`` is true and ``config_path`` is
    set.
    """
    cfg = self._config
    if not cfg.config_watch or cfg.config_path is None:
        return
    reloader = ConfigReloader(
        config_path=cfg.config_path,
        registry=self._registry,
        engine=self,
        debounce_ms=cfg.config_watch_debounce_ms,
    )
    # Stored before start() so rollback can still stop it.
    self._config_reloader = reloader
    reloader.start()
|
||||
|
||||
async def _rollback_partial_start(self) -> None:
    """Undo whatever :meth:`start` had set up before it failed.

    Order: stop the config reloader, drain in-flight runs (best effort,
    5 s timeout — a failed startup should not block on recovery jobs),
    shut the scheduler down, drop the ``_ENGINES`` slot, release the
    file lock, and reset the in-flight bookkeeping.

    The drain comes before ``shutdown(wait=False)`` for the same reason
    as in :meth:`stop`: pausing the scheduler first would freeze recovery
    jobs that already own a +1 in ``_active_runs``.

    Each step runs even when an earlier one raises, so the lock and the
    ``_ENGINES`` slot are always released; the first exception still
    propagates once cleanup is complete.
    """
    try:
        reloader, self._config_reloader = self._config_reloader, None
        if reloader is not None:
            await reloader.stop()
    finally:
        try:
            scheduler = self._scheduler
            if scheduler is not None:
                try:
                    await self.wait_idle(timeout=5.0)
                finally:
                    try:
                        # start() may have failed inside scheduler.start();
                        # shutting down a scheduler that never ran raises.
                        if scheduler.running:
                            scheduler.shutdown(wait=False)
                    finally:
                        self._scheduler = None
        finally:
            _ENGINES.pop(self._engine_id, None)
            try:
                self._release_lock()
            finally:
                self._idle_event = None
                self._active_runs = 0
|
||||
|
||||
async def wait_idle(self, *, timeout: float = 30.0) -> bool: # noqa: ASYNC109
|
||||
"""Block until every in-flight strategy run has settled.
|
||||
|
||||
Returns ``True`` on idle, ``False`` if ``timeout`` elapses with
|
||||
runs still active. "In flight" means anywhere between
|
||||
:meth:`_enqueue_run` (which bumps the counter just before the
|
||||
``add_job`` call) and the end of :meth:`dispatch_run` (which
|
||||
releases it in ``finally``).
|
||||
|
||||
Why this exists: APS 3.x ``AsyncIOExecutor.shutdown(wait=True)``
|
||||
documents — in the executor source — that it cannot honor wait
|
||||
for async coroutines and simply cancels their futures
|
||||
(``apscheduler/executors/asyncio.py:24``). Anything depending on
|
||||
"all jobs really completed" has to drain through this counter,
|
||||
not the scheduler.
|
||||
"""
|
||||
if self._idle_event is None:
|
||||
return self._active_runs == 0
|
||||
try:
|
||||
await asyncio.wait_for(self._idle_event.wait(), timeout=timeout)
|
||||
return True
|
||||
except TimeoutError:
|
||||
return False
|
||||
|
||||
async def stop(self) -> None:
    """Shut the engine down gracefully.

    Order: stop the config reloader, drain in-flight strategy runs (30 s
    timeout, logged as ``ome_stop_drain_timeout`` if exceeded), shut the
    scheduler down, drop the ``_ENGINES`` slot, release the jobstore
    lock, and reset the engine to the not-started state.

    Idempotent: calling stop on an engine that is not started is a no-op.

    The drain order is deliberately *not* ``pause → wait_idle →
    shutdown``:

    - ``pause()`` cannot come first: APS ``pause()`` freezes jobstore
      dispatch, including jobs already enqueued (see
      ``apscheduler/schedulers/base.py:pause``). Each such job already
      owns a +1 in ``_active_runs`` from :meth:`_enqueue_run`, so
      freezing dispatch would deadlock :meth:`wait_idle`.
    - ``shutdown(wait=True)`` cannot be used: APS 3.x
      ``AsyncIOExecutor.shutdown`` cannot wait for async coroutines and
      cancels their futures (``apscheduler/executors/asyncio.py:24``),
      which cascades into ``CancelledError`` / "Event loop is closed"
      warnings.

    So ``wait_idle`` runs first, letting APS dispatch everything in the
    jobstore and every ``dispatch_run`` release its counter, and
    ``shutdown(wait=False)`` follows. ``_ENGINES`` is popped only after
    the drain so ``_runner_entry`` can still resolve this engine while
    the last jobs finish.

    Every step runs even when an earlier one raises, so a failing
    reloader or scheduler cannot leave the lock held or the engine stuck
    in the started state; the first exception still propagates once
    cleanup is complete.
    """
    if not self._started:
        return
    try:
        reloader, self._config_reloader = self._config_reloader, None
        if reloader is not None:
            await reloader.stop()
    finally:
        try:
            scheduler = self._scheduler
            if scheduler is not None:
                try:
                    drained = await self.wait_idle(timeout=30.0)
                    if not drained:
                        logger.warning(
                            "ome_stop_drain_timeout",
                            engine_id=self._engine_id,
                            active_runs=self._active_runs,
                        )
                finally:
                    try:
                        scheduler.shutdown(wait=False)
                    finally:
                        self._scheduler = None
        finally:
            _ENGINES.pop(self._engine_id, None)
            try:
                self._release_lock()
            finally:
                self._started = False
                self._idle_event = None
                self._active_runs = 0
|
||||
|
||||
def _acquire_lock(self) -> None:
    """Take the exclusive, non-blocking lock on ``<jobstore_path>.lock``.

    Creates the parent directory if needed. On success the open file
    handle is kept in ``self._lock_handle`` until :meth:`_release_lock`.
    If the lock cannot be taken the handle is closed before raising, so
    a refused start does not leak a file descriptor.

    Raises:
        EngineLockHeldError: portalocker reported a lock failure, i.e.
            another engine already holds the lock file.
    """
    lock_path = Path(str(self._config.jobstore_path) + ".lock")
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    handle = open(lock_path, "a+")  # noqa: SIM115
    try:
        portalocker.lock(handle, portalocker.LOCK_EX | portalocker.LOCK_NB)
    except portalocker.LockException as e:
        handle.close()
        raise EngineLockHeldError(
            f"another OfflineEngine instance already holds {lock_path}"
        ) from e
    except BaseException:
        # Any other failure: still don't leak the descriptor.
        handle.close()
        raise
    self._lock_handle = handle
|
||||
|
||||
def _release_lock(self) -> None:
    """Unlock and close the lock file handle, if one is held.

    The handle is closed and cleared even when the unlock call raises.
    """
    handle = self._lock_handle
    if handle is None:
        return
    try:
        portalocker.unlock(handle)
    finally:
        handle.close()
        self._lock_handle = None
|
||||
|
||||
@_refuse_inside_strategy
async def emit(self, event: BaseEvent) -> None:
    """Dispatch ``event`` through the engine (public entry point).

    Not for use inside a strategy: the :func:`_refuse_inside_strategy`
    guard raises :class:`EngineCallFromStrategyError` there. Strategies
    call ``ctx.emit`` instead, which is the only path that enforces their
    declared ``emits=[...]`` contract.
    """
    dispatch = self._dispatch_event
    await dispatch(event)
|
||||
|
||||
async def _dispatch_event(self, event: BaseEvent) -> None:
    """Route ``event`` to matching strategies and enqueue their runs.

    This is the unguarded dispatch path: Runner's ``emit_hook`` points
    here so ``ctx.emit`` does not trip the guard on the public
    :meth:`emit`.

    Before dispatching, every Idle strategy listening on this event type
    has its idle store touched for the bucket read from the event. That
    touch is best-effort: a failure is logged as ``idle_touch_failed``
    and dispatch continues.

    Raises:
        OMEError: the engine is not started.
    """
    if not self._started:
        raise OMEError("emit: engine not started")

    event_type = type(event)
    for meta in self._registry.all():
        trig = meta.trigger
        if not isinstance(trig, Idle) or event_type not in trig.on:
            continue
        bucket = getattr(event, trig.event_field, None)
        if bucket is None:
            continue
        try:
            await self._idle_store.touch(  # type: ignore[union-attr]
                meta.name,
                str(bucket),
                at=get_utc_now(),
            )
        except Exception as e:
            logger.warning(
                "idle_touch_failed",
                strategy_name=meta.name,
                event_field=trig.event_field,
                error=str(e),
            )

    for meta, run_id in await self._dispatcher.dispatch(event):
        self._enqueue_run(meta, event, run_id)
|
||||
|
||||
@_refuse_inside_strategy
async def trigger_manual(
    self,
    name: str,
    *,
    event: BaseEvent | None = None,
    force: bool = False,
) -> None:
    """Trigger a single strategy by name.

    - With ``event=None`` the engine emits ``ManualTick(strategy_name=name)``
      on the caller's behalf.
    - ``force=True`` bypasses the ``enabled`` gate; ``applies_to`` and
      ``Counter`` still apply.

    The event goes through :meth:`EventDispatcher.dispatch` with
    ``strategy_filter=name``, so the same gate logic applies as for
    engine-driven dispatch.

    Raises:
        OMEError: the engine is not started.
    """
    if not self._started:
        raise OMEError("trigger_manual: engine not started")

    tick = event if event is not None else ManualTick(strategy_name=name)
    matched = await self._dispatcher.dispatch(
        tick,
        force_enabled=force,
        strategy_filter=name,
    )
    for meta, run_id in matched:
        self._enqueue_run(meta, tick, run_id)
|
||||
|
||||
def _enqueue_run(self, meta: StrategyMeta, event: BaseEvent, run_id: str) -> None:
    """Schedule a one-shot APScheduler job that runs ``meta`` on ``event``.

    The job targets the module-level ``_runner_entry`` and carries only
    plain values (ids, names, the event topic string, the JSON payload and
    the retry budget) so the argument list can be stored by the jobstore.

    The in-flight counter is bumped *before* ``add_job`` so that a caller
    who emits and then immediately waits for idle sees a non-zero count.
    The matching decrement normally happens in :meth:`dispatch_run`; if
    scheduling itself fails, the bump is undone here and the error is
    re-raised.

    Args:
        meta: Strategy to run; its ``max_retries`` overrides the engine
            default when it is not ``None``.
        event: Event to serialise into the job arguments.
        run_id: Identifier used both as a job argument and as the job id.
    """
    retry_budget = (
        self._config.max_retries if meta.max_retries is None else meta.max_retries
    )
    topic = type(event).topic()

    self._on_run_enqueued()
    try:
        self._scheduler.add_job(
            _runner_entry,
            trigger="date",
            run_date=get_utc_now(),
            # Serialisation stays inside the ``try`` so a failure here also
            # rolls the counter back.
            args=[
                self._engine_id,
                meta.name,
                run_id,
                topic,
                event.model_dump_json(),
                retry_budget,
            ],
            id=run_id,
            replace_existing=False,
            misfire_grace_time=None,  # type: ignore[arg-type] # APS accepts None ("no expiry"); stub omits it (apscheduler/job.py:213)
        )
    except Exception:
        # Undo the optimistic +1: no job exists to release it later.
        self._on_run_completed()
        raise
|
||||
|
||||
def _on_run_enqueued(self) -> None:
|
||||
"""Bump in-flight count and mark the engine non-idle."""
|
||||
self._active_runs += 1
|
||||
if self._idle_event is not None:
|
||||
self._idle_event.clear()
|
||||
|
||||
def _on_run_completed(self) -> None:
|
||||
"""Drop in-flight count; mark the engine idle if the count hit zero.
|
||||
|
||||
Never lets the counter dip below zero — that would mask a bookkeeping
|
||||
bug rather than fix it, and a stuck-clear idle_event would deadlock
|
||||
``wait_idle``.
|
||||
"""
|
||||
if self._active_runs <= 0:
|
||||
logger.error(
|
||||
"active_runs_underflow",
|
||||
engine_id=self._engine_id,
|
||||
)
|
||||
self._active_runs = 0
|
||||
if self._idle_event is not None:
|
||||
self._idle_event.set()
|
||||
return
|
||||
self._active_runs -= 1
|
||||
if self._active_runs == 0 and self._idle_event is not None:
|
||||
self._idle_event.set()
|
||||
|
||||
async def dispatch_run(
    self,
    *,
    strategy_name: str,
    run_id: str,
    event_topic: str,
    event_payload: str,
    max_retries_snapshot: int,
) -> None:
    """Execute one scheduled strategy run from its serialised arguments.

    Rebuilds the event from ``event_topic`` and ``event_payload``, looks the
    strategy up in the registry and hands both to ``self._runner.run``.

    The method is public only so a module-level callback can reach it; it
    is not part of the strategy-author API. It deliberately carries no
    ``_refuse_inside_strategy`` guard: per the original design notes, an
    executor may inherit the caller's ContextVar, which would trip the
    guard on legitimate cascades.

    The in-flight counter is released in ``finally``, so the +1 taken at
    enqueue time is returned whether the run succeeds, raises or is
    cancelled.

    Args:
        strategy_name: Registry key of the strategy to run.
        run_id: Identifier assigned to this run at dispatch time.
        event_topic: ``"<module>:<class>"`` identifier of the event class.
        event_payload: JSON produced by the event's ``model_dump_json``.
        max_retries_snapshot: Retry budget captured when the run was
            enqueued.
    """
    try:
        event_cls = resolve_topic(event_topic)
        rebuilt_event = event_cls.model_validate_json(event_payload)
        strategy_meta = self._registry.get(strategy_name)
        await self._runner.run(
            strategy_meta,
            rebuilt_event,
            run_id=run_id,
            max_retries_snapshot=max_retries_snapshot,
        )
    finally:
        self._on_run_completed()
|
||||
|
||||
async def run_idle_scan(self, strategy_name: str) -> None:
    """Run a single idle scan for one Idle-triggered strategy.

    Builds an :class:`IdleScanner` bound to the engine's idle store and to
    ``self.emit``, then awaits one ``scan_once``. If the registered
    strategy does not use an ``Idle`` trigger, the mismatch is logged as an
    error and nothing is scanned.

    Public and unguarded for the same reasons as :meth:`dispatch_run`.

    Args:
        strategy_name: Registry key of the strategy to scan for.
    """
    trigger = self._registry.get(strategy_name).trigger

    if isinstance(trigger, Idle):
        scanner = IdleScanner(
            strategy_name=strategy_name,
            trigger=trigger,
            idle_store=self._idle_store,  # type: ignore[arg-type]
            emit=self.emit,
        )
        await scanner.scan_once()
        return

    logger.error(
        "idle_entry_bad_trigger_type",
        strategy_name=strategy_name,
        trigger_type=type(trigger).__name__,
    )
|
||||
|
||||
@_refuse_inside_strategy
async def inspect_dispatch(self, event: BaseEvent) -> list[StrategyRouteInfo]:
    """Report how ``event`` would be routed, one entry per strategy.

    Delegates to the dispatcher's ``inspect`` call, which is the read-only
    counterpart of ``dispatch``.

    Args:
        event: Event to evaluate.

    Returns:
        The dispatcher's per-strategy routing information.

    Raises:
        OMEError: The engine has not been started.
    """
    if not self._started:
        raise OMEError("inspect_dispatch: engine not started")
    route_infos = await self._dispatcher.inspect(event)
    return route_infos
|
||||
|
||||
@_refuse_inside_strategy
async def list_runs(
    self,
    strategy_name: str,
    *,
    status: RunStatus | None = None,
    limit: int = 100,
) -> list[RunRecord]:
    """Fetch run records of one strategy from the run-record store.

    Args:
        strategy_name: Strategy whose runs to fetch.
        status: Only return runs in this state; ``None`` returns runs in
            any state.
        limit: Maximum number of records to return. Ordering is decided by
            the store (documented upstream as ``started_at DESC``).

    Returns:
        Up to ``limit`` ``RunRecord`` instances.

    Raises:
        OMEError: The engine has not been started.
    """
    if not self._started:
        raise OMEError("list_runs: engine not started")
    store = self._run_record_store
    return await store.list_runs(
        strategy_name=strategy_name,
        status=status,
        limit=limit,
    )
|
||||
|
||||
@_refuse_inside_strategy
async def get_run_status(self, run_id: str) -> RunRecord | None:
    """Look up one run record by its ``run_id``.

    Args:
        run_id: Identifier assigned to the run at dispatch time.

    Returns:
        Whatever the run-record store returns for that id: the matching
        ``RunRecord``, or ``None`` when no row exists.

    Raises:
        OMEError: The engine has not been started.
    """
    if not self._started:
        raise OMEError("get_run_status: engine not started")
    record = await self._run_record_store.get(run_id)
    return record
|
||||
78
src/everos/infra/ome/events.py
Normal file
78
src/everos/infra/ome/events.py
Normal file
@ -0,0 +1,78 @@
|
||||
"""OME event base class + built-in tick events.
|
||||
|
||||
All business events should subclass BaseEvent. OME emits three built-in
|
||||
ticks for engine-driven triggers (Cron / Idle / Manual).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
from datetime import datetime
|
||||
from functools import cache
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
|
||||
|
||||
class BaseEvent(BaseModel):
    """Common base class of every event that flows through OME.

    Instances are frozen and reject unknown fields. Being Pydantic models,
    they round-trip through ``model_dump_json`` / ``model_validate_json``,
    which is how event payloads are persisted and rebuilt.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Random 32-character hex id generated per instance.
    event_id: str = Field(default_factory=lambda: uuid4().hex)
    # Creation timestamp taken from the project's UTC clock helper.
    ts: datetime = Field(default_factory=get_utc_now)

    @classmethod
    def topic(cls) -> str:
        """Return the ``"<module>:<class>"`` identifier of this event class.

        The string is the inverse input of ``resolve_topic``, which imports
        the module and returns the class again.
        """
        return ":".join((cls.__module__, cls.__name__))
|
||||
|
||||
|
||||
@cache
def resolve_topic(topic: str) -> type[BaseEvent]:
    """Import and return the event class named by ``topic``.

    Inverse of ``BaseEvent.topic()``. Successful lookups are memoised with
    ``functools.cache``; calls that raise are not cached and are retried on
    the next call.

    Args:
        topic: Identifier in ``"<module>:<class>"`` form.

    Returns:
        The ``BaseEvent`` subclass the topic refers to.

    Raises:
        ValueError: ``topic`` is not of the form ``"<module>:<class>"``
            (missing separator, empty module part or empty class part).
        ImportError: The module part cannot be imported.
        TypeError: The named attribute is missing or is not a ``BaseEvent``
            subclass.
    """
    module_name, sep, cls_name = topic.partition(":")
    # An empty module part (":Foo") used to slip through and fail inside
    # importlib with an unrelated "Empty module name" message.
    if not sep or not module_name or not cls_name:
        raise ValueError(f"invalid event topic: {topic!r}")
    # NOTE(review): this imports whatever module the topic names; topics are
    # presumably only read back from this engine's own records. Confirm
    # before feeding externally supplied topics here.
    mod: Any = importlib.import_module(module_name)
    cls = getattr(mod, cls_name, None)
    if not (isinstance(cls, type) and issubclass(cls, BaseEvent)):
        raise TypeError(f"topic {topic!r} did not resolve to a BaseEvent subclass")
    return cls
|
||||
|
||||
|
||||
class CronTick(BaseEvent):
    """Tick the engine emits for a strategy declared with ``trigger=Cron(...)``."""

    # Name of the strategy this tick is addressed to.
    strategy_name: str
|
||||
|
||||
|
||||
class IdleTick(BaseEvent):
    """Tick the engine emits for a strategy declared with ``trigger=Idle(...)``."""

    # Name of the strategy this tick is addressed to.
    strategy_name: str
    # Bucket the tick refers to.
    bucket_key: str
    # Idle duration in seconds reported with the tick.
    idle_seconds: int
|
||||
|
||||
|
||||
class ManualTick(BaseEvent):
    """Tick the engine creates for ``engine.trigger_manual(name, event=None)``."""

    # Name of the strategy that was triggered manually.
    strategy_name: str
|
||||
61
src/everos/infra/ome/exceptions.py
Normal file
61
src/everos/infra/ome/exceptions.py
Normal file
@ -0,0 +1,61 @@
|
||||
"""OME exception hierarchy."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
|
||||
|
||||
class OMEError(Exception):
    """Root of the OME exception hierarchy; all OME-internal errors derive from it."""


class StartupValidationError(OMEError):
    """Raised by ``engine.start()`` for any startup-time validation failure."""


class EngineLockHeldError(OMEError):
    """Raised when another OfflineEngine instance holds the jobstore lock."""


class StrategyContractError(OMEError):
    """Base for contract violations committed by strategy code.

    Subclasses describe programming bugs in a strategy that retrying cannot
    fix (wrong API usage, undeclared emit). By design the runner is meant to
    stop retrying on these and dead-letter the run immediately. Callers can
    ``except StrategyContractError`` to handle the whole category at once.
    """


class EngineCallFromStrategyError(StrategyContractError):
    """A strategy called a public OfflineEngine method directly.

    Strategy code is expected to talk to the engine only through the
    ``(event, ctx)`` parameters it receives. Engine methods such as
    ``emit``, ``trigger_manual``, ``inspect_dispatch``, ``list_runs``,
    ``get_run_status`` and ``reschedule_*`` are for external callers.

    Attributes are set in ``__init__``: ``strategy`` is the offending
    strategy's name and ``method`` is the engine method it called.
    """

    def __init__(self, strategy: str, method: str) -> None:
        self.strategy = strategy
        self.method = method
        super().__init__(
            f"strategy {strategy!r} called engine.{method}() directly; "
            "strategies must interact with the engine only via the "
            "(event, ctx) parameters"
        )

    def __reduce__(self) -> tuple[object, ...]:
        # ``Exception`` rebuilds instances from ``self.args`` (the single
        # formatted message), which does not match this two-argument
        # constructor. Supplying the real arguments keeps pickle and copy
        # working; the instance dict is passed along as state.
        return (type(self), (self.strategy, self.method), dict(self.__dict__))


class EmitNotDeclaredError(StrategyContractError):
    """A strategy emitted an event that is not listed in its declared emits.

    Attributes are set in ``__init__``: ``strategy`` is the strategy's name
    and ``event`` is the event instance it tried to emit.
    """

    def __init__(self, strategy: str, event: BaseEvent) -> None:
        self.strategy = strategy
        self.event = event
        super().__init__(
            f"strategy {strategy!r} emitted {type(event).__name__!r} "
            "which is not in its declared emits"
        )

    def __reduce__(self) -> tuple[object, ...]:
        # Same reason as EngineCallFromStrategyError.__reduce__: rebuild
        # from the constructor arguments, not from the formatted message.
        return (type(self), (self.strategy, self.event), dict(self.__dict__))
|
||||
52
src/everos/infra/ome/gates.py
Normal file
52
src/everos/infra/ome/gates.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""OME gate types — declarative configuration only.
|
||||
|
||||
Counter is the only built-in gate. The actual N-counting lives in
|
||||
_stores/counter.py keyed by (strategy_name, bucket_key).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class Counter(BaseModel):
    """Declarative counter gate: let one event through per ``threshold`` events.

    Events are counted per bucket; the ``threshold``-th event passes and the
    count starts over. This class only carries configuration; the counting
    itself is implemented elsewhere.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Required, strictly positive.
    threshold: Annotated[
        int,
        Field(
            gt=0,
            description="Pass once every `threshold` events; threshold=1 lets every event pass.",
        ),
    ]

    # Optional, non-negative; defaults to 0.
    cooldown_seconds: Annotated[
        int,
        Field(
            ge=0,
            description="Minimum seconds between consecutive passes per bucket; 0 disables.",
        ),
    ] = 0

    # Optional name of the event attribute used as the bucket key.
    event_field: Annotated[
        str | None,
        Field(
            description='Bucket dimension on the event (e.g. "user_id"); None means a single global bucket.',
        ),
    ] = None
|
||||
|
||||
|
||||
# Single-member alias today; becomes a union as more gate types land.
|
||||
Gate = Counter
|
||||
99
src/everos/infra/ome/records.py
Normal file
99
src/everos/infra/ome/records.py
Normal file
@ -0,0 +1,99 @@
|
||||
"""RunRecord / RunStatus / StrategyRouteInfo / CounterProgress — pure data classes.
|
||||
|
||||
Persistence in _stores/run_record.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, NamedTuple, Self
|
||||
|
||||
from pydantic import (
|
||||
AwareDatetime,
|
||||
BaseModel,
|
||||
ConfigDict,
|
||||
Field,
|
||||
computed_field,
|
||||
model_validator,
|
||||
)
|
||||
|
||||
|
||||
class RunStatus(StrEnum):
|
||||
"""Terminal-or-running state of a single strategy run."""
|
||||
|
||||
RUNNING = "running"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
DEAD_LETTER = "dead_letter"
|
||||
CRASHED = "crashed"
|
||||
|
||||
|
||||
class RunRecord(BaseModel):
    """Immutable representation of one row of the run_record table.

    Besides per-field constraints, a model-level validator enforces how
    ``status`` relates to ``finished_at`` and ``error``:

    * ``RUNNING``: both ``finished_at`` and ``error`` must be ``None``.
    * ``SUCCESS``: ``finished_at`` must be set, ``error`` must be ``None``.
    * any other status: both ``finished_at`` and ``error`` must be set.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    run_id: Annotated[str, Field(min_length=1)]
    strategy_name: Annotated[str, Field(min_length=1)]
    status: RunStatus
    attempt: Annotated[int, Field(ge=0)]
    started_at: AwareDatetime
    finished_at: AwareDatetime | None = None
    error: Annotated[str, Field(min_length=1)] | None = None
    event_topic: Annotated[
        str,
        Field(
            min_length=1,
            description=(
                "Stable cross-process event identifier in "
                "``<module>:<class>`` form (see ``BaseEvent.topic()``)."
            ),
        ),
    ]
    event_payload: Annotated[
        str,
        Field(
            min_length=1,
            description="JSON-encoded event (``BaseEvent.model_dump_json`` output).",
        ),
    ]
    max_retries_snapshot: Annotated[int, Field(ge=0)]

    @model_validator(mode="after")
    def _check_status_invariants(self) -> Self:
        """Reject combinations of status, ``finished_at`` and ``error`` that cannot occur."""
        current = self.status

        if current == RunStatus.RUNNING:
            # A run in progress has neither an end time nor an error yet.
            if self.finished_at is not None:
                raise ValueError("RunRecord: RUNNING must have finished_at=None")
            if self.error is not None:
                raise ValueError("RunRecord: RUNNING must have error=None")
            return self

        # Every non-running status is terminal and needs an end time.
        if self.finished_at is None:
            raise ValueError(f"RunRecord: {self.status} must have finished_at set")

        if current == RunStatus.SUCCESS:
            if self.error is not None:
                raise ValueError("RunRecord: SUCCESS must have error=None")
        elif self.error is None:
            raise ValueError(f"RunRecord: {self.status} must have error set")
        return self
|
||||
|
||||
|
||||
class CounterProgress(NamedTuple):
    """Counter state of one bucket as reported by ``inspect_dispatch``."""

    # Events counted so far.
    current: int
    # Count at which the gate lets an event pass.
    threshold: int
|
||||
|
||||
|
||||
class StrategyRouteInfo(BaseModel):
    """Dispatch decision for one strategy, as returned by ``inspect_dispatch``.

    Each ``*_pass`` flag records the outcome of one gate; ``will_run`` is
    derived from the three flags and is included when the model is
    serialised.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    strategy_name: Annotated[str, Field(min_length=1)]
    enabled_pass: bool
    applies_to_pass: bool
    counter_pass: bool
    counter_progress: CounterProgress | None = None

    @computed_field  # type: ignore[prop-decorator]
    @property
    def will_run(self) -> bool:
        """Return ``True`` only when all three gates passed."""
        if not self.enabled_pass:
            return self.enabled_pass
        if not self.applies_to_pass:
            return self.applies_to_pass
        return self.counter_pass
|
||||
9
src/everos/infra/ome/testing/__init__.py
Normal file
9
src/everos/infra/ome/testing/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
"""OME testing helpers.
|
||||
|
||||
Fake strategy context and test harness for unit testing strategies.
|
||||
"""
|
||||
|
||||
from everos.infra.ome.testing.fakes import FakeStrategyContext as FakeStrategyContext
|
||||
from everos.infra.ome.testing.harness import StrategyTestHarness as StrategyTestHarness
|
||||
|
||||
__all__ = ["FakeStrategyContext", "StrategyTestHarness"]
|
||||
38
src/everos/infra/ome/testing/fakes.py
Normal file
38
src/everos/infra/ome/testing/fakes.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""In-memory test doubles for the OME StrategyContext Protocol.
|
||||
|
||||
Use FakeStrategyContext when you want to unit-test a strategy function
|
||||
in isolation without spinning up a full OfflineEngine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
|
||||
|
||||
class FakeStrategyContext:
    """In-memory strategy context that records every emitted event.

    Meant as a stand-in for the real context when unit-testing a strategy
    function without an engine.

    Attributes:
        run_id: Identifier reported for the run (``"fake_run"`` by default).
        logger: Logger obtained from ``get_logger("ome.fake_ctx")``.
        emitted: Events passed to :meth:`emit`, in call order.
    """

    def __init__(self, *, run_id: str = "fake_run") -> None:
        """Create a context with an empty ``emitted`` list.

        Args:
            run_id: Run identifier to expose on the context.
        """
        self.emitted: list[BaseEvent] = []
        self.logger = get_logger("ome.fake_ctx")
        self.run_id = run_id

    async def emit(self, event: BaseEvent) -> None:
        """Append ``event`` to ``emitted``; nothing is dispatched.

        Args:
            event: The event the strategy wants to emit.
        """
        self.emitted += [event]
|
||||
118
src/everos/infra/ome/testing/harness.py
Normal file
118
src/everos/infra/ome/testing/harness.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""StrategyTestHarness — full OfflineEngine on a tmp SQLite db.
|
||||
|
||||
Designed for end-to-end strategy tests: register, start, emit, drain
|
||||
until terminal, inspect run records. Cleans up the tmp directory on exit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from tempfile import mkdtemp
|
||||
from typing import Any
|
||||
|
||||
from everos.infra.ome.config import OMEConfig
|
||||
from everos.infra.ome.engine import OfflineEngine
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
from everos.infra.ome.records import RunRecord, RunStatus
|
||||
|
||||
|
||||
class StrategyTestHarness:
    """Async context manager running a real OfflineEngine on a temporary SQLite db.

    Offers a small test-facing surface: register strategies, start the
    engine, emit events, wait for runs to finish and read run records.
    Leaving the context stops the engine and removes the temp directory.

    Example:
        async with StrategyTestHarness() as h:
            h.register(my_strategy_func)
            await h.start()
            await h.emit(MyEvent())
            await h.drain(timeout=5)
            runs = await h.list_runs("my_strategy")
            assert len(runs) == 1
    """

    def __init__(self) -> None:
        """Create the temp directory and an engine configured to use it."""
        self._tmpdir = Path(mkdtemp(prefix="ome_test_"))
        self._engine = OfflineEngine(
            config=OMEConfig(
                jobstore_path=self._tmpdir / "ome.db",
                config_watch=False,
                max_concurrent_runs=20,
                max_retries=1,
            )
        )

    async def __aenter__(self) -> StrategyTestHarness:
        """Return the harness itself; the engine is started via :meth:`start`."""
        return self

    async def __aexit__(self, *exc: Any) -> None:
        """Stop the engine, then delete the temp directory even if stopping fails."""
        try:
            await self._engine.stop()
        finally:
            shutil.rmtree(self._tmpdir, ignore_errors=True)  # noqa: SLF001

    def register(self, func: Any) -> None:
        """Register a strategy with the wrapped engine.

        Args:
            func: A function decorated with ``@offline_strategy``.
        """
        self._engine.register(func)

    async def start(self) -> None:
        """Start the wrapped engine."""
        await self._engine.start()

    async def emit(self, event: BaseEvent) -> None:
        """Forward an event to the wrapped engine.

        Args:
            event: A ``BaseEvent`` subclass instance.
        """
        await self._engine.emit(event)

    async def drain(self, *, timeout: float = 30.0) -> None:  # noqa: ASYNC109
        """Block until the engine reports that no run is in flight.

        Relies on ``OfflineEngine.wait_idle``. The engine counts a run from
        the moment it is enqueued, so emitting and then draining right away
        does not observe a false idle while the job is still being launched.

        Args:
            timeout: Maximum seconds to wait.

        Raises:
            TimeoutError: Runs are still in flight after ``timeout`` seconds.
        """
        became_idle = await self._engine.wait_idle(timeout=timeout)
        if became_idle:
            return
        raise TimeoutError(
            f"drain: engine still has "
            f"{self._engine._active_runs} in-flight runs after {timeout}s"  # noqa: SLF001
        )

    async def list_runs(
        self,
        strategy_name: str,
        status: RunStatus | None = None,
    ) -> list[RunRecord]:
        """Read run records straight from the engine's run-record store.

        Args:
            strategy_name: The name of the strategy.
            status: Optional status filter (e.g. ``RunStatus.SUCCESS``).

        Returns:
            The records the store returns for that strategy and filter.
        """
        store = self._engine._run_record_store  # noqa: SLF001
        return await store.list_runs(
            strategy_name=strategy_name,
            status=status,
        )
|
||||
76
src/everos/infra/ome/triggers.py
Normal file
76
src/everos/infra/ome/triggers.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""OME trigger types — declarative descriptors of when a strategy fires.
|
||||
|
||||
Three concrete triggers: Immediate / Cron / Idle. Engine dispatches via
|
||||
`isinstance(meta.trigger, ...)` to pick the registration path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated, Self
|
||||
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
||||
|
||||
from everos.infra.ome.events import BaseEvent
|
||||
|
||||
|
||||
class _TriggerBase(BaseModel):
    """Shared Pydantic configuration for trigger descriptors: frozen, no extra fields."""

    model_config = ConfigDict(frozen=True, extra="forbid")
|
||||
|
||||
|
||||
class Immediate(_TriggerBase):
    """Trigger that fires as soon as an event of any class listed in ``on`` is dispatched."""

    # Event classes to react to; at least one is required.
    on: Annotated[list[type[BaseEvent]], Field(min_length=1)]
|
||||
|
||||
|
||||
class Cron(_TriggerBase):
    """Trigger that fires on a cron schedule; the engine emits ``CronTick`` for it."""

    # Non-empty crontab expression.
    expr: Annotated[str, Field(min_length=1)]

    @field_validator("expr")
    @classmethod
    def _validate_crontab(cls, v: str) -> str:
        """Return ``v`` unchanged once APScheduler's crontab parser accepts it."""
        # Parsing with APScheduler's own parser means this model can never
        # hold an expression the scheduler would reject later; the parsed
        # trigger itself is discarded.
        CronTrigger.from_crontab(v)
        return v
|
||||
|
||||
|
||||
class Idle(_TriggerBase):
    """Trigger that fires after a period of silence; the engine emits ``IdleTick``.

    Fires once every class in ``on`` has been silent for ``idle_seconds``,
    evaluated per bucket (the event attribute named by ``event_field``).
    All listed classes must be silent, not just one of them.

    Validation performed at construction time:

    * ``event_field`` must be a declared field on every class in ``on``.
    * ``scan_interval_seconds`` must not exceed ``idle_seconds // 2``.
    """

    on: Annotated[list[type[BaseEvent]], Field(min_length=1)]
    event_field: str
    idle_seconds: Annotated[int, Field(gt=0)]
    scan_interval_seconds: Annotated[
        int,
        Field(gt=0, description="Per-strategy scan cadence; <= idle_seconds / 2."),
    ] = 60

    @model_validator(mode="after")
    def _validate_event_field(self) -> Self:
        """Ensure every event class in ``on`` declares ``event_field``."""
        wanted = self.event_field
        for event_cls in self.on:
            if wanted in event_cls.model_fields:  # type: ignore[operator] # Pydantic model_fields → dict via @deprecated_instance_property (pydantic/main.py:277)
                continue
            available = list(event_cls.model_fields)  # type: ignore[arg-type] # same as above
            raise ValueError(
                f"event_field {self.event_field!r} not found in "
                f"{event_cls.__name__} fields (available: {available})"
            )
        return self

    @model_validator(mode="after")
    def _validate_scan_interval_bound(self) -> Self:
        """Ensure the scan cadence is at most half the idle window (floor division)."""
        ceiling = self.idle_seconds // 2
        if self.scan_interval_seconds <= ceiling:
            return self
        raise ValueError(
            f"Idle: scan_interval_seconds ({self.scan_interval_seconds}) "
            f"must be <= idle_seconds // 2 ({ceiling})"
        )
|
||||
|
||||
|
||||
Trigger = Immediate | Cron | Idle
|
||||
0
src/everos/infra/persistence/__init__.py
Normal file
0
src/everos/infra/persistence/__init__.py
Normal file
132
src/everos/infra/persistence/lancedb/__init__.py
Normal file
132
src/everos/infra/persistence/lancedb/__init__.py
Normal file
@ -0,0 +1,132 @@
|
||||
"""LanceDB business persistence layer.
|
||||
|
||||
Sits on top of :mod:`everos.core.persistence.lancedb` (connection
|
||||
factory + ``BaseLanceTable`` + ``LanceRepoBase``) and provides:
|
||||
|
||||
* lazy process-wide connection + per-name table cache
|
||||
(:mod:`.lancedb_manager`)
|
||||
* concrete schemas under :mod:`.tables`
|
||||
* concrete repository singletons under :mod:`.repos`
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.lancedb import (
|
||||
get_connection, get_table, dispose_connection,
|
||||
Episode, AtomicFact, Foresight, AgentCase, AgentSkill, UserProfile,
|
||||
episode_repo, atomic_fact_repo, foresight_repo,
|
||||
agent_case_repo, agent_skill_repo, user_profile_repo,
|
||||
)
|
||||
|
||||
Three index kinds: scalar / BM25 / vector. Tables are created lazily on
|
||||
first access; row population is the cascade daemon's job (see
|
||||
``12_cascade_design.md``).
|
||||
"""
|
||||
|
||||
# Importing ``tables`` registers every business :class:`BaseLanceTable`
|
||||
# schema so callers can rely on the package alone to surface every schema.
|
||||
from . import tables as tables # noqa: F401
|
||||
from .lancedb_manager import dispose_connection as dispose_connection
|
||||
from .lancedb_manager import get_connection as get_connection
|
||||
from .lancedb_manager import get_table as get_table
|
||||
from .repos import agent_case_repo as agent_case_repo
|
||||
from .repos import agent_skill_repo as agent_skill_repo
|
||||
from .repos import atomic_fact_repo as atomic_fact_repo
|
||||
from .repos import episode_repo as episode_repo
|
||||
from .repos import foresight_repo as foresight_repo
|
||||
from .repos import user_profile_repo as user_profile_repo
|
||||
from .tables import AgentCase as AgentCase
|
||||
from .tables import AgentSkill as AgentSkill
|
||||
from .tables import AtomicFact as AtomicFact
|
||||
from .tables import Episode as Episode
|
||||
from .tables import Foresight as Foresight
|
||||
from .tables import ParentType as ParentType
|
||||
from .tables import UserProfile as UserProfile
|
||||
|
||||
# Every business table schema, in the order the startup helpers below walk them.
_BUSINESS_SCHEMAS = (
    Episode,
    AtomicFact,
    Foresight,
    AgentCase,
    AgentSkill,
    UserProfile,
)
|
||||
|
||||
|
||||
class LanceDBSchemaMismatchError(RuntimeError):
    """An on-disk LanceDB table's columns differ from its Pydantic schema.

    Raised at startup. The LanceDB index is rebuilt from the markdown source
    of truth, so recovery is deterministic: delete the index directory
    (``rm -rf ~/.everos/.index/lancedb``) and let it reindex. The error
    message carries that instruction; ``docs/cascade_runbook.md`` has the
    wider context.
    """
|
||||
|
||||
|
||||
async def ensure_business_indexes() -> None:
    """Ensure FTS (BM25) indexes exist for every business table.

    Called once at startup by :class:`LanceDBLifespanProvider`. Walks every
    schema listed in ``_BUSINESS_SCHEMAS``, opens its table via
    :func:`get_table` using the schema's ``TABLE_NAME``, and delegates to
    ``schema.ensure_fts_indexes(table)``.

    The call is intended to be idempotent: the schema helper is expected to
    skip columns that are already indexed, so re-runs are no-ops.

    Adding a new business table means adding its schema to
    ``_BUSINESS_SCHEMAS``; the table name and the columns to index are read
    off the schema itself.
    """
    for schema in _BUSINESS_SCHEMAS:
        table = await get_table(schema.TABLE_NAME, schema)
        await schema.ensure_fts_indexes(table)
|
||||
|
||||
|
||||
async def verify_business_schemas() -> None:
    """Fail at startup when a table's columns differ from its Pydantic schema.

    LanceDB does not migrate columns on its own, so an index directory
    written by an older schema would otherwise fail unpredictably on
    upsert. For each schema in ``_BUSINESS_SCHEMAS`` this compares the
    column names of the opened table with the model's field names and
    raises as soon as one table has missing or extra columns.

    Raises:
        LanceDBSchemaMismatchError: A table's column names differ from the
            schema's field names. The message lists both differences and
            the recovery command.
    """
    for schema in _BUSINESS_SCHEMAS:
        table = await get_table(schema.TABLE_NAME, schema)
        on_disk = set((await table.schema()).names)
        declared = set(schema.model_fields.keys())

        missing = declared - on_disk
        extra = on_disk - declared
        if not (missing or extra):
            continue

        raise LanceDBSchemaMismatchError(
            f"LanceDB table {schema.TABLE_NAME!r} schema drift: "
            f"missing={sorted(missing)}, extra={sorted(extra)}. "
            "The index is rebuildable from md — recover with "
            "`rm -rf ~/.everos/.index/lancedb` and restart."
        )
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AgentCase",
|
||||
"AgentSkill",
|
||||
"AtomicFact",
|
||||
"Episode",
|
||||
"Foresight",
|
||||
"LanceDBSchemaMismatchError",
|
||||
"ParentType",
|
||||
"UserProfile",
|
||||
"agent_case_repo",
|
||||
"agent_skill_repo",
|
||||
"atomic_fact_repo",
|
||||
"dispose_connection",
|
||||
"ensure_business_indexes",
|
||||
"episode_repo",
|
||||
"foresight_repo",
|
||||
"get_connection",
|
||||
"get_table",
|
||||
"user_profile_repo",
|
||||
"verify_business_schemas",
|
||||
]
|
||||
82
src/everos/infra/persistence/lancedb/lancedb_manager.py
Normal file
82
src/everos/infra/persistence/lancedb/lancedb_manager.py
Normal file
@ -0,0 +1,82 @@
|
||||
"""LanceDB connection + table singletons (lazy + process-wide, async).
|
||||
|
||||
The single place that owns the LanceDB **runtime state**: the async
|
||||
connection and per-name table cache. Connection opens lazily on first
|
||||
:func:`get_connection` call; tables are cached after first
|
||||
:func:`get_table`. The :class:`LanceDBLifespanProvider` calls
|
||||
:func:`dispose_connection` on shutdown; in scripts you can call it
|
||||
manually.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from lancedb import AsyncConnection, AsyncTable
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.persistence import BaseLanceTable, MemoryRoot, open_lancedb_connection
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Process-wide connection handle. Stays ``None`` until
# ``_ensure_connection_locked`` opens it, and is reset to ``None`` by
# ``dispose_connection``.
_conn: AsyncConnection | None = None

# Table handles keyed by table name; filled by ``get_table`` and emptied by
# ``dispose_connection``.
_tables: dict[str, AsyncTable] = {}

# Guards both globals above: every reader/writer in this module takes it.
# NOTE(review): the lock is created at import time — presumably the process
# uses a single event loop; confirm if multiple loops ever touch this module.
_lock = asyncio.Lock()
|
||||
|
||||
|
||||
async def get_connection() -> AsyncConnection:
    """Hand out the shared async LanceDB connection, opening it if needed.

    The first caller triggers the open (location and options come from
    ``MemoryRoot.default().lancedb_dir`` and ``Settings.lancedb``); every
    later caller receives that same instance.

    Returns:
        The process-wide :class:`AsyncConnection`.
    """
    async with _lock:
        connection = await _ensure_connection_locked()
    return connection
|
||||
|
||||
|
||||
async def get_table(
    name: str,
    schema: type[BaseLanceTable],
) -> AsyncTable:
    """Return the cached handle for ``name``, opening or creating it once.

    Args:
        name: Table name to look up in the connection.
        schema: Schema class used to create the table when the connection
            does not list it yet.

    Returns:
        The :class:`AsyncTable` handle; repeated calls with the same
        ``name`` return the cached object.
    """
    async with _lock:
        # Fast path: the handle was resolved by an earlier call.
        if name in _tables:
            return _tables[name]

        conn = await _ensure_connection_locked()
        listing = await conn.list_tables()
        already_exists = name in list(listing.tables)

        if already_exists:
            handle = await conn.open_table(name)
            event = "lancedb_table_opened"
        else:
            handle = await conn.create_table(name, schema=schema)
            event = "lancedb_table_created"

        _tables[name] = handle
        logger.info(event, name=name)
        return handle
|
||||
|
||||
|
||||
async def dispose_connection() -> None:
    """Close the connection and clear the table cache. Idempotent.

    Closing is best-effort: a failure inside ``close()`` is logged as
    ``lancedb_close_failed`` and swallowed so shutdown can proceed. The
    ``lancedb_connection_closed`` event is emitted only when ``close()``
    actually succeeded, so the log never reports a clean close right after
    a failure. In both cases the module state is reset, which lets a later
    :func:`get_connection` open a fresh connection.
    """
    global _conn
    async with _lock:
        if _conn is not None:
            try:
                _conn.close()  # AsyncConnection.close() is sync in lancedb 0.30
            except Exception:
                # Deliberately swallowed: dispose runs on shutdown paths.
                logger.exception("lancedb_close_failed")
            else:
                logger.info("lancedb_connection_closed")
            # Drop the handle regardless of whether close() succeeded.
            _conn = None
        _tables.clear()
|
||||
|
||||
|
||||
async def _ensure_connection_locked() -> AsyncConnection:
    """Return the shared connection, opening it on first use.

    The caller must already hold ``_lock``; this helper does not take it.

    Returns:
        The module-level :class:`AsyncConnection`.
    """
    global _conn
    if _conn is not None:
        return _conn

    # Settings are loaded before the memory root is touched, so a settings
    # failure surfaces before ``ensure()`` runs.
    settings = load_settings()
    root = MemoryRoot.default()
    root.ensure()

    lancedb_dir = root.lancedb_dir
    _conn = await open_lancedb_connection(lancedb_dir, settings.lancedb)
    logger.info("lancedb_connection_opened", path=str(lancedb_dir))
    return _conn
|
||||
37
src/everos/infra/persistence/lancedb/repos/__init__.py
Normal file
37
src/everos/infra/persistence/lancedb/repos/__init__.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""LanceDB repo singletons (one per business table).
|
||||
|
||||
Each repo is a module-level singleton — the table connection is
|
||||
resolved lazily on first call via :func:`..lancedb_manager.get_table`.
|
||||
Subclassing :class:`LanceRepoBase` lets each repo carry table-specific
|
||||
helpers later (e.g. ``find_by_owner``, ``search_for_owner``) without
|
||||
needing a separate factory.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.lancedb.repos import (
|
||||
episode_repo,
|
||||
atomic_fact_repo,
|
||||
foresight_repo,
|
||||
agent_case_repo,
|
||||
agent_skill_repo,
|
||||
user_profile_repo,
|
||||
)
|
||||
|
||||
await episode_repo.add([Episode(...)])
|
||||
"""
|
||||
|
||||
from .agent_case import agent_case_repo as agent_case_repo
|
||||
from .agent_skill import agent_skill_repo as agent_skill_repo
|
||||
from .atomic_fact import atomic_fact_repo as atomic_fact_repo
|
||||
from .episode import episode_repo as episode_repo
|
||||
from .foresight import foresight_repo as foresight_repo
|
||||
from .user_profile import user_profile_repo as user_profile_repo
|
||||
|
||||
__all__ = [
|
||||
"agent_case_repo",
|
||||
"agent_skill_repo",
|
||||
"atomic_fact_repo",
|
||||
"episode_repo",
|
||||
"foresight_repo",
|
||||
"user_profile_repo",
|
||||
]
|
||||
20
src/everos/infra/persistence/lancedb/repos/agent_case.py
Normal file
20
src/everos/infra/persistence/lancedb/repos/agent_case.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LanceDB repo singleton for the ``agent_case`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceDailyLogRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.agent_case import AgentCase
|
||||
|
||||
|
||||
class _AgentCaseRepo(LanceDailyLogRepoBase[AgentCase]):
    """Repo over the ``agent_case`` table, bound to :class:`AgentCase`."""

    schema = AgentCase

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
agent_case_repo = _AgentCaseRepo()
|
||||
84
src/everos/infra/persistence/lancedb/repos/agent_skill.py
Normal file
84
src/everos/infra/persistence/lancedb/repos/agent_skill.py
Normal file
@ -0,0 +1,84 @@
|
||||
"""LanceDB repo singleton for the ``agent_skill`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.agent_skill import AgentSkill
|
||||
|
||||
|
||||
class _AgentSkillRepo(LanceRepoBase[AgentSkill]):
    """Repo over the ``agent_skill`` table with cluster-scoped helpers.

    Every helper filters on the ``(owner_id, cluster_id)`` pair using the
    predicate built by ``_in_cluster``.
    """

    schema = AgentSkill

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)

    async def count_in_cluster(self, *, owner_id: str, cluster_id: str) -> int:
        """Return how many skills sit under one ``(owner_id, cluster_id)``."""
        predicate = _in_cluster(owner_id, cluster_id)
        table = await self._table()
        return await table.count_rows(filter=predicate)

    async def find_in_cluster(
        self, *, owner_id: str, cluster_id: str, limit: int
    ) -> list[AgentSkill]:
        """Fetch up to ``limit`` skills of one cluster, unranked."""
        predicate = _in_cluster(owner_id, cluster_id)
        return await self.find_where(predicate, limit=limit)

    async def find_topk_relevant_in_cluster(
        self,
        *,
        owner_id: str,
        cluster_id: str,
        query_vector: Sequence[float],
        top_k: int,
    ) -> list[AgentSkill]:
        """Return the ``top_k`` skills of a cluster closest to ``query_vector``.

        Ranking is delegated to LanceDB's nearest-neighbour query with
        ``distance_type("cosine")``, the same metric
        :class:`AgentSkillRecaller.dense_recall` uses, so both read paths
        rank consistently.

        Args:
            owner_id: Owner whose skills are searched.
            cluster_id: Cluster the search is restricted to.
            query_vector: Query embedding; must not be empty.
            top_k: Maximum number of rows to return.

        Returns:
            Validated :class:`AgentSkill` rows in the order LanceDB
            returned them.

        Raises:
            ValueError: When ``query_vector`` is empty — having no
                relevance signal is a caller-side policy decision; use
                :meth:`find_in_cluster` for the scalar fallback.
        """
        if not query_vector:
            raise ValueError(
                "query_vector must be non-empty; "
                "call find_in_cluster for the scalar fallback"
            )

        table = await self._table()
        ranked = table.query().nearest_to(list(query_vector)).distance_type("cosine")
        scoped = ranked.where(_in_cluster(owner_id, cluster_id)).limit(top_k)
        hits = await scoped.to_list()

        # Ranked rows carry an extra ``_distance`` key from LanceDB. Remove
        # it before validation so the result does not depend on the
        # schema's pydantic ``extra`` mode.
        skills: list[AgentSkill] = []
        for hit in hits:
            payload = {key: val for key, val in hit.items() if key != "_distance"}
            skills.append(self.schema.model_validate(payload))
        return skills
|
||||
|
||||
|
||||
def _q(value: str) -> str:
|
||||
"""SQL single-quote escape for LanceDB ``where`` predicate literals."""
|
||||
return value.replace("'", "''")
|
||||
|
||||
|
||||
def _in_cluster(owner_id: str, cluster_id: str) -> str:
    """Build the ``where`` predicate matching one ``(owner_id, cluster_id)``.

    Both values are escaped with ``_q`` before being placed inside
    single-quoted literals.
    """
    owner_literal = _q(owner_id)
    cluster_literal = _q(cluster_id)
    return f"owner_id = '{owner_literal}' AND cluster_id = '{cluster_literal}'"
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
agent_skill_repo = _AgentSkillRepo()
|
||||
20
src/everos/infra/persistence/lancedb/repos/atomic_fact.py
Normal file
20
src/everos/infra/persistence/lancedb/repos/atomic_fact.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LanceDB repo singleton for the ``atomic_fact`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceDailyLogRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.atomic_fact import AtomicFact
|
||||
|
||||
|
||||
class _AtomicFactRepo(LanceDailyLogRepoBase[AtomicFact]):
    """Repo over the ``atomic_fact`` table, bound to :class:`AtomicFact`."""

    schema = AtomicFact

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
atomic_fact_repo = _AtomicFactRepo()
|
||||
20
src/everos/infra/persistence/lancedb/repos/episode.py
Normal file
20
src/everos/infra/persistence/lancedb/repos/episode.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LanceDB repo singleton for the ``episode`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceDailyLogRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.episode import Episode
|
||||
|
||||
|
||||
class _EpisodeRepo(LanceDailyLogRepoBase[Episode]):
    """Repo over the ``episode`` table, bound to :class:`Episode`."""

    schema = Episode

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
episode_repo = _EpisodeRepo()
|
||||
20
src/everos/infra/persistence/lancedb/repos/foresight.py
Normal file
20
src/everos/infra/persistence/lancedb/repos/foresight.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LanceDB repo singleton for the ``foresight`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceDailyLogRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.foresight import Foresight
|
||||
|
||||
|
||||
class _ForesightRepo(LanceDailyLogRepoBase[Foresight]):
    """Repo over the ``foresight`` table, bound to :class:`Foresight`."""

    schema = Foresight

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
foresight_repo = _ForesightRepo()
|
||||
20
src/everos/infra/persistence/lancedb/repos/user_profile.py
Normal file
20
src/everos/infra/persistence/lancedb/repos/user_profile.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LanceDB repo singleton for the ``user_profile`` table."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from lancedb import AsyncTable
|
||||
|
||||
from everos.core.persistence.lancedb import LanceRepoBase
|
||||
|
||||
from ..lancedb_manager import get_table
|
||||
from ..tables.user_profile import UserProfile
|
||||
|
||||
|
||||
class _UserProfileRepo(LanceRepoBase[UserProfile]):
    """Repo over the ``user_profile`` table, bound to :class:`UserProfile`."""

    schema = UserProfile

    async def _table_lookup(self) -> AsyncTable:
        """Resolve the backing table via the shared ``get_table`` cache."""
        row_schema = self.schema
        return await get_table(row_schema.TABLE_NAME, row_schema)
|
||||
|
||||
|
||||
# Shared module-level instance; re-exported from the ``repos`` package.
user_profile_repo = _UserProfileRepo()
|
||||
35
src/everos/infra/persistence/lancedb/tables/__init__.py
Normal file
35
src/everos/infra/persistence/lancedb/tables/__init__.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""LanceDB table schemas (one ``BaseLanceTable`` subclass per business table).
|
||||
|
||||
Schemas live here; cascade-daemon-driven row population is wired
|
||||
through the matching repo singletons in :mod:`..repos`.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.lancedb.tables import (
|
||||
Episode,
|
||||
AtomicFact,
|
||||
Foresight,
|
||||
AgentCase,
|
||||
AgentSkill,
|
||||
UserProfile,
|
||||
ParentType,
|
||||
)
|
||||
"""
|
||||
|
||||
from ._parent_type import ParentType as ParentType
|
||||
from .agent_case import AgentCase as AgentCase
|
||||
from .agent_skill import AgentSkill as AgentSkill
|
||||
from .atomic_fact import AtomicFact as AtomicFact
|
||||
from .episode import Episode as Episode
|
||||
from .foresight import Foresight as Foresight
|
||||
from .user_profile import UserProfile as UserProfile
|
||||
|
||||
__all__ = [
|
||||
"AgentCase",
|
||||
"AgentSkill",
|
||||
"AtomicFact",
|
||||
"Episode",
|
||||
"Foresight",
|
||||
"ParentType",
|
||||
"UserProfile",
|
||||
]
|
||||
24
src/everos/infra/persistence/lancedb/tables/_parent_type.py
Normal file
24
src/everos/infra/persistence/lancedb/tables/_parent_type.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""``ParentType`` — provenance label for memory records linked back to a source.
|
||||
|
||||
Currently the only value is :attr:`ParentType.MEMCELL`: every business row
|
||||
(episode / foresight / atomic_fact / agent_case) points back to a source
|
||||
MemCell. The earlier opensource design enumerated ``"episode"`` as an
|
||||
alternative parent but the production path never wrote that value, so the
|
||||
new framework collapses the enum to its single in-use member.
|
||||
|
||||
Kept as an :class:`enum.StrEnum` (rather than a bare string constant) so that
|
||||
adding a future parent kind stays a non-breaking enum extension. LanceDB's
|
||||
pydantic-to-arrow conversion does not accept ``Enum`` field annotations,
|
||||
so table schemas declare ``parent_type: str = ParentType.MEMCELL.value``
|
||||
and reference the enum only at the default-value level.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import StrEnum
|
||||
|
||||
|
||||
class ParentType(StrEnum):
|
||||
"""Provenance label of a memory record's parent."""
|
||||
|
||||
MEMCELL = "memcell"
|
||||
84
src/everos/infra/persistence/lancedb/tables/agent_case.py
Normal file
84
src/everos/infra/persistence/lancedb/tables/agent_case.py
Normal file
@ -0,0 +1,84 @@
|
||||
"""LanceDB ``agent_case`` table schema.
|
||||
|
||||
Field set per 17_lancedb_tables_design.md §3.4. Each row records one
|
||||
task an agent worked on: intent, approach, optional pivotal insight,
|
||||
and a quality score. A MemCell extracted on the agent's own execution
|
||||
log yields at most one AgentCase.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable, Vector
|
||||
|
||||
from ._parent_type import ParentType
|
||||
|
||||
# Dimension passed to ``Vector(...)`` for the ``vector`` column of ``AgentCase``.
_DIM = 1024
|
||||
|
||||
|
||||
class AgentCase(BaseLanceTable):
    """Row schema for the ``agent_case`` LanceDB table.

    A row captures one task an agent worked on: its intent, the approach
    taken, an optional key insight and a quality score. The columns named
    in :attr:`BM25_FIELDS` hold pre-tokenised text; ``task_intent`` and
    ``approach`` keep the original surface form.
    """

    TABLE_NAME: ClassVar[str] = "agent_case"
    BM25_FIELDS: ClassVar[list[str]] = ["task_intent_tokens", "approach_tokens"]

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``<owner_id>_<entry_id>``."""

    entry_id: str
    """md-side seq id ``ac_<YYYYMMDD>_<NNNN>``."""

    owner_id: str
    """The owning ``agent_id``."""

    owner_type: str
    """Fixed ``"agent"`` for this table."""

    # --- scope ----------------------------------------------------------
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""

    session_id: str
    timestamp: _dt.datetime

    # --- provenance -----------------------------------------------------
    parent_type: str = ParentType.MEMCELL.value
    """Source pointer — always :attr:`ParentType.MEMCELL` for agent case."""

    parent_id: str
    """Source memcell id (one memcell ↔ one case)."""

    # --- content --------------------------------------------------------
    quality_score: float
    """0.0–1.0; task completion / quality estimate."""

    task_intent: str
    """≤ 50 tokens; original surface form (returned for display)."""

    task_intent_tokens: str
    """App-layer pre-tokenised ``task_intent`` — BM25 main field
    (whitespace tokenizer); display goes through ``task_intent``."""

    approach: str
    """≤ 1000 tokens; step-by-step approach (display)."""

    approach_tokens: str
    """App-layer pre-tokenised ``approach`` — BM25 secondary field
    (whitespace tokenizer). Searched in parallel with
    ``task_intent_tokens`` then merged by max score in the recall
    layer; task_intent typically scores higher because it's the
    retrieval anchor, but approach catches queries that match a step
    detail."""

    key_insight: str | None = None
    """≤ 40 tokens; pivotal strategy shift, optional."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 hex digest over the **content-bearing fields only** of
    the md entry — TaskIntent / Approach / KeyInsight sections plus
    the ``quality_score`` inline. Audit inline (owner_id /
    session_id / timestamp / parent_id) is NOT in the hash. See
    :attr:`AgentCaseHandler.content_change_keys`."""

    # --- embedding ------------------------------------------------------
    vector: Vector(_DIM)  # type: ignore[valid-type]
|
||||
80
src/everos/infra/persistence/lancedb/tables/agent_skill.py
Normal file
80
src/everos/infra/persistence/lancedb/tables/agent_skill.py
Normal file
@ -0,0 +1,80 @@
|
||||
"""LanceDB ``agent_skill`` table schema.
|
||||
|
||||
Field set per 17_lancedb_tables_design.md §3.5. AgentSkill is a *named
|
||||
entity* rather than a daily-log entry — PK is ``<owner_id>_<skill_name>``
|
||||
(no date / seq), and same agent + same name is the same row (upsert).
|
||||
|
||||
``content`` is cascade-assembled from ``SKILL.md`` body plus every
|
||||
``references/*.md`` sibling; ``scripts/`` is not indexed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable, Vector
|
||||
|
||||
# Dimension passed to ``Vector(...)`` for the ``vector`` column of ``AgentSkill``.
_DIM = 1024
|
||||
|
||||
|
||||
class AgentSkill(BaseLanceTable):
    """Row schema for the ``agent_skill`` LanceDB table.

    A row is one named skill owned by an agent; the primary key combines
    the owner id and the skill name. The columns named in
    :attr:`BM25_FIELDS` hold pre-tokenised text; ``description`` and
    ``content`` keep the original surface form.
    """

    TABLE_NAME: ClassVar[str] = "agent_skill"
    BM25_FIELDS: ClassVar[list[str]] = ["description_tokens", "content_tokens"]

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``<owner_id>_<skill_name>``."""

    owner_id: str
    """The owning ``agent_id``."""

    owner_type: str
    """Fixed ``"agent"`` for this table."""

    # --- scope ----------------------------------------------------------
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""

    # --- content --------------------------------------------------------
    name: str
    """Skill identifier; half of the PK."""

    description: str
    """When-to-use / purpose — original surface form (Tier-1 ad copy)."""

    description_tokens: str
    """App-layer pre-tokenised ``description`` — BM25 main field
    (whitespace tokenizer); display goes through ``description``."""

    content: str
    """Cascade-assembled body: ``SKILL.md`` main text concatenated with
    every ``references/*.md`` sibling. ``scripts/`` files are excluded."""

    content_tokens: str
    """App-layer pre-tokenised ``content`` (secondary BM25 field).
    Tokenised by cascade when assembling ``content`` from md sources."""

    # --- scores and lineage ---------------------------------------------
    confidence: float
    """0.0–1.0; LLM-emitted confidence in the skill."""

    maturity_score: float
    """0.0–1.0; LLM-emitted maturity score. The retrieval-time threshold
    (``maturity_threshold``) lives in MemorizeConfig, not in this row."""

    source_case_ids: list[str]
    """AgentCase ids that fed into this skill's synthesis (lineage)."""

    cluster_id: str | None = None
    """Optional MemScene clustering tag."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 hex digest over the **content-bearing fields only** of
    the skill: ``name`` / ``description`` (frontmatter) + SKILL.md
    body + concatenated references content + ``confidence`` /
    ``maturity_score``. Cascade handler diffs by this digest to skip
    re-upsert + re-embed when neither retrieval-anchor text nor scores
    changed (e.g. the watcher fires for unrelated stat updates). See
    :attr:`AgentSkillHandler.content_change_keys`."""

    # --- embedding ------------------------------------------------------
    vector: Vector(_DIM)  # type: ignore[valid-type]
|
||||
62
src/everos/infra/persistence/lancedb/tables/atomic_fact.py
Normal file
62
src/everos/infra/persistence/lancedb/tables/atomic_fact.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""LanceDB ``atomic_fact`` table schema.
|
||||
|
||||
Field set per 17_lancedb_tables_design.md §3.2. Each row carries one
|
||||
atomic fact extracted by the algo layer; the parent is always the source
|
||||
MemCell — recorded via ``parent_type`` / ``parent_id``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable, Vector
|
||||
|
||||
from ._parent_type import ParentType
|
||||
|
||||
# Dimension passed to ``Vector(...)`` for the ``vector`` column of ``AtomicFact``.
_DIM = 1024
|
||||
|
||||
|
||||
class AtomicFact(BaseLanceTable):
    """Row schema for the ``atomic_fact`` LanceDB table.

    A row holds a single atomic fact together with a pointer to its source
    MemCell (``parent_type`` / ``parent_id``). ``fact_tokens`` is the
    pre-tokenised column named in :attr:`BM25_FIELDS`; ``fact`` keeps the
    original surface form.
    """

    TABLE_NAME: ClassVar[str] = "atomic_fact"
    BM25_FIELDS: ClassVar[list[str]] = ["fact_tokens"]

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``<owner_id>_<entry_id>``."""

    entry_id: str
    """md-side seq id ``af_<YYYYMMDD>_<NNNN>``."""

    # --- ownership and scope --------------------------------------------
    owner_id: str
    owner_type: str
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""
    session_id: str
    timestamp: _dt.datetime

    # --- provenance -----------------------------------------------------
    parent_type: str = ParentType.MEMCELL.value
    """Source pointer — always :attr:`ParentType.MEMCELL` for atomic fact."""

    parent_id: str
    """Source memcell id."""

    # --- content --------------------------------------------------------
    sender_ids: list[str]
    fact: str
    """Atomic fact text — original surface form (returned for display)."""

    fact_tokens: str
    """App-layer pre-tokenised ``fact`` text — space-joined tokens.
    BM25 index is built on this column (whitespace tokenizer);
    ``fact`` itself is what callers display."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 hex digest over the **content-bearing fields only** of
    the md entry (per :attr:`AtomicFactHandler.content_change_keys`).
    Matching digest → skip re-upsert + re-embed. Audit inline fields
    (owner_id / session_id / timestamp / parent_id / sender_ids) are
    NOT in the hash."""

    # --- embedding ------------------------------------------------------
    vector: Vector(_DIM)  # type: ignore[valid-type]
|
||||
78
src/everos/infra/persistence/lancedb/tables/episode.py
Normal file
78
src/everos/infra/persistence/lancedb/tables/episode.py
Normal file
@ -0,0 +1,78 @@
|
||||
"""LanceDB ``episode`` table schema.
|
||||
|
||||
Field set is fixed by the LanceDB tables design spec. Rows are populated
|
||||
by the cascade daemon from ``users/<owner_id>/episodes/episode-<YYYY-MM-DD>.md``
|
||||
and from ``agents/<owner_id>/episodes/...`` symmetrically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable, Vector
|
||||
|
||||
from ._parent_type import ParentType
|
||||
|
||||
# Vector dimension is settings-managed at runtime; the class-level
|
||||
# constant pins the schema dim used at table creation.
|
||||
_DIM = 1024
|
||||
|
||||
|
||||
class Episode(BaseLanceTable):
    """Row schema for the ``episode`` LanceDB table.

    A row holds one episode narrative, optional ``subject`` / ``summary``
    text and a pointer to its source MemCell. ``episode_tokens`` is the
    pre-tokenised column named in :attr:`BM25_FIELDS`; ``episode`` keeps
    the original surface form.
    """

    TABLE_NAME: ClassVar[str] = "episode"
    BM25_FIELDS: ClassVar[list[str]] = ["episode_tokens"]

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``<owner_id>_<entry_id>`` (scalar PK)."""

    entry_id: str
    """md-side seq id ``ep_<YYYYMMDD>_<NNNN>`` (cascade reverse-lookup)."""

    # --- ownership and scope --------------------------------------------
    owner_id: str
    owner_type: str
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""
    session_id: str
    timestamp: _dt.datetime

    # --- provenance -----------------------------------------------------
    parent_type: str = ParentType.MEMCELL.value
    """Source pointer — always :attr:`ParentType.MEMCELL` for episode."""

    parent_id: str
    """Source memcell id. The pipeline knows the memcell currently being
    processed and writes its id into the md entry's inline block; the
    cascade handler reads it back. The new everalgo Episode type no
    longer emits ``parent_id`` itself (collapsed to caller-managed),
    so this is filled entirely from everos's engineering context."""

    sender_ids: list[str]
    """Distinct ``role=user|assistant`` senders behind the episode."""

    # --- content --------------------------------------------------------
    subject: str | None = None
    summary: str | None = None
    episode: str
    """Full narrative text — original surface form (returned for display)."""

    episode_tokens: str
    """App-layer pre-tokenised ``episode`` text — space-joined tokens
    (e.g. produced by jieba). LanceDB FTS index is built on **this**
    column using a whitespace tokenizer; the original ``episode`` field
    is what callers display. Two-field BM25 scheme keeps tokenisation
    deterministic and provider-pluggable at the app layer."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 hex digest over the **content-bearing fields only** of the
    md entry (per :attr:`EpisodeHandler.content_change_keys`). On
    re-reconcile, a matching digest means none of the persistence /
    embedding-relevant fields changed — the entry is skipped (no
    re-upsert, no re-embed). Inline audit fields (owner_id /
    session_id / timestamp / parent_id / sender_ids) are intentionally
    NOT in the hash so editing them doesn't waste an embedding call.
    See ``16_cascade_impl_design.md`` §3.3."""

    # --- embedding ------------------------------------------------------
    vector: Vector(_DIM)  # type: ignore[valid-type]
|
||||
79
src/everos/infra/persistence/lancedb/tables/foresight.py
Normal file
79
src/everos/infra/persistence/lancedb/tables/foresight.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""LanceDB ``foresight`` table schema.
|
||||
|
||||
Field set per 17_lancedb_tables_design.md §3.3. Each row carries a
|
||||
forward-looking inference about the user (intent window, planned
|
||||
action, projected need); ``start_time`` / ``end_time`` describe the
|
||||
window the foresight applies to.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable, Vector
|
||||
|
||||
from ._parent_type import ParentType
|
||||
|
||||
# Dimension passed to ``Vector(...)`` for the ``vector`` column of ``Foresight``.
_DIM = 1024
|
||||
|
||||
|
||||
class Foresight(BaseLanceTable):
    """Row schema for the ``foresight`` LanceDB table.

    A row holds one forward-looking inference, the optional time window it
    applies to (``start_time`` / ``end_time`` / ``duration_days``) and a
    pointer to its source MemCell. The columns named in
    :attr:`BM25_FIELDS` hold pre-tokenised text.
    """

    TABLE_NAME: ClassVar[str] = "foresight"
    BM25_FIELDS: ClassVar[list[str]] = ["foresight_tokens", "evidence_tokens"]

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``<owner_id>_<entry_id>``."""

    entry_id: str
    """md-side seq id ``fs_<YYYYMMDD>_<NNNN>``."""

    # --- ownership and scope --------------------------------------------
    owner_id: str
    owner_type: str
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""
    session_id: str
    timestamp: _dt.datetime
    """Foresight generation time."""

    # --- coverage window ------------------------------------------------
    start_time: _dt.datetime | None = None
    """Foresight coverage window start; tz-aware."""

    end_time: _dt.datetime | None = None
    """Foresight coverage window end; tz-aware."""

    duration_days: int | None = None

    # --- provenance -----------------------------------------------------
    parent_type: str = ParentType.MEMCELL.value
    """Source pointer — always :attr:`ParentType.MEMCELL` for foresight."""

    parent_id: str
    """Source memcell id."""

    # --- content --------------------------------------------------------
    sender_ids: list[str]
    foresight: str
    """Foresight body — original surface form (returned for display)."""

    foresight_tokens: str
    """App-layer pre-tokenised ``foresight`` text — space-joined tokens.
    BM25 index is built on this column (whitespace tokenizer)."""

    evidence: str | None = None
    """Supporting evidence excerpt; may be empty."""

    evidence_tokens: str | None = None
    """App-layer pre-tokenised ``evidence`` (secondary BM25 field).
    ``None`` whenever ``evidence`` is None."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 hex digest over the **content-bearing fields only** of
    the md entry — Foresight / Evidence sections plus the time-window
    inline fields (start_time / end_time / duration_days). Audit inline
    (owner_id / session_id / timestamp / parent_id / sender_ids) is NOT
    in the hash. See :attr:`ForesightHandler.content_change_keys`."""

    # --- embedding ------------------------------------------------------
    vector: Vector(_DIM)  # type: ignore[valid-type]
|
||||
68
src/everos/infra/persistence/lancedb/tables/user_profile.py
Normal file
68
src/everos/infra/persistence/lancedb/tables/user_profile.py
Normal file
@ -0,0 +1,68 @@
|
||||
"""LanceDB ``user_profile`` table schema.
|
||||
|
||||
Profile is a single-file kind: one ``users/<user_id>/user.md`` per
|
||||
user, replaced wholesale on edit (mirrors ``AgentSkill`` for the
|
||||
upsert/single-row contract). The LanceDB row is a typed projection
|
||||
of the md frontmatter that the cascade keeps in sync; it carries no
|
||||
vector / no BM25 because the recall surface is pure KV-by-owner
|
||||
(``fetch(owner_id)``) — when query-aware profile lookup ships later
|
||||
the schema will gain ``vector`` + ``*_tokens`` columns then.
|
||||
|
||||
``explicit_info`` / ``implicit_traits`` are heterogeneous LLM
|
||||
emissions (mostly small dicts mixed with strings) — LanceDB has no
|
||||
``list[dict]`` column type, so we stash them as JSON strings and
|
||||
unpack at the recall boundary into ``profile_data`` of the DTO.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import ClassVar
|
||||
|
||||
from everos.core.persistence.lancedb import BaseLanceTable
|
||||
|
||||
|
||||
class UserProfile(BaseLanceTable):
    """Row schema for the ``user_profile`` LanceDB table.

    One row per user, keyed by ``owner_id``. The row declares no vector
    column and no BM25 fields; the two ``*_json`` columns carry
    JSON-serialised lists as strings.
    """

    TABLE_NAME: ClassVar[str] = "user_profile"
    # No BM25 columns: profile recall is KV-by-owner today.

    # --- identity -------------------------------------------------------
    id: str
    """PK = ``owner_id`` (one row per user)."""

    owner_id: str
    owner_type: str
    """Always ``"user"`` for this schema; agent-side profiles would
    live in a sibling table once that schema lands."""

    # --- scope ----------------------------------------------------------
    app_id: str = "default"
    project_id: str = "default"
    """App / project scope (default ``"default"``); cascade fills from md path."""

    # --- content --------------------------------------------------------
    summary: str
    """Free-form one-paragraph user summary (retrieval anchor for the
    future query-aware lookup; today returned verbatim to the caller)."""

    explicit_info_json: str
    """JSON-serialised ``list[Any]`` — the algo's verbatim evidence
    bucket. Stored as a string because LanceDB has no
    ``list[dict]`` column type. The recaller json-decodes it back into
    ``profile_data['explicit_info']`` at the DTO boundary."""

    implicit_traits_json: str
    """Same shape as :attr:`explicit_info_json`, for the LLM-inferred
    preference bucket."""

    profile_timestamp_ms: int
    """Algo-emitted profile timestamp (ms epoch) — pinned to the
    timestamp of the freshest MemCell that fed into the synthesis.
    Mirrored from :attr:`UserProfileFrontmatter.profile_timestamp_ms`
    so downstream code can compare freshness without re-reading md."""

    # --- md bookkeeping -------------------------------------------------
    md_path: str
    content_sha256: str
    """SHA-256 over the content-bearing frontmatter fields (summary +
    explicit_info_json + implicit_traits_json). Matches → cascade
    skips re-upsert. ``profile_timestamp_ms`` is intentionally not in
    the hash: it drifts with every synthesis even when the underlying
    content is identical, and the LanceDB row treats it as audit."""
|
||||
73
src/everos/infra/persistence/markdown/__init__.py
Normal file
73
src/everos/infra/persistence/markdown/__init__.py
Normal file
@ -0,0 +1,73 @@
|
||||
"""Markdown business persistence layer.
|
||||
|
||||
Sits on top of :mod:`everos.core.persistence.markdown` (atomic write +
|
||||
parse + frontmatter chassis) and provides:
|
||||
|
||||
* concrete frontmatter schemas under :mod:`.mds`
|
||||
* concrete business writers under :mod:`.writers`
|
||||
(``BaseDailyWriter`` + subclasses, ``AgentSkillWriter``,
|
||||
``ProfileWriter``)
|
||||
* concrete business readers under :mod:`.readers`
|
||||
(``BaseDailyReader`` + subclasses, ``AgentSkillReader``,
|
||||
``ProfileReader``)
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.markdown import (
|
||||
BaseDailyWriter, BaseDailyReader,
|
||||
EpisodeWriter, EpisodeReader, EpisodeDailyFrontmatter,
|
||||
AtomicFactDailyFrontmatter,
|
||||
ForesightDailyFrontmatter,
|
||||
AgentCaseDailyFrontmatter,
|
||||
AgentSkillFrontmatter, AgentSkillWriter, AgentSkillReader,
|
||||
ProfileWriter, ProfileReader,
|
||||
)
|
||||
|
||||
Outer layers MUST go through this top-level package because
|
||||
``infra.persistence.markdown.**`` (sub-packages) are forbidden to outer
|
||||
layers by import-linter.
|
||||
"""
|
||||
|
||||
from .mds import AgentCaseDailyFrontmatter as AgentCaseDailyFrontmatter
|
||||
from .mds import AgentSkillFrontmatter as AgentSkillFrontmatter
|
||||
from .mds import AtomicFactDailyFrontmatter as AtomicFactDailyFrontmatter
|
||||
from .mds import EpisodeDailyFrontmatter as EpisodeDailyFrontmatter
|
||||
from .mds import ForesightDailyFrontmatter as ForesightDailyFrontmatter
|
||||
from .mds import UserProfileFrontmatter as UserProfileFrontmatter
|
||||
from .readers import AgentCaseReader as AgentCaseReader
|
||||
from .readers import AgentSkillReader as AgentSkillReader
|
||||
from .readers import AtomicFactReader as AtomicFactReader
|
||||
from .readers import BaseDailyReader as BaseDailyReader
|
||||
from .readers import EpisodeReader as EpisodeReader
|
||||
from .readers import ForesightReader as ForesightReader
|
||||
from .readers import ProfileReader as ProfileReader
|
||||
from .writers import AgentCaseWriter as AgentCaseWriter
|
||||
from .writers import AgentSkillWriter as AgentSkillWriter
|
||||
from .writers import AtomicFactWriter as AtomicFactWriter
|
||||
from .writers import BaseDailyWriter as BaseDailyWriter
|
||||
from .writers import EpisodeWriter as EpisodeWriter
|
||||
from .writers import ForesightWriter as ForesightWriter
|
||||
from .writers import ProfileWriter as ProfileWriter
|
||||
|
||||
__all__ = [
|
||||
"AgentCaseDailyFrontmatter",
|
||||
"AgentCaseReader",
|
||||
"AgentCaseWriter",
|
||||
"AgentSkillFrontmatter",
|
||||
"AgentSkillReader",
|
||||
"AgentSkillWriter",
|
||||
"AtomicFactDailyFrontmatter",
|
||||
"AtomicFactReader",
|
||||
"AtomicFactWriter",
|
||||
"BaseDailyReader",
|
||||
"BaseDailyWriter",
|
||||
"EpisodeDailyFrontmatter",
|
||||
"EpisodeReader",
|
||||
"EpisodeWriter",
|
||||
"ForesightDailyFrontmatter",
|
||||
"ForesightReader",
|
||||
"ForesightWriter",
|
||||
"ProfileReader",
|
||||
"ProfileWriter",
|
||||
"UserProfileFrontmatter",
|
||||
]
|
||||
40
src/everos/infra/persistence/markdown/mds/__init__.py
Normal file
40
src/everos/infra/persistence/markdown/mds/__init__.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""Business markdown frontmatter schemas (mds = "markdown schemas").
|
||||
|
||||
Each business record kind that is stored as markdown gets a concrete
|
||||
frontmatter class here, subclassing one of the chassis classes from
|
||||
:mod:`everos.core.persistence.markdown`:
|
||||
|
||||
* :class:`UserScopedFrontmatter` for user-track records
|
||||
* :class:`AgentScopedFrontmatter` for agent-track records
|
||||
* :class:`BaseFrontmatter` for scope-agnostic records (rare)
|
||||
|
||||
Schemas drive path resolution via ClassVars; each storage strategy has
|
||||
its own conventions:
|
||||
|
||||
- **Daily-log** schemas declare ``ENTRY_ID_PREFIX`` (token in
|
||||
``<prefix>_<date>_<seq>``), ``DIR_NAME`` (sub-directory under
|
||||
``<scope>/<id>/``) and ``FILE_PREFIX`` (leading token of the daily
|
||||
filename joined with ``-<YYYY-MM-DD>.md``).
|
||||
- **Skill** schemas (:class:`AgentSkillFrontmatter`) pin the directory
|
||||
layout via five ClassVars — ``SKILLS_CONTAINER_NAME`` plus four ``SKILL_*`` (dir prefix /
|
||||
main filename / references / scripts).
|
||||
- **Profile** schemas declare ``PROFILE_FILENAME`` (``"user.md"`` /
|
||||
``"agent.md"`` / …) and inherit ``SCOPE_DIR`` from a scope mixin; no
|
||||
profile base class — the writer/reader pair is duck-typed.
|
||||
"""
|
||||
|
||||
from .agent_case import AgentCaseDailyFrontmatter as AgentCaseDailyFrontmatter
|
||||
from .agent_skill import AgentSkillFrontmatter as AgentSkillFrontmatter
|
||||
from .atomic_fact import AtomicFactDailyFrontmatter as AtomicFactDailyFrontmatter
|
||||
from .episode import EpisodeDailyFrontmatter as EpisodeDailyFrontmatter
|
||||
from .foresight import ForesightDailyFrontmatter as ForesightDailyFrontmatter
|
||||
from .profile import UserProfileFrontmatter as UserProfileFrontmatter
|
||||
|
||||
__all__ = [
|
||||
"AgentCaseDailyFrontmatter",
|
||||
"AgentSkillFrontmatter",
|
||||
"AtomicFactDailyFrontmatter",
|
||||
"EpisodeDailyFrontmatter",
|
||||
"ForesightDailyFrontmatter",
|
||||
"UserProfileFrontmatter",
|
||||
]
|
||||
37
src/everos/infra/persistence/markdown/mds/agent_case.py
Normal file
37
src/everos/infra/persistence/markdown/mds/agent_case.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""AgentCase frontmatter — daily-log markdown for agent-scoped cases.
|
||||
|
||||
Path: ``agents/<scope_id>/.cases/agent_case-<YYYY-MM-DD>.md``.
|
||||
|
||||
The directory is dotfile-hidden (``.cases``) so users only see the
|
||||
curated ``agent_skills/`` view, not the raw per-task case log — same
|
||||
convention as ``.atomic_facts`` / ``.foresights``.
|
||||
|
||||
Each entry records one task an agent worked on: intent, approach taken,
|
||||
quality score, and an optional pivotal insight. A MemCell extracted on
|
||||
the agent's own execution log yields at most one AgentCase.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
AgentScopedFrontmatter,
|
||||
DailyLogPathMixin,
|
||||
)
|
||||
|
||||
|
||||
class AgentCaseDailyFrontmatter(DailyLogPathMixin, AgentScopedFrontmatter):
    """File-level frontmatter of one agent's daily case log.

    The file lives at ``agents/<scope>/.cases/agent_case-<YYYY-MM-DD>.md``;
    the three ClassVars below are the pieces that path is assembled from.
    """

    # Entry-id token (ids follow ``<prefix>_<date>_<seq>``).
    ENTRY_ID_PREFIX: ClassVar[str] = "ac"
    # Dot-prefixed sub-directory, so the raw case log stays hidden from users.
    DIR_NAME: ClassVar[str] = ".cases"
    # Leading token of the daily filename.
    FILE_PREFIX: ClassVar[str] = "agent_case"

    type: Literal["agent_case_daily"] = "agent_case_daily"
    # NOTE(review): same literal as ``type`` — presumably a compatibility
    # alias; confirm before dropping either field.
    file_type: Literal["agent_case_daily"] = "agent_case_daily"
    # Day bucket of the file — presumably the date in the filename; confirm.
    date: _dt.date
    # Presumably maintained by the writer on each append — TODO confirm.
    entry_count: int = 0
    created_at: _dt.datetime | None = None
    last_appended_at: _dt.datetime | None = None
|
||||
63
src/everos/infra/persistence/markdown/mds/agent_skill.py
Normal file
63
src/everos/infra/persistence/markdown/mds/agent_skill.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""AgentSkill frontmatter — single SKILL.md inside a skill directory.
|
||||
|
||||
Path: ``agents/<scope_id>/skills/skill_<name>/SKILL.md`` (plus sibling
|
||||
``references/*.md`` and ``scripts/*.<ext>`` files that are not part of
|
||||
the frontmatter contract).
|
||||
|
||||
Skills are *named entities* rather than daily-log entries: the
|
||||
LanceDB primary key is ``<owner_id>_<skill_name>`` (no date / seq).
|
||||
Upserts replace the file wholesale; the cascade daemon recomputes the
|
||||
``content`` index column by concatenating ``SKILL.md`` body with every
|
||||
``references/*.md`` sibling.
|
||||
|
||||
Five directory-shape ClassVars pin the layout in one place so the
|
||||
writer / reader pair reads off them — no duplicated string literals.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
AgentScopedFrontmatter,
|
||||
SkillPathMixin,
|
||||
)
|
||||
|
||||
|
||||
class AgentSkillFrontmatter(SkillPathMixin, AgentScopedFrontmatter):
    """Frontmatter of the ``SKILL.md`` file inside one skill directory.

    Target path: ``agents/<scope>/skills/skill_<name>/SKILL.md``. The five
    directory-shape ClassVars come first, followed by the per-skill fields.
    """

    # ── Directory layout (shared by the writer / reader pair) ─────────────
    # Container directory under ``agents/<scope>/``.
    SKILLS_CONTAINER_NAME: ClassVar[str] = "skills"
    # Prefix of each per-skill directory (``skill_<name>``).
    SKILL_DIR_PREFIX: ClassVar[str] = "skill_"
    # Main markdown file carrying this frontmatter.
    SKILL_MAIN_FILENAME: ClassVar[str] = "SKILL.md"
    # Sibling directory holding ``references/*.md``.
    SKILL_REFERENCES_DIR_NAME: ClassVar[str] = "references"
    # Sibling directory holding ``scripts/*.<ext>``.
    SKILL_SCRIPTS_DIR_NAME: ClassVar[str] = "scripts"

    # ── Frontmatter fields ────────────────────────────────────────────────
    type: Literal["agent_skill"] = "agent_skill"

    name: str
    """Skill identifier — also the directory suffix
    (``skills/skill_<name>/``). Keep snake_case so it is filesystem-safe
    and ID-stable."""

    description: str
    """One-line summary surfaced at Tier-1 prompt injection. Short — the
    agent's startup-time scanner reads ``(name, description)`` for every
    skill, so the token budget is tight."""

    confidence: float
    """LLM-emitted confidence in the skill's correctness, 0.0–1.0."""

    maturity_score: float
    """LLM-emitted maturity score, 0.0–1.0. The retrieval-time threshold
    (``maturity_threshold``) lives in MemorizeConfig, not on this file."""

    source_case_ids: list[str] = []
    """AgentCase ids that fed into this skill's synthesis (lineage)."""

    cluster_id: str | None = None
    """Optional MemScene clustering tag; may be unset early on."""

    # Both timestamps are optional and default to ``None``.
    created_at: _dt.datetime | None = None
    updated_at: _dt.datetime | None = None
|
||||
38
src/everos/infra/persistence/markdown/mds/atomic_fact.py
Normal file
38
src/everos/infra/persistence/markdown/mds/atomic_fact.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""AtomicFact frontmatter — daily-log markdown for user-scoped atomic facts.
|
||||
|
||||
Path: ``users/<scope_id>/.atomic_facts/atomic_fact-<YYYY-MM-DD>.md``.
|
||||
|
||||
The directory is dot-prefixed so it is hidden from end users (same
|
||||
convention as ``.index``); atomic facts are framework-internal derived md,
|
||||
not material the user is expected to read by hand.
|
||||
|
||||
Each entry carries one atomic fact extracted by the algo layer; the fact
|
||||
always hangs off the source MemCell (see ``parent_type`` in each entry's
|
||||
inline fields — handled at the StructuredEntry layer, not on the
|
||||
file-level frontmatter).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
DailyLogPathMixin,
|
||||
UserScopedFrontmatter,
|
||||
)
|
||||
|
||||
|
||||
class AtomicFactDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
    """File-level frontmatter of one user's daily atomic-fact log.

    The file lives at
    ``users/<scope>/.atomic_facts/atomic_fact-<YYYY-MM-DD>.md``; the three
    ClassVars below are the pieces that path is assembled from.
    """

    # Entry-id token (ids follow ``<prefix>_<date>_<seq>``).
    ENTRY_ID_PREFIX: ClassVar[str] = "af"
    # Dot-prefixed sub-directory: framework-internal, hidden from end users.
    DIR_NAME: ClassVar[str] = ".atomic_facts"
    # Leading token of the daily filename.
    FILE_PREFIX: ClassVar[str] = "atomic_fact"

    type: Literal["atomic_fact_daily"] = "atomic_fact_daily"
    # NOTE(review): same literal as ``type`` — presumably a compatibility
    # alias; confirm before dropping either field.
    file_type: Literal["atomic_fact_daily"] = "atomic_fact_daily"
    # Day bucket of the file — presumably the date in the filename; confirm.
    date: _dt.date
    # Presumably maintained by the writer on each append — TODO confirm.
    entry_count: int = 0
    created_at: _dt.datetime | None = None
    last_appended_at: _dt.datetime | None = None
|
||||
33
src/everos/infra/persistence/markdown/mds/episode.py
Normal file
33
src/everos/infra/persistence/markdown/mds/episode.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""Episode frontmatter — daily-log markdown for user-scoped episodes.
|
||||
|
||||
Path: ``users/<scope_id>/episodes/episode-<YYYY-MM-DD>.md``.
|
||||
|
||||
This milestone uses ``session_id`` as the scope key (since owner inference
|
||||
is out of scope). When owner inference lands the scope key will switch to
|
||||
``owner_id`` while the schema stays compatible.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
DailyLogPathMixin,
|
||||
UserScopedFrontmatter,
|
||||
)
|
||||
|
||||
|
||||
class EpisodeDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
    """File-level frontmatter of one user's daily episode log.

    The file lives at ``users/<scope>/episodes/episode-<YYYY-MM-DD>.md``;
    the three ClassVars below are the pieces that path is assembled from.
    """

    # Entry-id token (ids follow ``<prefix>_<date>_<seq>``).
    ENTRY_ID_PREFIX: ClassVar[str] = "ep"
    # Sub-directory under ``users/<scope>/`` (not dot-hidden, unlike the
    # derived ``.atomic_facts`` / ``.foresights`` logs).
    DIR_NAME: ClassVar[str] = "episodes"
    # Leading token of the daily filename.
    FILE_PREFIX: ClassVar[str] = "episode"

    type: Literal["episode_daily"] = "episode_daily"
    # NOTE(review): same literal as ``type`` — presumably a compatibility
    # alias; confirm before dropping either field.
    file_type: Literal["episode_daily"] = "episode_daily"
    # Day bucket of the file — presumably the date in the filename; confirm.
    date: _dt.date
    # Presumably maintained by the writer on each append — TODO confirm.
    entry_count: int = 0
    created_at: _dt.datetime | None = None
    last_appended_at: _dt.datetime | None = None
|
||||
38
src/everos/infra/persistence/markdown/mds/foresight.py
Normal file
38
src/everos/infra/persistence/markdown/mds/foresight.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""Foresight frontmatter — daily-log markdown for user-scoped foresights.
|
||||
|
||||
Path: ``users/<scope_id>/.foresights/foresight-<YYYY-MM-DD>.md``.
|
||||
|
||||
The directory is dot-prefixed so it is hidden from end users (same
|
||||
convention as ``.index``); foresights are framework-internal derived md,
|
||||
not material the user is expected to read by hand.
|
||||
|
||||
Each entry carries a forward-looking inference about the user (intent
|
||||
window, planned action, projected need) with ``start_time`` /
|
||||
``end_time`` describing the covered time range. ``parent_type`` always
|
||||
points back to a MemCell.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import (
|
||||
DailyLogPathMixin,
|
||||
UserScopedFrontmatter,
|
||||
)
|
||||
|
||||
|
||||
class ForesightDailyFrontmatter(DailyLogPathMixin, UserScopedFrontmatter):
    """File-level frontmatter of one user's daily foresight log.

    The file lives at
    ``users/<scope>/.foresights/foresight-<YYYY-MM-DD>.md``; the three
    ClassVars below are the pieces that path is assembled from.
    """

    # Entry-id token (ids follow ``<prefix>_<date>_<seq>``).
    ENTRY_ID_PREFIX: ClassVar[str] = "fs"
    # Dot-prefixed sub-directory: framework-internal, hidden from end users.
    DIR_NAME: ClassVar[str] = ".foresights"
    # Leading token of the daily filename.
    FILE_PREFIX: ClassVar[str] = "foresight"

    type: Literal["foresight_daily"] = "foresight_daily"
    # NOTE(review): same literal as ``type`` — presumably a compatibility
    # alias; confirm before dropping either field.
    file_type: Literal["foresight_daily"] = "foresight_daily"
    # Day bucket of the file — presumably the date in the filename; confirm.
    date: _dt.date
    # Presumably maintained by the writer on each append — TODO confirm.
    entry_count: int = 0
    created_at: _dt.datetime | None = None
    last_appended_at: _dt.datetime | None = None
|
||||
40
src/everos/infra/persistence/markdown/mds/profile.py
Normal file
40
src/everos/infra/persistence/markdown/mds/profile.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""UserProfile frontmatter — single-file profile markdown for users.
|
||||
|
||||
Path: ``users/<user_id>/user.md``.
|
||||
|
||||
Carries the LLM-synthesised user profile: a free-form ``summary`` plus the
|
||||
two evidence buckets emitted by :class:`everalgo.user_memory.ProfileExtractor`
|
||||
(``explicit_info`` / ``implicit_traits``). ``profile_timestamp_ms``
|
||||
mirrors :attr:`everalgo.types.Profile.timestamp` so the
|
||||
``extract_user_profile`` strategy can compare per-user freshness against
|
||||
cluster ``last_ts`` without re-parsing the body.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, ClassVar, Literal
|
||||
|
||||
from everos.core.persistence.markdown import ProfilePathMixin, UserScopedFrontmatter
|
||||
|
||||
|
||||
class UserProfileFrontmatter(ProfilePathMixin, UserScopedFrontmatter):
    """Frontmatter of the single-file user profile, ``users/<user_id>/user.md``.

    Holds the synthesised summary, the two evidence buckets and the profile
    timestamp. Every field has a default, so an empty profile validates.
    """

    # Fixed filename of the profile inside the user's directory.
    PROFILE_FILENAME: ClassVar[str] = "user.md"

    type: Literal["user_profile"] = "user_profile"

    summary: str = ""
    """Free-form one-paragraph summary of the user — the retrieval anchor."""

    explicit_info: list[Any] = []
    """Algo-side ``explicit_info`` bucket (verbatim facts the user stated)."""

    implicit_traits: list[Any] = []
    """Algo-side ``implicit_traits`` bucket (LLM-inferred preferences)."""

    profile_timestamp_ms: int = 0
    """Algo-emitted profile timestamp (ms epoch); equals the timestamp of
    the most recent MemCell that fed into the synthesis. Compared with
    :attr:`everos.infra.persistence.sqlite.Cluster.last_ts_ms` to decide
    whether a cluster is fresh enough to drive a profile re-extraction."""
|
||||
49
src/everos/infra/persistence/markdown/readers/__init__.py
Normal file
49
src/everos/infra/persistence/markdown/readers/__init__.py
Normal file
@ -0,0 +1,49 @@
|
||||
"""Business markdown readers — symmetric with the writers.
|
||||
|
||||
Daily-log markdown is parsed via :class:`MarkdownReader` from ``core``
|
||||
(the base reader returns frontmatter dict + body + entry markers, all
|
||||
schema-agnostic). Reader classes here add the **business-aware
|
||||
locator** layer:
|
||||
|
||||
* :class:`BaseDailyReader` + subclasses — bind a daily-log schema,
|
||||
resolve ``(scope_id, date)`` to a file, locate entries by id,
|
||||
and optionally upgrade to :class:`StructuredEntry`. Symmetric
|
||||
with :class:`BaseDailyWriter`.
|
||||
* :class:`AgentSkillReader` — reads ``SKILL.md`` and parses the
|
||||
frontmatter into the caller-supplied ``AgentSkillFrontmatter``
|
||||
subclass; also reads individual reference / script files.
|
||||
* :class:`ProfileReader` — reads a fixed-name profile file
|
||||
(``user.md`` / ``agent.md`` / ``soul.md`` / …) and parses its
|
||||
frontmatter into the caller-supplied schema.
|
||||
|
||||
By design, no batch / list APIs live here: bulk enumeration for
|
||||
prompt-budget or cross-record queries goes through sqlite/lancedb
|
||||
(see the cascade daemon's index sync), not a markdown directory walk.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.markdown.readers import (
|
||||
BaseDailyReader,
|
||||
EpisodeReader,
|
||||
AgentSkillReader,
|
||||
ProfileReader,
|
||||
)
|
||||
"""
|
||||
|
||||
from .agent_case_reader import AgentCaseReader as AgentCaseReader
|
||||
from .agent_skill_reader import AgentSkillReader as AgentSkillReader
|
||||
from .atomic_fact_reader import AtomicFactReader as AtomicFactReader
|
||||
from .base import BaseDailyReader as BaseDailyReader
|
||||
from .episode_reader import EpisodeReader as EpisodeReader
|
||||
from .foresight_reader import ForesightReader as ForesightReader
|
||||
from .profile_reader import ProfileReader as ProfileReader
|
||||
|
||||
__all__ = [
|
||||
"AgentCaseReader",
|
||||
"AgentSkillReader",
|
||||
"AtomicFactReader",
|
||||
"BaseDailyReader",
|
||||
"EpisodeReader",
|
||||
"ForesightReader",
|
||||
"ProfileReader",
|
||||
]
|
||||
@ -0,0 +1,31 @@
|
||||
"""AgentCase daily-log reader — symmetric with :class:`AgentCaseWriter`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
|
||||
from ..mds import AgentCaseDailyFrontmatter
|
||||
from .base import BaseDailyReader
|
||||
|
||||
|
||||
class AgentCaseReader(BaseDailyReader):
    """Daily-log reader bound to :class:`AgentCaseDailyFrontmatter`."""

    schema = AgentCaseDailyFrontmatter

    def __init__(self, root: MemoryRoot) -> None:
        # Nothing reader-specific to set up; defer entirely to the base class.
        super().__init__(root)

    def path_for(
        self,
        agent_id: str,
        date: _dt.date | None = None,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Locate the agent-case daily-log file for ``agent_id``.

        Thin override that names the first parameter ``agent_id``; every
        argument is forwarded unchanged to the base ``path_for``, which
        places the file under the ``<app>/<project>`` prefix.
        """
        scope = {"app_id": app_id, "project_id": project_id}
        return super().path_for(agent_id, date, **scope)
|
||||
@ -0,0 +1,161 @@
|
||||
"""AgentSkillReader — typed read for the AgentSkill directory layout.
|
||||
|
||||
Pairs with :class:`AgentSkillWriter`:
|
||||
|
||||
- :meth:`read_main` reads ``SKILL.md`` and returns the caller's
|
||||
:class:`AgentSkillFrontmatter` subclass instance + the Tier-2 body, so
|
||||
the caller never deals with raw dicts.
|
||||
- :meth:`read_reference` / :meth:`read_script` are plain text reads;
|
||||
no frontmatter, no schema.
|
||||
|
||||
All three return ``None`` when the target is missing — readers do not
|
||||
raise on absence, since "skill not yet created" is a normal state for
|
||||
the upsert-style workflow. Callers that need to distinguish "missing"
|
||||
from "empty body" check for ``None`` explicitly.
|
||||
|
||||
Path resolution mirrors :class:`AgentSkillWriter` and reads the same
|
||||
ClassVars off :class:`AgentSkillFrontmatter`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TypeVar
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.core.persistence import MarkdownReader, MemoryRoot
|
||||
|
||||
from ..mds import AgentSkillFrontmatter
|
||||
|
||||
T = TypeVar("T", bound=AgentSkillFrontmatter)
|
||||
|
||||
|
||||
class AgentSkillReader:
    """Reader for one skill directory (``SKILL.md`` + references + scripts).

    Every ``read_*`` coroutine resolves to ``None`` when its target is not a
    file on disk instead of raising.
    """

    def __init__(self, root: MemoryRoot) -> None:
        self._root = root

    # ── Public API ────────────────────────────────────────────────────────

    async def read_main(
        self,
        agent_id: str,
        skill_name: str,
        *,
        schema: type[T],
        app_id: str = "default",
        project_id: str = "default",
    ) -> tuple[T, str] | None:
        """Load ``SKILL.md`` as a typed ``(frontmatter, body)`` pair.

        Args:
            agent_id: Agent whose skill directory is read.
            skill_name: Skill name (suffix of the ``skill_`` directory).
            schema: Concrete :class:`AgentSkillFrontmatter` subclass; the
                parsed frontmatter dict is validated with its
                ``model_validate``.
            app_id: App scope segment of the path.
            project_id: Project scope segment of the path.

        Returns:
            ``None`` when ``SKILL.md`` is not a file on disk; otherwise the
            validated frontmatter and the body with all trailing newlines
            removed.
        """
        skill_md = self._main_path(agent_id, skill_name, app_id, project_id)
        if await anyio.Path(skill_md).is_file():
            document = await MarkdownReader.read(skill_md)
            validated = schema.model_validate(document.frontmatter)
            return validated, document.body.rstrip("\n")
        return None

    async def read_reference(
        self,
        agent_id: str,
        skill_name: str,
        reference_name: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> str | None:
        """Return the text of ``references/<reference_name>.md``.

        Trailing newlines are stripped; ``None`` when the file is absent.
        """
        target = self._reference_path(
            agent_id, skill_name, reference_name, app_id, project_id
        )
        return await self._read_text_or_none(target)

    async def read_script(
        self,
        agent_id: str,
        skill_name: str,
        script_filename: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> str | None:
        """Return the source text of ``scripts/<script_filename>``.

        Trailing newlines are stripped; ``None`` when the file is absent.
        The script is only read, never executed — any sandboxing or
        exec-policy decision is left to the caller.
        """
        target = self._script_path(
            agent_id, skill_name, script_filename, app_id, project_id
        )
        return await self._read_text_or_none(target)

    # ── Internals — same shape as AgentSkillWriter ────────────────────────────

    @staticmethod
    async def _read_text_or_none(path: Path) -> str | None:
        """Read ``path`` as UTF-8 minus trailing newlines, ``None`` if no file."""
        handle = anyio.Path(path)
        if await handle.is_file():
            content = await handle.read_text(encoding="utf-8")
            return content.rstrip("\n")
        return None

    def _skill_dir(
        self, agent_id: str, skill_name: str, app_id: str, project_id: str
    ) -> Path:
        """Directory of one skill: ``<agents>/<agent_id>/skills/skill_<name>``."""
        agent_home = self._root.agents_dir(app_id, project_id) / agent_id
        container = agent_home / AgentSkillFrontmatter.SKILLS_CONTAINER_NAME
        return container / f"{AgentSkillFrontmatter.SKILL_DIR_PREFIX}{skill_name}"

    def _main_path(
        self, agent_id: str, skill_name: str, app_id: str, project_id: str
    ) -> Path:
        """Path of the skill's main markdown file."""
        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
        return skill_home / AgentSkillFrontmatter.SKILL_MAIN_FILENAME

    def _reference_path(
        self,
        agent_id: str,
        skill_name: str,
        reference_name: str,
        app_id: str,
        project_id: str,
    ) -> Path:
        """Path of one reference file; ``.md`` is appended to the name."""
        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
        references = skill_home / AgentSkillFrontmatter.SKILL_REFERENCES_DIR_NAME
        return references / f"{reference_name}.md"

    def _script_path(
        self,
        agent_id: str,
        skill_name: str,
        script_filename: str,
        app_id: str,
        project_id: str,
    ) -> Path:
        """Path of one script file; the filename is used as given."""
        skill_home = self._skill_dir(agent_id, skill_name, app_id, project_id)
        scripts = skill_home / AgentSkillFrontmatter.SKILL_SCRIPTS_DIR_NAME
        return scripts / script_filename
|
||||
@ -0,0 +1,31 @@
|
||||
"""AtomicFact daily-log reader — symmetric with :class:`AtomicFactWriter`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
|
||||
from ..mds import AtomicFactDailyFrontmatter
|
||||
from .base import BaseDailyReader
|
||||
|
||||
|
||||
class AtomicFactReader(BaseDailyReader):
    """Daily-log reader bound to :class:`AtomicFactDailyFrontmatter`."""

    schema = AtomicFactDailyFrontmatter

    def __init__(self, root: MemoryRoot) -> None:
        # Nothing reader-specific to set up; defer entirely to the base class.
        super().__init__(root)

    def path_for(
        self,
        owner_id: str,
        date: _dt.date | None = None,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Locate the atomic-fact daily-log file for ``owner_id``.

        Thin override that names the first parameter ``owner_id``; every
        argument is forwarded unchanged to the base ``path_for``, which
        places the file under the ``<app>/<project>`` prefix.
        """
        scope = {"app_id": app_id, "project_id": project_id}
        return super().path_for(owner_id, date, **scope)
|
||||
177
src/everos/infra/persistence/markdown/readers/base.py
Normal file
177
src/everos/infra/persistence/markdown/readers/base.py
Normal file
@ -0,0 +1,177 @@
|
||||
"""Base business reader for daily-log markdown files.
|
||||
|
||||
Symmetric to :class:`BaseDailyWriter`: reads the daily-log file for
|
||||
a given ``(scope_id, date)``, locates entries by id within it, and
|
||||
optionally upgrades them to :class:`StructuredEntry` so service-layer
|
||||
callers don't have to re-do that plumbing each time.
|
||||
|
||||
Subclass usage::
|
||||
|
||||
class _MemcellReader(BaseDailyReader):
|
||||
schema = UserMemcellDailyFrontmatter
|
||||
|
||||
reader = _MemcellReader(root)
|
||||
parsed = await reader.read_for("u_jason")  # today's file
|
||||
entry = await reader.find_entry("u_jason", "umc_20260422_0001")
|
||||
structured = await reader.find_structured("u_jason", entry.id)
|
||||
|
||||
The reader does **not** typed-parse the file's frontmatter dict — the
|
||||
schema is used only for path resolution (matching what the appender
|
||||
writes). Frontmatter validation belongs to higher-level callers that
|
||||
know the business rules.
|
||||
|
||||
Path resolution is identical to :class:`BaseDailyWriter` (same
|
||||
``SCOPE_DIR`` / ``DIR_NAME`` / ``FILE_PREFIX`` ClassVars), so a
|
||||
reader and writer bound to the same schema agree on every path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
from typing import ClassVar
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import today_with_timezone
|
||||
from everos.core.persistence import (
|
||||
BaseFrontmatter,
|
||||
Entry,
|
||||
EntryId,
|
||||
MarkdownReader,
|
||||
MemoryRoot,
|
||||
ParsedMarkdown,
|
||||
StructuredEntry,
|
||||
find_entry,
|
||||
)
|
||||
|
||||
|
||||
class BaseDailyReader:
|
||||
"""Single-record reader for daily-log markdown files.
|
||||
|
||||
Subclasses bind a :class:`BaseFrontmatter` subclass via the
|
||||
``schema`` ClassVar. The schema must declare ``SCOPE_DIR``,
|
||||
``DIR_NAME``, and ``FILE_PREFIX`` (same set the appender uses); no
|
||||
``ENTRY_ID_PREFIX`` requirement here because the reader takes the
|
||||
entry id from the caller, not the schema.
|
||||
"""
|
||||
|
||||
schema: ClassVar[type[BaseFrontmatter]] # subclass must declare
|
||||
|
||||
def __init__(self, root: MemoryRoot) -> None:
|
||||
schema = getattr(type(self), "schema", None)
|
||||
if schema is None:
|
||||
raise TypeError(
|
||||
f"{type(self).__name__} must declare a class-level ``schema`` attribute"
|
||||
)
|
||||
for attr in ("SCOPE_DIR", "DIR_NAME", "FILE_PREFIX"):
|
||||
if not getattr(schema, attr, None):
|
||||
raise TypeError(f"{schema.__name__} missing ClassVar {attr!r}")
|
||||
self._root = root
|
||||
|
||||
# ── Public API ────────────────────────────────────────────────────────
|
||||
|
||||
async def read_for(
|
||||
self,
|
||||
scope_id: str,
|
||||
date: _dt.date | None = None,
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> ParsedMarkdown | None:
|
||||
"""Read the daily-log file for ``(scope_id, date)``.
|
||||
|
||||
Args:
|
||||
scope_id: ``user_id`` or ``agent_id``.
|
||||
date: Date bucket — defaults to today in the configured TZ.
|
||||
app_id: App scope segment (defaults to the ``"default"`` space).
|
||||
project_id: Project scope segment (defaults to ``"default"``).
|
||||
|
||||
Returns:
|
||||
:class:`ParsedMarkdown` (frontmatter dict + body + entries),
|
||||
or ``None`` when the file does not exist on disk. ``None``
|
||||
avoids forcing every caller to wrap reads in try/except —
|
||||
"no file yet" is a normal early state.
|
||||
"""
|
||||
path = self._resolve_path(
|
||||
scope_id, date or today_with_timezone(), app_id, project_id
|
||||
)
|
||||
if not await anyio.Path(path).is_file():
|
||||
return None
|
||||
return await MarkdownReader.read(path)
|
||||
|
||||
async def find_entry(
|
||||
self,
|
||||
scope_id: str,
|
||||
entry_id: str | EntryId,
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> Entry | None:
|
||||
"""Locate the entry with ``entry_id`` inside its daily-log file.
|
||||
|
||||
The date bucket is taken from the entry id (an :class:`EntryId`
|
||||
encodes its own date), so the caller doesn't pass a date.
|
||||
Returns ``None`` if either the file or the entry is missing.
|
||||
"""
|
||||
eid = entry_id if isinstance(entry_id, EntryId) else EntryId.parse(entry_id)
|
||||
eid_str = eid.format()
|
||||
parsed = await self.read_for(
|
||||
scope_id, eid.date, app_id=app_id, project_id=project_id
|
||||
)
|
||||
if parsed is None:
|
||||
return None
|
||||
return find_entry(parsed.body, eid_str)
|
||||
|
||||
async def find_structured(
|
||||
self,
|
||||
scope_id: str,
|
||||
entry_id: str | EntryId,
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> StructuredEntry | None:
|
||||
"""Locate the entry and parse its body as audit-form data.
|
||||
|
||||
Sugar over :meth:`find_entry` + :meth:`Entry.as_structured`.
|
||||
Returns ``None`` if the entry is missing.
|
||||
"""
|
||||
entry = await self.find_entry(
|
||||
scope_id, entry_id, app_id=app_id, project_id=project_id
|
||||
)
|
||||
if entry is None:
|
||||
return None
|
||||
return entry.as_structured()
|
||||
|
||||
def path_for(
|
||||
self,
|
||||
scope_id: str,
|
||||
date: _dt.date | None = None,
|
||||
*,
|
||||
app_id: str = "default",
|
||||
project_id: str = "default",
|
||||
) -> Path:
|
||||
"""Return the daily-log path for ``scope_id`` on ``date`` (today default).
|
||||
|
||||
Public counterpart of :meth:`_resolve_path` — symmetric with
|
||||
:meth:`BaseDailyWriter.path_for`. Does not check existence.
|
||||
"""
|
||||
return self._resolve_path(
|
||||
scope_id, date or today_with_timezone(), app_id, project_id
|
||||
)
|
||||
|
||||
# ── Internals ─────────────────────────────────────────────────────────
|
||||
|
||||
def _resolve_path(
    self, scope_id: str, date: _dt.date, app_id: str, project_id: str
) -> Path:
    """Assemble the daily-log path from the bound schema's ClassVars.

    The result has the shape
    ``<scope root>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.
    """
    schema = self.schema
    # ``SCOPE_DIR`` doubles as the stem of the root method to call
    # (``<SCOPE_DIR>_dir``); that method receives the app / project ids.
    root_method = getattr(self._root, f"{schema.SCOPE_DIR}_dir")
    scope_root = root_method(app_id, project_id)
    log_dir = scope_root / scope_id / schema.DIR_NAME
    return log_dir / f"{schema.FILE_PREFIX}-{date.isoformat()}.md"
|
||||
@ -0,0 +1,41 @@
|
||||
"""Episode daily-log reader — symmetric with :class:`EpisodeWriter`.
|
||||
|
||||
md is the source of truth for Episode memories; this reader gives
|
||||
cascade / search / verification scripts a typed locator instead of
|
||||
raw :class:`MarkdownReader` calls.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
|
||||
from ..mds import EpisodeDailyFrontmatter
|
||||
from .base import BaseDailyReader
|
||||
|
||||
|
||||
class EpisodeReader(BaseDailyReader):
    """Reader for episode daily-log markdown files."""

    # Schema whose ClassVars the base reader uses for path resolution.
    schema = EpisodeDailyFrontmatter

    def __init__(self, root: MemoryRoot) -> None:
        """Initialise the base reader with ``root``."""
        super().__init__(root)

    def path_for(
        self,
        owner_id: str,
        date: _dt.date | None = None,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Return the episode daily-log path for ``owner_id`` on ``date``.

        Same resolution as the base class; the only difference is that the
        scope argument is exposed under the name ``owner_id``. The date
        default (today) is applied by the base implementation.

        Args:
            owner_id: Owner whose log is addressed (the base ``scope_id``).
            date: Date bucket, forwarded unchanged.
            app_id: App segment of the ``<app>/<project>`` prefix.
            project_id: Project segment of the ``<app>/<project>`` prefix.
        """
        location = super().path_for(
            owner_id,
            date,
            app_id=app_id,
            project_id=project_id,
        )
        return location
|
||||
@ -0,0 +1,31 @@
|
||||
"""Foresight daily-log reader — symmetric with :class:`ForesightWriter`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MemoryRoot
|
||||
|
||||
from ..mds import ForesightDailyFrontmatter
|
||||
from .base import BaseDailyReader
|
||||
|
||||
|
||||
class ForesightReader(BaseDailyReader):
    """Reader for foresight daily-log markdown files."""

    # Schema whose ClassVars the base reader uses for path resolution.
    schema = ForesightDailyFrontmatter

    def __init__(self, root: MemoryRoot) -> None:
        """Initialise the base reader with ``root``."""
        super().__init__(root)

    def path_for(
        self,
        owner_id: str,
        date: _dt.date | None = None,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Return the foresight daily-log path for ``owner_id`` on ``date``.

        Delegates to the base class, passing ``owner_id`` as the scope id
        and forwarding ``date`` / ``app_id`` / ``project_id`` unchanged.
        """
        location = super().path_for(
            owner_id,
            date,
            app_id=app_id,
            project_id=project_id,
        )
        return location
|
||||
@ -0,0 +1,96 @@
|
||||
"""ProfileReader — typed read for the single-file profile layout.
|
||||
|
||||
Pairs with :class:`ProfileWriter`. The schema (concrete profile
|
||||
frontmatter class) is supplied per call; the reader pulls
|
||||
``SCOPE_DIR`` + ``PROFILE_FILENAME`` ClassVars off it to build the
|
||||
path, then ``MarkdownReader.read`` + ``schema.model_validate`` give
|
||||
back a typed frontmatter instance plus the body string.
|
||||
|
||||
Returns ``None`` when the profile file does not exist — "not yet
|
||||
written" is a normal early state for the upsert-style workflow.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TypeVar
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.core.persistence import BaseFrontmatter, MarkdownReader, MemoryRoot
|
||||
|
||||
T = TypeVar("T", bound=BaseFrontmatter)
|
||||
|
||||
|
||||
class ProfileReader:
    """Read single-file profile markdown documents into typed frontmatter."""

    def __init__(self, root: MemoryRoot) -> None:
        """Remember the memory root used for every path lookup."""
        self._root = root

    # ── Public API ────────────────────────────────────────────────────────

    async def read(
        self,
        scope_id: str,
        *,
        schema: type[T],
        app_id: str = "default",
        project_id: str = "default",
    ) -> tuple[T, str] | None:
        """Load the profile file and validate its frontmatter against ``schema``.

        Args:
            scope_id: Directory name of the scope (user or agent id) that
                owns the profile.
            schema: Frontmatter class; must carry non-empty ``SCOPE_DIR``
                and ``PROFILE_FILENAME`` attributes.
            app_id: App segment handed to the memory root.
            project_id: Project segment handed to the memory root.

        Returns:
            ``None`` when no regular file exists at the resolved path;
            otherwise ``(frontmatter, body)`` where ``frontmatter`` is the
            result of ``schema.model_validate`` and ``body`` has its
            trailing newline characters removed.

        Raises:
            TypeError: If ``schema`` lacks ``SCOPE_DIR`` or
                ``PROFILE_FILENAME``.
        """
        target = self._resolve_path(scope_id, schema, app_id, project_id)
        exists = await anyio.Path(target).is_file()
        if not exists:
            return None
        document = await MarkdownReader.read(target)
        meta = schema.model_validate(document.frontmatter)
        return meta, document.body.rstrip("\n")

    def path_for(
        self,
        scope_id: str,
        *,
        schema: type[BaseFrontmatter],
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Resolve the profile path without touching the filesystem."""
        return self._resolve_path(scope_id, schema, app_id, project_id)

    # ── Internals — same shape as ProfileWriter ───────────────────────────

    def _resolve_path(
        self,
        scope_id: str,
        schema: type[BaseFrontmatter],
        app_id: str,
        project_id: str,
    ) -> Path:
        """Build ``<scope root>/<scope_id>/<PROFILE_FILENAME>`` for ``schema``.

        Raises:
            TypeError: If either required attribute is missing or empty.
        """
        scope_name = getattr(schema, "SCOPE_DIR", "")
        file_name = getattr(schema, "PROFILE_FILENAME", None)
        if not scope_name:
            message = (
                f"{schema.__name__} missing ``SCOPE_DIR`` ClassVar — "
                "must inherit a scope mixin (UserScopedFrontmatter / "
                "AgentScopedFrontmatter)."
            )
            raise TypeError(message)
        if not file_name:
            raise TypeError(f"{schema.__name__} missing ``PROFILE_FILENAME`` ClassVar.")
        # ``SCOPE_DIR`` is also the stem of the root method to call
        # (``<SCOPE_DIR>_dir``), which takes the app / project ids.
        root_method = getattr(self._root, f"{scope_name}_dir")
        return root_method(app_id, project_id) / scope_id / file_name
|
||||
43
src/everos/infra/persistence/markdown/writers/__init__.py
Normal file
43
src/everos/infra/persistence/markdown/writers/__init__.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""Business markdown writers.
|
||||
|
||||
Each storage strategy from the EverOS Markdown First spec gets a class
|
||||
here:
|
||||
|
||||
* :class:`BaseDailyWriter` — daily-log append (episode / atomic
|
||||
fact / foresight / agent case). Subclass and bind ``schema``.
|
||||
* :class:`AgentSkillWriter` — directory + progressive disclosure
|
||||
(``skills/skill_<name>/{SKILL.md, references/, scripts/}``).
|
||||
Single class, no subclassing.
|
||||
* :class:`ProfileWriter` — single-file rewrite at a fixed name
|
||||
(``user.md`` / ``behaviors.md`` / ``agent.md`` / ``soul.md`` /
|
||||
``tools.md``). Single class, no subclassing — caller hands in a
|
||||
frontmatter instance whose ``PROFILE_FILENAME`` ClassVar pins
|
||||
the filename.
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.markdown.writers import (
|
||||
BaseDailyWriter,
|
||||
EpisodeWriter,
|
||||
AgentSkillWriter,
|
||||
ProfileWriter,
|
||||
)
|
||||
"""
|
||||
|
||||
from .agent_case_writer import AgentCaseWriter as AgentCaseWriter
|
||||
from .agent_skill_writer import AgentSkillWriter as AgentSkillWriter
|
||||
from .atomic_fact_writer import AtomicFactWriter as AtomicFactWriter
|
||||
from .base import BaseDailyWriter as BaseDailyWriter
|
||||
from .episode_writer import EpisodeWriter as EpisodeWriter
|
||||
from .foresight_writer import ForesightWriter as ForesightWriter
|
||||
from .profile_writer import ProfileWriter as ProfileWriter
|
||||
|
||||
# Explicit public surface of the ``writers`` package: one name per writer
# class imported above (each is also re-exported via ``X as X``).
__all__ = [
    "AgentCaseWriter",
    "AgentSkillWriter",
    "AtomicFactWriter",
    "BaseDailyWriter",
    "EpisodeWriter",
    "ForesightWriter",
    "ProfileWriter",
]
|
||||
@ -0,0 +1,63 @@
|
||||
"""AgentCase daily-log writer — md is the SoT for agent cases.
|
||||
|
||||
Lives on the agent track (``agents/<agent_id>/.cases/...``).
|
||||
Inline carries audit + scoring fields (``owner_id`` / ``session_id`` /
|
||||
``timestamp`` / ``parent_id`` / ``quality_score``); sections carry
|
||||
``TaskIntent`` (required, primary BM25/embed), ``Approach`` (verbatim,
|
||||
not indexed — too long), and optional ``KeyInsight`` (verbatim).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
get_now_with_timezone,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.core.persistence import MarkdownReader
|
||||
|
||||
from ..mds import AgentCaseDailyFrontmatter
|
||||
from .base import BaseDailyWriter
|
||||
|
||||
|
||||
class AgentCaseWriter(BaseDailyWriter):
    """Daily-log writer bound to the AgentCase frontmatter schema.

    Appending itself is inherited from :class:`BaseDailyWriter`; this class
    only supplies the schema and the two hooks below. The base class's
    generic ``scope_id`` carries the agent id here.
    """

    schema = AgentCaseDailyFrontmatter

    def _frontmatter_updates(
        self,
        scope_id: str,
        date: _dt.date,
        *,
        next_count: int,
    ) -> Mapping[str, Any] | None:
        """Return the frontmatter fields to write alongside an append.

        ``next_count`` lands in ``entry_count``; ``last_appended_at`` is
        the current time rendered by ``to_iso_format``.
        """
        iso_day = date.isoformat()
        return {
            "id": f"agent_case_log_{scope_id}_{iso_day}",
            "type": "agent_case_daily",
            "file_type": "agent_case_daily",
            "schema_version": 1,
            "agent_id": scope_id,
            "track": "agent",
            "date": iso_day,
            "entry_count": next_count,
            "last_appended_at": to_iso_format(get_now_with_timezone()),
        }

    async def _current_count(self, path: Path) -> int:
        """Read ``entry_count`` from the file's frontmatter.

        Returns ``0`` when the file does not exist or the key is absent.
        """
        if await anyio.Path(path).is_file():
            document = await MarkdownReader.read(path)
            return document.frontmatter.get("entry_count", 0)
        return 0
|
||||
@ -0,0 +1,204 @@
|
||||
"""AgentSkillWriter — upsert skill main file + reference / script attachments.
|
||||
|
||||
Skill storage is **directory + progressive disclosure** (wiki "Memory
|
||||
Types Markdown Format" v4): each skill lives under
|
||||
``agents/<agent_id>/skills/skill_<name>/`` with a ``SKILL.md`` main
|
||||
file plus ``references/*.md`` and ``scripts/*.<ext>`` siblings.
|
||||
|
||||
This writer is intentionally distinct from :class:`BaseDailyWriter`:
|
||||
|
||||
- **Upsert, not append.** Each ``write_*`` call overwrites the target
|
||||
file in full. Skills don't accumulate entry markers — the body of
|
||||
``SKILL.md`` is the latest revision; references / scripts are
|
||||
individually replaceable files.
|
||||
- **Single-skill API.** The chassis is *not* responsible for bulk
|
||||
enumeration (Tier-1 prompt scanning is a sqlite/lancedb concern,
|
||||
not a markdown-walk concern). One skill in, one skill out.
|
||||
- **No counters / hooks.** No frontmatter merging, no entry-id
|
||||
generation, no _frontmatter_updates hook — the caller hands in a
|
||||
fully-built :class:`AgentSkillFrontmatter` subclass instance and the body
|
||||
string; the writer atomically replaces the file.
|
||||
|
||||
Path resolution comes from :class:`MemoryRoot` + the ClassVars on
|
||||
:class:`AgentSkillFrontmatter` (``SKILLS_CONTAINER_NAME`` /
|
||||
``SKILL_DIR_PREFIX`` / etc.). The writer + reader pair is the single
|
||||
addressing API for skills.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import MarkdownWriter, MemoryRoot
|
||||
|
||||
from ..mds import AgentSkillFrontmatter
|
||||
|
||||
|
||||
class AgentSkillWriter:
    """Upsert writer for the files that make up one agent skill.

    ``SKILL.md`` (frontmatter + body) goes through the injected
    :class:`MarkdownWriter`'s ``write_markdown``; reference and script
    files carry no frontmatter and go through its plain ``write``.
    """

    def __init__(
        self,
        root: MemoryRoot,
        *,
        writer: MarkdownWriter | None = None,
    ) -> None:
        """Keep ``root`` and ``writer`` (a new ``MarkdownWriter(root)`` if omitted)."""
        self._root = root
        self._writer = writer if writer else MarkdownWriter(root)

    # ── Public API ────────────────────────────────────────────────────────

    async def write_main(
        self,
        agent_id: str,
        skill_name: str,
        *,
        frontmatter: AgentSkillFrontmatter,
        body: str,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Write ``skills/skill_<name>/SKILL.md`` from scratch.

        The YAML head is ``frontmatter.model_dump(exclude_none=False)`` and
        the body is ``body`` passed through ``_ensure_trailing_newline``.

        Args:
            agent_id: Agent that owns the skill.
            skill_name: Skill identifier without the directory prefix
                (``"contract_risk_scan"``, not
                ``"skill_contract_risk_scan"``).
            frontmatter: Fully populated schema instance to dump.
            body: Body text for the file.
            app_id: App segment of the storage prefix.
            project_id: Project segment of the storage prefix.

        Returns:
            Whatever the underlying ``write_markdown`` call returns
            (annotated as the written path).
        """
        target = self._main_path(agent_id, skill_name, app_id, project_id)
        yaml_head = frontmatter.model_dump(exclude_none=False)
        text = _ensure_trailing_newline(body)
        return await self._writer.write_markdown(
            target,
            frontmatter=yaml_head,
            body=text,
        )

    async def write_reference(
        self,
        agent_id: str,
        skill_name: str,
        reference_name: str,
        content: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Write ``skills/skill_<name>/references/<reference_name>.md``.

        The file holds ``content`` only (no frontmatter), passed through
        ``_ensure_trailing_newline`` first.

        Args:
            agent_id: Agent that owns the skill.
            skill_name: Unprefixed skill identifier.
            reference_name: Filename stem; ``.md`` is appended here.
            content: Text to store.
            app_id: App segment of the storage prefix.
            project_id: Project segment of the storage prefix.
        """
        target = self._reference_path(
            agent_id, skill_name, reference_name, app_id, project_id
        )
        text = _ensure_trailing_newline(content)
        return await self._writer.write(target, text)

    async def write_script(
        self,
        agent_id: str,
        skill_name: str,
        script_filename: str,
        content: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Write ``skills/skill_<name>/scripts/<script_filename>``.

        ``script_filename`` is used as given, so the caller must include
        the extension (``redline.py``, ``redline.sh``, ...). ``content`` is
        passed through ``_ensure_trailing_newline`` before writing.
        """
        target = self._script_path(
            agent_id, skill_name, script_filename, app_id, project_id
        )
        text = _ensure_trailing_newline(content)
        return await self._writer.write(target, text)

    # ── Path API (callers that need to echo paths in responses) ──────────

    def main_path(
        self,
        agent_id: str,
        skill_name: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Return the ``SKILL.md`` path for the skill; no existence check."""
        return self._main_path(agent_id, skill_name, app_id, project_id)

    # ── Internals — path resolution from AgentSkillFrontmatter ClassVars ──────

    def _skill_dir(
        self, agent_id: str, skill_name: str, app_id: str, project_id: str
    ) -> Path:
        """Directory that holds every file of one skill."""
        agent_root = self._root.agents_dir(app_id, project_id) / agent_id
        container = agent_root / AgentSkillFrontmatter.SKILLS_CONTAINER_NAME
        return container / f"{AgentSkillFrontmatter.SKILL_DIR_PREFIX}{skill_name}"

    def _main_path(
        self, agent_id: str, skill_name: str, app_id: str, project_id: str
    ) -> Path:
        """Path of the skill's main markdown file."""
        skill_dir = self._skill_dir(agent_id, skill_name, app_id, project_id)
        return skill_dir / AgentSkillFrontmatter.SKILL_MAIN_FILENAME

    def _reference_path(
        self,
        agent_id: str,
        skill_name: str,
        reference_name: str,
        app_id: str,
        project_id: str,
    ) -> Path:
        """Path of one reference file (``.md`` suffix added here)."""
        skill_dir = self._skill_dir(agent_id, skill_name, app_id, project_id)
        references = skill_dir / AgentSkillFrontmatter.SKILL_REFERENCES_DIR_NAME
        return references / f"{reference_name}.md"

    def _script_path(
        self,
        agent_id: str,
        skill_name: str,
        script_filename: str,
        app_id: str,
        project_id: str,
    ) -> Path:
        """Path of one script file (filename used verbatim)."""
        skill_dir = self._skill_dir(agent_id, skill_name, app_id, project_id)
        scripts = skill_dir / AgentSkillFrontmatter.SKILL_SCRIPTS_DIR_NAME
        return scripts / script_filename
|
||||
|
||||
|
||||
def _ensure_trailing_newline(text: str) -> str:
    """End ``text`` with exactly one newline (POSIX text-file convention).

    Any run of trailing ``"\\n"`` characters is collapsed to a single one,
    and a newline is appended when none is present. Empty input stays
    empty so that an empty body does not turn into a lone blank line.

    Args:
        text: Body or file content to normalise.

    Returns:
        ``""`` for empty input, otherwise ``text`` ending in one ``"\\n"``.
    """
    if not text:
        return ""
    # rstrip first: text that already ends in several newlines would
    # otherwise be returned unchanged, contradicting "exactly one".
    return text.rstrip("\n") + "\n"
|
||||
@ -0,0 +1,58 @@
|
||||
"""AtomicFact daily-log writer — md is the SoT for atomic facts.
|
||||
|
||||
Caller hands pre-built ``inline`` (``owner_id`` / ``session_id`` /
|
||||
``timestamp`` / ``parent_id`` / ``sender_ids``) plus the single
|
||||
``Fact`` section. The chassis manages the in-file ``entry_id`` sequence
|
||||
(``af_<YYYYMMDD>_<NNNN>``). ``append_entry`` / ``append_entries`` come
|
||||
from :class:`BaseDailyWriter`; this subclass only declares the schema
|
||||
and the per-schema frontmatter / counter hooks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
get_now_with_timezone,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.core.persistence import MarkdownReader
|
||||
|
||||
from ..mds import AtomicFactDailyFrontmatter
|
||||
from .base import BaseDailyWriter
|
||||
|
||||
|
||||
class AtomicFactWriter(BaseDailyWriter):
    """Daily-log writer bound to the AtomicFact frontmatter schema.

    All append logic is inherited; only the schema binding and the
    frontmatter / counter hooks are defined here.
    """

    schema = AtomicFactDailyFrontmatter

    def _frontmatter_updates(
        self,
        scope_id: str,
        date: _dt.date,
        *,
        next_count: int,
    ) -> Mapping[str, Any] | None:
        """Return the frontmatter fields to write alongside an append.

        ``scope_id`` is stored as ``user_id``, ``next_count`` as
        ``entry_count``, and ``last_appended_at`` is the current time.
        """
        iso_day = date.isoformat()
        log_kind = "atomic_fact_daily"
        return {
            "id": f"atomic_fact_log_{scope_id}_{iso_day}",
            "type": log_kind,
            "file_type": log_kind,
            "schema_version": 1,
            "user_id": scope_id,
            "track": "user",
            "date": iso_day,
            "entry_count": next_count,
            "last_appended_at": to_iso_format(get_now_with_timezone()),
        }

    async def _current_count(self, path: Path) -> int:
        """Read ``entry_count`` from the file's frontmatter.

        Returns ``0`` when the file does not exist or the key is absent.
        """
        file_present = await anyio.Path(path).is_file()
        if not file_present:
            return 0
        document = await MarkdownReader.read(path)
        return document.frontmatter.get("entry_count", 0)
|
||||
301
src/everos/infra/persistence/markdown/writers/base.py
Normal file
301
src/everos/infra/persistence/markdown/writers/base.py
Normal file
@ -0,0 +1,301 @@
|
||||
"""Base business writer for daily-log markdown files.
|
||||
|
||||
Daily-log files (memcell / episode / case / atomic_fact / foresight)
|
||||
share three things:
|
||||
|
||||
* scope (user-track or agent-track, derived from the schema)
|
||||
* filename pattern: ``<FILE_PREFIX>-<YYYY-MM-DD>.md`` under
|
||||
``<scope_root>/<scope_id>/<DIR_NAME>/``
|
||||
* entry id pattern: ``<ENTRY_ID_PREFIX>_<YYYYMMDD>_<NNN>``
|
||||
|
||||
:class:`BaseDailyWriter` factors out **path resolution + entry-id
|
||||
construction + today's date default**, leaving frontmatter field
|
||||
maintenance (e.g. ``entry_count`` / ``last_appended_at``) to concrete
|
||||
business subclasses.
|
||||
|
||||
Subclass usage::
|
||||
|
||||
class _MemcellWriter(BaseDailyWriter):
|
||||
schema = UserMemcellDailyFrontmatter
|
||||
|
||||
writer = _MemcellWriter(layout)
|
||||
eid = await writer.append("u_jason", "...")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping, Sequence
|
||||
from pathlib import Path
|
||||
from typing import Any, ClassVar
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import today_with_timezone
|
||||
from everos.core.persistence import (
|
||||
BaseFrontmatter,
|
||||
EntryId,
|
||||
MarkdownReader,
|
||||
MarkdownWriter,
|
||||
MemoryRoot,
|
||||
render_structured_entry,
|
||||
)
|
||||
|
||||
|
||||
class BaseDailyWriter:
    """Append entries to a scope's daily-log markdown file.

    Subclasses bind one :class:`BaseFrontmatter` subclass through the
    ``schema`` ClassVar. That schema must expose non-empty ``SCOPE_DIR``,
    ``ENTRY_ID_PREFIX``, ``DIR_NAME`` and ``FILE_PREFIX`` attributes
    (checked in ``__init__``). ``SCOPE_DIR`` is normally provided by a
    scope mixin such as :class:`UserScopedFrontmatter` or
    :class:`AgentScopedFrontmatter`.

    Paths are derived purely from those ClassVars plus the injected
    :class:`MemoryRoot`; there is no separate layout layer.
    """

    schema: ClassVar[type[BaseFrontmatter]]  # subclass must declare

    def __init__(
        self,
        root: MemoryRoot,
        *,
        writer: MarkdownWriter | None = None,
    ) -> None:
        """Validate the bound schema and store the collaborators.

        Args:
            root: Memory root used to resolve scope directories.
            writer: Markdown writer to use; a ``MarkdownWriter(root)`` is
                created when omitted.

        Raises:
            TypeError: If the subclass declares no ``schema`` or the schema
                lacks one of the required ClassVars.
        """
        schema = getattr(type(self), "schema", None)
        if schema is None:
            raise TypeError(
                f"{type(self).__name__} must declare a class-level ``schema`` attribute"
            )
        for attr in ("SCOPE_DIR", "ENTRY_ID_PREFIX", "DIR_NAME", "FILE_PREFIX"):
            if not getattr(schema, attr, None):
                raise TypeError(f"{schema.__name__} missing ClassVar {attr!r}")
        self._root = root
        self._writer = writer or MarkdownWriter(root)

    # ── Public API ────────────────────────────────────────────────────────

    async def append_entry(
        self,
        scope_id: str,
        *,
        inline: Mapping[str, object],
        sections: Mapping[str, str],
        date: _dt.date | None = None,
        app_id: str = "default",
        project_id: str = "default",
    ) -> EntryId:
        """Append a single rendered entry; return the freshly minted ``EntryId``.

        Single-item convenience over :meth:`append_entries`, which performs
        the read-count / allocate-id / render / write sequence under the
        writer's per-path lock.

        Args:
            scope_id: ``user_id`` or ``agent_id`` (matches the schema's
                scope flavour).
            inline: Inline metadata for the entry.
            sections: ``{title: body}`` blocks for the entry.
            date: Date bucket — defaults to today in the configured TZ.
            app_id: App segment of the storage prefix.
            project_id: Project segment of the storage prefix.

        Returns:
            The :class:`EntryId` assigned to the new entry.
        """
        eids = await self.append_entries(
            scope_id,
            [(inline, sections)],
            date=date,
            app_id=app_id,
            project_id=project_id,
        )
        return eids[0]

    async def append_entries(
        self,
        scope_id: str,
        items: Sequence[tuple[Mapping[str, object], Mapping[str, str]]],
        *,
        date: _dt.date | None = None,
        app_id: str = "default",
        project_id: str = "default",
    ) -> list[EntryId]:
        """Append ``N`` rendered entries in one locked read-modify-write cycle.

        Ids are allocated in ``items`` order starting from the file's
        current count, all entries are rendered, and a single write is
        issued together with the frontmatter returned by
        :meth:`_frontmatter_updates`. Empty ``items`` is a no-op that
        returns ``[]`` before the date bucket or path is resolved.

        Args:
            scope_id: Subject scope (user / agent id).
            items: Sequence of ``(inline, sections)`` pairs.
            date: Date bucket — defaults to today in the configured TZ.
            app_id: App segment of the storage prefix.
            project_id: Project segment of the storage prefix.

        Returns:
            ``N`` :class:`EntryId`s in the same order as ``items``.
        """
        if not items:
            # Nothing to write: bail out before consulting the clock or the
            # memory root, so the no-op cannot fail or do needless work.
            return []

        bucket = date or today_with_timezone()
        path = self._resolve_path(scope_id, bucket, app_id, project_id)

        async with self._writer.lock_for(path):
            base_count = await self._current_count(path)
            eids = [
                EntryId.next_for(self.schema.ENTRY_ID_PREFIX, bucket, base_count + i)
                for i in range(len(items))
            ]
            rendered = [
                (
                    render_structured_entry(
                        header=eid.format(),
                        inline=inline,
                        sections=sections,
                    ),
                    eid,
                )
                for eid, (inline, sections) in zip(eids, items, strict=True)
            ]
            frontmatter_updates = self._frontmatter_updates(
                scope_id, bucket, next_count=base_count + len(items)
            )
            # The lock is already held here, hence the unlocked variant.
            await self._writer._append_entries_unlocked(  # noqa: SLF001
                path,
                rendered,
                frontmatter_updates=frontmatter_updates,
            )
        return eids

    async def append(
        self,
        scope_id: str,
        entry_body: str,
        *,
        date: _dt.date | None = None,
        frontmatter_updates: Mapping[str, Any] | None = None,
        app_id: str = "default",
        project_id: str = "default",
    ) -> EntryId:
        """Append a pre-rendered ``entry_body`` to the daily-log file.

        Kept for callers that hand in fully rendered bodies; most callers
        should prefer :meth:`append_entry`. Counting, id allocation and the
        write all happen inside the writer's per-path lock.

        Args:
            scope_id: ``user_id`` or ``agent_id`` (matches the schema's
                scope flavour).
            entry_body: Already rendered entry content.
            date: Date bucket — defaults to today in the configured TZ.
            frontmatter_updates: Fields to merge into the file's
                frontmatter. When ``None``, :meth:`_frontmatter_updates`
                supplies the defaults.
            app_id: App segment of the storage prefix.
            project_id: Project segment of the storage prefix.

        Returns:
            The :class:`EntryId` assigned to the new entry.
        """
        bucket = date or today_with_timezone()
        path = self._resolve_path(scope_id, bucket, app_id, project_id)

        async with self._writer.lock_for(path):
            count = await self._current_count(path)
            eid = EntryId.next_for(self.schema.ENTRY_ID_PREFIX, bucket, count)

            # Subclass hook: derive defaults if caller passes nothing.
            if frontmatter_updates is None:
                frontmatter_updates = self._frontmatter_updates(
                    scope_id, bucket, next_count=count + 1
                )

            # The lock is already held here, hence the unlocked variant.
            await self._writer._append_entries_unlocked(  # noqa: SLF001
                path,
                [(entry_body, eid)],
                frontmatter_updates=frontmatter_updates,
            )
        return eid

    # ── Hooks (subclass override) ─────────────────────────────────────────

    async def _current_count(self, path: Path) -> int:
        """Return the current entry count for the file.

        Default: ``0`` when the file does not exist, otherwise the number
        of entries :class:`MarkdownReader` parses out of it. Subclasses may
        override, e.g. to read a frontmatter field such as ``entry_count``.
        """
        if not await anyio.Path(path).is_file():
            return 0
        parsed = await MarkdownReader.read(path)
        return len(parsed.entries)

    def _frontmatter_updates(
        self,
        scope_id: str,
        date: _dt.date,
        *,
        next_count: int,
    ) -> Mapping[str, Any] | None:
        """Build the per-append frontmatter dict (subclass override).

        Called by :meth:`append_entries` on every non-empty batch, and by
        :meth:`append` when its ``frontmatter_updates`` argument is
        ``None``. The default returns ``None``; subclasses override to
        maintain fields such as ``id`` / ``entry_count`` /
        ``last_appended_at`` automatically.

        Args:
            scope_id: Scope the file belongs to.
            date: Date bucket of the file.
            next_count: Entry count the file will have after the append.
        """
        return None

    # ── Path API ──────────────────────────────────────────────────────────

    def path_for(
        self,
        scope_id: str,
        date: _dt.date | None = None,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Return the daily-log path for ``scope_id`` on ``date`` (today default).

        Public counterpart of :meth:`_resolve_path`. Performs no
        filesystem access.
        """
        return self._resolve_path(
            scope_id, date or today_with_timezone(), app_id, project_id
        )

    # ── Internals ─────────────────────────────────────────────────────────

    def _resolve_path(
        self, scope_id: str, date: _dt.date, app_id: str, project_id: str
    ) -> Path:
        """Build the daily-log path for ``scope_id`` on ``date``.

        Shape: ``<scope root>/<scope_id>/<DIR_NAME>/<FILE_PREFIX>-<YYYY-MM-DD>.md``.
        """
        # SCOPE_DIR ("users" / "agents") names the matching MemoryRoot method,
        # which prepends the <app>/<project> business prefix.
        scope_dir = getattr(self._root, f"{self.schema.SCOPE_DIR}_dir")
        return (
            scope_dir(app_id, project_id)
            / scope_id
            / self.schema.DIR_NAME
            / f"{self.schema.FILE_PREFIX}-{date.isoformat()}.md"
        )
|
||||
@ -0,0 +1,69 @@
|
||||
"""Episode daily-log writer — md is the SoT for Episode memories.
|
||||
|
||||
Stays in the chassis style: caller hands in pre-built ``inline`` and
|
||||
``sections`` dicts plus the scope id (``owner_id``). Domain →
|
||||
structured-entry shaping lives in the calling pipeline (cf. architecture
|
||||
rule: ``infra`` may not import ``memory``).
|
||||
|
||||
This milestone assumes well-behaved callers (no retransmit dedupe needed).
|
||||
The writer just appends; the chassis manages the in-file ``entry_id``
|
||||
sequence, which is the single source of identity for an md entry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
get_now_with_timezone,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.core.persistence import MarkdownReader
|
||||
|
||||
from ..mds import EpisodeDailyFrontmatter
|
||||
from .base import BaseDailyWriter
|
||||
|
||||
|
||||
class EpisodeWriter(BaseDailyWriter):
    """Append-only daily-log writer bound to the Episode schema (md = SoT).

    The public ``append_entry`` / ``append_entries`` API is inherited from
    :class:`BaseDailyWriter`. The ``entry_id`` (``ep_<YYYYMMDD>_<NNNN>``)
    is the in-file identity allocated under the per-path lock, so callers
    can derive a globally-unique id from ``(owner_id, entry_id)`` without
    persisting any algo-side uuid.
    """

    schema = EpisodeDailyFrontmatter

    # ── Frontmatter override (entry_count + last_appended_at) ────────────

    def _frontmatter_updates(
        self,
        scope_id: str,
        date: _dt.date,
        *,
        next_count: int,
    ) -> Mapping[str, Any] | None:
        """Build the frontmatter fields for the daily episode file.

        Args:
            scope_id: Owner of the log; written as ``user_id`` and embedded
                in the file ``id``.
            date: Day of the log, rendered with ``date.isoformat()``.
            next_count: Value stored as ``entry_count``.

        Returns:
            Mapping of frontmatter keys to values; ``last_appended_at`` is
            the current timezone-aware time in ISO format.
        """
        day_iso = date.isoformat()
        appended_at = to_iso_format(get_now_with_timezone())
        return {
            "id": f"episode_log_{scope_id}_{day_iso}",
            "type": "episode_daily",
            "file_type": "episode_daily",
            "schema_version": 1,
            "user_id": scope_id,
            "track": "user",
            "date": day_iso,
            "entry_count": next_count,
            "last_appended_at": appended_at,
        }

    async def _current_count(self, path: Path) -> int:
        """Read ``entry_count`` from the file's frontmatter.

        Returns ``0`` when ``path`` is not an existing file or when the
        frontmatter has no ``entry_count`` key.
        """
        if await anyio.Path(path).is_file():
            document = await MarkdownReader.read(path)
            return document.frontmatter.get("entry_count", 0)
        return 0
|
||||
@ -0,0 +1,58 @@
|
||||
"""Foresight daily-log writer — md is the SoT for foresights.
|
||||
|
||||
Inline carries the audit / scope + time-window fields (``owner_id`` /
|
||||
``session_id`` / ``timestamp`` / ``parent_id`` / ``sender_ids`` plus
|
||||
optional ``start_time`` / ``end_time`` / ``duration_days``). Sections
|
||||
carry the BM25-indexed content: ``Foresight`` (required, primary
|
||||
field) and optional ``Evidence`` (secondary BM25 field).
|
||||
``append_entry`` / ``append_entries`` come from :class:`BaseDailyWriter`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
from collections.abc import Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
|
||||
from everos.component.utils.datetime import (
|
||||
get_now_with_timezone,
|
||||
to_iso_format,
|
||||
)
|
||||
from everos.core.persistence import MarkdownReader
|
||||
|
||||
from ..mds import ForesightDailyFrontmatter
|
||||
from .base import BaseDailyWriter
|
||||
|
||||
|
||||
class ForesightWriter(BaseDailyWriter):
    """Append-only daily-log writer bound to the Foresight schema (md = SoT).

    The public ``append_entry`` / ``append_entries`` API is inherited from
    :class:`BaseDailyWriter`; this class only supplies the schema and the
    two frontmatter helpers below.
    """

    schema = ForesightDailyFrontmatter

    def _frontmatter_updates(
        self,
        scope_id: str,
        date: _dt.date,
        *,
        next_count: int,
    ) -> Mapping[str, Any] | None:
        """Build the frontmatter fields for the daily foresight file.

        Args:
            scope_id: Owner of the log; written as ``user_id`` and embedded
                in the file ``id``.
            date: Day of the log, rendered with ``date.isoformat()``.
            next_count: Value stored as ``entry_count``.

        Returns:
            Mapping of frontmatter keys to values; ``last_appended_at`` is
            the current timezone-aware time in ISO format.
        """
        day_iso = date.isoformat()
        appended_at = to_iso_format(get_now_with_timezone())
        return {
            "id": f"foresight_log_{scope_id}_{day_iso}",
            "type": "foresight_daily",
            "file_type": "foresight_daily",
            "schema_version": 1,
            "user_id": scope_id,
            "track": "user",
            "date": day_iso,
            "entry_count": next_count,
            "last_appended_at": appended_at,
        }

    async def _current_count(self, path: Path) -> int:
        """Read ``entry_count`` from the file's frontmatter.

        Returns ``0`` when ``path`` is not an existing file or when the
        frontmatter has no ``entry_count`` key.
        """
        if await anyio.Path(path).is_file():
            document = await MarkdownReader.read(path)
            return document.frontmatter.get("entry_count", 0)
        return 0
|
||||
127
src/everos/infra/persistence/markdown/writers/profile_writer.py
Normal file
127
src/everos/infra/persistence/markdown/writers/profile_writer.py
Normal file
@ -0,0 +1,127 @@
|
||||
"""ProfileWriter — upsert a single-file, fixed-name profile markdown.
|
||||
|
||||
Profile storage is **single-file rewrite** (the third storage strategy
|
||||
in the EverOS Markdown First spec). Each profile lives at a fixed
|
||||
filename under the agent or user directory::
|
||||
|
||||
users/<user_id>/user.md ← user profile
|
||||
users/<user_id>/behaviors.md ← user behaviour patterns
|
||||
agents/<agent_id>/agent.md ← agent playbook
|
||||
agents/<agent_id>/soul.md ← agent identity / values
|
||||
agents/<agent_id>/tools.md ← agent tool declarations
|
||||
|
||||
Compared with :class:`SkillWriter` (directory + progressive disclosure)
|
||||
and :class:`BaseDailyWriter` (per-date append + entry markers), the
|
||||
profile writer is the simplest of the three:
|
||||
|
||||
- **Upsert, not append.** Each ``write`` overwrites the file in full.
|
||||
- **Fixed path.** Caller passes ``scope_id`` only — no ``name``
|
||||
parameter; the filename is fixed by the schema's
|
||||
``PROFILE_FILENAME`` ClassVar.
|
||||
- **No business hooks.** No frontmatter merging, no entry-id
|
||||
generation. The caller hands in a fully-built schema instance.
|
||||
|
||||
The schema must declare two ClassVars:
|
||||
|
||||
- ``SCOPE_DIR`` (``"users"`` / ``"agents"``) — inherited from
|
||||
:class:`UserScopedFrontmatter` / :class:`AgentScopedFrontmatter`.
|
||||
- ``PROFILE_FILENAME`` (``"user.md"`` / ``"agent.md"`` / …) —
|
||||
declared on the concrete profile schema itself.
|
||||
|
||||
There is no ``ProfileFrontmatter`` base class: profile schemas are
|
||||
duck-typed via the two ClassVars. Subclasses inherit the scope mixin
|
||||
and add ``PROFILE_FILENAME`` plus their business fields directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from everos.core.persistence import BaseFrontmatter, MarkdownWriter, MemoryRoot
|
||||
|
||||
|
||||
class ProfileWriter:
    """Writes a profile as one fixed-name markdown file, replaced in full.

    The target filename comes from the schema's ``PROFILE_FILENAME``
    ClassVar and the scope directory from its ``SCOPE_DIR`` ClassVar;
    callers supply only the scope id (plus optional app / project).
    """

    def __init__(
        self,
        root: MemoryRoot,
        *,
        writer: MarkdownWriter | None = None,
    ) -> None:
        """Bind the writer to a memory root.

        Args:
            root: Memory root used to resolve scope directories.
            writer: Markdown writer to delegate to; when falsy a
                ``MarkdownWriter(root)`` is created.
        """
        self._root = root
        self._writer = writer if writer else MarkdownWriter(root)

    # ── Public API ────────────────────────────────────────────────────────

    async def write(
        self,
        scope_id: str,
        *,
        frontmatter: BaseFrontmatter,
        body: str,
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Upsert ``<app>/<project>/<scope>/<scope_id>/<PROFILE_FILENAME>``.

        Args:
            scope_id: ``user_id`` or ``agent_id`` (must match the schema's
                scope mixin).
            frontmatter: Fully-built schema instance; its
                ``model_dump(exclude_none=False)`` becomes the YAML head.
            body: Profile body text; passed through
                ``_ensure_trailing_newline`` before writing.
            app_id: App scope segment (defaults to ``"default"``).
            project_id: Project scope segment (defaults to ``"default"``).

        Returns:
            Whatever ``MarkdownWriter.write_markdown`` returns for the
            written profile file.

        Raises:
            TypeError: If the schema lacks ``SCOPE_DIR`` or
                ``PROFILE_FILENAME``.
        """
        target = self._resolve_path(
            scope_id, type(frontmatter), app_id, project_id
        )
        return await self._writer.write_markdown(
            target,
            frontmatter=frontmatter.model_dump(exclude_none=False),
            body=_ensure_trailing_newline(body),
        )

    def path_for(
        self,
        scope_id: str,
        *,
        schema: type[BaseFrontmatter],
        app_id: str = "default",
        project_id: str = "default",
    ) -> Path:
        """Return the profile path for ``schema`` without touching the disk."""
        return self._resolve_path(scope_id, schema, app_id, project_id)

    # ── Internals ─────────────────────────────────────────────────────────

    def _resolve_path(
        self,
        scope_id: str,
        schema: type[BaseFrontmatter],
        app_id: str,
        project_id: str,
    ) -> Path:
        """Build the profile path from the schema's two ClassVars.

        Raises:
            TypeError: If ``SCOPE_DIR`` or ``PROFILE_FILENAME`` is missing
                or empty on ``schema`` (``SCOPE_DIR`` is checked first).
        """
        scope_dir = getattr(schema, "SCOPE_DIR", "")
        if not scope_dir:
            raise TypeError(
                f"{schema.__name__} missing ``SCOPE_DIR`` ClassVar — "
                "must inherit a scope mixin (UserScopedFrontmatter / "
                "AgentScopedFrontmatter)."
            )
        filename = getattr(schema, "PROFILE_FILENAME", None)
        if not filename:
            raise TypeError(f"{schema.__name__} missing ``PROFILE_FILENAME`` ClassVar.")
        # SCOPE_DIR names the matching MemoryRoot method (<app>/<project> prefix).
        locate_scope_root = getattr(self._root, f"{scope_dir}_dir")
        return locate_scope_root(app_id, project_id) / scope_id / filename
|
||||
|
||||
|
||||
def _ensure_trailing_newline(text: str) -> str:
    """Return ``text`` ending in exactly one newline (POSIX text-file convention).

    A missing final newline is added and a run of trailing ``"\\n"``
    characters is collapsed to a single one, so repeated rewrites of the
    same body always produce the same file tail. Only ``"\\n"`` is
    stripped; other trailing whitespace and interior blank lines are
    left untouched.

    Args:
        text: Body text to normalise.

    Returns:
        ``""`` for empty input (an empty body stays empty); otherwise
        ``text`` with exactly one trailing ``"\\n"``.
    """
    if not text:
        return ""
    # rstrip + append both adds a missing newline and collapses extras.
    return text.rstrip("\n") + "\n"
|
||||
66
src/everos/infra/persistence/sqlite/__init__.py
Normal file
66
src/everos/infra/persistence/sqlite/__init__.py
Normal file
@ -0,0 +1,66 @@
|
||||
"""SQLite business persistence layer.
|
||||
|
||||
Sits on top of :mod:`everos.core.persistence.sqlite` (engine + sessions +
|
||||
``BaseTable`` + ``RepoBase``) and provides:
|
||||
|
||||
* lazy process-wide engine + session-factory singletons
|
||||
(:mod:`.sqlite_manager`)
|
||||
* concrete table schemas under :mod:`.tables`
|
||||
* concrete repository singletons under :mod:`.repos`
|
||||
|
||||
External usage::
|
||||
|
||||
from everos.infra.persistence.sqlite import (
|
||||
get_engine, get_session_factory, dispose_engine,
|
||||
# business tables / repos are re-exported here too —
|
||||
# callers MUST go through this top-level package because
|
||||
# ``infra.persistence.sqlite.**`` (sub-packages) are forbidden
|
||||
# to ``service`` / ``memory`` / ``entrypoints`` by import-linter.
|
||||
UnprocessedBuffer, Memcell, ConversationStatus,
|
||||
unprocessed_buffer_repo, memcell_repo, conversation_status_repo,
|
||||
)
|
||||
|
||||
The :class:`SqliteLifespanProvider` runs ``SQLModel.metadata.create_all``
|
||||
on app startup and ``dispose_engine`` on shutdown, so business code does
|
||||
not need to manage either.
|
||||
"""
|
||||
|
||||
# Importing ``tables`` registers every business SQLModel in
|
||||
# ``SQLModel.metadata`` so ``SqliteLifespanProvider.startup`` can
|
||||
# ``create_all`` without callers having to import each model module.
|
||||
from . import tables as tables # noqa: F401
|
||||
from .repos import QueueSummary as QueueSummary
|
||||
from .repos import cluster_repo as cluster_repo
|
||||
from .repos import conversation_status_repo as conversation_status_repo
|
||||
from .repos import md_change_state_repo as md_change_state_repo
|
||||
from .repos import memcell_repo as memcell_repo
|
||||
from .repos import mint_cluster_id as mint_cluster_id
|
||||
from .repos import unprocessed_buffer_repo as unprocessed_buffer_repo
|
||||
from .sqlite_manager import dispose_engine as dispose_engine
|
||||
from .sqlite_manager import get_engine as get_engine
|
||||
from .sqlite_manager import get_session_factory as get_session_factory
|
||||
from .tables import Cluster as Cluster
|
||||
from .tables import ClusterMember as ClusterMember
|
||||
from .tables import ConversationStatus as ConversationStatus
|
||||
from .tables import MdChangeState as MdChangeState
|
||||
from .tables import Memcell as Memcell
|
||||
from .tables import UnprocessedBuffer as UnprocessedBuffer
|
||||
|
||||
__all__ = [
|
||||
"Cluster",
|
||||
"ClusterMember",
|
||||
"ConversationStatus",
|
||||
"MdChangeState",
|
||||
"Memcell",
|
||||
"QueueSummary",
|
||||
"UnprocessedBuffer",
|
||||
"cluster_repo",
|
||||
"conversation_status_repo",
|
||||
"dispose_engine",
|
||||
"get_engine",
|
||||
"get_session_factory",
|
||||
"md_change_state_repo",
|
||||
"memcell_repo",
|
||||
"mint_cluster_id",
|
||||
"unprocessed_buffer_repo",
|
||||
]
|
||||
23
src/everos/infra/persistence/sqlite/repos/__init__.py
Normal file
23
src/everos/infra/persistence/sqlite/repos/__init__.py
Normal file
@ -0,0 +1,23 @@
|
||||
"""Business SQLite repository singletons.
|
||||
|
||||
Repository instances for business tables, wired to the process-wide
|
||||
engine singleton.
|
||||
"""
|
||||
|
||||
from .cluster import cluster_repo as cluster_repo
|
||||
from .cluster import mint_cluster_id as mint_cluster_id
|
||||
from .conversation_status import conversation_status_repo as conversation_status_repo
|
||||
from .md_change_state import QueueSummary as QueueSummary
|
||||
from .md_change_state import md_change_state_repo as md_change_state_repo
|
||||
from .memcell import memcell_repo as memcell_repo
|
||||
from .unprocessed_buffer import unprocessed_buffer_repo as unprocessed_buffer_repo
|
||||
|
||||
__all__ = [
|
||||
"QueueSummary",
|
||||
"cluster_repo",
|
||||
"conversation_status_repo",
|
||||
"md_change_state_repo",
|
||||
"memcell_repo",
|
||||
"mint_cluster_id",
|
||||
"unprocessed_buffer_repo",
|
||||
]
|
||||
240
src/everos/infra/persistence/sqlite/repos/cluster.py
Normal file
240
src/everos/infra/persistence/sqlite/repos/cluster.py
Normal file
@ -0,0 +1,240 @@
|
||||
"""Repository for the ``cluster`` + ``cluster_member`` pair.
|
||||
|
||||
Bridges between the storage row shape and the algo-side
|
||||
:class:`everalgo.clustering.Cluster` value object. Callers always work in
|
||||
the algo type — this repo handles the centroid bytes round-trip, the
|
||||
preview JSON round-trip, and the membership join so the algo's
|
||||
``members: list[str]`` field is always fully populated on read. The
|
||||
``last_ts`` field is stored as int milliseconds (matches the algo type
|
||||
exactly) to keep the round-trip lossless across SQLite's tz-naive
|
||||
``DateTime`` storage.
|
||||
|
||||
The single ``upsert_with_members`` write path is what every cluster
|
||||
strategy invokes after a merge / new-cluster decision: it stamps the
|
||||
``cluster`` row (UPSERT) and reconciles the ``cluster_member`` rows
|
||||
(diff-then-insert; pre-existing members are kept, new members appended)
|
||||
so calls are idempotent even if a strategy retries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
import numpy as np
|
||||
from everalgo.clustering import Cluster as AlgoCluster
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.persistence.sqlite import RepoBase, session_scope
|
||||
|
||||
from ..sqlite_manager import get_session_factory
|
||||
from ..tables import Cluster, ClusterMember
|
||||
|
||||
# dtype used on both sides of the centroid bytes round-trip: the write path
# casts to it before ``tobytes()`` and the read path decodes with it.
_CENTROID_DTYPE = np.float32
|
||||
|
||||
|
||||
def mint_cluster_id() -> str:
    """Return a new cluster id shaped ``cl_<12hex>`` (mirrors ``_mint_memcell_id``).

    The suffix is the first 12 hex characters of a random UUID4.
    """
    random_hex = uuid.uuid4().hex
    return "cl_" + random_hex[:12]
|
||||
|
||||
|
||||
class _ClusterRepo(RepoBase[Cluster]):
    """Repository translating ``cluster`` / ``cluster_member`` rows to algo clusters.

    Every public method opens its own session via ``session_scope`` and
    returns (or accepts) :class:`AlgoCluster` value objects rather than
    table rows.
    """

    model = Cluster

    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
        """Return the session factory obtained from ``sqlite_manager``."""
        return get_session_factory()

    # ── Reads ──────────────────────────────────────────────────────────────

    async def get_with_members(self, cluster_id: str) -> AlgoCluster | None:
        """Fetch one cluster as a fully-hydrated algo value object.

        Args:
            cluster_id: Primary key of the ``cluster`` row.

        Returns:
            The cluster with its member ids attached, or ``None`` when no
            row matches ``cluster_id`` — downstream strategies that race
            the writer should treat this as a transient miss and let OME
            retry the run.
        """
        async with session_scope(self._factory) as s:
            row = await s.get(Cluster, cluster_id)
            if row is None:
                return None
            members_by_cluster = await _load_members_by_cluster(s, [cluster_id])
            return _row_to_algo(row, members_by_cluster.get(cluster_id, []))

    async def list_for_owner(
        self,
        owner_id: str,
        kind: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> list[AlgoCluster]:
        """All clusters for ``(app, project, owner, kind)``, as algo objects.

        Hot path for the cluster strategies (``cluster_by_geometry`` /
        ``cluster_by_llm`` need the full ``existing_clusters`` list). Each
        returned cluster carries its full ``members`` view, populated from
        :class:`ClusterMember` with a single extra query. Scoping by
        (app, project) keeps one space's clusters from merging into
        another's.

        Returns:
            Possibly-empty list of hydrated clusters.
        """
        async with session_scope(self._factory) as s:
            rows = list(
                (
                    await s.execute(
                        select(Cluster)
                        .where(Cluster.app_id == app_id)
                        .where(Cluster.project_id == project_id)
                        .where(Cluster.owner_id == owner_id)
                        .where(Cluster.kind == kind)
                    )
                )
                .scalars()
                .all()
            )
            if not rows:
                return []
            ids = [r.cluster_id for r in rows]
            # One membership query for all clusters instead of one per row.
            members_by_cluster = await _load_members_by_cluster(s, ids)
            return [
                _row_to_algo(row, members_by_cluster.get(row.cluster_id, []))
                for row in rows
            ]

    async def find_cluster_id_for_member(
        self,
        member_type: str,
        member_id: str,
    ) -> str | None:
        """Reverse lookup: ``(member_type, member_id) → cluster_id``.

        Returns ``None`` when the entity is not yet attached to any cluster.
        Backed by ``ix_cluster_member_reverse`` so it is O(log N).
        """
        async with session_scope(self._factory) as s:
            stmt = (
                select(ClusterMember.cluster_id)
                .where(ClusterMember.member_type == member_type)
                .where(ClusterMember.member_id == member_id)
                .limit(1)
            )
            return (await s.execute(stmt)).scalar_one_or_none()

    # ── Write ──────────────────────────────────────────────────────────────

    async def upsert_with_members(
        self,
        algo_cluster: AlgoCluster,
        *,
        owner_id: str,
        owner_type: str,
        kind: str,
        member_type: str,
        app_id: str = "default",
        project_id: str = "default",
    ) -> None:
        """Persist one algo cluster snapshot + its membership rows.

        ``algo_cluster.id`` must be non-None (caller-minted via
        :func:`mint_cluster_id` for a brand-new cluster, or carried
        through from a merge return). ``algo_cluster.members`` is the
        full member list — the repo diffs against existing membership
        and inserts only the new rows so the call is idempotent under
        OME's at-least-once retry semantics. Ids repeated inside
        ``algo_cluster.members`` are inserted once.

        On conflict only ``centroid_blob`` / ``count`` / ``last_ts_ms`` /
        ``preview_json`` are refreshed; the scope and owner columns keep
        the values from the original insert.

        Raises:
            ValueError: If ``algo_cluster.id`` is empty or ``None``.
        """
        cluster_id = algo_cluster.id
        if not cluster_id:
            raise ValueError(
                "upsert_with_members requires algo_cluster.id (mint via "
                "mint_cluster_id() before passing in)."
            )
        now = get_utc_now()
        centroid_blob = np.asarray(
            algo_cluster.centroid, dtype=_CENTROID_DTYPE
        ).tobytes()
        preview_json = json.dumps(list(algo_cluster.preview), ensure_ascii=False)

        async with session_scope(self._factory) as s:
            cluster_stmt = (
                sqlite_insert(Cluster)
                .values(
                    cluster_id=cluster_id,
                    app_id=app_id,
                    project_id=project_id,
                    owner_id=owner_id,
                    owner_type=owner_type,
                    kind=kind,
                    centroid_blob=centroid_blob,
                    count=algo_cluster.count,
                    last_ts_ms=algo_cluster.last_ts,
                    preview_json=preview_json,
                )
                .on_conflict_do_update(
                    index_elements=["cluster_id"],
                    set_={
                        "centroid_blob": centroid_blob,
                        "count": algo_cluster.count,
                        "last_ts_ms": algo_cluster.last_ts,
                        "preview_json": preview_json,
                    },
                )
            )
            await s.execute(cluster_stmt)

            # Member ids already stored for this cluster, plus (as the loop
            # below runs) the ids queued for insertion in this call.
            seen = set(
                (
                    await s.execute(
                        select(ClusterMember.member_id).where(
                            ClusterMember.cluster_id == cluster_id
                        )
                    )
                )
                .scalars()
                .all()
            )
            new_member_rows = []
            for mid in algo_cluster.members:
                if mid in seen:
                    continue
                # Recording the id here keeps a repeated entry in the
                # incoming list from producing two rows in one flush.
                seen.add(mid)
                new_member_rows.append(
                    ClusterMember(
                        cluster_id=cluster_id,
                        member_id=mid,
                        member_type=member_type,
                        added_ts=now,
                    )
                )
            if new_member_rows:
                s.add_all(new_member_rows)
            await s.commit()
|
||||
|
||||
|
||||
def _row_to_algo(row: Cluster, members: list[str]) -> AlgoCluster:
    """Convert a ``cluster`` row plus its member ids into an algo cluster.

    Args:
        row: Stored cluster row; ``centroid_blob`` is decoded with
            ``_CENTROID_DTYPE`` and ``preview_json`` is parsed as JSON
            (an empty / missing value becomes ``[]``).
        members: Member ids for this cluster; copied so the caller's list
            is not shared with the returned object.

    Returns:
        The hydrated :class:`AlgoCluster`.
    """
    # ``np.frombuffer`` over ``bytes`` yields a read-only view; copy so the
    # returned centroid can be updated in place without a ValueError.
    centroid = np.frombuffer(row.centroid_blob, dtype=_CENTROID_DTYPE).copy()
    preview = json.loads(row.preview_json) if row.preview_json else []
    return AlgoCluster(
        id=row.cluster_id,
        centroid=centroid,
        count=row.count,
        last_ts=row.last_ts_ms,
        preview=preview,
        members=list(members),
    )
|
||||
|
||||
|
||||
async def _load_members_by_cluster(
    session: AsyncSession,
    cluster_ids: list[str],
) -> dict[str, list[str]]:
    """Group member ids by cluster using a single query.

    Args:
        session: Open session to run the query on.
        cluster_ids: Clusters whose members should be loaded.

    Returns:
        ``{cluster_id: [member_id, ...]}`` with each list ordered by
        ``added_ts``. Clusters without members have no key.
    """
    query = (
        select(ClusterMember.cluster_id, ClusterMember.member_id)
        .where(ClusterMember.cluster_id.in_(cluster_ids))
        .order_by(ClusterMember.added_ts)
    )
    fetched = (await session.execute(query)).all()
    grouped: dict[str, list[str]] = {}
    for owner_cluster, member in fetched:
        if owner_cluster not in grouped:
            grouped[owner_cluster] = []
        grouped[owner_cluster].append(member)
    return grouped
|
||||
|
||||
|
||||
# Module-level repository instance; this is the name re-exported by the
# ``repos`` package for callers to import.
cluster_repo = _ClusterRepo()
|
||||
@ -0,0 +1,90 @@
|
||||
"""Repository for ``conversation_status`` — singleton bound to ``sqlite_manager``.
|
||||
|
||||
Upsert helpers for the (session_id, track) window pointer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.core.persistence.sqlite import RepoBase, session_scope
|
||||
|
||||
from ..sqlite_manager import get_session_factory
|
||||
from ..tables import ConversationStatus
|
||||
|
||||
|
||||
class _ConversationStatusRepo(RepoBase[ConversationStatus]):
    """Repository for the ``(app, project, session, track)`` status row.

    The two ``touch_*`` methods are thin wrappers over :meth:`_upsert`,
    each setting exactly one timestamp column.
    """

    model = ConversationStatus

    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
        """Return the session factory obtained from ``sqlite_manager``."""
        return get_session_factory()

    async def touch_last_message_ts(
        self,
        session_id: str,
        track: str,
        ts: dt.datetime,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> None:
        """Upsert (app, project, session, track); set ``last_message_ts``."""
        scope = {"app_id": app_id, "project_id": project_id}
        await self._upsert(session_id, track, last_message_ts=ts, **scope)

    async def touch_last_memcell_ts(
        self,
        session_id: str,
        track: str,
        ts: dt.datetime,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> None:
        """Upsert (app, project, session, track); set ``last_memcell_ts``."""
        scope = {"app_id": app_id, "project_id": project_id}
        await self._upsert(session_id, track, last_memcell_ts=ts, **scope)

    async def _upsert(
        self,
        session_id: str,
        track: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
        last_message_ts: dt.datetime | None = None,
        last_memcell_ts: dt.datetime | None = None,
    ) -> None:
        """Create the status row, or update the timestamps that were given.

        A new row stores both timestamps as passed (``None`` included).
        On an existing row a ``None`` timestamp leaves the stored value
        untouched.

        NOTE(review): this is a select-then-insert, not an atomic
        ``ON CONFLICT`` upsert — confirm callers never race on the same
        key, or that the table tolerates it.
        """
        key_filter = (
            ConversationStatus.app_id == app_id,
            ConversationStatus.project_id == project_id,
            ConversationStatus.session_id == session_id,
            ConversationStatus.track == track,
        )
        async with session_scope(self._factory) as s:
            lookup = await s.execute(select(ConversationStatus).where(*key_filter))
            current = lookup.scalars().first()
            if current is not None:
                # Only overwrite the columns the caller actually supplied.
                if last_message_ts is not None:
                    current.last_message_ts = last_message_ts
                if last_memcell_ts is not None:
                    current.last_memcell_ts = last_memcell_ts
            else:
                fresh = ConversationStatus(
                    app_id=app_id,
                    project_id=project_id,
                    session_id=session_id,
                    track=track,
                    last_message_ts=last_message_ts,
                    last_memcell_ts=last_memcell_ts,
                )
                s.add(fresh)
            await s.commit()
|
||||
|
||||
|
||||
# Module-level repository instance; this is the name re-exported by the
# ``repos`` package for callers to import.
conversation_status_repo = _ConversationStatusRepo()
|
||||
434
src/everos/infra/persistence/sqlite/repos/md_change_state.py
Normal file
434
src/everos/infra/persistence/sqlite/repos/md_change_state.py
Normal file
@ -0,0 +1,434 @@
|
||||
"""Repository for ``md_change_state`` — cascade work queue.
|
||||
|
||||
Sole writer of the table. The worker, watcher, scanner, and CLI all
|
||||
go through this repo so the state-machine invariants (``processing``
|
||||
claim semantics, retryable flag lifecycle) live in one place.
|
||||
|
||||
LSN ordering is **best-effort**, not strictly monotonic across
|
||||
concurrent writers: :meth:`upsert` derives ``lsn = MAX(lsn) + 1``
|
||||
which is a classic read-modify-write that two parallel writers could
|
||||
race on (BEGIN DEFERRED leaves the SELECT half unprotected; cross-
|
||||
process this is even more visible). The table schema does **not**
|
||||
declare ``lsn UNIQUE`` and no caller depends on strict monotonicity —
|
||||
the worker uses ``ORDER BY lsn LIMIT N`` for fairness only, and a
|
||||
collision merely reorders two rows by a few ms; both rows are still
|
||||
processed and the next upsert bumps the counter past the duplicate.
|
||||
If a future feature needs strict monotonicity (e.g. CDC / audit log),
|
||||
revisit by giving ``upsert`` its own ``BEGIN IMMEDIATE`` transaction.
|
||||
|
||||
Status values:
|
||||
|
||||
- ``pending`` — visible to the worker.
|
||||
- ``processing`` — internal claim state (one worker is on it).
|
||||
- ``done`` — handler succeeded.
|
||||
- ``failed`` — handler exhausted retries or hit unrecoverable error
|
||||
(see ``retryable`` for the eligibility flag).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
|
||||
from sqlalchemy import func, select, update
|
||||
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.component.utils.datetime import get_utc_now
|
||||
from everos.core.persistence.sqlite import RepoBase, session_scope
|
||||
|
||||
from ..sqlite_manager import get_session_factory
|
||||
from ..tables import MdChangeState
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class QueueSummary:
    """Immutable aggregate counts for ``cascade status`` CLI output.

    The internal ``processing`` rows are folded into ``pending`` so the
    public state machine (12 doc §6) stays three-valued.

    Attributes:
        pending: Rows the worker hasn't completed yet (includes
            ``processing``).
        done: Rows landed successfully.
        failed_retryable: ``status='failed' AND retryable=TRUE`` —
            eligible for ``cascade fix --apply`` re-enqueue.
        failed_permanent: ``status='failed' AND retryable=FALSE`` —
            requires the user to edit the md and re-save.
        max_lsn: Largest ``lsn`` ever assigned; 0 if the table is empty.
        last_processed_lsn: Largest ``lsn`` whose row has reached a
            terminal state (``done`` or ``failed``); 0 if no terminal
            rows yet.
    """

    pending: int
    done: int
    failed_retryable: int
    failed_permanent: int
    max_lsn: int
    last_processed_lsn: int
|
||||
|
||||
|
||||
class _MdChangeStateRepo(RepoBase[MdChangeState]):
|
||||
model = MdChangeState
|
||||
|
||||
def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
    """Return the session factory obtained from ``sqlite_manager``."""
    return get_session_factory()
|
||||
|
||||
# ── Writers: watcher / scanner / CLI sync ──────────────────────────────
|
||||
|
||||
async def upsert(
    self,
    md_path: str,
    *,
    kind: str,
    change_type: str,
    mtime: float,
) -> int:
    """Enqueue or re-enqueue ``md_path``; return the assigned LSN.

    Behaviour:

    - **New row** → inserted with ``status='pending'`` and
      ``lsn = MAX(lsn) + 1``; ``first_seen_at`` is set to now.
    - **Existing row** → ``kind`` / ``change_type`` / ``mtime`` are
      refreshed, ``last_changed_at`` is bumped, status goes back to
      ``pending``, ``retry_count`` / ``error`` / ``retryable`` /
      ``last_attempt_at`` are cleared, and a fresh LSN is assigned so
      the worker re-processes this path *after* anything queued in
      between. ``first_seen_at`` is left as it was.

    The fresh LSN on re-enqueue is what lets the worker rely on
    ``ORDER BY lsn`` without losing fairness when a file flickers in
    and out of the queue. The ``MAX(lsn)+1`` derivation is best-effort
    under concurrent writers — see the module docstring.

    Args:
        md_path: Conflict key of the queue row.
        kind: Stored in the ``kind`` column.
        change_type: Stored in the ``change_type`` column.
        mtime: Stored in the ``mtime`` column.

    Returns:
        The LSN written to the row.
    """
    now = get_utc_now()
    async with session_scope(self._factory) as s:
        new_lsn = await _next_lsn(s)
        # Columns written identically on first insert and on re-enqueue.
        refreshed = {
            "kind": kind,
            "change_type": change_type,
            "mtime": mtime,
            "last_changed_at": now,
            "lsn": new_lsn,
            "status": "pending",
            "retryable": None,
            "last_attempt_at": None,
            "retry_count": 0,
            "error": None,
        }
        enqueue = sqlite_insert(MdChangeState).values(
            md_path=md_path,
            first_seen_at=now,
            **refreshed,
        )
        enqueue = enqueue.on_conflict_do_update(
            index_elements=["md_path"],
            set_=refreshed,
        )
        await s.execute(enqueue)
        await s.commit()
        return new_lsn
|
||||
|
||||
async def force_enqueue(self, md_path: str, kind: str) -> int:
    """`cascade sync --path` entry: re-enqueue regardless of status.

    Delegates to :meth:`upsert` with ``change_type='modified'`` and
    ``mtime=0.0``; it exists under its own name because the CLI flow has
    no watcher / scanner event to attribute the row to.

    Args:
        md_path: Path of the markdown file to re-enqueue.
        kind: Forwarded to :meth:`upsert` unchanged.

    Returns:
        The LSN returned by :meth:`upsert`.
    """
    assigned_lsn = await self.upsert(
        md_path,
        kind=kind,
        change_type="modified",
        mtime=0.0,
    )
    return assigned_lsn
|
||||
|
||||
# ── Worker claim ───────────────────────────────────────────────────────
|
||||
|
||||
async def claim_one(self, md_path: str) -> MdChangeState | None:
    """Atomically transition one row ``pending → processing``.

    Implements the worker's claim contract: only the caller whose
    ``UPDATE`` reports ``rowcount == 1`` owns the row and should run the
    handler. Every other concurrent caller gets ``None`` and must move
    on (no exception — claim contention is not an error).

    Args:
        md_path: Key of the row to claim.

    Returns:
        The claimed row, re-read after the commit, or ``None`` when the
        row was absent or not ``pending``.
    """
    claim_stmt = (
        update(MdChangeState)
        .where(MdChangeState.md_path == md_path)
        .where(MdChangeState.status == "pending")
        .values(status="processing", last_attempt_at=get_utc_now())
    )
    async with session_scope(self._factory) as s:
        outcome = await s.execute(claim_stmt)
        await s.commit()
        won_claim = outcome.rowcount == 1
        return await s.get(MdChangeState, md_path) if won_claim else None
|
||||
|
||||
async def claim_pending_batch(self, limit: int = 100) -> list[MdChangeState]:
    """Claim up to ``limit`` pending rows in LSN order.

    Candidates are selected in LSN order, then each one is claimed with
    its own guarded ``UPDATE ... WHERE status='pending'``. A row is part
    of the returned batch only when that statement reports
    ``rowcount == 1``, i.e. this call performed the ``pending →
    processing`` transition. Rows a sibling worker claimed between the
    candidate SELECT and the UPDATE are therefore skipped instead of
    being handed to two workers.

    Args:
        limit: Maximum number of rows to claim; ``<= 0`` returns ``[]``
            without touching the database.

    Returns:
        The claimed rows (now ``status='processing'``) ordered by LSN;
        an empty list if nothing was pending or every candidate was
        taken by someone else.
    """
    if limit <= 0:
        return []
    now = get_utc_now()
    async with session_scope(self._factory) as s:
        candidates = (
            (
                await s.execute(
                    select(MdChangeState.md_path)
                    .where(MdChangeState.status == "pending")
                    .order_by(MdChangeState.lsn)
                    .limit(limit)
                )
            )
            .scalars()
            .all()
        )
        if not candidates:
            return []
        # Per-row claim: a bulk UPDATE only reports a count, which cannot
        # tell which of the candidates this call actually won.
        claimed: list[str] = []
        for candidate in candidates:
            result = await s.execute(
                update(MdChangeState)
                .where(MdChangeState.md_path == candidate)
                .where(MdChangeState.status == "pending")
                .values(status="processing", last_attempt_at=now)
            )
            if result.rowcount == 1:
                claimed.append(candidate)
        await s.commit()
        if not claimed:
            return []
        # Read after the commit, restricted to the rows won above. The
        # status filter drops any row a concurrent upsert has already
        # flipped back to ``pending``.
        rows = (
            (
                await s.execute(
                    select(MdChangeState)
                    .where(MdChangeState.md_path.in_(claimed))
                    .where(MdChangeState.status == "processing")
                    .order_by(MdChangeState.lsn)
                )
            )
            .scalars()
            .all()
        )
        return list(rows)
|
||||
|
||||
# ── Worker result reporting ────────────────────────────────────────────
|
||||
|
||||
async def mark_done(self, md_path: str) -> None:
    """Record a successful handler run: ``processing → done``.

    The update is filtered on ``status == "processing"``, so it matches
    nothing if a concurrent :meth:`upsert` (watcher / scanner
    re-enqueue) already moved the row back to ``pending`` while the
    handler was running. The next :meth:`claim_pending_batch` drain
    then re-runs the handler against the latest md state; dropping the
    stale ``done`` write is preferred over losing the new ``pending``.

    Args:
        md_path: Primary key of the row to finish.
    """
    success_values = {
        "status": "done",
        "last_attempt_at": get_utc_now(),
        "error": None,
        "retryable": None,
    }
    finish_stmt = (
        update(MdChangeState)
        .where(
            MdChangeState.md_path == md_path,
            MdChangeState.status == "processing",
        )
        .values(**success_values)
    )
    async with session_scope(self._factory) as s:
        await s.execute(finish_stmt)
        await s.commit()
|
||||
|
||||
async def mark_failed(
    self,
    md_path: str,
    *,
    retryable: bool,
    error: str,
    new_retry_count: int,
) -> None:
    """Record a failed handler run: ``processing → failed``.

    Args:
        md_path: The row's primary key.
        retryable: ``True`` for transient failures (HTTP 5xx,
            connection reset, 429), which ``cascade fix --apply`` will
            re-enqueue. ``False`` for unrecoverable failures (YAML
            parse, schema mismatch), which need a user edit.
        error: Truncated failure message for ``cascade fix`` output.
        new_retry_count: The retry count *after* this attempt; the
            caller knows whether it was a retry or the final attempt.

    Like :meth:`mark_done`, the write is filtered on
    ``status == "processing"``. If a concurrent watcher / scanner
    upsert reset the row to ``pending`` (file changed mid-processing),
    the failure verdict is stale. The next drain re-attempts against
    the new md state instead of stamping ``failed`` over the live
    pending row.
    """
    failure_values = {
        "status": "failed",
        "retryable": retryable,
        "last_attempt_at": get_utc_now(),
        "error": error,
        "retry_count": new_retry_count,
    }
    fail_stmt = (
        update(MdChangeState)
        .where(
            MdChangeState.md_path == md_path,
            MdChangeState.status == "processing",
        )
        .values(**failure_values)
    )
    async with session_scope(self._factory) as s:
        await s.execute(fail_stmt)
        await s.commit()
|
||||
|
||||
# ── Startup recovery ───────────────────────────────────────────────────
|
||||
|
||||
async def recover_orphan_processing(self) -> int:
    """Put every ``processing`` row back to ``pending``.

    Cascade runs single-process today, so a row still in ``processing``
    when the orchestrator boots is leftover from a prior crash (the
    worker died between :meth:`claim_pending_batch` and ``mark_done`` /
    ``mark_failed``). ``last_attempt_at`` is cleared along with the
    status. With no such rows the call is a clean no-op.

    Returns:
        The number of rows that were reset.
    """
    reset_stmt = (
        update(MdChangeState)
        .where(MdChangeState.status == "processing")
        .values(status="pending", last_attempt_at=None)
    )
    async with session_scope(self._factory) as s:
        outcome = await s.execute(reset_stmt)
        await s.commit()
        recovered = outcome.rowcount or 0
        return int(recovered)
|
||||
|
||||
# ── CLI fix / status ───────────────────────────────────────────────────
|
||||
|
||||
async def list_failed(self) -> list[MdChangeState]:
    """Fetch all rows with ``status='failed'``, ascending by LSN.

    Feeds the ``cascade fix`` preview table (the run without
    ``--apply``); the CLI splits the result by ``retryable`` into two
    sections.
    """
    failed_stmt = (
        select(MdChangeState)
        .where(MdChangeState.status == "failed")
        .order_by(MdChangeState.lsn)
    )
    async with session_scope(self._factory) as s:
        result = await s.execute(failed_stmt)
        return list(result.scalars().all())
|
||||
|
||||
async def reset_retryable_to_pending(self) -> int:
    """Re-enqueue every retryable failure (``cascade fix --apply``).

    Only rows with ``status='failed' AND retryable=TRUE`` are touched.
    They go back to ``pending`` with ``retryable`` / ``error`` cleared,
    ``retry_count`` zeroed and ``last_changed_at`` re-stamped. Rows with
    ``retryable=FALSE`` stay as they are: the user has to edit the md
    and re-save, after which the scanner / watcher re-enqueues them.

    Returns:
        The number of rows transitioned.
    """
    requeue_values = {
        "status": "pending",
        "retryable": None,
        "retry_count": 0,
        "error": None,
        "last_changed_at": get_utc_now(),
    }
    requeue_stmt = (
        update(MdChangeState)
        .where(
            MdChangeState.status == "failed",
            MdChangeState.retryable.is_(True),
        )
        .values(**requeue_values)
    )
    async with session_scope(self._factory) as s:
        outcome = await s.execute(requeue_stmt)
        await s.commit()
        requeued = outcome.rowcount or 0
        return int(requeued)
|
||||
|
||||
async def queue_summary(self) -> QueueSummary:
    """Collect the counters shown by the ``cascade status`` CLI.

    ``pending`` counts rows in either ``pending`` or ``processing``.
    ``last_processed_lsn`` is the highest LSN among ``done`` / ``failed``
    rows. Both LSN figures fall back to ``0`` on an empty match.
    """
    is_failed = MdChangeState.status == "failed"
    top_lsn = func.coalesce(func.max(MdChangeState.lsn), 0)
    async with session_scope(self._factory) as s:
        in_flight = MdChangeState.status.in_(["pending", "processing"])
        pending = await _count_where(s, in_flight)
        done = await _count_where(s, MdChangeState.status == "done")
        failed_retryable = await _count_where(
            s, is_failed & (MdChangeState.retryable.is_(True))
        )
        failed_permanent = await _count_where(
            s, is_failed & (MdChangeState.retryable.is_(False))
        )
        max_lsn_result = await s.execute(select(top_lsn))
        max_lsn = int(max_lsn_result.scalar_one())
        processed_stmt = select(top_lsn).where(
            MdChangeState.status.in_(["done", "failed"])
        )
        processed_result = await s.execute(processed_stmt)
        last_processed_lsn = int(processed_result.scalar_one())
        return QueueSummary(
            pending=pending,
            done=done,
            failed_retryable=failed_retryable,
            failed_permanent=failed_permanent,
            max_lsn=max_lsn,
            last_processed_lsn=last_processed_lsn,
        )
|
||||
|
||||
|
||||
async def _next_lsn(session: AsyncSession) -> int:
    """Compute the next global LSN as ``MAX(lsn) + 1``.

    Meant to run inside the same write transaction as the UPSERT, so
    that SQLite's WAL writer serialisation keeps two writers from
    observing the same ``MAX``. An empty table yields ``1``.
    """
    current_max_stmt = select(func.coalesce(func.max(MdChangeState.lsn), 0))
    current_max = (await session.execute(current_max_stmt)).scalar_one()
    return int(current_max) + 1
|
||||
|
||||
|
||||
async def _count_where(session: AsyncSession, predicate: object) -> int:
    """Run ``SELECT COUNT(*) WHERE <predicate>`` and return a Python int."""
    counting = select(func.count()).select_from(MdChangeState)
    stmt = counting.where(predicate)  # type: ignore[arg-type]
    outcome = await session.execute(stmt)
    return int(outcome.scalar_one())
|
||||
|
||||
|
||||
# Module-level repo instance, created once at import time; importers of
# this name share the same object.
md_change_state_repo = _MdChangeStateRepo()
|
||||
52
src/everos/infra/persistence/sqlite/repos/memcell.py
Normal file
52
src/everos/infra/persistence/sqlite/repos/memcell.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""Repository for ``memcell`` table — singleton bound to ``sqlite_manager``.
|
||||
|
||||
Pure persistence: callers build the SQLModel ``Memcell`` rows (including
|
||||
``message_ids_json`` / ``sender_ids_json``) and hand them in. The pipeline
|
||||
is responsible for mapping algo-side messages back to everos
|
||||
``message_id`` because algo's ``Message`` does not carry per-message
|
||||
identifiers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.core.persistence.sqlite import RepoBase, session_scope
|
||||
|
||||
from ..sqlite_manager import get_session_factory
|
||||
from ..tables import Memcell
|
||||
|
||||
|
||||
class _MemcellRepo(RepoBase[Memcell]):
    """Persistence-only repository for :class:`Memcell` rows."""

    model = Memcell

    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
        """Hand back the session factory owned by ``sqlite_manager``."""
        return get_session_factory()

    async def insert_many(self, rows: list[Memcell]) -> list[Memcell]:
        """Persist caller-built MemCell rows in a single transaction.

        Every row is refreshed after the commit and the same list object
        is returned.
        """
        async with session_scope(self._factory) as s:
            s.add_all(rows)
            await s.commit()
            for inserted in rows:
                await s.refresh(inserted)
            return rows

    async def find_by_ids(self, memcell_ids: list[str]) -> list[Memcell]:
        """Bulk-load rows by primary key, keeping the caller's order.

        Ids with no matching row are dropped from the result. An empty
        input returns ``[]`` without opening a session.

        Used by offline strategies that pull every memcell in a cluster
        (membership lives in :class:`ClusterMember` and is supplied to
        the strategy via :class:`everalgo.clustering.Cluster.members`).
        """
        if not memcell_ids:
            return []
        lookup_stmt = select(Memcell).where(Memcell.memcell_id.in_(memcell_ids))
        async with session_scope(self._factory) as s:
            result = await s.execute(lookup_stmt)
            found = {}
            for cell in list(result.scalars().all()):
                found[cell.memcell_id] = cell
            ordered = []
            for wanted in memcell_ids:
                if wanted in found:
                    ordered.append(found[wanted])
            return ordered
|
||||
|
||||
|
||||
# Module-level repo instance, created once at import time; importers of
# this name share the same object.
memcell_repo = _MemcellRepo()
|
||||
@ -0,0 +1,83 @@
|
||||
"""Repository for ``unprocessed_buffer`` — chat message accumulator.
|
||||
|
||||
Singleton bound to the process-wide ``sqlite_manager`` session factory.
|
||||
|
||||
Pure SQLModel persistence: row ↔ domain conversion lives in
|
||||
``everos.memory.extract.pipeline`` (the only caller that needs it).
|
||||
|
||||
Exposes:
|
||||
|
||||
- :meth:`list_for_track` — load all rows of (session_id, track), ordered by ts.
|
||||
- :meth:`replace` — atomically swap all rows of (session_id, track) for a
|
||||
freshly-built list of :class:`UnprocessedBuffer` rows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.core.persistence.sqlite import RepoBase, session_scope
|
||||
|
||||
from ..sqlite_manager import get_session_factory
|
||||
from ..tables import UnprocessedBuffer
|
||||
|
||||
|
||||
class _UnprocessedBufferRepo(RepoBase[UnprocessedBuffer]):
    """Persistence-only repository for :class:`UnprocessedBuffer` rows."""

    model = UnprocessedBuffer

    def _factory_lookup(self) -> async_sessionmaker[AsyncSession]:
        """Hand back the session factory owned by ``sqlite_manager``."""
        return get_session_factory()

    @staticmethod
    def _slice_predicates(
        app_id: str, project_id: str, session_id: str, track: str
    ) -> tuple:
        """Build the predicates selecting one (app, project, session, track)."""
        return (
            UnprocessedBuffer.app_id == app_id,
            UnprocessedBuffer.project_id == project_id,
            UnprocessedBuffer.session_id == session_id,
            UnprocessedBuffer.track == track,
        )

    async def list_for_track(
        self,
        session_id: str,
        track: str,
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> list[UnprocessedBuffer]:
        """Load the (app, project, session, track) slice, oldest timestamp first."""
        scope = self._slice_predicates(app_id, project_id, session_id, track)
        stmt = (
            select(UnprocessedBuffer)
            .where(*scope)
            .order_by(UnprocessedBuffer.timestamp.asc())  # type: ignore[union-attr]
        )
        async with session_scope(self._factory) as s:
            result = await s.execute(stmt)
            return list(result.scalars().all())

    async def replace(
        self,
        session_id: str,
        track: str,
        rows: list[UnprocessedBuffer],
        *,
        app_id: str = "default",
        project_id: str = "default",
    ) -> None:
        """Swap the whole (app, project, session, track) slice for ``rows``.

        The delete and the insert share one transaction. Passing an
        empty ``rows`` just clears the slice. The delete is filtered by
        the ``app_id`` / ``project_id`` arguments so one space's buffer
        never wipes another's. The rows themselves are inserted as
        given, so the caller is expected to build them for that same
        scope.
        """
        scope = self._slice_predicates(app_id, project_id, session_id, track)
        wipe_stmt = delete(UnprocessedBuffer).where(*scope)
        async with session_scope(self._factory) as s:
            await s.execute(wipe_stmt)
            if rows:
                s.add_all(rows)
            await s.commit()
|
||||
|
||||
|
||||
# Module-level repo instance, created once at import time; importers of
# this name share the same object.
unprocessed_buffer_repo = _UnprocessedBufferRepo()
|
||||
63
src/everos/infra/persistence/sqlite/sqlite_manager.py
Normal file
63
src/everos/infra/persistence/sqlite/sqlite_manager.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""SQLite engine + session-factory singletons (lazy + process-wide).
|
||||
|
||||
The single place that owns the SQLite **runtime state**: the async
|
||||
SQLAlchemy engine and the session factory bound to it. Built lazily on
|
||||
first :func:`get_engine` / :func:`get_session_factory` call from
|
||||
:func:`everos.config.load_settings` + :meth:`MemoryRoot.default`. The
|
||||
:class:`SqliteLifespanProvider` calls :func:`dispose_engine` on shutdown
|
||||
to drain the connection pool; in scripts you can call it manually.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
|
||||
|
||||
from everos.config import load_settings
|
||||
from everos.core.observability.logging import get_logger
|
||||
from everos.core.persistence import (
|
||||
MemoryRoot,
|
||||
create_session_factory,
|
||||
create_system_engine,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_engine: AsyncEngine | None = None
|
||||
_session_factory: async_sessionmaker[AsyncSession] | None = None
|
||||
|
||||
|
||||
def get_engine() -> AsyncEngine:
    """Return the process-wide async SQLAlchemy engine, building it lazily.

    The first call loads settings, resolves ``MemoryRoot.default()``,
    calls its ``ensure()`` and creates the engine from the root's
    ``system_db`` and ``Settings.sqlite``. Later calls return the cached
    instance.
    """
    global _engine
    if _engine is not None:
        return _engine
    settings = load_settings()
    root = MemoryRoot.default()
    root.ensure()
    _engine = create_system_engine(root.system_db, settings.sqlite)
    logger.info("sqlite_engine_built", db_path=str(root.system_db))
    return _engine
|
||||
|
||||
|
||||
def get_session_factory() -> async_sessionmaker[AsyncSession]:
    """Return the process-wide async session factory, building it lazily.

    The factory is created from :func:`get_engine` on first use and
    cached in the module afterwards.
    """
    global _session_factory
    if _session_factory is not None:
        return _session_factory
    _session_factory = create_session_factory(get_engine())
    return _session_factory
|
||||
|
||||
|
||||
async def dispose_engine() -> None:
    """Dispose the cached engine, if any, and forget both singletons.

    Safe to call repeatedly: with no engine built, only the cached
    references are cleared.
    """
    global _engine, _session_factory
    engine_was_built = _engine is not None
    if engine_was_built:
        await _engine.dispose()
        logger.info("sqlite_engine_disposed")
    _engine = None
    _session_factory = None
|
||||
24
src/everos/infra/persistence/sqlite/tables/__init__.py
Normal file
24
src/everos/infra/persistence/sqlite/tables/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Business SQLModel table schemas.
|
||||
|
||||
Each business table lives in its own module here (e.g. ``memcell.py``,
|
||||
``unprocessed_buffer.py``). The package ``__init__`` re-exports them so
|
||||
``SQLModel.metadata.create_all`` (run by
|
||||
:class:`everos.core.lifespan.SqliteLifespanProvider` at startup) sees
|
||||
every registered table.
|
||||
"""
|
||||
|
||||
from .cluster import Cluster as Cluster
|
||||
from .cluster import ClusterMember as ClusterMember
|
||||
from .conversation_status import ConversationStatus as ConversationStatus
|
||||
from .md_change_state import MdChangeState as MdChangeState
|
||||
from .memcell import Memcell as Memcell
|
||||
from .unprocessed_buffer import UnprocessedBuffer as UnprocessedBuffer
|
||||
|
||||
# Public names of the tables package: one entry per table class
# re-exported by the imports above.
__all__ = [
    "Cluster",
    "ClusterMember",
    "ConversationStatus",
    "MdChangeState",
    "Memcell",
    "UnprocessedBuffer",
]
|
||||
99
src/everos/infra/persistence/sqlite/tables/cluster.py
Normal file
99
src/everos/infra/persistence/sqlite/tables/cluster.py
Normal file
@ -0,0 +1,99 @@
|
||||
"""``cluster`` — persisted snapshot of one ``everalgo.clustering.Cluster``.
|
||||
|
||||
Mirrors the algo-side frozen value object (centroid + count + last_ts +
|
||||
preview) plus everos engineering metadata (``owner_id`` / ``owner_type``
|
||||
/ ``kind``) so a single SQLite table can hold both the user-memory cluster
|
||||
track (episode embeddings) and the agent-case cluster track (task_intent
|
||||
embeddings). The ``members`` field on the algo type is persisted in the
|
||||
sibling :class:`ClusterMember` table to keep the relation queryable from
|
||||
both directions (forward by ``cluster_id``, reverse by ``(member_type,
|
||||
member_id)``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import Index, LargeBinary
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime
|
||||
from everos.core.persistence.sqlite import BaseTable, Field
|
||||
from everos.core.persistence.sqlite.base import UtcDateTimeColumn
|
||||
|
||||
|
||||
class Cluster(BaseTable, table=True):
    """One row per cluster. PK ``cluster_id`` (``cl_<12hex>``).

    Rows are scoped by ``(app_id, project_id, owner_id, kind)``; the
    composite index in ``__table_args__`` covers exactly that tuple.
    Binary and list-valued data are stored serialised (``centroid_blob``
    as raw bytes, ``preview_json`` as a JSON string).
    """

    __tablename__ = "cluster"  # type: ignore[assignment]
    __table_args__ = (
        # List all clusters for one (app, project, owner, kind) on each strategy
        # invocation; scope-first composite so clustering never mixes spaces.
        Index("ix_cluster_owner_kind", "app_id", "project_id", "owner_id", "kind"),
    )

    cluster_id: str = Field(primary_key=True)
    """Caller-minted opaque id (algo type carries it through verbatim).
    Format: ``cl_<12 hex chars>`` to mirror :func:`memcell._mint_memcell_id`."""

    # The docstring below documents both ``app_id`` and ``project_id``.
    app_id: str = Field(default="default")
    project_id: str = Field(default="default")
    """App / project scope segments. The aggregation key is
    ``(app_id, project_id, owner_id, kind)`` so a cluster set never spans
    two spaces."""

    # ``index=True`` requests a single-column index on ``owner_id`` in
    # addition to the composite index declared in ``__table_args__``.
    owner_id: str = Field(index=True)
    """``user_id`` (kind=``user_memory``) or ``agent_id`` (kind=``agent_case``)."""

    owner_type: str
    """``"user"`` or ``"agent"`` — redundant with ``kind`` today but kept
    explicit so future kinds (e.g. tenant-level) can plug in without a
    schema change."""

    kind: str
    """``"user_memory"`` (episode-vector cluster, drives profile extraction)
    or ``"agent_case"`` (task_intent-vector cluster, drives skill extraction)."""

    centroid_blob: bytes = Field(sa_type=LargeBinary)
    """``np.float32`` centroid serialised via ``ndarray.tobytes()``. The
    repo round-trips bytes ↔ ``np.ndarray`` so callers see the algo type."""

    count: int
    """Number of members merged into this cluster (algo-maintained)."""

    last_ts_ms: int
    """Most recent member's timestamp as Unix epoch milliseconds — matches
    :attr:`everalgo.clustering.Cluster.last_ts` exactly so no lossy
    datetime ↔ int conversion is needed across the storage boundary."""

    preview_json: str
    """JSON-encoded ``list[str]`` — short text samples used by
    :func:`cluster_by_llm` ranking. Repo round-trips JSON ↔ list."""
|
||||
|
||||
|
||||
class ClusterMember(BaseTable, table=True):
    """One row per (cluster, entity) link.

    Forward lookup (``cluster_id → list[member_id]``) is the algo-side
    ``Cluster.members`` view. Reverse lookup (``(member_type, member_id)
    → cluster_id``) is served by the composite index below — needed when
    a downstream consumer holds an entity id and wants its cluster.

    ``member_type`` is informational on the row (the parent ``Cluster.kind``
    already disambiguates), but kept explicit so the reverse index can be
    a single composite (member_type, member_id) without joining back.

    The primary key is the composite ``(cluster_id, member_id)``: both
    fields are declared with ``primary_key=True``.
    """

    __tablename__ = "cluster_member"  # type: ignore[assignment]
    # Reverse-lookup index: (member_type, member_id) → row.
    __table_args__ = (Index("ix_cluster_member_reverse", "member_type", "member_id"),)

    # First half of the composite PK; foreign key into ``cluster.cluster_id``.
    cluster_id: str = Field(primary_key=True, foreign_key="cluster.cluster_id")
    """Parent cluster id."""

    # Second half of the composite PK.
    member_id: str = Field(primary_key=True)
    """``memcell_id`` (member_type=``memcell``) or md entry_id
    (member_type=``case``) — the entity grouped into this cluster."""

    member_type: str
    """``"memcell"`` or ``"case"``. Echoes the parent cluster's ``kind``
    domain but kept on the row so the reverse index is self-contained."""

    added_ts: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
    """When this entity was first attached to the cluster."""
|
||||
@ -0,0 +1,38 @@
|
||||
"""``conversation_status`` — window pointer per (app, project, session, track).
|
||||
|
||||
The window pointer is scoped by ``app_id`` / ``project_id`` so the same
|
||||
``session_id`` may recur in different spaces without colliding; those two
|
||||
segments lead the composite ``UniqueConstraint``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import UniqueConstraint
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime
|
||||
from everos.core.persistence.sqlite import BaseTable, Field
|
||||
from everos.core.persistence.sqlite.base import UtcDateTimeColumn
|
||||
|
||||
|
||||
class ConversationStatus(BaseTable, table=True):
    """One row per (app, project, session, track). Tracks latest msg / memcell ts.

    Uniqueness of the 4-tuple is enforced by the ``UniqueConstraint`` in
    ``__table_args__``; the integer ``id`` is a separate surrogate key.
    """

    __tablename__ = "conversation_status"  # type: ignore[assignment]
    __table_args__ = (
        # At most one pointer row per (app_id, project_id, session_id, track).
        UniqueConstraint(
            "app_id",
            "project_id",
            "session_id",
            "track",
            name="uq_conversation_status_session_track",
        ),
    )

    # Surrogate primary key; defaults to ``None`` on unsaved rows
    # (presumably assigned by the database on insert — confirm).
    id: int | None = Field(default=None, primary_key=True)
    # The docstring below documents both ``app_id`` and ``project_id``.
    app_id: str = Field(default="default")
    project_id: str = Field(default="default")
    """App / project scope segments (default ``"default"``)."""
    # Single-column index on ``session_id`` on top of the unique constraint.
    session_id: str = Field(index=True)
    track: str
    # Both timestamps are optional and default to ``None`` on a new row.
    last_message_ts: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
    last_memcell_ts: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
|
||||
119
src/everos/infra/persistence/sqlite/tables/md_change_state.py
Normal file
119
src/everos/infra/persistence/sqlite/tables/md_change_state.py
Normal file
@ -0,0 +1,119 @@
|
||||
"""``md_change_state`` — cascade work queue.
|
||||
|
||||
One row per markdown path. Both watcher (real-time fsevents) and
|
||||
scanner (periodic sweep) UPSERT into this table; the worker consumes
|
||||
``pending`` rows in ``lsn`` order, transitions them through an
|
||||
internal ``processing`` claim state, and lands them in ``done`` or
|
||||
``failed`` (with a ``retryable`` flag).
|
||||
|
||||
Schema sourced from ``12_cascade_design.md`` §4.1 + decisions DD-3 …
|
||||
DD-12; the four indexes below are required by ``13_cascade_design.md``
|
||||
§7 status / fix queries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import Index, text
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime, get_utc_now
|
||||
from everos.core.persistence.sqlite import BaseTable, Field
|
||||
from everos.core.persistence.sqlite.base import UtcDateTimeColumn
|
||||
|
||||
|
||||
class MdChangeState(BaseTable, table=True):
    """One row per markdown path; UPSERT-driven work queue for cascade.

    The public state machine is the 3-tuple ``pending`` / ``done`` /
    ``failed`` (12 doc §6). ``processing`` is an internal claim state
    used by the repo's claim methods (``claim_one`` /
    ``claim_pending_batch``) and rolled back into ``pending`` for CLI /
    status output (16 doc §4.2 — DD-12 keeps the public surface clean).
    """

    __tablename__ = "md_change_state"  # type: ignore[assignment]
    __table_args__ = (
        # Worker scans pending rows in lsn order — partial index drops
        # done/failed rows from the b-tree and keeps it tight.
        Index(
            "idx_md_change_pending",
            "status",
            "lsn",
            sqlite_where=text("status = 'pending'"),
        ),
        # `cascade fix --apply` only ever touches failed + retryable=TRUE
        # rows — partial index makes that pass essentially O(retryable).
        Index(
            "idx_md_change_retryable",
            "status",
            "retryable",
            sqlite_where=text("status = 'failed' AND retryable = 1"),
        ),
        # Scanner reverse-reconcile (disk → state) compares mtime.
        Index("idx_md_change_mtime", "mtime"),
        # `cascade status` aggregates by kind.
        # NOTE(review): ``kind`` is also declared with ``index=True`` below,
        # which looks like a second index on the same column — confirm
        # whether both are intended.
        Index("idx_md_change_kind", "kind"),
    )

    md_path: str = Field(primary_key=True)
    """Path relative to the memory-root (e.g. ``users/u_jason/
    episodes/episode-2026-05-12.md``). Every reverse-link anchors here."""

    kind: str = Field(nullable=False, index=True)
    """Kind registry name (e.g. ``"episode"``); worker dispatches the
    matching handler."""

    change_type: str = Field(nullable=False)
    """``"added"`` | ``"modified"`` | ``"deleted"``. A hint for the
    worker — handler re-derives truth from the actual file state."""

    mtime: float = Field(default=0.0, nullable=False)
    """File mtime captured when the row was last UPSERTed. Scanner
    compares this against the on-disk mtime to identify dirty paths."""

    first_seen_at: UtcDatetime = Field(
        default_factory=get_utc_now, sa_type=UtcDateTimeColumn
    )
    """When the path was first enqueued."""

    last_changed_at: UtcDatetime = Field(
        default_factory=get_utc_now, sa_type=UtcDateTimeColumn
    )
    """Most recent UPSERT timestamp (re-stamped on every re-enqueue)."""

    # Has no default: callers must supply the LSN when building a row.
    lsn: int = Field(nullable=False, index=True)
    """Global monotonic sequence (``MAX(lsn) + 1`` per UPSERT). Worker
    processes pending rows in ascending lsn order; the gap between
    ``MAX(lsn)`` and the last processed lsn is the queue lag."""

    status: str = Field(default="pending", nullable=False, index=True)
    """Lifecycle:

    - ``"pending"`` — waiting for the worker.
    - ``"processing"`` — claimed by a worker (internal; CLI rolls into
    pending for display).
    - ``"done"`` — handler completed successfully.
    - ``"failed"`` — handler exhausted retries or hit an
    unrecoverable error (see :attr:`retryable`).
    """

    retryable: bool | None = Field(default=None)
    """Meaningful only when ``status='failed'``.

    - ``TRUE`` — RecoverableError exhausted MAX_RETRY; ``cascade fix
    --apply`` will re-enqueue this row (pending, retry_count reset).
    - ``FALSE`` — UnrecoverableError (malformed YAML, schema error
    etc.); requires editing the md and re-saving.
    - ``NULL`` — not a failed row (pending / processing / done).
    """

    last_attempt_at: UtcDatetime | None = Field(default=None, sa_type=UtcDateTimeColumn)
    """Timestamp of the most recent worker attempt (success or
    failure)."""

    retry_count: int = Field(default=0, nullable=False)
    """Number of retries the worker has *actually issued* (the first
    attempt does not count). Reaches MAX_RETRY (default 3) before the
    row transitions to ``failed`` with ``retryable=TRUE``."""

    error: str | None = Field(default=None)
    """Most recent failure message (truncated upstream if needed)."""
|
||||
55
src/everos/infra/persistence/sqlite/tables/memcell.py
Normal file
55
src/everos/infra/persistence/sqlite/tables/memcell.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""``memcell`` — metadata + payload archive for boundary-detected MemCells.
|
||||
|
||||
Holds ``message_ids_json`` / ``sender_ids_json`` (JSON arrays of audit
|
||||
ids) plus ``payload_json`` — the full :class:`everalgo.types.MemCell`
|
||||
serialised via ``model_dump_json``. The payload is what
|
||||
``unprocessed_buffer`` cannot keep (boundary's delete-then-insert clears
|
||||
the staging slice once messages fold into a cell): downstream offline
|
||||
strategies that need the raw chat messages (e.g. profile extraction)
|
||||
deserialise the payload back into an algo ``MemCell``. Episode markdown
|
||||
still carries the LLM-synthesised narrative; ``payload_json`` is the
|
||||
chat-stream archive that narrative was distilled from.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import Index
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime
|
||||
from everos.core.persistence.sqlite import BaseTable, Field
|
||||
from everos.core.persistence.sqlite.base import UtcDateTimeColumn
|
||||
|
||||
|
||||
class Memcell(BaseTable, table=True):
    """One row per MemCell. PK ``memcell_id`` (uuid4).

    The ``*_json`` columns hold JSON text: the repository stores them
    as-is, and callers build and parse the payloads.
    """

    __tablename__ = "memcell"  # type: ignore[assignment]
    __table_args__ = (
        # Scope-first composite: app/project partition the lookup before the
        # session window so cross-(app, project) rows never share an index slot.
        Index(
            "ix_memcell_session",
            "app_id",
            "project_id",
            "session_id",
            "track",
            "timestamp",
        ),
    )

    memcell_id: str = Field(primary_key=True)
    # The docstring below documents both ``app_id`` and ``project_id``.
    app_id: str = Field(default="default")
    project_id: str = Field(default="default")
    """App / project scope segments. Default to ``"default"`` so the column is
    always populated; callers in a non-default space pass real ids."""
    # Single-column index on ``session_id`` on top of the composite above.
    session_id: str = Field(index=True)
    track: str
    raw_type: str
    # JSON arrays of audit ids (message ids / sender ids), per the
    # module docstring.
    message_ids_json: str
    sender_ids_json: str
    payload_json: str
    """``MemCell.model_dump_json()`` — the full algo-side MemCell (items =
    chat messages / tool calls) serialised at boundary time so offline
    strategies can deserialise it back into an algo MemCell long after
    ``unprocessed_buffer`` has dropped the staging rows."""
    # Last column of the ``ix_memcell_session`` composite index.
    timestamp: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
|
||||
@ -0,0 +1,52 @@
|
||||
"""``unprocessed_buffer`` — chat-stream messages waiting on boundary detection.
|
||||
|
||||
Schema property: presence in the table = pending; absence = consumed.
|
||||
There is no ``consumed`` column. Pipeline uses ``replace(session, track,
|
||||
remaining)`` to atomically rewrite the (session, track) slice each turn.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import Index
|
||||
|
||||
from everos.component.utils.datetime import UtcDatetime
|
||||
from everos.core.persistence.sqlite import BaseTable, Field
|
||||
from everos.core.persistence.sqlite.base import UtcDateTimeColumn
|
||||
|
||||
|
||||
class UnprocessedBuffer(BaseTable, table=True):
    """One row per unprocessed message. PK ``message_id``.

    There is no "consumed" flag: a message is pending for as long as its
    row exists (see the module docstring).
    """

    __tablename__ = "unprocessed_buffer"  # type: ignore[assignment]
    __table_args__ = (
        # Scope-first composite: app/project partition the (session, track)
        # staging slice so different spaces never share a buffer window.
        Index(
            "ix_unprocessed_buffer_lookup",
            "app_id",
            "project_id",
            "session_id",
            "track",
            "timestamp",
        ),
    )

    message_id: str = Field(primary_key=True)
    # The docstring below documents both ``app_id`` and ``project_id``.
    app_id: str = Field(default="default")
    project_id: str = Field(default="default")
    """App / project scope segments (default ``"default"``)."""
    # ``session_id`` and ``track`` each get a single-column index in
    # addition to the composite lookup index above.
    session_id: str = Field(index=True)
    track: str = Field(index=True)
    sender_id: str
    sender_name: str | None = None
    role: str
    # Last column of the composite lookup index.
    timestamp: UtcDatetime = Field(sa_type=UtcDateTimeColumn)
    # JSON-serialised raw ContentItem list (mirrors src_old
    # RawMessage.content_items). Keeps the original multimodal payload
    # available so a future parser can reach back to image / audio / etc.
    content_items_json: str
    # Derived plain-text concatenation of ``type=text`` entries — what
    # downstream LLM-facing extractors and md writer consume today.
    text: str
    # Optional tool-call fields; both default to ``None``.
    tool_calls_json: str | None = None
    tool_call_id: str | None = None
|
||||
Reference in New Issue
Block a user