"""Metadata repositories for Memory Gateway. SQLite is the default POC store. The in-memory implementation is retained for small isolated tests and for cases where persistence is explicitly disabled. """ from __future__ import annotations import json import sqlite3 from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Iterable, Optional, Protocol from .backend_contracts import BackendOperation, CommitJob, OutboxEvent, OutboxEventStatus from .config import get_config from .schemas import AuditLog, EpisodeRecord, MemoryRecord, ProfileRecord, UserRecord from .schemas_v2 import BackendRefStatus, BackendType, MemoryRef, MemoryRefType class MetadataRepository(Protocol): def create_user(self, user: UserRecord) -> UserRecord: ... def get_user(self, user_id: str) -> Optional[UserRecord]: ... def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord: ... def get_memory(self, memory_id: str) -> Optional[MemoryRecord]: ... def delete_memory(self, memory_id: str) -> bool: ... def list_memories(self) -> Iterable[MemoryRecord]: ... def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord: ... def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]: ... def get_profile(self, user_id: str) -> Optional[ProfileRecord]: ... def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord: ... def add_audit(self, audit: AuditLog) -> AuditLog: ... def list_audit(self, limit: int = 100) -> list[AuditLog]: ... def save_memory_ref(self, ref: MemoryRef) -> MemoryRef: ... def get_memory_ref(self, ref_id: str) -> MemoryRef | None: ... def list_memory_refs( self, gateway_id: str | None = None, workspace_id: str | None = None, user_id: str | None = None, agent_id: str | None = None, session_id: str | None = None, namespace: str | None = None, backend_type: BackendType | str | None = None, ref_type: MemoryRefType | str | None = None, status: BackendRefStatus | str | None = None, limit: int = 100, ) -> list[MemoryRef]: ... def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent: ... def list_outbox_events( self, status: OutboxEventStatus | str | None = None, backend_type: BackendType | str | None = None, operation: BackendOperation | str | None = None, gateway_id: str | None = None, payload_ref: str | None = None, limit: int = 100, ) -> list[OutboxEvent]: ... def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]: ... def claim_outbox_event(self, event_id: str) -> OutboxEvent | None: ... def claim_pending_outbox_events( self, limit: int, worker_id: str, lease_seconds: int, ) -> list[OutboxEvent]: ... def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]: ... def update_outbox_event_status( self, event_id: str, status: OutboxEventStatus | str, last_error: str | None = None, ) -> OutboxEvent | None: ... def save_commit_job(self, job: CommitJob) -> CommitJob: ... def get_commit_job(self, job_id: str) -> CommitJob | None: ... def update_commit_job_status( self, job_id: str, status: str, error_message: str | None = None, created_refs_count: int | None = None, ) -> CommitJob | None: ... def count_memory_refs( self, gateway_id: str | None = None, session_id: str | None = None, status: BackendRefStatus | str | None = None, ) -> int: ... 


def _json_dump_model(model) -> str:
    return json.dumps(model.model_dump(mode="json"), ensure_ascii=False)


def _json_load_model(model_cls, payload: str):
    return model_cls.model_validate(json.loads(payload))


def _enum_value(value):
    return value.value if hasattr(value, "value") else value


def _safe_timedelta(seconds: int) -> timedelta:
    return timedelta(seconds=max(1, int(seconds)))


class InMemoryRepository:
    """Dict-backed store for isolated tests and persistence-disabled runs."""

    def __init__(self) -> None:
        self.users: dict[str, UserRecord] = {}
        self.memories: dict[str, MemoryRecord] = {}
        self.episodes: dict[str, EpisodeRecord] = {}
        self.profiles: dict[str, ProfileRecord] = {}
        self.audit_logs: list[AuditLog] = []
        self.memory_refs: dict[str, MemoryRef] = {}
        self.outbox_events: dict[str, OutboxEvent] = {}
        self.commit_jobs: dict[str, CommitJob] = {}

    def create_user(self, user: UserRecord) -> UserRecord:
        now = datetime.now(timezone.utc)
        user.created_at = now
        user.updated_at = now
        self.users[user.id] = user
        self.profiles.setdefault(
            user.id,
            ProfileRecord(user_id=user.id, namespace=user.profile_namespace or f"user/{user.id}/profile"),
        )
        return user

    def get_user(self, user_id: str) -> Optional[UserRecord]:
        return self.users.get(user_id)

    def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord:
        now = datetime.now(timezone.utc)
        existing = self.memories.get(memory.id)
        if existing:
            memory.version = existing.version + 1
            memory.created_at = existing.created_at
        memory.updated_at = now
        self.memories[memory.id] = memory
        return memory

    def get_memory(self, memory_id: str) -> Optional[MemoryRecord]:
        return self.memories.get(memory_id)

    def delete_memory(self, memory_id: str) -> bool:
        return self.memories.pop(memory_id, None) is not None

    def list_memories(self) -> Iterable[MemoryRecord]:
        return list(self.memories.values())

    def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord:
        self.episodes[episode.id] = episode
        return episode

    def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]:
        return [episode for episode in self.episodes.values() if episode.session_id == session_id]

    def get_profile(self, user_id: str) -> Optional[ProfileRecord]:
        return self.profiles.get(user_id)

    def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord:
        profile.updated_at = datetime.now(timezone.utc)
        profile.version += 1
        self.profiles[profile.user_id] = profile
        return profile

    def add_audit(self, audit: AuditLog) -> AuditLog:
        self.audit_logs.append(audit)
        return audit

    def list_audit(self, limit: int = 100) -> list[AuditLog]:
        return self.audit_logs[-limit:]

    def save_memory_ref(self, ref: MemoryRef) -> MemoryRef:
        now = datetime.now(timezone.utc)
        existing = self.memory_refs.get(ref.id)
        if existing:
            ref.created_at = existing.created_at
        ref.updated_at = now
        self.memory_refs[ref.id] = ref
        return ref

    def get_memory_ref(self, ref_id: str) -> MemoryRef | None:
        return self.memory_refs.get(ref_id)

    def list_memory_refs(
        self,
        gateway_id: str | None = None,
        workspace_id: str | None = None,
        user_id: str | None = None,
        agent_id: str | None = None,
        session_id: str | None = None,
        namespace: str | None = None,
        backend_type: BackendType | str | None = None,
        ref_type: MemoryRefType | str | None = None,
        status: BackendRefStatus | str | None = None,
        limit: int = 100,
    ) -> list[MemoryRef]:
        refs = list(self.memory_refs.values())

        def matches(ref: MemoryRef) -> bool:
            return (
                (workspace_id is None or ref.workspace_id == workspace_id)
                and (gateway_id is None or ref.gateway_id == gateway_id)
                and (user_id is None or ref.user_id == user_id)
                and (agent_id is None or ref.agent_id == agent_id)
                and (session_id is None or ref.session_id == session_id)
                and (namespace is None or ref.namespace == namespace)
                and (backend_type is None or ref.backend_type.value == _enum_value(backend_type))
                and (ref_type is None or ref.ref_type.value == _enum_value(ref_type))
                and (status is None or ref.status.value == _enum_value(status))
            )

        refs = [ref for ref in refs if matches(ref)]
        refs.sort(key=lambda ref: ref.created_at, reverse=True)
        return refs[:limit]

    def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent:
        now = datetime.now(timezone.utc)
        existing = self.outbox_events.get(event.id)
        if existing:
            event.created_at = existing.created_at
        event.updated_at = now
        self.outbox_events[event.id] = event
        return event

    def list_outbox_events(
        self,
        status: OutboxEventStatus | str | None = None,
        backend_type: BackendType | str | None = None,
        operation: BackendOperation | str | None = None,
        gateway_id: str | None = None,
        payload_ref: str | None = None,
        limit: int = 100,
    ) -> list[OutboxEvent]:
        events = list(self.outbox_events.values())

        def matches(event: OutboxEvent) -> bool:
            return (
                (status is None or event.status.value == _enum_value(status))
                and (backend_type is None or event.backend_type.value == _enum_value(backend_type))
                and (operation is None or event.operation.value == _enum_value(operation))
                and (gateway_id is None or event.gateway_id == gateway_id)
                and (payload_ref is None or event.payload_ref == payload_ref)
            )

        events = [event for event in events if matches(event)]
        events.sort(key=lambda event: event.created_at, reverse=True)
        return events[:limit]

    def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]:
        return self.list_outbox_events(payload_ref=f"commit_job:{job_id}", limit=limit)

    def claim_outbox_event(self, event_id: str) -> OutboxEvent | None:
        event = self.outbox_events.get(event_id)
        now = datetime.now(timezone.utc)
        if not event or event.status != OutboxEventStatus.PENDING:
            return None
        if event.next_retry_at and event.next_retry_at > now:
            return None
        event.status = OutboxEventStatus.PROCESSING
        event.locked_by = "inline"
        event.locked_at = now
        event.lease_expires_at = now + _safe_timedelta(300)
        event.updated_at = now
        self.outbox_events[event.id] = event
        return event

    def claim_pending_outbox_events(
        self,
        limit: int,
        worker_id: str,
        lease_seconds: int,
    ) -> list[OutboxEvent]:
        now = datetime.now(timezone.utc)
        candidates = [
            event
            for event in self.outbox_events.values()
            if event.status == OutboxEventStatus.PENDING
            and (event.next_retry_at is None or event.next_retry_at <= now)
        ]
        candidates.sort(key=lambda event: event.created_at)
        claimed: list[OutboxEvent] = []
        for event in candidates[:limit]:
            event.status = OutboxEventStatus.PROCESSING
            event.locked_by = worker_id
            event.locked_at = now
            event.lease_expires_at = now + _safe_timedelta(lease_seconds)
            event.updated_at = now
            self.outbox_events[event.id] = event
            claimed.append(event)
        return claimed

    def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]:
        now = now or datetime.now(timezone.utc)
        released: list[OutboxEvent] = []
        for event in list(self.outbox_events.values()):
            if (
                event.status == OutboxEventStatus.PROCESSING
                and event.lease_expires_at is not None
                and event.lease_expires_at <= now
            ):
                event.status = OutboxEventStatus.PENDING
                event.locked_by = None
                event.locked_at = None
                event.lease_expires_at = None
                event.updated_at = now
                self.outbox_events[event.id] = event
                released.append(event)
        return released

    def update_outbox_event_status(
        self,
        event_id: str,
        status: OutboxEventStatus | str,
        last_error: str | None = None,
    ) -> OutboxEvent | None:
        event = self.outbox_events.get(event_id)
        if not event:
            return None
        event.status = OutboxEventStatus(_enum_value(status))
        event.last_error = last_error
        event.updated_at = datetime.now(timezone.utc)
        if event.status != OutboxEventStatus.PROCESSING:
            event.locked_by = None
            event.locked_at = None
            event.lease_expires_at = None
        if event.status in {OutboxEventStatus.FAILED, OutboxEventStatus.DEAD_LETTER}:
            event.attempt_count += 1
        self.outbox_events[event.id] = event
        return event

    def save_commit_job(self, job: CommitJob) -> CommitJob:
        now = datetime.now(timezone.utc)
        existing = self.commit_jobs.get(job.job_id)
        if existing:
            job.created_at = existing.created_at
        job.updated_at = now
        self.commit_jobs[job.job_id] = job
        return job

    def count_memory_refs(
        self,
        gateway_id: str | None = None,
        session_id: str | None = None,
        status: BackendRefStatus | str | None = None,
    ) -> int:
        return len(self.list_memory_refs(gateway_id=gateway_id, session_id=session_id, status=status, limit=100000))

    def get_commit_job(self, job_id: str) -> CommitJob | None:
        return self.commit_jobs.get(job_id)

    def update_commit_job_status(
        self,
        job_id: str,
        status: str,
        error_message: str | None = None,
        created_refs_count: int | None = None,
    ) -> CommitJob | None:
        job = self.commit_jobs.get(job_id)
        if not job:
            return None
        job.status = OperationStatus(_enum_value(status))
        job.error_message = error_message
        if created_refs_count is not None:
            job.created_refs_count = created_refs_count
        now = datetime.now(timezone.utc)
        job.updated_at = now
        if job.status.value == "running" and job.started_at is None:
            job.started_at = now
        if job.status.value in {"success", "failed", "partial_success"}:
            job.finished_at = now
        self.commit_jobs[job.job_id] = job
        return job

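
# Illustrative sketch (hypothetical helper, not part of the module API): outbox events
# emitted for a commit job carry payload_ref == f"commit_job:{job_id}", which is the
# convention list_outbox_events_by_job keys on. A caller can use that to summarise how
# far a job's backend writes have progressed.
def _example_commit_job_progress(repo: MetadataRepository, job_id: str, limit: int = 100) -> dict[str, int]:
    """Count a commit job's outbox events by status (empty dict if the job is unknown)."""
    if repo.get_commit_job(job_id) is None:
        return {}
    counts: dict[str, int] = {}
    for event in repo.list_outbox_events_by_job(job_id, limit=limit):
        counts[event.status.value] = counts.get(event.status.value, 0) + 1
    return counts
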

class SQLiteRepository:
    """SQLite-backed metadata store.

    Each table keeps the full serialized model in its ``payload`` column; the
    remaining typed columns exist only so common filters can be indexed.
    """

    def __init__(self, db_path: str | Path) -> None:
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    def _connect(self) -> sqlite3.Connection:
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _init_schema(self) -> None:
        with self._connect() as conn:
            conn.executescript(
                """
                CREATE TABLE IF NOT EXISTS users (
                    id TEXT PRIMARY KEY,
                    payload TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );
                CREATE TABLE IF NOT EXISTS profiles (
                    user_id TEXT PRIMARY KEY,
                    payload TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );
                CREATE TABLE IF NOT EXISTS memories (
                    id TEXT PRIMARY KEY,
                    user_id TEXT NOT NULL,
                    agent_id TEXT,
                    workspace_id TEXT,
                    session_id TEXT,
                    namespace TEXT NOT NULL,
                    memory_type TEXT NOT NULL,
                    visibility TEXT NOT NULL,
                    importance REAL NOT NULL,
                    confidence REAL NOT NULL,
                    expires_at TEXT,
                    archived_at TEXT,
                    payload TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );
                CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id);
                CREATE INDEX IF NOT EXISTS idx_memories_namespace ON memories(namespace);
                CREATE INDEX IF NOT EXISTS idx_memories_workspace ON memories(workspace_id);
                CREATE TABLE IF NOT EXISTS episodes (
                    id TEXT PRIMARY KEY,
                    user_id TEXT NOT NULL,
                    agent_id TEXT,
                    workspace_id TEXT,
                    session_id TEXT NOT NULL,
                    namespace TEXT NOT NULL,
                    payload TEXT NOT NULL,
                    created_at TEXT NOT NULL
                );
                CREATE INDEX IF NOT EXISTS idx_episodes_session ON episodes(session_id);
                CREATE TABLE IF NOT EXISTS audit_logs (
                    id TEXT PRIMARY KEY,
                    actor_user_id TEXT,
                    actor_agent_id TEXT,
                    action TEXT NOT NULL,
                    target_type TEXT NOT NULL,
                    target_id TEXT,
                    namespace TEXT,
                    decision TEXT NOT NULL,
                    payload TEXT NOT NULL,
                    created_at TEXT NOT NULL
                );
                CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_logs(created_at);
                CREATE TABLE IF NOT EXISTS memory_refs (
                    id TEXT PRIMARY KEY,
                    gateway_id TEXT NOT NULL,
                    workspace_id TEXT NOT NULL,
                    user_id TEXT NOT NULL,
                    agent_id TEXT,
                    session_id TEXT,
                    turn_id TEXT,
                    namespace TEXT,
                    backend_type TEXT NOT NULL,
                    ref_type TEXT NOT NULL,
                    native_id TEXT,
                    native_uri TEXT,
                    provenance_id TEXT,
                    idempotency_key TEXT,
                    content_hash TEXT,
                    source_type TEXT,
                    source_event_id TEXT,
                    status TEXT NOT NULL,
                    error_message TEXT,
                    metadata_json TEXT NOT NULL,
                    payload TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );
                CREATE INDEX IF NOT EXISTS idx_memory_refs_gateway ON memory_refs(gateway_id);
                CREATE INDEX IF NOT EXISTS idx_memory_refs_scope ON memory_refs(workspace_id, user_id, agent_id, session_id);
                CREATE INDEX IF NOT EXISTS idx_memory_refs_backend ON memory_refs(backend_type, ref_type, status);
                CREATE INDEX IF NOT EXISTS idx_memory_refs_namespace ON memory_refs(namespace);
                CREATE TABLE IF NOT EXISTS outbox_events (
                    id TEXT PRIMARY KEY,
                    event_type TEXT NOT NULL,
                    gateway_id TEXT NOT NULL,
                    workspace_id TEXT NOT NULL,
                    user_id TEXT NOT NULL,
                    agent_id TEXT,
                    session_id TEXT,
                    backend_type TEXT NOT NULL,
                    operation TEXT NOT NULL,
                    payload_ref TEXT,
                    status TEXT NOT NULL,
                    attempt_count INTEGER NOT NULL,
                    max_attempts INTEGER NOT NULL,
                    next_retry_at TEXT,
                    last_error TEXT,
                    locked_by TEXT,
                    locked_at TEXT,
                    lease_expires_at TEXT,
                    metadata_json TEXT NOT NULL,
                    payload TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );
                CREATE INDEX IF NOT EXISTS idx_outbox_events_status ON outbox_events(status, next_retry_at);
                CREATE INDEX IF NOT EXISTS idx_outbox_events_backend ON outbox_events(backend_type, operation);
                CREATE INDEX IF NOT EXISTS idx_outbox_events_gateway ON outbox_events(gateway_id);
                CREATE TABLE IF NOT EXISTS commit_jobs (
                    job_id TEXT PRIMARY KEY,
                    workspace_id TEXT NOT NULL,
                    user_id TEXT NOT NULL,
                    agent_id TEXT,
                    session_id TEXT NOT NULL,
                    namespace TEXT,
                    status TEXT NOT NULL,
                    requested_by TEXT,
                    created_refs_count INTEGER NOT NULL,
                    error_message TEXT,
                    payload TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL,
                    started_at TEXT,
                    finished_at TEXT
                );
                CREATE INDEX IF NOT EXISTS idx_commit_jobs_session ON commit_jobs(session_id);
                CREATE INDEX IF NOT EXISTS idx_commit_jobs_status ON commit_jobs(status);
                """
            )
            self._ensure_memory_ref_columns(conn)
            self._ensure_outbox_event_columns(conn)
            # Migration step: deduplicate rows on (gateway_id, backend_type, ref_type),
            # keeping the highest rowid, then drop the old unique index on that triple.
            conn.execute(
                """
                DELETE FROM memory_refs
                WHERE rowid NOT IN (
                    SELECT MAX(rowid) FROM memory_refs
                    GROUP BY gateway_id, backend_type, ref_type
                )
                """
            )
            conn.execute("DROP INDEX IF EXISTS uq_memory_refs_gateway_backend_ref_type")

    def _ensure_memory_ref_columns(self, conn: sqlite3.Connection) -> None:
        columns = {row["name"] for row in conn.execute("PRAGMA table_info(memory_refs)").fetchall()}
        additions = {
            "idempotency_key": "TEXT",
            "content_hash": "TEXT",
        }
        for column, column_type in additions.items():
            if column not in columns:
                conn.execute(f"ALTER TABLE memory_refs ADD COLUMN {column} {column_type}")

    def _ensure_outbox_event_columns(self, conn: sqlite3.Connection) -> None:
        columns = {row["name"] for row in conn.execute("PRAGMA table_info(outbox_events)").fetchall()}
        additions = {
            "locked_by": "TEXT",
            "locked_at": "TEXT",
            "lease_expires_at": "TEXT",
        }
        for column, column_type in additions.items():
            if column not in columns:
                conn.execute(f"ALTER TABLE outbox_events ADD COLUMN {column} {column_type}")

    def create_user(self, user: UserRecord) -> UserRecord:
        now = datetime.now(timezone.utc)
        user.created_at = user.created_at or now
        user.updated_at = now
        with self._connect() as conn:
            conn.execute(
                "INSERT OR REPLACE INTO users(id, payload, updated_at) VALUES (?, ?, ?)",
                (user.id, _json_dump_model(user), user.updated_at.isoformat()),
            )
        self.upsert_profile(
            ProfileRecord(user_id=user.id, namespace=user.profile_namespace or f"user/{user.id}/profile")
        )
        return user

    def get_user(self, user_id: str) -> Optional[UserRecord]:
        with self._connect() as conn:
            row = conn.execute("SELECT payload FROM users WHERE id = ?", (user_id,)).fetchone()
            return _json_load_model(UserRecord, row["payload"]) if row else None

    def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord:
        existing = self.get_memory(memory.id)
        now = datetime.now(timezone.utc)
        if existing:
            memory.version = existing.version + 1
            memory.created_at = existing.created_at
        memory.updated_at = now
        with self._connect() as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO memories(
                    id, user_id, agent_id, workspace_id, session_id, namespace,
                    memory_type, visibility, importance, confidence,
                    expires_at, archived_at, payload, updated_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    memory.id,
                    memory.user_id,
                    memory.agent_id,
                    memory.workspace_id,
                    memory.session_id,
                    memory.namespace,
                    memory.memory_type.value,
                    memory.visibility.value,
                    memory.importance,
                    memory.confidence,
                    memory.expires_at.isoformat() if memory.expires_at else None,
                    memory.archived_at.isoformat() if memory.archived_at else None,
                    _json_dump_model(memory),
                    memory.updated_at.isoformat(),
                ),
            )
        return memory

    def get_memory(self, memory_id: str) -> Optional[MemoryRecord]:
        with self._connect() as conn:
            row = conn.execute("SELECT payload FROM memories WHERE id = ?", (memory_id,)).fetchone()
            return _json_load_model(MemoryRecord, row["payload"]) if row else None

    def delete_memory(self, memory_id: str) -> bool:
        with self._connect() as conn:
            cursor = conn.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
            return cursor.rowcount > 0

    def list_memories(self) -> Iterable[MemoryRecord]:
        with self._connect() as conn:
            rows = conn.execute("SELECT payload FROM memories").fetchall()
            return [_json_load_model(MemoryRecord, row["payload"]) for row in rows]

    def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord:
        with self._connect() as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO episodes(
                    id, user_id, agent_id, workspace_id, session_id, namespace, payload, created_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    episode.id,
                    episode.user_id,
                    episode.agent_id,
                    episode.workspace_id,
                    episode.session_id,
                    episode.namespace,
                    _json_dump_model(episode),
                    episode.created_at.isoformat(),
                ),
            )
        return episode

    def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]:
        with self._connect() as conn:
            rows = conn.execute(
                "SELECT payload FROM episodes WHERE session_id = ? ORDER BY created_at ASC",
                (session_id,),
            ).fetchall()
            return [_json_load_model(EpisodeRecord, row["payload"]) for row in rows]
ORDER BY created_at ASC", (session_id,), ).fetchall() return [_json_load_model(EpisodeRecord, row["payload"]) for row in rows] def get_profile(self, user_id: str) -> Optional[ProfileRecord]: with self._connect() as conn: row = conn.execute("SELECT payload FROM profiles WHERE user_id = ?", (user_id,)).fetchone() return _json_load_model(ProfileRecord, row["payload"]) if row else None def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord: profile.updated_at = datetime.now(timezone.utc) with self._connect() as conn: conn.execute( "INSERT OR REPLACE INTO profiles(user_id, payload, updated_at) VALUES (?, ?, ?)", (profile.user_id, _json_dump_model(profile), profile.updated_at.isoformat()), ) return profile def add_audit(self, audit: AuditLog) -> AuditLog: with self._connect() as conn: conn.execute( """ INSERT OR REPLACE INTO audit_logs( id, actor_user_id, actor_agent_id, action, target_type, target_id, namespace, decision, payload, created_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( audit.id, audit.actor_user_id, audit.actor_agent_id, audit.action, audit.target_type, audit.target_id, audit.namespace, audit.decision, _json_dump_model(audit), audit.created_at.isoformat(), ), ) return audit def list_audit(self, limit: int = 100) -> list[AuditLog]: with self._connect() as conn: rows = conn.execute( "SELECT payload FROM audit_logs ORDER BY created_at DESC LIMIT ?", (limit,), ).fetchall() return [_json_load_model(AuditLog, row["payload"]) for row in rows] def save_memory_ref(self, ref: MemoryRef) -> MemoryRef: existing = None with self._connect() as conn: row = conn.execute("SELECT payload FROM memory_refs WHERE id = ?", (ref.id,)).fetchone() if row: existing = _json_load_model(MemoryRef, row["payload"]) now = datetime.now(timezone.utc) if existing: ref.created_at = existing.created_at ref.updated_at = now with self._connect() as conn: conn.execute( """ INSERT OR REPLACE INTO memory_refs( id, gateway_id, workspace_id, user_id, agent_id, session_id, turn_id, namespace, backend_type, ref_type, native_id, native_uri, provenance_id, idempotency_key, content_hash, source_type, source_event_id, status, error_message, metadata_json, payload, created_at, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( ref.id, ref.gateway_id, ref.workspace_id, ref.user_id, ref.agent_id, ref.session_id, ref.turn_id, ref.namespace, ref.backend_type.value, ref.ref_type.value, ref.native_id, ref.native_uri, ref.provenance_id, ref.idempotency_key, ref.content_hash, ref.source_type, ref.source_event_id, ref.status.value, ref.error_message, json.dumps(ref.metadata, ensure_ascii=False), _json_dump_model(ref), ref.created_at.isoformat(), ref.updated_at.isoformat(), ), ) return ref def get_memory_ref(self, ref_id: str) -> MemoryRef | None: with self._connect() as conn: row = conn.execute("SELECT payload FROM memory_refs WHERE id = ?", (ref_id,)).fetchone() return _json_load_model(MemoryRef, row["payload"]) if row else None def list_memory_refs( self, gateway_id: str | None = None, workspace_id: str | None = None, user_id: str | None = None, agent_id: str | None = None, session_id: str | None = None, namespace: str | None = None, backend_type: BackendType | str | None = None, ref_type: MemoryRefType | str | None = None, status: BackendRefStatus | str | None = None, limit: int = 100, ) -> list[MemoryRef]: clauses: list[str] = [] params: list[str | int] = [] filters = { "gateway_id": gateway_id, "workspace_id": workspace_id, "user_id": user_id, "agent_id": agent_id, "session_id": session_id, "namespace": namespace, "backend_type": _enum_value(backend_type) if backend_type is not None else None, "ref_type": _enum_value(ref_type) if ref_type is not None else None, "status": _enum_value(status) if status is not None else None, } for key, value in filters.items(): if value is not None: clauses.append(f"{key} = ?") params.append(value) where = f"WHERE {' AND '.join(clauses)}" if clauses else "" params.append(limit) with self._connect() as conn: rows = conn.execute( f"SELECT payload FROM memory_refs {where} ORDER BY created_at DESC LIMIT ?", params, ).fetchall() return [_json_load_model(MemoryRef, row["payload"]) for row in rows] def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent: existing = None with self._connect() as conn: row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event.id,)).fetchone() if row: existing = _json_load_model(OutboxEvent, row["payload"]) now = datetime.now(timezone.utc) if existing: event.created_at = existing.created_at event.updated_at = now with self._connect() as conn: conn.execute( """ INSERT OR REPLACE INTO outbox_events( id, event_type, gateway_id, workspace_id, user_id, agent_id, session_id, backend_type, operation, payload_ref, status, attempt_count, max_attempts, next_retry_at, last_error, locked_by, locked_at, lease_expires_at, metadata_json, payload, created_at, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( event.id, event.event_type, event.gateway_id, event.workspace_id, event.user_id, event.agent_id, event.session_id, event.backend_type.value, event.operation.value, event.payload_ref, event.status.value, event.attempt_count, event.max_attempts, event.next_retry_at.isoformat() if event.next_retry_at else None, event.last_error, event.locked_by, event.locked_at.isoformat() if event.locked_at else None, event.lease_expires_at.isoformat() if event.lease_expires_at else None, json.dumps(event.metadata, ensure_ascii=False), _json_dump_model(event), event.created_at.isoformat(), event.updated_at.isoformat(), ), ) return event def list_outbox_events( self, status: OutboxEventStatus | str | None = None, backend_type: BackendType | str | None = None, operation: BackendOperation | str | None = None, gateway_id: str | None = None, payload_ref: str | None = None, limit: int = 100, ) -> list[OutboxEvent]: clauses: list[str] = [] params: list[str | int] = [] filters = { "status": _enum_value(status) if status is not None else None, "backend_type": _enum_value(backend_type) if backend_type is not None else None, "operation": _enum_value(operation) if operation is not None else None, "gateway_id": gateway_id, "payload_ref": payload_ref, } for key, value in filters.items(): if value is not None: clauses.append(f"{key} = ?") params.append(value) where = f"WHERE {' AND '.join(clauses)}" if clauses else "" params.append(limit) with self._connect() as conn: rows = conn.execute( f"SELECT payload FROM outbox_events {where} ORDER BY created_at DESC LIMIT ?", params, ).fetchall() return [_json_load_model(OutboxEvent, row["payload"]) for row in rows] def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]: return self.list_outbox_events(payload_ref=f"commit_job:{job_id}", limit=limit) def claim_outbox_event(self, event_id: str) -> OutboxEvent | None: with self._connect() as conn: row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event_id,)).fetchone() if not row: return None event = _json_load_model(OutboxEvent, row["payload"]) now = datetime.now(timezone.utc) if event.status != OutboxEventStatus.PENDING: return None if event.next_retry_at and event.next_retry_at > now: return None event.status = OutboxEventStatus.PROCESSING event.locked_by = "inline" event.locked_at = now event.lease_expires_at = now + _safe_timedelta(300) event.updated_at = now with self._connect() as conn: cursor = conn.execute( """ UPDATE outbox_events SET status = ?, locked_by = ?, locked_at = ?, lease_expires_at = ?, payload = ?, metadata_json = ?, updated_at = ? WHERE id = ? AND status = ? AND (next_retry_at IS NULL OR next_retry_at <= ?) """, ( event.status.value, event.locked_by, event.locked_at.isoformat() if event.locked_at else None, event.lease_expires_at.isoformat() if event.lease_expires_at else None, _json_dump_model(event), json.dumps(event.metadata, ensure_ascii=False), event.updated_at.isoformat(), event.id, OutboxEventStatus.PENDING.value, now.isoformat(), ), ) return event if cursor.rowcount else None def claim_pending_outbox_events( self, limit: int, worker_id: str, lease_seconds: int, ) -> list[OutboxEvent]: now = datetime.now(timezone.utc) now_iso = now.isoformat() with self._connect() as conn: rows = conn.execute( """ SELECT payload FROM outbox_events WHERE status = ? AND (next_retry_at IS NULL OR next_retry_at <= ?) ORDER BY created_at ASC LIMIT ? 
""", (OutboxEventStatus.PENDING.value, now_iso, limit), ).fetchall() claimed: list[OutboxEvent] = [] with self._connect() as conn: for row in rows: event = _json_load_model(OutboxEvent, row["payload"]) if event.status != OutboxEventStatus.PENDING: continue event.status = OutboxEventStatus.PROCESSING event.locked_by = worker_id event.locked_at = now event.lease_expires_at = now + _safe_timedelta(lease_seconds) event.updated_at = now cursor = conn.execute( """ UPDATE outbox_events SET status = ?, locked_by = ?, locked_at = ?, lease_expires_at = ?, payload = ?, metadata_json = ?, updated_at = ? WHERE id = ? AND status = ? AND (next_retry_at IS NULL OR next_retry_at <= ?) """, ( event.status.value, event.locked_by, event.locked_at.isoformat() if event.locked_at else None, event.lease_expires_at.isoformat() if event.lease_expires_at else None, _json_dump_model(event), json.dumps(event.metadata, ensure_ascii=False), event.updated_at.isoformat(), event.id, OutboxEventStatus.PENDING.value, now_iso, ), ) if cursor.rowcount: claimed.append(event) return claimed def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]: now = now or datetime.now(timezone.utc) with self._connect() as conn: rows = conn.execute( """ SELECT payload FROM outbox_events WHERE status = ? AND lease_expires_at IS NOT NULL AND lease_expires_at <= ? """, (OutboxEventStatus.PROCESSING.value, now.isoformat()), ).fetchall() released: list[OutboxEvent] = [] for row in rows: event = _json_load_model(OutboxEvent, row["payload"]) event.status = OutboxEventStatus.PENDING event.locked_by = None event.locked_at = None event.lease_expires_at = None event.updated_at = now released.append(self.save_outbox_event(event)) return released def update_outbox_event_status( self, event_id: str, status: OutboxEventStatus | str, last_error: str | None = None, ) -> OutboxEvent | None: with self._connect() as conn: row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event_id,)).fetchone() if not row: return None event = _json_load_model(OutboxEvent, row["payload"]) event.status = OutboxEventStatus(_enum_value(status)) event.last_error = last_error event.updated_at = datetime.now(timezone.utc) if event.status != OutboxEventStatus.PROCESSING: event.locked_by = None event.locked_at = None event.lease_expires_at = None if event.status in {OutboxEventStatus.FAILED, OutboxEventStatus.DEAD_LETTER}: event.attempt_count += 1 return self.save_outbox_event(event) def save_commit_job(self, job: CommitJob) -> CommitJob: existing = None with self._connect() as conn: row = conn.execute("SELECT payload FROM commit_jobs WHERE job_id = ?", (job.job_id,)).fetchone() if row: existing = _json_load_model(CommitJob, row["payload"]) now = datetime.now(timezone.utc) if existing: job.created_at = existing.created_at job.updated_at = now with self._connect() as conn: conn.execute( """ INSERT OR REPLACE INTO commit_jobs( job_id, workspace_id, user_id, agent_id, session_id, namespace, status, requested_by, created_refs_count, error_message, payload, created_at, updated_at, started_at, finished_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( job.job_id, job.workspace_id, job.user_id, job.agent_id, job.session_id, job.namespace, job.status.value, job.requested_by, job.created_refs_count, job.error_message, _json_dump_model(job), job.created_at.isoformat(), job.updated_at.isoformat(), job.started_at.isoformat() if job.started_at else None, job.finished_at.isoformat() if job.finished_at else None, ), ) return job def get_commit_job(self, job_id: str) -> CommitJob | None: with self._connect() as conn: row = conn.execute("SELECT payload FROM commit_jobs WHERE job_id = ?", (job_id,)).fetchone() return _json_load_model(CommitJob, row["payload"]) if row else None def update_commit_job_status( self, job_id: str, status: str, error_message: str | None = None, created_refs_count: int | None = None, ) -> CommitJob | None: from .schemas_v2 import OperationStatus job = self.get_commit_job(job_id) if not job: return None job.status = OperationStatus(_enum_value(status)) job.error_message = error_message if created_refs_count is not None: job.created_refs_count = created_refs_count now = datetime.now(timezone.utc) job.updated_at = now if job.status.value == "running" and job.started_at is None: job.started_at = now if job.status.value in {"success", "failed", "partial_success"}: job.finished_at = now return self.save_commit_job(job) def count_memory_refs( self, gateway_id: str | None = None, session_id: str | None = None, status: BackendRefStatus | str | None = None, ) -> int: return len(self.list_memory_refs(gateway_id=gateway_id, session_id=session_id, status=status, limit=100000)) def build_repository() -> MetadataRepository: config = get_config() if config.storage.backend == "memory": return InMemoryRepository() return SQLiteRepository(config.storage.sqlite_path) repository = build_repository()