"""Metadata repositories for Memory Gateway.

SQLite is the default POC store. The in-memory implementation is retained for
small isolated tests and for cases where persistence is explicitly disabled.
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sqlite3
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Iterable, Optional, Protocol
|
|
|
|
from .backend_contracts import BackendOperation, CommitJob, OutboxEvent, OutboxEventStatus
|
|
from .config import get_config
|
|
from .schemas import AuditLog, EpisodeRecord, MemoryRecord, ProfileRecord, UserRecord
|
|
from .schemas_v2 import BackendRefStatus, BackendType, MemoryRef, MemoryRefType
|
|
|
|
|
|
class MetadataRepository(Protocol):
    """Structural interface shared by the in-memory and SQLite repositories.

    All implementations persist the gateway's metadata entities (users,
    memories, episodes, profiles, audit logs) plus the v2 backend-sync
    entities (memory refs, outbox events, commit jobs).  Methods that take
    ``Enum | str`` filter parameters accept either the enum member or its
    string value.
    """

    # --- users -----------------------------------------------------------
    def create_user(self, user: UserRecord) -> UserRecord: ...

    def get_user(self, user_id: str) -> Optional[UserRecord]: ...

    # --- memories --------------------------------------------------------
    def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord: ...

    def get_memory(self, memory_id: str) -> Optional[MemoryRecord]: ...

    def delete_memory(self, memory_id: str) -> bool: ...

    def list_memories(self) -> Iterable[MemoryRecord]: ...

    # --- episodes --------------------------------------------------------
    def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord: ...

    def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]: ...

    # --- profiles --------------------------------------------------------
    def get_profile(self, user_id: str) -> Optional[ProfileRecord]: ...

    def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord: ...

    # --- audit -----------------------------------------------------------
    def add_audit(self, audit: AuditLog) -> AuditLog: ...

    def list_audit(self, limit: int = 100) -> list[AuditLog]: ...

    # --- v2 memory refs --------------------------------------------------
    def save_memory_ref(self, ref: MemoryRef) -> MemoryRef: ...

    def get_memory_ref(self, ref_id: str) -> MemoryRef | None: ...

    def list_memory_refs(
        self,
        gateway_id: str | None = None,
        workspace_id: str | None = None,
        user_id: str | None = None,
        agent_id: str | None = None,
        session_id: str | None = None,
        namespace: str | None = None,
        backend_type: BackendType | str | None = None,
        ref_type: MemoryRefType | str | None = None,
        status: BackendRefStatus | str | None = None,
        limit: int = 100,
    ) -> list[MemoryRef]: ...

    # --- v2 outbox -------------------------------------------------------
    def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent: ...

    def list_outbox_events(
        self,
        status: OutboxEventStatus | str | None = None,
        backend_type: BackendType | str | None = None,
        operation: BackendOperation | str | None = None,
        gateway_id: str | None = None,
        payload_ref: str | None = None,
        limit: int = 100,
    ) -> list[OutboxEvent]: ...

    def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]: ...

    def claim_outbox_event(self, event_id: str) -> OutboxEvent | None: ...

    def claim_pending_outbox_events(
        self,
        limit: int,
        worker_id: str,
        lease_seconds: int,
    ) -> list[OutboxEvent]: ...

    def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]: ...

    def update_outbox_event_status(
        self,
        event_id: str,
        status: OutboxEventStatus | str,
        last_error: str | None = None,
    ) -> OutboxEvent | None: ...

    # --- v2 commit jobs --------------------------------------------------
    def save_commit_job(self, job: CommitJob) -> CommitJob: ...

    def get_commit_job(self, job_id: str) -> CommitJob | None: ...

    def update_commit_job_status(
        self,
        job_id: str,
        status: str,
        error_message: str | None = None,
        created_refs_count: int | None = None,
    ) -> CommitJob | None: ...

    def count_memory_refs(
        self,
        gateway_id: str | None = None,
        session_id: str | None = None,
        status: BackendRefStatus | str | None = None,
    ) -> int: ...
|
|
|
|
|
|
def _json_dump_model(model) -> str:
|
|
return json.dumps(model.model_dump(mode="json"), ensure_ascii=False)
|
|
|
|
|
|
def _json_load_model(model_cls, payload: str):
|
|
return model_cls.model_validate(json.loads(payload))
|
|
|
|
|
|
def _enum_value(value):
|
|
return value.value if hasattr(value, "value") else value
|
|
|
|
|
|
def _safe_timedelta(seconds: int) -> timedelta:
|
|
return timedelta(seconds=max(1, int(seconds)))
|
|
|
|
|
|
class InMemoryRepository:
    """Dict-backed metadata repository for tests and persistence-free runs.

    Implements the ``MetadataRepository`` protocol entirely in process
    memory.  Not thread-safe; all state is lost when the process exits.

    Fixes relative to the previous version:
    * ``list_audit`` with a non-positive ``limit`` now returns ``[]``
      (``log[-0:]`` used to return the entire log).
    * ``count_memory_refs`` counts directly instead of materializing up to
      100 000 refs through ``list_memory_refs`` (which also silently capped
      the count at that number).
    """

    def __init__(self) -> None:
        self.users: dict[str, UserRecord] = {}
        self.memories: dict[str, MemoryRecord] = {}
        self.episodes: dict[str, EpisodeRecord] = {}
        self.profiles: dict[str, ProfileRecord] = {}
        self.audit_logs: list[AuditLog] = []
        self.memory_refs: dict[str, MemoryRef] = {}
        self.outbox_events: dict[str, OutboxEvent] = {}
        self.commit_jobs: dict[str, CommitJob] = {}

    def create_user(self, user: UserRecord) -> UserRecord:
        """Store *user* (stamping timestamps) and ensure it has a profile."""
        now = datetime.now(timezone.utc)
        user.created_at = now
        user.updated_at = now
        self.users[user.id] = user
        # setdefault: never clobber a profile that already exists for this user.
        self.profiles.setdefault(
            user.id,
            ProfileRecord(user_id=user.id, namespace=user.profile_namespace or f"user/{user.id}/profile"),
        )
        return user

    def get_user(self, user_id: str) -> Optional[UserRecord]:
        """Return the user with *user_id*, or None."""
        return self.users.get(user_id)

    def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord:
        """Insert or replace a memory, bumping version and keeping created_at."""
        now = datetime.now(timezone.utc)
        existing = self.memories.get(memory.id)
        if existing:
            memory.version = existing.version + 1
            memory.created_at = existing.created_at
        memory.updated_at = now
        self.memories[memory.id] = memory
        return memory

    def get_memory(self, memory_id: str) -> Optional[MemoryRecord]:
        """Return the memory with *memory_id*, or None."""
        return self.memories.get(memory_id)

    def delete_memory(self, memory_id: str) -> bool:
        """Delete a memory; return True if it existed."""
        return self.memories.pop(memory_id, None) is not None

    def list_memories(self) -> Iterable[MemoryRecord]:
        """Return a snapshot list of all memories."""
        return list(self.memories.values())

    def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord:
        """Store an episode keyed by its id (replaces on duplicate id)."""
        self.episodes[episode.id] = episode
        return episode

    def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]:
        """Return all episodes recorded for *session_id*, in insertion order."""
        return [episode for episode in self.episodes.values() if episode.session_id == session_id]

    def get_profile(self, user_id: str) -> Optional[ProfileRecord]:
        """Return the profile for *user_id*, or None."""
        return self.profiles.get(user_id)

    def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord:
        """Store *profile*, bumping its version and updated_at."""
        profile.updated_at = datetime.now(timezone.utc)
        profile.version += 1
        self.profiles[profile.user_id] = profile
        return profile

    def add_audit(self, audit: AuditLog) -> AuditLog:
        """Append an audit entry to the log."""
        self.audit_logs.append(audit)
        return audit

    def list_audit(self, limit: int = 100) -> list[AuditLog]:
        """Return up to *limit* most recent audit entries (oldest first).

        A non-positive *limit* returns an empty list.
        """
        if limit <= 0:
            return []
        return self.audit_logs[-limit:]

    def save_memory_ref(self, ref: MemoryRef) -> MemoryRef:
        """Insert or replace a memory ref, preserving created_at on update."""
        now = datetime.now(timezone.utc)
        existing = self.memory_refs.get(ref.id)
        if existing:
            ref.created_at = existing.created_at
        ref.updated_at = now
        self.memory_refs[ref.id] = ref
        return ref

    def get_memory_ref(self, ref_id: str) -> MemoryRef | None:
        """Return the memory ref with *ref_id*, or None."""
        return self.memory_refs.get(ref_id)

    def list_memory_refs(
        self,
        gateway_id: str | None = None,
        workspace_id: str | None = None,
        user_id: str | None = None,
        agent_id: str | None = None,
        session_id: str | None = None,
        namespace: str | None = None,
        backend_type: BackendType | str | None = None,
        ref_type: MemoryRefType | str | None = None,
        status: BackendRefStatus | str | None = None,
        limit: int = 100,
    ) -> list[MemoryRef]:
        """Return refs matching every non-None filter, newest first, capped at *limit*."""
        refs = list(self.memory_refs.values())

        def matches(ref: MemoryRef) -> bool:
            return (
                (workspace_id is None or ref.workspace_id == workspace_id)
                and (gateway_id is None or ref.gateway_id == gateway_id)
                and (user_id is None or ref.user_id == user_id)
                and (agent_id is None or ref.agent_id == agent_id)
                and (session_id is None or ref.session_id == session_id)
                and (namespace is None or ref.namespace == namespace)
                and (backend_type is None or ref.backend_type.value == _enum_value(backend_type))
                and (ref_type is None or ref.ref_type.value == _enum_value(ref_type))
                and (status is None or ref.status.value == _enum_value(status))
            )

        refs = [ref for ref in refs if matches(ref)]
        refs.sort(key=lambda ref: ref.created_at, reverse=True)
        return refs[:limit]

    def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent:
        """Insert or replace an outbox event, preserving created_at on update."""
        now = datetime.now(timezone.utc)
        existing = self.outbox_events.get(event.id)
        if existing:
            event.created_at = existing.created_at
        event.updated_at = now
        self.outbox_events[event.id] = event
        return event

    def list_outbox_events(
        self,
        status: OutboxEventStatus | str | None = None,
        backend_type: BackendType | str | None = None,
        operation: BackendOperation | str | None = None,
        gateway_id: str | None = None,
        payload_ref: str | None = None,
        limit: int = 100,
    ) -> list[OutboxEvent]:
        """Return events matching every non-None filter, newest first, capped at *limit*."""
        events = list(self.outbox_events.values())

        def matches(event: OutboxEvent) -> bool:
            return (
                (status is None or event.status.value == _enum_value(status))
                and (backend_type is None or event.backend_type.value == _enum_value(backend_type))
                and (operation is None or event.operation.value == _enum_value(operation))
                and (gateway_id is None or event.gateway_id == gateway_id)
                and (payload_ref is None or event.payload_ref == payload_ref)
            )

        events = [event for event in events if matches(event)]
        events.sort(key=lambda event: event.created_at, reverse=True)
        return events[:limit]

    def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]:
        """Return outbox events whose payload_ref points at commit job *job_id*."""
        return self.list_outbox_events(payload_ref=f"commit_job:{job_id}", limit=limit)

    def claim_outbox_event(self, event_id: str) -> OutboxEvent | None:
        """Claim a single PENDING event (inline worker, fixed 300 s lease).

        Returns None when the event is missing, not PENDING, or its
        next_retry_at lies in the future.
        """
        event = self.outbox_events.get(event_id)
        now = datetime.now(timezone.utc)
        if not event or event.status != OutboxEventStatus.PENDING:
            return None
        if event.next_retry_at and event.next_retry_at > now:
            return None
        event.status = OutboxEventStatus.PROCESSING
        event.locked_by = "inline"
        event.locked_at = now
        event.lease_expires_at = now + _safe_timedelta(300)
        event.updated_at = now
        self.outbox_events[event.id] = event
        return event

    def claim_pending_outbox_events(
        self,
        limit: int,
        worker_id: str,
        lease_seconds: int,
    ) -> list[OutboxEvent]:
        """Claim up to *limit* due PENDING events for *worker_id*, oldest first."""
        now = datetime.now(timezone.utc)
        candidates = [
            event
            for event in self.outbox_events.values()
            if event.status == OutboxEventStatus.PENDING
            and (event.next_retry_at is None or event.next_retry_at <= now)
        ]
        candidates.sort(key=lambda event: event.created_at)
        claimed: list[OutboxEvent] = []
        for event in candidates[:limit]:
            event.status = OutboxEventStatus.PROCESSING
            event.locked_by = worker_id
            event.locked_at = now
            event.lease_expires_at = now + _safe_timedelta(lease_seconds)
            event.updated_at = now
            self.outbox_events[event.id] = event
            claimed.append(event)
        return claimed

    def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]:
        """Move PROCESSING events whose lease expired back to PENDING."""
        now = now or datetime.now(timezone.utc)
        released: list[OutboxEvent] = []
        for event in list(self.outbox_events.values()):
            if (
                event.status == OutboxEventStatus.PROCESSING
                and event.lease_expires_at is not None
                and event.lease_expires_at <= now
            ):
                event.status = OutboxEventStatus.PENDING
                event.locked_by = None
                event.locked_at = None
                event.lease_expires_at = None
                event.updated_at = now
                self.outbox_events[event.id] = event
                released.append(event)
        return released

    def update_outbox_event_status(
        self,
        event_id: str,
        status: OutboxEventStatus | str,
        last_error: str | None = None,
    ) -> OutboxEvent | None:
        """Set an event's status, clearing locks when leaving PROCESSING.

        FAILED / DEAD_LETTER transitions also bump attempt_count.
        """
        event = self.outbox_events.get(event_id)
        if not event:
            return None
        event.status = OutboxEventStatus(_enum_value(status))
        event.last_error = last_error
        event.updated_at = datetime.now(timezone.utc)
        if event.status != OutboxEventStatus.PROCESSING:
            event.locked_by = None
            event.locked_at = None
            event.lease_expires_at = None
        if event.status in {OutboxEventStatus.FAILED, OutboxEventStatus.DEAD_LETTER}:
            event.attempt_count += 1
        self.outbox_events[event.id] = event
        return event

    def save_commit_job(self, job: CommitJob) -> CommitJob:
        """Insert or replace a commit job, preserving created_at on update."""
        now = datetime.now(timezone.utc)
        existing = self.commit_jobs.get(job.job_id)
        if existing:
            job.created_at = existing.created_at
        job.updated_at = now
        self.commit_jobs[job.job_id] = job
        return job

    def count_memory_refs(
        self,
        gateway_id: str | None = None,
        session_id: str | None = None,
        status: BackendRefStatus | str | None = None,
    ) -> int:
        """Count memory refs matching the filters, without any result cap."""
        status_value = _enum_value(status) if status is not None else None
        return sum(
            1
            for ref in self.memory_refs.values()
            if (gateway_id is None or ref.gateway_id == gateway_id)
            and (session_id is None or ref.session_id == session_id)
            and (status_value is None or ref.status.value == status_value)
        )

    def get_commit_job(self, job_id: str) -> CommitJob | None:
        """Return the commit job with *job_id*, or None."""
        return self.commit_jobs.get(job_id)

    def update_commit_job_status(
        self,
        job_id: str,
        status: str,
        error_message: str | None = None,
        created_refs_count: int | None = None,
    ) -> CommitJob | None:
        """Set a job's status, stamping started_at / finished_at transitions."""
        job = self.commit_jobs.get(job_id)
        if not job:
            return None
        # Imported lazily to avoid a module-level import cycle.
        from .schemas_v2 import OperationStatus

        job.status = OperationStatus(_enum_value(status))
        job.error_message = error_message
        if created_refs_count is not None:
            job.created_refs_count = created_refs_count
        now = datetime.now(timezone.utc)
        job.updated_at = now
        if job.status.value == "running" and job.started_at is None:
            job.started_at = now
        if job.status.value in {"success", "failed", "partial_success"}:
            job.finished_at = now
        self.commit_jobs[job.job_id] = job
        return job
|
|
|
|
|
|
class SQLiteRepository:
|
|
def __init__(self, db_path: str | Path) -> None:
    """Open (or create) the SQLite database at *db_path* and apply the schema."""
    self.db_path = Path(db_path)
    # Create the parent directory up front so sqlite3 can create the file.
    self.db_path.parent.mkdir(parents=True, exist_ok=True)
    self._init_schema()
|
|
|
|
def _connect(self) -> sqlite3.Connection:
    """Return a fresh connection with name-based row access (sqlite3.Row).

    NOTE(review): callers use ``with conn:`` which only commits/rolls back
    the transaction; the connection itself is never explicitly closed and
    is reclaimed by GC — confirm this is acceptable for the POC.
    """
    conn = sqlite3.connect(self.db_path)
    conn.row_factory = sqlite3.Row
    return conn
|
|
|
|
def _init_schema(self) -> None:
    """Create all tables/indexes (idempotent) and run lightweight migrations.

    Order matters: the base DDL runs first, then column back-fills for
    pre-existing databases, then a one-off dedup/index cleanup for
    memory_refs.
    """
    with self._connect() as conn:
        conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS users (
                id TEXT PRIMARY KEY,
                payload TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS profiles (
                user_id TEXT PRIMARY KEY,
                payload TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS memories (
                id TEXT PRIMARY KEY,
                user_id TEXT NOT NULL,
                agent_id TEXT,
                workspace_id TEXT,
                session_id TEXT,
                namespace TEXT NOT NULL,
                memory_type TEXT NOT NULL,
                visibility TEXT NOT NULL,
                importance REAL NOT NULL,
                confidence REAL NOT NULL,
                expires_at TEXT,
                archived_at TEXT,
                payload TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_memories_user ON memories(user_id);
            CREATE INDEX IF NOT EXISTS idx_memories_namespace ON memories(namespace);
            CREATE INDEX IF NOT EXISTS idx_memories_workspace ON memories(workspace_id);
            CREATE TABLE IF NOT EXISTS episodes (
                id TEXT PRIMARY KEY,
                user_id TEXT NOT NULL,
                agent_id TEXT,
                workspace_id TEXT,
                session_id TEXT NOT NULL,
                namespace TEXT NOT NULL,
                payload TEXT NOT NULL,
                created_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_episodes_session ON episodes(session_id);
            CREATE TABLE IF NOT EXISTS audit_logs (
                id TEXT PRIMARY KEY,
                actor_user_id TEXT,
                actor_agent_id TEXT,
                action TEXT NOT NULL,
                target_type TEXT NOT NULL,
                target_id TEXT,
                namespace TEXT,
                decision TEXT NOT NULL,
                payload TEXT NOT NULL,
                created_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_logs(created_at);
            CREATE TABLE IF NOT EXISTS memory_refs (
                id TEXT PRIMARY KEY,
                gateway_id TEXT NOT NULL,
                workspace_id TEXT NOT NULL,
                user_id TEXT NOT NULL,
                agent_id TEXT,
                session_id TEXT,
                turn_id TEXT,
                namespace TEXT,
                backend_type TEXT NOT NULL,
                ref_type TEXT NOT NULL,
                native_id TEXT,
                native_uri TEXT,
                provenance_id TEXT,
                idempotency_key TEXT,
                content_hash TEXT,
                source_type TEXT,
                source_event_id TEXT,
                status TEXT NOT NULL,
                error_message TEXT,
                metadata_json TEXT NOT NULL,
                payload TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_memory_refs_gateway ON memory_refs(gateway_id);
            CREATE INDEX IF NOT EXISTS idx_memory_refs_scope ON memory_refs(workspace_id, user_id, agent_id, session_id);
            CREATE INDEX IF NOT EXISTS idx_memory_refs_backend ON memory_refs(backend_type, ref_type, status);
            CREATE INDEX IF NOT EXISTS idx_memory_refs_namespace ON memory_refs(namespace);
            CREATE TABLE IF NOT EXISTS outbox_events (
                id TEXT PRIMARY KEY,
                event_type TEXT NOT NULL,
                gateway_id TEXT NOT NULL,
                workspace_id TEXT NOT NULL,
                user_id TEXT NOT NULL,
                agent_id TEXT,
                session_id TEXT,
                backend_type TEXT NOT NULL,
                operation TEXT NOT NULL,
                payload_ref TEXT,
                status TEXT NOT NULL,
                attempt_count INTEGER NOT NULL,
                max_attempts INTEGER NOT NULL,
                next_retry_at TEXT,
                last_error TEXT,
                locked_by TEXT,
                locked_at TEXT,
                lease_expires_at TEXT,
                metadata_json TEXT NOT NULL,
                payload TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_outbox_events_status ON outbox_events(status, next_retry_at);
            CREATE INDEX IF NOT EXISTS idx_outbox_events_backend ON outbox_events(backend_type, operation);
            CREATE INDEX IF NOT EXISTS idx_outbox_events_gateway ON outbox_events(gateway_id);
            CREATE TABLE IF NOT EXISTS commit_jobs (
                job_id TEXT PRIMARY KEY,
                workspace_id TEXT NOT NULL,
                user_id TEXT NOT NULL,
                agent_id TEXT,
                session_id TEXT NOT NULL,
                namespace TEXT,
                status TEXT NOT NULL,
                requested_by TEXT,
                created_refs_count INTEGER NOT NULL,
                error_message TEXT,
                payload TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                started_at TEXT,
                finished_at TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_commit_jobs_session ON commit_jobs(session_id);
            CREATE INDEX IF NOT EXISTS idx_commit_jobs_status ON commit_jobs(status);
            """
        )
        # Back-fill columns that were added after the original schema shipped.
        self._ensure_memory_ref_columns(conn)
        self._ensure_outbox_event_columns(conn)
        # One-off cleanup: keep only the newest row per
        # (gateway_id, backend_type, ref_type) triple left behind by a
        # since-dropped unique index.
        conn.execute(
            """
            DELETE FROM memory_refs
            WHERE rowid NOT IN (
                SELECT MAX(rowid)
                FROM memory_refs
                GROUP BY gateway_id, backend_type, ref_type
            )
            """
        )
        conn.execute(
            """
            DROP INDEX IF EXISTS uq_memory_refs_gateway_backend_ref_type
            """
        )
|
|
|
|
def _ensure_memory_ref_columns(self, conn: sqlite3.Connection) -> None:
    """Add memory_refs columns introduced after the initial schema, if missing."""
    existing = {row["name"] for row in conn.execute("PRAGMA table_info(memory_refs)").fetchall()}
    for name, sql_type in (("idempotency_key", "TEXT"), ("content_hash", "TEXT")):
        if name in existing:
            continue
        conn.execute(f"ALTER TABLE memory_refs ADD COLUMN {name} {sql_type}")
|
|
|
|
def _ensure_outbox_event_columns(self, conn: sqlite3.Connection) -> None:
    """Add outbox_events lease/lock columns if the table predates them."""
    existing = {row["name"] for row in conn.execute("PRAGMA table_info(outbox_events)").fetchall()}
    for name, sql_type in (
        ("locked_by", "TEXT"),
        ("locked_at", "TEXT"),
        ("lease_expires_at", "TEXT"),
    ):
        if name in existing:
            continue
        conn.execute(f"ALTER TABLE outbox_events ADD COLUMN {name} {sql_type}")
|
|
|
|
def create_user(self, user: UserRecord) -> UserRecord:
    """Insert or replace *user* and upsert its default profile.

    Preserves an already-set created_at; updated_at is always stamped.
    """
    now = datetime.now(timezone.utc)
    user.created_at = user.created_at or now
    user.updated_at = now
    with self._connect() as conn:
        conn.execute(
            "INSERT OR REPLACE INTO users(id, payload, updated_at) VALUES (?, ?, ?)",
            (user.id, _json_dump_model(user), user.updated_at.isoformat()),
        )
    # NOTE(review): unlike the in-memory repo, this overwrites any existing
    # profile (upsert, not setdefault) — confirm intended.
    self.upsert_profile(ProfileRecord(user_id=user.id, namespace=user.profile_namespace or f"user/{user.id}/profile"))
    return user
|
|
|
|
def get_user(self, user_id: str) -> Optional[UserRecord]:
    """Load the user with *user_id*, or None when absent."""
    with self._connect() as conn:
        row = conn.execute(
            "SELECT payload FROM users WHERE id = ?", (user_id,)
        ).fetchone()
    if row is None:
        return None
    return _json_load_model(UserRecord, row["payload"])
|
|
|
|
def upsert_memory(self, memory: MemoryRecord) -> MemoryRecord:
    """Insert or replace a memory row, bumping version on update.

    Denormalized columns (namespace, type, importance, ...) are stored for
    querying; the full record lives in the JSON ``payload`` column.
    """
    # Read-then-write over two connections; not atomic under concurrency.
    existing = self.get_memory(memory.id)
    now = datetime.now(timezone.utc)
    if existing:
        memory.version = existing.version + 1
        memory.created_at = existing.created_at
    memory.updated_at = now
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO memories(
                id, user_id, agent_id, workspace_id, session_id, namespace,
                memory_type, visibility, importance, confidence, expires_at,
                archived_at, payload, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                memory.id,
                memory.user_id,
                memory.agent_id,
                memory.workspace_id,
                memory.session_id,
                memory.namespace,
                memory.memory_type.value,
                memory.visibility.value,
                memory.importance,
                memory.confidence,
                memory.expires_at.isoformat() if memory.expires_at else None,
                memory.archived_at.isoformat() if memory.archived_at else None,
                _json_dump_model(memory),
                memory.updated_at.isoformat(),
            ),
        )
    return memory
|
|
|
|
def get_memory(self, memory_id: str) -> Optional[MemoryRecord]:
    """Load the memory with *memory_id*, or None when absent."""
    with self._connect() as conn:
        row = conn.execute(
            "SELECT payload FROM memories WHERE id = ?", (memory_id,)
        ).fetchone()
    if row is None:
        return None
    return _json_load_model(MemoryRecord, row["payload"])
|
|
|
|
def delete_memory(self, memory_id: str) -> bool:
    """Delete the row for *memory_id*; True when a row was actually removed."""
    with self._connect() as conn:
        deleted = conn.execute("DELETE FROM memories WHERE id = ?", (memory_id,)).rowcount
    return deleted > 0
|
|
|
|
def list_memories(self) -> Iterable[MemoryRecord]:
    """Load every stored memory record."""
    with self._connect() as conn:
        payloads = [row["payload"] for row in conn.execute("SELECT payload FROM memories")]
    return [_json_load_model(MemoryRecord, payload) for payload in payloads]
|
|
|
|
def append_episode(self, episode: EpisodeRecord) -> EpisodeRecord:
    """Insert an episode (replaces on duplicate id)."""
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO episodes(
                id, user_id, agent_id, workspace_id, session_id, namespace, payload, created_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                episode.id,
                episode.user_id,
                episode.agent_id,
                episode.workspace_id,
                episode.session_id,
                episode.namespace,
                _json_dump_model(episode),
                episode.created_at.isoformat(),
            ),
        )
    return episode
|
|
|
|
def list_session_episodes(self, session_id: str) -> list[EpisodeRecord]:
    """Return all episodes for *session_id*, oldest first."""
    query = "SELECT payload FROM episodes WHERE session_id = ? ORDER BY created_at ASC"
    with self._connect() as conn:
        rows = conn.execute(query, (session_id,)).fetchall()
    return [_json_load_model(EpisodeRecord, row["payload"]) for row in rows]
|
|
|
|
def get_profile(self, user_id: str) -> Optional[ProfileRecord]:
    """Load the profile for *user_id*, or None when absent."""
    with self._connect() as conn:
        row = conn.execute(
            "SELECT payload FROM profiles WHERE user_id = ?", (user_id,)
        ).fetchone()
    if row is None:
        return None
    return _json_load_model(ProfileRecord, row["payload"])
|
|
|
|
def upsert_profile(self, profile: ProfileRecord) -> ProfileRecord:
    """Insert or replace *profile*, stamping updated_at.

    NOTE(review): unlike the in-memory repo this does not bump
    ``profile.version`` — confirm whether that divergence is intended.
    """
    profile.updated_at = datetime.now(timezone.utc)
    with self._connect() as conn:
        conn.execute(
            "INSERT OR REPLACE INTO profiles(user_id, payload, updated_at) VALUES (?, ?, ?)",
            (profile.user_id, _json_dump_model(profile), profile.updated_at.isoformat()),
        )
    return profile
|
|
|
|
def add_audit(self, audit: AuditLog) -> AuditLog:
    """Persist an audit entry (replaces any prior entry with the same id)."""
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO audit_logs(
                id, actor_user_id, actor_agent_id, action, target_type, target_id,
                namespace, decision, payload, created_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                audit.id,
                audit.actor_user_id,
                audit.actor_agent_id,
                audit.action,
                audit.target_type,
                audit.target_id,
                audit.namespace,
                audit.decision,
                _json_dump_model(audit),
                audit.created_at.isoformat(),
            ),
        )
    return audit
|
|
|
|
def list_audit(self, limit: int = 100) -> list[AuditLog]:
    """Return up to *limit* most recent audit entries, newest first.

    NOTE(review): ordering is newest-first here but oldest-first in the
    in-memory repo; a negative *limit* disables the cap in SQLite.
    """
    with self._connect() as conn:
        rows = conn.execute(
            "SELECT payload FROM audit_logs ORDER BY created_at DESC LIMIT ?",
            (limit,),
        ).fetchall()
        return [_json_load_model(AuditLog, row["payload"]) for row in rows]
|
|
|
|
def save_memory_ref(self, ref: MemoryRef) -> MemoryRef:
    """Insert or replace a memory ref, preserving created_at on update.

    Read and write happen on two separate connections, so concurrent
    writers can race on the created_at-preservation step.
    """
    existing = None
    with self._connect() as conn:
        row = conn.execute("SELECT payload FROM memory_refs WHERE id = ?", (ref.id,)).fetchone()
        if row:
            existing = _json_load_model(MemoryRef, row["payload"])
    now = datetime.now(timezone.utc)
    if existing:
        ref.created_at = existing.created_at
    ref.updated_at = now
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO memory_refs(
                id, gateway_id, workspace_id, user_id, agent_id, session_id, turn_id,
                namespace, backend_type, ref_type, native_id, native_uri, provenance_id,
                idempotency_key, content_hash, source_type, source_event_id, status, error_message, metadata_json,
                payload, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                ref.id,
                ref.gateway_id,
                ref.workspace_id,
                ref.user_id,
                ref.agent_id,
                ref.session_id,
                ref.turn_id,
                ref.namespace,
                ref.backend_type.value,
                ref.ref_type.value,
                ref.native_id,
                ref.native_uri,
                ref.provenance_id,
                ref.idempotency_key,
                ref.content_hash,
                ref.source_type,
                ref.source_event_id,
                ref.status.value,
                ref.error_message,
                json.dumps(ref.metadata, ensure_ascii=False),
                _json_dump_model(ref),
                ref.created_at.isoformat(),
                ref.updated_at.isoformat(),
            ),
        )
    return ref
|
|
|
|
def get_memory_ref(self, ref_id: str) -> MemoryRef | None:
    """Load the memory ref with *ref_id*, or None when absent."""
    with self._connect() as conn:
        row = conn.execute(
            "SELECT payload FROM memory_refs WHERE id = ?", (ref_id,)
        ).fetchone()
    if row is None:
        return None
    return _json_load_model(MemoryRef, row["payload"])
|
|
|
|
def list_memory_refs(
    self,
    gateway_id: str | None = None,
    workspace_id: str | None = None,
    user_id: str | None = None,
    agent_id: str | None = None,
    session_id: str | None = None,
    namespace: str | None = None,
    backend_type: BackendType | str | None = None,
    ref_type: MemoryRefType | str | None = None,
    status: BackendRefStatus | str | None = None,
    limit: int = 100,
) -> list[MemoryRef]:
    """Return refs matching every non-None filter, newest first, capped at *limit*.

    The WHERE clause is built from a fixed column map; values are always
    bound as parameters (no injection risk from filter values).
    """
    clauses: list[str] = []
    params: list[str | int] = []
    filters = {
        "gateway_id": gateway_id,
        "workspace_id": workspace_id,
        "user_id": user_id,
        "agent_id": agent_id,
        "session_id": session_id,
        "namespace": namespace,
        "backend_type": _enum_value(backend_type) if backend_type is not None else None,
        "ref_type": _enum_value(ref_type) if ref_type is not None else None,
        "status": _enum_value(status) if status is not None else None,
    }
    for key, value in filters.items():
        if value is not None:
            clauses.append(f"{key} = ?")
            params.append(value)
    where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    params.append(limit)
    with self._connect() as conn:
        rows = conn.execute(
            f"SELECT payload FROM memory_refs {where} ORDER BY created_at DESC LIMIT ?",
            params,
        ).fetchall()
        return [_json_load_model(MemoryRef, row["payload"]) for row in rows]
|
|
|
|
def save_outbox_event(self, event: OutboxEvent) -> OutboxEvent:
    """Insert or replace an outbox event, preserving created_at on update.

    As with save_memory_ref, the read and write use separate connections
    and are therefore not atomic under concurrent writers.
    """
    existing = None
    with self._connect() as conn:
        row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event.id,)).fetchone()
        if row:
            existing = _json_load_model(OutboxEvent, row["payload"])
    now = datetime.now(timezone.utc)
    if existing:
        event.created_at = existing.created_at
    event.updated_at = now
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO outbox_events(
                id, event_type, gateway_id, workspace_id, user_id, agent_id, session_id,
                backend_type, operation, payload_ref, status, attempt_count, max_attempts,
                next_retry_at, last_error, locked_by, locked_at, lease_expires_at,
                metadata_json, payload, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                event.id,
                event.event_type,
                event.gateway_id,
                event.workspace_id,
                event.user_id,
                event.agent_id,
                event.session_id,
                event.backend_type.value,
                event.operation.value,
                event.payload_ref,
                event.status.value,
                event.attempt_count,
                event.max_attempts,
                event.next_retry_at.isoformat() if event.next_retry_at else None,
                event.last_error,
                event.locked_by,
                event.locked_at.isoformat() if event.locked_at else None,
                event.lease_expires_at.isoformat() if event.lease_expires_at else None,
                json.dumps(event.metadata, ensure_ascii=False),
                _json_dump_model(event),
                event.created_at.isoformat(),
                event.updated_at.isoformat(),
            ),
        )
    return event
|
|
|
|
def list_outbox_events(
    self,
    status: OutboxEventStatus | str | None = None,
    backend_type: BackendType | str | None = None,
    operation: BackendOperation | str | None = None,
    gateway_id: str | None = None,
    payload_ref: str | None = None,
    limit: int = 100,
) -> list[OutboxEvent]:
    """Return events matching every non-None filter, newest first, capped at *limit*.

    WHERE is built from a fixed column map; values are always bound as
    parameters.
    """
    clauses: list[str] = []
    params: list[str | int] = []
    filters = {
        "status": _enum_value(status) if status is not None else None,
        "backend_type": _enum_value(backend_type) if backend_type is not None else None,
        "operation": _enum_value(operation) if operation is not None else None,
        "gateway_id": gateway_id,
        "payload_ref": payload_ref,
    }
    for key, value in filters.items():
        if value is not None:
            clauses.append(f"{key} = ?")
            params.append(value)
    where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    params.append(limit)
    with self._connect() as conn:
        rows = conn.execute(
            f"SELECT payload FROM outbox_events {where} ORDER BY created_at DESC LIMIT ?",
            params,
        ).fetchall()
        return [_json_load_model(OutboxEvent, row["payload"]) for row in rows]
|
|
|
|
def list_outbox_events_by_job(self, job_id: str, limit: int = 100) -> list[OutboxEvent]:
    """Return outbox events whose payload_ref points at commit job *job_id*."""
    job_ref = f"commit_job:{job_id}"
    return self.list_outbox_events(payload_ref=job_ref, limit=limit)
|
|
|
|
def claim_outbox_event(self, event_id: str) -> OutboxEvent | None:
    """Atomically claim a single PENDING, due event (inline worker, 300 s lease).

    The UPDATE repeats the status / next_retry_at guards so a concurrent
    claimer loses the race (rowcount == 0) rather than double-claiming.
    Returns the claimed event, or None when missing / not claimable.
    """
    with self._connect() as conn:
        row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event_id,)).fetchone()
    if not row:
        return None
    event = _json_load_model(OutboxEvent, row["payload"])
    now = datetime.now(timezone.utc)
    if event.status != OutboxEventStatus.PENDING:
        return None
    if event.next_retry_at and event.next_retry_at > now:
        return None
    event.status = OutboxEventStatus.PROCESSING
    event.locked_by = "inline"
    event.locked_at = now
    # Fixed 300-second lease for inline (non-worker) claims.
    event.lease_expires_at = now + _safe_timedelta(300)
    event.updated_at = now
    with self._connect() as conn:
        cursor = conn.execute(
            """
            UPDATE outbox_events
            SET status = ?, locked_by = ?, locked_at = ?, lease_expires_at = ?,
                payload = ?, metadata_json = ?, updated_at = ?
            WHERE id = ?
              AND status = ?
              AND (next_retry_at IS NULL OR next_retry_at <= ?)
            """,
            (
                event.status.value,
                event.locked_by,
                event.locked_at.isoformat() if event.locked_at else None,
                event.lease_expires_at.isoformat() if event.lease_expires_at else None,
                _json_dump_model(event),
                json.dumps(event.metadata, ensure_ascii=False),
                event.updated_at.isoformat(),
                event.id,
                OutboxEventStatus.PENDING.value,
                now.isoformat(),
            ),
        )
    # rowcount == 0 means another claimer won the compare-and-swap.
    return event if cursor.rowcount else None
|
|
|
|
def claim_pending_outbox_events(
|
|
self,
|
|
limit: int,
|
|
worker_id: str,
|
|
lease_seconds: int,
|
|
) -> list[OutboxEvent]:
|
|
now = datetime.now(timezone.utc)
|
|
now_iso = now.isoformat()
|
|
with self._connect() as conn:
|
|
rows = conn.execute(
|
|
"""
|
|
SELECT payload FROM outbox_events
|
|
WHERE status = ?
|
|
AND (next_retry_at IS NULL OR next_retry_at <= ?)
|
|
ORDER BY created_at ASC
|
|
LIMIT ?
|
|
""",
|
|
(OutboxEventStatus.PENDING.value, now_iso, limit),
|
|
).fetchall()
|
|
claimed: list[OutboxEvent] = []
|
|
with self._connect() as conn:
|
|
for row in rows:
|
|
event = _json_load_model(OutboxEvent, row["payload"])
|
|
if event.status != OutboxEventStatus.PENDING:
|
|
continue
|
|
event.status = OutboxEventStatus.PROCESSING
|
|
event.locked_by = worker_id
|
|
event.locked_at = now
|
|
event.lease_expires_at = now + _safe_timedelta(lease_seconds)
|
|
event.updated_at = now
|
|
cursor = conn.execute(
|
|
"""
|
|
UPDATE outbox_events
|
|
SET status = ?, locked_by = ?, locked_at = ?, lease_expires_at = ?,
|
|
payload = ?, metadata_json = ?, updated_at = ?
|
|
WHERE id = ?
|
|
AND status = ?
|
|
AND (next_retry_at IS NULL OR next_retry_at <= ?)
|
|
""",
|
|
(
|
|
event.status.value,
|
|
event.locked_by,
|
|
event.locked_at.isoformat() if event.locked_at else None,
|
|
event.lease_expires_at.isoformat() if event.lease_expires_at else None,
|
|
_json_dump_model(event),
|
|
json.dumps(event.metadata, ensure_ascii=False),
|
|
event.updated_at.isoformat(),
|
|
event.id,
|
|
OutboxEventStatus.PENDING.value,
|
|
now_iso,
|
|
),
|
|
)
|
|
if cursor.rowcount:
|
|
claimed.append(event)
|
|
return claimed
|
|
|
|
def release_expired_processing_events(self, now: datetime | None = None) -> list[OutboxEvent]:
|
|
now = now or datetime.now(timezone.utc)
|
|
with self._connect() as conn:
|
|
rows = conn.execute(
|
|
"""
|
|
SELECT payload FROM outbox_events
|
|
WHERE status = ?
|
|
AND lease_expires_at IS NOT NULL
|
|
AND lease_expires_at <= ?
|
|
""",
|
|
(OutboxEventStatus.PROCESSING.value, now.isoformat()),
|
|
).fetchall()
|
|
released: list[OutboxEvent] = []
|
|
for row in rows:
|
|
event = _json_load_model(OutboxEvent, row["payload"])
|
|
event.status = OutboxEventStatus.PENDING
|
|
event.locked_by = None
|
|
event.locked_at = None
|
|
event.lease_expires_at = None
|
|
event.updated_at = now
|
|
released.append(self.save_outbox_event(event))
|
|
return released
|
|
|
|
def update_outbox_event_status(
|
|
self,
|
|
event_id: str,
|
|
status: OutboxEventStatus | str,
|
|
last_error: str | None = None,
|
|
) -> OutboxEvent | None:
|
|
with self._connect() as conn:
|
|
row = conn.execute("SELECT payload FROM outbox_events WHERE id = ?", (event_id,)).fetchone()
|
|
if not row:
|
|
return None
|
|
event = _json_load_model(OutboxEvent, row["payload"])
|
|
event.status = OutboxEventStatus(_enum_value(status))
|
|
event.last_error = last_error
|
|
event.updated_at = datetime.now(timezone.utc)
|
|
if event.status != OutboxEventStatus.PROCESSING:
|
|
event.locked_by = None
|
|
event.locked_at = None
|
|
event.lease_expires_at = None
|
|
if event.status in {OutboxEventStatus.FAILED, OutboxEventStatus.DEAD_LETTER}:
|
|
event.attempt_count += 1
|
|
return self.save_outbox_event(event)
|
|
|
|
def save_commit_job(self, job: CommitJob) -> CommitJob:
|
|
existing = None
|
|
with self._connect() as conn:
|
|
row = conn.execute("SELECT payload FROM commit_jobs WHERE job_id = ?", (job.job_id,)).fetchone()
|
|
if row:
|
|
existing = _json_load_model(CommitJob, row["payload"])
|
|
now = datetime.now(timezone.utc)
|
|
if existing:
|
|
job.created_at = existing.created_at
|
|
job.updated_at = now
|
|
with self._connect() as conn:
|
|
conn.execute(
|
|
"""
|
|
INSERT OR REPLACE INTO commit_jobs(
|
|
job_id, workspace_id, user_id, agent_id, session_id, namespace,
|
|
status, requested_by, created_refs_count, error_message, payload,
|
|
created_at, updated_at, started_at, finished_at
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
(
|
|
job.job_id,
|
|
job.workspace_id,
|
|
job.user_id,
|
|
job.agent_id,
|
|
job.session_id,
|
|
job.namespace,
|
|
job.status.value,
|
|
job.requested_by,
|
|
job.created_refs_count,
|
|
job.error_message,
|
|
_json_dump_model(job),
|
|
job.created_at.isoformat(),
|
|
job.updated_at.isoformat(),
|
|
job.started_at.isoformat() if job.started_at else None,
|
|
job.finished_at.isoformat() if job.finished_at else None,
|
|
),
|
|
)
|
|
return job
|
|
|
|
def get_commit_job(self, job_id: str) -> CommitJob | None:
|
|
with self._connect() as conn:
|
|
row = conn.execute("SELECT payload FROM commit_jobs WHERE job_id = ?", (job_id,)).fetchone()
|
|
return _json_load_model(CommitJob, row["payload"]) if row else None
|
|
|
|
def update_commit_job_status(
|
|
self,
|
|
job_id: str,
|
|
status: str,
|
|
error_message: str | None = None,
|
|
created_refs_count: int | None = None,
|
|
) -> CommitJob | None:
|
|
from .schemas_v2 import OperationStatus
|
|
|
|
job = self.get_commit_job(job_id)
|
|
if not job:
|
|
return None
|
|
job.status = OperationStatus(_enum_value(status))
|
|
job.error_message = error_message
|
|
if created_refs_count is not None:
|
|
job.created_refs_count = created_refs_count
|
|
now = datetime.now(timezone.utc)
|
|
job.updated_at = now
|
|
if job.status.value == "running" and job.started_at is None:
|
|
job.started_at = now
|
|
if job.status.value in {"success", "failed", "partial_success"}:
|
|
job.finished_at = now
|
|
return self.save_commit_job(job)
|
|
|
|
def count_memory_refs(
|
|
self,
|
|
gateway_id: str | None = None,
|
|
session_id: str | None = None,
|
|
status: BackendRefStatus | str | None = None,
|
|
) -> int:
|
|
return len(self.list_memory_refs(gateway_id=gateway_id, session_id=session_id, status=status, limit=100000))
|
|
|
|
|
|
def build_repository() -> MetadataRepository:
    """Construct the repository selected by the storage configuration.

    Returns an in-memory repository only when persistence is explicitly
    disabled (``backend == "memory"``); otherwise the SQLite-backed
    default described in the module docstring.
    """
    config = get_config()
    backend = config.storage.backend
    if backend == "memory":
        return InMemoryRepository()
    return SQLiteRepository(config.storage.sqlite_path)
|
|
|
|
|
|
# Module-level repository singleton, constructed once at import time from
# the active configuration (see build_repository above).
repository = build_repository()
|