# File: memory-gateway/core/service.py
# Viewer metadata: 463 lines, 15 KiB, Python

from __future__ import annotations

import hashlib
import mimetypes
import secrets
import time
import uuid
from pathlib import Path
from typing import Any

from fastapi import UploadFile

from .config import GatewayConfig
from .repository import MemoryRepository
def new_resource_id() -> str:
    """Generate a fresh resource identifier.

    Returns:
        ``"r_"`` followed by the 32-character hexadecimal form of a random
        UUID4.
    """
    random_hex = uuid.uuid4().hex
    return "r_" + random_hex
def resource_session_id(user_id: str, resource_id: str) -> str:
    """Build the session id under which a resource's memories are stored.

    Args:
        user_id: Owner of the resource.
        resource_id: Identifier of the resource.

    Returns:
        A string of the form ``resource:<user_id>:<resource_id>``.
    """
    return "resource:{}:{}".format(user_id, resource_id)
def public_resource_uri(user_id: str, resource_id: str) -> str:
    """Build the externally visible URI for a resource.

    Args:
        user_id: Owner of the resource.
        resource_id: Identifier of the resource.

    Returns:
        A string of the form ``resource://<user_id>/<resource_id>``.
    """
    return "resource://{}/{}".format(user_id, resource_id)
def infer_content_type(filename: str | None, mime_type: str | None) -> str:
    """Classify an upload into one of the coarse content-type labels.

    The declared MIME type is preferred; when it is missing, the type is
    guessed from the filename. Any MIME parameters (for example
    ``; charset=utf-8``) are ignored, so ``text/html; charset=utf-8`` is
    recognised as HTML rather than falling through to the default.

    Args:
        filename: Original filename, used for MIME guessing and for the
            suffix-based fallbacks. May be ``None``.
        mime_type: MIME type declared by the client. May be ``None``.

    Returns:
        One of ``"image"``, ``"audio"``, ``"pdf"``, ``"html"``, ``"text"``
        or ``"doc"`` (the default when nothing else matches).
    """
    declared = mime_type or mimetypes.guess_type(filename or "")[0] or ""
    # Keep only the "type/subtype" part; parameters must not defeat the
    # exact-match checks below.
    mime = declared.split(";", 1)[0].strip().lower()
    suffix = Path(filename or "").suffix.lower()

    if mime.startswith("image/"):
        return "image"
    if mime.startswith("audio/"):
        return "audio"
    if mime == "application/pdf" or suffix == ".pdf":
        return "pdf"
    if mime in {"text/html", "application/xhtml+xml"} or suffix in {".html", ".htm"}:
        return "html"
    if mime.startswith("text/plain") or suffix in {".txt", ".md", ".csv", ".log"}:
        return "text"
    return "doc"
def _safe_filename(filename: str | None) -> str:
    """Reduce a client-supplied filename to a single safe path component.

    Directory parts are discarded, using either ``/`` or ``\\`` as the
    separator so that Windows-style client paths are handled on any host.
    NUL bytes are removed. Names that would not denote a file (empty, ``.``
    or ``..``) are replaced by ``"upload.bin"``.

    Args:
        filename: Filename as sent by the client; may be ``None`` or empty.

    Returns:
        A non-empty filename with no directory components.
    """
    fallback = "upload.bin"
    # Treat backslashes as separators so "C:\\dir\\file.txt" yields
    # "file.txt" on POSIX hosts too; drop NULs, which open() rejects.
    cleaned = (filename or "").replace("\x00", "").replace("\\", "/")
    name = Path(cleaned).name
    # Path("..").name is "..", which would point at the parent directory.
    if name in {"", ".", ".."}:
        return fallback
    return name
def _copy_upload(file: UploadFile, destination: Path) -> tuple[str, int]:
    """Stream an upload to disk while hashing it.

    Parent directories of ``destination`` are created when missing. The
    upload is read from ``file.file`` in 1 MiB chunks until a read returns
    an empty result.

    Args:
        file: Upload whose ``file`` attribute is read from.
        destination: Path of the file to create (opened in ``"wb"`` mode).

    Returns:
        A ``(sha256_hexdigest, byte_count)`` pair for the data written.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    total_bytes = 0
    source = file.file

    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("wb") as sink:
        chunk = source.read(chunk_size)
        while chunk:
            total_bytes += len(chunk)
            digest.update(chunk)
            sink.write(chunk)
            chunk = source.read(chunk_size)

    return digest.hexdigest(), total_bytes
class MemoryGatewayService:
    """Coordinate users, uploaded resources and memory search.

    The service ties together three collaborators:

    * ``config`` - read for ``storage_dir`` and
      ``resource_search_batch_size``.
    * ``repository`` - persistence for users, resources, tombstones and
      overrides.
    * ``everos_client`` - async client whose ``add_memory``,
      ``flush_memory`` and ``search_memory`` coroutines are awaited.
    """

    def __init__(
        self,
        config: GatewayConfig,
        repository: MemoryRepository,
        everos_client: Any,
    ) -> None:
        """Store the collaborators; no I/O is performed here."""
        self.config = config
        self.repository = repository
        self.everos_client = everos_client

    def create_user(self, user_id: str) -> dict[str, Any]:
        """Create a user with a freshly generated key.

        Args:
            user_id: Identifier for the new user.

        Returns:
            A dict with ``user_id``, ``user_key`` and ``created_at`` taken
            from the record returned by the repository.
        """
        user_key = f"uk_{secrets.token_urlsafe(32)}"
        user = self.repository.create_user(user_id, user_key)
        return {
            "user_id": user["id"],
            "user_key": user["user_key"],
            "created_at": user["created_at"],
        }

    def authenticate_user(self, user_id: str, user_key: str) -> bool:
        """Check a user key against the stored one in constant time.

        Args:
            user_id: Identifier of the user to authenticate.
            user_key: Key presented by the caller.

        Returns:
            ``True`` when the user exists and the keys match, else ``False``.
        """
        user = self.repository.get_user(user_id)
        if user is None:
            return False
        # compare_digest() only accepts ASCII-only str; comparing the UTF-8
        # bytes keeps a non-ASCII key from raising TypeError instead of
        # simply failing authentication.
        stored = str(user["user_key"]).encode("utf-8")
        presented = user_key.encode("utf-8")
        return secrets.compare_digest(stored, presented)

    async def upload_resource(
        self,
        *,
        user_id: str,
        app_id: str,
        project_id: str,
        file: UploadFile,
        title: str | None,
        description: str | None,
    ) -> dict[str, Any]:
        """Store an uploaded file, register it and send it for ingestion.

        The file is written under
        ``storage_dir/<user_id>/<resource_id>/<filename>``, a resource row is
        created with status ``"ingesting"``, and the memory client is asked
        to add and flush it. The status is then set to ``"extracted"`` or,
        if the client raised, to ``"failed"`` with the error message.

        Args:
            user_id: Owner of the resource.
            app_id: Application the resource belongs to.
            project_id: Project the resource belongs to.
            file: The uploaded file.
            title: Optional title stored with the resource.
            description: Optional description stored with the resource.

        Returns:
            The resource summary (see ``_resource_summary``). Ingestion
            failures are reported through ``status`` and are not raised.
        """
        resource_id = new_resource_id()
        session_id = resource_session_id(user_id, resource_id)
        original_filename = _safe_filename(file.filename)
        mime_type = file.content_type or mimetypes.guess_type(original_filename)[0]
        content_type = infer_content_type(original_filename, mime_type)

        stored_path = (
            self.config.storage_dir / user_id / resource_id / original_filename
        )
        # NOTE(review): _copy_upload is synchronous, so this copy runs on
        # the event-loop thread - confirm that is acceptable for large files.
        sha256, size_bytes = _copy_upload(file, stored_path)
        internal_uri = stored_path.resolve().as_uri()

        resource = self.repository.create_resource(
            id=resource_id,
            user_id=user_id,
            app_id=app_id,
            project_id=project_id,
            session_id=session_id,
            original_filename=original_filename,
            mime_type=mime_type,
            content_type=content_type,
            uri=internal_uri,
            uri_public=False,
            sha256=sha256,
            size_bytes=size_bytes,
            title=title,
            description=description,
            status="ingesting",
            error_message=None,
        )

        try:
            await self.everos_client.add_memory(
                self._build_add_payload(
                    resource=resource,
                    user_id=user_id,
                    app_id=app_id,
                    project_id=project_id,
                    filename=original_filename,
                )
            )
            await self.everos_client.flush_memory(session_id, app_id, project_id)
        except Exception as exc:
            # Deliberately best-effort: the upload itself succeeded, so the
            # failure is recorded on the resource instead of being raised.
            # Some exceptions have an empty str(); fall back to the class
            # name so the stored message is never blank.
            message = str(exc) or type(exc).__name__
            failed = self.repository.update_resource_status(
                resource_id,
                "failed",
                message,
            )
            return self._resource_summary(failed or resource)

        extracted = self.repository.update_resource_status(resource_id, "extracted")
        return self._resource_summary(extracted or resource)

    def _build_add_payload(
        self,
        *,
        resource: dict[str, Any],
        user_id: str,
        app_id: str,
        project_id: str,
        filename: str,
        timestamp_ms: int | None = None,
    ) -> dict[str, Any]:
        """Build the ``add_memory`` request body for an uploaded resource.

        Args:
            resource: Resource record; ``session_id``, ``content_type``,
                ``uri`` and ``id`` are read from it.
            user_id: Used as the message ``sender_id``.
            app_id: Application id copied into the payload.
            project_id: Project id copied into the payload.
            filename: Name of the file; its suffix (without the dot)
                becomes ``ext``, or ``None`` when there is no suffix.
            timestamp_ms: Message timestamp in milliseconds since the Unix
                epoch. Defaults to the current time.

        Returns:
            A payload with a single user message containing one content
            item that references the resource.
        """
        if timestamp_ms is None:
            # Stamp with the actual upload time rather than a fixed value.
            timestamp_ms = time.time_ns() // 1_000_000
        return {
            "session_id": resource["session_id"],
            "app_id": app_id,
            "project_id": project_id,
            "messages": [
                {
                    "sender_id": user_id,
                    "role": "user",
                    "timestamp": timestamp_ms,
                    "content": [
                        {
                            "type": resource["content_type"],
                            "uri": resource["uri"],
                            "name": filename,
                            "ext": Path(filename).suffix.lstrip(".") or None,
                            "extras": {
                                "resource_id": resource["id"],
                                "source": "user_upload",
                            },
                        }
                    ],
                }
            ],
        }

    def list_resources(self, user_id: str) -> list[dict[str, Any]]:
        """Return the detail view of every resource the repository lists for the user."""
        return [
            self._resource_detail(item)
            for item in self.repository.list_resources(user_id)
        ]

    def get_resource_detail(
        self,
        resource_id: str,
        user_id: str,
    ) -> dict[str, Any] | None:
        """Return the detail view of one resource, or ``None`` if not found."""
        resource = self.repository.get_resource_for_user(resource_id, user_id)
        if resource is None:
            return None
        return self._resource_detail(resource)

    def delete_resource(self, resource_id: str, user_id: str) -> dict[str, Any] | None:
        """Soft-delete a resource owned by the user.

        Returns:
            The summary of the record returned by the soft delete, or
            ``None`` when the resource is not found for this user or the
            repository returns no record from the delete.
        """
        before = self.repository.get_resource_for_user(resource_id, user_id)
        if before is None:
            return None
        resource = self.repository.soft_delete_resource(resource_id, user_id)
        if resource is None:
            # No record came back from the delete; report "not found"
            # instead of failing while building the summary.
            return None
        return self._resource_summary(resource)

    async def search_memories(
        self,
        *,
        user_id: str,
        query: str,
        conversation_id: str | None,
        scope: list[str],
        top_k: int,
        app_id: str,
        project_id: str,
    ) -> dict[str, Any]:
        """Search memories across the requested scopes.

        Recognised scope names are ``"current_chat"`` (only used when
        ``conversation_id`` is set), ``"resources"`` and
        ``"all_user_memory"``. Results are concatenated in that order and
        are not de-duplicated across scopes; tombstoned entries are removed
        and active overrides applied before returning.

        Args:
            user_id: User whose memories are searched.
            query: Search text.
            conversation_id: Conversation for the ``"current_chat"`` scope.
            scope: Scope names to search.
            top_k: ``top_k`` value sent with every backend search.
            app_id: Application id sent with every backend search.
            project_id: Project id sent with every backend search.

        Returns:
            ``{"results": [...]}`` with the normalised result dicts.
        """
        results: list[dict[str, Any]] = []
        session_resource_map: dict[str, dict[str, Any]] = {}
        common: dict[str, Any] = {
            "user_id": user_id,
            "query": query,
            "top_k": top_k,
            "app_id": app_id,
            "project_id": project_id,
            "session_resource_map": session_resource_map,
        }

        if "current_chat" in scope and conversation_id:
            results.extend(
                await self._search_scope(
                    filters={"session_id": f"chat:{conversation_id}"},
                    source_scope="current_chat",
                    **common,
                )
            )

        if "resources" in scope:
            resources = self.repository.list_extracted_resources(
                user_id,
                app_id,
                project_id,
            )
            session_resource_map.update(
                {item["session_id"]: item for item in resources}
            )
            session_ids = [item["session_id"] for item in resources]
            # Session ids are searched in batches of the configured size.
            for batch in _chunks(session_ids, self.config.resource_search_batch_size):
                results.extend(
                    await self._search_scope(
                        filters={"session_id": {"in": batch}},
                        source_scope="resources",
                        **common,
                    )
                )

        if "all_user_memory" in scope:
            results.extend(
                await self._search_scope(
                    filters=None,
                    source_scope="all_user_memory",
                    **common,
                )
            )

        filtered = self._apply_tombstones(user_id, results)
        overridden = self._apply_overrides(user_id, filtered)
        return {"results": overridden}

    async def _search_scope(
        self,
        *,
        user_id: str,
        query: str,
        top_k: int,
        app_id: str,
        project_id: str,
        filters: dict[str, Any] | None,
        source_scope: str,
        session_resource_map: dict[str, dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Run one backend search and normalise its results for one scope."""
        payload = self._search_payload(
            user_id=user_id,
            query=query,
            top_k=top_k,
            app_id=app_id,
            project_id=project_id,
            filters=filters,
        )
        response = await self.everos_client.search_memory(payload)
        return self._extract_results(
            response,
            source_scope=source_scope,
            session_resource_map=session_resource_map,
            user_id=user_id,
        )

    def _search_payload(
        self,
        *,
        user_id: str,
        query: str,
        top_k: int,
        app_id: str,
        project_id: str,
        filters: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Build a ``search_memory`` request body.

        The ``filters`` key is only present when ``filters`` is not ``None``.
        """
        payload: dict[str, Any] = {
            "user_id": user_id,
            "query": query,
            "top_k": top_k,
            "app_id": app_id,
            "project_id": project_id,
        }
        if filters is not None:
            payload["filters"] = filters
        return payload

    def _extract_results(
        self,
        response: dict[str, Any],
        *,
        source_scope: str,
        session_resource_map: dict[str, dict[str, Any]],
        user_id: str,
    ) -> list[dict[str, Any]]:
        """Flatten a search response into normalised result dicts.

        Items are collected from the ``episodes``, ``profiles``,
        ``agent_cases``, ``agent_skills`` and ``unprocessed_messages`` lists
        under ``response["data"]``. Each item is linked to a resource via
        its ``session_id``: first through ``session_resource_map``, then
        through a repository lookup.

        Args:
            response: Raw search response.
            source_scope: Scope label copied onto every result.
            session_resource_map: Known ``session_id -> resource`` mapping;
                it is read, not modified.
            user_id: User for whom resources are resolved.

        Returns:
            One dict per raw item with ``id``, ``session_id``, ``text``,
            ``score``, ``source_scope``, ``resource_id``, ``resource_uri``
            and the original item under ``raw``.
        """
        # "data" may be missing or explicitly null.
        data = response.get("data") or {}
        raw_items: list[dict[str, Any]] = []
        for key in (
            "episodes",
            "profiles",
            "agent_cases",
            "agent_skills",
            "unprocessed_messages",
        ):
            raw_items.extend(data.get(key, []) or [])

        # Remember repository lookups (including misses) so many hits from
        # one session cost a single query rather than one per hit.
        looked_up: dict[str, dict[str, Any] | None] = {}
        normalized = []
        for raw in raw_items:
            session_id = raw.get("session_id")
            resource = None
            if isinstance(session_id, str):
                resource = session_resource_map.get(session_id)
                if resource is None:
                    if session_id not in looked_up:
                        looked_up[session_id] = (
                            self.repository.get_resource_by_session_for_user(
                                session_id,
                                user_id,
                            )
                        )
                    resource = looked_up[session_id]
            normalized.append(
                {
                    "id": raw.get("id"),
                    "session_id": session_id,
                    "text": _display_text(raw),
                    "score": raw.get("score"),
                    "source_scope": source_scope,
                    "resource_id": resource["id"] if resource else None,
                    "resource_uri": (
                        public_resource_uri(user_id, resource["id"])
                        if resource
                        else None
                    ),
                    "raw": raw,
                }
            )
        return normalized

    def _apply_tombstones(
        self,
        user_id: str,
        results: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Drop results hidden by the user's tombstones.

        A result is removed when its ``id`` matches a tombstone's
        ``memory_id`` or its ``session_id`` matches a tombstone's
        ``session_id``. Falsy tombstone fields are ignored.

        Returns:
            A new list; ``results`` itself is not modified.
        """
        tombstones = self.repository.get_tombstones(user_id)
        memory_ids = {item["memory_id"] for item in tombstones if item["memory_id"]}
        session_ids = {item["session_id"] for item in tombstones if item["session_id"]}
        return [
            item
            for item in results
            if item.get("id") not in memory_ids
            and item.get("session_id") not in session_ids
        ]

    def _apply_overrides(
        self,
        user_id: str,
        results: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Replace result text with the user's active overrides.

        For every result whose ``id`` has an active override, ``text`` is
        replaced by the override text and ``override_id`` is added.

        Returns:
            The same ``results`` list, whose dicts are updated in place.
        """
        overrides = {
            item["memory_id"]: item
            for item in self.repository.get_active_overrides(user_id)
            if item["memory_id"]
        }
        for result in results:
            override = overrides.get(result.get("id"))
            if override:
                result["text"] = override["override_text"]
                result["override_id"] = override["id"]
        return results

    def upsert_override(
        self,
        *,
        user_id: str,
        memory_id: str,
        session_id: str | None,
        override_text: str,
    ) -> dict[str, Any]:
        """Create or update the override text for a memory.

        Returns:
            A dict with ``memory_id``, the repository's ``override_id`` and
            ``status`` set to ``"active"``.
        """
        override = self.repository.upsert_override(
            user_id,
            memory_id,
            session_id,
            override_text,
        )
        return {
            "memory_id": memory_id,
            "override_id": override["id"],
            "status": "active",
        }

    def delete_memory(
        self,
        *,
        user_id: str,
        memory_id: str,
        session_id: str | None,
        reason: str | None,
    ) -> dict[str, Any]:
        """Hide a memory by recording a tombstone for it.

        Returns:
            A dict with ``memory_id``, the repository's ``tombstone_id`` and
            ``status`` set to ``"deleted"``.
        """
        tombstone = self.repository.add_tombstone(
            user_id,
            memory_id,
            session_id,
            reason,
        )
        return {
            "memory_id": memory_id,
            "tombstone_id": tombstone["id"],
            "status": "deleted",
        }

    def _resource_summary(self, resource: dict[str, Any]) -> dict[str, Any]:
        """Return the short public view of a resource.

        The ``uri`` is the public ``resource://`` form, not the stored
        internal URI.
        """
        return {
            "resource_id": resource["id"],
            "session_id": resource["session_id"],
            "uri": public_resource_uri(resource["user_id"], resource["id"]),
            "status": resource["status"],
        }

    def _resource_detail(self, resource: dict[str, Any]) -> dict[str, Any]:
        """Return the full public view of a resource.

        The ``uri`` is the public ``resource://`` form, not the stored
        internal URI.
        """
        return {
            "resource_id": resource["id"],
            "user_id": resource["user_id"],
            "filename": resource["original_filename"],
            "content_type": resource["content_type"],
            "mime_type": resource["mime_type"],
            "uri": public_resource_uri(resource["user_id"], resource["id"]),
            "session_id": resource["session_id"],
            "status": resource["status"],
            "title": resource["title"],
            "description": resource["description"],
            "created_at": resource["created_at"],
            "updated_at": resource["updated_at"],
        }
def _chunks(items: list[str], size: int) -> list[list[str]]:
    """Split ``items`` into consecutive sub-lists of at most ``size`` items.

    Args:
        items: Values to split; an empty list yields an empty result.
        size: Maximum length of each sub-list; must be at least 1 when
            ``items`` is non-empty.

    Returns:
        The sub-lists in order; only the last one may be shorter than
        ``size``.

    Raises:
        ValueError: If ``items`` is non-empty and ``size`` is less than 1.
            A negative size would otherwise silently yield no batches.
    """
    if not items:
        return []
    if size < 1:
        raise ValueError(f"chunk size must be at least 1, got {size}")
    return [items[start : start + size] for start in range(0, len(items), size)]
def _display_text(raw: dict[str, Any]) -> str:
    """Pick the text to display for a raw memory item.

    The keys ``episode``, ``summary``, ``content``, ``profile_data``,
    ``task_intent``, ``approach``, ``key_insight``, ``name`` and
    ``description`` are tried in that order. ``None`` values and blank
    strings are skipped so that an empty early field does not hide a
    populated later one.

    Args:
        raw: Raw memory item.

    Returns:
        The first usable value: strings are returned unchanged, other
        values are converted with ``str()``. An empty string is returned
        when no key yields a usable value.
    """
    candidate_keys = (
        "episode",
        "summary",
        "content",
        "profile_data",
        "task_intent",
        "approach",
        "key_insight",
        "name",
        "description",
    )
    for key in candidate_keys:
        value = raw.get(key)
        if value is None:
            continue
        if isinstance(value, str):
            if not value.strip():
                # Blank text carries nothing to show; try the next key.
                continue
            return value
        return str(value)
    return ""