from __future__ import annotations import asyncio import base64 import hashlib import mimetypes import secrets import shutil import time import uuid from pathlib import Path from typing import Any from urllib.parse import unquote, urlparse from fastapi import UploadFile from .config import GatewayConfig from .repository import MemoryRepository def new_resource_id() -> str: return f"r_{uuid.uuid4().hex}" def resource_session_id(user_id: str, resource_id: str) -> str: return f"resource:{user_id}:{resource_id}" def public_resource_uri(user_id: str, resource_id: str) -> str: return f"resource://{user_id}/{resource_id}" def current_timestamp_ms() -> int: return int(time.time() * 1000) def infer_content_type(filename: str | None, mime_type: str | None) -> str: mime = (mime_type or mimetypes.guess_type(filename or "")[0] or "").lower() suffix = Path(filename or "").suffix.lower() if mime.startswith("image/"): return "image" if mime.startswith("audio/"): return "audio" if mime == "application/pdf" or suffix == ".pdf": return "pdf" if mime in {"text/html", "application/xhtml+xml"} or suffix in {".html", ".htm"}: return "html" if mime.startswith("text/plain") or suffix in {".txt", ".md", ".csv", ".log"}: return "text" return "doc" def _safe_filename(filename: str | None) -> str: name = Path(filename or "upload.bin").name return name or "upload.bin" class UploadTooLarge(ValueError): pass class UnsupportedContentType(ValueError): pass def _copy_upload( file: UploadFile, destination: Path, max_upload_bytes: int, cleanup_root: Path, ) -> tuple[str, int]: sha256 = hashlib.sha256() size = 0 destination.parent.mkdir(parents=True, exist_ok=True) try: with destination.open("wb") as out: while True: chunk = file.file.read(1024 * 1024) if not chunk: break size += len(chunk) if size > max_upload_bytes: raise UploadTooLarge( f"upload exceeds max size of {max_upload_bytes} bytes" ) sha256.update(chunk) out.write(chunk) return sha256.hexdigest(), size except Exception: destination.unlink(missing_ok=True) _remove_empty_parents(destination.parent, stop_at=cleanup_root) raise def _mime_allowed(mime_type: str | None, allowed_mime_types: tuple[str, ...]) -> bool: mime = (mime_type or "").lower() if not mime: return False for allowed in allowed_mime_types: item = allowed.lower() if item.endswith("/*") and mime.startswith(item[:-1]): return True if item == mime: return True return False def _remove_empty_parents(path: Path, stop_at: Path | None = None) -> None: current = path stop = stop_at.resolve() if stop_at is not None else None while True: try: resolved = current.resolve() if stop is not None and resolved == stop: return current.rmdir() except OSError: return parent = current.parent if parent == current: return current = parent class MemoryGatewayService: def __init__( self, config: GatewayConfig, repository: MemoryRepository, everos_client: Any, ) -> None: self.config = config self.repository = repository self.everos_client = everos_client def create_user(self, user_id: str) -> dict[str, Any]: user_key = f"uk_{secrets.token_urlsafe(32)}" user = self.repository.create_user(user_id, user_key) return { "user_id": user["id"], "user_key": user["user_key"], "created_at": user["created_at"], } def authenticate_user(self, user_id: str, user_key: str) -> bool: user = self.repository.get_user(user_id) if user is None: return False return secrets.compare_digest(str(user["user_key"]), user_key) async def upload_resource( self, *, user_id: str, app_id: str, project_id: str, file: UploadFile, title: str | None, description: str | None, ) -> dict[str, Any]: resource_id = new_resource_id() session_id = resource_session_id(user_id, resource_id) original_filename = _safe_filename(file.filename) mime_type = file.content_type or mimetypes.guess_type(original_filename)[0] if not _mime_allowed(mime_type, self.config.allowed_mime_types): raise UnsupportedContentType(f"unsupported content type: {mime_type}") content_type = infer_content_type(original_filename, mime_type) stored_path = self.config.storage_dir / user_id / resource_id / original_filename sha256, size_bytes = _copy_upload( file, stored_path, self.config.max_upload_bytes, self.config.storage_dir, ) existing = self.repository.find_active_resource_by_sha256( user_id=user_id, app_id=app_id, project_id=project_id, sha256=sha256, ) if existing is not None: shutil.rmtree(stored_path.parent, ignore_errors=True) return self._resource_summary(existing) internal_uri = stored_path.resolve().as_uri() resource = self.repository.create_resource( id=resource_id, user_id=user_id, app_id=app_id, project_id=project_id, session_id=session_id, original_filename=original_filename, mime_type=mime_type, content_type=content_type, uri=internal_uri, uri_public=False, sha256=sha256, size_bytes=size_bytes, title=title, description=description, status="ingesting", error_message=None, ) try: await self._retry_everos_call( lambda: self.everos_client.add_memory( self._build_add_payload( resource=resource, user_id=user_id, app_id=app_id, project_id=project_id, filename=original_filename, ) ) ) await self._retry_everos_call( lambda: self.everos_client.flush_memory(session_id, app_id, project_id) ) except Exception as exc: failed = self.repository.update_resource_status( resource_id, "failed", str(exc), ) return self._resource_summary(failed or resource) extracted = self.repository.update_resource_status(resource_id, "extracted") return self._resource_summary(extracted or resource) async def _retry_everos_call(self, operation: Any) -> Any: attempts = max(1, self.config.everos_ingest_attempts) last_error: Exception | None = None for attempt in range(attempts): try: return await operation() except Exception as exc: last_error = exc if attempt == attempts - 1: break delay = self.config.everos_retry_delay_seconds if delay > 0: await asyncio.sleep(delay) if last_error is None: raise RuntimeError("EverOS operation failed") raise last_error def _build_add_payload( self, *, resource: dict[str, Any], user_id: str, app_id: str, project_id: str, filename: str, ) -> dict[str, Any]: content_item = self._build_content_item(resource=resource, filename=filename) return { "session_id": resource["session_id"], "app_id": app_id, "project_id": project_id, "messages": [ { "sender_id": user_id, "role": "user", "timestamp": current_timestamp_ms(), "content": [content_item], } ], } def _build_content_item( self, *, resource: dict[str, Any], filename: str, ) -> dict[str, Any]: content_type = str(resource["content_type"]) path = self._resource_file_path(resource) content = path.read_bytes() item = { "type": content_type, "name": filename, "ext": Path(filename).suffix.lstrip(".") or None, "extras": { "resource_id": resource["id"], "source": "user_upload", }, } if content_type == "text": item["text"] = content.decode("utf-8", errors="replace") else: item["base64"] = base64.b64encode(content).decode("ascii") return item def _resource_file_path(self, resource: dict[str, Any]) -> Path: uri = str(resource["uri"]) parsed = urlparse(uri) if parsed.scheme != "file": raise ValueError(f"unsupported resource uri scheme: {parsed.scheme}") return Path(unquote(parsed.path)).resolve(strict=True) def list_resources(self, user_id: str) -> list[dict[str, Any]]: return [self._resource_detail(item) for item in self.repository.list_resources(user_id)] def get_resource_detail( self, resource_id: str, user_id: str, ) -> dict[str, Any] | None: resource = self.repository.get_resource_for_user(resource_id, user_id) if resource is None: return None return self._resource_detail(resource) def delete_resource(self, resource_id: str, user_id: str) -> dict[str, Any] | None: before = self.repository.get_resource_for_user(resource_id, user_id) if before is None: return None resource = self.repository.soft_delete_resource(resource_id, user_id) self._cleanup_resource_file(before) return self._resource_summary(resource) def _cleanup_resource_file(self, resource: dict[str, Any]) -> None: uri = str(resource.get("uri") or "") if not uri.startswith("file://"): return parsed = urlparse(uri) if parsed.scheme != "file": return try: path = Path(unquote(parsed.path)).resolve() storage_root = self.config.storage_dir.resolve() except OSError: return if not path.is_relative_to(storage_root): return try: path.unlink(missing_ok=True) except OSError: return _remove_empty_parents(path.parent, stop_at=storage_root) async def search_memories( self, *, user_id: str, query: str, conversation_id: str | None, scope: list[str], top_k: int, app_id: str, project_id: str, ) -> dict[str, Any]: results: list[dict[str, Any]] = [] session_resource_map: dict[str, dict[str, Any]] = {} if "current_chat" in scope and conversation_id: payload = self._search_payload( user_id=user_id, query=query, top_k=top_k, app_id=app_id, project_id=project_id, filters={"session_id": f"chat:{conversation_id}"}, ) results.extend( self._extract_results( await self.everos_client.search_memory(payload), source_scope="current_chat", session_resource_map=session_resource_map, user_id=user_id, ) ) if "resources" in scope: resources = self.repository.list_extracted_resources( user_id, app_id, project_id, ) session_resource_map.update({item["session_id"]: item for item in resources}) session_ids = [item["session_id"] for item in resources] for batch in _chunks(session_ids, self.config.resource_search_batch_size): payload = self._search_payload( user_id=user_id, query=query, top_k=top_k, app_id=app_id, project_id=project_id, filters={"session_id": {"in": batch}}, ) results.extend( self._extract_results( await self.everos_client.search_memory(payload), source_scope="resources", session_resource_map=session_resource_map, user_id=user_id, ) ) if "all_user_memory" in scope: payload = self._search_payload( user_id=user_id, query=query, top_k=top_k, app_id=app_id, project_id=project_id, filters=None, ) results.extend( self._extract_results( await self.everos_client.search_memory(payload), source_scope="all_user_memory", session_resource_map=session_resource_map, user_id=user_id, ) ) filtered = self._apply_tombstones(user_id, results) overridden = self._apply_overrides(user_id, filtered) return {"results": overridden} async def add_memory( self, *, session_id: str, app_id: str, project_id: str, messages: list[dict[str, Any]], ) -> dict[str, Any]: payload = { "session_id": session_id, "app_id": app_id, "project_id": project_id, "messages": messages, } return { "session_id": session_id, "everos": await self.everos_client.add_memory(payload), } async def flush_memory( self, *, session_id: str, app_id: str, project_id: str, ) -> dict[str, Any]: return { "session_id": session_id, "everos": await self.everos_client.flush_memory( session_id, app_id, project_id, ), } def _search_payload( self, *, user_id: str, query: str, top_k: int, app_id: str, project_id: str, filters: dict[str, Any] | None, ) -> dict[str, Any]: payload: dict[str, Any] = { "user_id": user_id, "query": query, "top_k": top_k, "app_id": app_id, "project_id": project_id, } if filters is not None: payload["filters"] = filters return payload def _extract_results( self, response: dict[str, Any], *, source_scope: str, session_resource_map: dict[str, dict[str, Any]], user_id: str, ) -> list[dict[str, Any]]: data = response.get("data", {}) raw_items: list[dict[str, Any]] = [] for key in ( "episodes", "profiles", "agent_cases", "agent_skills", "unprocessed_messages", ): raw_items.extend(data.get(key, []) or []) normalized = [] for raw in raw_items: session_id = raw.get("session_id") resource = session_resource_map.get(session_id) if resource is None and isinstance(session_id, str): resource = self.repository.get_resource_by_session_for_user( session_id, user_id, ) normalized.append( { "id": raw.get("id"), "session_id": session_id, "text": _display_text(raw), "score": raw.get("score"), "source_scope": source_scope, "resource_id": resource["id"] if resource else None, "resource_uri": ( public_resource_uri(user_id, resource["id"]) if resource else None ), "raw": raw, } ) return normalized def _apply_tombstones( self, user_id: str, results: list[dict[str, Any]], ) -> list[dict[str, Any]]: tombstones = self.repository.get_tombstones(user_id) memory_ids = {item["memory_id"] for item in tombstones if item["memory_id"]} session_ids = {item["session_id"] for item in tombstones if item["session_id"]} return [ item for item in results if item.get("id") not in memory_ids and item.get("session_id") not in session_ids ] def _apply_overrides( self, user_id: str, results: list[dict[str, Any]], ) -> list[dict[str, Any]]: overrides = { item["memory_id"]: item for item in self.repository.get_active_overrides(user_id) if item["memory_id"] } for result in results: override = overrides.get(result.get("id")) if override: result["text"] = override["override_text"] result["override_id"] = override["id"] return results def upsert_override( self, *, user_id: str, memory_id: str, session_id: str | None, override_text: str, ) -> dict[str, Any]: override = self.repository.upsert_override( user_id, memory_id, session_id, override_text, ) return {"memory_id": memory_id, "override_id": override["id"], "status": "active"} def assert_memory_session_owned(self, user_id: str, session_id: str) -> None: if session_id == f"memory_edit:{user_id}": return if session_id.startswith("resource:"): resource = self.repository.get_resource_by_session_for_user( session_id, user_id, ) if resource is not None: return raise PermissionError("memory session does not belong to user") def delete_memory( self, *, user_id: str, memory_id: str, session_id: str | None, reason: str | None, ) -> dict[str, Any]: tombstone = self.repository.add_tombstone( user_id, memory_id, session_id, reason, ) return {"memory_id": memory_id, "tombstone_id": tombstone["id"], "status": "deleted"} def _resource_summary(self, resource: dict[str, Any]) -> dict[str, Any]: return { "resource_id": resource["id"], "session_id": resource["session_id"], "uri": public_resource_uri(resource["user_id"], resource["id"]), "status": resource["status"], } def _resource_detail(self, resource: dict[str, Any]) -> dict[str, Any]: return { "resource_id": resource["id"], "user_id": resource["user_id"], "filename": resource["original_filename"], "content_type": resource["content_type"], "mime_type": resource["mime_type"], "uri": public_resource_uri(resource["user_id"], resource["id"]), "session_id": resource["session_id"], "status": resource["status"], "title": resource["title"], "description": resource["description"], "created_at": resource["created_at"], "updated_at": resource["updated_at"], } def _chunks(items: list[str], size: int) -> list[list[str]]: if not items: return [] return [items[index : index + size] for index in range(0, len(items), size)] def _display_text(raw: dict[str, Any]) -> str: for key in ( "episode", "summary", "content", "profile_data", "task_intent", "approach", "key_insight", "name", "description", ): value = raw.get(key) if value is None: continue if isinstance(value, str): return value return str(value) return ""