feat: add multipart memory uploads

This commit is contained in:
2026-06-22 15:53:29 +08:00
parent 12c767cd68
commit f77454b4cc
6 changed files with 1076 additions and 1255 deletions

View File

@ -138,6 +138,25 @@ def _remove_empty_parents(path: Path, stop_at: Path | None = None) -> None:
current = parent
def _read_upload_bytes(
    file: UploadFile,
    max_upload_bytes: int,
) -> tuple[bytes, str, int]:
    """Read an upload fully into memory while hashing it and capping its size.

    The underlying stream (``file.file``) is consumed in 1 MiB reads until a
    read returns an empty (falsy) chunk.

    Args:
        file: Upload whose ``file`` attribute exposes ``read(n)``.
        max_upload_bytes: Largest total number of bytes accepted.

    Returns:
        A ``(data, sha256_hex, size)`` tuple: the concatenated bytes, the
        hexadecimal SHA-256 digest of those bytes, and their total length.

    Raises:
        UploadTooLarge: As soon as the running total exceeds
            ``max_upload_bytes``; the offending chunk is neither hashed nor
            kept.
    """
    read_size = 1024 * 1024
    hasher = hashlib.sha256()
    received: list[bytes] = []
    total = 0
    while piece := file.file.read(read_size):
        total += len(piece)
        # Enforce the cap before hashing/buffering so an oversized upload is
        # rejected without retaining the chunk that crossed the limit.
        if total > max_upload_bytes:
            raise UploadTooLarge(f"upload exceeds max size of {max_upload_bytes} bytes")
        hasher.update(piece)
        received.append(piece)
    payload = b"".join(received)
    return payload, hasher.hexdigest(), total
class MemoryGatewayService:
def __init__(
self,
@ -617,6 +636,41 @@ class MemoryGatewayService:
raise
return {"session_id": session_id, "backend": backend}
async def add_memory_with_uploads(
    self,
    *,
    user_id: str,
    session_id: str,
    app_id: str,
    project_id: str,
    messages: list[dict[str, Any]],
    upload_files: dict[str, UploadFile],
) -> dict[str, Any]:
    """Store messages whose content references multipart uploads.

    Uploaded files referenced from ``messages`` are first materialised via
    ``_prepare_uploaded_memory_files``. The rewritten messages are then sent
    to the backend, and finally every prepared attachment is registered
    through the repository.

    Args:
        user_id: Owner of the uploads; used while materialising the files
            (it is not part of the backend payload).
        session_id: Session the messages belong to.
        app_id: Application identifier forwarded to the backend.
        project_id: Project identifier forwarded to the backend.
        messages: Chat messages; list-typed ``content`` entries may carry
            ``upload_id`` items.
        upload_files: Uploaded files keyed by upload id.

    Returns:
        ``{"session_id": ..., "backend": ...}`` where ``backend`` is whatever
        ``backend_client.add_memory`` returned.

    Raises:
        Whatever file preparation, the backend call, or attachment
        registration raise. In the latter two cases the files written for
        this request are removed before the error propagates.
    """
    # NOTE(review): preparation is a synchronous call made from this
    # coroutine, so it runs on the event loop thread — confirm that is
    # acceptable for large uploads.
    messages, attachments, generated_paths = self._prepare_uploaded_memory_files(
        user_id=user_id,
        session_id=session_id,
        app_id=app_id,
        project_id=project_id,
        messages=messages,
        upload_files=upload_files,
    )
    payload = {
        "session_id": session_id,
        "app_id": app_id,
        "project_id": project_id,
        "messages": messages,
    }
    try:
        backend = await self.backend_client.add_memory(payload)
        for attachment in attachments:
            self.repository.create_attachment(**attachment)
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError is a
        # BaseException subclass, so a request cancelled while awaiting the
        # backend would otherwise skip this rollback and leave the freshly
        # written files orphaned on disk. The error is always re-raised.
        for path in generated_paths:
            path.unlink(missing_ok=True)
            _remove_empty_parents(path.parent, stop_at=self.config.storage_dir)
        raise
    return {"session_id": session_id, "backend": backend}
def _register_resource_attachment(
self,
resource: dict[str, Any],
@ -713,6 +767,119 @@ class MemoryGatewayService:
raise
return attachments, generated_paths
def _prepare_uploaded_memory_files(
    self,
    *,
    user_id: str,
    session_id: str,
    app_id: str,
    project_id: str,
    messages: list[dict[str, Any]],
    upload_files: dict[str, UploadFile],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[Path]]:
    """Swap every ``upload_id`` content item for its materialised form.

    Only messages whose ``content`` is a list are inspected, and within it
    only dict items that carry an ``upload_id`` key. Each such item is
    replaced *in place* by the result of
    ``_materialize_uploaded_content_item``.

    Args:
        user_id: Passed through to the materialisation step.
        session_id: Passed through to the materialisation step.
        app_id: Passed through to the materialisation step.
        project_id: Passed through to the materialisation step.
        messages: Messages to scan; mutated in place and also returned.
        upload_files: Uploaded files keyed by upload id. Every key must be
            referenced exactly once.

    Returns:
        ``(messages, attachments, generated_paths)``: the same ``messages``
        object, the attachment records collected during materialisation, and
        the files newly written for this call.

    Raises:
        InvalidAttachment: For an empty or duplicate ``upload_id``, an
            ``upload_id`` with no matching file, or an uploaded file that no
            message references.
        Exception: Anything raised while materialising an item. In every
            failure case the files written so far are removed first.
    """
    attachment_rows: list[dict[str, Any]] = []
    written_paths: list[Path] = []
    consumed_ids: set[str] = set()

    def _upload_slots():
        # Lazily yield (content list, index, item) for each upload reference
        # so the replacement below happens while the list is being walked.
        for message in messages:
            parts = message.get("content")
            if not isinstance(parts, list):
                continue
            for position, part in enumerate(parts):
                if isinstance(part, dict) and "upload_id" in part:
                    yield parts, position, part

    try:
        for parts, position, part in _upload_slots():
            upload_id = str(part.get("upload_id") or "").strip()
            if upload_id == "":
                raise InvalidAttachment("upload_id must not be empty")
            if upload_id in consumed_ids:
                raise InvalidAttachment(f"duplicate upload_id: {upload_id}")
            upload = upload_files.get(upload_id)
            if upload is None:
                raise InvalidAttachment(
                    f"missing upload file for upload_id: {upload_id}"
                )
            consumed_ids.add(upload_id)
            parts[position] = self._materialize_uploaded_content_item(
                user_id=user_id,
                session_id=session_id,
                app_id=app_id,
                project_id=project_id,
                item=part,
                file=upload,
                attachments=attachment_rows,
                generated_paths=written_paths,
            )
        # Every uploaded field has to be referenced by some message.
        leftover = set(upload_files) - consumed_ids
        if leftover:
            raise InvalidAttachment(f"unused upload file field: {min(leftover)}")
    except Exception:
        # Roll back whatever this call wrote before surfacing the error.
        for written in written_paths:
            written.unlink(missing_ok=True)
            _remove_empty_parents(written.parent, stop_at=self.config.storage_dir)
        raise
    return messages, attachment_rows, written_paths
def _materialize_uploaded_content_item(
    self,
    *,
    user_id: str,
    session_id: str,
    app_id: str,
    project_id: str,
    item: dict[str, Any],
    file: UploadFile,
    attachments: list[dict[str, Any]],
    generated_paths: list[Path],
) -> dict[str, Any]:
    """Persist one uploaded file and build the content item that replaces it.

    The upload is read into memory, stored under
    ``<storage_dir>/<user_id>/memory_attachments/<sha256>/<name>`` unless a
    file already exists at that path, and described by a new attachment
    record appended to ``attachments``.

    Args:
        user_id: Owner of the upload; forms part of the storage path.
        session_id: Recorded on the attachment.
        app_id: Recorded on the attachment.
        project_id: Recorded on the attachment.
        item: Original content item carrying the ``upload_id``.
        file: The uploaded file matching that ``upload_id``.
        attachments: Output list; one attachment record is appended.
        generated_paths: Output list; receives the storage path when this
            call creates the file, so callers can roll it back on failure.

    Returns:
        A copy of ``item`` without ``upload_id``/``uri``, with ``type``,
        ``name`` and ``ext`` set, and the payload inlined as ``text``
        (UTF-8, undecodable bytes replaced) for text content or as
        ``base64`` otherwise.

    Raises:
        UnsupportedContentType: If the MIME type is not allowed by
            ``config.allowed_mime_types``.
        UploadTooLarge: Propagated from ``_read_upload_bytes`` when the
            upload exceeds ``config.max_upload_bytes``.
    """
    name = _safe_filename(str(item.get("name") or file.filename or "upload.bin"))
    # Prefer the client-declared MIME type; fall back to a guess from the name.
    mime_type = file.content_type or mimetypes.guess_type(name)[0]
    if not _mime_allowed(mime_type, self.config.allowed_mime_types):
        raise UnsupportedContentType(f"unsupported content type: {mime_type}")
    content_type = normalize_content_type(
        name,
        mime_type,
        str(item.get("type") or ""),
    )
    data, sha256, _ = _read_upload_bytes(
        file,
        self.config.max_upload_bytes,
    )
    # NOTE(review): user_id is joined into the path as-is; presumably it is a
    # server-issued identifier with no path separators — confirm at the caller.
    path = self.config.storage_dir / user_id / "memory_attachments" / sha256 / name
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        # Register the path for rollback *before* writing. If the write fails
        # part-way (e.g. disk full) the caller's cleanup removes the truncated
        # file; otherwise it would stay at this content-addressed location and
        # be silently reused by later uploads with the same hash and name.
        generated_paths.append(path)
        path.write_bytes(data)
    content_item = {
        key: value for key, value in item.items() if key not in {"upload_id", "uri"}
    }
    content_item["type"] = content_type
    content_item["name"] = name
    # Keep a caller-supplied "ext" only when the file name has no suffix.
    content_item["ext"] = Path(name).suffix.lstrip(".") or content_item.get("ext")
    if content_type == "text":
        content_item.pop("base64", None)
        content_item["text"] = data.decode("utf-8", errors="replace")
    else:
        content_item.pop("text", None)
        content_item["base64"] = base64.b64encode(data).decode("ascii")
    attachments.append(
        {
            "id": f"a_{uuid.uuid4().hex}",
            "user_id": user_id,
            "app_id": app_id,
            "project_id": project_id,
            "session_id": session_id,
            "resource_id": None,
            "content_type": content_type,
            "name": name,
            "internal_uri": path.resolve().as_uri(),
            "source": "memory_add_upload",
            "sha256": sha256,
        }
    )
    return content_item
async def flush_memory(
self,
*,