feat: add multipart memory uploads
This commit is contained in:
167
core/service.py
167
core/service.py
@@ -138,6 +138,25 @@ def _remove_empty_parents(path: Path, stop_at: Path | None = None) -> None:
|
||||
current = parent
|
||||
|
||||
|
||||
def _read_upload_bytes(
|
||||
file: UploadFile,
|
||||
max_upload_bytes: int,
|
||||
) -> tuple[bytes, str, int]:
|
||||
sha256 = hashlib.sha256()
|
||||
size = 0
|
||||
chunks: list[bytes] = []
|
||||
while True:
|
||||
chunk = file.file.read(1024 * 1024)
|
||||
if not chunk:
|
||||
break
|
||||
size += len(chunk)
|
||||
if size > max_upload_bytes:
|
||||
raise UploadTooLarge(f"upload exceeds max size of {max_upload_bytes} bytes")
|
||||
sha256.update(chunk)
|
||||
chunks.append(chunk)
|
||||
return b"".join(chunks), sha256.hexdigest(), size
|
||||
|
||||
|
||||
class MemoryGatewayService:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -617,6 +636,41 @@ class MemoryGatewayService:
|
||||
raise
|
||||
return {"session_id": session_id, "backend": backend}
|
||||
|
||||
async def add_memory_with_uploads(
|
||||
self,
|
||||
*,
|
||||
user_id: str,
|
||||
session_id: str,
|
||||
app_id: str,
|
||||
project_id: str,
|
||||
messages: list[dict[str, Any]],
|
||||
upload_files: dict[str, UploadFile],
|
||||
) -> dict[str, Any]:
|
||||
messages, attachments, generated_paths = self._prepare_uploaded_memory_files(
|
||||
user_id=user_id,
|
||||
session_id=session_id,
|
||||
app_id=app_id,
|
||||
project_id=project_id,
|
||||
messages=messages,
|
||||
upload_files=upload_files,
|
||||
)
|
||||
payload = {
|
||||
"session_id": session_id,
|
||||
"app_id": app_id,
|
||||
"project_id": project_id,
|
||||
"messages": messages,
|
||||
}
|
||||
try:
|
||||
backend = await self.backend_client.add_memory(payload)
|
||||
for attachment in attachments:
|
||||
self.repository.create_attachment(**attachment)
|
||||
except Exception:
|
||||
for path in generated_paths:
|
||||
path.unlink(missing_ok=True)
|
||||
_remove_empty_parents(path.parent, stop_at=self.config.storage_dir)
|
||||
raise
|
||||
return {"session_id": session_id, "backend": backend}
|
||||
|
||||
def _register_resource_attachment(
|
||||
self,
|
||||
resource: dict[str, Any],
|
||||
@@ -713,6 +767,119 @@ class MemoryGatewayService:
|
||||
raise
|
||||
return attachments, generated_paths
|
||||
|
||||
def _prepare_uploaded_memory_files(
|
||||
self,
|
||||
*,
|
||||
user_id: str,
|
||||
session_id: str,
|
||||
app_id: str,
|
||||
project_id: str,
|
||||
messages: list[dict[str, Any]],
|
||||
upload_files: dict[str, UploadFile],
|
||||
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[Path]]:
|
||||
attachments: list[dict[str, Any]] = []
|
||||
generated_paths: list[Path] = []
|
||||
used_upload_ids: set[str] = set()
|
||||
try:
|
||||
for message in messages:
|
||||
content = message.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
for index, item in enumerate(content):
|
||||
if not isinstance(item, dict) or "upload_id" not in item:
|
||||
continue
|
||||
upload_id = str(item.get("upload_id") or "").strip()
|
||||
if not upload_id:
|
||||
raise InvalidAttachment("upload_id must not be empty")
|
||||
if upload_id in used_upload_ids:
|
||||
raise InvalidAttachment(f"duplicate upload_id: {upload_id}")
|
||||
file = upload_files.get(upload_id)
|
||||
if file is None:
|
||||
raise InvalidAttachment(
|
||||
f"missing upload file for upload_id: {upload_id}"
|
||||
)
|
||||
used_upload_ids.add(upload_id)
|
||||
content[index] = self._materialize_uploaded_content_item(
|
||||
user_id=user_id,
|
||||
session_id=session_id,
|
||||
app_id=app_id,
|
||||
project_id=project_id,
|
||||
item=item,
|
||||
file=file,
|
||||
attachments=attachments,
|
||||
generated_paths=generated_paths,
|
||||
)
|
||||
unused_upload_ids = sorted(set(upload_files) - used_upload_ids)
|
||||
if unused_upload_ids:
|
||||
raise InvalidAttachment(
|
||||
f"unused upload file field: {unused_upload_ids[0]}"
|
||||
)
|
||||
except Exception:
|
||||
for path in generated_paths:
|
||||
path.unlink(missing_ok=True)
|
||||
_remove_empty_parents(path.parent, stop_at=self.config.storage_dir)
|
||||
raise
|
||||
return messages, attachments, generated_paths
|
||||
|
||||
def _materialize_uploaded_content_item(
    self,
    *,
    user_id: str,
    session_id: str,
    app_id: str,
    project_id: str,
    item: dict[str, Any],
    file: UploadFile,
    attachments: list[dict[str, Any]],
    generated_paths: list[Path],
) -> dict[str, Any]:
    """Store one uploaded file and build the content item that inlines it.

    The upload is checked against the configured MIME allow-list, read into
    memory under the configured size limit, and written to
    ``<storage_dir>/<user_id>/memory_attachments/<sha256>/<name>`` unless
    that path already exists. An attachment record describing the stored
    file is appended to ``attachments``.

    Args:
        user_id: Owner of the upload; also a component of the storage path.
        session_id: Recorded on the attachment.
        app_id: Recorded on the attachment.
        project_id: Recorded on the attachment.
        item: Original content item. Its ``upload_id`` and ``uri`` keys are
            dropped; ``name`` and ``type`` are used as hints.
        file: The uploaded file referenced by ``item``.
        attachments: Output list; receives one attachment record.
        generated_paths: Output list; receives the storage path when this
            call creates the file, so the caller can roll it back.

    Returns:
        A new content item with ``type``, ``name`` and ``ext`` set. It
        carries the payload as ``text`` (UTF-8, undecodable bytes replaced)
        when the content type is ``"text"``, otherwise as ``base64``.

    Raises:
        UnsupportedContentType: If the MIME type is not allowed.
        UploadTooLarge: Propagated from ``_read_upload_bytes``; the name
            comes from that helper's visible definition.
    """
    name = _safe_filename(str(item.get("name") or file.filename or "upload.bin"))
    # Use the client-declared type, else guess it from the file name.
    mime_type = file.content_type or mimetypes.guess_type(name)[0]
    if not _mime_allowed(mime_type, self.config.allowed_mime_types):
        raise UnsupportedContentType(f"unsupported content type: {mime_type}")
    content_type = normalize_content_type(
        name,
        mime_type,
        str(item.get("type") or ""),
    )
    data, sha256, _size_bytes = _read_upload_bytes(
        file,
        self.config.max_upload_bytes,
    )

    # The digest is part of the path, so identical content under the same
    # name maps to the same file and is written only once.
    path = self.config.storage_dir / user_id / "memory_attachments" / sha256 / name
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        # Register the path BEFORE writing. If the write fails part-way, the
        # caller's rollback over ``generated_paths`` then removes the
        # truncated file. Otherwise it would remain and, because of the
        # ``exists()`` check above, be reused as-is by later uploads with
        # the same digest and name.
        generated_paths.append(path)
        path.write_bytes(data)

    content_item = {
        key: value for key, value in item.items() if key not in {"upload_id", "uri"}
    }
    content_item["type"] = content_type
    content_item["name"] = name
    # Prefer the extension of the stored name; keep the item's own "ext"
    # only when the name has no suffix.
    content_item["ext"] = Path(name).suffix.lstrip(".") or content_item.get("ext")
    # Carry the payload in exactly one of "text" / "base64".
    if content_type == "text":
        content_item.pop("base64", None)
        content_item["text"] = data.decode("utf-8", errors="replace")
    else:
        content_item.pop("text", None)
        content_item["base64"] = base64.b64encode(data).decode("ascii")

    attachments.append(
        {
            "id": f"a_{uuid.uuid4().hex}",
            "user_id": user_id,
            "app_id": app_id,
            "project_id": project_id,
            "session_id": session_id,
            "resource_id": None,
            "content_type": content_type,
            "name": name,
            "internal_uri": path.resolve().as_uri(),
            "source": "memory_add_upload",
            "sha256": sha256,
        }
    )
    return content_item
|
||||
|
||||
async def flush_memory(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user