merge: personal user filesystem minio integration
This commit is contained in:
201
app-instance/backend/beaver/services/user_file_resolver.py
Normal file
201
app-instance/backend/beaver/services/user_file_resolver.py
Normal file
@ -0,0 +1,201 @@
|
||||
"""Resolve the user-visible file system for web and agent callers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from beaver.foundation.config.schema import BeaverConfig
|
||||
|
||||
from .user_files import (
|
||||
LocalUserFileStorage,
|
||||
MinIOStorageConfig,
|
||||
MinIOUserFileStorage,
|
||||
USER_FILE_ROOTS,
|
||||
UserFileError,
|
||||
UserFileService,
|
||||
)
|
||||
|
||||
|
||||
class UserFileConfigurationError(UserFileError):
    """Signal that this backend has no usable user file storage configuration."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class FileAuthContext:
|
||||
"""Authenticated identity used by the personal file system boundary."""
|
||||
|
||||
username: str
|
||||
backend_id: str
|
||||
storage_namespace: str
|
||||
user_id: str | None = None
|
||||
scopes: tuple[str, ...] = field(default_factory=tuple)
|
||||
auth_source: str = "beaver-web-token"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileStorageStatus:
|
||||
configured: bool
|
||||
storage_mode: str
|
||||
roots: list[str]
|
||||
workspace_visible: bool = False
|
||||
detail: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
payload: dict[str, Any] = {
|
||||
"configured": self.configured,
|
||||
"storage_mode": self.storage_mode,
|
||||
"roots": self.roots,
|
||||
"workspace_visible": self.workspace_visible,
|
||||
}
|
||||
if self.detail:
|
||||
payload["detail"] = self.detail
|
||||
return payload
|
||||
|
||||
|
||||
class UserFileStorageResolver:
    """Build `UserFileService` from the current Beaver identity and config.

    In local mode files live under ``<workspace>/user_files``. Otherwise the
    MinIO connection settings are fetched from the AuthZ service for the
    caller's backend.
    """

    def __init__(
        self,
        *,
        config: BeaverConfig,
        workspace: Path,
        auth_context: FileAuthContext,
    ) -> None:
        self.config = config
        self.workspace = Path(workspace)
        self.auth_context = auth_context

    async def service(self) -> UserFileService:
        """Return a `UserFileService` bound to the resolved storage backend.

        Raises:
            UserFileConfigurationError: MinIO mode is selected but its
                settings cannot be loaded or are incomplete.
        """
        mode = _storage_mode(self.config)
        if mode == "local":
            return UserFileService(LocalUserFileStorage(self.workspace / "user_files"))
        settings = await self._load_minio_settings()
        return UserFileService(
            MinIOUserFileStorage(
                MinIOStorageConfig(
                    endpoint=str(settings.get("endpoint") or ""),
                    access_key=str(settings.get("access_key") or ""),
                    secret_key=str(settings.get("secret_key") or ""),
                    bucket=str(settings.get("bucket") or ""),
                    secure=bool(settings.get("secure", False)),
                    region=_clean_optional(settings.get("region")),
                    namespace=str(settings.get("namespace") or self.auth_context.storage_namespace),
                )
            )
        )

    async def status(self) -> UserFileStorageStatus:
        """Report whether user file storage is usable, without building a service.

        Configuration problems in MinIO mode are returned as an unconfigured
        status carrying the error text in ``detail`` rather than raised.
        """
        mode = _storage_mode(self.config)
        if mode == "local":
            return UserFileStorageStatus(
                configured=True,
                storage_mode="local",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
            )
        try:
            await self._load_minio_settings()
        except UserFileConfigurationError as exc:
            return UserFileStorageStatus(
                configured=False,
                storage_mode="object",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
                detail=str(exc),
            )
        return UserFileStorageStatus(
            configured=True,
            storage_mode="object",
            roots=list(USER_FILE_ROOTS),
            workspace_visible=False,
        )

    async def _load_minio_settings(self) -> dict[str, Any]:
        """Fetch and validate this backend's MinIO settings from AuthZ.

        Returns:
            The settings payload, with ``namespace`` defaulted to the auth
            context's storage namespace when the key is absent.

        Raises:
            UserFileConfigurationError: the backend id, AuthZ config or
                internal token is missing, the request fails, the response is
                not a JSON object, or required fields are empty.
        """
        from urllib.parse import quote

        backend_id = self.auth_context.backend_id.strip()
        if not backend_id:
            raise UserFileConfigurationError("User file storage backend identity is not configured")
        base_url = self.config.authz.base_url.strip()
        if not (self.config.authz.enabled and base_url):
            raise UserFileConfigurationError("AuthZ is required for deployed user file storage")
        token = (
            os.getenv("BEAVER_AUTHZ_INTERNAL_TOKEN", "").strip()
            or os.getenv("AUTHZ_INTERNAL_TOKEN", "").strip()
        )
        if not token:
            raise UserFileConfigurationError("AuthZ internal token is not configured for user file storage")
        # backend_id may fall back to a username, so percent-encode it to keep
        # characters such as "/" or "?" from changing which endpoint is hit.
        url = f"{base_url.rstrip('/')}/internal/backends/{quote(backend_id, safe='')}/settings/minio"
        try:
            async with httpx.AsyncClient(
                timeout=self.config.authz.request_timeout_seconds,
                follow_redirects=True,
                trust_env=False,
            ) as client:
                response = await client.get(
                    url,
                    headers={"Authorization": f"Bearer {token}"},
                )
        except httpx.HTTPError as exc:
            raise UserFileConfigurationError(f"Unable to load user file storage settings: {exc}") from exc
        if response.status_code == 404:
            raise UserFileConfigurationError("MinIO user file storage is not configured")
        if response.is_error:
            raise UserFileConfigurationError(
                f"Unable to load user file storage settings: HTTP {response.status_code}"
            )
        try:
            payload = response.json()
        except ValueError as exc:
            # A non-JSON body must surface as a configuration error so that
            # status() can report it instead of crashing.
            raise UserFileConfigurationError("Invalid MinIO settings response") from exc
        if not isinstance(payload, dict):
            raise UserFileConfigurationError("Invalid MinIO settings response")
        required = ("endpoint", "access_key", "secret_key", "bucket")
        if not all(str(payload.get(key) or "").strip() for key in required):
            raise UserFileConfigurationError("MinIO user file storage settings are incomplete")
        payload.setdefault("namespace", self.auth_context.storage_namespace)
        return payload
|
||||
|
||||
|
||||
def build_file_auth_context(
    *,
    username: str,
    config: BeaverConfig,
    user_id: str | None = None,
    scopes: tuple[str, ...] = (),
    auth_source: str = "beaver-web-token",
) -> FileAuthContext:
    """Assemble the `FileAuthContext` for ``username``.

    The backend id is taken from the config, then from the
    ``BEAVER_BACKEND_IDENTITY__BACKEND_ID`` environment variable, and finally
    from the stripped username. The storage namespace is derived from it.
    """
    clean_username = username.strip()
    backend_id = config.backend_identity.backend_id.strip()
    if not backend_id:
        backend_id = os.getenv("BEAVER_BACKEND_IDENTITY__BACKEND_ID", "").strip()
    if not backend_id:
        backend_id = clean_username
    return FileAuthContext(
        username=clean_username,
        backend_id=backend_id,
        storage_namespace=default_user_file_namespace(backend_id),
        user_id=user_id,
        scopes=scopes,
        auth_source=auth_source,
    )
|
||||
|
||||
|
||||
def default_user_file_namespace(backend_id: str) -> str:
    """Return ``users/<backend_id>``, or ``users/unconfigured`` for a blank id.

    Surrounding whitespace and then leading/trailing slashes are removed
    from ``backend_id`` first.
    """
    trimmed = backend_id.strip().strip("/")
    if not trimmed:
        return "users/unconfigured"
    return f"users/{trimmed}"
|
||||
|
||||
|
||||
def _storage_mode(config: BeaverConfig) -> str:
    """Pick ``"local"`` or ``"minio"`` storage.

    ``BEAVER_USER_FILES_STORAGE_MODE`` wins when it holds a recognised value.
    Otherwise MinIO is chosen only when AuthZ is enabled with a base URL and
    a backend id is configured.
    """
    requested = os.getenv("BEAVER_USER_FILES_STORAGE_MODE", "").strip().lower()
    if requested in ("local", "dev-local", "development"):
        return "local"
    if requested in ("minio", "object", "object-storage"):
        return "minio"
    authz = config.authz
    deployed = bool(
        authz.enabled
        and authz.base_url.strip()
        and config.backend_identity.backend_id.strip()
    )
    return "minio" if deployed else "local"
|
||||
|
||||
|
||||
def _clean_optional(value: Any) -> str | None:
    """Return ``value`` as stripped text, or ``None`` when it is falsy or blank."""
    stripped = str(value or "").strip()
    if stripped:
        return stripped
    return None
|
||||
630
app-instance/backend/beaver/services/user_files.py
Normal file
630
app-instance/backend/beaver/services/user_files.py
Normal file
@ -0,0 +1,630 @@
|
||||
"""User-visible file system service.
|
||||
|
||||
This module owns the personal file-system boundary exposed to users and
|
||||
agents. Storage backends can change, but callers see only virtual paths under
|
||||
fixed roots.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
import mimetypes
|
||||
from pathlib import Path, PurePosixPath
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
# Virtual top-level folders; every user-visible path must start with one of them.
USER_FILE_ROOTS = ("uploads", "outputs", "shared", "tasks")
# Default cap on the number of bytes read for a preview (1 MiB).
MAX_PREVIEW_BYTES = 1024 * 1024
# Messages raised when an agent attempts an operation it is not allowed to perform.
AGENT_UPLOADS_ERROR = (
    "uploads/ is user-provided input storage; "
    "agents may read it but must not write it"
)
AGENT_DELETE_ERROR = (
    "agents cannot delete user-visible files; "
    "use the Files page or user-side APIs"
)
|
||||
|
||||
|
||||
class UserFileError(ValueError):
    """Root of the exception hierarchy for user file operations."""
|
||||
|
||||
|
||||
class UserFilePathError(UserFileError):
    """A user file path breaks the virtual path policy."""
|
||||
|
||||
|
||||
class UserFileNotFoundError(UserFileError):
    """The requested user file path does not exist."""
|
||||
|
||||
|
||||
class UserFileSizeError(UserFileError):
    """A user file upload is larger than the configured limit."""
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AgentUserFilePolicy:
|
||||
task_id: str | None = None
|
||||
fallback_scope: str = "interactive"
|
||||
|
||||
@property
|
||||
def task_namespace(self) -> str:
|
||||
if self.task_id:
|
||||
return f"tasks/{self.task_id}"
|
||||
scope = _safe_scope(self.fallback_scope)
|
||||
return f"tasks/interactive/{scope}"
|
||||
|
||||
def validate_read(self, path: str) -> str:
|
||||
return normalize_user_path(path, allow_root=False)
|
||||
|
||||
def validate_write(self, path: str) -> str:
|
||||
normalized = normalize_user_path(path, allow_root=False)
|
||||
root = normalized.split("/", 1)[0]
|
||||
if root == "uploads":
|
||||
raise UserFilePathError(AGENT_UPLOADS_ERROR)
|
||||
if root == "tasks":
|
||||
self._validate_task_namespace(normalized)
|
||||
return normalized
|
||||
|
||||
def validate_mkdir(self, path: str) -> str:
|
||||
return self.validate_write(path)
|
||||
|
||||
def validate_delete(self, path: str) -> str:
|
||||
normalize_user_path(path, allow_root=False)
|
||||
raise UserFilePathError(AGENT_DELETE_ERROR)
|
||||
|
||||
def _validate_task_namespace(self, normalized: str) -> None:
|
||||
namespace = self.task_namespace
|
||||
if normalized == "tasks" or not normalized.startswith(f"{namespace}/"):
|
||||
raise UserFilePathError(f"Agent task files must be written under {namespace}/")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileEntry:
|
||||
name: str
|
||||
path: str
|
||||
type: str
|
||||
size: int | None = None
|
||||
content_type: str | None = None
|
||||
modified: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, object]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"path": self.path,
|
||||
"type": self.type,
|
||||
"size": self.size,
|
||||
"content_type": self.content_type,
|
||||
"modified": self.modified,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileContent:
|
||||
name: str
|
||||
path: str
|
||||
size: int
|
||||
content_type: str
|
||||
modified: str | None
|
||||
content: bytes
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilePreview:
|
||||
name: str
|
||||
path: str
|
||||
size: int
|
||||
content_type: str
|
||||
modified: str | None
|
||||
is_binary: bool
|
||||
is_truncated: bool
|
||||
content: str | None
|
||||
|
||||
def to_dict(self) -> dict[str, object]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"path": self.path,
|
||||
"size": self.size,
|
||||
"content_type": self.content_type,
|
||||
"modified": self.modified,
|
||||
"is_binary": self.is_binary,
|
||||
"is_truncated": self.is_truncated,
|
||||
"content": self.content,
|
||||
}
|
||||
|
||||
|
||||
class UserFileStorage(Protocol):
    """Structural interface every user file storage adapter provides."""

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List the entries directly under ``path``."""

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read a file, optionally bounded to ``max_bytes``."""

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Store ``content`` at ``path``."""

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Store the bytes read from ``stream`` at ``path``."""

    async def delete_path(self, path: str) -> bool:
        """Delete ``path`` and report whether anything was removed."""

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create a directory at ``path``."""
|
||||
|
||||
|
||||
class UserFileService:
    """Path-validating facade over a `UserFileStorage` backend.

    Every method normalizes the caller-supplied virtual path before handing
    it to the storage adapter.
    """

    def __init__(self, storage: UserFileStorage) -> None:
        self.storage = storage

    @staticmethod
    def _upload_target(directory: str, filename: str) -> str:
        """Validate ``filename`` and return the normalized destination path."""
        if not is_safe_filename(filename):
            raise UserFilePathError("Invalid filename")
        return normalize_user_path(_join_user_path(directory, filename), allow_root=False)

    async def browse(self, path: str = "") -> dict[str, object]:
        """List a directory; the empty path lists the fixed virtual roots."""
        normalized = normalize_user_path(path, allow_root=True)
        if normalized != "":
            listing = await self.storage.list_dir(normalized)
            return {"path": normalized, "items": [item.to_dict() for item in listing]}
        root_items = [
            UserFileEntry(name=root, path=root, type="directory").to_dict()
            for root in USER_FILE_ROOTS
        ]
        return {"path": "", "items": root_items}

    async def upload(self, directory: str, filename: str, content: bytes, *, content_type: str) -> dict[str, object]:
        """Store ``content`` as ``filename`` inside ``directory``."""
        target = self._upload_target(directory, filename)
        entry = await self.storage.write_file(target, content, content_type=content_type)
        return entry.to_dict()

    async def upload_stream(
        self,
        directory: str,
        filename: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> dict[str, object]:
        """Store the bytes read from ``stream`` as ``filename`` inside ``directory``."""
        target = self._upload_target(directory, filename)
        entry = await self.storage.write_file_stream(
            target,
            stream,
            content_type=content_type,
            max_bytes=max_bytes,
            part_size=part_size,
        )
        return entry.to_dict()

    async def write_file(self, path: str, content: bytes | str, *, content_type: str = "text/plain") -> dict[str, object]:
        """Write ``content`` to ``path``; text is encoded as UTF-8."""
        normalized = normalize_user_path(path, allow_root=False)
        if isinstance(content, str):
            payload = content.encode("utf-8")
        else:
            payload = bytes(content)
        entry = await self.storage.write_file(normalized, payload, content_type=content_type)
        return entry.to_dict()

    async def download(self, path: str) -> UserFileContent:
        """Return the full content of the file at ``path``."""
        normalized = normalize_user_path(path, allow_root=False)
        return await self.storage.read_file(normalized)

    async def preview(self, path: str, *, max_bytes: int = MAX_PREVIEW_BYTES) -> dict[str, object]:
        """Return a preview of at most ``max_bytes`` bytes; binary files carry no text."""
        normalized = normalize_user_path(path, allow_root=False)
        loaded = await self.storage.read_file(normalized, max_bytes=max_bytes)
        binary = _is_probably_binary(loaded.content, loaded.content_type)
        if binary:
            text = None
        else:
            text = loaded.content.decode("utf-8", errors="replace")
        preview = UserFilePreview(
            name=loaded.name,
            path=loaded.path,
            size=loaded.size,
            content_type=loaded.content_type,
            modified=loaded.modified,
            is_binary=binary,
            # Fewer bytes came back than the backend says the file holds.
            is_truncated=loaded.size > len(loaded.content),
            content=text,
        )
        return preview.to_dict()

    async def delete(self, path: str) -> bool:
        """Delete ``path``; the virtual root folders themselves are protected."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Cannot delete virtual root folders")
        return await self.storage.delete_path(normalized)

    async def mkdir(self, path: str) -> dict[str, object]:
        """Create a directory; the virtual root folders cannot be re-created."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Virtual root folders already exist")
        entry = await self.storage.mkdir(normalized)
        return entry.to_dict()
|
||||
|
||||
|
||||
class LocalUserFileStorage:
    """Filesystem-backed storage adapter for tests and local development."""

    def __init__(self, root: Path) -> None:
        """Resolve ``root`` and make sure it and every virtual root folder exist."""
        self.root = Path(root).expanduser().resolve()
        self.root.mkdir(parents=True, exist_ok=True)
        for name in USER_FILE_ROOTS:
            (self.root / name).mkdir(parents=True, exist_ok=True)

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List visible children of ``path``, directories first, then by name.

        A missing directory is created rather than reported as an error.
        Names starting with ``.`` are skipped.

        Raises:
            UserFilePathError: ``path`` exists but is not a directory.
        """
        target = self._path(path)
        if not target.exists():
            target.mkdir(parents=True, exist_ok=True)
        if not target.is_dir():
            raise UserFilePathError("Path is not a directory")
        children = sorted(target.iterdir(), key=lambda item: (not item.is_dir(), item.name.lower()))
        return [self._entry(child) for child in children if not child.name.startswith(".")]

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read a file, returning at most ``max_bytes`` bytes when it is given.

        ``size`` in the result is always the full on-disk size.

        Raises:
            UserFileNotFoundError: ``path`` is not an existing regular file.
        """
        target = self._path(path)
        if not target.is_file():
            raise UserFileNotFoundError("File not found")
        stat = target.stat()
        with target.open("rb") as handle:
            # Read only what was asked for, so a bounded preview of a large
            # file does not load the whole file into memory.
            selected = handle.read() if max_bytes is None else handle.read(max(0, max_bytes))
        content_type, _ = mimetypes.guess_type(target.name)
        return UserFileContent(
            name=target.name,
            path=self._relative(target),
            size=stat.st_size,
            content_type=content_type or "application/octet-stream",
            modified=_iso_from_timestamp(stat.st_mtime),
            content=selected,
        )

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Write ``content`` to ``path``, creating parent directories as needed."""
        target = self._path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(content)
        return self._entry(target, content_type=content_type)

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Copy ``stream`` to ``path`` through a temporary file.

        The data is written to a hidden temp file next to the target and
        moved into place only after the whole stream was read, so a failed
        upload leaves an existing file untouched.

        Raises:
            UserFileSizeError: more than ``max_bytes`` bytes were read.
        """
        target = self._path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp_name = tempfile.mkstemp(prefix=f".{target.name}.", suffix=".tmp", dir=target.parent)
        tmp_path = Path(tmp_name)
        total = 0
        try:
            with open(fd, "wb", closefd=True) as output:
                while True:
                    chunk = stream.read(part_size)  # type: ignore[attr-defined]
                    if not chunk:
                        break
                    total += len(chunk)
                    if max_bytes is not None and total > max_bytes:
                        raise UserFileSizeError(_size_error(max_bytes))
                    output.write(chunk)
            tmp_path.replace(target)
        except Exception:
            # Drop the partial temp file; it is already gone if replace() ran.
            with suppress(FileNotFoundError):
                tmp_path.unlink()
            raise
        return self._entry(target, content_type=content_type)

    async def delete_path(self, path: str) -> bool:
        """Delete a file or a whole directory tree; return ``False`` if absent."""
        target = self._path(path)
        if not target.exists():
            return False
        if target.is_dir():
            shutil.rmtree(target)
        else:
            target.unlink()
        return True

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create ``path`` (and parents); an existing directory is not an error."""
        target = self._path(path)
        target.mkdir(parents=True, exist_ok=True)
        return self._entry(target)

    def _path(self, path: str) -> Path:
        """Map a virtual path to a resolved filesystem path under the root.

        Raises:
            UserFilePathError: the resolved path lies outside the root.
        """
        normalized = normalize_user_path(path, allow_root=False)
        target = (self.root / normalized).resolve()
        try:
            target.relative_to(self.root)
        except ValueError as exc:
            raise UserFilePathError("Path escapes user file root") from exc
        return target

    def _relative(self, path: Path) -> str:
        """Return ``path`` relative to the root, using forward slashes."""
        return path.relative_to(self.root).as_posix()

    def _entry(self, path: Path, *, content_type: str | None = None) -> UserFileEntry:
        """Build the listing entry for an existing file or directory.

        For files the explicit ``content_type`` wins over the type guessed
        from the name. Directories carry neither size nor content type.
        """
        stat = path.stat()
        if path.is_dir():
            return UserFileEntry(
                name=path.name,
                path=self._relative(path),
                type="directory",
                size=None,
                content_type=None,
                modified=_iso_from_timestamp(stat.st_mtime),
            )
        guessed_type, _ = mimetypes.guess_type(path.name)
        return UserFileEntry(
            name=path.name,
            path=self._relative(path),
            type="file",
            size=stat.st_size,
            content_type=content_type or guessed_type or "application/octet-stream",
            modified=_iso_from_timestamp(stat.st_mtime),
        )
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MinIOStorageConfig:
|
||||
endpoint: str
|
||||
access_key: str
|
||||
secret_key: str
|
||||
bucket: str
|
||||
secure: bool = False
|
||||
region: str | None = None
|
||||
namespace: str = ""
|
||||
|
||||
|
||||
class MinIOUserFileStorage:
    """MinIO-backed user file storage adapter.

    Virtual paths are stored as object keys under the configured namespace.
    Directories are represented by a ``.keep`` marker object inside them.
    """

    def __init__(self, config: MinIOStorageConfig) -> None:
        """Create the MinIO client.

        Raises:
            ValueError: endpoint, access key, secret key or bucket is empty.
        """
        if not config.endpoint or not config.access_key or not config.secret_key or not config.bucket:
            raise ValueError("MinIO storage requires endpoint, access key, secret key, and bucket")
        # Imported here so the module loads without the minio package.
        from minio import Minio

        self.config = config
        self.client = Minio(
            endpoint=config.endpoint,
            access_key=config.access_key,
            secret_key=config.secret_key,
            secure=config.secure,
            region=config.region,
        )

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List the entries directly under ``path``, directories first, then by name.

        ``.keep`` markers and the listed directory itself are left out.
        """
        prefix = self._object_prefix(path)
        objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
        entries: list[UserFileEntry] = []
        for obj in objects:
            object_name = str(obj.object_name or "")
            if object_name == prefix:
                # A marker object for the listed directory is not one of its children.
                continue
            user_path = self._user_path(object_name)
            if not user_path or user_path == path or user_path.endswith("/.keep"):
                continue
            trimmed = user_path.rstrip("/")
            is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
            last_modified = getattr(obj, "last_modified", None)
            entries.append(
                UserFileEntry(
                    name=PurePosixPath(trimmed).name,
                    path=trimmed,
                    type="directory" if is_dir else "file",
                    size=None if is_dir else getattr(obj, "size", None),
                    content_type=None if is_dir else "application/octet-stream",
                    modified=last_modified.isoformat() if last_modified else None,
                )
            )
        return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower()))

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read an object, returning at most ``max_bytes`` bytes when it is given.

        Raises:
            UserFileNotFoundError: the object could not be read, for any reason.
        """
        object_name = self._object_name(path)
        try:
            stat = self.client.stat_object(self.config.bucket, object_name)
            if stat.size == 0 or (max_bytes is not None and max_bytes <= 0):
                # Nothing to fetch. This avoids a ranged GET on an empty
                # object and a zero length being taken as "read everything".
                raw = b""
            else:
                if max_bytes is None:
                    response = self.client.get_object(self.config.bucket, object_name)
                else:
                    response = self.client.get_object(self.config.bucket, object_name, length=max_bytes)
                try:
                    raw = response.read()
                finally:
                    # Release the connection even when the read fails.
                    response.close()
                    response.release_conn()
        except Exception as exc:
            raise UserFileNotFoundError("File not found") from exc
        return UserFileContent(
            name=PurePosixPath(path).name,
            path=path,
            size=int(stat.size or len(raw)),
            content_type=stat.content_type or "application/octet-stream",
            modified=stat.last_modified.isoformat() if stat.last_modified else None,
            content=raw,
        )

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Store ``content`` as the object for ``path``."""
        object_name = self._object_name(path)
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(content),
            length=len(content),
            content_type=content_type,
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=len(content),
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Upload ``stream`` as the object for ``path``.

        The part size is raised to at least 5 MiB before it is passed to the
        client.

        Raises:
            UserFileSizeError: more than ``max_bytes`` bytes were read.
        """
        object_name = self._object_name(path)
        reader = _LimitedReadStream(stream, max_bytes=max_bytes)
        # No cleanup on failure: an upload that raises never replaces the
        # object at this key, so deleting the key here could only remove a
        # file that was stored there earlier.
        self.client.put_object(
            self.config.bucket,
            object_name,
            reader,
            length=-1,
            part_size=max(5 * 1024 * 1024, part_size),
            content_type=content_type,
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=reader.bytes_read,
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def delete_path(self, path: str) -> bool:
        """Delete the object at ``path`` and every object beneath it.

        Returns:
            ``True`` if at least one existing object was removed.
        """
        object_name = self._object_name(path)
        removed = False
        try:
            # S3-style deletes report success even for keys that do not
            # exist, so check for the object first to keep the result honest.
            self.client.stat_object(self.config.bucket, object_name)
            self.client.remove_object(self.config.bucket, object_name)
            removed = True
        except Exception:
            # Best effort: the path may be a directory with no object of its
            # own, so carry on with the objects stored beneath it.
            pass
        prefix = f"{object_name.rstrip('/')}/"
        for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
            self.client.remove_object(self.config.bucket, str(obj.object_name))
            removed = True
        return removed

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create a directory by writing an empty ``.keep`` marker inside it."""
        object_name = f"{self._object_name(path).rstrip('/')}/.keep"
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(b""),
            length=0,
            content_type="application/x-directory",
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="directory",
            size=None,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    def _namespace(self) -> str:
        """Return the configured namespace without leading/trailing slashes."""
        return self.config.namespace.strip("/")

    def _object_name(self, path: str) -> str:
        """Map a virtual path to its object key under the namespace.

        Raises:
            UserFilePathError: the key is absolute or contains a ``..`` segment.
        """
        normalized = normalize_user_path(path, allow_root=False)
        namespace = self._namespace()
        object_name = f"{namespace}/{normalized}" if namespace else normalized
        if object_name.startswith("/") or "/../" in f"/{object_name}/":
            raise UserFilePathError("Object path escapes namespace")
        return object_name

    def _object_prefix(self, path: str) -> str:
        """Return the listing prefix for ``path``: its object key plus ``/``."""
        return f"{self._object_name(path).rstrip('/')}/"

    def _user_path(self, object_name: str) -> str:
        """Strip the namespace from an object key to get the virtual path.

        Raises:
            UserFilePathError: the key is not inside the namespace.
        """
        namespace = self._namespace()
        if not namespace:
            return object_name
        prefix = f"{namespace}/"
        if not object_name.startswith(prefix):
            raise UserFilePathError("Object path escapes namespace")
        return object_name[len(prefix) :]
|
||||
|
||||
|
||||
def normalize_user_path(path: str | None, *, allow_root: bool) -> str:
    """Validate a caller-supplied virtual path and return its canonical form.

    Backslashes are treated as separators, surrounding whitespace and
    redundant slashes are dropped, and ``.`` segments are removed. The result
    uses ``/`` separators with no leading or trailing slash.

    Args:
        path: Virtual path relative to the user file root; ``None`` is
            treated as empty.
        allow_root: Whether an empty path (the virtual root) is acceptable.

    Returns:
        The normalized path, or ``""`` for the root when ``allow_root`` is set.

    Raises:
        UserFilePathError: the path contains a NUL character, is absolute, is
            empty while ``allow_root`` is false, contains ``..`` or a segment
            starting with ``.``, or does not start with one of
            ``USER_FILE_ROOTS``.
    """
    original = (path or "").replace("\\", "/").strip()
    if "\x00" in original:
        # Reject NUL here; otherwise it surfaces later as a bare ValueError
        # from the filesystem layer instead of a path policy error.
        raise UserFilePathError("Path contains invalid characters")
    if original.startswith("/"):
        raise UserFilePathError("Absolute paths are not allowed")
    raw = original.strip("/")
    if raw == "":
        if allow_root:
            return ""
        raise UserFilePathError("Path is required")
    # raw has no leading slash at this point, so it cannot be absolute.
    parts = [part for part in PurePosixPath(raw).parts if part not in ("", ".")]
    if any(part == ".." for part in parts):
        raise UserFilePathError("Parent-directory traversal is not allowed")
    if any(part.startswith(".") for part in parts):
        raise UserFilePathError("Hidden implementation paths are not allowed")
    if not parts or parts[0] not in USER_FILE_ROOTS:
        raise UserFilePathError("Path must be under uploads, outputs, shared, or tasks")
    return "/".join(parts)
|
||||
|
||||
|
||||
def is_safe_filename(filename: str) -> bool:
    """Return whether ``filename`` is acceptable as a single upload file name.

    A name is rejected when it is empty or whitespace-only, contains a path
    separator (``/`` or ``\\``) or a NUL character, or starts with ``.``.
    """
    # A whitespace-only name would be stripped away during path
    # normalization, turning the upload target into the directory itself.
    if not filename or not filename.strip():
        return False
    if any(char in filename for char in ("/", "\\", "\x00")):
        return False
    return not filename.startswith(".")
|
||||
|
||||
|
||||
def _join_user_path(directory: str, filename: str) -> str:
    """Append ``filename`` to the normalized ``directory`` with a single ``/``."""
    base = normalize_user_path(directory, allow_root=False).rstrip("/")
    return "/".join((base, filename))
|
||||
|
||||
|
||||
def _is_probably_binary(raw: bytes, content_type: str) -> bool:
    """Guess whether ``raw`` should be treated as binary.

    Known textual content types are always text. Otherwise the first 4096
    bytes are inspected: a NUL byte or invalid UTF-8 means binary. Empty
    content is treated as text.
    """
    import codecs

    textual_types = {
        "application/json",
        "application/javascript",
        "application/xml",
        "application/x-yaml",
    }
    if content_type.startswith("text/") or content_type in textual_types:
        return False
    if not raw:
        return False
    sample = raw[:4096]
    if b"\x00" in sample:
        return True
    try:
        # final=False tolerates a multi-byte character cut off at the end of
        # the sample, so long UTF-8 text is not mistaken for binary.
        codecs.getincrementaldecoder("utf-8")().decode(sample, final=False)
    except UnicodeDecodeError:
        return True
    return False
|
||||
|
||||
|
||||
def _iso_from_timestamp(value: float) -> str:
    """Format a POSIX timestamp as an ISO 8601 string in UTC."""
    moment = datetime.fromtimestamp(value, tz=timezone.utc)
    return moment.isoformat()
|
||||
|
||||
|
||||
def _safe_scope(value: str | None) -> str:
    """Turn ``value`` into a scope name safe to use as one path segment.

    Every character that is not alphanumeric, ``-`` or ``_`` becomes ``-``,
    then leading/trailing ``-`` and ``_`` are trimmed. An empty result falls
    back to ``"interactive"``.
    """
    text = (value or "interactive").strip()
    sanitized = "".join(
        char if (char.isalnum() or char in ("-", "_")) else "-"
        for char in text
    )
    trimmed = sanitized.strip("-_")
    if trimmed:
        return trimmed
    return "interactive"
|
||||
|
||||
|
||||
class _LimitedReadStream:
    """Readable wrapper that counts bytes and enforces an optional size cap."""

    def __init__(self, stream: object, *, max_bytes: int | None = None) -> None:
        self.stream = stream
        self.max_bytes = max_bytes
        # Running total of bytes handed out by read().
        self.bytes_read = 0

    def read(self, size: int = -1) -> bytes:
        """Read from the wrapped stream.

        Raises:
            UserFileSizeError: the running total exceeds ``max_bytes``.
        """
        data = self.stream.read(size)  # type: ignore[attr-defined]
        if data:
            self.bytes_read += len(data)
            limit = self.max_bytes
            if limit is not None and self.bytes_read > limit:
                raise UserFileSizeError(_size_error(limit))
            return data
        return b""
|
||||
|
||||
|
||||
def _size_error(max_bytes: int) -> str:
    """Build the "file too large" message for a limit of ``max_bytes``."""
    limit = _human_size(max_bytes)
    return f"File too large (max {limit})"
|
||||
|
||||
|
||||
def _human_size(size: int) -> str:
    """Format a byte count with a 1024-based unit from B up to TB.

    Bytes are shown without decimals, larger units with one decimal place.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size)
    index = 0
    # Step up one unit at a time, stopping at the largest available unit.
    while value >= 1024 and index < len(units) - 1:
        value /= 1024
        index += 1
    unit = units[index]
    if unit == "B":
        return f"{value:.0f}{unit}"
    return f"{value:.1f}{unit}"
|
||||
Reference in New Issue
Block a user