feat: integrate MinIO-backed user filesystem
This commit is contained in:
@ -47,6 +47,12 @@ from beaver.tools.builtins import (
|
||||
SkillsListTool,
|
||||
TerminalTool,
|
||||
TodoTool,
|
||||
UserFilesCopyToWorkspaceTool,
|
||||
UserFilesListTool,
|
||||
UserFilesMkdirTool,
|
||||
UserFilesPublishOutputTool,
|
||||
UserFilesReadTool,
|
||||
UserFilesWriteTool,
|
||||
WebFetchTool,
|
||||
WebSearchTool,
|
||||
WriteFileTool,
|
||||
@ -222,6 +228,12 @@ class EngineLoader:
|
||||
ObjectBackedTool(SearchFilesTool()),
|
||||
ObjectBackedTool(WriteFileTool()),
|
||||
ObjectBackedTool(PatchFileTool()),
|
||||
ObjectBackedTool(UserFilesListTool()),
|
||||
ObjectBackedTool(UserFilesReadTool()),
|
||||
ObjectBackedTool(UserFilesWriteTool()),
|
||||
ObjectBackedTool(UserFilesMkdirTool()),
|
||||
ObjectBackedTool(UserFilesCopyToWorkspaceTool()),
|
||||
ObjectBackedTool(UserFilesPublishOutputTool()),
|
||||
ObjectBackedTool(WebFetchTool()),
|
||||
ObjectBackedTool(WebSearchTool()),
|
||||
ObjectBackedTool(TerminalTool()),
|
||||
|
||||
@ -621,11 +621,17 @@ class AgentLoop:
|
||||
"tool_registry": tool_registry,
|
||||
"skills_loader": skills_loader,
|
||||
"draft_service": getattr(loaded, "draft_service", None),
|
||||
"beaver_config": loaded.config,
|
||||
"task_id": task_id,
|
||||
"run_id": resolved_run_id,
|
||||
**self.runtime_services,
|
||||
},
|
||||
metadata={
|
||||
"source": source,
|
||||
"agent_name": self.profile.name,
|
||||
"session_id": resolved_session_id,
|
||||
"task_id": task_id,
|
||||
"run_id": resolved_run_id,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
@ -110,6 +111,12 @@ END;
|
||||
"""
|
||||
|
||||
|
||||
def _sqlite_journal_mode() -> str:
    """Return the SQLite journal mode requested via ``BEAVER_SQLITE_JOURNAL_MODE``.

    The environment value is trimmed and upper-cased. An unset variable or a
    value outside the recognised set yields ``"DELETE"``.
    """
    fallback = "DELETE"
    candidate = os.getenv("BEAVER_SQLITE_JOURNAL_MODE", fallback).strip().upper()
    if candidate in ("DELETE", "TRUNCATE", "PERSIST", "MEMORY", "OFF", "WAL"):
        return candidate
    return fallback
|
||||
|
||||
|
||||
class SessionStore:
|
||||
"""SQLite-backed session store."""
|
||||
|
||||
@ -119,7 +126,9 @@ class SessionStore:
|
||||
self._lock = threading.Lock()
|
||||
self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False, isolation_level=None)
|
||||
self._conn.row_factory = sqlite3.Row
|
||||
self._conn.execute("PRAGMA journal_mode=WAL")
|
||||
self._conn.execute("PRAGMA mmap_size=0")
|
||||
self._conn.execute("PRAGMA busy_timeout=5000")
|
||||
self._conn.execute(f"PRAGMA journal_mode={_sqlite_journal_mode()}")
|
||||
self._conn.execute("PRAGMA foreign_keys=ON")
|
||||
self._init_schema()
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@ from .schema import (
|
||||
)
|
||||
|
||||
LOCAL_MCP_CATEGORIES: dict[str, dict[str, str]] = {
|
||||
"local_filesystem_mcp": {"category": "filesystem", "display_name": "本地文件工具"},
|
||||
"local_filesystem_mcp": {"category": "filesystem", "display_name": "个人智能体文件系统工具"},
|
||||
"local_runtime_mcp": {"category": "runtime", "display_name": "本地运行工具"},
|
||||
"local_memory_mcp": {"category": "memory", "display_name": "本地记忆工具"},
|
||||
"local_skills_mcp": {"category": "skills", "display_name": "本地技能工具"},
|
||||
|
||||
@ -109,3 +109,15 @@ class AuthzClient:
|
||||
async def delete_outlook_settings(self, backend_id: str) -> dict[str, Any]:
    """Issue a DELETE for the backend's Outlook settings.

    Returns the response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("DELETE", f"/backends/{backend_id}/settings/outlook")
    if isinstance(response, dict):
        return response
    return {}
|
||||
|
||||
async def get_minio_settings(self, backend_id: str) -> dict[str, Any]:
    """Issue a GET for the backend's MinIO settings.

    Returns the response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("GET", f"/backends/{backend_id}/settings/minio")
    if isinstance(response, dict):
        return response
    return {}
|
||||
|
||||
async def set_minio_settings(self, backend_id: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST *payload* as the backend's MinIO settings.

    Returns the response when it is a dict, otherwise an empty dict.
    """
    response = await self._request(
        "POST",
        f"/backends/{backend_id}/settings/minio",
        json_body=payload,
    )
    if isinstance(response, dict):
        return response
    return {}
|
||||
|
||||
async def delete_minio_settings(self, backend_id: str) -> dict[str, Any]:
    """Issue a DELETE for the backend's MinIO settings.

    Returns the response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("DELETE", f"/backends/{backend_id}/settings/minio")
    if isinstance(response, dict):
        return response
    return {}
|
||||
|
||||
@ -27,12 +27,8 @@ from beaver.tools.builtins import (
|
||||
CronTool,
|
||||
DelegateTool,
|
||||
ExecuteCodeTool,
|
||||
ListDirectoryTool,
|
||||
MemoryTool,
|
||||
PatchFileTool,
|
||||
ProcessTool,
|
||||
ReadFileTool,
|
||||
SearchFilesTool,
|
||||
SendMessageTool,
|
||||
SkillManageTool,
|
||||
SkillViewTool,
|
||||
@ -40,6 +36,12 @@ from beaver.tools.builtins import (
|
||||
SpawnTool,
|
||||
TerminalTool,
|
||||
TodoTool,
|
||||
UserFilesCopyToWorkspaceTool,
|
||||
UserFilesListTool,
|
||||
UserFilesMkdirTool,
|
||||
UserFilesPublishOutputTool,
|
||||
UserFilesReadTool,
|
||||
UserFilesWriteTool,
|
||||
WebFetchTool,
|
||||
WebSearchTool,
|
||||
WriteFileTool,
|
||||
@ -47,7 +49,7 @@ from beaver.tools.builtins import (
|
||||
|
||||
|
||||
LOCAL_TOOL_CATEGORIES = {
|
||||
"filesystem": "Beaver Local Filesystem Tools",
|
||||
"filesystem": "Beaver Personal Agent Filesystem Tools",
|
||||
"runtime": "Beaver Local Runtime Tools",
|
||||
"memory": "Beaver Local Memory Tools",
|
||||
"skills": "Beaver Local Skills Tools",
|
||||
@ -84,11 +86,12 @@ def _category_tools(category: str, workspace: Path) -> tuple[list[BaseTool], Too
|
||||
|
||||
if category == "filesystem":
|
||||
tools: list[BaseTool] = [
|
||||
ObjectBackedTool(ListDirectoryTool()),
|
||||
ObjectBackedTool(ReadFileTool()),
|
||||
ObjectBackedTool(SearchFilesTool()),
|
||||
ObjectBackedTool(WriteFileTool()),
|
||||
ObjectBackedTool(PatchFileTool()),
|
||||
ObjectBackedTool(UserFilesListTool()),
|
||||
ObjectBackedTool(UserFilesReadTool()),
|
||||
ObjectBackedTool(UserFilesWriteTool()),
|
||||
ObjectBackedTool(UserFilesMkdirTool()),
|
||||
ObjectBackedTool(UserFilesCopyToWorkspaceTool()),
|
||||
ObjectBackedTool(UserFilesPublishOutputTool()),
|
||||
]
|
||||
elif category == "runtime":
|
||||
tools = [
|
||||
|
||||
@ -24,6 +24,19 @@ from beaver.integrations.mcp import MCPConnectionManager
|
||||
from beaver.services.agent_service import NOTIFICATION_SESSION_ID, AgentService
|
||||
from beaver.services.cron_service import CronService, schedule_from_api
|
||||
from beaver.services.skillhub_service import SkillHubService
|
||||
from beaver.services.user_files import (
|
||||
USER_FILE_ROOTS,
|
||||
UserFileError,
|
||||
UserFileNotFoundError,
|
||||
UserFilePathError,
|
||||
UserFileSizeError,
|
||||
UserFileService,
|
||||
)
|
||||
from beaver.services.user_file_resolver import (
|
||||
UserFileConfigurationError,
|
||||
UserFileStorageResolver,
|
||||
build_file_auth_context,
|
||||
)
|
||||
from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig
|
||||
from beaver.skills.catalog.utils import parse_frontmatter
|
||||
|
||||
@ -306,6 +319,28 @@ def create_app(
|
||||
app.state.handoff_codes = {}
|
||||
app.state.auth_file = Path(os.getenv("BEAVER_AUTH_FILE") or "")
|
||||
max_file_size = 50 * 1024 * 1024
|
||||
max_user_file_upload_size = _int_env("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", 5 * 1024 * 1024 * 1024)
|
||||
user_file_upload_part_size = _int_env("BEAVER_USER_FILES_UPLOAD_PART_SIZE", 10 * 1024 * 1024)
|
||||
|
||||
def _user_file_resolver(request: Request, authorization: str | None) -> UserFileStorageResolver:
    """Build the user-file storage resolver for the caller of *request*.

    The web user is resolved from *authorization* first; the agent loop is
    then booted to obtain the config and workspace the resolver works with.
    """
    web_user = _require_web_user(app, authorization)
    runtime = get_agent_service(request).create_loop().boot()
    identity = build_file_auth_context(username=web_user, config=runtime.config)
    return UserFileStorageResolver(
        config=runtime.config,
        workspace=runtime.workspace,
        auth_context=identity,
    )
|
||||
|
||||
async def _user_file_service(request: Request, authorization: str | None) -> UserFileService:
    """Resolve the `UserFileService` for the caller of *request*."""
    resolver = _user_file_resolver(request, authorization)
    return await resolver.service()
|
||||
|
||||
def _user_file_http_error(exc: UserFileError) -> HTTPException:
    """Translate a user-file error into the HTTP error returned to the client.

    The error's own message is used as the detail; when it is empty a fixed
    per-category message is substituted. Errors that match no specific
    category map to a 400.
    """
    # Checked in order; first matching type wins.
    categories = (
        (UserFileNotFoundError, 404, "File not found"),
        (UserFilePathError, 400, "Invalid path"),
        (UserFileSizeError, 413, "File too large"),
        (UserFileConfigurationError, 503, "User file storage is not configured"),
    )
    message = str(exc)
    for error_type, status_code, fallback in categories:
        if isinstance(exc, error_type):
            return HTTPException(status_code=status_code, detail=message or fallback)
    return HTTPException(status_code=400, detail=message or "User file operation failed")
|
||||
|
||||
@app.get("/api/ping", response_model=WebStatusResponse)
|
||||
async def ping(request: Request) -> WebStatusResponse:
|
||||
@ -747,6 +782,101 @@ def create_app(
|
||||
return {"ok": True}
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
@app.get("/api/user-files/status")
async def user_files_status(
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Report the user-file storage status for the authenticated caller."""
    resolver = _user_file_resolver(request, authorization)
    storage_status = await resolver.status()
    return storage_status.to_dict()
|
||||
|
||||
@app.get("/api/user-files/browse")
async def browse_user_files(
    request: Request,
    path: str = "",
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """List the entries under the virtual user-file directory *path*."""
    try:
        service = await _user_file_service(request, authorization)
        return await service.browse(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
|
||||
|
||||
@app.get("/api/user-files/download")
async def download_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> Response:
    """Return the bytes of the user file at *path*.

    Files whose content type starts with ``image/`` are served inline; all
    other files are served as attachments.
    """
    try:
        service = await _user_file_service(request, authorization)
        content = await service.download(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
    if content.content_type.startswith("image/"):
        disposition = "inline"
    else:
        disposition = "attachment"
    response_headers = {"Content-Disposition": content_disposition(disposition, content.name)}
    return Response(
        content=content.content,
        media_type=content.content_type,
        headers=response_headers,
    )
|
||||
|
||||
@app.get("/api/user-files/preview")
async def preview_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Return the preview payload for the user file at *path*."""
    try:
        service = await _user_file_service(request, authorization)
        return await service.preview(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
|
||||
|
||||
@app.post("/api/user-files/upload")
async def upload_user_file(
    request: Request,
    file: UploadFile = File(...),
    path: str = Form("uploads"),
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Stream an uploaded file into the user-file directory *path*.

    Uploads without a filename are rejected with 400. When the upload reports
    an integer size above the configured maximum it is rejected with 413
    before any storage work starts; the same limit is also passed to the
    streaming write.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")
    declared_size = getattr(file, "size", None)
    if isinstance(declared_size, int) and declared_size > max_user_file_upload_size:
        limit_text = _human_upload_size(max_user_file_upload_size)
        raise HTTPException(status_code=413, detail=f"File too large (max {limit_text})")
    try:
        service = await _user_file_service(request, authorization)
        return await service.upload_stream(
            path,
            file.filename,
            file.file,
            content_type=file.content_type or "application/octet-stream",
            max_bytes=max_user_file_upload_size,
            part_size=user_file_upload_part_size,
        )
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
|
||||
|
||||
@app.delete("/api/user-files/delete")
async def delete_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, bool]:
    """Delete the user file or directory at *path*; 404 when nothing was removed."""
    try:
        service = await _user_file_service(request, authorization)
        removed = await service.delete(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
    if not removed:
        raise HTTPException(status_code=404, detail="Path not found")
    return {"ok": True}
|
||||
|
||||
@app.post("/api/user-files/mkdir")
async def create_user_file_directory(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    """Create the user-file directory *path* and return its entry."""
    try:
        service = await _user_file_service(request, authorization)
        return await service.mkdir(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
|
||||
|
||||
@app.get("/api/workspace/browse")
|
||||
async def browse_workspace_dir(request: Request, path: str = "") -> dict[str, Any]:
|
||||
loaded = get_agent_service(request).create_loop().boot()
|
||||
@ -2576,6 +2706,27 @@ def _handoff_replay_window_seconds() -> int:
|
||||
return 15
|
||||
|
||||
|
||||
def _int_env(name: str, default: int) -> int:
    """Read a strictly positive integer from the environment variable *name*.

    *default* is returned when the variable is unset, blank, not parseable by
    ``int``, or not greater than zero.
    """
    text = os.getenv(name, "").strip()
    if text:
        try:
            parsed = int(text)
        except ValueError:
            pass
        else:
            if parsed > 0:
                return parsed
    return default
|
||||
|
||||
|
||||
def _human_upload_size(size: int) -> str:
    """Format a byte count for display using 1024-based units.

    Values below 1024 are rendered as whole bytes (``"512B"``); larger values
    use one decimal place (``"1.5KB"``). ``TB`` is the largest unit, so
    anything beyond it is expressed as a (possibly large) number of terabytes.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size)
    index = 0
    # Step up one unit at a time, stopping at the largest unit. This always
    # ends on a valid unit, so no fall-through return is needed.
    while index < len(units) - 1 and value >= 1024:
        value /= 1024
        index += 1
    unit = units[index]
    return f"{value:.0f}{unit}" if unit == "B" else f"{value:.1f}{unit}"
|
||||
|
||||
|
||||
def _prune_handoff_codes(app: FastAPI) -> None:
|
||||
now = time.time()
|
||||
replay_window = _handoff_replay_window_seconds()
|
||||
|
||||
201
app-instance/backend/beaver/services/user_file_resolver.py
Normal file
201
app-instance/backend/beaver/services/user_file_resolver.py
Normal file
@ -0,0 +1,201 @@
|
||||
"""Resolve the user-visible file system for web and agent callers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from beaver.foundation.config.schema import BeaverConfig
|
||||
|
||||
from .user_files import (
|
||||
LocalUserFileStorage,
|
||||
MinIOStorageConfig,
|
||||
MinIOUserFileStorage,
|
||||
USER_FILE_ROOTS,
|
||||
UserFileError,
|
||||
UserFileService,
|
||||
)
|
||||
|
||||
|
||||
class UserFileConfigurationError(UserFileError):
    """Signal that user file storage is missing or unusable for this backend."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class FileAuthContext:
|
||||
"""Authenticated identity used by the personal file system boundary."""
|
||||
|
||||
username: str
|
||||
backend_id: str
|
||||
storage_namespace: str
|
||||
user_id: str | None = None
|
||||
scopes: tuple[str, ...] = field(default_factory=tuple)
|
||||
auth_source: str = "beaver-web-token"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileStorageStatus:
|
||||
configured: bool
|
||||
storage_mode: str
|
||||
roots: list[str]
|
||||
workspace_visible: bool = False
|
||||
detail: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
payload: dict[str, Any] = {
|
||||
"configured": self.configured,
|
||||
"storage_mode": self.storage_mode,
|
||||
"roots": self.roots,
|
||||
"workspace_visible": self.workspace_visible,
|
||||
}
|
||||
if self.detail:
|
||||
payload["detail"] = self.detail
|
||||
return payload
|
||||
|
||||
|
||||
class UserFileStorageResolver:
    """Build `UserFileService` from the current Beaver identity and config.

    In local mode the service is backed by ``<workspace>/user_files``. In every
    other mode the MinIO connection settings are fetched from the AuthZ
    internal API for the backend named in the auth context.
    """

    def __init__(
        self,
        *,
        config: BeaverConfig,
        workspace: Path,
        auth_context: FileAuthContext,
    ) -> None:
        self.config = config
        self.workspace = Path(workspace)
        self.auth_context = auth_context

    async def service(self) -> UserFileService:
        """Return a `UserFileService` for the resolved storage mode.

        Raises:
            UserFileConfigurationError: the MinIO settings cannot be loaded.
        """
        if _storage_mode(self.config) == "local":
            return UserFileService(LocalUserFileStorage(self.workspace / "user_files"))
        settings = await self._load_minio_settings()
        minio_config = MinIOStorageConfig(
            endpoint=str(settings.get("endpoint") or ""),
            access_key=str(settings.get("access_key") or ""),
            secret_key=str(settings.get("secret_key") or ""),
            bucket=str(settings.get("bucket") or ""),
            secure=bool(settings.get("secure", False)),
            region=_clean_optional(settings.get("region")),
            # An empty or null namespace in the settings falls back to the
            # identity-derived namespace.
            namespace=str(settings.get("namespace") or self.auth_context.storage_namespace),
        )
        return UserFileService(MinIOUserFileStorage(minio_config))

    async def status(self) -> UserFileStorageStatus:
        """Describe the storage without building a service.

        Configuration failures are reported through ``configured=False`` and
        ``detail`` instead of being raised.
        """
        if _storage_mode(self.config) == "local":
            return UserFileStorageStatus(
                configured=True,
                storage_mode="local",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
            )
        try:
            await self._load_minio_settings()
        except UserFileConfigurationError as exc:
            return UserFileStorageStatus(
                configured=False,
                storage_mode="object",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
                detail=str(exc),
            )
        return UserFileStorageStatus(
            configured=True,
            storage_mode="object",
            roots=list(USER_FILE_ROOTS),
            workspace_visible=False,
        )

    async def _load_minio_settings(self) -> dict[str, Any]:
        """Fetch and validate the backend's MinIO settings from AuthZ.

        Returns:
            The settings dict, with ``namespace`` defaulted to the auth
            context's storage namespace when the key is absent.

        Raises:
            UserFileConfigurationError: the backend id, AuthZ config or
                internal token is missing, the request fails, or the response
                is not a complete JSON object.
        """
        backend_id = self.auth_context.backend_id.strip()
        if not backend_id:
            raise UserFileConfigurationError("User file storage backend identity is not configured")
        base_url = self.config.authz.base_url.strip()
        if not (self.config.authz.enabled and base_url):
            raise UserFileConfigurationError("AuthZ is required for deployed user file storage")
        token = (
            os.getenv("BEAVER_AUTHZ_INTERNAL_TOKEN", "").strip()
            or os.getenv("AUTHZ_INTERNAL_TOKEN", "").strip()
        )
        if not token:
            raise UserFileConfigurationError("AuthZ internal token is not configured for user file storage")
        try:
            async with httpx.AsyncClient(
                timeout=self.config.authz.request_timeout_seconds,
                follow_redirects=True,
                trust_env=False,
            ) as client:
                response = await client.get(
                    f"{base_url.rstrip('/')}/internal/backends/{backend_id}/settings/minio",
                    headers={"Authorization": f"Bearer {token}"},
                )
        except httpx.HTTPError as exc:
            raise UserFileConfigurationError(f"Unable to load user file storage settings: {exc}") from exc
        if response.status_code == 404:
            raise UserFileConfigurationError("MinIO user file storage is not configured")
        if response.is_error:
            raise UserFileConfigurationError(
                f"Unable to load user file storage settings: HTTP {response.status_code}"
            )
        try:
            payload = response.json()
        except ValueError as exc:
            # A successful status with a non-JSON body (e.g. a proxy HTML page)
            # must surface as a configuration problem, not an unhandled error.
            raise UserFileConfigurationError("Invalid MinIO settings response") from exc
        if not isinstance(payload, dict):
            raise UserFileConfigurationError("Invalid MinIO settings response")
        required = ("endpoint", "access_key", "secret_key", "bucket")
        if not all(str(payload.get(key) or "").strip() for key in required):
            raise UserFileConfigurationError("MinIO user file storage settings are incomplete")
        payload.setdefault("namespace", self.auth_context.storage_namespace)
        return payload
|
||||
|
||||
|
||||
def build_file_auth_context(
    *,
    username: str,
    config: BeaverConfig,
    user_id: str | None = None,
    scopes: tuple[str, ...] = (),
    auth_source: str = "beaver-web-token",
) -> FileAuthContext:
    """Assemble the `FileAuthContext` for *username*.

    The backend id comes from the config, then from the
    ``BEAVER_BACKEND_IDENTITY__BACKEND_ID`` environment variable, and finally
    falls back to the trimmed username. The storage namespace is derived from
    whichever backend id was chosen.
    """
    resolved_backend = config.backend_identity.backend_id.strip()
    if not resolved_backend:
        resolved_backend = os.getenv("BEAVER_BACKEND_IDENTITY__BACKEND_ID", "").strip()
    if not resolved_backend:
        resolved_backend = username.strip()
    return FileAuthContext(
        username=username.strip(),
        backend_id=resolved_backend,
        storage_namespace=default_user_file_namespace(resolved_backend),
        user_id=user_id,
        scopes=scopes,
        auth_source=auth_source,
    )
|
||||
|
||||
|
||||
def default_user_file_namespace(backend_id: str) -> str:
    """Return the default storage namespace for *backend_id*.

    Surrounding whitespace and then leading/trailing slashes are removed. An
    id that ends up empty maps to ``"users/unconfigured"``.
    """
    trimmed = backend_id.strip().strip("/")
    if not trimmed:
        return "users/unconfigured"
    return "users/" + trimmed
|
||||
|
||||
|
||||
def _storage_mode(config: BeaverConfig) -> str:
    """Decide between ``"local"`` and ``"minio"`` user file storage.

    An explicit ``BEAVER_USER_FILES_STORAGE_MODE`` alias wins. Otherwise MinIO
    is selected only when AuthZ is enabled with a base URL and the backend id
    is set; everything else is local.
    """
    aliases = {
        "local": "local",
        "dev-local": "local",
        "development": "local",
        "minio": "minio",
        "object": "minio",
        "object-storage": "minio",
    }
    requested = os.getenv("BEAVER_USER_FILES_STORAGE_MODE", "").strip().lower()
    explicit = aliases.get(requested)
    if explicit is not None:
        return explicit
    deployed = (
        config.authz.enabled
        and config.authz.base_url.strip()
        and config.backend_identity.backend_id.strip()
    )
    return "minio" if deployed else "local"
|
||||
|
||||
|
||||
def _clean_optional(value: Any) -> str | None:
    """Return *value* as trimmed text, or ``None`` when it is falsy or blank."""
    if not value:
        return None
    stripped = str(value).strip()
    return stripped if stripped else None
|
||||
630
app-instance/backend/beaver/services/user_files.py
Normal file
630
app-instance/backend/beaver/services/user_files.py
Normal file
@ -0,0 +1,630 @@
|
||||
"""User-visible file system service.
|
||||
|
||||
This module owns the personal file-system boundary exposed to users and
|
||||
agents. Storage backends can change, but callers see only virtual paths under
|
||||
fixed roots.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
import mimetypes
|
||||
from pathlib import Path, PurePosixPath
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
USER_FILE_ROOTS = ("uploads", "outputs", "shared", "tasks")
|
||||
MAX_PREVIEW_BYTES = 1024 * 1024
|
||||
AGENT_UPLOADS_ERROR = "uploads/ is user-provided input storage; agents may read it but must not write it"
|
||||
AGENT_DELETE_ERROR = "agents cannot delete user-visible files; use the Files page or user-side APIs"
|
||||
|
||||
|
||||
class UserFileError(ValueError):
    """Root of the user file error hierarchy."""


class UserFilePathError(UserFileError):
    """A user file path is rejected by the virtual path policy."""


class UserFileNotFoundError(UserFileError):
    """The requested user file path does not exist."""


class UserFileSizeError(UserFileError):
    """A user file upload is larger than the configured limit."""
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AgentUserFilePolicy:
|
||||
task_id: str | None = None
|
||||
fallback_scope: str = "interactive"
|
||||
|
||||
@property
|
||||
def task_namespace(self) -> str:
|
||||
if self.task_id:
|
||||
return f"tasks/{self.task_id}"
|
||||
scope = _safe_scope(self.fallback_scope)
|
||||
return f"tasks/interactive/{scope}"
|
||||
|
||||
def validate_read(self, path: str) -> str:
|
||||
return normalize_user_path(path, allow_root=False)
|
||||
|
||||
def validate_write(self, path: str) -> str:
|
||||
normalized = normalize_user_path(path, allow_root=False)
|
||||
root = normalized.split("/", 1)[0]
|
||||
if root == "uploads":
|
||||
raise UserFilePathError(AGENT_UPLOADS_ERROR)
|
||||
if root == "tasks":
|
||||
self._validate_task_namespace(normalized)
|
||||
return normalized
|
||||
|
||||
def validate_mkdir(self, path: str) -> str:
|
||||
return self.validate_write(path)
|
||||
|
||||
def validate_delete(self, path: str) -> str:
|
||||
normalize_user_path(path, allow_root=False)
|
||||
raise UserFilePathError(AGENT_DELETE_ERROR)
|
||||
|
||||
def _validate_task_namespace(self, normalized: str) -> None:
|
||||
namespace = self.task_namespace
|
||||
if normalized == "tasks" or not normalized.startswith(f"{namespace}/"):
|
||||
raise UserFilePathError(f"Agent task files must be written under {namespace}/")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileEntry:
|
||||
name: str
|
||||
path: str
|
||||
type: str
|
||||
size: int | None = None
|
||||
content_type: str | None = None
|
||||
modified: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, object]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"path": self.path,
|
||||
"type": self.type,
|
||||
"size": self.size,
|
||||
"content_type": self.content_type,
|
||||
"modified": self.modified,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFileContent:
|
||||
name: str
|
||||
path: str
|
||||
size: int
|
||||
content_type: str
|
||||
modified: str | None
|
||||
content: bytes
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilePreview:
|
||||
name: str
|
||||
path: str
|
||||
size: int
|
||||
content_type: str
|
||||
modified: str | None
|
||||
is_binary: bool
|
||||
is_truncated: bool
|
||||
content: str | None
|
||||
|
||||
def to_dict(self) -> dict[str, object]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"path": self.path,
|
||||
"size": self.size,
|
||||
"content_type": self.content_type,
|
||||
"modified": self.modified,
|
||||
"is_binary": self.is_binary,
|
||||
"is_truncated": self.is_truncated,
|
||||
"content": self.content,
|
||||
}
|
||||
|
||||
|
||||
class UserFileStorage(Protocol):
    """Structural interface every user file storage backend implements."""

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """Return the entries of the directory at *path*."""
        ...

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Return the file at *path*, optionally limited to *max_bytes* bytes."""
        ...

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Store *content* at *path* and return the resulting entry."""
        ...

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Store the data read from *stream* at *path* and return the entry."""
        ...

    async def delete_path(self, path: str) -> bool:
        """Delete *path* and report whether anything was removed."""
        ...

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create the directory at *path* and return its entry."""
        ...
|
||||
|
||||
|
||||
class UserFileService:
    """Path-validating facade over a `UserFileStorage` backend.

    Every public method normalises the caller's virtual path before handing
    it to the storage adapter.
    """

    def __init__(self, storage: UserFileStorage) -> None:
        self.storage = storage

    @staticmethod
    def _upload_target(directory: str, filename: str) -> str:
        """Validate *filename* and return the normalised upload destination."""
        if not is_safe_filename(filename):
            raise UserFilePathError("Invalid filename")
        return normalize_user_path(_join_user_path(directory, filename), allow_root=False)

    async def browse(self, path: str = "") -> dict[str, object]:
        """List *path*; the empty path lists the fixed virtual roots."""
        normalized = normalize_user_path(path, allow_root=True)
        if normalized != "":
            listing = await self.storage.list_dir(normalized)
            items = [entry.to_dict() for entry in listing]
        else:
            # The virtual root is synthesised here; storage is not consulted.
            items = [
                UserFileEntry(name=root, path=root, type="directory").to_dict()
                for root in USER_FILE_ROOTS
            ]
        return {"path": normalized, "items": items}

    async def upload(self, directory: str, filename: str, content: bytes, *, content_type: str) -> dict[str, object]:
        """Store *content* as *filename* inside *directory*."""
        destination = self._upload_target(directory, filename)
        stored = await self.storage.write_file(destination, content, content_type=content_type)
        return stored.to_dict()

    async def upload_stream(
        self,
        directory: str,
        filename: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> dict[str, object]:
        """Store the data read from *stream* as *filename* inside *directory*."""
        destination = self._upload_target(directory, filename)
        stored = await self.storage.write_file_stream(
            destination,
            stream,
            content_type=content_type,
            max_bytes=max_bytes,
            part_size=part_size,
        )
        return stored.to_dict()

    async def write_file(self, path: str, content: bytes | str, *, content_type: str = "text/plain") -> dict[str, object]:
        """Write *content* to *path*; text is encoded as UTF-8 first."""
        normalized = normalize_user_path(path, allow_root=False)
        if isinstance(content, str):
            payload = content.encode("utf-8")
        else:
            payload = bytes(content)
        stored = await self.storage.write_file(normalized, payload, content_type=content_type)
        return stored.to_dict()

    async def download(self, path: str) -> UserFileContent:
        """Return the full content of the file at *path*."""
        normalized = normalize_user_path(path, allow_root=False)
        return await self.storage.read_file(normalized)

    async def preview(self, path: str, *, max_bytes: int = MAX_PREVIEW_BYTES) -> dict[str, object]:
        """Return a preview of *path* built from at most *max_bytes* bytes.

        Binary-looking content yields ``content=None``; text is decoded as
        UTF-8 with replacement. ``is_truncated`` is set when the stored size
        exceeds the number of bytes that were read.
        """
        normalized = normalize_user_path(path, allow_root=False)
        loaded = await self.storage.read_file(normalized, max_bytes=max_bytes)
        binary = _is_probably_binary(loaded.content, loaded.content_type)
        if binary:
            text = None
        else:
            text = loaded.content.decode("utf-8", errors="replace")
        preview = UserFilePreview(
            name=loaded.name,
            path=loaded.path,
            size=loaded.size,
            content_type=loaded.content_type,
            modified=loaded.modified,
            is_binary=binary,
            is_truncated=loaded.size > len(loaded.content),
            content=text,
        )
        return preview.to_dict()

    async def delete(self, path: str) -> bool:
        """Delete *path*; the virtual root folders themselves are protected."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Cannot delete virtual root folders")
        return await self.storage.delete_path(normalized)

    async def mkdir(self, path: str) -> dict[str, object]:
        """Create the directory *path*; the virtual roots cannot be re-created."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Virtual root folders already exist")
        created = await self.storage.mkdir(normalized)
        return created.to_dict()
|
||||
|
||||
|
||||
class LocalUserFileStorage:
|
||||
"""Filesystem-backed storage adapter for tests and local development."""
|
||||
|
||||
def __init__(self, root: Path) -> None:
|
||||
self.root = Path(root).expanduser().resolve()
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
for name in USER_FILE_ROOTS:
|
||||
(self.root / name).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async def list_dir(self, path: str) -> list[UserFileEntry]:
|
||||
target = self._path(path)
|
||||
if not target.exists():
|
||||
target.mkdir(parents=True, exist_ok=True)
|
||||
if not target.is_dir():
|
||||
raise UserFilePathError("Path is not a directory")
|
||||
entries: list[UserFileEntry] = []
|
||||
for child in sorted(target.iterdir(), key=lambda item: (not item.is_dir(), item.name.lower())):
|
||||
if child.name.startswith("."):
|
||||
continue
|
||||
entries.append(self._entry(child))
|
||||
return entries
|
||||
|
||||
async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
|
||||
target = self._path(path)
|
||||
if not target.is_file():
|
||||
raise UserFileNotFoundError("File not found")
|
||||
raw = target.read_bytes()
|
||||
selected = raw[:max_bytes] if max_bytes is not None else raw
|
||||
stat = target.stat()
|
||||
content_type, _ = mimetypes.guess_type(target.name)
|
||||
return UserFileContent(
|
||||
name=target.name,
|
||||
path=self._relative(target),
|
||||
size=stat.st_size,
|
||||
content_type=content_type or "application/octet-stream",
|
||||
modified=_iso_from_timestamp(stat.st_mtime),
|
||||
content=selected,
|
||||
)
|
||||
|
||||
async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
|
||||
target = self._path(path)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_bytes(content)
|
||||
return self._entry(target, content_type=content_type)
|
||||
|
||||
async def write_file_stream(
|
||||
self,
|
||||
path: str,
|
||||
stream: object,
|
||||
*,
|
||||
content_type: str,
|
||||
max_bytes: int | None = None,
|
||||
part_size: int = 10 * 1024 * 1024,
|
||||
) -> UserFileEntry:
|
||||
target = self._path(path)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp_name = tempfile.mkstemp(prefix=f".{target.name}.", suffix=".tmp", dir=target.parent)
|
||||
tmp_path = Path(tmp_name)
|
||||
total = 0
|
||||
try:
|
||||
with open(fd, "wb", closefd=True) as output:
|
||||
while True:
|
||||
chunk = stream.read(part_size) # type: ignore[attr-defined]
|
||||
if not chunk:
|
||||
break
|
||||
total += len(chunk)
|
||||
if max_bytes is not None and total > max_bytes:
|
||||
raise UserFileSizeError(_size_error(max_bytes))
|
||||
output.write(chunk)
|
||||
tmp_path.replace(target)
|
||||
except Exception:
|
||||
with suppress(FileNotFoundError):
|
||||
tmp_path.unlink()
|
||||
raise
|
||||
return self._entry(target, content_type=content_type)
|
||||
|
||||
async def delete_path(self, path: str) -> bool:
    """Delete a file or a whole directory tree under the local root.

    Returns:
        ``False`` when nothing exists at ``path``; ``True`` after removal.
    """
    victim = self._path(path)
    if not victim.exists():
        return False
    if not victim.is_dir():
        victim.unlink()
        return True
    # Directories are removed recursively with everything inside them.
    shutil.rmtree(victim)
    return True
|
||||
|
||||
async def mkdir(self, path: str) -> UserFileEntry:
    """Create a directory (and any missing parents) under the local root.

    Succeeds silently when the directory already exists and returns the entry
    produced by ``self._entry`` for it.
    """
    directory = self._path(path)
    directory.mkdir(parents=True, exist_ok=True)
    return self._entry(directory)
|
||||
|
||||
def _path(self, path: str) -> Path:
    """Map a user-visible path to a resolved filesystem path inside the root.

    Raises:
        UserFilePathError: If normalization rejects the path or the resolved
            location is not below ``self.root``.
    """
    relative = normalize_user_path(path, allow_root=False)
    resolved = (self.root / relative).resolve()
    try:
        # relative_to() raises ValueError when resolved is outside the root.
        resolved.relative_to(self.root)
    except ValueError as exc:
        raise UserFilePathError("Path escapes user file root") from exc
    else:
        return resolved
|
||||
|
||||
def _relative(self, path: Path) -> str:
    """Return ``path`` relative to ``self.root`` as a POSIX-style string."""
    within_root = path.relative_to(self.root)
    return within_root.as_posix()
|
||||
|
||||
def _entry(self, path: Path, *, content_type: str | None = None) -> UserFileEntry:
    """Build the ``UserFileEntry`` describing an existing file or directory.

    Directories carry no size or content type. For files the content type is
    the explicit ``content_type``, else a guess from the file name, else
    ``application/octet-stream``.
    """
    info = path.stat()
    mime_guess, _ = mimetypes.guess_type(path.name)
    if path.is_dir():
        kind = "directory"
        size = None
        resolved_type = None
    else:
        kind = "file"
        size = info.st_size
        resolved_type = content_type or mime_guess or "application/octet-stream"
    return UserFileEntry(
        name=path.name,
        path=self._relative(path),
        type=kind,
        size=size,
        content_type=resolved_type,
        modified=_iso_from_timestamp(info.st_mtime),
    )
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MinIOStorageConfig:
|
||||
endpoint: str
|
||||
access_key: str
|
||||
secret_key: str
|
||||
bucket: str
|
||||
secure: bool = False
|
||||
region: str | None = None
|
||||
namespace: str = ""
|
||||
|
||||
|
||||
class MinIOUserFileStorage:
    """MinIO-backed user file storage adapter.

    User-visible paths are validated with ``normalize_user_path`` and mapped to
    object keys under the optional ``config.namespace`` prefix. Directories are
    represented by a zero-byte ``<dir>/.keep`` marker object.
    """

    def __init__(self, config: MinIOStorageConfig) -> None:
        """Create the MinIO client.

        Raises:
            ValueError: If endpoint, access key, secret key, or bucket is empty.
        """
        if not config.endpoint or not config.access_key or not config.secret_key or not config.bucket:
            raise ValueError("MinIO storage requires endpoint, access key, secret key, and bucket")
        # Imported lazily so the module loads without the optional dependency.
        from minio import Minio

        self.config = config
        self.client = Minio(
            endpoint=config.endpoint,
            access_key=config.access_key,
            secret_key=config.secret_key,
            secure=config.secure,
            region=config.region,
        )

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List the direct children of ``path``, directories first.

        ``.keep`` marker objects and the listed directory itself are omitted.
        """
        prefix = self._object_prefix(path)
        objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
        entries: list[UserFileEntry] = []
        for obj in objects:
            object_name = str(obj.object_name or "")
            user_path = self._user_path(object_name)
            if not user_path or user_path == path or user_path.endswith("/.keep"):
                continue
            trimmed = user_path.rstrip("/")
            name = PurePosixPath(trimmed).name
            # Prefix entries are reported with a trailing slash and/or is_dir.
            is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
            entries.append(
                UserFileEntry(
                    name=name,
                    path=trimmed,
                    type="directory" if is_dir else "file",
                    size=None if is_dir else getattr(obj, "size", None),
                    content_type=None if is_dir else "application/octet-stream",
                    modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None,
                )
            )
        return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower()))

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read an object, optionally limited to its first ``max_bytes`` bytes.

        ``None`` returns the whole object; a non-positive ``max_bytes`` returns
        no content. ``size`` in the result is the stored object size.

        Raises:
            UserFileNotFoundError: If the object cannot be stat'ed or fetched.
        """
        object_name = self._object_name(path)
        bucket = self.config.bucket
        try:
            stat = self.client.stat_object(bucket, object_name)
            if stat.size == 0 or (max_bytes is not None and max_bytes <= 0):
                # A ranged GET on an empty object is rejected by the server,
                # and length=0 means "to the end" for the client, so neither
                # case may be forwarded as a ranged request.
                raw = b""
            else:
                if max_bytes is None:
                    response = self.client.get_object(bucket, object_name)
                else:
                    response = self.client.get_object(bucket, object_name, length=max_bytes)
                try:
                    raw = response.read()
                finally:
                    # Always hand the connection back, even if read() fails.
                    response.close()
                    response.release_conn()
        except Exception as exc:
            raise UserFileNotFoundError("File not found") from exc
        return UserFileContent(
            name=PurePosixPath(path).name,
            path=path,
            size=int(stat.size or len(raw)),
            content_type=stat.content_type or "application/octet-stream",
            modified=stat.last_modified.isoformat() if stat.last_modified else None,
            content=raw,
        )

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Create or replace an object with ``content``."""
        object_name = self._object_name(path)
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(content),
            length=len(content),
            content_type=content_type,
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=len(content),
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Upload ``stream`` as an object of unknown length.

        Raises:
            UserFileSizeError: Raised by the limiting reader once more than
                ``max_bytes`` bytes have been read.
        """
        object_name = self._object_name(path)
        reader = _LimitedReadStream(stream, max_bytes=max_bytes)
        # No remove_object() on failure: an upload whose reader raises is never
        # committed, so the only object that could exist at this key is the
        # previously stored file, which must survive a rejected replacement.
        self.client.put_object(
            self.config.bucket,
            object_name,
            reader,
            length=-1,
            # Multipart uploads require parts of at least 5 MiB.
            part_size=max(5 * 1024 * 1024, part_size),
            content_type=content_type,
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=reader.bytes_read,
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def delete_path(self, path: str) -> bool:
        """Delete the object at ``path`` and every object below it.

        Returns:
            ``True`` only if at least one object existed and was removed.
        """
        from minio.error import S3Error

        object_name = self._object_name(path)
        bucket = self.config.bucket
        removed = False
        try:
            # Object removal succeeds for missing keys too, so existence has to
            # be checked explicitly to report an honest result.
            self.client.stat_object(bucket, object_name)
        except S3Error:
            pass  # No object at the exact key; it may still be a directory prefix.
        else:
            self.client.remove_object(bucket, object_name)
            removed = True
        prefix = f"{object_name.rstrip('/')}/"
        for obj in self.client.list_objects(bucket, prefix=prefix, recursive=True):
            self.client.remove_object(bucket, str(obj.object_name))
            removed = True
        return removed

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create a directory by writing its zero-byte ``.keep`` marker."""
        object_name = f"{self._object_name(path).rstrip('/')}/.keep"
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(b""),
            length=0,
            content_type="application/x-directory",
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="directory",
            size=None,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    def _namespace(self) -> str:
        """Return the configured namespace without leading/trailing slashes."""
        return self.config.namespace.strip("/")

    def _object_name(self, path: str) -> str:
        """Map a user path to its object key inside the namespace.

        Raises:
            UserFilePathError: If the path is invalid or the key would leave
                the namespace.
        """
        normalized = normalize_user_path(path, allow_root=False)
        namespace = self._namespace()
        object_name = f"{namespace}/{normalized}" if namespace else normalized
        # Defense in depth: the namespace itself must not introduce traversal.
        if object_name.startswith("/") or "/../" in f"/{object_name}/":
            raise UserFilePathError("Object path escapes namespace")
        return object_name

    def _object_prefix(self, path: str) -> str:
        """Return the listing prefix (object key plus trailing slash)."""
        return f"{self._object_name(path).rstrip('/')}/"

    def _user_path(self, object_name: str) -> str:
        """Strip the namespace prefix from an object key.

        Raises:
            UserFilePathError: If the key is not inside the namespace.
        """
        namespace = self._namespace()
        if namespace:
            prefix = f"{namespace}/"
            if not object_name.startswith(prefix):
                raise UserFilePathError("Object path escapes namespace")
            return object_name[len(prefix) :]
        return object_name
|
||||
|
||||
|
||||
def normalize_user_path(path: str | None, *, allow_root: bool) -> str:
    """Validate a user-supplied path and return it in canonical form.

    Backslashes are treated as separators, ``.`` segments are dropped, and the
    result is joined with ``/``.

    Args:
        path: Raw user path; ``None`` is treated as empty.
        allow_root: Whether an empty path (the virtual root) is acceptable.

    Returns:
        The normalized path, or ``""`` for the root when ``allow_root`` is set.

    Raises:
        UserFilePathError: For absolute paths, an empty path when the root is
            not allowed, ``..`` segments, segments starting with a dot, or a
            first segment that is not one of ``USER_FILE_ROOTS``.
    """
    cleaned = (path or "").replace("\\", "/").strip()
    if cleaned.startswith("/"):
        raise UserFilePathError("Absolute paths are not allowed")

    trimmed = cleaned.strip("/")
    if not trimmed:
        if not allow_root:
            raise UserFilePathError("Path is required")
        return ""

    candidate = PurePosixPath(trimmed)
    if candidate.is_absolute():
        raise UserFilePathError("Absolute paths are not allowed")

    segments = [segment for segment in candidate.parts if segment not in ("", ".")]
    if ".." in segments:
        raise UserFilePathError("Parent-directory traversal is not allowed")
    for segment in segments:
        if segment.startswith("."):
            raise UserFilePathError("Hidden implementation paths are not allowed")
    if not segments or segments[0] not in USER_FILE_ROOTS:
        raise UserFilePathError("Path must be under uploads, outputs, shared, or tasks")
    return "/".join(segments)
|
||||
|
||||
|
||||
def is_safe_filename(filename: str) -> bool:
    """Return True for a non-empty name with no separators or leading dot."""
    if not filename:
        return False
    if filename.startswith("."):
        return False
    return not any(separator in filename for separator in ("/", "\\"))
|
||||
|
||||
|
||||
def _join_user_path(directory: str, filename: str) -> str:
    """Append ``filename`` to the normalized form of ``directory``.

    Only ``directory`` is validated (via ``normalize_user_path``); ``filename``
    is appended as given.
    """
    base = normalize_user_path(directory, allow_root=False).rstrip("/")
    return f"{base}/{filename}"
|
||||
|
||||
|
||||
def _is_probably_binary(raw: bytes, content_type: str) -> bool:
    """Heuristically decide whether ``raw`` is binary rather than text.

    Known textual content types and empty payloads are never binary.
    Otherwise the first 4096 bytes are inspected: a NUL byte or invalid UTF-8
    marks the payload as binary.

    Args:
        raw: File content (possibly only a prefix of the file).
        content_type: Declared MIME type of the content.

    Returns:
        True if the content should be treated as binary.
    """
    import codecs

    if content_type.startswith("text/") or content_type in {
        "application/json",
        "application/javascript",
        "application/xml",
        "application/x-yaml",
    }:
        return False
    if not raw:
        return False
    sample = raw[:4096]
    if b"\x00" in sample:
        return True
    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        # The sample boundary may split a multi-byte character. Only treat an
        # incomplete trailing sequence as an error when the sample is the whole
        # payload; otherwise valid UTF-8 text would be flagged as binary.
        decoder.decode(sample, final=len(raw) <= len(sample))
    except UnicodeDecodeError:
        return True
    return False
|
||||
|
||||
|
||||
def _iso_from_timestamp(value: float) -> str:
    """Format a POSIX timestamp as an ISO 8601 string in UTC."""
    moment = datetime.fromtimestamp(value, tz=timezone.utc)
    return moment.isoformat()
|
||||
|
||||
|
||||
def _safe_scope(value: str | None) -> str:
    """Reduce ``value`` to a token of alphanumerics, ``-`` and ``_``.

    Every other character becomes ``-``; leading and trailing ``-``/``_`` are
    stripped. Empty input or an empty result yields ``"interactive"``.
    """
    text = (value or "interactive").strip()
    sanitized = "".join(
        char if (char.isalnum() or char in ("-", "_")) else "-"
        for char in text
    )
    trimmed = sanitized.strip("-_")
    return trimmed if trimmed else "interactive"
|
||||
|
||||
|
||||
class _LimitedReadStream:
    """Read-only wrapper that counts bytes and enforces an optional limit."""

    def __init__(self, stream: object, *, max_bytes: int | None = None) -> None:
        """Wrap ``stream``; ``max_bytes=None`` disables the limit."""
        self.stream = stream
        self.max_bytes = max_bytes
        self.bytes_read = 0

    def read(self, size: int = -1) -> bytes:
        """Read up to ``size`` bytes from the wrapped stream.

        Returns ``b""`` at end of stream.

        Raises:
            UserFileSizeError: Once the running total exceeds ``max_bytes``.
        """
        data = self.stream.read(size)  # type: ignore[attr-defined]
        if not data:
            return b""
        self.bytes_read += len(data)
        limit = self.max_bytes
        if limit is not None and self.bytes_read > limit:
            raise UserFileSizeError(_size_error(limit))
        return data
|
||||
|
||||
|
||||
def _size_error(max_bytes: int) -> str:
    """Build the "file too large" message for a limit of ``max_bytes``."""
    limit = _human_size(max_bytes)
    return f"File too large (max {limit})"
|
||||
|
||||
|
||||
def _human_size(size: int) -> str:
    """Render a byte count with binary (1024-based) units.

    Bytes are shown without decimals; KB and above with one decimal. Values
    beyond the TB range are still expressed in TB.
    """
    value = float(size)
    if value < 1024:
        return f"{value:.0f}B"
    for unit in ("KB", "MB", "GB"):
        value /= 1024
        if value < 1024:
            return f"{value:.1f}{unit}"
    # Largest unit: used regardless of magnitude.
    return f"{value / 1024:.1f}TB"
|
||||
@ -180,8 +180,10 @@ class ObjectBackedTool(BaseTool):
|
||||
|
||||
if "current_session_id" not in arguments and hasattr(self.backend, "current_session_id"):
|
||||
arguments["current_session_id"] = context.session_id
|
||||
if "workspace" not in arguments and hasattr(self.backend, "workspace"):
|
||||
if "workspace" not in arguments and (hasattr(self.backend, "workspace") or self._backend_accepts_argument("workspace")):
|
||||
arguments["workspace"] = context.workspace
|
||||
if "services" not in arguments and self._backend_accepts_argument("services"):
|
||||
arguments["services"] = context.services
|
||||
if "metadata" not in arguments and self._backend_accepts_argument("metadata"):
|
||||
arguments["metadata"] = context.metadata
|
||||
|
||||
|
||||
@ -9,6 +9,15 @@ from .skill_view import SkillViewTool, skill_view
|
||||
from .session_search import SessionSearchTool, session_search
|
||||
from .terminal import ExecuteCodeTool, ProcessTool, TerminalTool
|
||||
from .utility import ClarifyTool, DelegateTool, SendMessageTool, SpawnTool, TodoTool
|
||||
from .user_files import (
|
||||
UserFilesCopyToWorkspaceTool,
|
||||
UserFilesDeleteTool,
|
||||
UserFilesListTool,
|
||||
UserFilesMkdirTool,
|
||||
UserFilesPublishOutputTool,
|
||||
UserFilesReadTool,
|
||||
UserFilesWriteTool,
|
||||
)
|
||||
from .web import WebFetchTool, WebSearchTool
|
||||
|
||||
__all__ = [
|
||||
@ -30,6 +39,13 @@ __all__ = [
|
||||
"SessionSearchTool",
|
||||
"TerminalTool",
|
||||
"TodoTool",
|
||||
"UserFilesCopyToWorkspaceTool",
|
||||
"UserFilesDeleteTool",
|
||||
"UserFilesListTool",
|
||||
"UserFilesMkdirTool",
|
||||
"UserFilesPublishOutputTool",
|
||||
"UserFilesReadTool",
|
||||
"UserFilesWriteTool",
|
||||
"ClarifyTool",
|
||||
"WebFetchTool",
|
||||
"WebSearchTool",
|
||||
|
||||
@ -14,7 +14,7 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from pathlib import Path
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ MAX_READ_CHARS = 120_000
|
||||
MAX_SEARCH_RESULTS = 200
|
||||
MAX_SEARCH_FILE_BYTES = 2_000_000
|
||||
MAX_SEARCH_FILES = 5_000
|
||||
USER_FILE_VIRTUAL_ROOTS = {"uploads", "outputs", "shared", "tasks"}
|
||||
SKIP_DIR_NAMES = {
|
||||
".git",
|
||||
".hg",
|
||||
@ -161,9 +162,28 @@ def _workspace_root(workspace: str | None) -> Path:
|
||||
return root
|
||||
|
||||
|
||||
def _virtual_user_file_error(user_path: str | None) -> str | None:
    """Return an error message if ``user_path`` targets a virtual user-file root.

    Workspace tools use this to redirect paths whose first segment is one of
    ``USER_FILE_VIRTUAL_ROOTS`` to the dedicated ``user_files_*`` tools.
    Returns ``None`` for empty/current-directory paths and ordinary paths.
    """
    text = str(user_path or ".").replace("\\", "/").strip()
    if text in {"", ".", "./"}:
        return None
    try:
        segments = [
            segment
            for segment in PurePosixPath(text.strip("/")).parts
            if segment not in ("", ".")
        ]
    except TypeError:
        return None
    if not segments or segments[0] not in USER_FILE_VIRTUAL_ROOTS:
        return None
    return (
        f"{user_path} is a personal agent file system path, not a workspace path. "
        "Use user_files_read or user_files_copy_to_workspace for reads; use "
        "user_files_write for shared/tasks files or user_files_publish_output for outputs."
    )
|
||||
|
||||
|
||||
def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
|
||||
"""Resolve a user path and ensure the real target stays inside workspace."""
|
||||
|
||||
if error := _virtual_user_file_error(user_path):
|
||||
raise WorkspacePathError(error)
|
||||
root = _workspace_root(workspace)
|
||||
raw_path = Path(user_path or ".").expanduser()
|
||||
candidate = raw_path if raw_path.is_absolute() else root / raw_path
|
||||
@ -178,6 +198,8 @@ def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tupl
|
||||
|
||||
|
||||
def _resolve_writable_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
|
||||
if error := _virtual_user_file_error(user_path):
|
||||
raise WorkspacePathError(error)
|
||||
root = _workspace_root(workspace)
|
||||
if not user_path or not str(user_path).strip():
|
||||
raise WorkspacePathError("path is required")
|
||||
|
||||
389
app-instance/backend/beaver/tools/builtins/user_files.py
Normal file
389
app-instance/backend/beaver/tools/builtins/user_files.py
Normal file
@ -0,0 +1,389 @@
|
||||
"""Agent-facing tools for the user-visible file system."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from beaver.foundation.config.loader import load_config
|
||||
from beaver.services.user_file_resolver import UserFileStorageResolver, build_file_auth_context
|
||||
from beaver.services.user_files import AgentUserFilePolicy, UserFileError, UserFilePathError, UserFileService
|
||||
|
||||
|
||||
MAX_WORKSPACE_STAGE_BYTES = 50 * 1024 * 1024
|
||||
|
||||
|
||||
def _json_result(success: bool, **payload: Any) -> str:
    """Serialize a tool result as indented JSON with ``success`` as first key."""
    document: dict[str, Any] = {"success": success}
    document.update(payload)
    return json.dumps(document, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def _service(workspace: str | None, services: dict[str, Any] | None = None) -> UserFileService:
    """Resolve the ``UserFileService`` for the current agent runtime.

    The Beaver config comes from ``services["beaver_config"]`` when present,
    otherwise it is loaded for ``workspace``. The acting username is the
    backend id, falling back to the client id and finally ``"agent"``.

    Raises:
        UserFileError: If no workspace is configured.
    """
    if not workspace:
        raise UserFileError("workspace is not configured for user file tools")
    runtime = services or {}
    config = runtime.get("beaver_config")
    if config is None:
        config = load_config(workspace=workspace)
    identity = config.backend_identity
    backend_id = identity.backend_id.strip() or identity.client_id.strip() or "agent"
    auth_context = build_file_auth_context(
        username=backend_id,
        config=config,
        user_id=runtime.get("user_id"),
        auth_source="beaver-agent-runtime",
    )
    resolver = UserFileStorageResolver(
        config=config,
        workspace=Path(workspace),
        auth_context=auth_context,
    )
    return await resolver.service()
|
||||
|
||||
|
||||
def _agent_policy(services: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None) -> AgentUserFilePolicy:
    """Build the agent file policy from runtime services and metadata.

    ``task_id`` is taken from ``services`` first, then ``metadata``; blank
    values become ``None``. The fallback scope is the first non-empty value of
    run id (services, then metadata), session id, or ``"interactive"``.
    """
    runtime = services or {}
    meta = metadata or {}
    raw_task = runtime.get("task_id") or meta.get("task_id") or ""
    task_id = str(raw_task).strip() or None
    scope = runtime.get("run_id") or meta.get("run_id") or meta.get("session_id") or "interactive"
    return AgentUserFilePolicy(task_id=task_id, fallback_scope=str(scope))
|
||||
|
||||
|
||||
def _workspace_root(workspace: str | None) -> Path:
    """Return the resolved workspace directory, creating it if missing.

    Raises:
        UserFilePathError: If no workspace is configured.
    """
    if workspace:
        resolved = Path(workspace).expanduser().resolve()
        resolved.mkdir(parents=True, exist_ok=True)
        return resolved
    raise UserFilePathError("workspace is not configured for user file tools")
|
||||
|
||||
|
||||
def _resolve_workspace_source(workspace: str | None, source_path: str) -> tuple[Path, Path]:
    """Resolve an existing workspace file that is about to be published.

    Relative paths are taken relative to the workspace root; absolute paths
    are accepted only if they resolve inside it.

    Returns:
        ``(workspace_root, resolved_file)``.

    Raises:
        UserFilePathError: If the path is blank, escapes the workspace, or is
            not a regular file.
        OSError: From the strict resolve when the path does not exist.
    """
    root = _workspace_root(workspace)
    if not source_path or not str(source_path).strip():
        raise UserFilePathError("source_path is required")
    requested = Path(str(source_path)).expanduser()
    if not requested.is_absolute():
        requested = root / requested
    # strict=True: the source must already exist.
    resolved = requested.resolve(strict=True)
    try:
        resolved.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("source_path escapes workspace") from exc
    if resolved.is_file():
        return root, resolved
    raise UserFilePathError("source_path must be a file")
|
||||
|
||||
|
||||
def _resolve_workspace_destination(workspace: str | None, target_path: str) -> tuple[Path, Path]:
    """Resolve a relative workspace path that a file will be copied to.

    Returns:
        ``(workspace_root, resolved_destination)``; the destination need not
        exist yet.

    Raises:
        UserFilePathError: If the path is blank, absolute, or resolves outside
            the workspace.
    """
    root = _workspace_root(workspace)
    if not target_path or not str(target_path).strip():
        raise UserFilePathError("workspace_path is required")
    requested = Path(str(target_path)).expanduser()
    if requested.is_absolute():
        raise UserFilePathError("workspace_path must be relative")
    destination = (root / requested).resolve()
    try:
        destination.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("workspace_path escapes workspace") from exc
    else:
        return root, destination
|
||||
|
||||
|
||||
def _relative_path(root: Path, path: Path) -> str:
    """Return ``path`` relative to ``root`` as a POSIX-style string."""
    inside = path.relative_to(root)
    return inside.as_posix()
|
||||
|
||||
|
||||
USER_FILES_LIST_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"description": "User file path under uploads, outputs, shared, or tasks. Empty path lists the virtual roots.",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
USER_FILES_READ_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {"type": "string", "description": "User file path to read."},
|
||||
"max_bytes": {
|
||||
"type": "integer",
|
||||
"default": 120000,
|
||||
"minimum": 1,
|
||||
"maximum": 1000000,
|
||||
"description": "Maximum bytes to return in model context.",
|
||||
},
|
||||
},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
USER_FILES_WRITE_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {"type": "string", "description": "User file path to create or replace."},
|
||||
"content": {"type": "string", "description": "Text content to write."},
|
||||
"content_type": {"type": "string", "default": "text/plain"},
|
||||
},
|
||||
"required": ["path", "content"],
|
||||
}
|
||||
|
||||
USER_FILES_DELETE_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {"path": {"type": "string", "description": "User file or directory path to delete."}},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
USER_FILES_MKDIR_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {"path": {"type": "string", "description": "User file directory path to create."}},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
USER_FILES_COPY_TO_WORKSPACE_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "Readable user file path under uploads, outputs, shared, or an authorized tasks namespace.",
|
||||
},
|
||||
"workspace_path": {
|
||||
"type": "string",
|
||||
"description": "Optional relative workspace destination. Defaults to user-files/tasks/{task_id}/<filename> or user-files/runs/<scope>/<filename>.",
|
||||
},
|
||||
},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
USER_FILES_PUBLISH_OUTPUT_PARAMETERS: dict[str, Any] = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"source_path": {
|
||||
"type": "string",
|
||||
"description": "Workspace file path to publish. Absolute paths are allowed only if they stay inside the workspace.",
|
||||
},
|
||||
"target_path": {
|
||||
"type": "string",
|
||||
"description": "Output path under outputs/, such as outputs/report.md.",
|
||||
},
|
||||
"content_type": {
|
||||
"type": "string",
|
||||
"description": "Optional content type. If omitted, Beaver guesses from the target filename.",
|
||||
},
|
||||
},
|
||||
"required": ["source_path", "target_path"],
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesListTool:
    """Agent tool that lists entries of the user-visible file system."""

    name: str = "user_files_list"
    description: str = (
        "List files and folders in the personal agent file system. Use the virtual roots only: "
        "uploads for files the user provides to the agent, outputs for agent-generated results, "
        "shared for reusable user/agent reference material, and tasks for files bound to a specific task. "
        "An empty path lists the four roots; this tool never exposes MinIO buckets, credentials, or internal workspace paths."
    )
    toolset: str = "user_files"
    always_available: bool = True
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_LIST_PARAMETERS))

    async def execute(self, *, path: str = "", workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
        """Browse ``path`` and return the listing (or the error) as JSON."""
        try:
            service = await _service(workspace, services)
            listing = await service.browse(path)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(True, **listing)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesReadTool:
    """Agent tool that returns a bounded preview of a user file."""

    name: str = "user_files_read"
    description: str = (
        "Read a bounded text preview from the personal agent file system. Use this to inspect user-provided "
        "files in uploads, long-lived shared material in shared, task files in tasks, or generated outputs in outputs. "
        "The path must stay under uploads, outputs, shared, or tasks; internal workspace and MinIO implementation paths are hidden."
    )
    toolset: str = "user_files"
    always_available: bool = True
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_READ_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        max_bytes: int = 120000,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Validate ``path`` for reading and return a preview as JSON.

        ``max_bytes`` is clamped to the range 1..1_000_000. A value that cannot
        be converted to an integer yields a JSON error result instead of an
        unhandled exception, matching how every other failure is reported.
        """
        try:
            limit = max(1, min(int(max_bytes), 1_000_000))
        except (TypeError, ValueError, OverflowError):
            # Tool arguments come from the model and may not match the schema.
            return _json_result(False, error="max_bytes must be an integer", path=path)
        try:
            path = _agent_policy(services, metadata).validate_read(path)
            service = await _service(workspace, services)
            return _json_result(True, **await service.preview(path, max_bytes=limit))
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesWriteTool:
    """Agent tool that creates or replaces a text file in the user file system."""

    name: str = "user_files_write"
    description: str = (
        "Create or replace a text file in the personal agent file system. Store agent-generated deliverables "
        "under outputs, reusable long-lived context under shared, and task-bound files under the current "
        "tasks/{task_id}/ namespace. Never write to uploads; uploaded files are immutable agent inputs. "
        "For modifications to uploaded files, copy them to the workspace, edit there, then publish to outputs."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_WRITE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        content: str,
        content_type: str = "text/plain",
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Validate ``path`` for writing, store ``content``, and report as JSON."""
        try:
            path = _agent_policy(services, metadata).validate_write(path)
            service = await _service(workspace, services)
            written = await service.write_file(path, content, content_type=content_type)
            return _json_result(True, **written)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesDeleteTool:
    """Agent tool stub for deletion; it never deletes anything itself."""

    name: str = "user_files_delete"
    description: str = (
        "Agent deletion is disabled for the personal agent file system. User-visible file deletion is owned by "
        "the Files page or user-side APIs; agents should use task/workspace cleanup instead."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_DELETE_PARAMETERS))

    async def execute(self, *, path: str, workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
        """Run the policy's delete check and always report an unsuccessful result."""
        try:
            _agent_policy(services).validate_delete(path)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        # Even when the policy check passes, no deletion is performed here.
        return _json_result(False, path=path, deleted=False)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesMkdirTool:
    """Agent tool that creates a folder in the user file system."""

    name: str = "user_files_mkdir"
    description: str = (
        "Create a subfolder in the personal agent file system under uploads, outputs, shared, or tasks. "
        "Use folders to organize agent outputs, reusable shared material, or current task-specific files. "
        "Do not create folders under uploads because uploads is user-owned input storage."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_MKDIR_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Validate ``path`` for directory creation, create it, and report as JSON."""
        try:
            path = _agent_policy(services, metadata).validate_mkdir(path)
            service = await _service(workspace, services)
            created = await service.mkdir(path)
            return _json_result(True, **created)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesCopyToWorkspaceTool:
    """Agent tool that stages a user file inside the internal workspace."""

    name: str = "user_files_copy_to_workspace"
    description: str = (
        "Copy a readable file from the personal agent file system into the internal workspace before editing, "
        "running, or validating it. Use this for user-uploaded files under uploads: the original upload remains "
        "unchanged, and the returned workspace_path can be used with workspace tools like read_file or patch_file."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_COPY_TO_WORKSPACE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        workspace_path: str | None = None,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Download ``path`` and write it to the workspace.

        Without ``workspace_path`` the file lands under
        ``user-files/<policy.task_namespace>/<filename>``. Files larger than
        ``MAX_WORKSPACE_STAGE_BYTES`` are rejected. Both user-file errors and
        OS errors are reported as a JSON failure.
        """
        try:
            policy = _agent_policy(services, metadata)
            path = policy.validate_read(path)
            service = await _service(workspace, services)
            content = await service.download(path)
            if content.size > MAX_WORKSPACE_STAGE_BYTES:
                raise UserFilePathError(f"File is too large to copy to workspace (max {MAX_WORKSPACE_STAGE_BYTES} bytes)")
            fallback_destination = f"user-files/{policy.task_namespace}/{Path(path).name}"
            root, destination = _resolve_workspace_destination(workspace, workspace_path or fallback_destination)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(content.content)
            return _json_result(
                True,
                path=path,
                workspace_path=_relative_path(root, destination),
                bytes=len(content.content),
                content_type=content.content_type,
            )
        except (UserFileError, OSError) as exc:
            return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class UserFilesPublishOutputTool:
    """Agent tool that publishes a workspace file under ``outputs/``."""

    name: str = "user_files_publish_output"
    description: str = (
        "Publish a validated workspace file to the personal agent file system under outputs/. Use this after "
        "staging and editing files in the workspace. Publishing never writes to uploads, and it hides MinIO "
        "bucket, namespace, and credential details from the agent."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_PUBLISH_OUTPUT_PARAMETERS))

    async def execute(
        self,
        *,
        source_path: str,
        target_path: str,
        content_type: str | None = None,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
    ) -> str:
        """Copy a workspace file to ``target_path`` in the user file system.

        The target must start with ``outputs/`` after trimming whitespace and
        slashes. When ``content_type`` is omitted it is guessed from the target
        name, defaulting to ``application/octet-stream``. User-file errors and
        OS errors are both reported as a JSON failure.
        """
        try:
            root, source = _resolve_workspace_source(workspace, source_path)
            normalized_target = target_path.strip().strip("/")
            if not normalized_target.startswith("outputs/"):
                raise UserFilePathError("Published output target must be under outputs/")
            guessed_type, _ = mimetypes.guess_type(normalized_target)
            effective_type = content_type or guessed_type or "application/octet-stream"
            raw = source.read_bytes()
            service = await _service(workspace, services)
            entry = await service.write_file(normalized_target, raw, content_type=effective_type)
            return _json_result(
                True,
                source_path=_relative_path(root, source),
                target_path=normalized_target,
                bytes=len(raw),
                **entry,
            )
        except (UserFileError, OSError) as exc:
            return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)
|
||||
Reference in New Issue
Block a user