merge: personal user filesystem minio integration

This commit is contained in:
2026-06-03 16:32:29 +08:00
56 changed files with 4780 additions and 115 deletions

View File

@ -48,6 +48,12 @@ from beaver.tools.builtins import (
SkillsListTool,
TerminalTool,
TodoTool,
UserFilesCopyToWorkspaceTool,
UserFilesListTool,
UserFilesMkdirTool,
UserFilesPublishOutputTool,
UserFilesReadTool,
UserFilesWriteTool,
WebFetchTool,
WebSearchTool,
WriteFileTool,
@ -220,6 +226,12 @@ class EngineLoader:
ObjectBackedTool(SearchFilesTool()),
ObjectBackedTool(WriteFileTool()),
ObjectBackedTool(PatchFileTool()),
ObjectBackedTool(UserFilesListTool()),
ObjectBackedTool(UserFilesReadTool()),
ObjectBackedTool(UserFilesWriteTool()),
ObjectBackedTool(UserFilesMkdirTool()),
ObjectBackedTool(UserFilesCopyToWorkspaceTool()),
ObjectBackedTool(UserFilesPublishOutputTool()),
ObjectBackedTool(WebFetchTool()),
ObjectBackedTool(WebSearchTool()),
ObjectBackedTool(TerminalTool()),

View File

@ -657,11 +657,17 @@ class AgentLoop:
"tool_registry": tool_registry,
"skills_loader": skills_loader,
"draft_service": getattr(loaded, "draft_service", None),
"beaver_config": loaded.config,
"task_id": task_id,
"run_id": resolved_run_id,
**self.runtime_services,
},
metadata={
"source": source,
"agent_name": self.profile.name,
"session_id": resolved_session_id,
"task_id": task_id,
"run_id": resolved_run_id,
},
)

View File

@ -12,6 +12,7 @@
from __future__ import annotations
import json
import os
import sqlite3
import threading
import time
@ -110,6 +111,12 @@ END;
"""
def _sqlite_journal_mode() -> str:
    """Return the SQLite journal mode requested through the environment.

    Reads ``BEAVER_SQLITE_JOURNAL_MODE`` (case-insensitive, surrounding
    whitespace ignored). Any value outside the supported set falls back to
    ``"DELETE"``, which is also the default when the variable is unset.
    """
    supported_modes = ("DELETE", "TRUNCATE", "PERSIST", "MEMORY", "OFF", "WAL")
    candidate = os.getenv("BEAVER_SQLITE_JOURNAL_MODE", "DELETE")
    candidate = candidate.strip().upper()
    if candidate in supported_modes:
        return candidate
    return "DELETE"
class SessionStore:
"""SQLite-backed session store."""
@ -119,7 +126,9 @@ class SessionStore:
self._lock = threading.Lock()
self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False, isolation_level=None)
self._conn.row_factory = sqlite3.Row
self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.execute("PRAGMA mmap_size=0")
self._conn.execute("PRAGMA busy_timeout=5000")
self._conn.execute(f"PRAGMA journal_mode={_sqlite_journal_mode()}")
self._conn.execute("PRAGMA foreign_keys=ON")
self._init_schema()

View File

@ -21,7 +21,7 @@ from .schema import (
)
LOCAL_MCP_CATEGORIES: dict[str, dict[str, str]] = {
"local_filesystem_mcp": {"category": "filesystem", "display_name": "本地文件工具"},
"local_filesystem_mcp": {"category": "filesystem", "display_name": "个人智能体文件系统工具"},
"local_runtime_mcp": {"category": "runtime", "display_name": "本地运行工具"},
"local_memory_mcp": {"category": "memory", "display_name": "本地记忆工具"},
"local_skills_mcp": {"category": "skills", "display_name": "本地技能工具"},

View File

@ -109,3 +109,15 @@ class AuthzClient:
async def delete_outlook_settings(self, backend_id: str) -> dict[str, Any]:
    """Issue a DELETE for the backend's Outlook settings.

    Returns the decoded response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("DELETE", f"/backends/{backend_id}/settings/outlook")
    if isinstance(response, dict):
        return response
    return {}
async def get_minio_settings(self, backend_id: str) -> dict[str, Any]:
    """Fetch the backend's MinIO settings.

    Returns the decoded response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("GET", f"/backends/{backend_id}/settings/minio")
    if isinstance(response, dict):
        return response
    return {}
async def set_minio_settings(self, backend_id: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST ``payload`` as the backend's MinIO settings.

    Returns the decoded response when it is a dict, otherwise an empty dict.
    """
    response = await self._request(
        "POST",
        f"/backends/{backend_id}/settings/minio",
        json_body=payload,
    )
    if isinstance(response, dict):
        return response
    return {}
async def delete_minio_settings(self, backend_id: str) -> dict[str, Any]:
    """Issue a DELETE for the backend's MinIO settings.

    Returns the decoded response when it is a dict, otherwise an empty dict.
    """
    response = await self._request("DELETE", f"/backends/{backend_id}/settings/minio")
    if isinstance(response, dict):
        return response
    return {}

View File

@ -27,12 +27,8 @@ from beaver.tools.builtins import (
CronTool,
DelegateTool,
ExecuteCodeTool,
ListDirectoryTool,
MemoryTool,
PatchFileTool,
ProcessTool,
ReadFileTool,
SearchFilesTool,
SendMessageTool,
SkillManageTool,
SkillViewTool,
@ -40,6 +36,12 @@ from beaver.tools.builtins import (
SpawnTool,
TerminalTool,
TodoTool,
UserFilesCopyToWorkspaceTool,
UserFilesListTool,
UserFilesMkdirTool,
UserFilesPublishOutputTool,
UserFilesReadTool,
UserFilesWriteTool,
WebFetchTool,
WebSearchTool,
WriteFileTool,
@ -47,7 +49,7 @@ from beaver.tools.builtins import (
LOCAL_TOOL_CATEGORIES = {
"filesystem": "Beaver Local Filesystem Tools",
"filesystem": "Beaver Personal Agent Filesystem Tools",
"runtime": "Beaver Local Runtime Tools",
"memory": "Beaver Local Memory Tools",
"skills": "Beaver Local Skills Tools",
@ -84,11 +86,12 @@ def _category_tools(category: str, workspace: Path) -> tuple[list[BaseTool], Too
if category == "filesystem":
tools: list[BaseTool] = [
ObjectBackedTool(ListDirectoryTool()),
ObjectBackedTool(ReadFileTool()),
ObjectBackedTool(SearchFilesTool()),
ObjectBackedTool(WriteFileTool()),
ObjectBackedTool(PatchFileTool()),
ObjectBackedTool(UserFilesListTool()),
ObjectBackedTool(UserFilesReadTool()),
ObjectBackedTool(UserFilesWriteTool()),
ObjectBackedTool(UserFilesMkdirTool()),
ObjectBackedTool(UserFilesCopyToWorkspaceTool()),
ObjectBackedTool(UserFilesPublishOutputTool()),
]
elif category == "runtime":
tools = [

View File

@ -36,6 +36,19 @@ from beaver.integrations.mcp import MCPConnectionManager
from beaver.services.agent_service import NOTIFICATION_SESSION_ID, AgentService
from beaver.services.cron_service import CronService, schedule_from_api
from beaver.services.skillhub_service import SkillHubService
from beaver.services.user_files import (
USER_FILE_ROOTS,
UserFileError,
UserFileNotFoundError,
UserFilePathError,
UserFileSizeError,
UserFileService,
)
from beaver.services.user_file_resolver import (
UserFileConfigurationError,
UserFileStorageResolver,
build_file_auth_context,
)
from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig
from beaver.skills.catalog.utils import parse_frontmatter
@ -485,6 +498,28 @@ def create_app(
app.state.handoff_codes = {}
app.state.auth_file = Path(os.getenv("BEAVER_AUTH_FILE") or "")
# 50 MiB size limit (its consumers are outside this excerpt).
max_file_size = 50 * 1024 * 1024
# Upper bound for one user-file upload; BEAVER_USER_FILES_MAX_UPLOAD_BYTES overrides the 5 GiB default.
max_user_file_upload_size = _int_env("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", 5 * 1024 * 1024 * 1024)
# Chunk size handed to the storage layer when streaming uploads; BEAVER_USER_FILES_UPLOAD_PART_SIZE overrides the 10 MiB default.
user_file_upload_part_size = _int_env("BEAVER_USER_FILES_UPLOAD_PART_SIZE", 10 * 1024 * 1024)
def _user_file_resolver(request: Request, authorization: str | None) -> UserFileStorageResolver:
    """Build the storage resolver for the caller identified by ``authorization``.

    The web user is resolved first (presumably rejecting unauthenticated
    callers - confirm in ``_require_web_user``), then a freshly booted agent
    loop supplies the config and workspace the resolver needs.
    """
    web_user = _require_web_user(app, authorization)
    booted = get_agent_service(request).create_loop().boot()
    identity = build_file_auth_context(username=web_user, config=booted.config)
    return UserFileStorageResolver(
        config=booted.config,
        workspace=booted.workspace,
        auth_context=identity,
    )
async def _user_file_service(request: Request, authorization: str | None) -> UserFileService:
    """Resolve the caller's storage and return the ready-to-use file service."""
    resolver = _user_file_resolver(request, authorization)
    return await resolver.service()
def _user_file_http_error(exc: UserFileError) -> HTTPException:
    """Translate a user-file error into the HTTP error returned to clients.

    The exception's own message is used as the detail; a per-type fallback is
    used when the message is empty. Unrecognised ``UserFileError`` subclasses
    map to a generic 400.
    """
    # (exception type, HTTP status, detail used when the exception has no message)
    status_table = (
        (UserFileNotFoundError, 404, "File not found"),
        (UserFilePathError, 400, "Invalid path"),
        (UserFileSizeError, 413, "File too large"),
        (UserFileConfigurationError, 503, "User file storage is not configured"),
    )
    message = str(exc)
    for error_type, status_code, fallback in status_table:
        if isinstance(exc, error_type):
            return HTTPException(status_code=status_code, detail=message or fallback)
    return HTTPException(status_code=400, detail=message or "User file operation failed")
@app.get("/api/ping", response_model=WebStatusResponse)
async def ping(request: Request) -> WebStatusResponse:
@ -1279,6 +1314,101 @@ def create_app(
return {"ok": True}
raise HTTPException(status_code=404, detail="File not found")
@app.get("/api/user-files/status")
async def user_files_status(
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    # Report whether user file storage is configured for the caller, as a plain dict.
    resolver = _user_file_resolver(request, authorization)
    storage_status = await resolver.status()
    return storage_status.to_dict()
@app.get("/api/user-files/browse")
async def browse_user_files(
    request: Request,
    path: str = "",
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    # List the entries under a virtual user path; an empty path lists the roots.
    try:
        file_service = await _user_file_service(request, authorization)
        return await file_service.browse(path)
    except UserFileError as error:
        # Domain errors become HTTP errors with a matching status code.
        raise _user_file_http_error(error) from error
@app.get("/api/user-files/download")
async def download_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> Response:
    # Return the file's bytes with a Content-Disposition header.
    # User-file errors are translated to HTTP errors by _user_file_http_error.
    try:
        content = await (await _user_file_service(request, authorization)).download(path)
    except UserFileError as exc:
        raise _user_file_http_error(exc) from exc
    media_type = content.content_type
    # Images are shown inline, except SVG: it is an image type that may embed
    # script, so it is always sent as an attachment like non-image content.
    is_svg = media_type.lower().startswith("image/svg")
    disposition = "inline" if media_type.startswith("image/") and not is_svg else "attachment"
    return Response(
        content=content.content,
        media_type=media_type,
        headers={"Content-Disposition": content_disposition(disposition, content.name)},
    )
@app.get("/api/user-files/preview")
async def preview_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    # Return the preview payload produced by the file service for one file.
    try:
        file_service = await _user_file_service(request, authorization)
        return await file_service.preview(path)
    except UserFileError as error:
        raise _user_file_http_error(error) from error
@app.post("/api/user-files/upload")
async def upload_user_file(
    request: Request,
    file: UploadFile = File(...),
    path: str = Form("uploads"),
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    # Stream an uploaded file into the target directory of the user file system.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    # Fast rejection when the framework already knows the size; the storage
    # layer receives the same limit through max_bytes.
    declared_size = getattr(file, "size", None)
    if isinstance(declared_size, int) and declared_size > max_user_file_upload_size:
        limit_label = _human_upload_size(max_user_file_upload_size)
        raise HTTPException(status_code=413, detail=f"File too large (max {limit_label})")

    try:
        file_service = await _user_file_service(request, authorization)
        return await file_service.upload_stream(
            path,
            file.filename,
            file.file,
            content_type=file.content_type or "application/octet-stream",
            max_bytes=max_user_file_upload_size,
            part_size=user_file_upload_part_size,
        )
    except UserFileError as error:
        raise _user_file_http_error(error) from error
@app.delete("/api/user-files/delete")
async def delete_user_file(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, bool]:
    # Delete a user file or directory; respond 404 when storage reports nothing was removed.
    try:
        file_service = await _user_file_service(request, authorization)
        was_removed = await file_service.delete(path)
    except UserFileError as error:
        raise _user_file_http_error(error) from error
    if not was_removed:
        raise HTTPException(status_code=404, detail="Path not found")
    return {"ok": True}
@app.post("/api/user-files/mkdir")
async def create_user_file_directory(
    path: str,
    request: Request,
    authorization: str | None = Header(default=None),
) -> dict[str, Any]:
    # Create a directory at the given virtual path and return its entry.
    try:
        file_service = await _user_file_service(request, authorization)
        return await file_service.mkdir(path)
    except UserFileError as error:
        raise _user_file_http_error(error) from error
@app.get("/api/workspace/browse")
async def browse_workspace_dir(request: Request, path: str = "") -> dict[str, Any]:
loaded = get_agent_service(request).create_loop().boot()
@ -3165,6 +3295,27 @@ def _handoff_replay_window_seconds() -> int:
return 15
def _int_env(name: str, default: int) -> int:
    """Read a strictly positive integer from environment variable ``name``.

    ``default`` is returned when the variable is unset, blank, not an integer,
    or not greater than zero.
    """
    text = os.getenv(name, "").strip()
    try:
        # A blank value is treated like a non-positive one: fall back below.
        parsed = int(text) if text else 0
    except ValueError:
        parsed = 0
    if parsed > 0:
        return parsed
    return default
def _human_upload_size(size: int) -> str:
    """Format a byte count using binary units from B up to TB.

    Byte values are printed without decimals; larger units use one decimal.
    Values beyond the TB range stay expressed in TB.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    amount = float(size)
    index = 0
    # Step up one unit at a time until the value fits or the units run out.
    while amount >= 1024 and index < len(units) - 1:
        amount /= 1024
        index += 1
    label = units[index]
    if index == 0:
        return f"{amount:.0f}{label}"
    return f"{amount:.1f}{label}"
def _prune_handoff_codes(app: FastAPI) -> None:
now = time.time()
replay_window = _handoff_replay_window_seconds()

View File

@ -0,0 +1,201 @@
"""Resolve the user-visible file system for web and agent callers."""
from __future__ import annotations
from dataclasses import dataclass, field
import os
from pathlib import Path
from typing import Any
import httpx
from beaver.foundation.config.schema import BeaverConfig
from .user_files import (
LocalUserFileStorage,
MinIOStorageConfig,
MinIOUserFileStorage,
USER_FILE_ROOTS,
UserFileError,
UserFileService,
)
class UserFileConfigurationError(UserFileError):
    """Signals that user file storage cannot be used because it is not (fully) configured."""
@dataclass(slots=True)
class FileAuthContext:
"""Authenticated identity used by the personal file system boundary."""
username: str
backend_id: str
storage_namespace: str
user_id: str | None = None
scopes: tuple[str, ...] = field(default_factory=tuple)
auth_source: str = "beaver-web-token"
@dataclass(slots=True)
class UserFileStorageStatus:
configured: bool
storage_mode: str
roots: list[str]
workspace_visible: bool = False
detail: str | None = None
def to_dict(self) -> dict[str, Any]:
payload: dict[str, Any] = {
"configured": self.configured,
"storage_mode": self.storage_mode,
"roots": self.roots,
"workspace_visible": self.workspace_visible,
}
if self.detail:
payload["detail"] = self.detail
return payload
class UserFileStorageResolver:
    """Build `UserFileService` from the current Beaver identity and config."""

    def __init__(
        self,
        *,
        config: BeaverConfig,
        workspace: Path,
        auth_context: FileAuthContext,
    ) -> None:
        self.config = config
        self.workspace = Path(workspace)
        self.auth_context = auth_context

    async def service(self) -> UserFileService:
        """Return a file service bound to the storage selected for this deployment.

        Local mode stores files under ``<workspace>/user_files``. Otherwise the
        MinIO settings are fetched from AuthZ and used to build the client.

        Raises:
            UserFileConfigurationError: when the MinIO settings cannot be loaded.
        """
        mode = _storage_mode(self.config)
        if mode == "local":
            return UserFileService(LocalUserFileStorage(self.workspace / "user_files"))
        settings = await self._load_minio_settings()
        return UserFileService(
            MinIOUserFileStorage(
                MinIOStorageConfig(
                    endpoint=str(settings.get("endpoint") or ""),
                    access_key=str(settings.get("access_key") or ""),
                    secret_key=str(settings.get("secret_key") or ""),
                    bucket=str(settings.get("bucket") or ""),
                    secure=bool(settings.get("secure", False)),
                    region=_clean_optional(settings.get("region")),
                    namespace=str(settings.get("namespace") or self.auth_context.storage_namespace),
                )
            )
        )

    async def status(self) -> UserFileStorageStatus:
        """Describe the storage mode and whether it is usable, without raising.

        Configuration problems in object mode are reported through
        ``configured=False`` and ``detail`` instead of an exception.
        """
        mode = _storage_mode(self.config)
        if mode == "local":
            return UserFileStorageStatus(
                configured=True,
                storage_mode="local",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
            )
        try:
            await self._load_minio_settings()
        except UserFileConfigurationError as exc:
            return UserFileStorageStatus(
                configured=False,
                storage_mode="object",
                roots=list(USER_FILE_ROOTS),
                workspace_visible=False,
                detail=str(exc),
            )
        return UserFileStorageStatus(
            configured=True,
            storage_mode="object",
            roots=list(USER_FILE_ROOTS),
            workspace_visible=False,
        )

    async def _load_minio_settings(self) -> dict[str, Any]:
        """Fetch and validate this backend's MinIO settings from AuthZ.

        Returns:
            The settings dict, with ``namespace`` defaulted to the caller's
            storage namespace when the response does not provide one.

        Raises:
            UserFileConfigurationError: when the backend identity, AuthZ, or
                the internal token is missing, the request fails, or the
                response is an error, not valid JSON, not an object, or lacks
                a required field.
        """
        backend_id = self.auth_context.backend_id.strip()
        if not backend_id:
            raise UserFileConfigurationError("User file storage backend identity is not configured")
        base_url = self.config.authz.base_url.strip()
        if not (self.config.authz.enabled and base_url):
            raise UserFileConfigurationError("AuthZ is required for deployed user file storage")
        token = (
            os.getenv("BEAVER_AUTHZ_INTERNAL_TOKEN", "").strip()
            or os.getenv("AUTHZ_INTERNAL_TOKEN", "").strip()
        )
        if not token:
            raise UserFileConfigurationError("AuthZ internal token is not configured for user file storage")
        try:
            async with httpx.AsyncClient(
                timeout=self.config.authz.request_timeout_seconds,
                follow_redirects=True,
                trust_env=False,
            ) as client:
                response = await client.get(
                    f"{base_url.rstrip('/')}/internal/backends/{backend_id}/settings/minio",
                    headers={"Authorization": f"Bearer {token}"},
                )
        except httpx.HTTPError as exc:
            raise UserFileConfigurationError(f"Unable to load user file storage settings: {exc}") from exc
        if response.status_code == 404:
            raise UserFileConfigurationError("MinIO user file storage is not configured")
        if response.is_error:
            raise UserFileConfigurationError(
                f"Unable to load user file storage settings: HTTP {response.status_code}"
            )
        try:
            payload = response.json()
        except ValueError as exc:
            # A non-JSON body would otherwise escape as a bare ValueError that
            # neither status() nor callers catching UserFileError handle.
            raise UserFileConfigurationError("Invalid MinIO settings response") from exc
        if not isinstance(payload, dict):
            raise UserFileConfigurationError("Invalid MinIO settings response")
        if not all(str(payload.get(key) or "").strip() for key in ("endpoint", "access_key", "secret_key", "bucket")):
            raise UserFileConfigurationError("MinIO user file storage settings are incomplete")
        payload.setdefault("namespace", self.auth_context.storage_namespace)
        return payload
def build_file_auth_context(
    *,
    username: str,
    config: BeaverConfig,
    user_id: str | None = None,
    scopes: tuple[str, ...] = (),
    auth_source: str = "beaver-web-token",
) -> FileAuthContext:
    """Assemble the file-system identity for ``username``.

    The backend id is taken from the config, then from the
    ``BEAVER_BACKEND_IDENTITY__BACKEND_ID`` environment variable, and finally
    falls back to the username itself. The storage namespace is derived from
    whichever backend id was chosen.
    """
    clean_username = username.strip()
    configured_id = config.backend_identity.backend_id.strip()
    if configured_id:
        backend_id = configured_id
    else:
        env_id = os.getenv("BEAVER_BACKEND_IDENTITY__BACKEND_ID", "").strip()
        backend_id = env_id or clean_username
    return FileAuthContext(
        username=clean_username,
        backend_id=backend_id,
        storage_namespace=default_user_file_namespace(backend_id),
        user_id=user_id,
        scopes=scopes,
        auth_source=auth_source,
    )
def default_user_file_namespace(backend_id: str) -> str:
    """Return the default storage namespace for ``backend_id``.

    Surrounding whitespace and then leading/trailing slashes are removed; an
    empty result maps to ``users/unconfigured``.
    """
    trimmed = backend_id.strip().strip("/")
    if not trimmed:
        return "users/unconfigured"
    return "users/" + trimmed
def _storage_mode(config: BeaverConfig) -> str:
    """Decide between ``"local"`` and ``"minio"`` user file storage.

    An explicit ``BEAVER_USER_FILES_STORAGE_MODE`` wins. Without a recognised
    override, MinIO is chosen only when AuthZ is enabled with a base URL and a
    backend id is configured; everything else is local.
    """
    override = os.getenv("BEAVER_USER_FILES_STORAGE_MODE", "").strip().lower()
    if override in ("local", "dev-local", "development"):
        return "local"
    if override in ("minio", "object", "object-storage"):
        return "minio"
    authz = config.authz
    deployed = (
        authz.enabled
        and authz.base_url.strip()
        and config.backend_identity.backend_id.strip()
    )
    return "minio" if deployed else "local"
def _clean_optional(value: Any) -> str | None:
text = str(value or "").strip()
return text or None

View File

@ -0,0 +1,630 @@
"""User-visible file system service.
This module owns the personal file-system boundary exposed to users and
agents. Storage backends can change, but callers see only virtual paths under
fixed roots.
"""
from __future__ import annotations
from contextlib import suppress
from dataclasses import dataclass
from datetime import datetime, timezone
from io import BytesIO
import mimetypes
from pathlib import Path, PurePosixPath
import shutil
import tempfile
from typing import Protocol
# Fixed top-level virtual folders; every user path must start with one of these.
USER_FILE_ROOTS = ("uploads", "outputs", "shared", "tasks")
# Default number of bytes read when previewing a file (1 MiB).
MAX_PREVIEW_BYTES = 1024 * 1024
# Message raised when an agent attempts to write under uploads/.
AGENT_UPLOADS_ERROR = "uploads/ is user-provided input storage; agents may read it but must not write it"
# Message raised when an agent attempts any delete.
AGENT_DELETE_ERROR = "agents cannot delete user-visible files; use the Files page or user-side APIs"
class UserFileError(ValueError):
    """Root of the user-file exception hierarchy."""


class UserFilePathError(UserFileError):
    """A path was rejected by the virtual path policy."""


class UserFileNotFoundError(UserFileError):
    """The requested user file does not exist."""


class UserFileSizeError(UserFileError):
    """An upload was larger than the configured limit."""
@dataclass(frozen=True, slots=True)
class AgentUserFilePolicy:
    """Path rules applied to agents working in the user file system.

    Agents may read any valid user path, may not write under ``uploads``, may
    write under ``tasks`` only inside their own task namespace, and may never
    delete.
    """

    task_id: str | None = None
    fallback_scope: str = "interactive"

    @property
    def task_namespace(self) -> str:
        """Folder under ``tasks/`` that this agent is allowed to write to."""
        if not self.task_id:
            # No task: use an interactive namespace keyed by the sanitised scope.
            return f"tasks/interactive/{_safe_scope(self.fallback_scope)}"
        return f"tasks/{self.task_id}"

    def validate_read(self, path: str) -> str:
        """Return the normalised path; any valid non-root path is readable."""
        return normalize_user_path(path, allow_root=False)

    def validate_write(self, path: str) -> str:
        """Return the normalised path when the agent may write to it.

        Raises:
            UserFilePathError: for paths under ``uploads`` or under ``tasks``
                but outside this agent's task namespace.
        """
        cleaned = normalize_user_path(path, allow_root=False)
        top_level, _, _ = cleaned.partition("/")
        if top_level == "uploads":
            raise UserFilePathError(AGENT_UPLOADS_ERROR)
        if top_level == "tasks":
            self._validate_task_namespace(cleaned)
        return cleaned

    def validate_mkdir(self, path: str) -> str:
        """Directory creation follows the same rules as writing."""
        return self.validate_write(path)

    def validate_delete(self, path: str) -> str:
        """Always raise: agents cannot delete (invalid paths fail validation first)."""
        normalize_user_path(path, allow_root=False)
        raise UserFilePathError(AGENT_DELETE_ERROR)

    def _validate_task_namespace(self, normalized: str) -> None:
        """Raise unless ``normalized`` lies strictly inside the task namespace."""
        namespace = self.task_namespace
        inside_namespace = normalized.startswith(f"{namespace}/")
        if normalized == "tasks" or not inside_namespace:
            raise UserFilePathError(f"Agent task files must be written under {namespace}/")
@dataclass(slots=True)
class UserFileEntry:
name: str
path: str
type: str
size: int | None = None
content_type: str | None = None
modified: str | None = None
def to_dict(self) -> dict[str, object]:
return {
"name": self.name,
"path": self.path,
"type": self.type,
"size": self.size,
"content_type": self.content_type,
"modified": self.modified,
}
@dataclass(slots=True)
class UserFileContent:
name: str
path: str
size: int
content_type: str
modified: str | None
content: bytes
@dataclass(slots=True)
class UserFilePreview:
name: str
path: str
size: int
content_type: str
modified: str | None
is_binary: bool
is_truncated: bool
content: str | None
def to_dict(self) -> dict[str, object]:
return {
"name": self.name,
"path": self.path,
"size": self.size,
"content_type": self.content_type,
"modified": self.modified,
"is_binary": self.is_binary,
"is_truncated": self.is_truncated,
"content": self.content,
}
class UserFileStorage(Protocol):
    """Structural interface that every user-file storage backend provides."""

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List the entries directly under ``path``."""

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read a file, optionally limiting how many bytes are returned."""

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Store ``content`` at ``path`` and describe the stored file."""

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Store the data read from ``stream`` at ``path``."""

    async def delete_path(self, path: str) -> bool:
        """Delete ``path``; the bool reports whether anything was removed."""

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create a directory at ``path`` and describe it."""
class UserFileService:
    """Validates virtual user paths and delegates the work to a storage backend."""

    def __init__(self, storage: UserFileStorage) -> None:
        self.storage = storage

    @staticmethod
    def _upload_target(directory: str, filename: str) -> str:
        """Validate ``filename`` and return the normalised destination path."""
        if not is_safe_filename(filename):
            raise UserFilePathError("Invalid filename")
        return normalize_user_path(_join_user_path(directory, filename), allow_root=False)

    async def browse(self, path: str = "") -> dict[str, object]:
        """List a directory; the empty path lists the fixed virtual roots."""
        normalized = normalize_user_path(path, allow_root=True)
        if not normalized:
            # The roots are virtual, so they are synthesised rather than read from storage.
            root_items = [
                UserFileEntry(name=root, path=root, type="directory").to_dict()
                for root in USER_FILE_ROOTS
            ]
            return {"path": "", "items": root_items}
        listing = await self.storage.list_dir(normalized)
        return {"path": normalized, "items": [item.to_dict() for item in listing]}

    async def upload(self, directory: str, filename: str, content: bytes, *, content_type: str) -> dict[str, object]:
        """Store in-memory ``content`` as ``filename`` inside ``directory``."""
        destination = self._upload_target(directory, filename)
        stored = await self.storage.write_file(destination, content, content_type=content_type)
        return stored.to_dict()

    async def upload_stream(
        self,
        directory: str,
        filename: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> dict[str, object]:
        """Store the data from ``stream`` as ``filename`` inside ``directory``."""
        destination = self._upload_target(directory, filename)
        stored = await self.storage.write_file_stream(
            destination,
            stream,
            content_type=content_type,
            max_bytes=max_bytes,
            part_size=part_size,
        )
        return stored.to_dict()

    async def write_file(self, path: str, content: bytes | str, *, content_type: str = "text/plain") -> dict[str, object]:
        """Write ``content`` to ``path``; text is encoded as UTF-8."""
        normalized = normalize_user_path(path, allow_root=False)
        if isinstance(content, str):
            payload = content.encode("utf-8")
        else:
            payload = bytes(content)
        stored = await self.storage.write_file(normalized, payload, content_type=content_type)
        return stored.to_dict()

    async def download(self, path: str) -> UserFileContent:
        """Return the full content of the file at ``path``."""
        normalized = normalize_user_path(path, allow_root=False)
        return await self.storage.read_file(normalized)

    async def preview(self, path: str, *, max_bytes: int = MAX_PREVIEW_BYTES) -> dict[str, object]:
        """Return a preview built from at most ``max_bytes`` of the file."""
        normalized = normalize_user_path(path, allow_root=False)
        loaded = await self.storage.read_file(normalized, max_bytes=max_bytes)
        binary = _is_probably_binary(loaded.content, loaded.content_type)
        if binary:
            text = None
        else:
            # A capped read can cut a multi-byte character; replace rather than fail.
            text = loaded.content.decode("utf-8", errors="replace")
        preview = UserFilePreview(
            name=loaded.name,
            path=loaded.path,
            size=loaded.size,
            content_type=loaded.content_type,
            modified=loaded.modified,
            is_binary=binary,
            is_truncated=loaded.size > len(loaded.content),
            content=text,
        )
        return preview.to_dict()

    async def delete(self, path: str) -> bool:
        """Delete ``path``; the virtual roots themselves cannot be deleted."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Cannot delete virtual root folders")
        return await self.storage.delete_path(normalized)

    async def mkdir(self, path: str) -> dict[str, object]:
        """Create a directory at ``path``; the virtual roots are rejected."""
        normalized = normalize_user_path(path, allow_root=False)
        if normalized in USER_FILE_ROOTS:
            raise UserFilePathError("Virtual root folders already exist")
        created = await self.storage.mkdir(normalized)
        return created.to_dict()
class LocalUserFileStorage:
    """Filesystem-backed storage adapter for tests and local development."""

    def __init__(self, root: Path) -> None:
        """Resolve ``root`` and make sure it and every virtual root folder exist."""
        self.root = Path(root).expanduser().resolve()
        self.root.mkdir(parents=True, exist_ok=True)
        for name in USER_FILE_ROOTS:
            (self.root / name).mkdir(parents=True, exist_ok=True)

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List visible entries under ``path``, directories first, then by name.

        A missing directory is created and listed as empty. Names starting
        with ``.`` are skipped.

        Raises:
            UserFilePathError: when ``path`` exists but is not a directory.
        """
        target = self._path(path)
        if not target.exists():
            target.mkdir(parents=True, exist_ok=True)
        if not target.is_dir():
            raise UserFilePathError("Path is not a directory")
        entries: list[UserFileEntry] = []
        for child in sorted(target.iterdir(), key=lambda item: (not item.is_dir(), item.name.lower())):
            if child.name.startswith("."):
                continue
            entries.append(self._entry(child))
        return entries

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read a file, loading at most ``max_bytes`` bytes when a limit is given.

        Only the requested prefix is read from disk, so previewing a large
        file does not pull the whole file into memory. A negative limit is
        treated as zero. ``size`` always reports the full on-disk size.

        Raises:
            UserFileNotFoundError: when ``path`` is not an existing file.
        """
        target = self._path(path)
        if not target.is_file():
            raise UserFileNotFoundError("File not found")
        with target.open("rb") as handle:
            if max_bytes is None:
                selected = handle.read()
            else:
                selected = handle.read(max(max_bytes, 0))
        stat = target.stat()
        content_type, _ = mimetypes.guess_type(target.name)
        return UserFileContent(
            name=target.name,
            path=self._relative(target),
            size=stat.st_size,
            content_type=content_type or "application/octet-stream",
            modified=_iso_from_timestamp(stat.st_mtime),
            content=selected,
        )

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Write ``content`` to ``path``, creating parent directories as needed."""
        target = self._path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(content)
        return self._entry(target, content_type=content_type)

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Copy ``stream`` to ``path`` in ``part_size`` chunks.

        Data goes to a temporary file in the destination directory and is
        moved into place only after the whole stream has been written, so a
        failed or oversized upload leaves any existing file untouched.

        Raises:
            UserFileSizeError: when more than ``max_bytes`` bytes are received.
        """
        target = self._path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        # The dot prefix keeps the temporary file out of list_dir results.
        fd, tmp_name = tempfile.mkstemp(prefix=f".{target.name}.", suffix=".tmp", dir=target.parent)
        tmp_path = Path(tmp_name)
        total = 0
        try:
            with open(fd, "wb", closefd=True) as output:
                while True:
                    chunk = stream.read(part_size)  # type: ignore[attr-defined]
                    if not chunk:
                        break
                    total += len(chunk)
                    if max_bytes is not None and total > max_bytes:
                        raise UserFileSizeError(_size_error(max_bytes))
                    output.write(chunk)
            tmp_path.replace(target)
        except Exception:
            # Clean up the partial file, then let the original error propagate.
            with suppress(FileNotFoundError):
                tmp_path.unlink()
            raise
        return self._entry(target, content_type=content_type)

    async def delete_path(self, path: str) -> bool:
        """Remove a file or a whole directory tree; return False when nothing exists."""
        target = self._path(path)
        if not target.exists():
            return False
        if target.is_dir():
            shutil.rmtree(target)
        else:
            target.unlink()
        return True

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create ``path`` (and parents) if needed and return its entry."""
        target = self._path(path)
        target.mkdir(parents=True, exist_ok=True)
        return self._entry(target)

    def _path(self, path: str) -> Path:
        """Map a virtual path to a real path, refusing anything outside the root.

        The path is resolved before the containment check, so a symlink that
        points outside the root is rejected as well.
        """
        normalized = normalize_user_path(path, allow_root=False)
        target = (self.root / normalized).resolve()
        try:
            target.relative_to(self.root)
        except ValueError as exc:
            raise UserFilePathError("Path escapes user file root") from exc
        return target

    def _relative(self, path: Path) -> str:
        """Return ``path`` relative to the storage root, using ``/`` separators."""
        return path.relative_to(self.root).as_posix()

    def _entry(self, path: Path, *, content_type: str | None = None) -> UserFileEntry:
        """Describe an existing path; directories carry no size or content type."""
        stat = path.stat()
        guessed_type, _ = mimetypes.guess_type(path.name)
        is_dir = path.is_dir()
        return UserFileEntry(
            name=path.name,
            path=self._relative(path),
            type="directory" if is_dir else "file",
            size=None if is_dir else stat.st_size,
            content_type=None if is_dir else (content_type or guessed_type or "application/octet-stream"),
            modified=_iso_from_timestamp(stat.st_mtime),
        )
@dataclass(slots=True)
class MinIOStorageConfig:
endpoint: str
access_key: str
secret_key: str
bucket: str
secure: bool = False
region: str | None = None
namespace: str = ""
class MinIOUserFileStorage:
    """MinIO-backed user file storage adapter.

    NOTE(review): the MinIO client calls made from these ``async`` methods are
    synchronous; confirm whether they should run in a worker thread.
    """

    def __init__(self, config: MinIOStorageConfig) -> None:
        """Create the MinIO client.

        Raises:
            ValueError: when endpoint, access key, secret key, or bucket is empty.
        """
        if not config.endpoint or not config.access_key or not config.secret_key or not config.bucket:
            raise ValueError("MinIO storage requires endpoint, access key, secret key, and bucket")
        # Imported here so the module can be loaded without the MinIO SDK.
        from minio import Minio

        self.config = config
        self.client = Minio(
            endpoint=config.endpoint,
            access_key=config.access_key,
            secret_key=config.secret_key,
            secure=config.secure,
            region=config.region,
        )

    async def list_dir(self, path: str) -> list[UserFileEntry]:
        """List the entries directly under ``path``, directories first.

        The ``.keep`` marker objects written by ``mkdir`` are hidden.
        """
        prefix = self._object_prefix(path)
        objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False)
        entries: list[UserFileEntry] = []
        for obj in objects:
            object_name = str(obj.object_name or "")
            user_path = self._user_path(object_name)
            if not user_path or user_path == path or user_path.endswith("/.keep"):
                continue
            trimmed = user_path.rstrip("/")
            name = PurePosixPath(trimmed).name
            is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/")
            entries.append(
                UserFileEntry(
                    name=name,
                    path=trimmed,
                    type="directory" if is_dir else "file",
                    size=None if is_dir else getattr(obj, "size", None),
                    content_type=None if is_dir else "application/octet-stream",
                    modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None,
                )
            )
        return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower()))

    async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent:
        """Read an object, requesting only ``max_bytes`` bytes when a limit is given.

        Raises:
            UserFileNotFoundError: when the object cannot be stat'ed or read.
        """
        object_name = self._object_name(path)
        try:
            stat = self.client.stat_object(self.config.bucket, object_name)
            if max_bytes is None:
                response = self.client.get_object(self.config.bucket, object_name)
            else:
                response = self.client.get_object(self.config.bucket, object_name, length=max_bytes)
            try:
                raw = response.read()
            finally:
                # Always hand the connection back, even when the read fails.
                response.close()
                response.release_conn()
        except Exception as exc:
            raise UserFileNotFoundError("File not found") from exc
        return UserFileContent(
            name=PurePosixPath(path).name,
            path=path,
            size=int(stat.size or len(raw)),
            content_type=stat.content_type or "application/octet-stream",
            modified=stat.last_modified.isoformat() if stat.last_modified else None,
            content=raw,
        )

    async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry:
        """Upload ``content`` as a single object and describe it."""
        object_name = self._object_name(path)
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(content),
            length=len(content),
            content_type=content_type,
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=len(content),
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def write_file_stream(
        self,
        path: str,
        stream: object,
        *,
        content_type: str,
        max_bytes: int | None = None,
        part_size: int = 10 * 1024 * 1024,
    ) -> UserFileEntry:
        """Upload ``stream`` as an object of unknown length.

        Raises:
            UserFileSizeError: propagated from the limited reader when the
                stream exceeds ``max_bytes``.
        """
        object_name = self._object_name(path)
        reader = _LimitedReadStream(stream, max_bytes=max_bytes)
        try:
            self.client.put_object(
                self.config.bucket,
                object_name,
                reader,
                length=-1,
                # Part sizes below 5 MiB are raised to 5 MiB.
                part_size=max(5 * 1024 * 1024, part_size),
                content_type=content_type,
            )
        except UserFileSizeError:
            # Best-effort cleanup; a failure here must not mask the size error.
            # NOTE(review): if an object already existed at this key, this
            # removal would delete it - confirm that is intended.
            with suppress(Exception):
                self.client.remove_object(self.config.bucket, object_name)
            raise
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="file",
            size=reader.bytes_read,
            content_type=content_type,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    async def delete_path(self, path: str) -> bool:
        """Delete the object at ``path`` and everything stored beneath it.

        Returns:
            True when an object or at least one nested object was removed,
            False when nothing existed at ``path``.
        """
        from minio.error import S3Error

        object_name = self._object_name(path)
        removed = False
        # Removing a missing key succeeds on S3-compatible stores, so existence
        # is checked first to keep the "nothing removed" result meaningful.
        try:
            self.client.stat_object(self.config.bucket, object_name)
        except S3Error as exc:
            if exc.code != "NoSuchKey":
                raise
        else:
            self.client.remove_object(self.config.bucket, object_name)
            removed = True
        prefix = f"{object_name.rstrip('/')}/"
        for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True):
            self.client.remove_object(self.config.bucket, str(obj.object_name))
            removed = True
        return removed

    async def mkdir(self, path: str) -> UserFileEntry:
        """Create a directory by writing an empty ``.keep`` marker object."""
        object_name = f"{self._object_name(path).rstrip('/')}/.keep"
        self.client.put_object(
            self.config.bucket,
            object_name,
            BytesIO(b""),
            length=0,
            content_type="application/x-directory",
        )
        return UserFileEntry(
            name=PurePosixPath(path).name,
            path=path,
            type="directory",
            size=None,
            modified=datetime.now(timezone.utc).isoformat(),
        )

    def _namespace(self) -> str:
        """Return the configured namespace without leading or trailing slashes."""
        return self.config.namespace.strip("/")

    def _object_name(self, path: str) -> str:
        """Map a virtual path to the namespaced object name.

        Raises:
            UserFilePathError: when the result is absolute or contains ``..``.
        """
        normalized = normalize_user_path(path, allow_root=False)
        namespace = self._namespace()
        object_name = f"{namespace}/{normalized}" if namespace else normalized
        if object_name.startswith("/") or "/../" in f"/{object_name}/":
            raise UserFilePathError("Object path escapes namespace")
        return object_name

    def _object_prefix(self, path: str) -> str:
        """Return the listing prefix for ``path``, always ending in ``/``."""
        return f"{self._object_name(path).rstrip('/')}/"

    def _user_path(self, object_name: str) -> str:
        """Strip the namespace from an object name to recover the virtual path.

        Raises:
            UserFilePathError: when the object lies outside the namespace.
        """
        namespace = self._namespace()
        if namespace:
            prefix = f"{namespace}/"
            if not object_name.startswith(prefix):
                raise UserFilePathError("Object path escapes namespace")
            return object_name[len(prefix) :]
        return object_name
def normalize_user_path(path: str | None, *, allow_root: bool) -> str:
    """Validate a virtual user path and return it in canonical form.

    Backslashes are treated as ``/`` and surrounding whitespace is dropped.
    The result joins the remaining components with ``/``; an empty string
    is returned for the root when ``allow_root`` is true.

    Raises:
        UserFilePathError: For absolute paths, an empty path when the root
            is not allowed, ``..`` components, components starting with
            ``.``, or a first component outside ``USER_FILE_ROOTS``.
    """
    text = (path or "").replace("\\", "/").strip()
    if text.startswith("/"):
        raise UserFilePathError("Absolute paths are not allowed")

    trimmed = text.strip("/")
    if not trimmed:
        if not allow_root:
            raise UserFilePathError("Path is required")
        return ""

    pure = PurePosixPath(trimmed)
    if pure.is_absolute():
        raise UserFilePathError("Absolute paths are not allowed")

    segments = [segment for segment in pure.parts if segment not in ("", ".")]
    if ".." in segments:
        raise UserFilePathError("Parent-directory traversal is not allowed")
    for segment in segments:
        if segment.startswith("."):
            raise UserFilePathError("Hidden implementation paths are not allowed")
    if not segments or segments[0] not in USER_FILE_ROOTS:
        raise UserFilePathError("Path must be under uploads, outputs, shared, or tasks")
    return "/".join(segments)
def is_safe_filename(filename: str) -> bool:
    """Return whether ``filename`` is a single, non-hidden path component.

    A name is rejected when it is empty, contains ``/`` or a backslash, or
    starts with a dot.
    """
    if not filename:
        return False
    if any(separator in filename for separator in ("/", "\\")):
        return False
    return not filename.startswith(".")
def _join_user_path(directory: str, filename: str) -> str:
    """Join a validated directory path and ``filename`` with a single ``/``.

    Only ``directory`` is passed through ``normalize_user_path``;
    ``filename`` is appended as given.
    """
    base = normalize_user_path(directory, allow_root=False).rstrip("/")
    return f"{base}/{filename}"
def _is_probably_binary(raw: bytes, content_type: str) -> bool:
if content_type.startswith("text/") or content_type in {
"application/json",
"application/javascript",
"application/xml",
"application/x-yaml",
}:
return False
if not raw:
return False
if b"\x00" in raw[:4096]:
return True
try:
raw[:4096].decode("utf-8")
except UnicodeDecodeError:
return True
return False
def _iso_from_timestamp(value: float) -> str:
return datetime.fromtimestamp(value, tz=timezone.utc).isoformat()
def _safe_scope(value: str | None) -> str:
raw = (value or "interactive").strip()
allowed = [char if char.isalnum() or char in ("-", "_") else "-" for char in raw]
cleaned = "".join(allowed).strip("-_")
return cleaned or "interactive"
class _LimitedReadStream:
def __init__(self, stream: object, *, max_bytes: int | None = None) -> None:
self.stream = stream
self.max_bytes = max_bytes
self.bytes_read = 0
def read(self, size: int = -1) -> bytes:
chunk = self.stream.read(size) # type: ignore[attr-defined]
if not chunk:
return b""
self.bytes_read += len(chunk)
if self.max_bytes is not None and self.bytes_read > self.max_bytes:
raise UserFileSizeError(_size_error(self.max_bytes))
return chunk
def _size_error(max_bytes: int) -> str:
    """Build the error message used when a file exceeds ``max_bytes``."""
    limit = _human_size(max_bytes)
    return f"File too large (max {limit})"
def _human_size(size: int) -> str:
units = ("B", "KB", "MB", "GB", "TB")
value = float(size)
for unit in units:
if value < 1024 or unit == units[-1]:
return f"{value:.0f}{unit}" if unit == "B" else f"{value:.1f}{unit}"
value /= 1024
return f"{size}B"

View File

@ -180,8 +180,10 @@ class ObjectBackedTool(BaseTool):
if "current_session_id" not in arguments and hasattr(self.backend, "current_session_id"):
arguments["current_session_id"] = context.session_id
if "workspace" not in arguments and hasattr(self.backend, "workspace"):
if "workspace" not in arguments and (hasattr(self.backend, "workspace") or self._backend_accepts_argument("workspace")):
arguments["workspace"] = context.workspace
if "services" not in arguments and self._backend_accepts_argument("services"):
arguments["services"] = context.services
if "metadata" not in arguments and self._backend_accepts_argument("metadata"):
arguments["metadata"] = context.metadata

View File

@ -9,6 +9,15 @@ from .skill_view import SkillViewTool, skill_view
from .session_search import SessionSearchTool, session_search
from .terminal import ExecuteCodeTool, ProcessTool, TerminalTool
from .utility import ClarifyTool, DelegateTool, SendMessageTool, SpawnTool, TodoTool
from .user_files import (
UserFilesCopyToWorkspaceTool,
UserFilesDeleteTool,
UserFilesListTool,
UserFilesMkdirTool,
UserFilesPublishOutputTool,
UserFilesReadTool,
UserFilesWriteTool,
)
from .web import WebFetchTool, WebSearchTool
__all__ = [
@ -30,6 +39,13 @@ __all__ = [
"SessionSearchTool",
"TerminalTool",
"TodoTool",
"UserFilesCopyToWorkspaceTool",
"UserFilesDeleteTool",
"UserFilesListTool",
"UserFilesMkdirTool",
"UserFilesPublishOutputTool",
"UserFilesReadTool",
"UserFilesWriteTool",
"ClarifyTool",
"WebFetchTool",
"WebSearchTool",

View File

@ -14,7 +14,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
import json
from pathlib import Path
from pathlib import Path, PurePosixPath
from typing import Any, Iterable
@ -24,6 +24,7 @@ MAX_READ_CHARS = 120_000
MAX_SEARCH_RESULTS = 200
MAX_SEARCH_FILE_BYTES = 2_000_000
MAX_SEARCH_FILES = 5_000
USER_FILE_VIRTUAL_ROOTS = {"uploads", "outputs", "shared", "tasks"}
SKIP_DIR_NAMES = {
".git",
".hg",
@ -161,9 +162,28 @@ def _workspace_root(workspace: str | None) -> Path:
return root
def _virtual_user_file_error(user_path: str | None) -> str | None:
raw = str(user_path or ".").replace("\\", "/").strip()
if not raw or raw in {".", "./"}:
return None
try:
parts = [part for part in PurePosixPath(raw.strip("/")).parts if part not in ("", ".")]
except TypeError:
return None
if parts and parts[0] in USER_FILE_VIRTUAL_ROOTS:
return (
f"{user_path} is a personal agent file system path, not a workspace path. "
"Use user_files_read or user_files_copy_to_workspace for reads; use "
"user_files_write for shared/tasks files or user_files_publish_output for outputs."
)
return None
def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
"""Resolve a user path and ensure the real target stays inside workspace."""
if error := _virtual_user_file_error(user_path):
raise WorkspacePathError(error)
root = _workspace_root(workspace)
raw_path = Path(user_path or ".").expanduser()
candidate = raw_path if raw_path.is_absolute() else root / raw_path
@ -178,6 +198,8 @@ def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tupl
def _resolve_writable_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
if error := _virtual_user_file_error(user_path):
raise WorkspacePathError(error)
root = _workspace_root(workspace)
if not user_path or not str(user_path).strip():
raise WorkspacePathError("path is required")

View File

@ -0,0 +1,389 @@
"""Agent-facing tools for the user-visible file system."""
from __future__ import annotations
from dataclasses import dataclass, field
import json
import mimetypes
from pathlib import Path
from typing import Any
from beaver.foundation.config.loader import load_config
from beaver.services.user_file_resolver import UserFileStorageResolver, build_file_auth_context
from beaver.services.user_files import AgentUserFilePolicy, UserFileError, UserFilePathError, UserFileService
MAX_WORKSPACE_STAGE_BYTES = 50 * 1024 * 1024
def _json_result(success: bool, **payload: Any) -> str:
return json.dumps({"success": success, **payload}, ensure_ascii=False, indent=2)
async def _service(workspace: str | None, services: dict[str, Any] | None = None) -> UserFileService:
    """Build the ``UserFileService`` used by the agent-facing tools.

    The Beaver config is taken from ``services["beaver_config"]`` when
    present and loaded for the workspace otherwise. The auth context
    username is the backend id, then the client id, then ``"agent"``.

    Raises:
        UserFileError: If ``workspace`` is empty.
    """
    if not workspace:
        raise UserFileError("workspace is not configured for user file tools")

    runtime = services or {}
    config = runtime.get("beaver_config")
    if config is None:
        config = load_config(workspace=workspace)

    identity = config.backend_identity
    username = identity.backend_id.strip() or identity.client_id.strip() or "agent"
    auth_context = build_file_auth_context(
        username=username,
        config=config,
        user_id=runtime.get("user_id"),
        auth_source="beaver-agent-runtime",
    )
    resolver = UserFileStorageResolver(
        config=config,
        workspace=Path(workspace),
        auth_context=auth_context,
    )
    return await resolver.service()
def _agent_policy(services: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None) -> AgentUserFilePolicy:
    """Derive the agent file policy from runtime services and call metadata.

    ``task_id`` is read from ``services`` first, then ``metadata``; a blank
    value becomes ``None``. The fallback scope is the first truthy value of
    the services ``run_id``, the metadata ``run_id``, the metadata
    ``session_id``, or ``"interactive"``.
    """
    runtime = services or {}
    call_meta = metadata or {}
    raw_task = runtime.get("task_id") or call_meta.get("task_id") or ""
    task_id = str(raw_task).strip() or None
    scope_source = (
        runtime.get("run_id")
        or call_meta.get("run_id")
        or call_meta.get("session_id")
        or "interactive"
    )
    return AgentUserFilePolicy(task_id=task_id, fallback_scope=str(scope_source))
def _workspace_root(workspace: str | None) -> Path:
if not workspace:
raise UserFilePathError("workspace is not configured for user file tools")
root = Path(workspace).expanduser().resolve()
root.mkdir(parents=True, exist_ok=True)
return root
def _resolve_workspace_source(workspace: str | None, source_path: str) -> tuple[Path, Path]:
    """Resolve ``source_path`` to an existing file inside the workspace.

    Relative paths are taken relative to the workspace root; absolute
    paths are accepted only when they resolve inside it.

    Returns:
        The workspace root and the resolved file path.

    Raises:
        UserFilePathError: If the path is blank, resolves outside the
            workspace, or is not a regular file.
        OSError: From strict resolution when the path does not exist.
    """
    root = _workspace_root(workspace)
    if not source_path or not str(source_path).strip():
        raise UserFilePathError("source_path is required")

    requested = Path(str(source_path)).expanduser()
    if requested.is_absolute():
        candidate = requested
    else:
        candidate = root / requested
    resolved = candidate.resolve(strict=True)

    try:
        resolved.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("source_path escapes workspace") from exc
    if resolved.is_file():
        return root, resolved
    raise UserFilePathError("source_path must be a file")
def _resolve_workspace_destination(workspace: str | None, target_path: str) -> tuple[Path, Path]:
    """Resolve a relative destination path inside the workspace.

    The target does not need to exist.

    Returns:
        The workspace root and the resolved destination path.

    Raises:
        UserFilePathError: If the path is blank, absolute, or resolves
            outside the workspace.
    """
    root = _workspace_root(workspace)
    if not target_path or not str(target_path).strip():
        raise UserFilePathError("workspace_path is required")

    requested = Path(str(target_path)).expanduser()
    if requested.is_absolute():
        raise UserFilePathError("workspace_path must be relative")

    destination = (root / requested).resolve()
    try:
        destination.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("workspace_path escapes workspace") from exc
    return root, destination
def _relative_path(root: Path, path: Path) -> str:
return path.relative_to(root).as_posix()
# Parameter schema for user_files_list: one optional `path` (defaults to "").
USER_FILES_LIST_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {
"type": "string",
"default": "",
"description": "User file path under uploads, outputs, shared, or tasks. Empty path lists the virtual roots.",
}
},
}
# Parameter schema for user_files_read: required `path`, optional `max_bytes`
# bounded to the range 1..1000000.
USER_FILES_READ_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {"type": "string", "description": "User file path to read."},
"max_bytes": {
"type": "integer",
"default": 120000,
"minimum": 1,
"maximum": 1000000,
"description": "Maximum bytes to return in model context.",
},
},
"required": ["path"],
}
# Parameter schema for user_files_write: required `path` and `content`,
# optional `content_type` (defaults to text/plain).
USER_FILES_WRITE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {"type": "string", "description": "User file path to create or replace."},
"content": {"type": "string", "description": "Text content to write."},
"content_type": {"type": "string", "default": "text/plain"},
},
"required": ["path", "content"],
}
# Parameter schema for user_files_delete: required `path`.
USER_FILES_DELETE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {"path": {"type": "string", "description": "User file or directory path to delete."}},
"required": ["path"],
}
# Parameter schema for user_files_mkdir: required `path`.
USER_FILES_MKDIR_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {"path": {"type": "string", "description": "User file directory path to create."}},
"required": ["path"],
}
# Parameter schema for user_files_copy_to_workspace: required `path`,
# optional `workspace_path` destination.
USER_FILES_COPY_TO_WORKSPACE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Readable user file path under uploads, outputs, shared, or an authorized tasks namespace.",
},
"workspace_path": {
"type": "string",
"description": "Optional relative workspace destination. Defaults to user-files/tasks/{task_id}/<filename> or user-files/runs/<scope>/<filename>.",
},
},
"required": ["path"],
}
# Parameter schema for user_files_publish_output: required `source_path` and
# `target_path`, optional `content_type`.
USER_FILES_PUBLISH_OUTPUT_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"source_path": {
"type": "string",
"description": "Workspace file path to publish. Absolute paths are allowed only if they stay inside the workspace.",
},
"target_path": {
"type": "string",
"description": "Output path under outputs/, such as outputs/report.md.",
},
"content_type": {
"type": "string",
"description": "Optional content type. If omitted, Beaver guesses from the target filename.",
},
},
"required": ["source_path", "target_path"],
}
@dataclass(slots=True)
class UserFilesListTool:
    """Tool backend that lists entries of the user-visible file system."""

    name: str = "user_files_list"
    description: str = (
        "List files and folders in the personal agent file system. Use the virtual roots only: "
        "uploads for files the user provides to the agent, outputs for agent-generated results, "
        "shared for reusable user/agent reference material, and tasks for files bound to a specific task. "
        "An empty path lists the four roots; this tool never exposes MinIO buckets, credentials, or internal workspace paths."
    )
    toolset: str = "user_files"
    always_available: bool = True
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_LIST_PARAMETERS))

    async def execute(
        self,
        *,
        path: str = "",
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
    ) -> str:
        """Browse ``path`` and return the listing as a JSON result string.

        A ``UserFileError`` is reported as a JSON failure, not raised.
        """
        try:
            file_service = await _service(workspace, services)
            listing = await file_service.browse(path)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(True, **listing)
@dataclass(slots=True)
class UserFilesReadTool:
    """Tool backend that returns a bounded preview of a user file."""

    name: str = "user_files_read"
    description: str = (
        "Read a bounded text preview from the personal agent file system. Use this to inspect user-provided "
        "files in uploads, long-lived shared material in shared, task files in tasks, or generated outputs in outputs. "
        "The path must stay under uploads, outputs, shared, or tasks; internal workspace and MinIO implementation paths are hidden."
    )
    toolset: str = "user_files"
    always_available: bool = True
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_READ_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        max_bytes: int = 120000,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Preview ``path`` after checking it against the agent read policy.

        ``max_bytes`` is clamped to the range 1..1_000_000. Failures are
        returned as JSON results and never raised: a ``max_bytes`` value
        that cannot be converted to an integer, and any ``UserFileError``.
        """
        # Tool arguments come from the model, so a malformed max_bytes must
        # produce a tool error instead of an unhandled exception.
        try:
            limit = max(1, min(int(max_bytes), 1_000_000))
        except (TypeError, ValueError, OverflowError):
            return _json_result(False, error="max_bytes must be an integer", path=path)
        try:
            path = _agent_policy(services, metadata).validate_read(path)
            preview = await (await _service(workspace, services)).preview(path, max_bytes=limit)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(True, **preview)
@dataclass(slots=True)
class UserFilesWriteTool:
    """Tool backend that creates or replaces a text file for the user."""

    name: str = "user_files_write"
    description: str = (
        "Create or replace a text file in the personal agent file system. Store agent-generated deliverables "
        "under outputs, reusable long-lived context under shared, and task-bound files under the current "
        "tasks/{task_id}/ namespace. Never write to uploads; uploaded files are immutable agent inputs. "
        "For modifications to uploaded files, copy them to the workspace, edit there, then publish to outputs."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_WRITE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        content: str,
        content_type: str = "text/plain",
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Write ``content`` to ``path`` after the agent write policy accepts it.

        A ``UserFileError`` is reported as a JSON failure, not raised.
        """
        try:
            path = _agent_policy(services, metadata).validate_write(path)
            file_service = await _service(workspace, services)
            entry = await file_service.write_file(path, content, content_type=content_type)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(True, **entry)
@dataclass(slots=True)
class UserFilesDeleteTool:
    """Tool backend for delete requests; it never deletes anything."""

    name: str = "user_files_delete"
    description: str = (
        "Agent deletion is disabled for the personal agent file system. User-visible file deletion is owned by "
        "the Files page or user-side APIs; agents should use task/workspace cleanup instead."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_DELETE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Report that ``path`` was not deleted.

        The agent policy is consulted first, built from ``services`` and
        ``metadata`` like the other user file tools. Its error, when it
        raises one, becomes the failure message. If the policy does not
        raise, the result is still a failure and carries an explicit
        message, so callers always receive an ``error`` field.
        """
        try:
            _agent_policy(services, metadata).validate_delete(path)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(
            False,
            error="Agent deletion is disabled for the personal agent file system",
            path=path,
            deleted=False,
        )
@dataclass(slots=True)
class UserFilesMkdirTool:
    """Tool backend that creates a folder in the user file system."""

    name: str = "user_files_mkdir"
    description: str = (
        "Create a subfolder in the personal agent file system under uploads, outputs, shared, or tasks. "
        "Use folders to organize agent outputs, reusable shared material, or current task-specific files. "
        "Do not create folders under uploads because uploads is user-owned input storage."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_MKDIR_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Create the directory ``path`` after the agent mkdir policy accepts it.

        A ``UserFileError`` is reported as a JSON failure, not raised.
        """
        try:
            path = _agent_policy(services, metadata).validate_mkdir(path)
            file_service = await _service(workspace, services)
            entry = await file_service.mkdir(path)
        except UserFileError as exc:
            return _json_result(False, error=str(exc), path=path)
        return _json_result(True, **entry)
@dataclass(slots=True)
class UserFilesCopyToWorkspaceTool:
    """Tool backend that stages a user file into the workspace."""

    name: str = "user_files_copy_to_workspace"
    description: str = (
        "Copy a readable file from the personal agent file system into the internal workspace before editing, "
        "running, or validating it. Use this for user-uploaded files under uploads: the original upload remains "
        "unchanged, and the returned workspace_path can be used with workspace tools like read_file or patch_file."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_COPY_TO_WORKSPACE_PARAMETERS))

    async def execute(
        self,
        *,
        path: str,
        workspace_path: str | None = None,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Download ``path`` and write it to a workspace file.

        The destination is ``workspace_path`` when given, otherwise
        ``user-files/<task namespace>/<file name>``. Files whose reported
        size exceeds ``MAX_WORKSPACE_STAGE_BYTES`` are refused.
        ``UserFileError`` and ``OSError`` are reported as JSON failures.
        """
        try:
            policy = _agent_policy(services, metadata)
            path = policy.validate_read(path)
            file_service = await _service(workspace, services)
            downloaded = await file_service.download(path)
            if downloaded.size > MAX_WORKSPACE_STAGE_BYTES:
                raise UserFilePathError(f"File is too large to copy to workspace (max {MAX_WORKSPACE_STAGE_BYTES} bytes)")
            fallback_target = f"user-files/{policy.task_namespace}/{Path(path).name}"
            root, destination = _resolve_workspace_destination(workspace, workspace_path or fallback_target)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(downloaded.content)
            return _json_result(
                True,
                path=path,
                workspace_path=_relative_path(root, destination),
                bytes=len(downloaded.content),
                content_type=downloaded.content_type,
            )
        except (UserFileError, OSError) as exc:
            return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesPublishOutputTool:
    """Tool backend that publishes a workspace file under ``outputs/``."""

    name: str = "user_files_publish_output"
    description: str = (
        "Publish a validated workspace file to the personal agent file system under outputs/. Use this after "
        "staging and editing files in the workspace. Publishing never writes to uploads, and it hides MinIO "
        "bucket, namespace, and credential details from the agent."
    )
    toolset: str = "user_files"
    always_available: bool = False
    parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_PUBLISH_OUTPUT_PARAMETERS))

    async def execute(
        self,
        *,
        source_path: str,
        target_path: str,
        content_type: str | None = None,
        workspace: str | None = None,
        services: dict[str, Any] | None = None,
    ) -> str:
        """Upload the workspace file ``source_path`` to ``target_path``.

        The target must start with ``outputs/`` and must not contain a
        ``..`` segment. The content type falls back to a guess from the
        target file name, then to ``application/octet-stream``.
        ``UserFileError`` and ``OSError`` are reported as JSON failures.
        """
        try:
            root, source = _resolve_workspace_source(workspace, source_path)
            normalized_target = target_path.strip().strip("/")
            if not normalized_target.startswith("outputs/"):
                raise UserFilePathError("Published output target must be under outputs/")
            # The prefix check alone would accept "outputs/../uploads/x".
            # Reject parent-directory segments here so the outputs-only
            # guarantee does not depend on validation done further down.
            if ".." in normalized_target.replace("\\", "/").split("/"):
                raise UserFilePathError("Published output target must not contain parent-directory segments")
            guessed_type, _ = mimetypes.guess_type(normalized_target)
            raw = source.read_bytes()
            entry = await (await _service(workspace, services)).write_file(
                normalized_target,
                raw,
                content_type=content_type or guessed_type or "application/octet-stream",
            )
            return _json_result(
                True,
                source_path=_relative_path(root, source),
                target_path=normalized_target,
                bytes=len(raw),
                **entry,
            )
        except (UserFileError, OSError) as exc:
            return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)

View File

@ -0,0 +1,104 @@
# User File System MinIO/AuthZ Setup
The user file system is exposed through Beaver APIs and `user_files_*` tools. MinIO remains an implementation detail.
The ordinary Files page should only call Beaver's `/api/user-files/*` routes and render the virtual roots `uploads/`, `outputs/`, `shared/`, and `tasks/`. It should not show bucket names, endpoint fields, access keys, secret keys, object prefixes, or MinIO administration actions.
## AuthZ Settings
Each backend identity can store MinIO settings in AuthZ:
```bash
curl -X POST "$AUTHZ_URL/backends/$BACKEND_ID/settings/minio" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $AUTHZ_ADMIN_TOKEN" \
-d '{
"endpoint": "minio.example.internal:9000",
"access_key": "user-access-key",
"secret_key": "user-secret-key",
"bucket": "beaver-user-files",
"namespace": "users/{backend_id}",
"secure": false,
"region": null
}'
```
Public reads return masked settings. Internal reads require `AUTHZ_INTERNAL_TOKEN` and return the secret key for protected MCP services.
Deployed personal files use a shared bucket with a backend-scoped namespace. For backend `alice`, Beaver maps:
- `uploads/report.pdf` to `users/alice/uploads/report.pdf`
- `outputs/summary.md` to `users/alice/outputs/summary.md`
- `tasks/task-123/result.json` to `users/alice/tasks/task-123/result.json`
The MinIO policy for Alice's access key must be limited to `beaver-user-files/users/alice/*`. The frontend must still only show Beaver virtual paths, not the shared bucket or namespace.
Check the public, masked view:
```bash
curl "$AUTHZ_URL/backends/$BACKEND_ID/settings/minio" \
-H "Authorization: Bearer $AUTHZ_ADMIN_TOKEN"
```
Check the internal protected view used by MCP services:
```bash
curl "$AUTHZ_URL/internal/backends/$BACKEND_ID/settings/minio" \
-H "Authorization: Bearer $AUTHZ_INTERNAL_TOKEN"
```
## Protected MinIO MCP
Run the MinIO MCP service in protected mode:
```bash
bw-minio-mcp serve \
--host 0.0.0.0 \
--port 8001 \
--authz-url "$AUTHZ_URL" \
--authz-token "$AUTHZ_INTERNAL_TOKEN" \
--resource-server-url "$MINIO_MCP_PUBLIC_URL/mcp" \
--state-root /var/lib/bw-minio-mcp
```
In protected mode, the MCP service does not use static MinIO credentials at startup. Each authenticated tool call resolves the backend identity from the bearer token, loads that backend's MinIO settings from AuthZ, and constructs a per-call provider.
Outside protected mode, `bw-minio-mcp serve` requires explicit `--endpoint`, `--access-key`, and `--secret-key` values. It intentionally has no embedded production fallback credentials.
## Beaver Runtime
Beaver should register the MinIO MCP endpoint with backend-token auth when raw object tools are needed:
```json
{
"tools": {
"mcpServers": {
"minio_mcp": {
"url": "https://minio-mcp.example.internal/mcp",
"auth": "oauth_backend_token",
"authAudience": "mcp:minio_mcp"
}
}
},
"authz": {
"baseUrl": "https://authz.example.internal",
"backendId": "backend-user-id"
}
}
```
Product-level file interactions should still go through Beaver's user file system:
- Frontend: `/api/user-files/status`, `/api/user-files/browse`, `/api/user-files/upload`, `/api/user-files/preview`, `/api/user-files/download`, `/api/user-files/delete`, and `/api/user-files/mkdir`.
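- Agent tools: `user_files_list`, `user_files_read`, `user_files_write`, `user_files_mkdir`, `user_files_copy_to_workspace`, and `user_files_publish_output`. `user_files_delete` is defined, but agent deletion is disabled; user-visible deletion goes through the Files page or user-side APIs.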
- Storage boundary: only `uploads/`, `outputs/`, `shared/`, and `tasks/` are valid user paths.
The local workspace browser APIs and generic filesystem tools are retained for runtime/development compatibility, but they are not the user-visible file boundary.
## Verification Checklist
- The Files page root renders exactly `uploads`, `outputs`, `shared`, and `tasks`.
- The Files page source does not call `/api/workspace/browse`.
- `/api/user-files/status` does not return local workspace paths or MinIO bucket details.
- AuthZ public settings responses mask `secret_key`.
- The protected `bw-minio-mcp` service returns a clear configuration error when a backend has no MinIO settings, instead of falling back to another user's credentials.

View File

@ -0,0 +1,12 @@
# User File System Tooling Boundary
The `personal-user-filesystem` change adds `user_files_*` tools for files that users can upload, inspect, and receive from agents. These tools enforce the same virtual roots as the web API:
- `uploads/`
- `outputs/`
- `shared/`
- `tasks/`
The existing local workspace filesystem tools remain registered for internal runtime and development workflows. They are workspace-scoped, but they are not the user-visible file boundary. Agents should use `user_files_*` tools when reading user-provided files or writing user-facing outputs.
Follow-up for stronger isolation: add a runtime policy switch that disables or narrows local workspace filesystem tools for ordinary personal-agent tasks, while keeping `user_files_*` available.

View File

@ -11,6 +11,7 @@ dependencies = [
"httpx>=0.28.0,<1.0.0",
"json-repair>=0.39.0,<1.0.0",
"litellm>=1.79.0,<2.0.0",
"minio>=7.2.0,<8.0.0",
"openai>=1.79.0,<2.0.0",
"pydantic>=2.12.0,<3.0.0",
"python-multipart>=0.0.20,<1.0.0",

View File

@ -472,4 +472,5 @@ def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
assert local.kind == "local"
assert local.category == "filesystem"
assert local.managed is True
assert local.display_name == "个人智能体文件系统工具"
assert "beaver.interfaces.mcp.tools_server" in local.args

View File

@ -6,7 +6,7 @@ import os
from pathlib import Path
from beaver.tools import ObjectBackedTool, ToolContext
from beaver.tools.builtins import ListDirectoryTool, ReadFileTool, SearchFilesTool
from beaver.tools.builtins import ListDirectoryTool, PatchFileTool, ReadFileTool, SearchFilesTool, WriteFileTool
def _run_tool(tool, arguments: dict, workspace: Path):
@ -127,3 +127,23 @@ def test_read_file_rejects_binary_files(tmp_path: Path) -> None:
assert payload["success"] is False
assert "binary" in payload["error"]
def test_workspace_tools_reject_user_file_virtual_paths(tmp_path: Path) -> None:
    """Workspace tools refuse paths that start with a user-file virtual root."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    calls = [
        (ReadFileTool(), {"path": "uploads/get_helm.sh"}),
        (ListDirectoryTool(), {"path": "outputs"}),
        (WriteFileTool(), {"path": "shared/profile.json", "content": "{}"}),
        (PatchFileTool(), {"path": "tasks/task-123/draft.md", "old_text": "a", "new_text": "b"}),
    ]
    # Run every tool first, then check the outcomes.
    results = [_run_tool(tool, arguments, workspace) for tool, arguments in calls]

    for result in results:
        payload = _payload(result)
        assert result.success is False
        assert payload["success"] is False
        assert "personal agent file system path" in payload["error"]
        assert "user_files_read" in payload["error"]

View File

@ -0,0 +1,22 @@
from __future__ import annotations
from beaver.interfaces.mcp.tools_server import _category_tools
def test_local_filesystem_mcp_exposes_personal_user_file_tools_only(tmp_path) -> None:
    """The ``filesystem`` MCP category exposes only the user file tools, in order."""
    expected_names = [
        "user_files_list",
        "user_files_read",
        "user_files_write",
        "user_files_mkdir",
        "user_files_copy_to_workspace",
        "user_files_publish_output",
    ]

    tools, _context = _category_tools("filesystem", tmp_path)
    names = [tool.spec.name for tool in tools]

    assert names == expected_names
    for workspace_tool in ("read_file", "search_files", "list_directory"):
        assert workspace_tool not in names
    assert all("personal agent file system" in tool.spec.description for tool in tools)

View File

@ -0,0 +1,153 @@
from __future__ import annotations
from io import BytesIO
import pytest
from beaver.services.user_files import (
LocalUserFileStorage,
MinIOStorageConfig,
MinIOUserFileStorage,
UserFileNotFoundError,
UserFilePathError,
UserFileSizeError,
UserFileService,
normalize_user_path,
)
def test_normalize_user_path_accepts_fixed_roots() -> None:
    """Canonical paths under the virtual roots are returned unchanged."""
    for candidate in ("uploads/readme.txt", "outputs/report.md", "tasks/task-123/draft.md"):
        assert normalize_user_path(candidate, allow_root=False) == candidate
    # The empty path is valid only when the root itself is allowed.
    assert normalize_user_path("", allow_root=True) == ""
@pytest.mark.parametrize(
    "path",
    [
        # Parent-directory traversal at the start of the path.
        "../secret.txt",
        # Absolute paths, even when they name a valid root.
        "/uploads/input.txt",
        "/outputs/result.txt",
        "/shared/profile.json",
        "/tasks/task-123/draft.md",
        # Traversal inside an otherwise valid root.
        "uploads/../state/config.json",
        # First component outside the virtual roots.
        "memory/private.txt",
        # Hidden component.
        "uploads/.internal",
        # Empty path while the root is not allowed.
        "",
    ],
)
def test_normalize_user_path_rejects_invalid_paths(path: str) -> None:
    """Every listed path is rejected with ``UserFilePathError``."""
    with pytest.raises(UserFilePathError):
        normalize_user_path(path, allow_root=False)
@pytest.mark.asyncio
async def test_user_file_service_root_and_round_trip(tmp_path) -> None:
    """Browse the root, then upload, list, preview, download and delete a file."""
    payload = b"hello user files"
    service = UserFileService(LocalUserFileStorage(tmp_path / "user-files"))

    root = await service.browse("")
    uploaded = await service.upload("uploads", "hello.txt", payload, content_type="text/plain")
    uploads = await service.browse("uploads")
    preview = await service.preview("uploads/hello.txt")
    downloaded = await service.download("uploads/hello.txt")
    deleted = await service.delete("uploads/hello.txt")

    root_names = [item["name"] for item in root["items"]]
    upload_names = [item["name"] for item in uploads["items"]]
    assert root_names == ["uploads", "outputs", "shared", "tasks"]
    assert uploaded["path"] == "uploads/hello.txt"
    assert uploaded["content_type"] == "text/plain"
    assert upload_names == ["hello.txt"]
    assert preview["content"] == "hello user files"
    assert downloaded.content == b"hello user files"
    assert deleted is True
@pytest.mark.asyncio
async def test_user_file_service_stream_upload_and_size_limit(tmp_path) -> None:
    """Streamed uploads round-trip, and an oversized stream leaves no file behind."""
    service = UserFileService(LocalUserFileStorage(tmp_path / "user-files"))
    body = b"streamed user file"

    uploaded = await service.upload_stream(
        "uploads",
        "streamed.txt",
        BytesIO(body),
        content_type="text/plain",
        max_bytes=1024,
        part_size=4,
    )
    preview = await service.preview("uploads/streamed.txt")

    assert uploaded["path"] == "uploads/streamed.txt"
    assert uploaded["size"] == len(body)
    assert preview["content"] == "streamed user file"

    # Six bytes against a five-byte limit must fail.
    oversized = BytesIO(b"abcdef")
    with pytest.raises(UserFileSizeError):
        await service.upload_stream(
            "uploads",
            "too-large.txt",
            oversized,
            content_type="text/plain",
            max_bytes=5,
            part_size=2,
        )
    # The rejected upload must not be readable afterwards.
    with pytest.raises(UserFileNotFoundError):
        await service.preview("uploads/too-large.txt")
@pytest.mark.asyncio
async def test_user_file_service_rejects_root_delete_and_traversal(tmp_path) -> None:
    """Deleting a virtual root and uploading through ``..`` both raise."""
    storage = LocalUserFileStorage(tmp_path / "user-files")
    service = UserFileService(storage)

    # A virtual root itself cannot be deleted.
    with pytest.raises(UserFilePathError):
        await service.delete("uploads")

    # The upload directory must not leave the virtual roots.
    with pytest.raises(UserFilePathError):
        await service.upload("../workspace", "hello.txt", b"x", content_type="text/plain")
@pytest.mark.asyncio
async def test_user_file_service_creates_nested_directories(tmp_path) -> None:
    """``mkdir`` creates a nested directory that shows up when browsing its parent."""
    service = UserFileService(LocalUserFileStorage(tmp_path / "user-files"))

    created = await service.mkdir("tasks/task-123/references")
    listing = await service.browse("tasks/task-123")

    child_names = [item["name"] for item in listing["items"]]
    assert created["path"] == "tasks/task-123/references"
    assert created["type"] == "directory"
    assert child_names == ["references"]
def test_minio_storage_maps_virtual_paths_under_namespace() -> None:
    """Virtual paths map to ``<namespace>/<path>`` keys and back again."""
    # Bypass __init__ so no MinIO client is constructed.
    storage = object.__new__(MinIOUserFileStorage)
    storage.config = MinIOStorageConfig(
        endpoint="minio.local:9000",
        access_key="alice-access",
        secret_key="alice-secret",
        bucket="beaver-user-files",
        namespace="users/alice",
    )

    forward_cases = {
        "uploads/report.pdf": "users/alice/uploads/report.pdf",
        "tasks/task-123/result.json": "users/alice/tasks/task-123/result.json",
    }
    for virtual_path, object_key in forward_cases.items():
        assert storage._object_name(virtual_path) == object_key
    assert storage._user_path("users/alice/outputs/summary.md") == "outputs/summary.md"
def test_minio_storage_rejects_paths_that_escape_namespace() -> None:
    """Traversal in a virtual path and a foreign-namespace object name both raise ``UserFilePathError``."""
    # object.__new__ bypasses __init__; only the config attribute is populated here.
    storage = object.__new__(MinIOUserFileStorage)
    storage.config = MinIOStorageConfig(
        endpoint="minio.local:9000",
        access_key="alice-access",
        secret_key="alice-secret",
        bucket="beaver-user-files",
        namespace="users/alice",
    )

    # Virtual path containing a ".." segment.
    with pytest.raises(UserFilePathError):
        storage._object_name("uploads/../state/config.json")

    # Object name outside the configured "users/alice" namespace.
    with pytest.raises(UserFilePathError):
        storage._user_path("users/bob/uploads/secret.txt")

View File

@ -0,0 +1,177 @@
from __future__ import annotations
import json
import pytest
from beaver.foundation.config.schema import AuthzConfig, BackendIdentityConfig, BeaverConfig
from beaver.tools.base import ObjectBackedTool, ToolContext
from beaver.tools.builtins import (
UserFilesCopyToWorkspaceTool,
UserFilesListTool,
UserFilesPublishOutputTool,
UserFilesReadTool,
UserFilesWriteTool,
)
@pytest.mark.asyncio
async def test_user_file_tools_write_read_and_list(tmp_path) -> None:
    """A file written through the write tool is returned by the list and read tools."""
    ctx = ToolContext(workspace=str(tmp_path))
    writer = ObjectBackedTool(UserFilesWriteTool())
    reader = ObjectBackedTool(UserFilesReadTool())
    lister = ObjectBackedTool(UserFilesListTool())

    write_args = {"path": "outputs/summary.md", "content": "# Summary", "content_type": "text/markdown"}
    written = await writer.invoke(write_args, ctx)
    listed = await lister.invoke({"path": "outputs"}, ctx)
    loaded = await reader.invoke({"path": "outputs/summary.md"}, ctx)

    assert written.success is True
    assert json.loads(written.content)["path"] == "outputs/summary.md"

    assert listed.success is True
    listed_names = [entry["name"] for entry in json.loads(listed.content)["items"]]
    assert listed_names == ["summary.md"]

    assert loaded.success is True
    assert json.loads(loaded.content)["content"] == "# Summary"
@pytest.mark.asyncio
async def test_user_file_tools_reject_agent_write_to_uploads(tmp_path) -> None:
    """Writing under ``uploads/`` through the write tool fails with an explanatory error."""
    ctx = ToolContext(workspace=str(tmp_path))
    writer = ObjectBackedTool(UserFilesWriteTool())

    outcome = await writer.invoke({"path": "uploads/notes.txt", "content": "changed"}, ctx)

    assert outcome.success is False
    failure_text = outcome.error or ""
    assert "uploads/ is user-provided input storage" in failure_text
@pytest.mark.asyncio
async def test_user_file_tools_enforce_current_task_namespace(tmp_path) -> None:
    """With ``task_id`` set, writes under ``tasks/`` succeed only inside ``tasks/task-123/``."""
    ctx = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    writer = ObjectBackedTool(UserFilesWriteTool())

    in_current_task = await writer.invoke({"path": "tasks/task-123/drafts/notes.md", "content": "ok"}, ctx)
    directly_under_tasks = await writer.invoke({"path": "tasks/notes.md", "content": "bad"}, ctx)
    in_other_task = await writer.invoke({"path": "tasks/task-456/notes.md", "content": "bad"}, ctx)

    assert in_current_task.success is True

    # Both rejections are expected to mention the allowed prefix in their error.
    assert directly_under_tasks.success is False
    assert "tasks/task-123/" in (directly_under_tasks.error or "")
    assert in_other_task.success is False
    assert "tasks/task-123/" in (in_other_task.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_allow_shared_context_write(tmp_path) -> None:
    """Content written under ``shared/`` with a task id set can be read back unchanged."""
    ctx = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    writer = ObjectBackedTool(UserFilesWriteTool())
    reader = ObjectBackedTool(UserFilesReadTool())
    profile_json = "{\"name\":\"Alice\"}"

    written = await writer.invoke({"path": "shared/profile.json", "content": profile_json}, ctx)
    loaded = await reader.invoke({"path": "shared/profile.json"}, ctx)

    assert written.success is True
    assert loaded.success is True
    assert json.loads(loaded.content)["content"] == profile_json
@pytest.mark.asyncio
async def test_user_file_tools_copy_to_workspace_and_publish_output(tmp_path) -> None:
    """Copy an upload into the workspace, edit the copy, publish it, and keep the upload intact."""
    original_script = ": ${USE_SUDO:=\"true\"}\n"
    edited_script = ": ${USE_SUDO:=\"false\"}\n"

    # Seed the upload directly on disk before any tool runs.
    uploads_dir = tmp_path / "user_files" / "uploads"
    uploads_dir.mkdir(parents=True)
    (uploads_dir / "get_helm.sh").write_text(original_script, encoding="utf-8")

    ctx = ToolContext(
        workspace=str(tmp_path),
        services={"task_id": "task-123"},
        metadata={"run_id": "run-1"},
    )
    copier = ObjectBackedTool(UserFilesCopyToWorkspaceTool())
    publisher = ObjectBackedTool(UserFilesPublishOutputTool())
    reader = ObjectBackedTool(UserFilesReadTool())

    copied = await copier.invoke({"path": "uploads/get_helm.sh"}, ctx)
    copied_payload = json.loads(copied.content)
    workspace_relative = copied_payload["workspace_path"]

    # Modify only the staged workspace copy.
    (tmp_path / workspace_relative).write_text(edited_script, encoding="utf-8")

    publish_args = {"source_path": workspace_relative, "target_path": "outputs/get_helm.no-sudo.sh"}
    published = await publisher.invoke(publish_args, ctx)
    original = await reader.invoke({"path": "uploads/get_helm.sh"}, ctx)
    output = await reader.invoke({"path": "outputs/get_helm.no-sudo.sh"}, ctx)

    assert copied.success is True
    assert copied_payload["workspace_path"] == "user-files/tasks/task-123/get_helm.sh"
    assert published.success is True
    assert json.loads(original.content)["content"] == original_script
    assert json.loads(output.content)["content"] == edited_script
@pytest.mark.asyncio
async def test_user_file_publish_rejects_non_output_target_and_workspace_escape(tmp_path) -> None:
    """Publishing fails for a target outside ``outputs/`` and for a source outside the workspace."""
    ctx = ToolContext(workspace=str(tmp_path))

    inside_file = tmp_path / "result.txt"
    inside_file.write_text("done", encoding="utf-8")
    # Sibling of the workspace directory, i.e. not inside it.
    outside_file = tmp_path.parent / "outside.txt"
    outside_file.write_text("outside", encoding="utf-8")

    publisher = ObjectBackedTool(UserFilesPublishOutputTool())

    to_uploads_args = {"source_path": "result.txt", "target_path": "uploads/result.txt"}
    upload_target = await publisher.invoke(to_uploads_args, ctx)
    from_outside_args = {"source_path": str(outside_file), "target_path": "outputs/result.txt"}
    escaped_source = await publisher.invoke(from_outside_args, ctx)

    assert upload_target.success is False
    assert "outputs/" in (upload_target.error or "")
    assert escaped_source.success is False
    assert "escapes workspace" in (escaped_source.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_reject_internal_workspace_paths(tmp_path) -> None:
    """Reads using ``..`` segments and writes to a ``workspace/`` path must both fail.

    The read is expected to report a parent-directory traversal error and the
    write is expected to report that the path is not under an accepted prefix.
    """
    context = ToolContext(workspace=str(tmp_path))
    read = ObjectBackedTool(UserFilesReadTool())
    write = ObjectBackedTool(UserFilesWriteTool())

    read_result = await read.invoke({"path": "uploads/../../state/secrets.json"}, context)
    write_result = await write.invoke({"path": "workspace/debug.txt", "content": "x"}, context)

    # ``error or ""`` matches the sibling tests: a missing error message then
    # surfaces as a plain assertion failure instead of a TypeError from ``in None``.
    assert read_result.success is False
    assert "Parent-directory traversal" in (read_result.error or "")
    assert write_result.success is False
    assert "Path must be under" in (write_result.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_reject_absolute_style_user_paths(tmp_path) -> None:
    """Read, write and list tools all refuse paths that begin with ``/``."""
    ctx = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    reader = ObjectBackedTool(UserFilesReadTool())
    writer = ObjectBackedTool(UserFilesWriteTool())
    lister = ObjectBackedTool(UserFilesListTool())

    outcomes = [
        await reader.invoke({"path": "/uploads/input.txt"}, ctx),
        await writer.invoke({"path": "/outputs/result.txt", "content": "x"}, ctx),
        await writer.invoke({"path": "/tasks/task-123/draft.md", "content": "x"}, ctx),
        await lister.invoke({"path": "/shared/profile.json"}, ctx),
    ]

    for outcome in outcomes:
        assert outcome.success is False
        assert "Absolute paths are not allowed" in (outcome.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_report_missing_deployed_minio_settings(tmp_path, monkeypatch) -> None:
    """With AuthZ enabled and no internal token in the environment, a write fails with a clear error."""
    # Remove both token variables so neither can supply a token.
    for token_var in ("BEAVER_AUTHZ_INTERNAL_TOKEN", "AUTHZ_INTERNAL_TOKEN"):
        monkeypatch.delenv(token_var, raising=False)

    authz = AuthzConfig(enabled=True, base_url="http://authz.local")
    identity = BackendIdentityConfig(backend_id="alice", client_id="alice", client_secret="secret")
    config = BeaverConfig(authz=authz, backend_identity=identity)

    ctx = ToolContext(workspace=str(tmp_path), services={"beaver_config": config})
    writer = ObjectBackedTool(UserFilesWriteTool())

    outcome = await writer.invoke({"path": "outputs/summary.md", "content": "# Summary"}, ctx)

    assert outcome.success is False
    assert "AuthZ internal token is not configured" in (outcome.error or "")

View File

@ -6,6 +6,14 @@ from fastapi.testclient import TestClient
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
from beaver.services.user_file_resolver import UserFileStorageResolver
from beaver.services.user_files import LocalUserFileStorage, UserFileService
def _auth_headers(app, username: str = "alice") -> dict[str, str]:
token = f"test-token-{username}"
app.state.auth_tokens[token] = username
return {"Authorization": f"Bearer {token}"}
def test_workspace_browser_api_manages_workspace_files(tmp_path: Path) -> None:
@ -68,3 +76,145 @@ def test_attachment_file_api_round_trips_uploaded_file(tmp_path: Path) -> None:
assert deleted.status_code == 200
assert deleted.json() == {"ok": True}
assert missing.status_code == 404
def test_user_files_api_uses_virtual_roots_and_hides_workspace(tmp_path: Path) -> None:
    """Browse, status, upload, preview and download work on virtual paths without storage details."""
    agent_service = AgentService(workspace=tmp_path)
    app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        auth = _auth_headers(app)
        root = client.get("/api/user-files/browse", headers=auth)
        status = client.get("/api/user-files/status", headers=auth)
        upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("hello.txt", b"hello user files", "text/plain")},
            headers=auth,
        )
        uploads = client.get("/api/user-files/browse", params={"path": "uploads"}, headers=auth)
        file_query = {"path": "uploads/hello.txt"}
        preview = client.get("/api/user-files/preview", params=file_query, headers=auth)
        download = client.get("/api/user-files/download", params=file_query, headers=auth)

    # Root listing shows exactly the four virtual roots and no "bucket" key.
    assert root.status_code == 200
    root_items = root.json()["items"]
    assert [entry["name"] for entry in root_items] == ["uploads", "outputs", "shared", "tasks"]
    assert not any("bucket" in entry for entry in root_items)

    # Status reports the workspace as hidden and carries no "base_path" key.
    assert status.status_code == 200
    status_payload = status.json()
    assert status_payload["workspace_visible"] is False
    assert "base_path" not in status_payload

    assert upload.status_code == 200
    assert upload.json()["path"] == "uploads/hello.txt"

    assert uploads.status_code == 200
    assert [entry["name"] for entry in uploads.json()["items"]] == ["hello.txt"]

    assert preview.status_code == 200
    assert preview.json()["content"] == "hello user files"

    assert download.status_code == 200
    assert download.content == b"hello user files"
def test_user_files_api_rejects_invalid_paths_and_root_delete(tmp_path: Path) -> None:
    """Traversal, unknown roots, leading-slash paths and deleting ``uploads`` all return HTTP 400."""
    agent_service = AgentService(workspace=tmp_path)
    app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        auth = _auth_headers(app)
        # Insertion order below is the request order.
        rejected = {
            "traversal": client.get(
                "/api/user-files/browse", params={"path": "uploads/../state"}, headers=auth
            ),
            "unknown_root": client.get(
                "/api/user-files/browse", params={"path": "memory/private.txt"}, headers=auth
            ),
            "absolute_browse": client.get(
                "/api/user-files/browse", params={"path": "/uploads/input.txt"}, headers=auth
            ),
            "absolute_download": client.get(
                "/api/user-files/download", params={"path": "/outputs/result.txt"}, headers=auth
            ),
            "absolute_preview": client.get(
                "/api/user-files/preview", params={"path": "/shared/profile.json"}, headers=auth
            ),
            "absolute_mkdir": client.post(
                "/api/user-files/mkdir", params={"path": "/tasks/task-123/draft.md"}, headers=auth
            ),
            "absolute_upload": client.post(
                "/api/user-files/upload",
                data={"path": "/uploads"},
                files={"file": ("input.txt", b"x", "text/plain")},
                headers=auth,
            ),
            "delete_root": client.delete(
                "/api/user-files/delete", params={"path": "uploads"}, headers=auth
            ),
        }

    for label, response in rejected.items():
        assert response.status_code == 400, label
def test_user_files_api_rejects_anonymous_access_before_storage(tmp_path: Path) -> None:
    """Requests sent without an ``Authorization`` header receive HTTP 401 on every endpoint tried."""
    agent_service = AgentService(workspace=tmp_path)
    app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        # No headers are passed on any of these calls; insertion order is the request order.
        anonymous = {
            "browse": client.get("/api/user-files/browse"),
            "status": client.get("/api/user-files/status"),
            "upload": client.post(
                "/api/user-files/upload",
                data={"path": "uploads"},
                files={"file": ("hello.txt", b"hello user files", "text/plain")},
            ),
            "delete": client.delete("/api/user-files/delete", params={"path": "uploads/hello.txt"}),
            "mkdir": client.post("/api/user-files/mkdir", params={"path": "uploads/new"}),
        }

    for label, response in anonymous.items():
        assert response.status_code == 401, label
def test_user_files_api_authenticated_request_resolves_identity(tmp_path: Path, monkeypatch) -> None:
    """An authenticated upload reaches the storage resolver with alice's identity attached."""
    agent_service = AgentService(workspace=tmp_path)
    app = create_app(service=agent_service, manage_service_lifecycle=False)
    captured_contexts = []

    async def recording_service(self):
        # Record the resolver's auth context, then hand back local storage.
        captured_contexts.append(self.auth_context)
        local_storage = LocalUserFileStorage(tmp_path / "user-files")
        return UserFileService(local_storage)

    monkeypatch.setattr(UserFileStorageResolver, "service", recording_service)

    with TestClient(app) as client:
        alice_headers = _auth_headers(app, "alice")
        upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("alice.txt", b"alice", "text/plain")},
            headers=alice_headers,
        )

    assert upload.status_code == 200
    assert captured_contexts
    first_context = captured_contexts[0]
    assert first_context.username == "alice"
    assert first_context.backend_id == "alice"
    assert first_context.storage_namespace == "users/alice"
def test_user_files_api_streams_upload_and_enforces_configured_limit(tmp_path: Path, monkeypatch) -> None:
    """With the upload limit set to 5 bytes, a 5-byte file is stored and a 6-byte file gets HTTP 413."""
    monkeypatch.setenv("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", "5")
    agent_service = AgentService(workspace=tmp_path)
    app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        auth = _auth_headers(app)
        # Exactly at the limit.
        ok_upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("small.txt", b"abcde", "text/plain")},
            headers=auth,
        )
        # One byte over the limit.
        too_large = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("large.txt", b"abcdef", "text/plain")},
            headers=auth,
        )
        preview = client.get(
            "/api/user-files/preview",
            params={"path": "uploads/small.txt"},
            headers=auth,
        )

    assert ok_upload.status_code == 200
    assert ok_upload.json()["path"] == "uploads/small.txt"

    assert too_large.status_code == 413
    assert "File too large" in too_large.json()["detail"]

    assert preview.status_code == 200
    assert preview.json()["content"] == "abcde"

View File

@ -192,6 +192,49 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
]
[[package]]
name = "argon2-cffi"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "argon2-cffi-bindings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0e/89/ce5af8a7d472a67cc819d5d998aa8c82c5d860608c4db9f46f1162d7dab9/argon2_cffi-25.1.0.tar.gz", hash = "sha256:694ae5cc8a42f4c4e2bf2ca0e64e51e23a040c6a517a85074683d3959e1346c1", size = 45706, upload-time = "2025-06-03T06:55:32.073Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4f/d3/a8b22fa575b297cd6e3e3b0155c7e25db170edf1c74783d6a31a2490b8d9/argon2_cffi-25.1.0-py3-none-any.whl", hash = "sha256:fdc8b074db390fccb6eb4a3604ae7231f219aa669a2652e0f20e16ba513d5741", size = 14657, upload-time = "2025-06-03T06:55:30.804Z" },
]
[[package]]
name = "argon2-cffi-bindings"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5c/2d/db8af0df73c1cf454f71b2bbe5e356b8c1f8041c979f505b3d3186e520a9/argon2_cffi_bindings-25.1.0.tar.gz", hash = "sha256:b957f3e6ea4d55d820e40ff76f450952807013d361a65d7f28acc0acbf29229d", size = 1783441, upload-time = "2025-07-30T10:02:05.147Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/97/3c0a35f46e52108d4707c44b95cfe2afcafc50800b5450c197454569b776/argon2_cffi_bindings-25.1.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:3d3f05610594151994ca9ccb3c771115bdb4daef161976a266f0dd8aa9996b8f", size = 54393, upload-time = "2025-07-30T10:01:40.97Z" },
{ url = "https://files.pythonhosted.org/packages/9d/f4/98bbd6ee89febd4f212696f13c03ca302b8552e7dbf9c8efa11ea4a388c3/argon2_cffi_bindings-25.1.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8b8efee945193e667a396cbc7b4fb7d357297d6234d30a489905d96caabde56b", size = 29328, upload-time = "2025-07-30T10:01:41.916Z" },
{ url = "https://files.pythonhosted.org/packages/43/24/90a01c0ef12ac91a6be05969f29944643bc1e5e461155ae6559befa8f00b/argon2_cffi_bindings-25.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3c6702abc36bf3ccba3f802b799505def420a1b7039862014a65db3205967f5a", size = 31269, upload-time = "2025-07-30T10:01:42.716Z" },
{ url = "https://files.pythonhosted.org/packages/d4/d3/942aa10782b2697eee7af5e12eeff5ebb325ccfb86dd8abda54174e377e4/argon2_cffi_bindings-25.1.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1c70058c6ab1e352304ac7e3b52554daadacd8d453c1752e547c76e9c99ac44", size = 86558, upload-time = "2025-07-30T10:01:43.943Z" },
{ url = "https://files.pythonhosted.org/packages/0d/82/b484f702fec5536e71836fc2dbc8c5267b3f6e78d2d539b4eaa6f0db8bf8/argon2_cffi_bindings-25.1.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e2fd3bfbff3c5d74fef31a722f729bf93500910db650c925c2d6ef879a7e51cb", size = 92364, upload-time = "2025-07-30T10:01:44.887Z" },
{ url = "https://files.pythonhosted.org/packages/c9/c1/a606ff83b3f1735f3759ad0f2cd9e038a0ad11a3de3b6c673aa41c24bb7b/argon2_cffi_bindings-25.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4f9665de60b1b0e99bcd6be4f17d90339698ce954cfd8d9cf4f91c995165a92", size = 85637, upload-time = "2025-07-30T10:01:46.225Z" },
{ url = "https://files.pythonhosted.org/packages/44/b4/678503f12aceb0262f84fa201f6027ed77d71c5019ae03b399b97caa2f19/argon2_cffi_bindings-25.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ba92837e4a9aa6a508c8d2d7883ed5a8f6c308c89a4790e1e447a220deb79a85", size = 91934, upload-time = "2025-07-30T10:01:47.203Z" },
{ url = "https://files.pythonhosted.org/packages/f0/c7/f36bd08ef9bd9f0a9cff9428406651f5937ce27b6c5b07b92d41f91ae541/argon2_cffi_bindings-25.1.0-cp314-cp314t-win32.whl", hash = "sha256:84a461d4d84ae1295871329b346a97f68eade8c53b6ed9a7ca2d7467f3c8ff6f", size = 28158, upload-time = "2025-07-30T10:01:48.341Z" },
{ url = "https://files.pythonhosted.org/packages/b3/80/0106a7448abb24a2c467bf7d527fe5413b7fdfa4ad6d6a96a43a62ef3988/argon2_cffi_bindings-25.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b55aec3565b65f56455eebc9b9f34130440404f27fe21c3b375bf1ea4d8fbae6", size = 32597, upload-time = "2025-07-30T10:01:49.112Z" },
{ url = "https://files.pythonhosted.org/packages/05/b8/d663c9caea07e9180b2cb662772865230715cbd573ba3b5e81793d580316/argon2_cffi_bindings-25.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:87c33a52407e4c41f3b70a9c2d3f6056d88b10dad7695be708c5021673f55623", size = 28231, upload-time = "2025-07-30T10:01:49.92Z" },
{ url = "https://files.pythonhosted.org/packages/1d/57/96b8b9f93166147826da5f90376e784a10582dd39a393c99bb62cfcf52f0/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:aecba1723ae35330a008418a91ea6cfcedf6d31e5fbaa056a166462ff066d500", size = 54121, upload-time = "2025-07-30T10:01:50.815Z" },
{ url = "https://files.pythonhosted.org/packages/0a/08/a9bebdb2e0e602dde230bdde8021b29f71f7841bd54801bcfd514acb5dcf/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2630b6240b495dfab90aebe159ff784d08ea999aa4b0d17efa734055a07d2f44", size = 29177, upload-time = "2025-07-30T10:01:51.681Z" },
{ url = "https://files.pythonhosted.org/packages/b6/02/d297943bcacf05e4f2a94ab6f462831dc20158614e5d067c35d4e63b9acb/argon2_cffi_bindings-25.1.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:7aef0c91e2c0fbca6fc68e7555aa60ef7008a739cbe045541e438373bc54d2b0", size = 31090, upload-time = "2025-07-30T10:01:53.184Z" },
{ url = "https://files.pythonhosted.org/packages/c1/93/44365f3d75053e53893ec6d733e4a5e3147502663554b4d864587c7828a7/argon2_cffi_bindings-25.1.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e021e87faa76ae0d413b619fe2b65ab9a037f24c60a1e6cc43457ae20de6dc6", size = 81246, upload-time = "2025-07-30T10:01:54.145Z" },
{ url = "https://files.pythonhosted.org/packages/09/52/94108adfdd6e2ddf58be64f959a0b9c7d4ef2fa71086c38356d22dc501ea/argon2_cffi_bindings-25.1.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e924cfc503018a714f94a49a149fdc0b644eaead5d1f089330399134fa028a", size = 87126, upload-time = "2025-07-30T10:01:55.074Z" },
{ url = "https://files.pythonhosted.org/packages/72/70/7a2993a12b0ffa2a9271259b79cc616e2389ed1a4d93842fac5a1f923ffd/argon2_cffi_bindings-25.1.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c87b72589133f0346a1cb8d5ecca4b933e3c9b64656c9d175270a000e73b288d", size = 80343, upload-time = "2025-07-30T10:01:56.007Z" },
{ url = "https://files.pythonhosted.org/packages/78/9a/4e5157d893ffc712b74dbd868c7f62365618266982b64accab26bab01edc/argon2_cffi_bindings-25.1.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1db89609c06afa1a214a69a462ea741cf735b29a57530478c06eb81dd403de99", size = 86777, upload-time = "2025-07-30T10:01:56.943Z" },
{ url = "https://files.pythonhosted.org/packages/74/cd/15777dfde1c29d96de7f18edf4cc94c385646852e7c7b0320aa91ccca583/argon2_cffi_bindings-25.1.0-cp39-abi3-win32.whl", hash = "sha256:473bcb5f82924b1becbb637b63303ec8d10e84c8d241119419897a26116515d2", size = 27180, upload-time = "2025-07-30T10:01:57.759Z" },
{ url = "https://files.pythonhosted.org/packages/e2/c6/a759ece8f1829d1f162261226fbfd2c6832b3ff7657384045286d2afa384/argon2_cffi_bindings-25.1.0-cp39-abi3-win_amd64.whl", hash = "sha256:a98cd7d17e9f7ce244c0803cad3c23a7d379c301ba618a5fa76a67d116618b98", size = 31715, upload-time = "2025-07-30T10:01:58.56Z" },
{ url = "https://files.pythonhosted.org/packages/42/b9/f8d6fa329ab25128b7e98fd83a3cb34d9db5b059a9847eddb840a0af45dd/argon2_cffi_bindings-25.1.0-cp39-abi3-win_arm64.whl", hash = "sha256:b0fdbcf513833809c882823f98dc2f931cf659d9a1429616ac3adebb49f5db94", size = 27149, upload-time = "2025-07-30T10:01:59.329Z" },
]
[[package]]
name = "attrs"
version = "26.1.0"
@ -244,6 +287,7 @@ dependencies = [
{ name = "httpx" },
{ name = "json-repair" },
{ name = "litellm" },
{ name = "minio" },
{ name = "openai" },
{ name = "pydantic" },
{ name = "python-multipart" },
@ -287,6 +331,7 @@ requires-dist = [
{ name = "lark-oapi", marker = "extra == 'channels'", specifier = ">=1.4.22,<2.0.0" },
{ name = "lark-oapi", marker = "extra == 'feishu'", specifier = ">=1.4.22,<2.0.0" },
{ name = "litellm", specifier = ">=1.79.0,<2.0.0" },
{ name = "minio", specifier = ">=7.2.0,<8.0.0" },
{ name = "openai", specifier = ">=1.79.0,<2.0.0" },
{ name = "pydantic", specifier = ">=2.12.0,<3.0.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.0,<10.0.0" },
@ -1459,6 +1504,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "minio"
version = "7.2.20"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "argon2-cffi" },
{ name = "certifi" },
{ name = "pycryptodome" },
{ name = "typing-extensions" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/40/df/6dfc6540f96a74125a11653cce717603fd5b7d0001a8e847b3e54e72d238/minio-7.2.20.tar.gz", hash = "sha256:95898b7a023fbbfde375985aa77e2cd6a0762268db79cf886f002a9ea8e68598", size = 136113, upload-time = "2025-11-27T00:37:15.569Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/9a/b697530a882588a84db616580f2ba5d1d515c815e11c30d219145afeec87/minio-7.2.20-py3-none-any.whl", hash = "sha256:eb33dd2fb80e04c3726a76b13241c6be3c4c46f8d81e1d58e757786f6501897e", size = 93751, upload-time = "2025-11-27T00:37:13.993Z" },
]
[[package]]
name = "more-itertools"
version = "11.0.2"