Files
beaver_project/app-instance/backend/beaver/tools/builtins/user_files.py
2026-06-03 12:06:34 +08:00

390 lines
16 KiB
Python

"""Agent-facing tools for the user-visible file system."""
from __future__ import annotations
from dataclasses import dataclass, field
import json
import mimetypes
from pathlib import Path
from typing import Any
from beaver.foundation.config.loader import load_config
from beaver.services.user_file_resolver import UserFileStorageResolver, build_file_auth_context
from beaver.services.user_files import AgentUserFilePolicy, UserFileError, UserFilePathError, UserFileService
# Upper bound (50 MiB) on the size of a user file that the copy-to-workspace
# tool will stage into the workspace; larger files are rejected with an error.
MAX_WORKSPACE_STAGE_BYTES = 50 * 1024 * 1024
def _json_result(success: bool, **payload: Any) -> str:
    """Render a tool result as an indented JSON document.

    The ``success`` flag is always the first key; every extra keyword argument
    follows in the order it was passed. Non-ASCII characters are emitted as-is.
    """
    document: dict[str, Any] = {"success": success}
    document.update(payload)
    return json.dumps(document, indent=2, ensure_ascii=False)
async def _service(workspace: str | None, services: dict[str, Any] | None = None) -> UserFileService:
    """Build the ``UserFileService`` used by the tools in this module.

    Args:
        workspace: Workspace directory; required.
        services: Optional runtime service mapping. ``beaver_config`` is used as
            the configuration when present, and ``user_id`` is forwarded to the
            auth context.

    Raises:
        UserFileError: If ``workspace`` is empty or ``None``.
    """
    if not workspace:
        raise UserFileError("workspace is not configured for user file tools")

    runtime = services or {}
    config = runtime.get("beaver_config")
    if config is None:
        # No injected configuration: load it for this workspace.
        config = load_config(workspace=workspace)

    # Identify the agent by backend id, then client id, then a fixed fallback.
    username = (
        config.backend_identity.backend_id.strip()
        or config.backend_identity.client_id.strip()
        or "agent"
    )
    auth_context = build_file_auth_context(
        username=username,
        config=config,
        user_id=runtime.get("user_id"),
        auth_source="beaver-agent-runtime",
    )
    resolver = UserFileStorageResolver(
        config=config,
        workspace=Path(workspace),
        auth_context=auth_context,
    )
    return await resolver.service()
def _agent_policy(services: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None) -> AgentUserFilePolicy:
    """Create the agent file policy for the current invocation.

    ``task_id`` is taken from ``services`` first, then ``metadata``; it is
    stringified and stripped, and becomes ``None`` when empty. The fallback
    scope is the first truthy value among ``services["run_id"]``,
    ``metadata["run_id"]`` and ``metadata["session_id"]``, else
    ``"interactive"``.
    """
    runtime = services or {}
    meta = metadata or {}

    raw_task_id = runtime.get("task_id") or meta.get("task_id") or ""
    task_id = str(raw_task_id).strip() or None

    scope = runtime.get("run_id") or meta.get("run_id") or meta.get("session_id") or "interactive"
    return AgentUserFilePolicy(task_id=task_id, fallback_scope=str(scope))
def _workspace_root(workspace: str | None) -> Path:
if not workspace:
raise UserFilePathError("workspace is not configured for user file tools")
root = Path(workspace).expanduser().resolve()
root.mkdir(parents=True, exist_ok=True)
return root
def _resolve_workspace_source(workspace: str | None, source_path: str) -> tuple[Path, Path]:
    """Resolve an existing workspace file and confirm it stays inside the workspace.

    Relative paths are interpreted against the workspace root; absolute paths
    are accepted only when they resolve to a location under that root.

    Returns:
        ``(workspace_root, resolved_file)``.

    Raises:
        UserFilePathError: If the path is blank, escapes the workspace, or is
            not a regular file.
        OSError: From ``Path.resolve(strict=True)`` when the path does not exist.
    """
    root = _workspace_root(workspace)
    if not (source_path and str(source_path).strip()):
        raise UserFilePathError("source_path is required")

    requested = Path(str(source_path)).expanduser()
    if not requested.is_absolute():
        requested = root / requested
    # strict=True: the file must already exist; symlinks are followed before
    # the containment check below.
    resolved = requested.resolve(strict=True)

    try:
        resolved.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("source_path escapes workspace") from exc

    if resolved.is_file():
        return root, resolved
    raise UserFilePathError("source_path must be a file")
def _resolve_workspace_destination(workspace: str | None, target_path: str) -> tuple[Path, Path]:
    """Resolve a relative destination path inside the workspace.

    The destination does not need to exist. Absolute paths are rejected, and
    so is any path that resolves outside the workspace root.

    Returns:
        ``(workspace_root, resolved_destination)``.

    Raises:
        UserFilePathError: If the path is blank, absolute, or escapes the
            workspace.
    """
    root = _workspace_root(workspace)
    if not (target_path and str(target_path).strip()):
        raise UserFilePathError("workspace_path is required")

    requested = Path(str(target_path)).expanduser()
    if requested.is_absolute():
        raise UserFilePathError("workspace_path must be relative")

    destination = (root / requested).resolve()
    try:
        destination.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("workspace_path escapes workspace") from exc
    else:
        return root, destination
def _relative_path(root: Path, path: Path) -> str:
    """Return ``path`` relative to ``root`` using forward slashes.

    Raises:
        ValueError: If ``path`` is not located under ``root``.
    """
    relative = path.relative_to(root)
    return relative.as_posix()
# JSON-Schema-style argument spec copied into UserFilesListTool.parameters.
# `path` is optional; its description says an empty path lists the virtual roots.
USER_FILES_LIST_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {
"type": "string",
"default": "",
"description": "User file path under uploads, outputs, shared, or tasks. Empty path lists the virtual roots.",
}
},
}
# JSON-Schema-style argument spec copied into UserFilesReadTool.parameters.
# The declared max_bytes bounds (1..1000000) match the clamp applied in
# UserFilesReadTool.execute.
USER_FILES_READ_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {"type": "string", "description": "User file path to read."},
"max_bytes": {
"type": "integer",
"default": 120000,
"minimum": 1,
"maximum": 1000000,
"description": "Maximum bytes to return in model context.",
},
},
"required": ["path"],
}
# JSON-Schema-style argument spec copied into UserFilesWriteTool.parameters.
# `path` and `content` are required; `content_type` defaults to text/plain.
USER_FILES_WRITE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {"type": "string", "description": "User file path to create or replace."},
"content": {"type": "string", "description": "Text content to write."},
"content_type": {"type": "string", "default": "text/plain"},
},
"required": ["path", "content"],
}
# JSON-Schema-style argument spec copied into UserFilesDeleteTool.parameters.
# NOTE(review): that tool's description states agent deletion is disabled, so
# this schema only describes the call shape.
USER_FILES_DELETE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {"path": {"type": "string", "description": "User file or directory path to delete."}},
"required": ["path"],
}
# JSON-Schema-style argument spec copied into UserFilesMkdirTool.parameters.
# The single required argument is the directory path to create.
USER_FILES_MKDIR_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {"path": {"type": "string", "description": "User file directory path to create."}},
"required": ["path"],
}
# JSON-Schema-style argument spec copied into UserFilesCopyToWorkspaceTool.parameters.
# `path` is the user file to stage; `workspace_path` is an optional relative
# destination inside the workspace.
USER_FILES_COPY_TO_WORKSPACE_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Readable user file path under uploads, outputs, shared, or an authorized tasks namespace.",
},
"workspace_path": {
"type": "string",
"description": "Optional relative workspace destination. Defaults to user-files/tasks/{task_id}/<filename> or user-files/runs/<scope>/<filename>.",
},
},
"required": ["path"],
}
# JSON-Schema-style argument spec copied into UserFilesPublishOutputTool.parameters.
# `source_path` is a workspace file and `target_path` must be under outputs/;
# `content_type` is optional and otherwise guessed from the target filename.
USER_FILES_PUBLISH_OUTPUT_PARAMETERS: dict[str, Any] = {
"type": "object",
"properties": {
"source_path": {
"type": "string",
"description": "Workspace file path to publish. Absolute paths are allowed only if they stay inside the workspace.",
},
"target_path": {
"type": "string",
"description": "Output path under outputs/, such as outputs/report.md.",
},
"content_type": {
"type": "string",
"description": "Optional content type. If omitted, Beaver guesses from the target filename.",
},
},
"required": ["source_path", "target_path"],
}
@dataclass(slots=True)
class UserFilesListTool:
name: str = "user_files_list"
description: str = (
"List files and folders in the personal agent file system. Use the virtual roots only: "
"uploads for files the user provides to the agent, outputs for agent-generated results, "
"shared for reusable user/agent reference material, and tasks for files bound to a specific task. "
"An empty path lists the four roots; this tool never exposes MinIO buckets, credentials, or internal workspace paths."
)
toolset: str = "user_files"
always_available: bool = True
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_LIST_PARAMETERS))
async def execute(self, *, path: str = "", workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
try:
return _json_result(True, **await (await _service(workspace, services)).browse(path))
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesReadTool:
name: str = "user_files_read"
description: str = (
"Read a bounded text preview from the personal agent file system. Use this to inspect user-provided "
"files in uploads, long-lived shared material in shared, task files in tasks, or generated outputs in outputs. "
"The path must stay under uploads, outputs, shared, or tasks; internal workspace and MinIO implementation paths are hidden."
)
toolset: str = "user_files"
always_available: bool = True
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_READ_PARAMETERS))
async def execute(
self,
*,
path: str,
max_bytes: int = 120000,
workspace: str | None = None,
services: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> str:
try:
path = _agent_policy(services, metadata).validate_read(path)
limit = max(1, min(int(max_bytes), 1_000_000))
return _json_result(True, **await (await _service(workspace, services)).preview(path, max_bytes=limit))
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesWriteTool:
name: str = "user_files_write"
description: str = (
"Create or replace a text file in the personal agent file system. Store agent-generated deliverables "
"under outputs, reusable long-lived context under shared, and task-bound files under the current "
"tasks/{task_id}/ namespace. Never write to uploads; uploaded files are immutable agent inputs. "
"For modifications to uploaded files, copy them to the workspace, edit there, then publish to outputs."
)
toolset: str = "user_files"
always_available: bool = False
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_WRITE_PARAMETERS))
async def execute(
self,
*,
path: str,
content: str,
content_type: str = "text/plain",
workspace: str | None = None,
services: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> str:
try:
path = _agent_policy(services, metadata).validate_write(path)
return _json_result(True, **await (await _service(workspace, services)).write_file(path, content, content_type=content_type))
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesDeleteTool:
name: str = "user_files_delete"
description: str = (
"Agent deletion is disabled for the personal agent file system. User-visible file deletion is owned by "
"the Files page or user-side APIs; agents should use task/workspace cleanup instead."
)
toolset: str = "user_files"
always_available: bool = False
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_DELETE_PARAMETERS))
async def execute(self, *, path: str, workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
try:
_agent_policy(services).validate_delete(path)
return _json_result(False, path=path, deleted=False)
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesMkdirTool:
name: str = "user_files_mkdir"
description: str = (
"Create a subfolder in the personal agent file system under uploads, outputs, shared, or tasks. "
"Use folders to organize agent outputs, reusable shared material, or current task-specific files. "
"Do not create folders under uploads because uploads is user-owned input storage."
)
toolset: str = "user_files"
always_available: bool = False
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_MKDIR_PARAMETERS))
async def execute(
self,
*,
path: str,
workspace: str | None = None,
services: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> str:
try:
path = _agent_policy(services, metadata).validate_mkdir(path)
return _json_result(True, **await (await _service(workspace, services)).mkdir(path))
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesCopyToWorkspaceTool:
name: str = "user_files_copy_to_workspace"
description: str = (
"Copy a readable file from the personal agent file system into the internal workspace before editing, "
"running, or validating it. Use this for user-uploaded files under uploads: the original upload remains "
"unchanged, and the returned workspace_path can be used with workspace tools like read_file or patch_file."
)
toolset: str = "user_files"
always_available: bool = False
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_COPY_TO_WORKSPACE_PARAMETERS))
async def execute(
self,
*,
path: str,
workspace_path: str | None = None,
workspace: str | None = None,
services: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> str:
try:
policy = _agent_policy(services, metadata)
path = policy.validate_read(path)
content = await (await _service(workspace, services)).download(path)
if content.size > MAX_WORKSPACE_STAGE_BYTES:
raise UserFilePathError(f"File is too large to copy to workspace (max {MAX_WORKSPACE_STAGE_BYTES} bytes)")
default_path = f"user-files/{policy.task_namespace}/{Path(path).name}"
root, destination = _resolve_workspace_destination(workspace, workspace_path or default_path)
destination.parent.mkdir(parents=True, exist_ok=True)
destination.write_bytes(content.content)
return _json_result(
True,
path=path,
workspace_path=_relative_path(root, destination),
bytes=len(content.content),
content_type=content.content_type,
)
except UserFileError as exc:
return _json_result(False, error=str(exc), path=path)
except OSError as exc:
return _json_result(False, error=str(exc), path=path)
@dataclass(slots=True)
class UserFilesPublishOutputTool:
name: str = "user_files_publish_output"
description: str = (
"Publish a validated workspace file to the personal agent file system under outputs/. Use this after "
"staging and editing files in the workspace. Publishing never writes to uploads, and it hides MinIO "
"bucket, namespace, and credential details from the agent."
)
toolset: str = "user_files"
always_available: bool = False
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_PUBLISH_OUTPUT_PARAMETERS))
async def execute(
self,
*,
source_path: str,
target_path: str,
content_type: str | None = None,
workspace: str | None = None,
services: dict[str, Any] | None = None,
) -> str:
try:
root, source = _resolve_workspace_source(workspace, source_path)
normalized_target = target_path.strip().strip("/")
if not normalized_target.startswith("outputs/"):
raise UserFilePathError("Published output target must be under outputs/")
guessed_type, _ = mimetypes.guess_type(normalized_target)
raw = source.read_bytes()
entry = await (await _service(workspace, services)).write_file(
normalized_target,
raw,
content_type=content_type or guessed_type or "application/octet-stream",
)
return _json_result(
True,
source_path=_relative_path(root, source),
target_path=normalized_target,
bytes=len(raw),
**entry,
)
except UserFileError as exc:
return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)
except OSError as exc:
return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)