merge: personal user filesystem minio integration
This commit is contained in:
@ -9,6 +9,15 @@ from .skill_view import SkillViewTool, skill_view
|
||||
from .session_search import SessionSearchTool, session_search
|
||||
from .terminal import ExecuteCodeTool, ProcessTool, TerminalTool
|
||||
from .utility import ClarifyTool, DelegateTool, SendMessageTool, SpawnTool, TodoTool
|
||||
from .user_files import (
|
||||
UserFilesCopyToWorkspaceTool,
|
||||
UserFilesDeleteTool,
|
||||
UserFilesListTool,
|
||||
UserFilesMkdirTool,
|
||||
UserFilesPublishOutputTool,
|
||||
UserFilesReadTool,
|
||||
UserFilesWriteTool,
|
||||
)
|
||||
from .web import WebFetchTool, WebSearchTool
|
||||
|
||||
__all__ = [
|
||||
@ -30,6 +39,13 @@ __all__ = [
|
||||
"SessionSearchTool",
|
||||
"TerminalTool",
|
||||
"TodoTool",
|
||||
"UserFilesCopyToWorkspaceTool",
|
||||
"UserFilesDeleteTool",
|
||||
"UserFilesListTool",
|
||||
"UserFilesMkdirTool",
|
||||
"UserFilesPublishOutputTool",
|
||||
"UserFilesReadTool",
|
||||
"UserFilesWriteTool",
|
||||
"ClarifyTool",
|
||||
"WebFetchTool",
|
||||
"WebSearchTool",
|
||||
|
||||
@ -14,7 +14,7 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
from pathlib import Path
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ MAX_READ_CHARS = 120_000
|
||||
MAX_SEARCH_RESULTS = 200
|
||||
MAX_SEARCH_FILE_BYTES = 2_000_000
|
||||
MAX_SEARCH_FILES = 5_000
|
||||
USER_FILE_VIRTUAL_ROOTS = {"uploads", "outputs", "shared", "tasks"}
|
||||
SKIP_DIR_NAMES = {
|
||||
".git",
|
||||
".hg",
|
||||
@ -161,9 +162,28 @@ def _workspace_root(workspace: str | None) -> Path:
|
||||
return root
|
||||
|
||||
|
||||
def _virtual_user_file_error(user_path: str | None) -> str | None:
    """Return a redirect message when *user_path* names a personal file-system root.

    The path is converted to forward slashes and stripped of surrounding
    whitespace and slashes.  When the first remaining segment is one of
    ``USER_FILE_VIRTUAL_ROOTS`` the returned message tells the caller which
    ``user_files_*`` tool to use instead; otherwise ``None`` is returned and
    the path can be handled as an ordinary workspace path.

    Args:
        user_path: Path supplied to a workspace tool; ``None`` and empty
            values are treated as ``"."``.

    Returns:
        The explanatory error text, or ``None`` when the path does not start
        with a virtual root.
    """
    normalized = str(user_path or ".").replace("\\", "/").strip()
    if normalized in {"", ".", "./"}:
        return None

    # ``normalized`` is always a ``str`` here, so PurePosixPath cannot raise
    # TypeError; the previous ``except TypeError`` guard was unreachable.
    segments = [
        segment
        for segment in PurePosixPath(normalized.strip("/")).parts
        if segment not in ("", ".")
    ]
    if not segments or segments[0] not in USER_FILE_VIRTUAL_ROOTS:
        return None

    return (
        f"{user_path} is a personal agent file system path, not a workspace path. "
        "Use user_files_read or user_files_copy_to_workspace for reads; use "
        "user_files_write for shared/tasks files or user_files_publish_output for outputs."
    )
|
||||
|
||||
|
||||
def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
|
||||
"""Resolve a user path and ensure the real target stays inside workspace."""
|
||||
|
||||
if error := _virtual_user_file_error(user_path):
|
||||
raise WorkspacePathError(error)
|
||||
root = _workspace_root(workspace)
|
||||
raw_path = Path(user_path or ".").expanduser()
|
||||
candidate = raw_path if raw_path.is_absolute() else root / raw_path
|
||||
@ -178,6 +198,8 @@ def _resolve_existing_path(workspace: str | None, user_path: str | None) -> tupl
|
||||
|
||||
|
||||
def _resolve_writable_path(workspace: str | None, user_path: str | None) -> tuple[Path, Path]:
|
||||
if error := _virtual_user_file_error(user_path):
|
||||
raise WorkspacePathError(error)
|
||||
root = _workspace_root(workspace)
|
||||
if not user_path or not str(user_path).strip():
|
||||
raise WorkspacePathError("path is required")
|
||||
|
||||
389
app-instance/backend/beaver/tools/builtins/user_files.py
Normal file
389
app-instance/backend/beaver/tools/builtins/user_files.py
Normal file
@ -0,0 +1,389 @@
|
||||
"""Agent-facing tools for the user-visible file system."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from beaver.foundation.config.loader import load_config
|
||||
from beaver.services.user_file_resolver import UserFileStorageResolver, build_file_auth_context
|
||||
from beaver.services.user_files import AgentUserFilePolicy, UserFileError, UserFilePathError, UserFileService
|
||||
|
||||
|
||||
# Largest user file, in bytes (50 MiB), that may be copied into the workspace.
MAX_WORKSPACE_STAGE_BYTES = 50 * 1024 * 1024
|
||||
|
||||
|
||||
def _json_result(success: bool, **payload: Any) -> str:
    """Serialise a tool result as indented JSON with ``success`` as the first key.

    Non-ASCII characters are emitted verbatim rather than escaped.
    """
    body: dict[str, Any] = {"success": success}
    body.update(payload)
    return json.dumps(body, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def _service(workspace: str | None, services: dict[str, Any] | None = None) -> UserFileService:
    """Build the ``UserFileService`` the agent-facing tools operate on.

    Args:
        workspace: Workspace directory; required.
        services: Optional runtime service mapping.  ``beaver_config`` is used
            as the configuration when present (otherwise the config is loaded
            for *workspace*), and ``user_id`` is forwarded to the auth context.

    Raises:
        UserFileError: If *workspace* is empty or ``None``.
    """
    if not workspace:
        raise UserFileError("workspace is not configured for user file tools")

    injected = services or {}
    config = injected.get("beaver_config")
    if config is None:
        config = load_config(workspace=workspace)

    # Prefer the backend id, then the client id, then a fixed "agent" name.
    identity = config.backend_identity
    backend_id = identity.backend_id.strip() or identity.client_id.strip() or "agent"

    auth_context = build_file_auth_context(
        username=backend_id,
        config=config,
        user_id=injected.get("user_id"),
        auth_source="beaver-agent-runtime",
    )
    resolver = UserFileStorageResolver(
        config=config,
        workspace=Path(workspace),
        auth_context=auth_context,
    )
    return await resolver.service()
|
||||
|
||||
|
||||
def _agent_policy(services: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None) -> AgentUserFilePolicy:
    """Create the access policy for the current agent invocation.

    ``task_id`` is taken from *services* first, then *metadata*; a blank value
    becomes ``None``.  The fallback scope is the first truthy value among
    ``services["run_id"]``, ``metadata["run_id"]``, ``metadata["session_id"]``
    and the literal ``"interactive"``.
    """
    svc = services or {}
    meta = metadata or {}

    raw_task_id = svc.get("task_id") or meta.get("task_id") or ""
    task_id = str(raw_task_id).strip() or None

    scope = svc.get("run_id") or meta.get("run_id") or meta.get("session_id") or "interactive"
    return AgentUserFilePolicy(task_id=task_id, fallback_scope=str(scope))
|
||||
|
||||
|
||||
def _workspace_root(workspace: str | None) -> Path:
    """Return the resolved workspace directory, creating it when missing.

    Raises:
        UserFilePathError: If *workspace* is empty or ``None``.
    """
    if workspace:
        resolved_root = Path(workspace).expanduser().resolve()
        resolved_root.mkdir(parents=True, exist_ok=True)
        return resolved_root
    raise UserFilePathError("workspace is not configured for user file tools")
|
||||
|
||||
|
||||
def _resolve_workspace_source(workspace: str | None, source_path: str) -> tuple[Path, Path]:
    """Resolve *source_path* to an existing regular file inside the workspace.

    Relative paths are interpreted against the workspace root; absolute paths
    are accepted as long as the resolved file stays under that root.

    Returns:
        ``(workspace_root, resolved_file)``.

    Raises:
        UserFilePathError: If the path is blank, resolves outside the
            workspace, or is not a regular file.
        OSError: From the strict ``resolve`` call when the path does not exist.
    """
    root = _workspace_root(workspace)
    if not (source_path and str(source_path).strip()):
        raise UserFilePathError("source_path is required")

    requested = Path(str(source_path)).expanduser()
    if not requested.is_absolute():
        requested = root / requested

    # strict=True makes a missing file fail here instead of further down.
    resolved = requested.resolve(strict=True)
    try:
        resolved.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("source_path escapes workspace") from exc

    if resolved.is_file():
        return root, resolved
    raise UserFilePathError("source_path must be a file")
|
||||
|
||||
|
||||
def _resolve_workspace_destination(workspace: str | None, target_path: str) -> tuple[Path, Path]:
    """Resolve a relative *target_path* to a location inside the workspace.

    The destination does not have to exist yet.

    Returns:
        ``(workspace_root, resolved_destination)``.

    Raises:
        UserFilePathError: If the path is blank, absolute (after ``~``
            expansion), or resolves outside the workspace root.
    """
    root = _workspace_root(workspace)
    if not (target_path and str(target_path).strip()):
        raise UserFilePathError("workspace_path is required")

    relative = Path(str(target_path)).expanduser()
    if relative.is_absolute():
        raise UserFilePathError("workspace_path must be relative")

    destination = (root / relative).resolve()
    try:
        destination.relative_to(root)
    except ValueError as exc:
        raise UserFilePathError("workspace_path escapes workspace") from exc
    return root, destination
|
||||
|
||||
|
||||
def _relative_path(root: Path, path: Path) -> str:
    """Return *path* relative to *root*, rendered with forward slashes."""
    relative = path.relative_to(root)
    return relative.as_posix()
|
||||
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesListTool.  ``path`` is optional and defaults to the empty string.
USER_FILES_LIST_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "default": "",
            "description": "User file path under uploads, outputs, shared, or tasks. Empty path lists the virtual roots.",
        }
    },
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesReadTool.  The ``max_bytes`` bounds declared here (1..1000000)
# match the clamp applied in UserFilesReadTool.execute.
USER_FILES_READ_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "User file path to read."},
        "max_bytes": {
            "type": "integer",
            "default": 120000,
            "minimum": 1,
            "maximum": 1000000,
            "description": "Maximum bytes to return in model context.",
        },
    },
    "required": ["path"],
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesWriteTool.  ``content_type`` is optional and defaults to text/plain.
USER_FILES_WRITE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {"type": "string", "description": "User file path to create or replace."},
        "content": {"type": "string", "description": "Text content to write."},
        "content_type": {"type": "string", "default": "text/plain"},
    },
    "required": ["path", "content"],
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesDeleteTool.
USER_FILES_DELETE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {"path": {"type": "string", "description": "User file or directory path to delete."}},
    "required": ["path"],
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesMkdirTool.
USER_FILES_MKDIR_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {"path": {"type": "string", "description": "User file directory path to create."}},
    "required": ["path"],
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesCopyToWorkspaceTool.  Only ``path`` is required;
# ``workspace_path`` falls back to a generated default in the tool.
USER_FILES_COPY_TO_WORKSPACE_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "description": "Readable user file path under uploads, outputs, shared, or an authorized tasks namespace.",
        },
        "workspace_path": {
            "type": "string",
            "description": "Optional relative workspace destination. Defaults to user-files/tasks/{task_id}/<filename> or user-files/runs/<scope>/<filename>.",
        },
    },
    "required": ["path"],
}
|
||||
|
||||
# JSON-Schema-style argument description used as the default ``parameters``
# of UserFilesPublishOutputTool.  ``content_type`` is optional; the tool
# guesses it from the target filename when omitted.
USER_FILES_PUBLISH_OUTPUT_PARAMETERS: dict[str, Any] = {
    "type": "object",
    "properties": {
        "source_path": {
            "type": "string",
            "description": "Workspace file path to publish. Absolute paths are allowed only if they stay inside the workspace.",
        },
        "target_path": {
            "type": "string",
            "description": "Output path under outputs/, such as outputs/report.md.",
        },
        "content_type": {
            "type": "string",
            "description": "Optional content type. If omitted, Beaver guesses from the target filename.",
        },
    },
    "required": ["source_path", "target_path"],
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesListTool:
|
||||
name: str = "user_files_list"
|
||||
description: str = (
|
||||
"List files and folders in the personal agent file system. Use the virtual roots only: "
|
||||
"uploads for files the user provides to the agent, outputs for agent-generated results, "
|
||||
"shared for reusable user/agent reference material, and tasks for files bound to a specific task. "
|
||||
"An empty path lists the four roots; this tool never exposes MinIO buckets, credentials, or internal workspace paths."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = True
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_LIST_PARAMETERS))
|
||||
|
||||
async def execute(self, *, path: str = "", workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
|
||||
try:
|
||||
return _json_result(True, **await (await _service(workspace, services)).browse(path))
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesReadTool:
|
||||
name: str = "user_files_read"
|
||||
description: str = (
|
||||
"Read a bounded text preview from the personal agent file system. Use this to inspect user-provided "
|
||||
"files in uploads, long-lived shared material in shared, task files in tasks, or generated outputs in outputs. "
|
||||
"The path must stay under uploads, outputs, shared, or tasks; internal workspace and MinIO implementation paths are hidden."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = True
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_READ_PARAMETERS))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
*,
|
||||
path: str,
|
||||
max_bytes: int = 120000,
|
||||
workspace: str | None = None,
|
||||
services: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
path = _agent_policy(services, metadata).validate_read(path)
|
||||
limit = max(1, min(int(max_bytes), 1_000_000))
|
||||
return _json_result(True, **await (await _service(workspace, services)).preview(path, max_bytes=limit))
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesWriteTool:
|
||||
name: str = "user_files_write"
|
||||
description: str = (
|
||||
"Create or replace a text file in the personal agent file system. Store agent-generated deliverables "
|
||||
"under outputs, reusable long-lived context under shared, and task-bound files under the current "
|
||||
"tasks/{task_id}/ namespace. Never write to uploads; uploaded files are immutable agent inputs. "
|
||||
"For modifications to uploaded files, copy them to the workspace, edit there, then publish to outputs."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_WRITE_PARAMETERS))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
*,
|
||||
path: str,
|
||||
content: str,
|
||||
content_type: str = "text/plain",
|
||||
workspace: str | None = None,
|
||||
services: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
path = _agent_policy(services, metadata).validate_write(path)
|
||||
return _json_result(True, **await (await _service(workspace, services)).write_file(path, content, content_type=content_type))
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesDeleteTool:
|
||||
name: str = "user_files_delete"
|
||||
description: str = (
|
||||
"Agent deletion is disabled for the personal agent file system. User-visible file deletion is owned by "
|
||||
"the Files page or user-side APIs; agents should use task/workspace cleanup instead."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_DELETE_PARAMETERS))
|
||||
|
||||
async def execute(self, *, path: str, workspace: str | None = None, services: dict[str, Any] | None = None) -> str:
|
||||
try:
|
||||
_agent_policy(services).validate_delete(path)
|
||||
return _json_result(False, path=path, deleted=False)
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesMkdirTool:
|
||||
name: str = "user_files_mkdir"
|
||||
description: str = (
|
||||
"Create a subfolder in the personal agent file system under uploads, outputs, shared, or tasks. "
|
||||
"Use folders to organize agent outputs, reusable shared material, or current task-specific files. "
|
||||
"Do not create folders under uploads because uploads is user-owned input storage."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_MKDIR_PARAMETERS))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
*,
|
||||
path: str,
|
||||
workspace: str | None = None,
|
||||
services: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
path = _agent_policy(services, metadata).validate_mkdir(path)
|
||||
return _json_result(True, **await (await _service(workspace, services)).mkdir(path))
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesCopyToWorkspaceTool:
|
||||
name: str = "user_files_copy_to_workspace"
|
||||
description: str = (
|
||||
"Copy a readable file from the personal agent file system into the internal workspace before editing, "
|
||||
"running, or validating it. Use this for user-uploaded files under uploads: the original upload remains "
|
||||
"unchanged, and the returned workspace_path can be used with workspace tools like read_file or patch_file."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_COPY_TO_WORKSPACE_PARAMETERS))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
*,
|
||||
path: str,
|
||||
workspace_path: str | None = None,
|
||||
workspace: str | None = None,
|
||||
services: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
policy = _agent_policy(services, metadata)
|
||||
path = policy.validate_read(path)
|
||||
content = await (await _service(workspace, services)).download(path)
|
||||
if content.size > MAX_WORKSPACE_STAGE_BYTES:
|
||||
raise UserFilePathError(f"File is too large to copy to workspace (max {MAX_WORKSPACE_STAGE_BYTES} bytes)")
|
||||
default_path = f"user-files/{policy.task_namespace}/{Path(path).name}"
|
||||
root, destination = _resolve_workspace_destination(workspace, workspace_path or default_path)
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
destination.write_bytes(content.content)
|
||||
return _json_result(
|
||||
True,
|
||||
path=path,
|
||||
workspace_path=_relative_path(root, destination),
|
||||
bytes=len(content.content),
|
||||
content_type=content.content_type,
|
||||
)
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
except OSError as exc:
|
||||
return _json_result(False, error=str(exc), path=path)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UserFilesPublishOutputTool:
|
||||
name: str = "user_files_publish_output"
|
||||
description: str = (
|
||||
"Publish a validated workspace file to the personal agent file system under outputs/. Use this after "
|
||||
"staging and editing files in the workspace. Publishing never writes to uploads, and it hides MinIO "
|
||||
"bucket, namespace, and credential details from the agent."
|
||||
)
|
||||
toolset: str = "user_files"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(default_factory=lambda: dict(USER_FILES_PUBLISH_OUTPUT_PARAMETERS))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
*,
|
||||
source_path: str,
|
||||
target_path: str,
|
||||
content_type: str | None = None,
|
||||
workspace: str | None = None,
|
||||
services: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
root, source = _resolve_workspace_source(workspace, source_path)
|
||||
normalized_target = target_path.strip().strip("/")
|
||||
if not normalized_target.startswith("outputs/"):
|
||||
raise UserFilePathError("Published output target must be under outputs/")
|
||||
guessed_type, _ = mimetypes.guess_type(normalized_target)
|
||||
raw = source.read_bytes()
|
||||
entry = await (await _service(workspace, services)).write_file(
|
||||
normalized_target,
|
||||
raw,
|
||||
content_type=content_type or guessed_type or "application/octet-stream",
|
||||
)
|
||||
return _json_result(
|
||||
True,
|
||||
source_path=_relative_path(root, source),
|
||||
target_path=normalized_target,
|
||||
bytes=len(raw),
|
||||
**entry,
|
||||
)
|
||||
except UserFileError as exc:
|
||||
return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)
|
||||
except OSError as exc:
|
||||
return _json_result(False, error=str(exc), source_path=source_path, target_path=target_path)
|
||||
Reference in New Issue
Block a user