feat: integrate MinIO-backed user filesystem

This commit is contained in:
Codex
2026-06-03 12:06:34 +08:00
parent a27560102b
commit ffa1249403
56 changed files with 4810 additions and 116 deletions

View File

@ -199,4 +199,5 @@ def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
assert local.kind == "local"
assert local.category == "filesystem"
assert local.managed is True
assert local.display_name == "个人智能体文件系统工具"
assert "beaver.interfaces.mcp.tools_server" in local.args

View File

@ -6,7 +6,7 @@ import os
from pathlib import Path
from beaver.tools import ObjectBackedTool, ToolContext
from beaver.tools.builtins import ListDirectoryTool, ReadFileTool, SearchFilesTool
from beaver.tools.builtins import ListDirectoryTool, PatchFileTool, ReadFileTool, SearchFilesTool, WriteFileTool
def _run_tool(tool, arguments: dict, workspace: Path):
@ -127,3 +127,23 @@ def test_read_file_rejects_binary_files(tmp_path: Path) -> None:
assert payload["success"] is False
assert "binary" in payload["error"]
def test_workspace_tools_reject_user_file_virtual_paths(tmp_path: Path) -> None:
    """Workspace tools must refuse paths that belong to the virtual user-file roots.

    Each workspace tool is pointed at a different virtual root (uploads/, outputs/,
    shared/, tasks/). Every call has to fail with an error that names the personal
    agent file system and mentions the ``user_files_read`` tool.
    """
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # (tool class, arguments) pairs; tools are built and run one at a time, in order.
    attempts = (
        (ReadFileTool, {"path": "uploads/get_helm.sh"}),
        (ListDirectoryTool, {"path": "outputs"}),
        (WriteFileTool, {"path": "shared/profile.json", "content": "{}"}),
        (
            PatchFileTool,
            {"path": "tasks/task-123/draft.md", "old_text": "a", "new_text": "b"},
        ),
    )
    outcomes = [_run_tool(tool_cls(), arguments, workspace) for tool_cls, arguments in attempts]

    for result in outcomes:
        payload = _payload(result)
        assert result.success is False
        assert payload["success"] is False
        assert "personal agent file system path" in payload["error"]
        assert "user_files_read" in payload["error"]

View File

@ -0,0 +1,22 @@
from __future__ import annotations
from beaver.interfaces.mcp.tools_server import _category_tools
def test_local_filesystem_mcp_exposes_personal_user_file_tools_only(tmp_path) -> None:
    """The "filesystem" MCP category lists exactly the user_files_* tools, in order.

    The generic workspace tools must be absent, and every exposed tool description
    has to mention the personal agent file system.
    """
    tools, _context = _category_tools("filesystem", tmp_path)
    names = [tool.spec.name for tool in tools]

    expected_names = [
        "user_files_list",
        "user_files_read",
        "user_files_write",
        "user_files_mkdir",
        "user_files_copy_to_workspace",
        "user_files_publish_output",
    ]
    assert names == expected_names

    # Workspace-level tools must not be reachable through this category.
    for workspace_tool in ("read_file", "search_files", "list_directory"):
        assert workspace_tool not in names

    for tool in tools:
        assert "personal agent file system" in tool.spec.description

View File

@ -0,0 +1,153 @@
from __future__ import annotations
from io import BytesIO
import pytest
from beaver.services.user_files import (
LocalUserFileStorage,
MinIOStorageConfig,
MinIOUserFileStorage,
UserFileNotFoundError,
UserFilePathError,
UserFileSizeError,
UserFileService,
normalize_user_path,
)
def test_normalize_user_path_accepts_fixed_roots() -> None:
    """Well-formed paths under the fixed roots come back unchanged."""
    for accepted in (
        "uploads/readme.txt",
        "outputs/report.md",
        "tasks/task-123/draft.md",
    ):
        assert normalize_user_path(accepted, allow_root=False) == accepted

    # The empty path is only valid when the caller explicitly allows the root.
    assert normalize_user_path("", allow_root=True) == ""
@pytest.mark.parametrize(
    "path",
    [
        # Parent-directory traversal.
        "../secret.txt",
        # Absolute-style paths, one per virtual root.
        "/uploads/input.txt",
        "/outputs/result.txt",
        "/shared/profile.json",
        "/tasks/task-123/draft.md",
        # Traversal hidden inside an otherwise valid root.
        "uploads/../state/config.json",
        # Root that is not one of the accepted ones.
        "memory/private.txt",
        # Dot-prefixed entry name.
        "uploads/.internal",
        # Empty path while the root is not allowed.
        "",
    ],
)
def test_normalize_user_path_rejects_invalid_paths(path: str) -> None:
    """Each listed path must make ``normalize_user_path`` raise ``UserFilePathError``."""
    with pytest.raises(UserFilePathError):
        normalize_user_path(path, allow_root=False)
@pytest.mark.asyncio
async def test_user_file_service_root_and_round_trip(tmp_path) -> None:
    """Walk one file through upload, browse, preview, download and delete on local storage."""

    def entry_names(listing) -> list:
        # Browse results carry their entries under "items"; only the names matter here.
        return [entry["name"] for entry in listing["items"]]

    storage = LocalUserFileStorage(tmp_path / "user-files")
    service = UserFileService(storage)

    # The root is browsed before anything has been uploaded.
    root_listing = await service.browse("")
    upload_info = await service.upload(
        "uploads",
        "hello.txt",
        b"hello user files",
        content_type="text/plain",
    )
    uploads_listing = await service.browse("uploads")
    preview_info = await service.preview("uploads/hello.txt")
    download_info = await service.download("uploads/hello.txt")
    was_deleted = await service.delete("uploads/hello.txt")

    assert entry_names(root_listing) == ["uploads", "outputs", "shared", "tasks"]
    assert upload_info["path"] == "uploads/hello.txt"
    assert upload_info["content_type"] == "text/plain"
    assert entry_names(uploads_listing) == ["hello.txt"]
    assert preview_info["content"] == "hello user files"
    assert download_info.content == b"hello user files"
    assert was_deleted is True
@pytest.mark.asyncio
async def test_user_file_service_stream_upload_and_size_limit(tmp_path) -> None:
    """Streamed uploads are stored intact, and an over-limit stream leaves no file behind."""
    service = UserFileService(LocalUserFileStorage(tmp_path / "user-files"))
    body = b"streamed user file"

    # part_size is far smaller than the body, so the stream is consumed in several parts.
    stored = await service.upload_stream(
        "uploads",
        "streamed.txt",
        BytesIO(body),
        content_type="text/plain",
        max_bytes=1024,
        part_size=4,
    )
    shown = await service.preview("uploads/streamed.txt")

    assert stored["path"] == "uploads/streamed.txt"
    assert stored["size"] == len(body)
    assert shown["content"] == "streamed user file"

    # Six bytes against a five-byte cap must be rejected.
    with pytest.raises(UserFileSizeError):
        await service.upload_stream(
            "uploads",
            "too-large.txt",
            BytesIO(b"abcdef"),
            content_type="text/plain",
            max_bytes=5,
            part_size=2,
        )

    # The rejected upload must not be readable afterwards.
    with pytest.raises(UserFileNotFoundError):
        await service.preview("uploads/too-large.txt")
@pytest.mark.asyncio
async def test_user_file_service_rejects_root_delete_and_traversal(tmp_path) -> None:
    """Deleting a fixed root and uploading through ``..`` both raise ``UserFilePathError``."""
    storage = LocalUserFileStorage(tmp_path / "user-files")
    service = UserFileService(storage)

    # "uploads" is one of the fixed roots and must not be deletable.
    with pytest.raises(UserFilePathError):
        await service.delete("uploads")

    # The upload target directory tries to step out of the user-file tree.
    with pytest.raises(UserFilePathError):
        await service.upload(
            "../workspace",
            "hello.txt",
            b"x",
            content_type="text/plain",
        )
@pytest.mark.asyncio
async def test_user_file_service_creates_nested_directories(tmp_path) -> None:
    """``mkdir`` on a nested path yields a directory that shows up when browsing its parent."""
    storage = LocalUserFileStorage(tmp_path / "user-files")
    service = UserFileService(storage)

    new_dir = await service.mkdir("tasks/task-123/references")
    parent_listing = await service.browse("tasks/task-123")
    child_names = [entry["name"] for entry in parent_listing["items"]]

    assert new_dir["path"] == "tasks/task-123/references"
    assert new_dir["type"] == "directory"
    assert child_names == ["references"]
def test_minio_storage_maps_virtual_paths_under_namespace() -> None:
    """Virtual paths map to namespace-prefixed object names, and back again."""
    settings = {
        "endpoint": "minio.local:9000",
        "access_key": "alice-access",
        "secret_key": "alice-secret",
        "bucket": "beaver-user-files",
        "namespace": "users/alice",
    }
    # Allocate the storage without running __init__; only `config` is assigned
    # before the path helpers are called.
    storage = object.__new__(MinIOUserFileStorage)
    storage.config = MinIOStorageConfig(**settings)

    # Virtual path -> object name.
    assert storage._object_name("uploads/report.pdf") == "users/alice/uploads/report.pdf"
    assert (
        storage._object_name("tasks/task-123/result.json")
        == "users/alice/tasks/task-123/result.json"
    )
    # Object name -> virtual path.
    assert storage._user_path("users/alice/outputs/summary.md") == "outputs/summary.md"
def test_minio_storage_rejects_paths_that_escape_namespace() -> None:
    """Traversal in a virtual path and object names from another namespace are rejected."""
    settings = {
        "endpoint": "minio.local:9000",
        "access_key": "alice-access",
        "secret_key": "alice-secret",
        "bucket": "beaver-user-files",
        "namespace": "users/alice",
    }
    # Allocate the storage without running __init__; only `config` is assigned
    # before the path helpers are called.
    storage = object.__new__(MinIOUserFileStorage)
    storage.config = MinIOStorageConfig(**settings)

    # ".." inside a virtual path must not be turned into an object name.
    with pytest.raises(UserFilePathError):
        storage._object_name("uploads/../state/config.json")

    # An object stored under users/bob is outside the configured users/alice namespace.
    with pytest.raises(UserFilePathError):
        storage._user_path("users/bob/uploads/secret.txt")

View File

@ -0,0 +1,177 @@
from __future__ import annotations
import json
import pytest
from beaver.foundation.config.schema import AuthzConfig, BackendIdentityConfig, BeaverConfig
from beaver.tools.base import ObjectBackedTool, ToolContext
from beaver.tools.builtins import (
UserFilesCopyToWorkspaceTool,
UserFilesListTool,
UserFilesPublishOutputTool,
UserFilesReadTool,
UserFilesWriteTool,
)
@pytest.mark.asyncio
async def test_user_file_tools_write_read_and_list(tmp_path) -> None:
    """A file written through the write tool is visible to the list and read tools."""
    context = ToolContext(workspace=str(tmp_path))
    writer = ObjectBackedTool(UserFilesWriteTool())
    reader = ObjectBackedTool(UserFilesReadTool())
    lister = ObjectBackedTool(UserFilesListTool())

    write_args = {
        "path": "outputs/summary.md",
        "content": "# Summary",
        "content_type": "text/markdown",
    }
    written = await writer.invoke(write_args, context)
    listed = await lister.invoke({"path": "outputs"}, context)
    loaded = await reader.invoke({"path": "outputs/summary.md"}, context)

    # Tool results carry their payload as a JSON string in `content`.
    assert written.success is True
    assert json.loads(written.content)["path"] == "outputs/summary.md"

    assert listed.success is True
    listed_names = [entry["name"] for entry in json.loads(listed.content)["items"]]
    assert listed_names == ["summary.md"]

    assert loaded.success is True
    assert json.loads(loaded.content)["content"] == "# Summary"
@pytest.mark.asyncio
async def test_user_file_tools_reject_agent_write_to_uploads(tmp_path) -> None:
    """The write tool refuses targets under uploads/ and explains why in the error."""
    context = ToolContext(workspace=str(tmp_path))
    writer = ObjectBackedTool(UserFilesWriteTool())

    outcome = await writer.invoke(
        {"path": "uploads/notes.txt", "content": "changed"},
        context,
    )
    message = outcome.error or ""

    assert outcome.success is False
    assert "uploads/ is user-provided input storage" in message
@pytest.mark.asyncio
async def test_user_file_tools_enforce_current_task_namespace(tmp_path) -> None:
    """With task_id "task-123" in the context, writes under tasks/ succeed only in tasks/task-123/.

    Writing directly under tasks/ or into another task's directory must fail, and the
    error has to mention the allowed ``tasks/task-123/`` prefix.
    """
    context = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    writer = ObjectBackedTool(UserFilesWriteTool())

    async def attempt(path: str, content: str):
        # All three writes share the same tool and context.
        return await writer.invoke({"path": path, "content": content}, context)

    current = await attempt("tasks/task-123/drafts/notes.md", "ok")
    direct = await attempt("tasks/notes.md", "bad")
    other = await attempt("tasks/task-456/notes.md", "bad")

    assert current.success is True
    assert direct.success is False
    assert "tasks/task-123/" in (direct.error or "")
    assert other.success is False
    assert "tasks/task-123/" in (other.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_allow_shared_context_write(tmp_path) -> None:
    """Content written under shared/ can be read back unchanged while a task id is set."""
    context = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    writer = ObjectBackedTool(UserFilesWriteTool())
    reader = ObjectBackedTool(UserFilesReadTool())
    profile_json = "{\"name\":\"Alice\"}"

    written = await writer.invoke(
        {"path": "shared/profile.json", "content": profile_json},
        context,
    )
    loaded = await reader.invoke({"path": "shared/profile.json"}, context)

    assert written.success is True
    assert loaded.success is True
    assert json.loads(loaded.content)["content"] == profile_json
@pytest.mark.asyncio
async def test_user_file_tools_copy_to_workspace_and_publish_output(tmp_path) -> None:
    """Stage an upload into the workspace, edit the copy, publish it, and keep the upload intact."""
    original_text = ": ${USE_SUDO:=\"true\"}\n"
    edited_text = ": ${USE_SUDO:=\"false\"}\n"

    # Seed the upload directly on disk.
    # NOTE(review): presumably <workspace>/user_files is where the tools' default
    # local storage lives — confirm against the storage resolver.
    uploads_dir = tmp_path / "user_files" / "uploads"
    uploads_dir.mkdir(parents=True)
    (uploads_dir / "get_helm.sh").write_text(original_text, encoding="utf-8")

    context = ToolContext(
        workspace=str(tmp_path),
        services={"task_id": "task-123"},
        metadata={"run_id": "run-1"},
    )
    copier = ObjectBackedTool(UserFilesCopyToWorkspaceTool())
    publisher = ObjectBackedTool(UserFilesPublishOutputTool())
    reader = ObjectBackedTool(UserFilesReadTool())

    copied = await copier.invoke({"path": "uploads/get_helm.sh"}, context)
    copied_payload = json.loads(copied.content)
    workspace_path = copied_payload["workspace_path"]

    # Edit the staged copy; it is addressed relative to the workspace root.
    staged = tmp_path / workspace_path
    staged.write_text(edited_text, encoding="utf-8")

    published = await publisher.invoke(
        {"source_path": workspace_path, "target_path": "outputs/get_helm.no-sudo.sh"},
        context,
    )
    original = await reader.invoke({"path": "uploads/get_helm.sh"}, context)
    output = await reader.invoke({"path": "outputs/get_helm.no-sudo.sh"}, context)

    assert copied.success is True
    assert copied_payload["workspace_path"] == "user-files/tasks/task-123/get_helm.sh"
    assert published.success is True
    # The upload keeps its original content; only the published output carries the edit.
    assert json.loads(original.content)["content"] == original_text
    assert json.loads(output.content)["content"] == edited_text
@pytest.mark.asyncio
async def test_user_file_publish_rejects_non_output_target_and_workspace_escape(tmp_path) -> None:
    """Publishing fails for a target outside outputs/ and for a source outside the workspace."""
    context = ToolContext(workspace=str(tmp_path))

    # A legitimate source inside the workspace ...
    inside = tmp_path / "result.txt"
    inside.write_text("done", encoding="utf-8")
    # ... and a file that sits next to the workspace directory, not inside it.
    outside = tmp_path.parent / "outside.txt"
    outside.write_text("outside", encoding="utf-8")

    publisher = ObjectBackedTool(UserFilesPublishOutputTool())

    upload_target = await publisher.invoke(
        {"source_path": "result.txt", "target_path": "uploads/result.txt"},
        context,
    )
    escaped_source = await publisher.invoke(
        {"source_path": str(outside), "target_path": "outputs/result.txt"},
        context,
    )

    assert upload_target.success is False
    assert "outputs/" in (upload_target.error or "")
    assert escaped_source.success is False
    assert "escapes workspace" in (escaped_source.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_reject_internal_workspace_paths(tmp_path) -> None:
    """User-file tools reject traversal out of a root and paths outside the fixed roots.

    The read call tries to climb out of uploads/ with ``..``; the write call targets
    a ``workspace/`` prefix. Both must fail with their specific error text.
    """
    context = ToolContext(workspace=str(tmp_path))
    read = ObjectBackedTool(UserFilesReadTool())
    write = ObjectBackedTool(UserFilesWriteTool())

    read_result = await read.invoke({"path": "uploads/../../state/secrets.json"}, context)
    write_result = await write.invoke({"path": "workspace/debug.txt", "content": "x"}, context)

    # Guard with `or ""` like the sibling tests: if `error` is None the membership
    # check would raise TypeError instead of producing a readable assertion failure.
    assert read_result.success is False
    assert "Parent-directory traversal" in (read_result.error or "")
    assert write_result.success is False
    assert "Path must be under" in (write_result.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_reject_absolute_style_user_paths(tmp_path) -> None:
    """Paths with a leading slash are rejected by the read, write and list tools."""
    context = ToolContext(workspace=str(tmp_path), services={"task_id": "task-123"})
    reader = ObjectBackedTool(UserFilesReadTool())
    writer = ObjectBackedTool(UserFilesWriteTool())
    lister = ObjectBackedTool(UserFilesListTool())

    # One absolute-style path per virtual root, issued in this order.
    outcomes = [
        await reader.invoke({"path": "/uploads/input.txt"}, context),
        await writer.invoke({"path": "/outputs/result.txt", "content": "x"}, context),
        await writer.invoke({"path": "/tasks/task-123/draft.md", "content": "x"}, context),
        await lister.invoke({"path": "/shared/profile.json"}, context),
    ]

    for outcome in outcomes:
        assert outcome.success is False
        assert "Absolute paths are not allowed" in (outcome.error or "")
@pytest.mark.asyncio
async def test_user_file_tools_report_missing_deployed_minio_settings(tmp_path, monkeypatch) -> None:
    """With AuthZ enabled but no internal token in the environment, writes fail with a clear error."""
    # Make sure neither spelling of the internal-token variable leaks in from the host.
    for variable in ("BEAVER_AUTHZ_INTERNAL_TOKEN", "AUTHZ_INTERNAL_TOKEN"):
        monkeypatch.delenv(variable, raising=False)

    authz = AuthzConfig(enabled=True, base_url="http://authz.local")
    identity = BackendIdentityConfig(
        backend_id="alice",
        client_id="alice",
        client_secret="secret",
    )
    config = BeaverConfig(authz=authz, backend_identity=identity)
    context = ToolContext(workspace=str(tmp_path), services={"beaver_config": config})
    writer = ObjectBackedTool(UserFilesWriteTool())

    outcome = await writer.invoke(
        {"path": "outputs/summary.md", "content": "# Summary"},
        context,
    )

    assert outcome.success is False
    assert "AuthZ internal token is not configured" in (outcome.error or "")

View File

@ -6,6 +6,14 @@ from fastapi.testclient import TestClient
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
from beaver.services.user_file_resolver import UserFileStorageResolver
from beaver.services.user_files import LocalUserFileStorage, UserFileService
def _auth_headers(app, username: str = "alice") -> dict[str, str]:
token = f"test-token-{username}"
app.state.auth_tokens[token] = username
return {"Authorization": f"Bearer {token}"}
def test_workspace_browser_api_manages_workspace_files(tmp_path: Path) -> None:
@ -68,3 +76,145 @@ def test_attachment_file_api_round_trips_uploaded_file(tmp_path: Path) -> None:
assert deleted.status_code == 200
assert deleted.json() == {"ok": True}
assert missing.status_code == 404
def test_user_files_api_uses_virtual_roots_and_hides_workspace(tmp_path: Path) -> None:
    """The user-files API exposes only the virtual roots and round-trips an uploaded file.

    Listing entries must not carry a ``bucket`` key, and the status payload must report
    the workspace as hidden without a ``base_path`` key.
    """
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        headers = _auth_headers(app)

        def fetch(url: str, path: str):
            # GET helper for the endpoints that take the virtual path as a query param.
            return client.get(url, params={"path": path}, headers=headers)

        root = client.get("/api/user-files/browse", headers=headers)
        status = client.get("/api/user-files/status", headers=headers)
        upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("hello.txt", b"hello user files", "text/plain")},
            headers=headers,
        )
        uploads = fetch("/api/user-files/browse", "uploads")
        preview = fetch("/api/user-files/preview", "uploads/hello.txt")
        download = fetch("/api/user-files/download", "uploads/hello.txt")

    # Root listing: fixed virtual roots only, with no storage details.
    assert root.status_code == 200
    root_items = root.json()["items"]
    assert [item["name"] for item in root_items] == ["uploads", "outputs", "shared", "tasks"]
    assert all("bucket" not in item for item in root_items)

    # Status: workspace hidden, no base path.
    assert status.status_code == 200
    status_body = status.json()
    assert status_body["workspace_visible"] is False
    assert "base_path" not in status_body

    # Upload, then see it via browse / preview / download.
    assert upload.status_code == 200
    assert upload.json()["path"] == "uploads/hello.txt"
    assert uploads.status_code == 200
    assert [item["name"] for item in uploads.json()["items"]] == ["hello.txt"]
    assert preview.status_code == 200
    assert preview.json()["content"] == "hello user files"
    assert download.status_code == 200
    assert download.content == b"hello user files"
def test_user_files_api_rejects_invalid_paths_and_root_delete(tmp_path: Path) -> None:
    """Traversal, unknown roots, absolute-style paths and deleting a root all answer HTTP 400."""
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        headers = _auth_headers(app)

        def fetch(url: str, path: str):
            # GET helper for the endpoints that take the virtual path as a query param.
            return client.get(url, params={"path": path}, headers=headers)

        traversal = fetch("/api/user-files/browse", "uploads/../state")
        unknown_root = fetch("/api/user-files/browse", "memory/private.txt")
        absolute_browse = fetch("/api/user-files/browse", "/uploads/input.txt")
        absolute_download = fetch("/api/user-files/download", "/outputs/result.txt")
        absolute_preview = fetch("/api/user-files/preview", "/shared/profile.json")
        absolute_mkdir = client.post(
            "/api/user-files/mkdir",
            params={"path": "/tasks/task-123/draft.md"},
            headers=headers,
        )
        absolute_upload = client.post(
            "/api/user-files/upload",
            data={"path": "/uploads"},
            files={"file": ("input.txt", b"x", "text/plain")},
            headers=headers,
        )
        delete_root = client.delete(
            "/api/user-files/delete",
            params={"path": "uploads"},
            headers=headers,
        )

    rejected = (
        traversal,
        unknown_root,
        absolute_browse,
        absolute_download,
        absolute_preview,
        absolute_mkdir,
        absolute_upload,
        delete_root,
    )
    for response in rejected:
        assert response.status_code == 400
def test_user_files_api_rejects_anonymous_access_before_storage(tmp_path: Path) -> None:
    """Requests without an Authorization header get HTTP 401 on every user-files endpoint tried."""
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    # No token is registered and no headers are sent with any of these calls.
    with TestClient(app) as client:
        browse = client.get("/api/user-files/browse")
        status = client.get("/api/user-files/status")
        upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("hello.txt", b"hello user files", "text/plain")},
        )
        delete = client.delete(
            "/api/user-files/delete",
            params={"path": "uploads/hello.txt"},
        )
        mkdir = client.post(
            "/api/user-files/mkdir",
            params={"path": "uploads/new"},
        )

    for response in (browse, status, upload, delete, mkdir):
        assert response.status_code == 401
def test_user_files_api_authenticated_request_resolves_identity(tmp_path: Path, monkeypatch) -> None:
    """An authenticated upload reaches the storage resolver with alice's identity attached."""
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)
    captured_contexts = []

    async def recording_service(self):
        # Stand-in for UserFileStorageResolver.service: remember the resolver's
        # auth context, then hand back a service on local storage.
        captured_contexts.append(self.auth_context)
        return UserFileService(LocalUserFileStorage(tmp_path / "user-files"))

    monkeypatch.setattr(UserFileStorageResolver, "service", recording_service)

    with TestClient(app) as client:
        alice_headers = _auth_headers(app, "alice")
        upload = client.post(
            "/api/user-files/upload",
            data={"path": "uploads"},
            files={"file": ("alice.txt", b"alice", "text/plain")},
            headers=alice_headers,
        )

    assert upload.status_code == 200
    assert captured_contexts
    first_context = captured_contexts[0]
    assert first_context.username == "alice"
    assert first_context.backend_id == "alice"
    assert first_context.storage_namespace == "users/alice"
def test_user_files_api_streams_upload_and_enforces_configured_limit(tmp_path: Path, monkeypatch) -> None:
    """With the upload cap set to 5 bytes, a 5-byte file is stored and a 6-byte file gets HTTP 413."""
    # The limit is set through the environment before the service and app are created.
    monkeypatch.setenv("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", "5")
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        headers = _auth_headers(app)

        def send(filename: str, payload: bytes):
            # Both uploads target uploads/ and differ only in file name and body.
            return client.post(
                "/api/user-files/upload",
                data={"path": "uploads"},
                files={"file": (filename, payload, "text/plain")},
                headers=headers,
            )

        ok_upload = send("small.txt", b"abcde")
        too_large = send("large.txt", b"abcdef")
        preview = client.get(
            "/api/user-files/preview",
            params={"path": "uploads/small.txt"},
            headers=headers,
        )

    # Exactly at the limit: accepted.
    assert ok_upload.status_code == 200
    assert ok_upload.json()["path"] == "uploads/small.txt"
    # One byte over: rejected with an explanatory detail.
    assert too_large.status_code == 413
    assert "File too large" in too_large.json()["detail"]
    # The accepted file is still readable after the rejected upload.
    assert preview.status_code == 200
    assert preview.json()["content"] == "abcde"