From 7f238a3168efa28d3b6254e4d0291f117c565986 Mon Sep 17 00:00:00 2001 From: Ivan087 Date: Tue, 12 May 2026 16:15:14 +0800 Subject: [PATCH 01/15] refactor: full-stack restructure with multi-tenancy, workspace management, and K8s diagnostics - Add Workspace domain (entity, repository, service, handler, DTO) - Add multi-tenant K8s client with tenant binding and quota management - Add K8s diagnostics client (instance diagnostics) - Add authorization middleware (authz package) - Restructure frontend to feature-based architecture (features/) - Add User Management page in configuration - Add AccessDenied page and route guards - Refactor shared components (form inputs, layout, UI) - Update Tailwind config for new design system - Add comprehensive documentation (docs/, tasks/, plans) - Improve cluster service with better kubeconfig handling - Add tests for crypto, config, helm client, tenant binding --- .gitignore | 15 + AGENTS.md | 47 + CLAUDE.md | 47 + Makefile | 88 +- Multi-Tenant Kubeconfig.md | 127 ++ README.md | 497 +++-- backend/Dockerfile | 7 +- backend/cmd/api/main.go | 122 +- backend/config/bootstrap.example.json | 11 +- backend/docker-compose.yml | 22 +- .../adapter/input/http/dto/artifact_dto.go | 18 +- .../adapter/input/http/dto/auth_dto.go | 68 +- .../adapter/input/http/dto/cluster_dto.go | 61 +- .../adapter/input/http/dto/converter.go | 30 +- .../adapter/input/http/dto/error_dto.go | 1 - .../adapter/input/http/dto/instance_dto.go | 104 +- .../adapter/input/http/dto/monitoring_dto.go | 91 +- .../adapter/input/http/dto/registry_dto.go | 55 +- .../input/http/rest/artifact_handler.go | 49 +- .../adapter/input/http/rest/auth_handler.go | 218 ++- .../input/http/rest/cluster_handler.go | 14 + .../input/http/rest/instance_handler.go | 334 +++- .../input/http/rest/registry_handler.go | 10 + .../internal/adapter/input/http/rest/utils.go | 3 +- .../input/http/rest/workspace_handler.go | 165 ++ backend/internal/adapter/output/factory.go | 103 +- 
.../adapter/output/helm/real/helm_client.go | 69 +- .../output/helm/real/helm_client_test.go | 45 + .../adapter/output/k8s/diagnostics_client.go | 294 +++ .../adapter/output/k8s/tenant_client.go | 388 ++++ .../adapter/output/k8s/tenant_client_test.go | 172 ++ .../adapter/output/k8s/tenant_mock.go | 36 + .../output/oci/mock/oci_client_mock.go | 160 +- .../adapter/output/oci/real/oci_client.go | 434 ++++- .../mock/cluster_repository_mock.go | 37 +- .../mock/instance_repository_mock.go | 31 +- .../mock/registry_repository_mock.go | 35 +- .../persistence/mock/user_repository_mock.go | 27 +- .../mock/workspace_repository_mock.go | 162 ++ .../postgres/cluster_repository.go | 321 ++-- .../adapter/output/persistence/postgres/db.go | 127 ++ .../postgres/instance_repository.go | 389 ++-- .../postgres/registry_repository.go | 191 +- .../persistence/postgres/user_repository.go | 36 +- .../postgres/workspace_repository.go | 345 ++++ backend/internal/bootstrap/config.go | 200 +- backend/internal/bootstrap/config_test.go | 103 ++ backend/internal/bootstrap/seeder.go | 33 + backend/internal/domain/entity/artifact.go | 227 ++- backend/internal/domain/entity/cluster.go | 37 +- backend/internal/domain/entity/errors.go | 18 +- backend/internal/domain/entity/instance.go | 2 + .../domain/entity/instance_diagnostics.go | 70 + backend/internal/domain/entity/metrics.go | 89 +- backend/internal/domain/entity/registry.go | 16 +- .../internal/domain/entity/tenant_binding.go | 123 ++ .../domain/entity/tenant_binding_test.go | 38 + backend/internal/domain/entity/user.go | 28 +- backend/internal/domain/entity/workspace.go | 150 ++ .../domain/repository/cluster_repository.go | 11 +- .../internal/domain/repository/helm_client.go | 15 +- .../repository/instance_diagnostics_client.go | 11 + .../domain/repository/instance_repository.go | 13 +- .../domain/repository/metrics_client.go | 3 +- .../internal/domain/repository/oci_client.go | 21 +- .../domain/repository/registry_repository.go | 11 +- 
.../domain/repository/tenant_kube_client.go | 15 + .../domain/repository/user_repository.go | 11 +- .../domain/repository/workspace_repository.go | 26 + .../domain/service/artifact_service.go | 43 +- .../internal/domain/service/auth_service.go | 367 +++- .../domain/service/cluster_service.go | 77 +- .../domain/service/instance_service.go | 305 ++- .../domain/service/instance_service_test.go | 74 +- .../domain/service/monitoring_service.go | 35 +- .../internal/domain/service/quota_quantity.go | 54 + .../domain/service/registry_service.go | 79 +- .../domain/service/workspace_service.go | 308 ++++ backend/internal/pkg/authz/authz.go | 144 ++ backend/internal/pkg/crypto/crypto_test.go | 3 +- backend/internal/pkg/jwt/jwt.go | 97 +- backend/scripts/docker-quick-start.sh | 5 +- backend/scripts/generate-bootstrap-config.sh | 79 +- backend/scripts/quick-start-production.sh | 5 +- backend/scripts/test-all-modes.sh | 7 +- database.md | 598 ++++++ docker-compose.yml | 87 +- docs/bug-report.md | 164 ++ docs/bugs-user-a.md | 92 + docs/bugs-user-b.md | 149 ++ docs/bugs-user-c.md | 109 ++ docs/security/bugs-security.md | 284 +++ docs/test-scenarios.md | 1640 +++++++++++++++++ docs/test-users.json | 79 + docs/test2-quota.md | 156 ++ docs/test2-report.md | 141 ++ docs/test2-ui-overflow.md | 271 +++ docs/test2-values-priority.md | 110 ++ docs/user-guide.md | 752 ++++++++ frontend/package-lock.json | 13 +- frontend/package.json | 3 +- frontend/src/api/index.ts | 113 +- frontend/src/app/App.tsx | 13 +- frontend/src/app/constants/navigation.tsx | 180 +- frontend/src/app/providers/AuthContext.ts | 18 +- frontend/src/app/providers/AuthProvider.tsx | 83 +- frontend/src/app/providers/auth-model.ts | 149 ++ frontend/src/app/routes/AccessDeniedPage.tsx | 24 + frontend/src/app/routes/AppRoutes.tsx | 129 +- frontend/src/app/routes/RouteGuard.tsx | 13 +- .../instances/components/DiagnosticsModal.tsx | 244 +++ .../instances/components/EntriesModal.tsx | 86 +- 
.../instances/components/InstanceCard.tsx | 87 +- .../instances/components/ModifyModal.tsx | 70 +- .../pages/InstancesManagementPage.tsx | 56 +- .../registries/components/LaunchModal.tsx | 507 ++++- .../registries/components/TagCard.tsx | 67 +- .../registries/pages/ArtifactBrowserPage.tsx | 83 +- .../pages/RegistriesBrowserPage.tsx | 59 +- frontend/src/features/auth/pages/AuthPage.tsx | 189 +- .../clusters/components/ClusterForm.tsx | 120 +- .../clusters/components/ClusterList.tsx | 83 +- .../clusters/pages/ClusterConfigPage.tsx | 50 +- .../registries/components/RegistryForm.tsx | 42 +- .../registries/components/RegistryList.tsx | 53 +- .../registries/pages/RegistryConfigPage.tsx | 25 +- .../users/pages/UserManagementPage.tsx | 502 +++++ frontend/src/features/home/pages/HomePage.tsx | 365 ++-- .../components/ClusterMonitorCard.tsx | 78 +- .../clusters/components/NodeMetricCard.tsx | 48 +- .../clusters/pages/MonitoringClustersPage.tsx | 2 +- frontend/src/index.css | 23 +- .../components/data-display/StatsCard.tsx | 6 +- .../shared/components/feedback/EmptyState.tsx | 6 +- .../components/feedback/EmptyStateSimple.tsx | 6 +- .../components/feedback/LoadingState.tsx | 7 +- .../src/shared/components/form/Checkbox.tsx | 5 +- .../shared/components/form/DropdownSelect.tsx | 25 +- .../src/shared/components/form/FormField.tsx | 4 +- frontend/src/shared/components/form/Input.tsx | 9 +- .../components/form/SchemaFormGenerator.tsx | 72 +- .../components/form/SearchableSelect.tsx | 31 +- .../src/shared/components/form/Textarea.tsx | 5 +- .../src/shared/components/layout/AppShell.tsx | 4 +- .../src/shared/components/layout/Modal.tsx | 16 +- .../shared/components/layout/PageHeader.tsx | 7 +- .../layout/SidebarLayout/SidebarLayout.tsx | 2 +- .../layout/SidebarLayout/SidebarNav.tsx | 20 +- .../src/shared/components/layout/Tabs.tsx | 6 +- .../components/layout/TopNavLayout/TopNav.tsx | 23 +- .../layout/TopNavLayout/TopNavLayout.tsx | 3 + frontend/src/shared/components/ui/Badge.tsx | 
18 +- frontend/src/shared/components/ui/Button.tsx | 15 +- frontend/src/shared/components/ui/Card.tsx | 9 +- frontend/tailwind.config.js | 36 +- plans.md | 15 + tasks/lessons.md | 8 + tasks/session-notes.md | 30 + tasks/todo.md | 36 + ...titenant_rbac_api_contract.cpython-312.pyc | Bin 0 -> 23503 bytes ...itenant_rbac_ui_playwright.cpython-312.pyc | Bin 0 -> 6189 bytes test/chart_values_yaml_api_contract.py | 77 + test/current-platform-smoke.sh | 104 ++ test/docker-compose.verify.yml | 61 + test/frontend-interactions-audit.py | 176 ++ test/frontend-playwright-smoke.py | 96 + .../instance_card_action_layout_playwright.py | 160 ++ test/multitenant_rbac_api_contract.py | 384 ++++ test/multitenant_rbac_ui_playwright.py | 117 ++ test/readme-deployment-refresh.sh | 17 + test/user_namespace_quota_api_contract.py | 198 ++ test/vllm_k3s_deploy_smoke.py | 283 +++ 172 files changed, 15703 insertions(+), 3162 deletions(-) create mode 100644 AGENTS.md create mode 100644 CLAUDE.md create mode 100644 Multi-Tenant Kubeconfig.md create mode 100644 backend/internal/adapter/input/http/rest/workspace_handler.go create mode 100644 backend/internal/adapter/output/helm/real/helm_client_test.go create mode 100644 backend/internal/adapter/output/k8s/diagnostics_client.go create mode 100644 backend/internal/adapter/output/k8s/tenant_client.go create mode 100644 backend/internal/adapter/output/k8s/tenant_client_test.go create mode 100644 backend/internal/adapter/output/k8s/tenant_mock.go create mode 100644 backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go create mode 100644 backend/internal/adapter/output/persistence/postgres/workspace_repository.go create mode 100644 backend/internal/bootstrap/config_test.go create mode 100644 backend/internal/domain/entity/instance_diagnostics.go create mode 100644 backend/internal/domain/entity/tenant_binding.go create mode 100644 backend/internal/domain/entity/tenant_binding_test.go create mode 100644 
backend/internal/domain/entity/workspace.go create mode 100644 backend/internal/domain/repository/instance_diagnostics_client.go create mode 100644 backend/internal/domain/repository/tenant_kube_client.go create mode 100644 backend/internal/domain/repository/workspace_repository.go create mode 100644 backend/internal/domain/service/quota_quantity.go create mode 100644 backend/internal/domain/service/workspace_service.go create mode 100644 backend/internal/pkg/authz/authz.go create mode 100644 database.md create mode 100644 docs/bug-report.md create mode 100644 docs/bugs-user-a.md create mode 100644 docs/bugs-user-b.md create mode 100644 docs/bugs-user-c.md create mode 100644 docs/security/bugs-security.md create mode 100644 docs/test-scenarios.md create mode 100644 docs/test-users.json create mode 100644 docs/test2-quota.md create mode 100644 docs/test2-report.md create mode 100644 docs/test2-ui-overflow.md create mode 100644 docs/test2-values-priority.md create mode 100644 docs/user-guide.md create mode 100644 frontend/src/app/providers/auth-model.ts create mode 100644 frontend/src/app/routes/AccessDeniedPage.tsx create mode 100644 frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx create mode 100644 frontend/src/features/configuration/users/pages/UserManagementPage.tsx create mode 100644 plans.md create mode 100644 tasks/lessons.md create mode 100644 tasks/session-notes.md create mode 100644 tasks/todo.md create mode 100644 test/__pycache__/multitenant_rbac_api_contract.cpython-312.pyc create mode 100644 test/__pycache__/multitenant_rbac_ui_playwright.cpython-312.pyc create mode 100644 test/chart_values_yaml_api_contract.py create mode 100644 test/current-platform-smoke.sh create mode 100644 test/docker-compose.verify.yml create mode 100644 test/frontend-interactions-audit.py create mode 100644 test/frontend-playwright-smoke.py create mode 100644 test/instance_card_action_layout_playwright.py create mode 100644 
test/multitenant_rbac_api_contract.py create mode 100644 test/multitenant_rbac_ui_playwright.py create mode 100644 test/readme-deployment-refresh.sh create mode 100644 test/user_namespace_quota_api_contract.py create mode 100644 test/vllm_k3s_deploy_smoke.py diff --git a/.gitignore b/.gitignore index fea6c0e..639c669 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,19 @@ redis_data/ tmp/ temp/ *.tmp +.fuse_hidden* + +# Debug scripts +debug_*.py +test_*.py + +# Next.js build output (including stale caches) +frontend/.next*/ +frontend/next-env.d.ts + +# Compiled binary +backend/ocdp-backend + +# IDE / AI temp +.claude/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..bbe281f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,47 @@ +# Project Overview + + +# 🤖 Claude Code Agentic Workflow (Strictly Follow) + +作为本项目的资深 AI 研发工程师,你在执行任何指令时,必须严格遵守以下核心原则与工作流。 + +## Ⅰ. 核心原则 (Core Principles) +1. **No Laziness (拒绝偷懒):** 必须找到问题的根本原因 (Root Causes)。禁止使用临时补丁 (Hack/Temporary fixes)。保持高级工程师的标准。 +2. **Demand Elegance (苛求优雅):** 对于非琐碎的修改,停下来问自己:“有更优雅的实现方式吗?”如果你发现之前的代码很 Hacky,在掌握全局上下文后,用优雅的方式重构它(但不要过度设计)。 +3. **Test-Driven Quality (测试驱动质量):** 在项目根目录维护 `test/` 文件夹,存放结构化测试脚本。每个脚本顶部必须用注释注明其覆盖的功能范围。当代码发生重大变更时,必须执行 `test/` 下所有相关测试脚本并确保通过,方可视为任务完成。 + +## Ⅱ. 任务管理闭环 (Task Management Protocol) +你必须通过读写 `tasks/` 目录下的文件来管理你的工作状态: +1. **Plan First:** 在开始实现前,将计划写入 `tasks/todo.md`,必须是可勾选的 Checkbox 列表。 +2. **Verify Plan:** 在动手写代码前,先和我(User)确认这个计划是否合理。 +3. **Track Progress:** 边做边在 `todo.md` 中打勾标记完成状态。 +4. **Explain Changes:** 在每执行完一个步骤时,给出高层次的代码修改总结。 +5. **Document Results:** 任务完成后,在 `todo.md` 中补充 Review 总结。 +6. **Capture Lessons:** 如果被我纠正了错误,立刻更新 `tasks/lessons.md`。 + +## Ⅲ. 工作流编排 (Workflow Orchestration) + +### 1. 强制规划模式 (Plan Node Default) +- 对于任何非琐碎任务(涉及 3 个以上步骤或架构决策),必须进入规划模式。 +- 提前写好详细的 Spec 以减少歧义。 +- **一旦情况不对劲(报错连连),立即停止盲目推进**,重新评估并制定新计划。 + +### 2. 
经验自我迭代 (Self-Improvement Loop) +- 在每次会话开始时,主动读取 `tasks/lessons.md`,复习该项目的历史教训。 +- 针对犯过的错误,为自己制定防止再次踩坑的规则。 +- 无情地迭代这些经验,直到你的错误率显著下降。 + +### 3. 自主修复 Bug (Autonomous Bug Fixing) +- 当我给你一个 Bug 报告时:**直接去修。不要等我手把手教你。** +- 主动利用 CLI 权限去查看日志、定位错误代码、运行失败的测试用例,然后解决它。 +- 要求对用户“零上下文切换”——你去修复 CI 测试,不需要我告诉你具体该怎么做。 + +### 4. 交付前绝对验证 (Verification Before Done) +- **永远不要在没有证明代码能跑的情况下,把任务标记为“完成”。** +- 问自己:“Staff Engineer(主任工程师)会批准这段代码吗?” +- 必须主动运行测试(例如 `go test`, `npm run build`),检查日志,并向我证明正确性。 +- 对比修改前后的 Diff,确保行为符合预期。 + +### 5. 复杂问题拆解 (Agentic Strategy) +- 遇到极其复杂的问题时,不要试图在一个终端窗口内硬扛。 +- 拆解子任务,主动进行探索性研究,针对焦点问题逐一击破。 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..bbe281f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,47 @@ +# Project Overview + + +# 🤖 Claude Code Agentic Workflow (Strictly Follow) + +作为本项目的资深 AI 研发工程师,你在执行任何指令时,必须严格遵守以下核心原则与工作流。 + +## Ⅰ. 核心原则 (Core Principles) +1. **No Laziness (拒绝偷懒):** 必须找到问题的根本原因 (Root Causes)。禁止使用临时补丁 (Hack/Temporary fixes)。保持高级工程师的标准。 +2. **Demand Elegance (苛求优雅):** 对于非琐碎的修改,停下来问自己:“有更优雅的实现方式吗?”如果你发现之前的代码很 Hacky,在掌握全局上下文后,用优雅的方式重构它(但不要过度设计)。 +3. **Test-Driven Quality (测试驱动质量):** 在项目根目录维护 `test/` 文件夹,存放结构化测试脚本。每个脚本顶部必须用注释注明其覆盖的功能范围。当代码发生重大变更时,必须执行 `test/` 下所有相关测试脚本并确保通过,方可视为任务完成。 + +## Ⅱ. 任务管理闭环 (Task Management Protocol) +你必须通过读写 `tasks/` 目录下的文件来管理你的工作状态: +1. **Plan First:** 在开始实现前,将计划写入 `tasks/todo.md`,必须是可勾选的 Checkbox 列表。 +2. **Verify Plan:** 在动手写代码前,先和我(User)确认这个计划是否合理。 +3. **Track Progress:** 边做边在 `todo.md` 中打勾标记完成状态。 +4. **Explain Changes:** 在每执行完一个步骤时,给出高层次的代码修改总结。 +5. **Document Results:** 任务完成后,在 `todo.md` 中补充 Review 总结。 +6. **Capture Lessons:** 如果被我纠正了错误,立刻更新 `tasks/lessons.md`。 + +## Ⅲ. 工作流编排 (Workflow Orchestration) + +### 1. 强制规划模式 (Plan Node Default) +- 对于任何非琐碎任务(涉及 3 个以上步骤或架构决策),必须进入规划模式。 +- 提前写好详细的 Spec 以减少歧义。 +- **一旦情况不对劲(报错连连),立即停止盲目推进**,重新评估并制定新计划。 + +### 2. 经验自我迭代 (Self-Improvement Loop) +- 在每次会话开始时,主动读取 `tasks/lessons.md`,复习该项目的历史教训。 +- 针对犯过的错误,为自己制定防止再次踩坑的规则。 +- 无情地迭代这些经验,直到你的错误率显著下降。 + +### 3. 
自主修复 Bug (Autonomous Bug Fixing) +- 当我给你一个 Bug 报告时:**直接去修。不要等我手把手教你。** +- 主动利用 CLI 权限去查看日志、定位错误代码、运行失败的测试用例,然后解决它。 +- 要求对用户“零上下文切换”——你去修复 CI 测试,不需要我告诉你具体该怎么做。 + +### 4. 交付前绝对验证 (Verification Before Done) +- **永远不要在没有证明代码能跑的情况下,把任务标记为“完成”。** +- 问自己:“Staff Engineer(主任工程师)会批准这段代码吗?” +- 必须主动运行测试(例如 `go test`, `npm run build`),检查日志,并向我证明正确性。 +- 对比修改前后的 Diff,确保行为符合预期。 + +### 5. 复杂问题拆解 (Agentic Strategy) +- 遇到极其复杂的问题时,不要试图在一个终端窗口内硬扛。 +- 拆解子任务,主动进行探索性研究,针对焦点问题逐一击破。 diff --git a/Makefile b/Makefile index e181899..f9f5d1a 100644 --- a/Makefile +++ b/Makefile @@ -1,56 +1,68 @@ # ============================================================ -# OCDP stack orchestration Makefile -# run-2: 构建前端静态资源 + 启动 nginx(统一入口)和 backend 栈 -# clean-2: 清理 run-2 产生的容器 / 卷 / 网络 +# OCDP root orchestration Makefile # ============================================================ SHELL := /bin/bash COMPOSE_BIN ?= docker compose - ROOT_COMPOSE := docker-compose.yml -BACKEND_COMPOSE := backend/docker-compose.yml -BACKEND_PROFILE := backend +COMPOSE := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -COMPOSE_STACK := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -f $(BACKEND_COMPOSE) --profile $(BACKEND_PROFILE) -COMPOSE_STACK_ALL := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -f $(BACKEND_COMPOSE) -STACK_ENV := ADAPTER_MODE=production BACKEND_BUILD_CONTEXT=$(abspath backend) BACKEND_BUILD_DOCKERFILE=$(abspath backend/Dockerfile) BACKEND_MOCK_BUILD_DOCKERFILE=$(abspath backend/Dockerfile.mock) INIT_DB_SQL_PATH=$(abspath backend/scripts/init-db.sql) +.PHONY: help install run-2 clean-2 docker-dev docker-prod docker-up docker-down docker-logs docker-ps test -STACK_SERVICES := postgres backend nginx +.DEFAULT_GOAL := help -.PHONY: run-2 clean-2 build-backend +help: + @echo "" + @echo "OCDP commands" + @echo "────────────────────────────────────────" + @echo " make install Install local Go / frontend dependencies" + @echo " make run-2 Build and start full Docker Compose stack in background" + @echo " make docker-dev Alias of run-2, kept for old 
docs / muscle memory" + @echo " make docker-prod Alias of run-2" + @echo " make docker-up Alias of run-2" + @echo " make docker-down Stop containers, keep volumes" + @echo " make clean-2 Stop containers and remove project volumes" + @echo " make docker-logs Follow Compose logs" + @echo " make docker-ps Show Compose service status" + @echo " make test Run structured verification script" + @echo "" + @echo "Default local ports: web=18080, https=18443, backend=18081, postgres=15432" + @echo "Override with WEB_HTTP_PORT / WEB_HTTPS_PORT / BACKEND_PORT / POSTGRES_PORT." + @echo "" + +install: + @echo "→ Downloading backend modules" + @cd backend && go mod download + @echo "→ Installing frontend dependencies" + @cd frontend && npm ci run-2: - @echo "═══════════════════════════════════════════════" - @echo "🚀 run-2: rebuild static assets + start web gateway stack" - @echo "═══════════════════════════════════════════════" + @echo "→ Building and starting OCDP stack" + @$(COMPOSE) up --build -d postgres backend nginx @echo "" - @export COMPOSE_PROJECT_NAME=ocdp && \ - export ADAPTER_MODE=production && \ - export BACKEND_BUILD_CONTEXT=$(abspath backend) && \ - export BACKEND_BUILD_DOCKERFILE=$(abspath backend/Dockerfile) && \ - export BACKEND_MOCK_BUILD_DOCKERFILE=$(abspath backend/Dockerfile.mock) && \ - export INIT_DB_SQL_PATH=$(abspath backend/scripts/init-db.sql) && \ - echo "→ Rebuilding frontend static assets" && \ - $(COMPOSE_STACK) run --rm frontend-build && \ - echo "" && \ - echo "→ Rebuilding backend image" && \ - $(COMPOSE_STACK) build backend && \ - echo "" && \ - echo "→ Bringing up backend + nginx services" && \ - $(COMPOSE_STACK) up -d $(STACK_SERVICES) + @$(COMPOSE) ps @echo "" - @echo "✅ Services online:" - @echo "═══════════════════════════════════════════════" + @echo "Web: http://localhost:$${WEB_HTTP_PORT:-18080}" + @echo "Backend: http://localhost:$${BACKEND_PORT:-18081}/health" + +docker-dev: run-2 + +docker-prod: run-2 + +docker-up: run-2 + 
+docker-down: + @$(COMPOSE) down --remove-orphans clean-2: - @echo "═══════════════════════════════════════════════" - @echo "🧹 clean-2: tearing down run-2 stack" - @echo "═══════════════════════════════════════════════" - @$(COMPOSE_STACK_ALL) down --remove-orphans || true - @$(COMPOSE_STACK_ALL) down -v --remove-orphans || true - @$(COMPOSE_BIN) -f $(BACKEND_COMPOSE) down -v --remove-orphans || true - @echo "✅ Environment cleaned" - @echo "═══════════════════════════════════════════════" + @$(COMPOSE) down -v --remove-orphans +docker-logs: + @$(COMPOSE) logs -f +docker-ps: + @$(COMPOSE) ps + +test: + @test/readme-deployment-refresh.sh diff --git a/Multi-Tenant Kubeconfig.md b/Multi-Tenant Kubeconfig.md new file mode 100644 index 0000000..4363f4d --- /dev/null +++ b/Multi-Tenant Kubeconfig.md @@ -0,0 +1,127 @@ +# Technical Specification: Multi-Tenant Kubeconfig & Auth Gateway + +## 1. System Overview & Goals +- **Objective**: Develop a backend API service that automates Kubernetes multi-tenant onboarding (Namespace + Quota isolation) and securely distributes short-lived, dynamic `kubeconfig` files using the Kubernetes `TokenRequest` API. +- **Architecture Independence**: This backend service acts as a standalone control plane. It is **not** strictly bound to a BFF pattern and does **not** need to run inside the target Kubernetes cluster (it supports Out-of-Cluster execution). +- **Out of Scope**: This spec does NOT cover the frontend UI implementation or the downstream workload deployment. It focuses strictly on identity, tenant provisioning, and credential brokering. +- **Security Principles**: Adhere strictly to Zero-Knowledge architecture (no token storage in DB), Ephemeral Credentials (short-lived tokens only), and Least Privilege (the Gateway must NOT be a `cluster-admin`). + +## 2. Architecture & Topology +- **Tech Stack**: Go `net/http` (or FastAPI), utilizing the official Kubernetes Client SDK (`client-go` or `kubernetes-client/python`). 
+- **Control Plane Flow**: + 1. Client/Frontend -> Gateway: User requests environment access. + 2. Gateway -> K8s API: Gateway authenticates to the target K8s cluster using its own master credentials (e.g., an Out-of-Cluster `kubeconfig`). + 3. Gateway -> K8s API: Executes Namespace/SA creation (if new) or calls `TokenRequest` API (if existing). + 4. Gateway -> Client/Frontend: Returns a generated `kubeconfig` YAML string with the short-lived JWT token. + +## 3. Core Business Logic Workflows + +### Phase 1: Tenant Initialization (Onboarding) +Triggered when a new user registers or requests a workspace for the first time. The Gateway must execute a K8s transaction creating four resources: +1. **Namespace**: `tenant-{user_uuid}` +2. **ServiceAccount**: `sa-tenant-admin` (Created inside the tenant's namespace). +3. **RoleBinding**: Bind `sa-tenant-admin` to the `admin` (or custom) ClusterRole, strictly isolated within `tenant-{user_uuid}`. +4. **ResourceQuota**: Enforce limits (e.g., `requests.cpu: "4"`, `limits.memory: "16Gi"`) to prevent noisy neighbors. + +### Phase 2: Credential Distribution (Dynamic Token) +Triggered when the user requests CLI access or downloads a kubeconfig. +1. Locate the user's associated Namespace and ServiceAccount, verifying the user's ownership of the workspace. +2. Audit Logging: Record the credential issuance event (User, IP, Workspace) into the database. +3. Call the `authentication.k8s.io/v1 TokenRequest` API targeting `sa-tenant-admin` in the specific tenant's namespace. +4. Set `expirationSeconds: 7200` (2 hours). Hard limit; cannot be extended. +5. Retrieve the generated JWT token and inject it into a pre-defined `kubeconfig` text template. + +### Phase 3: Automated Renewal & Emergency Suspension +- **Session Management**: If accessed via a Web UI, the Gateway intercepts requests, attaches the dynamic token, and forwards them. If the token is within 10 minutes of expiration, the Gateway automatically issues a new TokenRequest. 
+- **Emergency Suspension**: If a workspace is marked compromised, the Gateway deletes its K8s `RoleBinding`, instantly revoking access for all currently active tokens of that tenant. + +## 4. API Contracts + +### 4.1. Initialize Tenant Workspace +- **Route**: `POST /api/v1/workspaces/init` +- **Auth**: Gateway Session / Bearer Token +- **Rate Limit**: Strictly rate-limited per user to prevent Namespace exhaustion. +- **Request Payload**: + ```json + { + "tier": "basic" // Determines the ResourceQuota template + } +- **Response Payload (201 Created)**: + ```json + { + "namespace": "tenant-a1b2c3d4", + "status": "provisioned", + "quota": {"cpu": "4", "memory": "8Gi"} + } + ``` +### 4.2. Generate Dynamic Kubeconfig +- **Route**: `GET /api/v1/workspaces/credentials/kubeconfig` +- **Auth**: Gateway Session / Bearer Token +- **Response Payload (200 OK)**: Returns raw `application/x-yaml` content. + ```yaml + apiVersion: v1 + clusters: + - cluster: + server: https:// + certificate-authority-data: + name: internal-cluster + contexts: + - context: + cluster: internal-cluster + namespace: tenant-a1b2c3d4 # Default context locked to their namespace + user: sa-tenant-admin + name: tenant-context + current-context: tenant-context + kind: Config + users: + - name: sa-tenant-admin + user: + token: "eyJhbGciOiJSUzI1NiIs..." # Short-lived token injected here + ``` + +### 4.3. Suspend Workspace (Emergency Kill Switch) +- **Route**: `POST /api/v1/workspaces/{id}/suspend` +- **Auth**: Admin Only +- **Behavior**: Updates DB status to suspended and deletes the associated K8s RoleBinding. + + +## 5. Data Architecture & Persistence +- **Database**: PostgreSQL (Relational mapping between Users and K8s Namespaces).
+- **Table**: `users` + - `id` (UUID, PK), `email`, `password_hash`, `status` +- **Table**: `workspaces` + + - `id` (UUID, PK) + + - `user_id` (UUID, FK to Users table) + + - `k8s_namespace` (String, unique) + + - `k8s_sa_name` (String) + + - `tier` (String) + + - `created_at` (Timestamp) +- **Table**: `audit_logs` (Security Compliance) + - `id` (UUID, PK), `user_id` (UUID), `workspace_id` (UUID), `action` (e.g., IssueKubeconfig), `ip_address`, `created_at` +- **Constraint**: We do NOT store the K8s Token in the database. Tokens are ephemeral and generated on-the-fly. + +## 6. Security, Threat Mitigation & Infrastructure Constraints + +### 6.1 Threat Model +| Threat | Mitigation Strategy | +| :--- | :--- | +| **Gateway Compromise** | The Gateway uses a strictly restricted K8s role. It cannot read existing `Secrets` or interfere with other tenants' running Pods. | +| **Token Theft (XSS)** | Application-level Auth must use `HttpOnly, Secure` Cookies. Generated Kubeconfigs expire in 2 hours. | +| **Resource Abuse (Mining)** | Hardcoded `ResourceQuota` per tenant upon creation. Global `LimitRange` enforced at the cluster level. | + +### 6.2 Restricted Gateway Credentials (Crucial) +The Gateway requires a K8s credential (Out-of-Cluster `kubeconfig` or Cloud IAM Role) to operate. **This credential MUST NOT have `cluster-admin` privileges.** It should be bound to a custom `ClusterRole` with ONLY the following permissions: +- `create`, `get`, `list` on `namespaces`, `resourcequotas`. +- `create`, `get`, `list` on `serviceaccounts`, `rolebindings`; `delete` on `rolebindings` (required for Emergency Suspension). +- `create` on `serviceaccounts/token` (CRITICAL for TokenRequest API). +- *Strictly prohibited*: `get` or `list` on `secrets`, `pods`, or `deployments`. + +### 6.3 Deployment & Networking +- **Deployment Agnostic**: The application will be packaged as a Docker image and can be deployed via Docker Compose, standalone VMs, or within a Kubernetes cluster.
+- **CORS/CSP**: Since this might not be a single-origin BFF, explicit CORS policies (`Access-Control-Allow-Origin`) must be tightly defined if the frontend is hosted on a separate domain. Wildcards (`*`) are prohibited. \ No newline at end of file diff --git a/README.md b/README.md index 20b9f9c..2fee644 100644 --- a/README.md +++ b/README.md @@ -1,336 +1,267 @@ -# OCDP - Open Cloud Development Platform +# OCDP - Open Cloud Deployment Platform -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) -[![Go Version](https://img.shields.io/badge/go-1.24+-00ADD8?logo=go)](https://go.dev/) -[![Node Version](https://img.shields.io/badge/node-20+-339933?logo=node.js)](https://nodejs.org/) -[![Docker](https://img.shields.io/badge/docker-20.10+-2496ED?logo=docker)](https://www.docker.com/) +OCDP 是一个面向 Kubernetes 的大模型推理部署平台。当前核心场景是:用户在页面选择 Harbor 中的 `vllm-serve` Helm Chart,填写实例名称、命名空间和 values 后,后端从 Harbor 拉取封装好的 OCI Helm Chart,并通过 Helm SDK 部署到已配置好的 Kubernetes 集群。 -开源云原生开发平台,用于管理 Kubernetes 集群、OCI Registry 和 Helm Charts 部署。 +## 当前能力 ---- +- Registry 管理:保存 Harbor / OCI Registry 地址与凭据,敏感字段加密入库。 +- Artifact 浏览:通过 Harbor v2.0 API 浏览当前凭据可见的项目、repositories 和 chart tags,避免依赖 `/v2/_catalog` 全局 catalog 权限。 +- 一键部署:从前端发起实例创建,后端拉取 Chart 并在目标集群执行 Helm install/upgrade/uninstall。 +- 集群管理:保存 Kubernetes API Server、CA、客户端证书或 token,用于后端连接集群。 +- 实例管理:查看部署状态、Helm revision、Service/Ingress 入口信息。 +- 认证:内置 JWT 登录,首次启动可通过 bootstrap 注入管理员账号。 -## ✨ 特性 +## 技术栈 -- 🎯 **Registry 管理** - 支持 Harbor、Docker Registry、OCI 标准仓库 -- 📦 **Artifact 浏览** - 浏览和管理 Helm Charts、容器镜像 -- 🚀 **一键部署** - 可视化部署 Helm Charts 到 Kubernetes 集群 -- 🔍 **智能过滤** - 按 MediaType 过滤 artifacts(chart、image、other) -- 🎨 **现代 UI** - 响应式设计,基于 React + TypeScript -- 🔐 **安全认证** - JWT 认证,加密存储敏感信息 -- 🐳 **容器化** - 完整的 Docker 支持,多种运行模式 -- 🔄 **热重载** - 开发模式支持代码热重载 +- 后端:Go 1.24,Gorilla Mux,Hexagonal Architecture,PostgreSQL,ORAS SDK,Helm SDK,Kubernetes client-go。 +- 前端:React 18,TypeScript,Vite,TailwindCSS。 +- 部署:Docker Compose,Nginx 静态文件与 
`/api` 反向代理,PostgreSQL 持久化。 ---- +## 项目结构 -## 🚀 快速开始 - -### 前置要求 - -- Docker 20.10+ -- Docker Compose 2.0+ -- (可选) Make 工具 - -### 5分钟快速体验 - -```bash -# 1. 克隆项目 -git clone -cd ocdp-go - -# 2. 启动开发环境(Mock 模式,无需数据库) -make docker-dev - -# 3. 访问应用 -# - 前端:http://localhost:5173 -# - 后端:http://localhost:8080 -# - 默认账号:admin / admin123 -``` - -**详细指南**:查看 [快速开始指南](./QUICK_START.md) - ---- - -## 📚 文档导航 - -### 📖 核心文档(必读) -- 🚀 [快速开始](./QUICK_START.md) - 5分钟快速上手 -- 📋 [使用指南](./USAGE_GUIDE.md) - 详细使用说明(推荐) -- 💡 [命令速查表](./COMMANDS_CHEATSHEET.md) - 常用命令快速参考 -- 📚 [文档中心](./docs/README.md) - 完整文档索引 - -### 🔧 专业文档 -- 📐 [开发规范](./docs/development/specification.md) - 代码规范和架构 -- 🚢 [部署指南](./docs/deployment/docker-guide.md) - 生产环境部署 -- 🔒 [安全实践](./docs/security/security-implementation.md) - 安全配置 -- 🎨 [功能文档](./docs/features/) - 详细功能说明 - -### 🔗 其他资源 -- 📋 [OpenAPI 规范](./backend/docs/openapi.yaml) - RESTful API 定义 -- 📦 [历史文档](./docs/archive/) - 项目演进历史 - ---- - -## 🏗️ 技术架构 - -### 技术栈 - -**后端**: -- Go 1.24+ (Hexagonal Architecture) -- PostgreSQL 16 -- Redis 7 - -**前端**: -- React 18 -- TypeScript 5 -- Vite 6 -- TailwindCSS 3 - -**容器化**: -- Docker -- Docker Compose -- Multi-stage builds - -### 架构图 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Frontend │ -│ React + TypeScript + Vite │ -└──────────────────────────┬──────────────────────────────────┘ - │ HTTP/REST -┌──────────────────────────┼──────────────────────────────────┐ -│ │ Backend API │ -│ ▼ │ -│ ┌─────────────────────┐ │ -│ │ Input Adapters │ │ -│ │ (REST/GraphQL) │ │ -│ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ Domain Services │ │ -│ │ (Business Logic) │ │ -│ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ Output Adapters │ │ -│ │ (Repos/Clients) │ │ -│ └──────────┬──────────┘ │ -└───────────────────────┼─┴────────────────────────────────┘ - │ - ┌───────────────┼───────────────┐ - │ │ │ - ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ - │ PG DB │ │ Redis │ │ OCI │ - │ │ │ │ │ 
Registry│ - └─────────┘ └─────────┘ └─────────┘ -``` - -### 运行模式 - -| 模式 | 特点 | 适用场景 | 命令 | -|------|------|----------|------| -| **开发模式** | Mock 数据,热重载 | 日常开发 | `make docker-dev` | -| **生产模式** | 真实数据库,完整功能 | 生产部署 | `make docker-prod` | -| **Mock 模式** | 独立测试单个服务 | 单元测试 | `make docker-test-backend` | - ---- - -## 🛠️ 开发指南 - -### 项目结构 - -``` +```text ocdp-go/ -├── backend/ # Go 后端服务 -│ ├── cmd/api/ # 应用入口 -│ ├── internal/ # 内部代码 -│ │ ├── adapter/ # 适配器层 -│ │ ├── domain/ # 领域层 -│ │ └── bootstrap/ # 启动配置 -│ ├── Dockerfile # 生产环境 -│ ├── Dockerfile.dev # 开发环境 -│ └── Dockerfile.mock # Mock 测试 -│ -├── frontend/ # React 前端应用 -│ ├── src/ -│ │ ├── core/ # 核心功能 -│ │ ├── features/ # 功能模块 -│ │ └── shared/ # 共享组件 -│ ├── Dockerfile # 生产环境 -│ ├── Dockerfile.dev # 开发环境 -│ └── Dockerfile.mock # Mock 测试 -│ -├── api/ # API 规范 -│ └── openapi.yaml # OpenAPI 定义 -│ -├── docs/ # 项目文档 -│ ├── features/ # 功能文档 -│ ├── deployment/ # 部署文档 -│ └── development/ # 开发文档 -│ -├── docker-compose.yml # 统一配置(使用 profiles) -└── Makefile # 便捷命令 +├── backend/ # Go 后端 +│ ├── cmd/api/ # API 入口 +│ ├── internal/adapter/input/ # HTTP REST handlers / DTO +│ ├── internal/adapter/output/ # PostgreSQL / ORAS / Helm / K8s 实现 +│ ├── internal/domain/ # Entity / Repository interface / Service +│ └── internal/bootstrap/ # 首次启动数据注入 +├── frontend/ # React + Vite 前端 +├── infra/nginx/ # Nginx 网关配置和 TLS 证书 +├── docker-compose.yml # 本地完整部署:PostgreSQL + Backend + 前端 build + Nginx +├── backend/docker-compose.yml # PostgreSQL + Backend + pgAdmin +├── Makefile # 推荐入口:install / run-2 / docker-dev / docker-down +└── tasks/ # Agent 工作记录 ``` -### 常用命令 +## 后端部署链路 -```bash -# Docker 服务(推荐) -make docker-dev # 启动开发环境 -make docker-prod # 启动生产环境 -make docker-test-backend # 测试后端 -make docker-test-frontend # 测试前端 -make docker-logs # 查看日志 -make docker-down # 停止服务 +1. 前端调用 `POST /api/v1/clusters/{clusterId}/instances`,提交 `name`、`namespace`、`registryId`、`repository`、`tag` 和可选 `values`。 +2. 
后端 `InstanceService.CreateInstance` 校验集群、Registry 和实例名唯一性,创建 pending 记录。 +3. Chart 浏览使用 Harbor v2.0 API;实际部署时后端使用 ORAS SDK 访问 Harbor,将指定 repository/tag 的 Helm Chart layer 下载到 `/tmp/charts/{chart}-{version}.tgz`。 +4. 后端用数据库中保存的集群凭据生成临时 kubeconfig。 +5. Helm SDK 加载本地 chart 包,并对目标集群执行 `install`;后续通过 Helm status 同步实例状态。 +6. 删除、升级和回滚实例同样通过 Helm SDK 操作目标集群。 -# OpenAPI 工作流 -make openapi-validate # 验证 API 规范 -make openapi-gen # 生成代码 -make openapi-docs # 生成文档 +## 部署前准备 -# 本地开发(不使用 Docker) -make install # 安装依赖 -make dev-local # 启动本地开发 -make test # 运行测试 +需要本机已安装: + +- Docker +- Docker Compose v2 或更高版本 +- Make,可选;没有 Make 时可直接执行 Compose 命令 + +根目录 `.env` 用于开发环境启动时注入端口、数据库、初始账号、Harbor 和 Kubernetes 集群。它是开发/测试 bootstrap 数据,不是长期配置中心;系统启动后建议在页面里维护 Registry 和 Cluster。不要提交真实 `.env`。 + +关键变量如下,实际值以你的 `.env` 为准: + +```dotenv +# 登录账号 bootstrap +BOOTSTRAP_ADMIN_USER=admin +BOOTSTRAP_ADMIN_PASS=change-me +BOOTSTRAP_ADMIN_EMAIL=admin@example.com + +# Harbor bootstrap +BOOTSTRAP_REGISTRY_NAME=harbor +BOOTSTRAP_REGISTRY_URL=https://harbor.example.com +BOOTSTRAP_REGISTRY_DESC=Harbor Registry +# 推荐使用 Harbor robot 账号,只授予目标项目 pull/read 权限 +BOOTSTRAP_REGISTRY_ROBOT_USER='robot$project+ocdp' +BOOTSTRAP_REGISTRY_ROBOT_PASS='robot-token' + +# 可选 fallback;未配置 ROBOT 变量时才会使用 +BOOTSTRAP_REGISTRY_USER=admin-or-user +BOOTSTRAP_REGISTRY_PASS=change-me +BOOTSTRAP_REGISTRY_INSECURE=false + +# Kubernetes 集群 bootstrap,名称列表用逗号分隔 +BOOTSTRAP_CLUSTERS=cluster1,cluster2 +BOOTSTRAP_CLUSTER_CLUSTER1_HOST=https://x.x.x.x:6443 +BOOTSTRAP_CLUSTER_CLUSTER1_DESC=GPU Cluster 1 +BOOTSTRAP_CLUSTER_CLUSTER1_CA=base64-ca-data +BOOTSTRAP_CLUSTER_CLUSTER1_CERT=base64-client-cert-data +BOOTSTRAP_CLUSTER_CLUSTER1_KEY=base64-client-key-data + +# 如使用 token,可配置 TOKEN;CERT/KEY 可按实际鉴权方式留空 +BOOTSTRAP_CLUSTER_CLUSTER2_HOST=https://x.x.x.x:6443 +BOOTSTRAP_CLUSTER_CLUSTER2_TOKEN=token-value + +# 服务端口,默认使用高位端口避免和本机其他项目冲突 +WEB_HTTP_PORT=18080 +WEB_HTTPS_PORT=18443 +BACKEND_PORT=18081 +POSTGRES_PORT=15432 + +# 安全与数据库 
+JWT_SECRET=replace-with-a-strong-secret +ENCRYPTION_KEY=replace-with-32-byte-key +POSTGRES_DB=ocdp +POSTGRES_USER=postgres +POSTGRES_PASSWORD=replace-me + +# 可选:Docker 构建后端时使用的 Go module proxy。 +# 国内网络建议保留默认值;如公司网络要求,也可改回 https://proxy.golang.org,direct。 +GOPROXY=https://goproxy.cn,direct +GOSUMDB=sum.golang.google.cn ``` -### 开发工作流 +说明: -1. **启动开发环境**: - ```bash - make docker-dev - ``` +- `BOOTSTRAP_CONFIG_JSON` 优先级最高,适合把完整 bootstrap 配置作为 JSON 注入。 +- 没有 `BOOTSTRAP_CONFIG_JSON` 时,后端会读取 `BOOTSTRAP_*` 变量生成初始账号、Registry 和 Cluster。 +- 没有任何显式 bootstrap 配置时,后端不会预注入用户、Registry 或 Cluster;代码中不再保留真实 Harbor、admin 或集群 fallback。 +- 初始管理员必须显式配置 `BOOTSTRAP_ADMIN_USER` 和 `BOOTSTRAP_ADMIN_PASS`。如果只配置 Registry/Cluster 而未配置管理员账号,系统不会自动创建默认账号。 +- Registry bootstrap 凭据优先级为 `BOOTSTRAP_REGISTRY_ROBOT_USER/PASS`,然后才是 `BOOTSTRAP_REGISTRY_USER/PASS`。Harbor robot 账号需要能访问目标项目的 repositories 和 artifacts。 +- Harbor robot 用户名通常包含 `$`。本项目 Compose 已使用 raw `env_file` 传给后端;如果你在 shell 里临时 `export BOOTSTRAP_REGISTRY_ROBOT_USER=...`,请用单引号包住值,避免 shell 展开 `$project`。 +- 已存在同名用户、Registry 或 Cluster 时,bootstrap 会跳过,不会覆盖数据库里的记录。 +- `ENCRYPTION_KEY` 用于加密保存 Harbor 密码和集群凭据;生产环境首次启动后不要随意更换,否则旧数据无法解密。 -2. **修改代码**(自动热重载): - - 后端:编辑 `backend/` 下的 Go 文件 - - 前端:编辑 `frontend/src/` 下的 React 组件 +## 推荐部署流程 -3. **查看日志**: - ```bash - make docker-logs - ``` - -4. **测试功能**: - - 前端:http://localhost:5173 - - 后端:http://localhost:8080 - -5. **提交代码**: - ```bash - git add . - git commit -m "feat: add new feature" - git push - ``` - ---- - -## 🧪 测试 - -### 后端测试 +当前推荐使用根目录 Makefile。`docker-dev`、`docker-prod`、`docker-up` 都是兼容旧文档的别名,实际会启动同一套完整 Docker Compose 栈:PostgreSQL、Backend、前端静态构建和 Nginx。 ```bash -# 启动后端 Mock -make docker-test-backend-bg +# 1. 在根目录检查 .env +ls .env -# 测试健康检查 -curl http://localhost:8080/health +# 2. 可选:安装本地依赖。只部署 Docker 栈时不是必须,但这个命令可用。 +make install -# 测试登录 -curl -X POST http://localhost:8080/api/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"username":"admin","password":"admin123"}' +# 3. 
如果默认高位端口仍被其他项目占用,再临时换端口 +export WEB_HTTP_PORT=18080 +export WEB_HTTPS_PORT=18443 +export BACKEND_PORT=18081 +export POSTGRES_PORT=15432 -# 测试 API -curl http://localhost:8080/api/v1/registries -curl http://localhost:8080/api/v1/clusters -``` +# 4. 构建并后台启动完整栈 +make run-2 -### 前端测试 - -```bash -# 启动前端 Mock -make docker-test-frontend-bg - -# 访问前端 -open http://localhost:3000 -``` - -### 集成测试 - -```bash -# 启动完整环境 +# 兼容旧文档,也可以执行: +make docker-dev make docker-prod -# 运行测试套件 -make test +# 5. 查看服务 +make docker-ps ``` ---- +访问地址: -## 📦 部署 +- 前端入口:http://localhost:${WEB_HTTP_PORT:-18080} +- 后端健康检查:http://localhost:${BACKEND_PORT:-18081}/health +- Swagger UI:http://localhost:${BACKEND_PORT:-18081}/api/docs +- Nginx 健康检查:http://localhost:${WEB_HTTP_PORT:-18080}/healthz -### Docker Compose 部署(推荐) +没有 Make 时,直接用根目录 Compose 文件即可。注意要加 `--build`,因为后端镜像和前端静态资源需要构建: ```bash -# 1. 配置环境变量 -export JWT_SECRET="your-production-secret" -export ENCRYPTION_KEY="your-32-byte-encryption-key" - -# 2. 启动服务 -docker compose up -d - -# 3. 查看状态 +docker compose up --build -d postgres backend nginx docker compose ps ``` -### Kubernetes 部署 +如果直接执行 `docker compose up`,Compose 也会使用同一个完整栈;但在代码或 Dockerfile 改动后建议显式加 `--build`,避免复用旧镜像。 -查看 [Kubernetes 部署指南](./docs/deployment/kubernetes-guide.md) +## 验证部署 ---- +```bash +# 健康检查 +curl http://localhost:${BACKEND_PORT:-18081}/health +curl http://localhost:${WEB_HTTP_PORT:-18080}/healthz -## 🤝 贡献 +# 登录,返回 token。把 password 替换成 .env 里的 BOOTSTRAP_ADMIN_PASS。 +curl -s -X POST http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"<password>"}' -欢迎贡献代码!请遵循以下步骤: +# 查看 bootstrap 是否生效,需要带 Bearer token +curl http://localhost:${BACKEND_PORT:-18081}/api/v1/registries \ + -H "Authorization: Bearer <token>" -1. Fork 项目 -2. 创建功能分支 (`git checkout -b feature/amazing-feature`) -3. 提交更改 (`git commit -m 'feat: add amazing feature'`) -4. 推送分支 (`git push origin feature/amazing-feature`) -5. 
创建 Pull Request +curl http://localhost:${BACKEND_PORT:-18081}/api/v1/clusters \ + -H "Authorization: Bearer <token>" +``` -### 开发规范 +页面验证: -- **代码风格**:Go (gofmt),TypeScript (ESLint + Prettier) -- **提交规范**:遵循 [Conventional Commits](https://www.conventionalcommits.org/) -- **测试覆盖**:新功能必须包含测试 +1. 打开前端入口并登录。 +2. 进入 Chart Browser,确认能看到 Harbor 中的 `vllm-serve` 或 nginx chart repository。当前默认只展示可部署 Helm chart。 +3. 选择 chart tag,点击 Launch。 +4. 选择目标集群、命名空间,填写实例名和 values。values 支持 schema 表单或 YAML;YAML 会在前端校验,并由后端解析为 Helm values map。 +5. 提交后到实例页面查看状态;后端会异步安装并同步 Helm 状态。 ---- +命令行 smoke test: -## 📄 许可证 +```bash +# 只验证登录、Registry health、Harbor chart 浏览和 values schema +BASE_URL=http://localhost:${BACKEND_PORT:-18081}/api/v1 \ +ADMIN_USER="${BOOTSTRAP_ADMIN_USER:-admin}" \ +ADMIN_PASS="<password>" \ +./test/current-platform-smoke.sh -本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情 +# 允许真实部署时,会创建测试 release 并在结束后调用平台删除 +RUN_DEPLOY_TEST=true \ +TEST_NAMESPACE=ocdp-smoke \ +TEST_RELEASE=ocdp-smoke-nginx \ +BASE_URL=http://localhost:${BACKEND_PORT:-18081}/api/v1 \ +ADMIN_PASS="<password>" \ +./test/current-platform-smoke.sh +``` ---- +## 常用运维命令 -## 🙏 致谢 +```bash +# 查看日志 +make docker-logs -- [Go](https://go.dev/) - 后端开发语言 -- [React](https://react.dev/) - 前端框架 -- [Vite](https://vitejs.dev/) - 构建工具 -- [Docker](https://www.docker.com/) - 容器化平台 -- [Kubernetes](https://kubernetes.io/) - 容器编排 -- [Harbor](https://goharbor.io/) - OCI Registry +# 重启后端 +docker compose restart backend ---- +# 如果后端容器被重建过,Nginx 可能仍缓存旧 upstream IP;只需重启本项目 Nginx +docker compose restart nginx -## 📞 联系方式 +# 停止本项目服务,但保留数据卷 +make docker-down -- **项目主页**:https://github.com/your-org/ocdp-go -- **问题反馈**:https://github.com/your-org/ocdp-go/issues -- **文档网站**:https://docs.ocdp.example.com +# 清理本项目容器和数据卷,谨慎使用 +make clean-2 +``` ---- +## 本地开发与测试 -
- Built with ❤️ by the OCDP Team -
+后端: + +```bash +cd backend +go test ./... +go run cmd/api/main.go +``` + +前端: + +```bash +cd frontend +npm ci +npm run build +``` + +Mock 后端仍可通过 `backend/docker-compose.yml` 的 `mock` profile 启动: + +```bash +docker compose -f backend/docker-compose.yml --profile mock up -d backend-mock +``` + +## 注意事项 + +- 不要为了端口冲突停止其他项目;优先通过 `WEB_HTTP_PORT`、`WEB_HTTPS_PORT`、`BACKEND_PORT`、`POSTGRES_PORT` 换端口。当前默认端口已经是 `18080/18443/18081/15432`。 +- 如果旧文档提到 `make docker-dev`、`make docker-prod`,现在这些命令仍可用,都会启动同一套 Docker 栈。 +- 如果之前用旧配置启动失败过,PostgreSQL 卷里可能残留旧的加密数据,表现为 `/api/v1/clusters` 或 `/api/v1/registries` 解密失败。开发/重装环境可执行 `make clean-2 && make docker-dev` 重新初始化;生产环境不要直接删卷,应先备份数据库。 +- `vllm-serve` 必须以 Helm Chart OCI artifact 的形式存在于 Harbor 中;后端会寻找 Helm Chart layer 并保存为 `.tgz`。 +- Harbor 浏览使用 `/api/v2.0/projects`、project repositories 和 artifacts API。若 robot 账号无法列项目或 artifacts,页面会显示明确错误;请检查 Harbor 项目成员/robot 权限,而不是给普通用户开放全局 catalog。 +- values YAML 已按 YAML 解析;顶层必须是 mapping,例如 `replicaCount: 1`。 +- Nginx 默认同时监听 HTTP 和 HTTPS,证书位于 `infra/nginx/certs/`,生产环境应替换为正式证书。 +- `make clean-2` 会删除本项目 Compose 卷,包括 PostgreSQL 数据;只想停服务时使用 `docker compose ... down --remove-orphans`。 + +## API 文档 + +- OpenAPI YAML:[backend/docs/openapi.yaml](./backend/docs/openapi.yaml) +- 运行后 Swagger UI:`/api/docs` diff --git a/backend/Dockerfile b/backend/Dockerfile index ae98b1f..f4f6dae 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -4,12 +4,17 @@ # ================================================== FROM golang:1.24-alpine AS builder +ARG GOPROXY=https://goproxy.cn,direct +ARG GOSUMDB=sum.golang.google.cn +ENV GOPROXY=${GOPROXY} +ENV GOSUMDB=${GOSUMDB} + RUN apk add --no-cache git make WORKDIR /build COPY go.mod go.sum ./ -RUN go mod download +RUN sh -c 'for i in 1 2 3; do go mod download && exit 0; echo "go mod download failed, retrying ($i/3)" >&2; sleep 5; done; go mod download' COPY . . 
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o ocdp-backend cmd/api/main.go diff --git a/backend/cmd/api/main.go b/backend/cmd/api/main.go index d32c592..4f1a3be 100644 --- a/backend/cmd/api/main.go +++ b/backend/cmd/api/main.go @@ -27,6 +27,7 @@ import ( "log" "net/http" "os" + "strings" "time" "github.com/gorilla/mux" @@ -35,6 +36,7 @@ import ( "github.com/ocdp/cluster-service/internal/adapter/output" "github.com/ocdp/cluster-service/internal/bootstrap" "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" "github.com/ocdp/cluster-service/internal/pkg/crypto" "github.com/ocdp/cluster-service/internal/pkg/jwt" "github.com/ocdp/cluster-service/internal/pkg/password" @@ -72,6 +74,7 @@ func main() { // ===== 5. 创建 Domain Services ===== authService := service.NewAuthService( repos.UserRepo, + repos.WorkspaceRepo, passwordHasher, tokenGenerator, ) @@ -97,20 +100,31 @@ func main() { repos.HelmClient, repos.OCIClient, repos.EntryClient, + repos.BindingRepo, ) + instanceService.SetDiagnosticsClient(repos.DiagnosticsClient) + instanceService.SetTenantProvisioning(repos.WorkspaceRepo, repos.TenantKubeClient) monitoringService := service.NewMonitoringService( repos.ClusterRepo, repos.MetricsClient, ) + workspaceService := service.NewWorkspaceService( + repos.WorkspaceRepo, + repos.BindingRepo, + repos.ClusterRepo, + repos.TenantKubeClient, + repos.AuditRepo, + ) + log.Println("✅ Domain Services initialized") // ===== 6. 
加载并执行 Bootstrap 预注入 ===== bootstrapConfig, err := bootstrap.LoadBootstrapConfig() if err != nil { log.Printf("⚠️ Warning: Failed to load bootstrap config: %v", err) - // 使用默认配置 + // 使用安全的空配置,避免在配置错误时写入任何预置账号或集群凭据。 bootstrapConfig = bootstrap.GetDefaultBootstrapConfig() } @@ -126,6 +140,7 @@ func main() { artifactHandler := rest.NewArtifactHandler(artifactService) instanceHandler := rest.NewInstanceHandler(instanceService) monitoringHandler := rest.NewMonitoringHandler(monitoringService) + workspaceHandler := rest.NewWorkspaceHandler(workspaceService) swaggerHandler := rest.NewSwaggerHandler() log.Println("✅ Input Adapters (REST handlers) initialized") @@ -133,11 +148,13 @@ func main() { // ===== 8. 设置路由 ===== router := setupRouter( authHandler, + authService, clusterHandler, registryHandler, artifactHandler, instanceHandler, monitoringHandler, + workspaceHandler, swaggerHandler, ) @@ -191,11 +208,13 @@ func getEnv(key, defaultValue string) string { // setupRouter 设置路由 func setupRouter( authHandler *rest.AuthHandler, + authService *service.AuthService, clusterHandler *rest.ClusterHandler, registryHandler *rest.RegistryHandler, artifactHandler *rest.ArtifactHandler, instanceHandler *rest.InstanceHandler, monitoringHandler *rest.MonitoringHandler, + workspaceHandler *rest.WorkspaceHandler, swaggerHandler *rest.SwaggerHandler, ) *mux.Router { router := mux.NewRouter().StrictSlash(true) @@ -222,45 +241,63 @@ func setupRouter( api := router.PathPrefix("/api/v1").Subrouter() // ===== 认证路由 ===== - api.HandleFunc("/auth/register", authHandler.Register) api.HandleFunc("/auth/login", authHandler.Login) api.HandleFunc("/auth/refresh", authHandler.RefreshToken) + protected := api.PathPrefix("").Subrouter() + protected.Use(authMiddleware(authService)) + protected.HandleFunc("/auth/me", authHandler.Me).Methods(http.MethodGet) + protected.HandleFunc("/auth/register", authHandler.Register).Methods(http.MethodPost) + protected.HandleFunc("/users", 
authHandler.ListUsers).Methods(http.MethodGet) + protected.HandleFunc("/users", authHandler.Register).Methods(http.MethodPost) + protected.HandleFunc("/users/{user_id}", authHandler.UpdateUser).Methods(http.MethodPut) + protected.HandleFunc("/users/{user_id}", authHandler.DeleteUser).Methods(http.MethodDelete) + // ===== 集群路由 ===== - api.HandleFunc("/clusters", clusterHandler.CreateCluster).Methods(http.MethodPost) - api.HandleFunc("/clusters", clusterHandler.GetAllClusters).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.GetCluster).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.UpdateCluster).Methods(http.MethodPut) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.DeleteCluster).Methods(http.MethodDelete) - api.HandleFunc("/clusters/{cluster_id}/health", clusterHandler.GetClusterHealth).Methods(http.MethodGet) + protected.HandleFunc("/clusters", clusterHandler.CreateCluster).Methods(http.MethodPost) + protected.HandleFunc("/clusters", clusterHandler.GetAllClusters).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.GetCluster).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.UpdateCluster).Methods(http.MethodPut) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.DeleteCluster).Methods(http.MethodDelete) + protected.HandleFunc("/clusters/{cluster_id}/health", clusterHandler.GetClusterHealth).Methods(http.MethodGet) // ===== Registry 路由 ===== - api.HandleFunc("/registries", registryHandler.CreateRegistry).Methods(http.MethodPost) - api.HandleFunc("/registries", registryHandler.GetAllRegistries).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}", registryHandler.GetRegistry).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}", registryHandler.UpdateRegistry).Methods(http.MethodPut) - api.HandleFunc("/registries/{registry_id}", 
registryHandler.DeleteRegistry).Methods(http.MethodDelete) - api.HandleFunc("/registries/{registry_id}/health", registryHandler.GetRegistryHealth).Methods(http.MethodGet) + protected.HandleFunc("/registries", registryHandler.CreateRegistry).Methods(http.MethodPost) + protected.HandleFunc("/registries", registryHandler.GetAllRegistries).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}", registryHandler.GetRegistry).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}", registryHandler.UpdateRegistry).Methods(http.MethodPut) + protected.HandleFunc("/registries/{registry_id}", registryHandler.DeleteRegistry).Methods(http.MethodDelete) + protected.HandleFunc("/registries/{registry_id}/health", registryHandler.GetRegistryHealth).Methods(http.MethodGet) // ===== Artifact 路由 ===== - api.HandleFunc("/registries/{registry_id}/repositories", artifactHandler.ListRepositories).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts", artifactHandler.ListArtifacts).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}", artifactHandler.GetArtifact).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-schema", artifactHandler.GetArtifactValuesSchema).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories", artifactHandler.ListRepositories).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts", artifactHandler.ListArtifacts).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}", artifactHandler.GetArtifact).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-schema", 
artifactHandler.GetArtifactValuesSchema).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-yaml", artifactHandler.GetArtifactValuesYAML).Methods(http.MethodGet) // ===== Instance 路由 ===== - api.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.CreateInstance).Methods(http.MethodPost) - api.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.ListInstances).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.GetInstance).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.UpdateInstance).Methods(http.MethodPut) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.DeleteInstance).Methods(http.MethodDelete) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/entries", instanceHandler.ListInstanceEntries).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.CreateInstance).Methods(http.MethodPost) + protected.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.ListInstances).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.GetInstance).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.UpdateInstance).Methods(http.MethodPut) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.DeleteInstance).Methods(http.MethodDelete) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/entries", instanceHandler.ListInstanceEntries).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/diagnostics", instanceHandler.GetInstanceDiagnostics).Methods(http.MethodGet) // ===== Monitoring 路由 ===== - api.HandleFunc("/monitoring/clusters", 
monitoringHandler.ListClusterMonitoring).Methods(http.MethodGet) - api.HandleFunc("/monitoring/clusters/{cluster_id}", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet) - api.HandleFunc("/monitoring/clusters/{cluster_id}/nodes", monitoringHandler.GetNodeMetrics).Methods(http.MethodGet) - api.HandleFunc("/monitoring/summary", monitoringHandler.GetMonitoringSummary).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters", monitoringHandler.ListClusterMonitoring).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters/{cluster_id}", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters/{cluster_id}/nodes", monitoringHandler.GetNodeMetrics).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/summary", monitoringHandler.GetMonitoringSummary).Methods(http.MethodGet) + + // ===== Workspace 路由 ===== + protected.HandleFunc("/workspaces", workspaceHandler.ListWorkspaces).Methods(http.MethodGet) + protected.HandleFunc("/workspaces", workspaceHandler.CreateWorkspace).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/credentials/kubeconfig", workspaceHandler.IssueCurrentKubeconfig).Methods(http.MethodGet) + protected.HandleFunc("/workspaces/{workspace_id}/clusters", workspaceHandler.InitClusterBinding).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/{workspace_id}/kubeconfig", workspaceHandler.IssueKubeconfig).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/{workspace_id}/suspend", workspaceHandler.SuspendWorkspace).Methods(http.MethodPost) // 处理 MethodNotAllowed 错误(OPTIONS 请求会触发) router.MethodNotAllowedHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -275,6 +312,35 @@ func setupRouter( return router } +func authMiddleware(authService *service.AuthService) mux.MiddlewareFunc { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + header := 
r.Header.Get("Authorization") + if !strings.HasPrefix(header, "Bearer ") { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + token := strings.TrimSpace(strings.TrimPrefix(header, "Bearer ")) + if token == "" { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + principal, err := authService.VerifyAccessToken(r.Context(), token) + if err != nil { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + return + } + next.ServeHTTP(w, r.WithContext(authz.WithPrincipal(r.Context(), principal))) + }) + } +} + +func writeJSONError(w http.ResponseWriter, status int, code, message string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = w.Write([]byte(fmt.Sprintf(`{"error":%q,"message":%q}`, code, message))) +} + // loggingMiddleware 日志中间件 func loggingMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/backend/config/bootstrap.example.json b/backend/config/bootstrap.example.json index fab493e..55f3ab2 100644 --- a/backend/config/bootstrap.example.json +++ b/backend/config/bootstrap.example.json @@ -2,9 +2,9 @@ "enabled": true, "users": [ { - "username": "admin", - "password": "change-me-in-production", - "email": "admin@example.com" + "username": "bootstrap-admin", + "password": "replace-with-a-strong-password", + "email": "bootstrap-admin@example.local" } ], "registries": [ @@ -12,8 +12,8 @@ "name": "my-harbor", "url": "https://harbor.example.com", "description": "Harbor Registry", - "username": "admin", - "password": "change-me", + "username": "robot$project+ocdp", + "password": "replace-with-robot-token", "insecure": false } ], @@ -28,4 +28,3 @@ } ] } - diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 4dab3fd..71a3071 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -37,7 +37,7 @@ 
services: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C" ports: - - "${POSTGRES_PORT:-5432}:5432" + - "${POSTGRES_PORT:-15432}:5432" volumes: - postgres_data:/var/lib/postgresql/data - ${INIT_DB_SQL_PATH:-./scripts/init-db.sql}:/docker-entrypoint-initdb.d/01-init.sql:ro @@ -58,9 +58,16 @@ services: build: context: ${BACKEND_BUILD_CONTEXT:-.} dockerfile: ${BACKEND_BUILD_DOCKERFILE:-Dockerfile} + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} image: ocdp-backend:latest container_name: ocdp-backend restart: unless-stopped + env_file: + - path: ../.env + required: false + format: raw environment: ADAPTER_MODE: ${ADAPTER_MODE:-production} PORT: 8080 @@ -68,12 +75,12 @@ services: ENCRYPTION_KEY: ${ENCRYPTION_KEY:-change-me-32-bytes-long-key-here} DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-ocdp}?sslmode=disable ports: - - "${BACKEND_PORT:-8080}:8080" + - "${BACKEND_PORT:-18081}:8080" volumes: - ./config:/app/config:ro - ./data:/app/data healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"] interval: 30s timeout: 10s retries: 3 @@ -94,6 +101,9 @@ services: build: context: ${BACKEND_BUILD_CONTEXT:-.} dockerfile: ${BACKEND_MOCK_BUILD_DOCKERFILE:-Dockerfile.mock} + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} container_name: ocdp-backend-mock restart: unless-stopped environment: @@ -102,9 +112,9 @@ services: JWT_SECRET: ${JWT_SECRET:-test-jwt-secret-key} ENCRYPTION_KEY: ${ENCRYPTION_KEY:-test-encryption-key-32-bytes-long} ports: - - "${BACKEND_PORT:-8080}:8080" + - "${BACKEND_PORT:-18081}:8080" healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"] interval: 30s timeout: 
10s retries: 3 @@ -124,7 +134,7 @@ services: restart: unless-stopped environment: PGADMIN_DEFAULT_EMAIL: ${PGADMIN_EMAIL:-admin@ocdp.local} - PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-admin} + PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-change-me} PGADMIN_CONFIG_SERVER_MODE: "False" PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: "False" ports: diff --git a/backend/internal/adapter/input/http/dto/artifact_dto.go b/backend/internal/adapter/input/http/dto/artifact_dto.go index cd71cca..bb20702 100644 --- a/backend/internal/adapter/input/http/dto/artifact_dto.go +++ b/backend/internal/adapter/input/http/dto/artifact_dto.go @@ -6,9 +6,9 @@ type RepositoryListResponse struct { RegistryURL string `json:"registryUrl"` Repositories []string `json:"repositories"` Total int `json:"total"` - CatalogSupported bool `json:"catalogSupported"` // Whether _catalog API is supported - Source string `json:"source"` // Data source: "catalog" | "preconfigured" | "unavailable" - Message string `json:"message,omitempty"` // User-friendly message + CatalogSupported bool `json:"catalogSupported"` // Whether _catalog API is supported + Source string `json:"source"` // Data source: "catalog" | "preconfigured" | "unavailable" + Message string `json:"message,omitempty"` // User-friendly message } // ArtifactResponse Artifact 响应(简化版本,只包含核心字段) @@ -23,11 +23,11 @@ type ArtifactResponse struct { // TagResponse Tag 响应(前端期望的扁平化结构) type TagResponse struct { - RepositoryName string `json:"repositoryName"` // Repository name - Tag string `json:"tag"` // Tag name (e.g. "1.0.0", "latest") - Type string `json:"type"` // Artifact type: chart, image, other + RepositoryName string `json:"repositoryName"` // Repository name + Tag string `json:"tag"` // Tag name (e.g. 
"1.0.0", "latest") + Type string `json:"type"` // Artifact type: chart, image, other MediaType string `json:"mediaType,omitempty"` - Size int64 `json:"size"` // Artifact size (bytes) + Size int64 `json:"size"` // Artifact size (bytes) } // ArtifactListResponse Artifact 列表响应(包装格式,用于详细接口) @@ -42,3 +42,7 @@ type ValuesSchemaResponse struct { Schema string `json:"schema"` } +// ValuesYAMLResponse Helm Chart 默认 values.yaml 响应 +type ValuesYAMLResponse struct { + ValuesYAML string `json:"valuesYaml"` +} diff --git a/backend/internal/adapter/input/http/dto/auth_dto.go b/backend/internal/adapter/input/http/dto/auth_dto.go index cb2c823..29208de 100644 --- a/backend/internal/adapter/input/http/dto/auth_dto.go +++ b/backend/internal/adapter/input/http/dto/auth_dto.go @@ -2,8 +2,18 @@ package dto // RegisterRequest 用户注册请求 type RegisterRequest struct { - Username string `json:"username" binding:"required"` - Password string `json:"password" binding:"required,min=6"` + Username string `json:"username" binding:"required"` + Password string `json:"password" binding:"required,min=6"` + Role string `json:"role,omitempty"` + WorkspaceID string `json:"workspaceId,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + IsActive *bool `json:"isActive,omitempty"` + MustChangePassword *bool `json:"mustChangePassword,omitempty"` } // LoginRequest 用户登录请求 @@ -19,17 +29,53 @@ type RefreshTokenRequest struct { // AuthResponse 认证响应 type AuthResponse struct { - AccessToken string `json:"accessToken"` - RefreshToken string `json:"refreshToken"` - UserID string `json:"userId"` - Username string `json:"username"` + AccessToken string `json:"accessToken"` + RefreshToken string `json:"refreshToken"` + UserID string `json:"userId"` + 
Username string `json:"username"` + Role string `json:"role"` + WorkspaceID string `json:"workspaceId"` + WorkspaceName string `json:"workspaceName,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + Permissions []string `json:"permissions,omitempty"` + PermissionVersion int `json:"permissionVersion"` } // UserResponse 用户信息响应 type UserResponse struct { - ID string `json:"id"` - Username string `json:"username"` - Email string `json:"email"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` + ID string `json:"id"` + Username string `json:"username"` + Email string `json:"email"` + Role string `json:"role"` + WorkspaceID string `json:"workspaceId"` + WorkspaceName string `json:"workspaceName,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + IsActive bool `json:"isActive"` + MustChangePassword bool `json:"mustChangePassword"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` +} + +// UpdateUserRequest 管理员更新用户状态/角色请求 +type UpdateUserRequest struct { + Role string `json:"role,omitempty"` + WorkspaceID string `json:"workspaceId,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + IsActive *bool `json:"isActive,omitempty"` 
+ MustChangePassword *bool `json:"mustChangePassword,omitempty"` } diff --git a/backend/internal/adapter/input/http/dto/cluster_dto.go b/backend/internal/adapter/input/http/dto/cluster_dto.go index d84a816..d764221 100644 --- a/backend/internal/adapter/input/http/dto/cluster_dto.go +++ b/backend/internal/adapter/input/http/dto/cluster_dto.go @@ -2,30 +2,38 @@ package dto // CreateClusterRequest 创建集群请求 type CreateClusterRequest struct { - Name string `json:"name" binding:"required"` - Host string `json:"host" binding:"required"` - CAData string `json:"caData"` - CADataAlt string `json:"ca_data"` - CertData string `json:"certData"` - CertDataAlt string `json:"cert_data"` - KeyData string `json:"keyData"` - KeyDataAlt string `json:"key_data"` - Token string `json:"token"` - Description string `json:"description"` + Name string `json:"name" binding:"required"` + Host string `json:"host" binding:"required"` + CAData string `json:"caData"` + CADataAlt string `json:"ca_data"` + CertData string `json:"certData"` + CertDataAlt string `json:"cert_data"` + KeyData string `json:"keyData"` + KeyDataAlt string `json:"key_data"` + Token string `json:"token"` + Description string `json:"description"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` + DefaultNamespace string `json:"defaultNamespace"` } // UpdateClusterRequest 更新集群请求 type UpdateClusterRequest struct { - Name string `json:"name"` - Host string `json:"host"` - CAData string `json:"caData"` - CADataAlt string `json:"ca_data"` - CertData string `json:"certData"` - CertDataAlt string `json:"cert_data"` - KeyData string `json:"keyData"` - KeyDataAlt string `json:"key_data"` - Token string `json:"token"` - Description string `json:"description"` + Name string `json:"name"` + Host string `json:"host"` + CAData string `json:"caData"` + CADataAlt string `json:"ca_data"` + CertData string `json:"certData"` + CertDataAlt string `json:"cert_data"` + 
KeyData string `json:"keyData"` + KeyDataAlt string `json:"key_data"` + Token string `json:"token"` + Description string `json:"description"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` + DefaultNamespace string `json:"defaultNamespace"` } // Normalize 将多种命名风格的字段合并到统一字段 @@ -56,10 +64,15 @@ func (r *UpdateClusterRequest) Normalize() { // ClusterResponse 集群响应(敏感数据已脱敏) type ClusterResponse struct { - ID string `json:"id"` - Name string `json:"name"` - Host string `json:"host"` - Description string `json:"description"` + ID string `json:"id"` + Name string `json:"name"` + Host string `json:"host"` + Description string `json:"description"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + Visibility string `json:"visibility"` + DefaultNamespace string `json:"defaultNamespace,omitempty"` + AllowedActions []string `json:"allowedActions,omitempty"` // 认证配置状态(不返回实际证书数据,仅返回是否已配置) HasCAData bool `json:"hasCaData"` HasCertData bool `json:"hasCertData"` diff --git a/backend/internal/adapter/input/http/dto/converter.go b/backend/internal/adapter/input/http/dto/converter.go index 40fa9b6..ce87571 100644 --- a/backend/internal/adapter/input/http/dto/converter.go +++ b/backend/internal/adapter/input/http/dto/converter.go @@ -9,6 +9,9 @@ import ( func ToRegistryResponse(registry *entity.Registry) *RegistryResponse { response := &RegistryResponse{ ID: registry.ID, + WorkspaceID: registry.WorkspaceID, + OwnerID: registry.OwnerID, + Visibility: registry.Visibility, Name: registry.Name, URL: registry.URL, Description: registry.Description, @@ -17,33 +20,37 @@ func ToRegistryResponse(registry *entity.Registry) *RegistryResponse { CreatedAt: registry.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), UpdatedAt: registry.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), } - + // 脱敏处理密码 if registry.Password != "" { response.HasPassword = true response.Password = 
crypto.MaskSensitiveData(registry.Password) } - + return response } // ToClusterResponse 转换 Cluster 实体为响应 DTO(脱敏) func ToClusterResponse(cluster *entity.Cluster) *ClusterResponse { response := &ClusterResponse{ - ID: cluster.ID, - Name: cluster.Name, - Host: cluster.Host, - Description: cluster.Description, - CreatedAt: cluster.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: cluster.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + ID: cluster.ID, + WorkspaceID: cluster.WorkspaceID, + OwnerID: cluster.OwnerID, + Visibility: cluster.Visibility, + Name: cluster.Name, + Host: cluster.Host, + Description: cluster.Description, + DefaultNamespace: cluster.DefaultNamespace, + CreatedAt: cluster.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: cluster.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), } - + // 设置认证配置状态标志 response.HasCAData = cluster.CAData != "" response.HasCertData = cluster.CertData != "" response.HasKeyData = cluster.KeyData != "" response.HasToken = cluster.Token != "" - + // 脱敏处理敏感数据(仅显示掩码) if cluster.CAData != "" { response.CAData = crypto.MaskSensitiveData(cluster.CAData) @@ -57,7 +64,6 @@ func ToClusterResponse(cluster *entity.Cluster) *ClusterResponse { if cluster.Token != "" { response.Token = crypto.MaskSensitiveData(cluster.Token) } - + return response } - diff --git a/backend/internal/adapter/input/http/dto/error_dto.go b/backend/internal/adapter/input/http/dto/error_dto.go index 6b33da6..990e830 100644 --- a/backend/internal/adapter/input/http/dto/error_dto.go +++ b/backend/internal/adapter/input/http/dto/error_dto.go @@ -12,4 +12,3 @@ type SuccessResponse struct { Message string `json:"message"` Data interface{} `json:"data,omitempty"` } - diff --git a/backend/internal/adapter/input/http/dto/instance_dto.go b/backend/internal/adapter/input/http/dto/instance_dto.go index 76aca25..b354055 100644 --- a/backend/internal/adapter/input/http/dto/instance_dto.go +++ b/backend/internal/adapter/input/http/dto/instance_dto.go @@ 
-43,23 +43,26 @@ type DeleteInstanceRequest struct { // InstanceResponse 实例响应 type InstanceResponse struct { - ID string `json:"id"` - ClusterID string `json:"clusterId"` - Name string `json:"name"` - Namespace string `json:"namespace"` - RegistryID string `json:"registryId"` - Repository string `json:"repository"` - Chart string `json:"chart"` - Version string `json:"version"` - Description string `json:"description"` - Status string `json:"status"` - StatusReason string `json:"statusReason,omitempty"` - LastOperation string `json:"lastOperation,omitempty"` - LastError string `json:"lastError,omitempty"` - Revision int `json:"revision"` - Values map[string]interface{} `json:"values,omitempty"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` + ID string `json:"id"` + ClusterID string `json:"clusterId"` + Name string `json:"name"` + Namespace string `json:"namespace"` + RegistryID string `json:"registryId"` + Repository string `json:"repository"` + Chart string `json:"chart"` + Version string `json:"version"` + Description string `json:"description"` + Status string `json:"status"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + AllowedActions []string `json:"allowedActions,omitempty"` + StatusReason string `json:"statusReason,omitempty"` + LastOperation string `json:"lastOperation,omitempty"` + LastError string `json:"lastError,omitempty"` + Revision int `json:"revision"` + Values map[string]interface{} `json:"values,omitempty"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` } // InstanceStatusResponse 实例状态响应 @@ -131,3 +134,70 @@ type InstanceEntryResponse struct { Hosts []InstanceEntryHostResponse `json:"hosts,omitempty"` TLS []InstanceEntryTLSResponse `json:"tls,omitempty"` } + +type InstanceDiagnosticsResponse struct { + InstanceName string `json:"instanceName"` + Namespace string `json:"namespace"` + Pods []InstancePodDiagnostics `json:"pods"` + Services 
[]InstanceServiceDiagnostics `json:"services"` + Events []InstanceEventDiagnostics `json:"events"` + Logs []InstancePodLogResponse `json:"logs"` + CollectedAt string `json:"collectedAt"` +} + +type InstancePodDiagnostics struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + Phase string `json:"phase"` + NodeName string `json:"nodeName,omitempty"` + PodIP string `json:"podIp,omitempty"` + HostIP string `json:"hostIp,omitempty"` + RestartCount int32 `json:"restartCount"` + Containers []InstanceContainerDiagnostics `json:"containers"` + Conditions []InstanceConditionDiagnostics `json:"conditions"` + CreationTimestamp string `json:"creationTimestamp,omitempty"` +} + +type InstanceContainerDiagnostics struct { + Name string `json:"name"` + Image string `json:"image"` + Ready bool `json:"ready"` + RestartCount int32 `json:"restartCount"` + State string `json:"state"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` +} + +type InstanceConditionDiagnostics struct { + Type string `json:"type"` + Status string `json:"status"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` +} + +type InstanceServiceDiagnostics struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + Type string `json:"type"` + ClusterIP string `json:"clusterIP,omitempty"` + Ports []InstanceEntryPortResponse `json:"ports,omitempty"` +} + +type InstanceEventDiagnostics struct { + Type string `json:"type"` + Reason string `json:"reason"` + Message string `json:"message"` + InvolvedKind string `json:"involvedKind"` + InvolvedName string `json:"involvedName"` + Count int32 `json:"count"` + FirstTimestamp string `json:"firstTimestamp,omitempty"` + LastTimestamp string `json:"lastTimestamp,omitempty"` +} + +type InstancePodLogResponse struct { + Pod string `json:"pod"` + Container string `json:"container"` + TailLines int64 `json:"tailLines"` + Log string `json:"log,omitempty"` + Error string 
`json:"error,omitempty"` +} diff --git a/backend/internal/adapter/input/http/dto/monitoring_dto.go b/backend/internal/adapter/input/http/dto/monitoring_dto.go index bcb3496..6886948 100644 --- a/backend/internal/adapter/input/http/dto/monitoring_dto.go +++ b/backend/internal/adapter/input/http/dto/monitoring_dto.go @@ -8,29 +8,29 @@ import ( // ClusterMetricsResponse 集群监控响应 type ClusterMetricsResponse struct { - ClusterID string `json:"clusterId"` - ClusterName string `json:"clusterName"` - Status string `json:"status"` - Uptime string `json:"uptime"` - NodeCount int `json:"nodeCount"` - PodCount int `json:"podCount"` - LastCheck time.Time `json:"lastCheck"` - TotalCPU string `json:"totalCpu"` - TotalMemory string `json:"totalMemory"` - TotalGPU int `json:"totalGpu"` - UsedCPU string `json:"usedCpu"` - UsedMemory string `json:"usedMemory"` - UsedGPU int `json:"usedGpu"` - CPUUsage float64 `json:"cpuUsage"` - MemoryUsage float64 `json:"memoryUsage"` - GPUUsage float64 `json:"gpuUsage"` - MaxNodeCPU string `json:"maxNodeCpu"` - MaxNodeMemory string `json:"maxNodeMemory"` - MaxNodeGPU int `json:"maxNodeGpu"` - MaxNodeCPUUsage float64 `json:"maxNodeCpuUsage"` - MaxNodeMemUsage float64 `json:"maxNodeMemUsage"` - MaxNodeGPUUsage float64 `json:"maxNodeGpuUsage"` - Nodes []NodeMetricsResponse `json:"nodes,omitempty"` + ClusterID string `json:"clusterId"` + ClusterName string `json:"clusterName"` + Status string `json:"status"` + Uptime string `json:"uptime"` + NodeCount int `json:"nodeCount"` + PodCount int `json:"podCount"` + LastCheck time.Time `json:"lastCheck"` + TotalCPU string `json:"totalCpu"` + TotalMemory string `json:"totalMemory"` + TotalGPU int `json:"totalGpu"` + UsedCPU string `json:"usedCpu"` + UsedMemory string `json:"usedMemory"` + UsedGPU int `json:"usedGpu"` + CPUUsage float64 `json:"cpuUsage"` + MemoryUsage float64 `json:"memoryUsage"` + GPUUsage float64 `json:"gpuUsage"` + MaxNodeCPU string `json:"maxNodeCpu"` + MaxNodeMemory string 
`json:"maxNodeMemory"` + MaxNodeGPU int `json:"maxNodeGpu"` + MaxNodeCPUUsage float64 `json:"maxNodeCpuUsage"` + MaxNodeMemUsage float64 `json:"maxNodeMemUsage"` + MaxNodeGPUUsage float64 `json:"maxNodeGpuUsage"` + Nodes []NodeMetricsResponse `json:"nodes,omitempty"` } // NodeMetricsResponse 节点监控响应 @@ -72,28 +72,28 @@ type MonitoringSummaryResponse struct { // ToClusterMetricsResponse 转换为响应 func ToClusterMetricsResponse(m *entity.ClusterMetrics) *ClusterMetricsResponse { resp := &ClusterMetricsResponse{ - ClusterID: m.ClusterID, - ClusterName: m.ClusterName, - Status: m.Status, - Uptime: m.Uptime, - NodeCount: m.NodeCount, - PodCount: m.PodCount, - LastCheck: m.LastCheck, - TotalCPU: m.TotalCPU, - TotalMemory: m.TotalMemory, - TotalGPU: m.TotalGPU, - UsedCPU: m.UsedCPU, - UsedMemory: m.UsedMemory, - UsedGPU: m.UsedGPU, - CPUUsage: m.CPUUsage, - MemoryUsage: m.MemoryUsage, - GPUUsage: m.GPUUsage, - MaxNodeCPU: m.MaxNodeCPU, - MaxNodeMemory: m.MaxNodeMemory, - MaxNodeGPU: m.MaxNodeGPU, - MaxNodeCPUUsage: m.MaxNodeCPUUsage, - MaxNodeMemUsage: m.MaxNodeMemUsage, - MaxNodeGPUUsage: m.MaxNodeGPUUsage, + ClusterID: m.ClusterID, + ClusterName: m.ClusterName, + Status: m.Status, + Uptime: m.Uptime, + NodeCount: m.NodeCount, + PodCount: m.PodCount, + LastCheck: m.LastCheck, + TotalCPU: m.TotalCPU, + TotalMemory: m.TotalMemory, + TotalGPU: m.TotalGPU, + UsedCPU: m.UsedCPU, + UsedMemory: m.UsedMemory, + UsedGPU: m.UsedGPU, + CPUUsage: m.CPUUsage, + MemoryUsage: m.MemoryUsage, + GPUUsage: m.GPUUsage, + MaxNodeCPU: m.MaxNodeCPU, + MaxNodeMemory: m.MaxNodeMemory, + MaxNodeGPU: m.MaxNodeGPU, + MaxNodeCPUUsage: m.MaxNodeCPUUsage, + MaxNodeMemUsage: m.MaxNodeMemUsage, + MaxNodeGPUUsage: m.MaxNodeGPUUsage, } if len(m.Nodes) > 0 { @@ -140,4 +140,3 @@ func ToMonitoringSummaryResponse(s *entity.MonitoringSummary) *MonitoringSummary LastUpdate: s.LastUpdate, } } - diff --git a/backend/internal/adapter/input/http/dto/registry_dto.go 
b/backend/internal/adapter/input/http/dto/registry_dto.go index 25de40a..0c53b69 100644 --- a/backend/internal/adapter/input/http/dto/registry_dto.go +++ b/backend/internal/adapter/input/http/dto/registry_dto.go @@ -2,36 +2,46 @@ package dto // CreateRegistryRequest 创建 Registry 请求 type CreateRegistryRequest struct { - Name string `json:"name" binding:"required"` - URL string `json:"url" binding:"required"` - Username string `json:"username"` - Password string `json:"password"` - Description string `json:"description"` - Insecure bool `json:"insecure"` + Name string `json:"name" binding:"required"` + URL string `json:"url" binding:"required"` + Username string `json:"username"` + Password string `json:"password"` + Description string `json:"description"` + Insecure bool `json:"insecure"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` } // UpdateRegistryRequest 更新 Registry 请求 type UpdateRegistryRequest struct { - Name string `json:"name"` - URL string `json:"url"` - Username string `json:"username"` - Password string `json:"password"` - Description string `json:"description"` - Insecure bool `json:"insecure"` + Name string `json:"name"` + URL string `json:"url"` + Username string `json:"username"` + Password string `json:"password"` + Description string `json:"description"` + Insecure bool `json:"insecure"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` } // RegistryResponse Registry 响应(敏感数据已脱敏) type RegistryResponse struct { - ID string `json:"id"` - Name string `json:"name"` - URL string `json:"url"` - Description string `json:"description"` - Username string `json:"username,omitempty"` // 明文返回用户名(不敏感) - Password string `json:"password,omitempty"` // 脱敏显示(••••••••) - HasPassword bool `json:"hasPassword"` // 是否已设置密码 - Insecure bool `json:"insecure"` - CreatedAt string `json:"createdAt"` - UpdatedAt string 
`json:"updatedAt"` + ID string `json:"id"` + Name string `json:"name"` + URL string `json:"url"` + Description string `json:"description"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + Visibility string `json:"visibility"` + AllowedActions []string `json:"allowedActions,omitempty"` + Username string `json:"username,omitempty"` // 明文返回用户名(不敏感) + Password string `json:"password,omitempty"` // 脱敏显示(••••••••) + HasPassword bool `json:"hasPassword"` // 是否已设置密码 + Insecure bool `json:"insecure"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` } // RegistryHealthResponse Registry 健康状态响应 @@ -39,4 +49,3 @@ type RegistryHealthResponse struct { Healthy bool `json:"healthy"` Message string `json:"message,omitempty"` } - diff --git a/backend/internal/adapter/input/http/rest/artifact_handler.go b/backend/internal/adapter/input/http/rest/artifact_handler.go index d04bdbc..d86c5b7 100644 --- a/backend/internal/adapter/input/http/rest/artifact_handler.go +++ b/backend/internal/adapter/input/http/rest/artifact_handler.go @@ -29,14 +29,19 @@ func NewArtifactHandler(artifactService *service.ArtifactService) *ArtifactHandl // @Accept json // @Produce json // @Param registry_id path string true "Registry ID" +// @Param artifact_type query string false "Artifact type filter (chart, all)" default(chart) // @Success 200 {object} dto.RepositoryListResponse // @Failure 500 {object} dto.ErrorResponse // @Router /registries/{registry_id}/repositories [get] func (h *ArtifactHandler) ListRepositories(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) registryID := vars["registry_id"] + artifactType := r.URL.Query().Get("artifact_type") + if artifactType == "" { + artifactType = "chart" + } - repositories, err := h.artifactService.ListRepositories(r.Context(), registryID) + repositories, err := h.artifactService.ListRepositories(r.Context(), registryID, artifactType) if err != nil { respondError(w, 
http.StatusInternalServerError, "Failed to list repositories", err.Error()) return @@ -50,13 +55,17 @@ func (h *ArtifactHandler) ListRepositories(w http.ResponseWriter, r *http.Reques } // Determine source and message based on repository count - source := "catalog" + source := "harbor-api" catalogSupported := true message := "" if len(repositories) == 0 { source = "unavailable" - message = "No repositories found in this registry" + if artifactType == "chart" { + message = "No chart repositories found in this registry" + } else { + message = "No repositories found in this registry" + } } response := &dto.RepositoryListResponse{ @@ -191,3 +200,37 @@ func (h *ArtifactHandler) GetArtifactValuesSchema(w http.ResponseWriter, r *http respondJSON(w, http.StatusOK, response) } + +// GetArtifactValuesYAML 获取 Helm Chart 的默认 values.yaml +// @Summary 获取 Helm Chart 默认 Values YAML +// @Description 获取 Helm Chart 包内原始 values.yaml,用于高级覆盖编辑 +// @Tags Artifacts +// @Accept json +// @Produce json +// @Param registry_id path string true "Registry ID" +// @Param repository_name path string true "Repository Name (URL encoded)" +// @Param reference path string true "Artifact Reference (tag or digest)" +// @Success 200 {object} dto.ValuesYAMLResponse +// @Failure 500 {object} dto.ErrorResponse +// @Router /registries/{registry_id}/repositories/{repository_name}/artifacts/{reference}/values-yaml [get] +func (h *ArtifactHandler) GetArtifactValuesYAML(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + registryID := vars["registry_id"] + repositoryName := vars["repository_name"] + reference := vars["reference"] + + valuesYAML, err := h.artifactService.GetValuesYAML(r.Context(), registryID, repositoryName, reference) + if err != nil { + switch { + case errors.Is(err, entity.ErrRegistryNotFound), + errors.Is(err, entity.ErrRepositoryNotFound), + errors.Is(err, entity.ErrArtifactNotFound): + respondError(w, http.StatusNotFound, "Values YAML not found", err.Error()) + default: + 
respondError(w, http.StatusInternalServerError, "Failed to get values YAML", err.Error()) + } + return + } + + respondJSON(w, http.StatusOK, &dto.ValuesYAMLResponse{ValuesYAML: valuesYAML}) +} diff --git a/backend/internal/adapter/input/http/rest/auth_handler.go b/backend/internal/adapter/input/http/rest/auth_handler.go index f67acda..f751a21 100644 --- a/backend/internal/adapter/input/http/rest/auth_handler.go +++ b/backend/internal/adapter/input/http/rest/auth_handler.go @@ -1,11 +1,16 @@ package rest import ( + "context" "encoding/json" "net/http" + "strings" + "github.com/gorilla/mux" "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // AuthHandler 认证 Handler @@ -20,9 +25,9 @@ func NewAuthHandler(authService *service.AuthService) *AuthHandler { } } -// Register 用户注册 -// @Summary 用户注册 -// @Description 创建一个新的后台用户 +// Register 管理员创建用户 +// @Summary 管理员创建用户 +// @Description 创建一个新的后台用户。公开自注册已禁用,只允许 admin 调用。 // @Tags Auth // @Accept json // @Produce json @@ -38,22 +43,64 @@ func (h *AuthHandler) Register(w http.ResponseWriter, r *http.Request) { } // 调用领域服务 - user, err := h.authService.Register(r.Context(), req.Username, req.Password) + user, err := h.authService.Register(r.Context(), req.Username, req.Password, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{ + Namespace: req.Namespace, + DefaultClusterID: req.DefaultClusterID, + QuotaCPU: req.QuotaCPU, + QuotaMemory: req.QuotaMemory, + QuotaGPU: req.QuotaGPU, + QuotaGPUMem: req.QuotaGPUMem, + }, req.IsActive, req.MustChangePassword) if err != nil { - respondError(w, http.StatusBadRequest, "Registration failed", err.Error()) + respondServiceError(w, err, "Registration failed") return } - // 返回响应 - response := &dto.UserResponse{ - ID: user.ID, - Username: user.Username, - Email: user.Email, - CreatedAt: 
user.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: user.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } + respondJSON(w, http.StatusCreated, h.convertUserResponse(r.Context(), user)) +} - respondJSON(w, http.StatusCreated, response) +func (h *AuthHandler) ListUsers(w http.ResponseWriter, r *http.Request) { + users, err := h.authService.ListUsers(r.Context()) + if err != nil { + respondServiceError(w, err, "Failed to list users") + return + } + responses := make([]*dto.UserResponse, 0, len(users)) + for _, user := range users { + responses = append(responses, h.convertUserResponse(r.Context(), user)) + } + respondJSON(w, http.StatusOK, responses) +} + +func (h *AuthHandler) UpdateUser(w http.ResponseWriter, r *http.Request) { + userID := mux.Vars(r)["user_id"] + var req dto.UpdateUserRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + user, err := h.authService.UpdateUser(r.Context(), userID, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{ + Namespace: req.Namespace, + DefaultClusterID: req.DefaultClusterID, + QuotaCPU: req.QuotaCPU, + QuotaMemory: req.QuotaMemory, + QuotaGPU: req.QuotaGPU, + QuotaGPUMem: req.QuotaGPUMem, + }, req.IsActive, req.MustChangePassword) + if err != nil { + respondServiceError(w, err, "Failed to update user") + return + } + respondJSON(w, http.StatusOK, h.convertUserResponse(r.Context(), user)) +} + +func (h *AuthHandler) DeleteUser(w http.ResponseWriter, r *http.Request) { + userID := mux.Vars(r)["user_id"] + if err := h.authService.DeleteUser(r.Context(), userID); err != nil { + respondServiceError(w, err, "Failed to delete user") + return + } + w.WriteHeader(http.StatusNoContent) } // Login 用户登录 @@ -74,25 +121,58 @@ func (h *AuthHandler) Login(w http.ResponseWriter, r *http.Request) { } // 调用领域服务 - accessToken, refreshToken, err := h.authService.Login(r.Context(), req.Username, req.Password) + 
accessToken, refreshToken, user, err := h.authService.Login(r.Context(), req.Username, req.Password) if err != nil { respondError(w, http.StatusUnauthorized, "Login failed", err.Error()) return } - // 获取用户信息 - // TODO: 从 token 解析用户信息或从服务获取 + workspace, _ := h.authService.GetWorkspaceByID(r.Context(), user.WorkspaceID) // 返回响应 response := &dto.AuthResponse{ - AccessToken: accessToken, - RefreshToken: refreshToken, - Username: req.Username, + AccessToken: accessToken, + RefreshToken: refreshToken, + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, } respondJSON(w, http.StatusOK, response) } +func (h *AuthHandler) convertUserResponse(ctx context.Context, user *entity.User) *dto.UserResponse { + workspace, _ := h.authService.GetWorkspaceByID(ctx, user.WorkspaceID) + return &dto.UserResponse{ + ID: user.ID, + Username: user.Username, + Email: user.Email, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + IsActive: user.IsActive, + MustChangePassword: user.MustChangePassword, + CreatedAt: user.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: user.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + } +} + // RefreshToken 刷新 Token // @Summary 刷新访问令牌 // @Description 使用刷新令牌获取新的访问令牌 @@ -111,17 +191,109 @@ 
func (h *AuthHandler) RefreshToken(w http.ResponseWriter, r *http.Request) { } // 调用领域服务 - newAccessToken, err := h.authService.RefreshToken(r.Context(), req.RefreshToken) + newAccessToken, user, err := h.authService.RefreshToken(r.Context(), req.RefreshToken) if err != nil { respondError(w, http.StatusUnauthorized, "Token refresh failed", err.Error()) return } + workspace, _ := h.authService.GetWorkspaceByID(r.Context(), user.WorkspaceID) // 返回响应 response := &dto.AuthResponse{ - AccessToken: newAccessToken, - RefreshToken: req.RefreshToken, + AccessToken: newAccessToken, + RefreshToken: req.RefreshToken, + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, } respondJSON(w, http.StatusOK, response) } + +func (h *AuthHandler) Me(w http.ResponseWriter, r *http.Request) { + header := r.Header.Get("Authorization") + token := strings.TrimSpace(strings.TrimPrefix(header, "Bearer ")) + if token == "" || token == header { + respondError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + principal, err := h.authService.VerifyAccessToken(r.Context(), token) + if err != nil { + respondError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + return + } + respondJSON(w, http.StatusOK, &dto.AuthResponse{ + UserID: principal.UserID, + Username: principal.Username, + Role: principal.Role, + WorkspaceID: principal.WorkspaceID, + WorkspaceName: principal.WorkspaceName, + Namespace: principal.Namespace, + DefaultClusterID: principal.DefaultClusterID, + QuotaCPU: principal.QuotaCPU, + QuotaMemory: 
principal.QuotaMemory, + QuotaGPU: principal.QuotaGPU, + QuotaGPUMem: principal.QuotaGPUMem, + Permissions: principal.Permissions, + PermissionVersion: principal.PermissionVersion, + }) +} + +func workspaceName(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.Name +} + +func workspaceNamespace(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.K8sNamespace +} + +func workspaceDefaultClusterID(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.DefaultClusterID +} + +func workspaceQuotaCPU(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaCPU +} + +func workspaceQuotaMemory(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaMemory +} + +func workspaceQuotaGPU(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaGPU +} + +func workspaceQuotaGPUMem(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaGPUMem +} diff --git a/backend/internal/adapter/input/http/rest/cluster_handler.go b/backend/internal/adapter/input/http/rest/cluster_handler.go index c887f8e..b68e882 100644 --- a/backend/internal/adapter/input/http/rest/cluster_handler.go +++ b/backend/internal/adapter/input/http/rest/cluster_handler.go @@ -45,6 +45,11 @@ func (h *ClusterHandler) CreateCluster(w http.ResponseWriter, r *http.Request) { // 创建实体 cluster := entity.NewCluster(req.Name, req.Host) cluster.Description = req.Description + cluster.Visibility = req.Visibility + if req.GlobalShared || req.GlobalSharedAlt { + cluster.Visibility = "global_shared" + } + cluster.DefaultNamespace = req.DefaultNamespace if req.CertData != "" && req.KeyData != "" { cluster.SetCertAuth(req.CAData, req.CertData, req.KeyData) @@ -147,6 +152,15 @@ func (h *ClusterHandler) UpdateCluster(w 
http.ResponseWriter, r *http.Request) { // 更新字段 cluster.Update(req.Name, req.Host, req.Description) + if req.Visibility != "" { + cluster.Visibility = req.Visibility + } + if req.GlobalShared || req.GlobalSharedAlt { + cluster.Visibility = "global_shared" + } + if req.DefaultNamespace != "" { + cluster.DefaultNamespace = req.DefaultNamespace + } if req.CertData != "" && req.KeyData != "" { cluster.SetCertAuth(req.CAData, req.CertData, req.KeyData) diff --git a/backend/internal/adapter/input/http/rest/instance_handler.go b/backend/internal/adapter/input/http/rest/instance_handler.go index 777e965..7592a06 100644 --- a/backend/internal/adapter/input/http/rest/instance_handler.go +++ b/backend/internal/adapter/input/http/rest/instance_handler.go @@ -2,13 +2,17 @@ package rest import ( "encoding/json" + "fmt" "net/http" + "strconv" "strings" + "time" "github.com/gorilla/mux" "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/service" + "gopkg.in/yaml.v3" ) // InstanceHandler 实例 Handler @@ -69,6 +73,14 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) } if req.ValuesYAML != "" { instance.SetValuesYAML(req.ValuesYAML) + if req.Values == nil { + values, err := parseValuesYAML(req.ValuesYAML) + if err != nil { + respondError(w, http.StatusBadRequest, "Invalid values YAML", err.Error()) + return + } + instance.SetValues(values) + } } // 调用领域服务 @@ -77,28 +89,7 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) return } - // 返回响应 - response := &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: 
instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } - - respondJSON(w, http.StatusCreated, response) + respondJSON(w, http.StatusCreated, convertInstanceResponse(instance, true)) } // GetInstance 获取实例详情 @@ -113,6 +104,7 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) // @Router /clusters/{cluster_id}/instances/{instance_id} [get] func (h *InstanceHandler) GetInstance(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) + clusterID := vars["cluster_id"] instanceID := vars["instance_id"] instance, err := h.instanceService.GetInstance(r.Context(), instanceID) @@ -120,28 +112,12 @@ func (h *InstanceHandler) GetInstance(w http.ResponseWriter, r *http.Request) { respondError(w, http.StatusNotFound, "Instance not found", err.Error()) return } - - response := &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + if instance.ClusterID != clusterID { + respondError(w, http.StatusNotFound, "Instance not found", "resource does not belong to cluster") + return } - respondJSON(w, http.StatusOK, response) + respondJSON(w, http.StatusOK, convertInstanceResponse(instance, true)) } // ListInstances 列出集群的所有实例 
@@ -159,30 +135,13 @@ func (h *InstanceHandler) ListInstances(w http.ResponseWriter, r *http.Request) instances, err := h.instanceService.ListInstancesByCluster(r.Context(), clusterID) if err != nil { - respondError(w, http.StatusInternalServerError, "Failed to list instances", err.Error()) + respondServiceError(w, err, "Failed to list instances") return } responses := make([]*dto.InstanceResponse, 0, len(instances)) for _, instance := range instances { - responses = append(responses, &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - }) + responses = append(responses, convertInstanceResponse(instance, false)) } response := &dto.InstanceListResponse{ @@ -225,12 +184,22 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request) // 更新字段 if req.Version != "" { instance.Upgrade(req.Version, req.Values) + } else if req.Values != nil { + instance.SetValues(req.Values) } if req.Description != "" { instance.Description = req.Description } if req.ValuesYAML != "" { instance.SetValuesYAML(req.ValuesYAML) + if req.Values == nil { + values, err := parseValuesYAML(req.ValuesYAML) + if err != nil { + respondError(w, http.StatusBadRequest, "Invalid values YAML", err.Error()) + return + } + instance.SetValues(values) + } } // 调用领域服务 @@ -239,27 +208,7 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request) return } - response := &dto.InstanceResponse{ - ID: instance.ID, - 
ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } - - respondJSON(w, http.StatusOK, response) + respondJSON(w, http.StatusOK, convertInstanceResponse(instance, true)) } // DeleteInstance 删除实例 @@ -320,6 +269,35 @@ func (h *InstanceHandler) ListInstanceEntries(w http.ResponseWriter, r *http.Req respondJSON(w, http.StatusOK, responses) } +func (h *InstanceHandler) GetInstanceDiagnostics(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + clusterID := vars["cluster_id"] + instanceID := vars["instance_id"] + tailLines := int64(200) + if raw := strings.TrimSpace(r.URL.Query().Get("tailLines")); raw != "" { + parsed, err := strconv.ParseInt(raw, 10, 64) + if err != nil || parsed < 0 { + respondError(w, http.StatusBadRequest, "Invalid tailLines", "tailLines must be a positive integer") + return + } + tailLines = parsed + } + + diagnostics, err := h.instanceService.GetInstanceDiagnostics(r.Context(), clusterID, instanceID, tailLines) + if err != nil { + status := http.StatusInternalServerError + switch err { + case entity.ErrInstanceNotFound, entity.ErrClusterNotFound: + status = http.StatusNotFound + case entity.ErrForbidden: + status = http.StatusForbidden + } + respondError(w, status, "Failed to collect instance diagnostics", err.Error()) + return + } + respondJSON(w, http.StatusOK, convertInstanceDiagnostics(diagnostics)) +} + func convertInstanceEntry(entry *entity.InstanceEntry) *dto.InstanceEntryResponse { 
portResponses := make([]dto.InstanceEntryPortResponse, 0, len(entry.Ports)) for _, port := range entry.Ports { @@ -369,3 +347,195 @@ func convertInstanceEntry(entry *entity.InstanceEntry) *dto.InstanceEntryRespons TLS: tlsResponses, } } + +func convertInstanceDiagnostics(diagnostics *entity.InstanceDiagnostics) *dto.InstanceDiagnosticsResponse { + if diagnostics == nil { + return &dto.InstanceDiagnosticsResponse{} + } + pods := make([]dto.InstancePodDiagnostics, 0, len(diagnostics.Pods)) + for _, pod := range diagnostics.Pods { + containers := make([]dto.InstanceContainerDiagnostics, 0, len(pod.Containers)) + for _, container := range pod.Containers { + containers = append(containers, dto.InstanceContainerDiagnostics{ + Name: container.Name, + Image: container.Image, + Ready: container.Ready, + RestartCount: container.RestartCount, + State: container.State, + Reason: container.Reason, + Message: container.Message, + }) + } + conditions := make([]dto.InstanceConditionDiagnostics, 0, len(pod.Conditions)) + for _, condition := range pod.Conditions { + conditions = append(conditions, dto.InstanceConditionDiagnostics{ + Type: condition.Type, + Status: condition.Status, + Reason: condition.Reason, + Message: condition.Message, + }) + } + pods = append(pods, dto.InstancePodDiagnostics{ + Name: pod.Name, + Namespace: pod.Namespace, + Phase: pod.Phase, + NodeName: pod.NodeName, + PodIP: pod.PodIP, + HostIP: pod.HostIP, + RestartCount: pod.RestartCount, + Containers: containers, + Conditions: conditions, + CreationTimestamp: formatTime(pod.CreationTimestamp), + }) + } + services := make([]dto.InstanceServiceDiagnostics, 0, len(diagnostics.Services)) + for _, svc := range diagnostics.Services { + ports := make([]dto.InstanceEntryPortResponse, 0, len(svc.Ports)) + for _, port := range svc.Ports { + ports = append(ports, dto.InstanceEntryPortResponse{ + Name: port.Name, + Protocol: port.Protocol, + Port: port.Port, + TargetPort: port.TargetPort, + NodePort: port.NodePort, + }) 
+ } + services = append(services, dto.InstanceServiceDiagnostics{ + Name: svc.Name, + Namespace: svc.Namespace, + Type: svc.Type, + ClusterIP: svc.ClusterIP, + Ports: ports, + }) + } + events := make([]dto.InstanceEventDiagnostics, 0, len(diagnostics.Events)) + for _, event := range diagnostics.Events { + events = append(events, dto.InstanceEventDiagnostics{ + Type: event.Type, + Reason: event.Reason, + Message: event.Message, + InvolvedKind: event.InvolvedKind, + InvolvedName: event.InvolvedName, + Count: event.Count, + FirstTimestamp: formatTime(event.FirstTimestamp), + LastTimestamp: formatTime(event.LastTimestamp), + }) + } + logs := make([]dto.InstancePodLogResponse, 0, len(diagnostics.Logs)) + for _, logEntry := range diagnostics.Logs { + logs = append(logs, dto.InstancePodLogResponse{ + Pod: logEntry.Pod, + Container: logEntry.Container, + TailLines: logEntry.TailLines, + Log: logEntry.Log, + Error: logEntry.Error, + }) + } + return &dto.InstanceDiagnosticsResponse{ + InstanceName: diagnostics.InstanceName, + Namespace: diagnostics.Namespace, + Pods: pods, + Services: services, + Events: events, + Logs: logs, + CollectedAt: formatTime(diagnostics.CollectedAt), + } +} + +func formatTime(value time.Time) string { + if value.IsZero() { + return "" + } + return value.Format(time.RFC3339) +} + +func convertInstanceResponse(instance *entity.Instance, includeValues bool) *dto.InstanceResponse { + response := &dto.InstanceResponse{ + ID: instance.ID, + ClusterID: instance.ClusterID, + Name: instance.Name, + Namespace: instance.Namespace, + RegistryID: instance.RegistryID, + Repository: instance.Repository, + Chart: instance.Chart, + Version: instance.Version, + Description: instance.Description, + Status: string(instance.Status), + WorkspaceID: instance.WorkspaceID, + OwnerID: instance.OwnerID, + StatusReason: instance.StatusReason, + LastOperation: string(instance.LastOperation), + LastError: instance.LastError, + Revision: instance.Revision, + AllowedActions: 
[]string{"view", "update", "delete"}, + CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + } + if includeValues { + response.Values = instance.Values + } + return response +} + +func parseValuesYAML(valuesYAML string) (map[string]interface{}, error) { + valuesYAML = strings.TrimSpace(valuesYAML) + if valuesYAML == "" { + return map[string]interface{}{}, nil + } + + var decoded interface{} + if err := yaml.Unmarshal([]byte(valuesYAML), &decoded); err != nil { + return nil, err + } + + normalized, err := normalizeYAMLValue(decoded) + if err != nil { + return nil, err + } + values, ok := normalized.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("values YAML must be a mapping at the top level") + } + return values, nil +} + +func normalizeYAMLValue(value interface{}) (interface{}, error) { + switch typed := value.(type) { + case map[string]interface{}: + normalized := make(map[string]interface{}, len(typed)) + for key, child := range typed { + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized[key] = normalizedChild + } + return normalized, nil + case map[interface{}]interface{}: + normalized := make(map[string]interface{}, len(typed)) + for key, child := range typed { + keyString, ok := key.(string) + if !ok { + return nil, fmt.Errorf("values YAML contains non-string key %v", key) + } + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized[keyString] = normalizedChild + } + return normalized, nil + case []interface{}: + normalized := make([]interface{}, 0, len(typed)) + for _, child := range typed { + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized = append(normalized, normalizedChild) + } + return normalized, nil + default: + return typed, nil + } +} diff --git 
a/backend/internal/adapter/input/http/rest/registry_handler.go b/backend/internal/adapter/input/http/rest/registry_handler.go index f6d1a03..b413ce0 100644 --- a/backend/internal/adapter/input/http/rest/registry_handler.go +++ b/backend/internal/adapter/input/http/rest/registry_handler.go @@ -44,6 +44,10 @@ func (h *RegistryHandler) CreateRegistry(w http.ResponseWriter, r *http.Request) registry := entity.NewRegistry(req.Name, req.URL) registry.Description = req.Description registry.Insecure = req.Insecure + registry.Visibility = req.Visibility + if req.GlobalShared || req.GlobalSharedAlt { + registry.Visibility = "global_shared" + } registry.SetCredentials(req.Username, req.Password) // 调用领域服务 @@ -136,6 +140,12 @@ func (h *RegistryHandler) UpdateRegistry(w http.ResponseWriter, r *http.Request) // 更新字段 registry.Update(req.Name, req.URL, req.Description) registry.Insecure = req.Insecure + if req.Visibility != "" { + registry.Visibility = req.Visibility + } + if req.GlobalShared || req.GlobalSharedAlt { + registry.Visibility = "global_shared" + } if req.Username != "" || req.Password != "" { registry.SetCredentials(req.Username, req.Password) } diff --git a/backend/internal/adapter/input/http/rest/utils.go b/backend/internal/adapter/input/http/rest/utils.go index 6e6fcd5..82ecf4c 100644 --- a/backend/internal/adapter/input/http/rest/utils.go +++ b/backend/internal/adapter/input/http/rest/utils.go @@ -3,7 +3,7 @@ package rest import ( "encoding/json" "net/http" - + "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" ) @@ -32,4 +32,3 @@ func respondSuccess(w http.ResponseWriter, message string, data interface{}) { } respondJSON(w, http.StatusOK, response) } - diff --git a/backend/internal/adapter/input/http/rest/workspace_handler.go b/backend/internal/adapter/input/http/rest/workspace_handler.go new file mode 100644 index 0000000..311bb2c --- /dev/null +++ b/backend/internal/adapter/input/http/rest/workspace_handler.go @@ -0,0 +1,165 @@ +package rest + 
+import ( + "encoding/json" + "net/http" + "time" + + "github.com/gorilla/mux" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" +) + +type WorkspaceHandler struct { + workspaceService *service.WorkspaceService +} + +func NewWorkspaceHandler(workspaceService *service.WorkspaceService) *WorkspaceHandler { + return &WorkspaceHandler{workspaceService: workspaceService} +} + +type createWorkspaceRequest struct { + Name string `json:"name"` +} + +type workspaceResponse struct { + ID string `json:"id"` + Name string `json:"name"` + Status string `json:"status"` + K8sNamespace string `json:"k8sNamespace"` + K8sSAName string `json:"k8sSaName"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + CreatedBy string `json:"createdBy"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` +} + +type bindClusterRequest struct { + ClusterID string `json:"clusterId"` +} + +type kubeconfigRequest struct { + ClusterID string `json:"clusterId"` + TTLSeconds int64 `json:"ttlSeconds"` +} + +func (h *WorkspaceHandler) ListWorkspaces(w http.ResponseWriter, r *http.Request) { + workspaces, err := h.workspaceService.ListWorkspaces(r.Context()) + if err != nil { + respondServiceError(w, err, "Failed to list workspaces") + return + } + response := make([]workspaceResponse, 0, len(workspaces)) + for _, workspace := range workspaces { + response = append(response, toWorkspaceResponse(workspace)) + } + respondJSON(w, http.StatusOK, response) +} + +func (h *WorkspaceHandler) CreateWorkspace(w http.ResponseWriter, r *http.Request) { + var req createWorkspaceRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, 
http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + workspace, err := h.workspaceService.CreateWorkspace(r.Context(), req.Name) + if err != nil { + respondServiceError(w, err, "Failed to create workspace") + return + } + respondJSON(w, http.StatusCreated, toWorkspaceResponse(workspace)) +} + +func (h *WorkspaceHandler) InitClusterBinding(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + var req bindClusterRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + binding, err := h.workspaceService.EnsureClusterBinding(r.Context(), workspaceID, req.ClusterID) + if err != nil { + respondServiceError(w, err, "Failed to initialize workspace cluster binding") + return + } + respondJSON(w, http.StatusOK, binding) +} + +func (h *WorkspaceHandler) IssueKubeconfig(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + var req kubeconfigRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + kubeconfig, err := h.workspaceService.IssueKubeconfig(r.Context(), workspaceID, req.ClusterID, time.Duration(req.TTLSeconds)*time.Second) + if err != nil { + respondServiceError(w, err, "Failed to issue kubeconfig") + return + } + respondJSON(w, http.StatusOK, map[string]interface{}{ + "kubeconfig": kubeconfig.Kubeconfig, + "expiresAt": kubeconfig.ExpiresAt.Format(time.RFC3339), + }) +} + +func (h *WorkspaceHandler) IssueCurrentKubeconfig(w http.ResponseWriter, r *http.Request) { + clusterID := r.URL.Query().Get("clusterId") + if clusterID == "" { + clusterID = r.URL.Query().Get("cluster_id") + } + kubeconfig, err := h.workspaceService.IssueCurrentKubeconfig(r.Context(), clusterID, 2*time.Hour) + if err != nil { + respondServiceError(w, err, "Failed to issue kubeconfig") + return + } 
+ w.Header().Set("Content-Type", "application/x-yaml") + w.Header().Set("X-OCDP-Kubeconfig-Expires-At", kubeconfig.ExpiresAt.Format(time.RFC3339)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(kubeconfig.Kubeconfig)) +} + +func (h *WorkspaceHandler) SuspendWorkspace(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + if err := h.workspaceService.SuspendWorkspace(r.Context(), workspaceID); err != nil { + respondServiceError(w, err, "Failed to suspend workspace") + return + } + w.WriteHeader(http.StatusNoContent) +} + +func toWorkspaceResponse(workspace *entity.Workspace) workspaceResponse { + return workspaceResponse{ + ID: workspace.ID, + Name: workspace.Name, + Status: string(workspace.Status), + K8sNamespace: workspace.K8sNamespace, + K8sSAName: workspace.K8sSAName, + DefaultClusterID: workspace.DefaultClusterID, + QuotaCPU: workspace.QuotaCPU, + QuotaMemory: workspace.QuotaMemory, + QuotaGPU: workspace.QuotaGPU, + QuotaGPUMem: workspace.QuotaGPUMem, + CreatedBy: workspace.CreatedBy, + CreatedAt: workspace.CreatedAt.Format(time.RFC3339), + UpdatedAt: workspace.UpdatedAt.Format(time.RFC3339), + } +} + +func respondServiceError(w http.ResponseWriter, err error, fallback string) { + switch err { + case entity.ErrUnauthorized, authz.ErrUnauthenticated: + respondError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + case entity.ErrForbidden, authz.ErrForbidden, entity.ErrUserInactive, entity.ErrWorkspaceSuspended: + respondError(w, http.StatusForbidden, "Forbidden", err.Error()) + case entity.ErrClusterNotFound, entity.ErrRegistryNotFound, entity.ErrInstanceNotFound, entity.ErrWorkspaceNotFound: + respondError(w, http.StatusNotFound, fallback, err.Error()) + default: + respondError(w, http.StatusBadRequest, fallback, err.Error()) + } +} diff --git a/backend/internal/adapter/output/factory.go b/backend/internal/adapter/output/factory.go index 8565112..321f280 100644 --- a/backend/internal/adapter/output/factory.go 
+++ b/backend/internal/adapter/output/factory.go @@ -96,6 +96,36 @@ func (f *AdapterFactory) CreateInstanceRepository() (repository.InstanceReposito return postgres.NewInstanceRepository(f.db), nil } +func (f *AdapterFactory) CreateWorkspaceRepository() (repository.WorkspaceRepository, error) { + if f.mode == ModeMock { + return mock.NewWorkspaceRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return nil, err + } + return postgres.NewWorkspaceRepository(f.db), nil +} + +func (f *AdapterFactory) CreateWorkspaceClusterBindingRepository() (repository.WorkspaceClusterBindingRepository, error) { + if f.mode == ModeMock { + return mock.NewWorkspaceClusterBindingRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return nil, err + } + return postgres.NewWorkspaceClusterBindingRepository(f.db), nil +} + +func (f *AdapterFactory) CreateAuditLogRepository() (repository.AuditLogRepository, error) { + if f.mode == ModeMock { + return mock.NewAuditLogRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return nil, err + } + return postgres.NewAuditLogRepository(f.db), nil +} + // CreateOCIClient 创建 OCI 客户端 func (f *AdapterFactory) CreateOCIClient() (repository.OCIClient, error) { if f.mode == ModeMock { @@ -127,6 +157,20 @@ func (f *AdapterFactory) CreateEntryClient() repository.InstanceEntryClient { return k8s.NewEntryClient() } +func (f *AdapterFactory) CreateDiagnosticsClient() repository.InstanceDiagnosticsClient { + if f.mode == ModeMock { + return k8s.NewMockDiagnosticsClient() + } + return k8s.NewDiagnosticsClient() +} + +func (f *AdapterFactory) CreateTenantKubeClient() repository.TenantKubeClient { + if f.mode == ModeMock { + return k8s.NewMockTenantClient() + } + return k8s.NewTenantClient() +} + // CreateAllRepositories 一次性创建所有 Repositories func (f *AdapterFactory) CreateAllRepositories() (*Repositories, error) { userRepo, err := f.CreateUserRepository() @@ -149,6 +193,21 @@ func (f 
*AdapterFactory) CreateAllRepositories() (*Repositories, error) { return nil, fmt.Errorf("failed to create instance repository: %w", err) } + workspaceRepo, err := f.CreateWorkspaceRepository() + if err != nil { + return nil, fmt.Errorf("failed to create workspace repository: %w", err) + } + + bindingRepo, err := f.CreateWorkspaceClusterBindingRepository() + if err != nil { + return nil, fmt.Errorf("failed to create workspace cluster binding repository: %w", err) + } + + auditRepo, err := f.CreateAuditLogRepository() + if err != nil { + return nil, fmt.Errorf("failed to create audit log repository: %w", err) + } + ociClient, err := f.CreateOCIClient() if err != nil { return nil, fmt.Errorf("failed to create OCI client: %w", err) @@ -162,29 +221,41 @@ func (f *AdapterFactory) CreateAllRepositories() (*Repositories, error) { // 创建 Metrics client(依赖 clusterRepo) metricsClient := f.CreateMetricsClient(clusterRepo) entryClient := f.CreateEntryClient() + diagnosticsClient := f.CreateDiagnosticsClient() + tenantClient := f.CreateTenantKubeClient() return &Repositories{ - UserRepo: userRepo, - ClusterRepo: clusterRepo, - RegistryRepo: registryRepo, - InstanceRepo: instanceRepo, - OCIClient: ociClient, - HelmClient: helmClient, - MetricsClient: metricsClient, - EntryClient: entryClient, + UserRepo: userRepo, + WorkspaceRepo: workspaceRepo, + BindingRepo: bindingRepo, + AuditRepo: auditRepo, + ClusterRepo: clusterRepo, + RegistryRepo: registryRepo, + InstanceRepo: instanceRepo, + OCIClient: ociClient, + HelmClient: helmClient, + MetricsClient: metricsClient, + EntryClient: entryClient, + DiagnosticsClient: diagnosticsClient, + TenantKubeClient: tenantClient, }, nil } // Repositories 所有仓储的集合 type Repositories struct { - UserRepo repository.UserRepository - ClusterRepo repository.ClusterRepository - RegistryRepo repository.RegistryRepository - InstanceRepo repository.InstanceRepository - OCIClient repository.OCIClient - HelmClient repository.HelmClient - MetricsClient 
repository.MetricsClient - EntryClient repository.InstanceEntryClient + UserRepo repository.UserRepository + WorkspaceRepo repository.WorkspaceRepository + BindingRepo repository.WorkspaceClusterBindingRepository + AuditRepo repository.AuditLogRepository + ClusterRepo repository.ClusterRepository + RegistryRepo repository.RegistryRepository + InstanceRepo repository.InstanceRepository + OCIClient repository.OCIClient + HelmClient repository.HelmClient + MetricsClient repository.MetricsClient + EntryClient repository.InstanceEntryClient + DiagnosticsClient repository.InstanceDiagnosticsClient + TenantKubeClient repository.TenantKubeClient } // ensureDBConnection 确保数据库连接已建立 diff --git a/backend/internal/adapter/output/helm/real/helm_client.go b/backend/internal/adapter/output/helm/real/helm_client.go index 4d95fda..f743cfe 100644 --- a/backend/internal/adapter/output/helm/real/helm_client.go +++ b/backend/internal/adapter/output/helm/real/helm_client.go @@ -21,6 +21,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/restmapper" "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" ) // HelmClient 真实的 Helm 客户端实现 @@ -36,39 +37,45 @@ func NewHelmClient() repository.HelmClient { } // getActionConfig 获取 Helm action configuration -func (h *HelmClient) getActionConfig(cluster *entity.Cluster, namespace string) (*action.Configuration, error) { +func (h *HelmClient) getActionConfig(cluster *entity.Cluster, namespace string) (*action.Configuration, func(), error) { actionConfig := new(action.Configuration) // 创建临时 kubeconfig 文件 kubeconfigContent := cluster.GetKubeConfig() tmpDir, err := os.MkdirTemp("", "helm-kubeconfig-*") if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) + return nil, nil, fmt.Errorf("failed to create temp dir: %w", err) + } + cleanup := func() { + _ = os.RemoveAll(tmpDir) } kubeconfigPath := filepath.Join(tmpDir, "kubeconfig") if err := os.WriteFile(kubeconfigPath, 
[]byte(kubeconfigContent), 0600); err != nil { - return nil, fmt.Errorf("failed to write kubeconfig: %w", err) + cleanup() + return nil, nil, fmt.Errorf("failed to write kubeconfig: %w", err) } // 使用 kubeconfig 初始化 action config if err := actionConfig.Init( - &kubeconfigGetter{kubeconfigPath: kubeconfigPath}, + &kubeconfigGetter{kubeconfigPath: kubeconfigPath, namespace: namespace}, namespace, os.Getenv("HELM_DRIVER"), // storage driver: configmap, secret, memory func(format string, v ...interface{}) { // Log function }, ); err != nil { - return nil, fmt.Errorf("failed to initialize action config: %w", err) + cleanup() + return nil, nil, fmt.Errorf("failed to initialize action config: %w", err) } - return actionConfig, nil + return actionConfig, cleanup, nil } // kubeconfigGetter implements RESTClientGetter type kubeconfigGetter struct { kubeconfigPath string + namespace string } func (k *kubeconfigGetter) ToRESTConfig() (*rest.Config, error) { @@ -95,25 +102,30 @@ func (k *kubeconfigGetter) ToRESTMapper() (meta.RESTMapper, error) { } func (k *kubeconfigGetter) ToRawKubeConfigLoader() clientcmd.ClientConfig { + overrides := &clientcmd.ConfigOverrides{} + if k.namespace != "" { + overrides.Context = clientcmdapi.Context{Namespace: k.namespace} + } return clientcmd.NewNonInteractiveDeferredLoadingClientConfig( &clientcmd.ClientConfigLoadingRules{ExplicitPath: k.kubeconfigPath}, - &clientcmd.ConfigOverrides{}, + overrides, ) } // Install 安装 Helm Chart func (h *HelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { - actionConfig, err := h.getActionConfig(cluster, instance.Namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, instance.Namespace) if err != nil { return err } + defer cleanup() install := action.NewInstall(actionConfig) install.ReleaseName = instance.Name install.Namespace = instance.Namespace install.CreateNamespace = true install.Wait = true - install.Timeout = 5 * time.Minute + 
install.Timeout = helmOperationTimeout() // 加载 Chart(从本地路径或 OCI registry) // 这里简化处理,假设 chart 已经被拉取到本地 @@ -139,15 +151,16 @@ func (h *HelmClient) Install(ctx context.Context, cluster *entity.Cluster, insta // Upgrade 升级 Helm Release func (h *HelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { - actionConfig, err := h.getActionConfig(cluster, instance.Namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, instance.Namespace) if err != nil { return err } + defer cleanup() upgrade := action.NewUpgrade(actionConfig) upgrade.Namespace = instance.Namespace upgrade.Wait = true - upgrade.Timeout = 5 * time.Minute + upgrade.Timeout = helmOperationTimeout() // 加载 Chart chartPath := fmt.Sprintf("/tmp/charts/%s-%s.tgz", instance.Chart, instance.Version) @@ -172,14 +185,15 @@ func (h *HelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, insta // Uninstall 卸载 Helm Release func (h *HelmClient) Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return err } + defer cleanup() uninstall := action.NewUninstall(actionConfig) uninstall.Wait = true - uninstall.Timeout = 5 * time.Minute + uninstall.Timeout = helmOperationTimeout() _, err = uninstall.Run(releaseName) if err != nil { @@ -194,15 +208,16 @@ func (h *HelmClient) Uninstall(ctx context.Context, cluster *entity.Cluster, rel // Rollback 回滚 Helm Release func (h *HelmClient) Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return err } + defer cleanup() rollback := action.NewRollback(actionConfig) rollback.Version = revision rollback.Wait = true - rollback.Timeout = 5 
* time.Minute + rollback.Timeout = helmOperationTimeout() if err := rollback.Run(releaseName); err != nil { return fmt.Errorf("failed to rollback release: %w", err) @@ -211,12 +226,25 @@ func (h *HelmClient) Rollback(ctx context.Context, cluster *entity.Cluster, rele return nil } +func helmOperationTimeout() time.Duration { + raw := os.Getenv("HELM_OPERATION_TIMEOUT") + if raw == "" { + return 15 * time.Minute + } + timeout, err := time.ParseDuration(raw) + if err != nil || timeout <= 0 { + return 15 * time.Minute + } + return timeout +} + // GetStatus 获取 Release 状态 func (h *HelmClient) GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() status := action.NewStatus(actionConfig) rel, err := status.Run(releaseName) @@ -229,10 +257,11 @@ func (h *HelmClient) GetStatus(ctx context.Context, cluster *entity.Cluster, rel // GetHistory 获取 Release 历史 func (h *HelmClient) GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() history := action.NewHistory(actionConfig) history.Max = 256 @@ -259,10 +288,11 @@ func (h *HelmClient) GetHistory(ctx context.Context, cluster *entity.Cluster, re // List 列出集群中的所有 Releases func (h *HelmClient) List(ctx context.Context, cluster *entity.Cluster, namespace string) ([]*entity.Instance, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() list := action.NewList(actionConfig) if namespace == "" { @@ -284,10 +314,11 
@@ func (h *HelmClient) List(ctx context.Context, cluster *entity.Cluster, namespac // GetValues 获取 Release 的 values func (h *HelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() getValues := action.NewGetValues(actionConfig) values, err := getValues.Run(releaseName) diff --git a/backend/internal/adapter/output/helm/real/helm_client_test.go b/backend/internal/adapter/output/helm/real/helm_client_test.go new file mode 100644 index 0000000..249f7d2 --- /dev/null +++ b/backend/internal/adapter/output/helm/real/helm_client_test.go @@ -0,0 +1,45 @@ +package real + +import ( + "os" + "path/filepath" + "testing" +) + +func TestKubeconfigGetterOverridesNamespace(t *testing.T) { + t.Parallel() + + kubeconfigPath := filepath.Join(t.TempDir(), "kubeconfig") + kubeconfig := `apiVersion: v1 +kind: Config +clusters: +- cluster: + server: https://127.0.0.1:6443 + name: test +contexts: +- context: + cluster: test + user: test + name: test +current-context: test +users: +- name: test + user: + token: test +` + if err := os.WriteFile(kubeconfigPath, []byte(kubeconfig), 0600); err != nil { + t.Fatalf("failed to write kubeconfig: %v", err) + } + getter := &kubeconfigGetter{ + kubeconfigPath: kubeconfigPath, + namespace: "ocdp-u-alice", + } + + namespace, _, err := getter.ToRawKubeConfigLoader().Namespace() + if err != nil { + t.Fatalf("Namespace returned error: %v", err) + } + if namespace != "ocdp-u-alice" { + t.Fatalf("expected namespace override %q, got %q", "ocdp-u-alice", namespace) + } +} diff --git a/backend/internal/adapter/output/k8s/diagnostics_client.go b/backend/internal/adapter/output/k8s/diagnostics_client.go new file mode 100644 index 0000000..146d45f --- /dev/null +++ 
b/backend/internal/adapter/output/k8s/diagnostics_client.go @@ -0,0 +1,294 @@ +package k8s + +import ( + "context" + "fmt" + "io" + "sort" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type DiagnosticsClient struct{} + +func NewDiagnosticsClient() repository.InstanceDiagnosticsClient { + return &DiagnosticsClient{} +} + +type MockDiagnosticsClient struct{} + +func NewMockDiagnosticsClient() repository.InstanceDiagnosticsClient { + return &MockDiagnosticsClient{} +} + +func (*MockDiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) { + return &entity.InstanceDiagnostics{ + InstanceName: instance.Name, + Namespace: instance.Namespace, + CollectedAt: time.Now(), + }, nil +} + +func (c *DiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) { + clientset, err := diagnosticsClientset(cluster) + if err != nil { + return nil, err + } + if tailLines <= 0 { + tailLines = 200 + } + if tailLines > 2000 { + tailLines = 2000 + } + + pods, err := listInstancePods(ctx, clientset, instance) + if err != nil { + return nil, err + } + services, err := listInstanceServices(ctx, clientset, instance) + if err != nil { + return nil, err + } + events, err := listInstanceEvents(ctx, clientset, instance, pods, services) + if err != nil { + return nil, err + } + logs := collectPodLogs(ctx, clientset, pods, tailLines) + + return &entity.InstanceDiagnostics{ + InstanceName: instance.Name, + Namespace: instance.Namespace, + Pods: convertPodsToDiagnostics(pods), + Services: convertServicesToDiagnostics(services), + Events: convertEventsToDiagnostics(events), + Logs: logs, 
+ CollectedAt: time.Now(), + }, nil +} + +func diagnosticsClientset(cluster *entity.Cluster) (kubernetes.Interface, error) { + config, err := restConfigFromCluster(cluster) + if err != nil { + return nil, err + } + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create diagnostics kubernetes client: %w", err) + } + return clientset, nil +} + +func listInstancePods(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance) ([]corev1.Pod, error) { + selector := fmt.Sprintf("app.kubernetes.io/instance=%s", instance.Name) + pods, err := clientset.CoreV1().Pods(instance.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return nil, fmt.Errorf("failed to list instance pods: %w", err) + } + if len(pods.Items) > 0 { + return pods.Items, nil + } + all, err := clientset.CoreV1().Pods(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list namespace pods: %w", err) + } + filtered := make([]corev1.Pod, 0) + for _, pod := range all.Items { + if resourceMatchesInstance(pod.ObjectMeta, instance) { + filtered = append(filtered, pod) + } + } + return filtered, nil +} + +func listInstanceServices(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance) ([]corev1.Service, error) { + selector := fmt.Sprintf("app.kubernetes.io/instance=%s", instance.Name) + services, err := clientset.CoreV1().Services(instance.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return nil, fmt.Errorf("failed to list instance services: %w", err) + } + if len(services.Items) > 0 { + return services.Items, nil + } + all, err := clientset.CoreV1().Services(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list namespace services: %w", err) + } + filtered := make([]corev1.Service, 0) + for _, svc := range all.Items { + if 
resourceMatchesInstance(svc.ObjectMeta, instance) { + filtered = append(filtered, svc) + } + } + return filtered, nil +} + +func listInstanceEvents(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance, pods []corev1.Pod, services []corev1.Service) ([]corev1.Event, error) { + events, err := clientset.CoreV1().Events(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list instance events: %w", err) + } + names := map[string]bool{instance.Name: true} + for _, pod := range pods { + names[pod.Name] = true + } + for _, svc := range services { + names[svc.Name] = true + } + filtered := make([]corev1.Event, 0) + for _, event := range events.Items { + if names[event.InvolvedObject.Name] || strings.Contains(event.Message, instance.Name) { + filtered = append(filtered, event) + } + } + sort.SliceStable(filtered, func(i, j int) bool { + return filtered[i].LastTimestamp.Time.After(filtered[j].LastTimestamp.Time) + }) + if len(filtered) > 100 { + filtered = filtered[:100] + } + return filtered, nil +} + +func collectPodLogs(ctx context.Context, clientset kubernetes.Interface, pods []corev1.Pod, tailLines int64) []entity.InstancePodLog { + logs := make([]entity.InstancePodLog, 0) + for _, pod := range pods { + for _, container := range pod.Spec.Containers { + item := entity.InstancePodLog{Pod: pod.Name, Container: container.Name, TailLines: tailLines} + req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + Container: container.Name, + TailLines: &tailLines, + }) + stream, err := req.Stream(ctx) + if err != nil { + item.Error = err.Error() + logs = append(logs, item) + continue + } + data, err := io.ReadAll(io.LimitReader(stream, 1<<20)) + _ = stream.Close() + if err != nil { + item.Error = err.Error() + } else { + item.Log = string(data) + } + logs = append(logs, item) + } + } + return logs +} + +func convertPodsToDiagnostics(pods []corev1.Pod) 
[]entity.InstancePodDiagnostics { + out := make([]entity.InstancePodDiagnostics, 0, len(pods)) + for _, pod := range pods { + containers := make([]entity.InstanceContainerDiagnostics, 0, len(pod.Status.ContainerStatuses)) + var restarts int32 + for _, status := range pod.Status.ContainerStatuses { + restarts += status.RestartCount + containers = append(containers, entity.InstanceContainerDiagnostics{ + Name: status.Name, + Image: status.Image, + Ready: status.Ready, + RestartCount: status.RestartCount, + State: containerStateName(status.State), + Reason: containerStateReason(status.State), + Message: containerStateMessage(status.State), + }) + } + conditions := make([]entity.InstanceConditionDiagnostics, 0, len(pod.Status.Conditions)) + for _, condition := range pod.Status.Conditions { + conditions = append(conditions, entity.InstanceConditionDiagnostics{ + Type: string(condition.Type), + Status: string(condition.Status), + Reason: condition.Reason, + Message: condition.Message, + }) + } + out = append(out, entity.InstancePodDiagnostics{ + Name: pod.Name, + Namespace: pod.Namespace, + Phase: string(pod.Status.Phase), + NodeName: pod.Spec.NodeName, + PodIP: pod.Status.PodIP, + HostIP: pod.Status.HostIP, + RestartCount: restarts, + Containers: containers, + Conditions: conditions, + CreationTimestamp: pod.CreationTimestamp.Time, + }) + } + return out +} + +func convertServicesToDiagnostics(services []corev1.Service) []entity.InstanceServiceDiagnostics { + out := make([]entity.InstanceServiceDiagnostics, 0, len(services)) + for _, svc := range services { + entry := convertServiceToEntry(&svc) + out = append(out, entity.InstanceServiceDiagnostics{ + Name: svc.Name, + Namespace: svc.Namespace, + Type: string(svc.Spec.Type), + ClusterIP: svc.Spec.ClusterIP, + Ports: entry.Ports, + }) + } + return out +} + +func convertEventsToDiagnostics(events []corev1.Event) []entity.InstanceEventDiagnostics { + out := make([]entity.InstanceEventDiagnostics, 0, len(events)) + for _, 
event := range events { + out = append(out, entity.InstanceEventDiagnostics{ + Type: event.Type, + Reason: event.Reason, + Message: event.Message, + InvolvedKind: event.InvolvedObject.Kind, + InvolvedName: event.InvolvedObject.Name, + Count: event.Count, + FirstTimestamp: event.FirstTimestamp.Time, + LastTimestamp: event.LastTimestamp.Time, + }) + } + return out +} + +func containerStateName(state corev1.ContainerState) string { + switch { + case state.Running != nil: + return "running" + case state.Waiting != nil: + return "waiting" + case state.Terminated != nil: + return "terminated" + default: + return "unknown" + } +} + +func containerStateReason(state corev1.ContainerState) string { + switch { + case state.Waiting != nil: + return state.Waiting.Reason + case state.Terminated != nil: + return state.Terminated.Reason + default: + return "" + } +} + +func containerStateMessage(state corev1.ContainerState) string { + switch { + case state.Waiting != nil: + return state.Waiting.Message + case state.Terminated != nil: + return state.Terminated.Message + default: + return "" + } +} diff --git a/backend/internal/adapter/output/k8s/tenant_client.go b/backend/internal/adapter/output/k8s/tenant_client.go new file mode 100644 index 0000000..7841881 --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_client.go @@ -0,0 +1,388 @@ +package k8s + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +// TenantClient provisions namespace-scoped tenant Kubernetes 
resources. +type TenantClient struct { + clientset kubernetes.Interface +} + +// NewTenantClient creates a tenant provisioning client that builds Kubernetes +// clients from the supplied cluster entity for each call. +func NewTenantClient() repository.TenantKubeClient { + return &TenantClient{} +} + +// NewTenantClientForClientset creates a tenant provisioning client for tests or +// callers that already own a Kubernetes client. +func NewTenantClientForClientset(clientset kubernetes.Interface) repository.TenantKubeClient { + return &TenantClient{clientset: clientset} +} + +// EnsureTenant idempotently ensures Namespace, ServiceAccount, RoleBinding, and +// ResourceQuota resources for the tenant binding. +func (c *TenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return err + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return err + } + if err := c.ensureNamespace(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureServiceAccount(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureRoleBinding(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureResourceQuota(ctx, clientset, binding); err != nil { + return err + } + return nil +} + +// IssueKubeconfig returns a short-lived kubeconfig backed by a Kubernetes +// TokenRequest. The token exists only in the returned value and is never stored. 
+func (c *TenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return nil, err + } + clientset, restConfig, err := c.clientsetForCluster(cluster) + if err != nil { + return nil, err + } + + cappedTTL := entity.TenantTokenTTL(ttl) + expirationSeconds := int64(cappedTTL.Seconds()) + tokenRequest, err := clientset.CoreV1(). + ServiceAccounts(binding.Namespace). + CreateToken(ctx, binding.ServiceAccountName, &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: &expirationSeconds, + }, + }, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to request tenant service account token: %w", err) + } + if tokenRequest.Status.Token == "" { + return nil, entity.ErrInvalidTenantKubeconfigToken + } + + expiresAt := tokenRequest.Status.ExpirationTimestamp.Time + if expiresAt.IsZero() { + expiresAt = time.Now().Add(cappedTTL) + } + kubeconfig, err := buildTenantKubeconfig(cluster, restConfig, binding, tokenRequest.Status.Token) + if err != nil { + return nil, err + } + return &entity.TenantKubeconfig{ + Kubeconfig: kubeconfig, + ExpiresAt: expiresAt, + }, nil +} + +// SuspendTenant revokes tenant API access by deleting only the RoleBinding. +func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return err + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return err + } + err = clientset.RbacV1(). + RoleBindings(binding.Namespace). 
+ Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("failed to delete tenant role binding: %w", err) + } + return nil +} + +func (c *TenantClient) clientsetForCluster(cluster *entity.Cluster) (kubernetes.Interface, *rest.Config, error) { + if c.clientset != nil { + config := &rest.Config{Host: "https://kubernetes.default.svc"} + if cluster != nil { + clusterConfig, err := restConfigFromCluster(cluster) + if err == nil { + config = clusterConfig + } + } + return c.clientset, config, nil + } + + config, err := restConfigFromCluster(cluster) + if err != nil { + return nil, nil, err + } + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, nil, fmt.Errorf("failed to create tenant kubernetes client: %w", err) + } + return clientset, config, nil +} + +func restConfigFromCluster(cluster *entity.Cluster) (*rest.Config, error) { + if cluster == nil { + return nil, entity.ErrInvalidClusterHost + } + if looksLikeKubeconfig(cluster.CAData) { + config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.CAData)) + if err != nil { + return nil, fmt.Errorf("failed to parse tenant kubeconfig: %w", err) + } + return config, nil + } + if strings.TrimSpace(cluster.Host) == "" { + return nil, entity.ErrInvalidClusterHost + } + return &rest.Config{ + Host: cluster.Host, + TLSClientConfig: rest.TLSClientConfig{ + CAData: decodePossiblyBase64(cluster.CAData), + CertData: decodePossiblyBase64(cluster.CertData), + KeyData: decodePossiblyBase64(cluster.KeyData), + }, + BearerToken: cluster.Token, + }, nil +} + +func (c *TenantClient) ensureNamespace(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + } + _, err := 
clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant namespace: %w", getErr) + } + mergeObjectMetadata(&current.ObjectMeta, binding.Labels, binding.Annotations) + if _, updateErr := clientset.CoreV1().Namespaces().Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant namespace: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant namespace: %w", err) + } + return nil +} + +func (c *TenantClient) ensureServiceAccount(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + serviceAccount := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.ServiceAccountName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + } + _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Create(ctx, serviceAccount, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant service account: %w", getErr) + } + mergeObjectMetadata(&current.ObjectMeta, binding.Labels, binding.Annotations) + if _, updateErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant service account: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant service account: %w", err) + } + return nil +} + +func (c *TenantClient) ensureRoleBinding(ctx context.Context, clientset kubernetes.Interface, binding
entity.TenantBinding) error { + roleBinding := desiredRoleBinding(binding) + _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Create(ctx, roleBinding, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant role binding: %w", getErr) + } + mergeObjectMetadata(&current.ObjectMeta, binding.Labels, binding.Annotations) + current.Subjects = roleBinding.Subjects + current.RoleRef = roleBinding.RoleRef + if _, updateErr := clientset.RbacV1().RoleBindings(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant role binding: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant role binding: %w", err) + } + return nil +} + +func (c *TenantClient) ensureResourceQuota(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + resourceQuota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.ResourceQuotaName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: binding.ResourceQuotaHard.DeepCopy(), + }, + } + _, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Create(ctx, resourceQuota, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant resource quota: %w", getErr) + } + mergeObjectMetadata(&current.ObjectMeta, binding.Labels, binding.Annotations) + current.Spec.Hard = binding.ResourceQuotaHard.DeepCopy() + if _, updateErr :=
clientset.CoreV1().ResourceQuotas(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant resource quota: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant resource quota: %w", err) + } + return nil +} + +func desiredRoleBinding(binding entity.TenantBinding) *rbacv1.RoleBinding { + return &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.RoleBindingName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + Subjects: []rbacv1.Subject{{ + Kind: rbacv1.ServiceAccountKind, + Name: binding.ServiceAccountName, + Namespace: binding.Namespace, + }}, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: binding.ClusterRoleName, + }, + } +} + +func buildTenantKubeconfig(cluster *entity.Cluster, restConfig *rest.Config, binding entity.TenantBinding, token string) (string, error) { + host := "" + var caData []byte + if restConfig != nil { + host = restConfig.Host + caData = append([]byte{}, restConfig.CAData...) 
+ } + if host == "" && cluster != nil { + host = cluster.Host + } + if len(caData) == 0 && cluster != nil { + caData = decodePossiblyBase64(cluster.CAData) + } + if host == "" { + return "", entity.ErrInvalidClusterHost + } + + clusterName := "tenant-cluster" + if cluster != nil && cluster.Name != "" { + clusterName = cluster.Name + } + userName := binding.ServiceAccountName + contextName := fmt.Sprintf("%s/%s", clusterName, binding.Namespace) + config := clientcmdapi.NewConfig() + config.Clusters[clusterName] = &clientcmdapi.Cluster{ + Server: host, + CertificateAuthorityData: caData, + } + config.AuthInfos[userName] = &clientcmdapi.AuthInfo{ + Token: token, + } + config.Contexts[contextName] = &clientcmdapi.Context{ + Cluster: clusterName, + AuthInfo: userName, + Namespace: binding.Namespace, + } + config.CurrentContext = contextName + + bytes, err := clientcmd.Write(*config) + if err != nil { + return "", fmt.Errorf("failed to build tenant kubeconfig: %w", err) + } + return string(bytes), nil +} + +func mergeObjectMetadata(meta *metav1.ObjectMeta, labels, annotations map[string]string) { + if len(labels) > 0 && meta.Labels == nil { + meta.Labels = map[string]string{} + } + for key, value := range labels { + meta.Labels[key] = value + } + if len(annotations) > 0 && meta.Annotations == nil { + meta.Annotations = map[string]string{} + } + for key, value := range annotations { + meta.Annotations[key] = value + } +} + +func copyStringMap(values map[string]string) map[string]string { + if len(values) == 0 { + return nil + } + copied := make(map[string]string, len(values)) + for key, value := range values { + copied[key] = value + } + return copied +} + +func decodePossiblyBase64(value string) []byte { + decoded, err := base64.StdEncoding.DecodeString(value) + if err == nil { + return decoded + } + return []byte(value) +} + +func looksLikeKubeconfig(value string) bool { + trimmed := strings.TrimSpace(value) + return strings.HasPrefix(trimmed, "apiVersion:") || 
strings.HasPrefix(trimmed, "kind: Config") +} diff --git a/backend/internal/adapter/output/k8s/tenant_client_test.go b/backend/internal/adapter/output/k8s/tenant_client_test.go new file mode 100644 index 0000000..e011973 --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_client_test.go @@ -0,0 +1,172 @@ +package k8s + +import ( + "context" + "strings" + "testing" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +func TestTenantClientEnsureTenantCreatesResources(t *testing.T) { + ctx := context.Background() + clientset := fake.NewSimpleClientset() + client := NewTenantClientForClientset(clientset) + binding := tenantBinding() + + if err := client.EnsureTenant(ctx, nil, binding); err != nil { + t.Fatalf("EnsureTenant returned error: %v", err) + } + + if _, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}); err != nil { + t.Fatalf("expected namespace: %v", err) + } + if _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}); err != nil { + t.Fatalf("expected service account: %v", err) + } + roleBinding, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected role binding: %v", err) + } + if roleBinding.RoleRef.Kind != "ClusterRole" || roleBinding.RoleRef.Name != binding.ClusterRoleName { + t.Fatalf("unexpected role ref: %#v", roleBinding.RoleRef) + } + if len(roleBinding.Subjects) != 1 || roleBinding.Subjects[0].Name != binding.ServiceAccountName { + t.Fatalf("unexpected role 
binding subjects: %#v", roleBinding.Subjects) + } + quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected resource quota: %v", err) + } + if quota.Spec.Hard.Cpu().String() != "2" { + t.Fatalf("expected cpu quota 2, got %s", quota.Spec.Hard.Cpu().String()) + } +} + +func TestTenantClientEnsureTenantUpdatesExistingResources(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace}}, + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}}, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: binding.RoleBindingName, Namespace: binding.Namespace}, + RoleRef: rbacv1.RoleRef{APIGroup: rbacv1.GroupName, Kind: "ClusterRole", Name: "view"}, + }, + &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{Name: binding.ResourceQuotaName, Namespace: binding.Namespace}, + Spec: corev1.ResourceQuotaSpec{Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + }}, + }, + ) + client := NewTenantClientForClientset(clientset) + + if err := client.EnsureTenant(ctx, nil, binding); err != nil { + t.Fatalf("EnsureTenant returned error: %v", err) + } + + roleBinding, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected updated role binding: %v", err) + } + if roleBinding.RoleRef.Name != binding.ClusterRoleName { + t.Fatalf("expected role ref %q, got %q", binding.ClusterRoleName, roleBinding.RoleRef.Name) + } + if roleBinding.Labels["ocdp.io/tenant"] != binding.Namespace { + t.Fatalf("expected tenant label on updated role binding, got %#v", roleBinding.Labels) + } + quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, 
metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected updated quota: %v", err) + } + if quota.Spec.Hard.Cpu().String() != "2" { + t.Fatalf("expected updated cpu quota 2, got %s", quota.Spec.Hard.Cpu().String()) + } +} + +func TestTenantClientSuspendTenantDeletesOnlyRoleBinding(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace}}, + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}}, + desiredRoleBinding(binding), + ) + client := NewTenantClientForClientset(clientset) + + if err := client.SuspendTenant(ctx, nil, binding); err != nil { + t.Fatalf("SuspendTenant returned error: %v", err) + } + if _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected deleted role binding, got err %v", err) + } + if _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}); err != nil { + t.Fatalf("service account should remain: %v", err) + } +} + +func TestTenantClientIssueKubeconfigCapsTokenTTL(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}, + }) + var requestedExpirationSeconds int64 + expiresAt := time.Now().Add(entity.MaxTenantKubeconfigTTL).UTC() + clientset.Fake.PrependReactor("create", "serviceaccounts", func(action k8stesting.Action) (bool, runtime.Object, error) { + if action.GetSubresource() != "token" { + return false, nil, nil + } + createAction := action.(k8stesting.CreateAction) + tokenRequest := createAction.GetObject().(*authenticationv1.TokenRequest) + if tokenRequest.Spec.ExpirationSeconds != nil { + 
requestedExpirationSeconds = *tokenRequest.Spec.ExpirationSeconds + } + return true, &authenticationv1.TokenRequest{ + Status: authenticationv1.TokenRequestStatus{ + Token: "short-lived-token", + ExpirationTimestamp: metav1.NewTime(expiresAt), + }, + }, nil + }) + client := NewTenantClientForClientset(clientset) + + kubeconfig, err := client.IssueKubeconfig(ctx, &entity.Cluster{Name: "test", Host: "https://example.invalid"}, binding, 24*time.Hour) + if err != nil { + t.Fatalf("IssueKubeconfig returned error: %v", err) + } + + if requestedExpirationSeconds != int64(entity.MaxTenantKubeconfigTTL.Seconds()) { + t.Fatalf("expected capped ttl %d, got %d", int64(entity.MaxTenantKubeconfigTTL.Seconds()), requestedExpirationSeconds) + } + if !kubeconfig.ExpiresAt.Equal(expiresAt) { + t.Fatalf("expected expiration %s, got %s", expiresAt, kubeconfig.ExpiresAt) + } + if !strings.Contains(kubeconfig.Kubeconfig, "short-lived-token") { + t.Fatal("expected kubeconfig to contain issued token") + } + if !strings.Contains(kubeconfig.Kubeconfig, "namespace: tenant-a") { + t.Fatalf("expected kubeconfig namespace, got:\n%s", kubeconfig.Kubeconfig) + } +} + +func tenantBinding() entity.TenantBinding { + binding := entity.NewTenantBinding("tenant-a") + binding.ResourceQuotaHard = corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + } + return binding +} diff --git a/backend/internal/adapter/output/k8s/tenant_mock.go b/backend/internal/adapter/output/k8s/tenant_mock.go new file mode 100644 index 0000000..df402b0 --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_mock.go @@ -0,0 +1,36 @@ +package k8s + +import ( + "context" + "fmt" + "time" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type MockTenantClient struct{} + +func NewMockTenantClient() repository.TenantKubeClient { + return &MockTenantClient{} +} + +func (c 
*MockTenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return binding.Validate() +} + +func (c *MockTenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { + if err := binding.Validate(); err != nil { + return nil, err + } + expiresAt := time.Now().Add(entity.TenantTokenTTL(ttl)) + return &entity.TenantKubeconfig{ + Kubeconfig: fmt.Sprintf("apiVersion: v1\nkind: Config\nclusters:\n- name: %s\n cluster:\n server: %s\ncontexts:\n- name: %s\n context:\n cluster: %s\n namespace: %s\n user: %s\ncurrent-context: %s\nusers:\n- name: %s\n user:\n token: mock-ephemeral-token\n", + cluster.Name, cluster.Host, binding.Namespace, cluster.Name, binding.Namespace, binding.ServiceAccountName, binding.Namespace, binding.ServiceAccountName), + ExpiresAt: expiresAt, + }, nil +} + +func (c *MockTenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return binding.Validate() +} diff --git a/backend/internal/adapter/output/oci/mock/oci_client_mock.go b/backend/internal/adapter/output/oci/mock/oci_client_mock.go index a31baaa..1556489 100644 --- a/backend/internal/adapter/output/oci/mock/oci_client_mock.go +++ b/backend/internal/adapter/output/oci/mock/oci_client_mock.go @@ -5,7 +5,7 @@ import ( "fmt" "strings" "time" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -13,7 +13,7 @@ import ( // OCIClientMock OCI Registry 客户端 Mock 实现 type OCIClientMock struct { // Mock 数据存储 - repositories map[string][]string // registryID -> []repositoryName + repositories map[string][]string // registryID -> []repositoryName artifacts map[string]map[string][]*entity.Artifact // registryID -> repository -> []artifact } @@ -23,10 +23,10 @@ func NewOCIClientMock() repository.OCIClient { repositories: 
make(map[string][]string), artifacts: make(map[string]map[string][]*entity.Artifact), } - + // 初始化一些测试数据 mock.initMockData() - + return mock } @@ -38,18 +38,18 @@ func (c *OCIClientMock) initMockData() { // initArtifactsForRegistry initializes mock artifacts for a given registry ID func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { c.artifacts[registryID] = make(map[string][]*entity.Artifact) - + // vllm-serve artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/vllm-serve"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/vllm-serve", - Tag: "0.1.0", - Digest: "sha256:abc123def456", - Type: entity.ArtifactTypeChart, - Size: 12345678, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/vllm-serve", + Tag: "0.1.0", + Digest: "sha256:abc123def456", + Type: entity.ArtifactTypeChart, + Size: 12345678, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "vllm-serve", "org.opencontainers.image.version": "0.1.0", @@ -57,14 +57,14 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-24 * time.Hour), }, { - RegistryID: registryID, - Repository: "charts/vllm-serve", - Tag: "0.2.0", - Digest: "sha256:xyz789uvw012", - Type: entity.ArtifactTypeChart, - Size: 13456789, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/vllm-serve", + Tag: "0.2.0", + Digest: "sha256:xyz789uvw012", + Type: entity.ArtifactTypeChart, + Size: 13456789, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // 
Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "vllm-serve", "org.opencontainers.image.version": "0.2.0", @@ -72,36 +72,36 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now(), }, } - + // nginx artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/nginx"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/nginx", - Tag: "1.0.0", - Digest: "sha256:nginx123456", - Type: entity.ArtifactTypeChart, - Size: 5678901, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/nginx", + Tag: "1.0.0", + Digest: "sha256:nginx123456", + Type: entity.ArtifactTypeChart, + Size: 5678901, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "nginx", }, CreatedAt: time.Now().Add(-48 * time.Hour), }, } - + // redis artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/redis"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/redis", - Tag: "6.2.0", - Digest: "sha256:redis789abc", - Type: entity.ArtifactTypeChart, - Size: 8901234, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/redis", + Tag: "6.2.0", + Digest: "sha256:redis789abc", + Type: entity.ArtifactTypeChart, + Size: 8901234, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "redis", "org.opencontainers.image.version": "6.2.0", @@ -109,18 +109,18 @@ func (c *OCIClientMock) 
initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-72 * time.Hour), }, } - + // alpine artifacts (Docker Image) c.artifacts[registryID]["library/alpine"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "library/alpine", - Tag: "3.18", - Digest: "sha256:alpine123", - Type: entity.ArtifactTypeImage, - Size: 2345678, - MediaType: "application/vnd.docker.distribution.manifest.v2+json", - ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type + RegistryID: registryID, + Repository: "library/alpine", + Tag: "3.18", + Digest: "sha256:alpine123", + Type: entity.ArtifactTypeImage, + Size: 2345678, + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "alpine", "org.opencontainers.image.version": "3.18", @@ -128,14 +128,14 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-96 * time.Hour), }, { - RegistryID: registryID, - Repository: "library/alpine", - Tag: "latest", - Digest: "sha256:alpine456", - Type: entity.ArtifactTypeImage, - Size: 2456789, - MediaType: "application/vnd.docker.distribution.manifest.v2+json", - ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type + RegistryID: registryID, + Repository: "library/alpine", + Tag: "latest", + Digest: "sha256:alpine456", + Type: entity.ArtifactTypeImage, + Size: 2456789, + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "alpine", }, @@ -144,7 +144,7 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { } } -func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.Registry) 
([]string, error) { +func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { // Check if we have cached data for this registry repos, exists := c.repositories[registry.ID] if !exists { @@ -156,10 +156,20 @@ func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.R "library/alpine", } c.repositories[registry.ID] = repos - + // Also initialize artifacts for this registry c.initArtifactsForRegistry(registry.ID) } + if strings.EqualFold(strings.TrimSpace(artifactType), "chart") { + chartRepos := make([]string, 0) + for _, repo := range repos { + artifacts, _ := c.ListArtifacts(ctx, registry, repo, "chart") + if len(artifacts) > 0 { + chartRepos = append(chartRepos, repo) + } + } + return chartRepos, nil + } return repos, nil } @@ -170,20 +180,20 @@ func (c *OCIClientMock) ListArtifacts(ctx context.Context, registry *entity.Regi c.initArtifactsForRegistry(registry.ID) regArtifacts = c.artifacts[registry.ID] } - + artifacts, exists := regArtifacts[repository] if !exists { return []*entity.Artifact{}, nil } - + // 应用 mediaType 过滤 if mediaTypeFilter == "" || mediaTypeFilter == "all" { return artifacts, nil } - + filtered := make([]*entity.Artifact, 0) filter := strings.ToLower(strings.TrimSpace(mediaTypeFilter)) - + for _, artifact := range artifacts { switch filter { case "chart": @@ -200,7 +210,7 @@ func (c *OCIClientMock) ListArtifacts(ctx context.Context, registry *entity.Regi } } } - + return filtered, nil } @@ -211,19 +221,19 @@ func (c *OCIClientMock) GetArtifact(ctx context.Context, registry *entity.Regist c.initArtifactsForRegistry(registry.ID) regArtifacts = c.artifacts[registry.ID] } - + artifacts, exists := regArtifacts[repository] if !exists { return nil, entity.ErrArtifactNotFound } - + // 根据 tag 或 digest 查找 for _, artifact := range artifacts { if artifact.Tag == reference || artifact.Digest == reference { return artifact, nil } } - + return nil, 
entity.ErrArtifactNotFound } @@ -232,11 +242,11 @@ func (c *OCIClientMock) GetValuesSchema(ctx context.Context, registry *entity.Re if err != nil { return "", err } - + if !artifact.IsChart() { return "", fmt.Errorf("not a helm chart") } - + // 返回 Mock values schema mockSchema := `{ "$schema": "http://json-schema.org/draft-07/schema#", @@ -262,12 +272,23 @@ func (c *OCIClientMock) GetValuesSchema(ctx context.Context, registry *entity.Re return mockSchema, nil } +func (c *OCIClientMock) GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) { + artifact, err := c.GetArtifact(ctx, registry, repository, reference) + if err != nil { + return "", err + } + if !artifact.IsChart() { + return "", fmt.Errorf("not a helm chart") + } + return "replicaCount: 1\nimage:\n repository: nginx\n tag: latest\nservice:\n type: ClusterIP\n", nil +} + func (c *OCIClientMock) PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, destPath string) error { _, err := c.GetArtifact(ctx, registry, repository, reference) if err != nil { return err } - + // Mock 实现,不实际下载 return nil } @@ -281,4 +302,3 @@ func (c *OCIClientMock) CheckHealth(ctx context.Context, registry *entity.Regist // Mock 实现,总是返回健康 return nil } - diff --git a/backend/internal/adapter/output/oci/real/oci_client.go b/backend/internal/adapter/output/oci/real/oci_client.go index f9a9e76..d03b40e 100644 --- a/backend/internal/adapter/output/oci/real/oci_client.go +++ b/backend/internal/adapter/output/oci/real/oci_client.go @@ -8,9 +8,13 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "path/filepath" + "sort" + "strconv" "strings" + "time" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" @@ -25,6 +29,30 @@ type OCIClient struct { httpClient *http.Client } +type harborProject struct { + Name string `json:"name"` +} + +type harborRepository struct { + Name string `json:"name"` + 
ArtifactCount int `json:"artifact_count"` +} + +type harborTag struct { + Name string `json:"name"` + PushTime string `json:"push_time"` +} + +type harborArtifact struct { + Digest string `json:"digest"` + MediaType string `json:"media_type"` + ArtifactType string `json:"artifact_type"` + Size int64 `json:"size"` + PushTime string `json:"push_time"` + Tags []harborTag `json:"tags"` + Annotations map[string]string `json:"annotations"` +} + // NewOCIClient 创建真实的 OCI 客户端 func NewOCIClient() repository.OCIClient { return &OCIClient{ @@ -60,8 +88,34 @@ func (c *OCIClient) getRegistry(reg *entity.Registry) (*remote.Registry, error) return registry, nil } -// ListRepositories 列出 Registry 中的所有 repositories -func (c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) { +// ListRepositories 列出 Registry 中的 repositories. +// Harbor registry 优先使用 Harbor v2.0 API,避免 robot 账号依赖 /v2/_catalog 全局权限。 +func (c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { + repositories, harborErr := c.listHarborRepositories(ctx, registry, artifactType) + if harborErr == nil { + return repositories, nil + } + + repositories, catalogErr := c.listOCIRepositories(ctx, registry) + if catalogErr != nil { + return nil, fmt.Errorf("failed to list repositories via Harbor API: %v; OCI catalog fallback also failed: %w", harborErr, catalogErr) + } + + if strings.EqualFold(strings.TrimSpace(artifactType), "chart") { + chartRepos := make([]string, 0) + for _, repo := range repositories { + artifacts, err := c.ListArtifacts(ctx, registry, repo, "chart") + if err == nil && len(artifacts) > 0 { + chartRepos = append(chartRepos, repo) + } + } + return chartRepos, nil + } + + return repositories, nil +} + +func (c *OCIClient) listOCIRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) { reg, err := c.getRegistry(registry) if err != nil { return nil, err @@ -81,9 +135,278 @@ func 
(c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Regis return repositories, nil } +func (c *OCIClient) listHarborRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { + projects, err := c.harborListProjects(ctx, registry) + if err != nil { + return nil, err + } + + repositorySet := make(map[string]struct{}) + chartOnly := strings.EqualFold(strings.TrimSpace(artifactType), "chart") || strings.TrimSpace(artifactType) == "" + + for _, project := range projects { + projectName := strings.TrimSpace(project.Name) + if projectName == "" { + continue + } + + repositories, err := c.harborListProjectRepositories(ctx, registry, projectName) + if err != nil { + return nil, err + } + + for _, harborRepo := range repositories { + repoName := normalizeHarborRepositoryName(projectName, harborRepo.Name) + if repoName == "" { + continue + } + if chartOnly { + artifacts, err := c.listHarborArtifacts(ctx, registry, repoName, "chart") + if err != nil || len(artifacts) == 0 { + continue + } + } + repositorySet[repoName] = struct{}{} + } + } + + repositories := make([]string, 0, len(repositorySet)) + for repo := range repositorySet { + repositories = append(repositories, repo) + } + sort.Strings(repositories) + return repositories, nil +} + +func (c *OCIClient) harborListProjects(ctx context.Context, registry *entity.Registry) ([]harborProject, error) { + var projects []harborProject + if err := c.harborGetPaged(ctx, registry, "/api/v2.0/projects", url.Values{"member": []string{"true"}}, &projects); err != nil { + return nil, err + } + return projects, nil +} + +func (c *OCIClient) harborListProjectRepositories(ctx context.Context, registry *entity.Registry, projectName string) ([]harborRepository, error) { + var repositories []harborRepository + path := "/api/v2.0/projects/" + url.PathEscape(projectName) + "/repositories" + if err := c.harborGetPaged(ctx, registry, path, nil, &repositories); err != nil { + return nil, 
err + } + return repositories, nil +} + +func (c *OCIClient) listHarborArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { + projectName, repoName, ok := splitHarborRepository(repository) + if !ok { + return nil, fmt.Errorf("repository %q is not a Harbor project repository path", repository) + } + + var harborArtifacts []harborArtifact + path := "/api/v2.0/projects/" + url.PathEscape(projectName) + "/repositories/" + url.PathEscape(repoName) + "/artifacts" + query := url.Values{ + "with_tag": []string{"true"}, + "with_label": []string{"false"}, + } + if err := c.harborGetPaged(ctx, registry, path, query, &harborArtifacts); err != nil { + return nil, err + } + + artifacts := make([]*entity.Artifact, 0) + for _, harborArtifact := range harborArtifacts { + tags := harborArtifact.Tags + if len(tags) == 0 { + continue + } + + for _, tag := range tags { + if strings.TrimSpace(tag.Name) == "" { + continue + } + artifact := &entity.Artifact{ + Repository: repository, + Tag: tag.Name, + Digest: harborArtifact.Digest, + MediaType: harborArtifact.MediaType, + ConfigType: harborArtifact.ArtifactType, + Size: harborArtifact.Size, + Annotations: harborArtifact.Annotations, + CreatedAt: parseHarborTime(firstNonEmpty(tag.PushTime, harborArtifact.PushTime)), + } + if artifact.Annotations == nil { + artifact.Annotations = make(map[string]string) + } + + artifact.DetermineType() + if isHarborChartArtifact(harborArtifact) { + artifact.Type = entity.ArtifactTypeChart + } + + if c.shouldIncludeArtifact(artifact, mediaTypeFilter) { + artifacts = append(artifacts, artifact) + } + } + } + + return artifacts, nil +} + +func (c *OCIClient) harborGetPaged(ctx context.Context, registry *entity.Registry, path string, query url.Values, target interface{}) error { + const pageSize = 100 + + accumulated := make([]json.RawMessage, 0) + for page := 1; ; page++ { + pageQuery := cloneValues(query) + pageQuery.Set("page", 
fmt.Sprintf("%d", page)) + pageQuery.Set("page_size", fmt.Sprintf("%d", pageSize)) + + body, total, err := c.harborGet(ctx, registry, path, pageQuery) + if err != nil { + return err + } + + var pageItems []json.RawMessage + if err := json.Unmarshal(body, &pageItems); err != nil { + return fmt.Errorf("failed to decode Harbor response for %s: %w", path, err) + } + accumulated = append(accumulated, pageItems...) + + if len(pageItems) < pageSize || (total >= 0 && len(accumulated) >= total) { + break + } + } + + combined, err := json.Marshal(accumulated) + if err != nil { + return fmt.Errorf("failed to combine Harbor pages: %w", err) + } + if err := json.Unmarshal(combined, target); err != nil { + return fmt.Errorf("failed to decode Harbor pages: %w", err) + } + return nil +} + +func (c *OCIClient) harborGet(ctx context.Context, registry *entity.Registry, path string, query url.Values) ([]byte, int, error) { + baseURL, err := harborBaseURL(registry) + if err != nil { + return nil, -1, err + } + + requestURL := strings.TrimRight(baseURL, "/") + path + if len(query) > 0 { + requestURL += "?" 
+ query.Encode() + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return nil, -1, err + } + req.Header.Set("Accept", "application/json") + if registry.Username != "" || registry.Password != "" { + req.SetBasicAuth(registry.Username, registry.Password) + } + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, -1, fmt.Errorf("Harbor API request failed: %w", err) + } + defer resp.Body.Close() + + body, readErr := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) + if readErr != nil { + return nil, -1, fmt.Errorf("failed to read Harbor API response: %w", readErr) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, -1, fmt.Errorf("Harbor API %s returned %d: %s", path, resp.StatusCode, strings.TrimSpace(string(body))) + } + + total := -1 + if value := strings.TrimSpace(resp.Header.Get("X-Total-Count")); value != "" { + if parsed, err := strconv.Atoi(value); err == nil { + total = parsed + } + } + return body, total, nil +} + +func harborBaseURL(registry *entity.Registry) (string, error) { + rawURL := strings.TrimSpace(registry.URL) + if rawURL == "" { + return "", fmt.Errorf("registry URL is empty") + } + if !strings.Contains(rawURL, "://") { + rawURL = "https://" + rawURL + } + parsed, err := url.Parse(rawURL) + if err != nil { + return "", fmt.Errorf("invalid registry URL %q: %w", registry.URL, err) + } + if parsed.Scheme == "" || parsed.Host == "" { + return "", fmt.Errorf("invalid registry URL %q", registry.URL) + } + return parsed.Scheme + "://" + parsed.Host, nil +} + +func splitHarborRepository(repository string) (string, string, bool) { + projectName, repoName, ok := strings.Cut(strings.Trim(repository, "/"), "/") + if !ok || projectName == "" || repoName == "" { + return "", "", false + } + return projectName, repoName, true +} + +func normalizeHarborRepositoryName(projectName, repositoryName string) string { + repositoryName = strings.Trim(repositoryName, "/") + if 
repositoryName == "" { + return "" + } + if strings.HasPrefix(repositoryName, projectName+"/") { + return repositoryName + } + return projectName + "/" + repositoryName +} + +func isHarborChartArtifact(artifact harborArtifact) bool { + typeInfo := strings.ToLower(strings.TrimSpace(artifact.ArtifactType + " " + artifact.MediaType)) + return strings.Contains(typeInfo, "chart") || strings.Contains(typeInfo, "helm") +} + +func cloneValues(values url.Values) url.Values { + cloned := make(url.Values) + for key, items := range values { + cloned[key] = append([]string(nil), items...) + } + return cloned +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if strings.TrimSpace(value) != "" { + return value + } + } + return "" +} + +func parseHarborTime(value string) time.Time { + value = strings.TrimSpace(value) + if value == "" { + return time.Time{} + } + for _, layout := range []string{time.RFC3339Nano, time.RFC3339, "2006-01-02T15:04:05.999999", "2006-01-02T15:04:05"} { + if parsed, err := time.Parse(layout, value); err == nil { + return parsed + } + } + return time.Time{} +} + // ListArtifacts 列出指定 repository 的所有 artifacts // mediaTypeFilter: "all", "image", "chart", "other" - 使用模糊匹配过滤 func (c *OCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { + if artifacts, err := c.listHarborArtifacts(ctx, registry, repository, mediaTypeFilter); err == nil { + return artifacts, nil + } + reg, err := c.getRegistry(registry) if err != nil { return nil, err @@ -370,6 +693,113 @@ func (c *OCIClient) GetValuesSchema(ctx context.Context, registry *entity.Regist return "", entity.ErrValuesSchemaNotFound } +// GetValuesYAML 获取 Helm Chart 包内原始 values.yaml +func (c *OCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) { + data, err := c.readChartFile(ctx, registry, repository, reference, "values.yaml") + if 
err != nil { + return "", err + } + if strings.TrimSpace(data) == "" { + return "", entity.ErrArtifactNotFound + } + return data, nil +} + +func (c *OCIClient) readChartFile(ctx context.Context, registry *entity.Registry, repository, reference, filename string) (string, error) { + reg, err := c.getRegistry(registry) + if err != nil { + return "", err + } + + repo, err := reg.Repository(ctx, repository) + if err != nil { + return "", fmt.Errorf("failed to get repository: %w", err) + } + + desc, err := repo.Resolve(ctx, reference) + if err != nil { + return "", fmt.Errorf("failed to resolve artifact: %w", err) + } + + manifestReader, err := repo.Fetch(ctx, desc) + if err != nil { + return "", fmt.Errorf("failed to fetch manifest: %w", err) + } + defer manifestReader.Close() + + manifestBytes, err := io.ReadAll(manifestReader) + if err != nil { + return "", fmt.Errorf("failed to read manifest: %w", err) + } + + var manifest ocispec.Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + return "", fmt.Errorf("failed to unmarshal manifest: %w", err) + } + + var chartLayer *ocispec.Descriptor + for i := range manifest.Layers { + layer := manifest.Layers[i] + if strings.Contains(layer.MediaType, "cncf.helm.chart") || + strings.Contains(layer.MediaType, "helm.chart.content") { + chartLayer = &manifest.Layers[i] + break + } + } + if chartLayer == nil { + return "", fmt.Errorf("helm chart layer not found in manifest") + } + if chartLayer.Digest == "" { + return "", fmt.Errorf("chart layer digest is empty") + } + if _, err := digest.Parse(string(chartLayer.Digest)); err != nil { + return "", fmt.Errorf("invalid chart layer digest: %w", err) + } + + layerReader, err := repo.Fetch(ctx, *chartLayer) + if err != nil { + return "", fmt.Errorf("failed to fetch chart layer: %w", err) + } + defer layerReader.Close() + + gzipReader, err := gzip.NewReader(layerReader) + if err != nil { + return "", fmt.Errorf("failed to create gzip reader: %w", err) + } + defer 
gzipReader.Close() + + tarReader := tar.NewReader(gzipReader) + bestDepth := int(^uint(0) >> 1) + var bestData []byte + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("failed to read chart archive: %w", err) + } + if header.Typeflag != tar.TypeReg { + continue + } + if strings.HasSuffix(header.Name, filename) { + data, err := io.ReadAll(tarReader) + if err != nil { + return "", fmt.Errorf("failed to read %s: %w", filename, err) + } + depth := strings.Count(strings.Trim(header.Name, "/"), "/") + if depth < bestDepth { + bestDepth = depth + bestData = data + } + } + } + if len(bestData) > 0 { + return string(bestData), nil + } + return "", fmt.Errorf("%s not found in chart", filename) +} + // PullArtifact 下载 artifact 到本地 func (c *OCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, destPath string) error { reg, err := c.getRegistry(registry) diff --git a/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go b/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go index 90e031b..0ba1fed 100644 --- a/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" "github.com/ocdp/cluster-service/internal/pkg/crypto" @@ -27,21 +27,21 @@ func NewClusterRepositoryMock(encryptor crypto.Encryptor) repository.ClusterRepo func (r *ClusterRepositoryMock) Create(ctx context.Context, cluster *entity.Cluster) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查名称是否已存在 for _, c := range r.clusters { if c.Name == cluster.Name { return entity.ErrClusterExists } } - + // Mock 模式:如果没有提供认证信息,自动填充默认的 Mock 证书 if (cluster.CertData == "" || cluster.KeyData == "") && 
cluster.Token == "" { cluster.CAData = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1vY2sgQ0EgQ2VydGlmaWNhdGUKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQ==" cluster.CertData = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1vY2sgQ2xpZW50IENlcnRpZmljYXRlCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0=" cluster.KeyData = "LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNb2NrIFByaXZhdGUgS2V5Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0t" } - + // 加密敏感数据后存储 encryptedCluster := r.encryptCluster(cluster) r.clusters[cluster.ID] = encryptedCluster @@ -51,12 +51,12 @@ func (r *ClusterRepositoryMock) Create(ctx context.Context, cluster *entity.Clus func (r *ClusterRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + cluster, exists := r.clusters[id] if !exists { return nil, entity.ErrClusterNotFound } - + // 解密敏感数据后返回 return r.decryptCluster(cluster), nil } @@ -64,25 +64,25 @@ func (r *ClusterRepositoryMock) GetByID(ctx context.Context, id string) (*entity func (r *ClusterRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, cluster := range r.clusters { if cluster.Name == name { // 解密敏感数据后返回 return r.decryptCluster(cluster), nil } } - + return nil, entity.ErrClusterNotFound } func (r *ClusterRepositoryMock) Update(ctx context.Context, cluster *entity.Cluster) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.clusters[cluster.ID]; !exists { return entity.ErrClusterNotFound } - + // 加密敏感数据后存储 encryptedCluster := r.encryptCluster(cluster) r.clusters[cluster.ID] = encryptedCluster @@ -92,11 +92,11 @@ func (r *ClusterRepositoryMock) Update(ctx context.Context, cluster *entity.Clus func (r *ClusterRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.clusters[id]; !exists { return entity.ErrClusterNotFound } - + delete(r.clusters, id) return nil } @@ -104,20 +104,20 @@ func (r *ClusterRepositoryMock) 
Delete(ctx context.Context, id string) error { func (r *ClusterRepositoryMock) List(ctx context.Context) ([]*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + clusters := make([]*entity.Cluster, 0, len(r.clusters)) for _, cluster := range r.clusters { // 解密敏感数据后返回 clusters = append(clusters, r.decryptCluster(cluster)) } - + return clusters, nil } // encryptCluster 加密 Cluster 的敏感数据 func (r *ClusterRepositoryMock) encryptCluster(cluster *entity.Cluster) *entity.Cluster { encrypted := *cluster // 复制 - + // 加密证书数据 if cluster.CAData != "" && !crypto.IsEncrypted(cluster.CAData) { if encryptedData, err := r.encryptor.Encrypt(cluster.CAData); err == nil { @@ -139,14 +139,14 @@ func (r *ClusterRepositoryMock) encryptCluster(cluster *entity.Cluster) *entity. encrypted.Token = encryptedData } } - + return &encrypted } // decryptCluster 解密 Cluster 的敏感数据 func (r *ClusterRepositoryMock) decryptCluster(cluster *entity.Cluster) *entity.Cluster { decrypted := *cluster // 复制 - + // 解密证书数据 if cluster.CAData != "" && crypto.IsEncrypted(cluster.CAData) { if decryptedData, err := r.encryptor.Decrypt(cluster.CAData); err == nil { @@ -168,7 +168,6 @@ func (r *ClusterRepositoryMock) decryptCluster(cluster *entity.Cluster) *entity. 
decrypted.Token = decryptedData } } - + return &decrypted } - diff --git a/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go b/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go index 907401d..c89e4ea 100644 --- a/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -24,14 +24,14 @@ func NewInstanceRepositoryMock() repository.InstanceRepository { func (r *InstanceRepositoryMock) Create(ctx context.Context, instance *entity.Instance) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查同一集群中名称是否已存在 for _, inst := range r.instances { if inst.ClusterID == instance.ClusterID && inst.Name == instance.Name { return entity.ErrInstanceExists } } - + r.instances[instance.ID] = instance return nil } @@ -39,36 +39,36 @@ func (r *InstanceRepositoryMock) Create(ctx context.Context, instance *entity.In func (r *InstanceRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instance, exists := r.instances[id] if !exists { return nil, entity.ErrInstanceNotFound } - + return instance, nil } func (r *InstanceRepositoryMock) GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, instance := range r.instances { if instance.ClusterID == clusterID && instance.Name == name { return instance, nil } } - + return nil, entity.ErrInstanceNotFound } func (r *InstanceRepositoryMock) Update(ctx context.Context, instance *entity.Instance) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.instances[instance.ID]; !exists { return entity.ErrInstanceNotFound } - + r.instances[instance.ID] = instance return nil 
} @@ -76,11 +76,11 @@ func (r *InstanceRepositoryMock) Update(ctx context.Context, instance *entity.In func (r *InstanceRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.instances[id]; !exists { return entity.ErrInstanceNotFound } - + delete(r.instances, id) return nil } @@ -88,26 +88,25 @@ func (r *InstanceRepositoryMock) Delete(ctx context.Context, id string) error { func (r *InstanceRepositoryMock) ListByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instances := make([]*entity.Instance, 0) for _, instance := range r.instances { if instance.ClusterID == clusterID { instances = append(instances, instance) } } - + return instances, nil } func (r *InstanceRepositoryMock) List(ctx context.Context) ([]*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instances := make([]*entity.Instance, 0, len(r.instances)) for _, instance := range r.instances { instances = append(instances, instance) } - + return instances, nil } - diff --git a/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go b/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go index 536b09d..55e77ea 100644 --- a/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" "github.com/ocdp/cluster-service/internal/pkg/crypto" @@ -27,14 +27,14 @@ func NewRegistryRepositoryMock(encryptor crypto.Encryptor) repository.RegistryRe func (r *RegistryRepositoryMock) Create(ctx context.Context, registry *entity.Registry) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查名称是否已存在 for _, reg := range r.registries { if reg.Name == registry.Name { return 
entity.ErrRegistryExists } } - + // 加密敏感数据后存储 encryptedRegistry := r.encryptRegistry(registry) r.registries[registry.ID] = encryptedRegistry @@ -44,12 +44,12 @@ func (r *RegistryRepositoryMock) Create(ctx context.Context, registry *entity.Re func (r *RegistryRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + registry, exists := r.registries[id] if !exists { return nil, entity.ErrRegistryNotFound } - + // 解密敏感数据后返回 return r.decryptRegistry(registry), nil } @@ -57,25 +57,25 @@ func (r *RegistryRepositoryMock) GetByID(ctx context.Context, id string) (*entit func (r *RegistryRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, registry := range r.registries { if registry.Name == name { // 解密敏感数据后返回 return r.decryptRegistry(registry), nil } } - + return nil, entity.ErrRegistryNotFound } func (r *RegistryRepositoryMock) Update(ctx context.Context, registry *entity.Registry) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.registries[registry.ID]; !exists { return entity.ErrRegistryNotFound } - + // 加密敏感数据后存储 encryptedRegistry := r.encryptRegistry(registry) r.registries[registry.ID] = encryptedRegistry @@ -85,11 +85,11 @@ func (r *RegistryRepositoryMock) Update(ctx context.Context, registry *entity.Re func (r *RegistryRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.registries[id]; !exists { return entity.ErrRegistryNotFound } - + delete(r.registries, id) return nil } @@ -97,41 +97,40 @@ func (r *RegistryRepositoryMock) Delete(ctx context.Context, id string) error { func (r *RegistryRepositoryMock) List(ctx context.Context) ([]*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + registries := make([]*entity.Registry, 0, len(r.registries)) for _, registry := range r.registries { // 解密敏感数据后返回 registries = append(registries, 
r.decryptRegistry(registry)) } - + return registries, nil } // encryptRegistry 加密 Registry 的敏感数据 func (r *RegistryRepositoryMock) encryptRegistry(registry *entity.Registry) *entity.Registry { encrypted := *registry // 复制 - + // 加密密码 if registry.Password != "" && !crypto.IsEncrypted(registry.Password) { if encryptedPassword, err := r.encryptor.Encrypt(registry.Password); err == nil { encrypted.Password = encryptedPassword } } - + return &encrypted } // decryptRegistry 解密 Registry 的敏感数据 func (r *RegistryRepositoryMock) decryptRegistry(registry *entity.Registry) *entity.Registry { decrypted := *registry // 复制 - + // 解密密码 if registry.Password != "" && crypto.IsEncrypted(registry.Password) { if decryptedPassword, err := r.encryptor.Decrypt(registry.Password); err == nil { decrypted.Password = decryptedPassword } } - + return &decrypted } - diff --git a/backend/internal/adapter/output/persistence/mock/user_repository_mock.go b/backend/internal/adapter/output/persistence/mock/user_repository_mock.go index 3dc5cf5..b5c5b9f 100644 --- a/backend/internal/adapter/output/persistence/mock/user_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/user_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -24,14 +24,14 @@ func NewUserRepositoryMock() repository.UserRepository { func (r *UserRepositoryMock) Create(ctx context.Context, user *entity.User) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查是否已存在 for _, u := range r.users { if u.Username == user.Username { return entity.ErrUserExists } } - + r.users[user.ID] = user return nil } @@ -39,36 +39,36 @@ func (r *UserRepositoryMock) Create(ctx context.Context, user *entity.User) erro func (r *UserRepositoryMock) GetByID(ctx context.Context, id string) (*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + user, exists := r.users[id] if !exists { return 
nil, entity.ErrUserNotFound } - + return user, nil } func (r *UserRepositoryMock) GetByUsername(ctx context.Context, username string) (*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, user := range r.users { if user.Username == username { return user, nil } } - + return nil, entity.ErrUserNotFound } func (r *UserRepositoryMock) Update(ctx context.Context, user *entity.User) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.users[user.ID]; !exists { return entity.ErrUserNotFound } - + r.users[user.ID] = user return nil } @@ -76,11 +76,11 @@ func (r *UserRepositoryMock) Update(ctx context.Context, user *entity.User) erro func (r *UserRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.users[id]; !exists { return entity.ErrUserNotFound } - + delete(r.users, id) return nil } @@ -88,12 +88,11 @@ func (r *UserRepositoryMock) Delete(ctx context.Context, id string) error { func (r *UserRepositoryMock) List(ctx context.Context) ([]*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + users := make([]*entity.User, 0, len(r.users)) for _, user := range r.users { users = append(users, user) } - + return users, nil } - diff --git a/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go b/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go new file mode 100644 index 0000000..31d9287 --- /dev/null +++ b/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go @@ -0,0 +1,162 @@ +package mock + +import ( + "context" + "sync" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type WorkspaceRepositoryMock struct { + mu sync.RWMutex + workspaces map[string]*entity.Workspace +} + +func NewWorkspaceRepositoryMock() repository.WorkspaceRepository { + repo := &WorkspaceRepositoryMock{workspaces: 
make(map[string]*entity.Workspace)} + defaultWorkspace := entity.NewWorkspace(entity.DefaultWorkspaceName, "") + defaultWorkspace.ID = entity.DefaultWorkspaceID + repo.workspaces[defaultWorkspace.ID] = defaultWorkspace + return repo +} + +func (r *WorkspaceRepositoryMock) Create(ctx context.Context, workspace *entity.Workspace) error { + r.mu.Lock() + defer r.mu.Unlock() + if workspace.ID == "" { + workspace.ID = uuid.New().String() + } + for _, existing := range r.workspaces { + if existing.Name == workspace.Name { + return entity.ErrWorkspaceExists + } + } + copy := *workspace + r.workspaces[workspace.ID] = &copy + return nil +} + +func (r *WorkspaceRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + workspace, ok := r.workspaces[id] + if !ok { + return nil, entity.ErrWorkspaceNotFound + } + copy := *workspace + return &copy, nil +} + +func (r *WorkspaceRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + for _, workspace := range r.workspaces { + if workspace.Name == name { + copy := *workspace + return &copy, nil + } + } + return nil, entity.ErrWorkspaceNotFound +} + +func (r *WorkspaceRepositoryMock) Update(ctx context.Context, workspace *entity.Workspace) error { + r.mu.Lock() + defer r.mu.Unlock() + if _, ok := r.workspaces[workspace.ID]; !ok { + return entity.ErrWorkspaceNotFound + } + copy := *workspace + r.workspaces[workspace.ID] = &copy + return nil +} + +func (r *WorkspaceRepositoryMock) List(ctx context.Context) ([]*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + result := make([]*entity.Workspace, 0, len(r.workspaces)) + for _, workspace := range r.workspaces { + copy := *workspace + result = append(result, &copy) + } + return result, nil +} + +type WorkspaceClusterBindingRepositoryMock struct { + mu sync.RWMutex + bindings map[string]*entity.WorkspaceClusterBinding +} + +func
NewWorkspaceClusterBindingRepositoryMock() repository.WorkspaceClusterBindingRepository { + return &WorkspaceClusterBindingRepositoryMock{bindings: make(map[string]*entity.WorkspaceClusterBinding)} +} + +func bindingKey(workspaceID, clusterID string) string { + return workspaceID + "/" + clusterID +} + +func (r *WorkspaceClusterBindingRepositoryMock) Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error { + r.mu.Lock() + defer r.mu.Unlock() + if binding.ID == "" { + binding.ID = uuid.New().String() + } + copy := *binding + r.bindings[bindingKey(binding.WorkspaceID, binding.ClusterID)] = &copy + return nil +} + +func (r *WorkspaceClusterBindingRepositoryMock) Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + r.mu.RLock() + defer r.mu.RUnlock() + binding, ok := r.bindings[bindingKey(workspaceID, clusterID)] + if !ok { + return nil, entity.ErrWorkspaceNotFound + } + copy := *binding + return &copy, nil +} + +func (r *WorkspaceClusterBindingRepositoryMock) Delete(ctx context.Context, workspaceID, clusterID string) error { + r.mu.Lock() + defer r.mu.Unlock() + delete(r.bindings, bindingKey(workspaceID, clusterID)) + return nil +} + +type AuditLogRepositoryMock struct { + mu sync.RWMutex + logs []*entity.AuditLog +} + +func NewAuditLogRepositoryMock() repository.AuditLogRepository { + return &AuditLogRepositoryMock{logs: make([]*entity.AuditLog, 0)} +} + +func (r *AuditLogRepositoryMock) Create(ctx context.Context, logEntry *entity.AuditLog) error { + r.mu.Lock() + defer r.mu.Unlock() + if logEntry.ID == "" { + logEntry.ID = uuid.New().String() + } + copy := *logEntry + r.logs = append(r.logs, &copy) + return nil +} + +func (r *AuditLogRepositoryMock) ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) { + r.mu.RLock() + defer r.mu.RUnlock() + result := make([]*entity.AuditLog, 0) + for i := len(r.logs) - 1; i >= 0; i-- { + if r.logs[i].WorkspaceID == workspaceID { +
copy := *r.logs[i] + result = append(result, &copy) + if limit > 0 && len(result) >= limit { + break + } + } + } + return result, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/cluster_repository.go b/backend/internal/adapter/output/persistence/postgres/cluster_repository.go index fcd9f6d..93cb03f 100644 --- a/backend/internal/adapter/output/persistence/postgres/cluster_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/cluster_repository.go @@ -12,54 +12,33 @@ import ( "github.com/ocdp/cluster-service/internal/pkg/crypto" ) -// ClusterRepository PostgreSQL 集群仓储实现 type ClusterRepository struct { db *DB encryptor crypto.Encryptor } -// NewClusterRepository 创建 PostgreSQL 集群仓储 func NewClusterRepository(db *DB, encryptor crypto.Encryptor) repository.ClusterRepository { - return &ClusterRepository{ - db: db, - encryptor: encryptor, - } + return &ClusterRepository{db: db, encryptor: encryptor} } -// Create 创建集群 func (r *ClusterRepository) Create(ctx context.Context, cluster *entity.Cluster) error { if cluster.ID == "" { cluster.ID = uuid.New().String() } - - // 加密敏感数据 - encryptedCAData, err := r.encryptor.Encrypt(cluster.CAData) + encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken, err := r.encryptClusterSecrets(cluster) if err != nil { - return fmt.Errorf("failed to encrypt CA data: %w", err) + return err } - - encryptedCertData, err := r.encryptor.Encrypt(cluster.CertData) - if err != nil { - return fmt.Errorf("failed to encrypt cert data: %w", err) - } - - encryptedKeyData, err := r.encryptor.Encrypt(cluster.KeyData) - if err != nil { - return fmt.Errorf("failed to encrypt key data: %w", err) - } - - encryptedToken, err := r.encryptor.Encrypt(cluster.Token) - if err != nil { - return fmt.Errorf("failed to encrypt token: %w", err) - } - query := ` - INSERT INTO clusters (id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9,
$10) + INSERT INTO clusters + (id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14) ` - _, err = r.db.conn.ExecContext(ctx, query, cluster.ID, + cluster.WorkspaceID, + cluster.OwnerID, + cluster.Visibility, cluster.Name, cluster.Host, encryptedCAData, @@ -67,160 +46,62 @@ func (r *ClusterRepository) Create(ctx context.Context, cluster *entity.Cluster) encryptedKeyData, encryptedToken, cluster.Description, + cluster.DefaultNamespace, cluster.CreatedAt, cluster.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create cluster: %w", err) } - return nil } -// GetByID 根据 ID 获取集群 func (r *ClusterRepository) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { - query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at - FROM clusters - WHERE id = $1 - ` - - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrClusterNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get cluster: %w", err) - } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) - } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) - } - - cluster.KeyData, err = r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - cluster.Token, 
err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - - return cluster, nil + return r.get(ctx, "id = $1", id) } -// GetByName 根据名称获取集群 func (r *ClusterRepository) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { - query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at + return r.get(ctx, "name = $1", name) +} + +func (r *ClusterRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Cluster, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at FROM clusters - WHERE name = $1 - ` - - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := r.db.conn.QueryRowContext(ctx, query, name).Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrClusterNotFound - } + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) if err != nil { return nil, fmt.Errorf("failed to get cluster: %w", err) } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrClusterNotFound } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) + cluster, err := r.scanCluster(rows) if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) + return nil, err } - - cluster.KeyData, err = r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - 
cluster.Token, err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - return cluster, nil } -// Update 更新集群 func (r *ClusterRepository) Update(ctx context.Context, cluster *entity.Cluster) error { cluster.UpdatedAt = time.Now() - - // 加密敏感数据 - encryptedCAData, err := r.encryptor.Encrypt(cluster.CAData) + encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken, err := r.encryptClusterSecrets(cluster) if err != nil { - return fmt.Errorf("failed to encrypt CA data: %w", err) + return err } - - encryptedCertData, err := r.encryptor.Encrypt(cluster.CertData) - if err != nil { - return fmt.Errorf("failed to encrypt cert data: %w", err) - } - - encryptedKeyData, err := r.encryptor.Encrypt(cluster.KeyData) - if err != nil { - return fmt.Errorf("failed to encrypt key data: %w", err) - } - - encryptedToken, err := r.encryptor.Encrypt(cluster.Token) - if err != nil { - return fmt.Errorf("failed to encrypt token: %w", err) - } - query := ` UPDATE clusters - SET name = $1, host = $2, ca_data = $3, cert_data = $4, key_data = $5, - token = $6, description = $7, updated_at = $8 - WHERE id = $9 + SET workspace_id = $1, owner_id = $2, visibility = $3, name = $4, host = $5, + ca_data = $6, cert_data = $7, key_data = $8, token = $9, description = $10, + default_namespace = $11, updated_at = $12 + WHERE id = $13 ` - result, err := r.db.conn.ExecContext(ctx, query, + cluster.WorkspaceID, + cluster.OwnerID, + cluster.Visibility, cluster.Name, cluster.Host, encryptedCAData, @@ -228,110 +109,134 @@ func (r *ClusterRepository) Update(ctx context.Context, cluster *entity.Cluster) encryptedKeyData, encryptedToken, cluster.Description, + cluster.DefaultNamespace, cluster.UpdatedAt, cluster.ID, ) - if err != nil { return fmt.Errorf("failed to update cluster: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return 
entity.ErrClusterNotFound } - return nil } -// Delete 删除集群 func (r *ClusterRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM clusters WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM clusters WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete cluster: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrClusterNotFound } - return nil } -// List 列出所有集群 func (r *ClusterRepository) List(ctx context.Context) ([]*entity.Cluster, error) { query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at + SELECT id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at FROM clusters ORDER BY created_at DESC ` - rows, err := r.db.conn.QueryContext(ctx, query) if err != nil { return nil, fmt.Errorf("failed to list clusters: %w", err) } defer rows.Close() - clusters := make([]*entity.Cluster, 0) for rows.Next() { - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := rows.Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) + cluster, err := r.scanCluster(rows) if err != nil { - return nil, fmt.Errorf("failed to scan cluster: %w", err) + return nil, err } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) - } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) - } - - cluster.KeyData, err = 
r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - cluster.Token, err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - clusters = append(clusters, cluster) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return clusters, nil } +type clusterScanner interface { + Scan(dest ...interface{}) error +} + +func (r *ClusterRepository) scanCluster(scanner clusterScanner) (*entity.Cluster, error) { + cluster := &entity.Cluster{} + var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken sql.NullString + var defaultNamespace sql.NullString + err := scanner.Scan( + &cluster.ID, + &cluster.WorkspaceID, + &cluster.OwnerID, + &cluster.Visibility, + &cluster.Name, + &cluster.Host, + &encryptedCAData, + &encryptedCertData, + &encryptedKeyData, + &encryptedToken, + &cluster.Description, + &defaultNamespace, + &cluster.CreatedAt, + &cluster.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan cluster: %w", err) + } + cluster.DefaultNamespace = defaultNamespace.String + var decryptErr error + cluster.CAData, decryptErr = decryptMaybe(r.encryptor, encryptedCAData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt CA data: %w", decryptErr) + } + cluster.CertData, decryptErr = decryptMaybe(r.encryptor, encryptedCertData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt cert data: %w", decryptErr) + } + cluster.KeyData, decryptErr = decryptMaybe(r.encryptor, encryptedKeyData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt key data: %w", decryptErr) + } + cluster.Token, decryptErr = decryptMaybe(r.encryptor, encryptedToken.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt token: %w", decryptErr) + } + return cluster, nil +} + +func (r 
*ClusterRepository) encryptClusterSecrets(cluster *entity.Cluster) (string, string, string, string, error) { + ca, err := r.encryptor.Encrypt(cluster.CAData) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt CA data: %w", err) + } + cert, err := r.encryptor.Encrypt(cluster.CertData) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt cert data: %w", err) + } + key, err := r.encryptor.Encrypt(cluster.KeyData) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt key data: %w", err) + } + token, err := r.encryptor.Encrypt(cluster.Token) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt token: %w", err) + } + return ca, cert, key, token, nil +} + +func decryptMaybe(encryptor crypto.Encryptor, value string) (string, error) { + if value == "" { + return "", nil + } + return encryptor.Decrypt(value) +} diff --git a/backend/internal/adapter/output/persistence/postgres/db.go b/backend/internal/adapter/output/persistence/postgres/db.go index 67fcc76..b45bd64 100644 --- a/backend/internal/adapter/output/persistence/postgres/db.go +++ b/backend/internal/adapter/output/persistence/postgres/db.go @@ -53,21 +53,69 @@ func (db *DB) GetConn() *sql.DB { // InitSchema 初始化数据库 schema func (db *DB) InitSchema() error { schema := ` + -- Workspaces 表 + CREATE TABLE IF NOT EXISTS workspaces ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(255) NOT NULL UNIQUE, + status VARCHAR(50) NOT NULL DEFAULT 'active', + k8s_namespace VARCHAR(255) NOT NULL, + k8s_sa_name VARCHAR(255) NOT NULL, + default_cluster_id VARCHAR(36), + quota_cpu VARCHAR(50), + quota_memory VARCHAR(50), + quota_gpu VARCHAR(50), + quota_gpu_memory VARCHAR(50), + created_by VARCHAR(36), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ); + + ALTER TABLE workspaces + ADD COLUMN IF NOT EXISTS default_cluster_id VARCHAR(36), + ADD COLUMN IF NOT EXISTS quota_cpu VARCHAR(50), + 
ADD COLUMN IF NOT EXISTS quota_memory VARCHAR(50), + ADD COLUMN IF NOT EXISTS quota_gpu VARCHAR(50), + ADD COLUMN IF NOT EXISTS quota_gpu_memory VARCHAR(50); + + INSERT INTO workspaces (id, name, status, k8s_namespace, k8s_sa_name, created_at, updated_at) + VALUES ('00000000-0000-0000-0000-000000000010', 'default', 'active', 'ocdp-ws-default', 'ocdp-ws-default', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (id) DO NOTHING; + -- Users 表 CREATE TABLE IF NOT EXISTS users ( id VARCHAR(36) PRIMARY KEY, username VARCHAR(255) NOT NULL UNIQUE, password_hash TEXT NOT NULL, email VARCHAR(255) NOT NULL, + role VARCHAR(50) NOT NULL DEFAULT 'user', + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + is_active BOOLEAN NOT NULL DEFAULT TRUE, + must_change_password BOOLEAN NOT NULL DEFAULT FALSE, + revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE users + ADD COLUMN IF NOT EXISTS role VARCHAR(50) NOT NULL DEFAULT 'user', + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS is_active BOOLEAN NOT NULL DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS must_change_password BOOLEAN NOT NULL DEFAULT FALSE, + ADD COLUMN IF NOT EXISTS revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00'; + + UPDATE users SET role = 'admin' WHERE username = 'admin'; + UPDATE users SET workspace_id = '00000000-0000-0000-0000-000000000010' WHERE workspace_id = ''; + CREATE INDEX IF NOT EXISTS idx_users_username ON users(username); + CREATE INDEX IF NOT EXISTS idx_users_workspace ON users(workspace_id); + CREATE INDEX IF NOT EXISTS idx_users_revoked_after ON users(revoked_after); -- Clusters 表 CREATE TABLE IF NOT EXISTS clusters ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + 
owner_id VARCHAR(36) NOT NULL DEFAULT '', + visibility VARCHAR(50) NOT NULL DEFAULT 'private', name VARCHAR(255) NOT NULL UNIQUE, host TEXT NOT NULL, ca_data TEXT, @@ -75,15 +123,29 @@ func (db *DB) InitSchema() error { key_data TEXT, token TEXT, description TEXT, + default_namespace VARCHAR(255), created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE clusters + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT '', + ADD COLUMN IF NOT EXISTS visibility VARCHAR(50) NOT NULL DEFAULT 'private', + ADD COLUMN IF NOT EXISTS default_namespace VARCHAR(255); + UPDATE clusters SET visibility = 'global_shared' WHERE visibility = 'private' AND owner_id = ''; + CREATE INDEX IF NOT EXISTS idx_clusters_name ON clusters(name); + CREATE INDEX IF NOT EXISTS idx_clusters_workspace ON clusters(workspace_id); + CREATE INDEX IF NOT EXISTS idx_clusters_owner ON clusters(owner_id); + CREATE INDEX IF NOT EXISTS idx_clusters_visibility ON clusters(visibility); -- Registries 表 CREATE TABLE IF NOT EXISTS registries ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + owner_id VARCHAR(36) NOT NULL DEFAULT '', + visibility VARCHAR(50) NOT NULL DEFAULT 'private', name VARCHAR(255) NOT NULL UNIQUE, url TEXT NOT NULL, description TEXT, @@ -94,11 +156,22 @@ func (db *DB) InitSchema() error { updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE registries + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT '', + ADD COLUMN IF NOT EXISTS visibility VARCHAR(50) NOT NULL DEFAULT 'private'; + UPDATE registries SET visibility = 'global_shared' WHERE visibility = 'private' AND owner_id = ''; + CREATE INDEX IF 
NOT EXISTS idx_registries_name ON registries(name); + CREATE INDEX IF NOT EXISTS idx_registries_workspace ON registries(workspace_id); + CREATE INDEX IF NOT EXISTS idx_registries_owner ON registries(owner_id); + CREATE INDEX IF NOT EXISTS idx_registries_visibility ON registries(visibility); -- Instances 表 CREATE TABLE IF NOT EXISTS instances ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + owner_id VARCHAR(36) NOT NULL DEFAULT '', cluster_id VARCHAR(36) NOT NULL, name VARCHAR(255) NOT NULL, namespace VARCHAR(255) NOT NULL, @@ -121,9 +194,63 @@ func (db *DB) InitSchema() error { CONSTRAINT unique_cluster_name UNIQUE (cluster_id, name, namespace) ); + ALTER TABLE instances + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT ''; + CREATE INDEX IF NOT EXISTS idx_instances_cluster ON instances(cluster_id); CREATE INDEX IF NOT EXISTS idx_instances_registry ON instances(registry_id); CREATE INDEX IF NOT EXISTS idx_instances_name ON instances(name); + CREATE INDEX IF NOT EXISTS idx_instances_workspace ON instances(workspace_id); + CREATE INDEX IF NOT EXISTS idx_instances_owner ON instances(owner_id); + + CREATE TABLE IF NOT EXISTS workspace_cluster_bindings ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + cluster_id VARCHAR(36) NOT NULL REFERENCES clusters(id) ON DELETE CASCADE, + namespace VARCHAR(255) NOT NULL, + service_account VARCHAR(255) NOT NULL, + quota_cpu VARCHAR(50), + quota_memory VARCHAR(50), + quota_gpu VARCHAR(50), + quota_gpu_memory VARCHAR(50), + status VARCHAR(50) NOT NULL DEFAULT 'active', + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE (workspace_id, cluster_id) + ); + ALTER TABLE workspace_cluster_bindings + ADD COLUMN IF 
NOT EXISTS quota_gpu_memory VARCHAR(50); + CREATE INDEX IF NOT EXISTS idx_workspace_cluster_bindings_workspace ON workspace_cluster_bindings(workspace_id); + CREATE INDEX IF NOT EXISTS idx_workspace_cluster_bindings_cluster ON workspace_cluster_bindings(cluster_id); + + CREATE TABLE IF NOT EXISTS workspace_quotas ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + resource_type VARCHAR(50) NOT NULL, + hard_limit VARCHAR(100) NOT NULL, + soft_limit VARCHAR(100), + used VARCHAR(100), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE (workspace_id, resource_type) + ); + + CREATE TABLE IF NOT EXISTS audit_logs ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + action VARCHAR(100) NOT NULL, + resource_type VARCHAR(50) NOT NULL, + resource_id VARCHAR(36), + resource_name VARCHAR(255), + details JSONB, + ip_address VARCHAR(50), + user_agent TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ); + CREATE INDEX IF NOT EXISTS idx_audit_logs_workspace ON audit_logs(workspace_id); + CREATE INDEX IF NOT EXISTS idx_audit_logs_user ON audit_logs(user_id); ` _, err := db.conn.Exec(schema) diff --git a/backend/internal/adapter/output/persistence/postgres/instance_repository.go b/backend/internal/adapter/output/persistence/postgres/instance_repository.go index d12400b..2d51f98 100644 --- a/backend/internal/adapter/output/persistence/postgres/instance_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/instance_repository.go @@ -12,37 +12,32 @@ import ( "github.com/ocdp/cluster-service/internal/domain/repository" ) -// InstanceRepository PostgreSQL 实例仓储实现 type InstanceRepository struct { db *DB } -// NewInstanceRepository 创建 PostgreSQL 实例仓储 func NewInstanceRepository(db *DB) repository.InstanceRepository { return &InstanceRepository{db: db} } -// Create 创建实例 func (r 
*InstanceRepository) Create(ctx context.Context, instance *entity.Instance) error { if instance.ID == "" { instance.ID = uuid.New().String() } - - // 将 Values 转换为 JSON valuesJSON, err := json.Marshal(instance.Values) if err != nil { return fmt.Errorf("failed to marshal values: %w", err) } - query := ` - INSERT INTO instances (id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, last_operation, last_error, - revision, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + INSERT INTO instances + (id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, + description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20) ` - _, err = r.db.conn.ExecContext(ctx, query, instance.ID, + instance.WorkspaceID, + instance.OwnerID, instance.ClusterID, instance.Name, instance.Namespace, @@ -61,166 +56,71 @@ func (r *InstanceRepository) Create(ctx context.Context, instance *entity.Instan instance.CreatedAt, instance.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create instance: %w", err) } - return nil } -// GetByID 根据 ID 获取实例 func (r *InstanceRepository) GetByID(ctx context.Context, id string) (*entity.Instance, error) { - query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, last_operation, last_error, - revision, created_at, updated_at - FROM instances - WHERE id = $1 - ` - - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - 
&instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrInstanceNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get instance: %w", err) - } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - return instance, nil + return r.get(ctx, "id = $1", id) } -// GetByClusterAndName 根据集群 ID 和名称获取实例 func (r *InstanceRepository) GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) { query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at FROM instances WHERE cluster_id = $1 AND name = $2 ` - - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := r.db.conn.QueryRowContext(ctx, query, clusterID, name).Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - 
&statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrInstanceNotFound - } + rows, err := r.db.conn.QueryContext(ctx, query, clusterID, name) if err != nil { return nil, fmt.Errorf("failed to get instance: %w", err) } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrInstanceNotFound } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - return instance, nil + return r.scanInstance(rows) +} + +func (r *InstanceRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Instance, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, + description, values, values_yaml, status, status_reason, last_operation, last_error, + revision, created_at, updated_at + FROM instances + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) + if err != nil { + return nil, fmt.Errorf("failed to get instance: %w", err) + } + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrInstanceNotFound + } + return r.scanInstance(rows) } -// Update 更新实例 func (r *InstanceRepository) Update(ctx context.Context, instance *entity.Instance) error { instance.UpdatedAt = time.Now() - - // 将 Values 转换为 JSON valuesJSON, err := json.Marshal(instance.Values) if err != nil { return fmt.Errorf("failed to marshal values: %w", err) } - query := ` UPDATE instances - SET cluster_id = $1, name = $2, namespace = $3, registry_id = $4, repository = $5, - chart = $6, 
version = $7, description = $8, values = $9, values_yaml = $10, - status = $11, status_reason = $12, last_operation = $13, last_error = $14, - revision = $15, updated_at = $16 - WHERE id = $17 + SET workspace_id = $1, owner_id = $2, cluster_id = $3, name = $4, namespace = $5, + registry_id = $6, repository = $7, chart = $8, version = $9, description = $10, + values = $11, values_yaml = $12, status = $13, status_reason = $14, + last_operation = $15, last_error = $16, revision = $17, updated_at = $18 + WHERE id = $19 ` - result, err := r.db.conn.ExecContext(ctx, query, + instance.WorkspaceID, + instance.OwnerID, instance.ClusterID, instance.Name, instance.Namespace, @@ -239,195 +139,126 @@ func (r *InstanceRepository) Update(ctx context.Context, instance *entity.Instan instance.UpdatedAt, instance.ID, ) - if err != nil { return fmt.Errorf("failed to update instance: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrInstanceNotFound } - return nil } -// Delete 删除实例 func (r *InstanceRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM instances WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM instances WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete instance: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrInstanceNotFound } - return nil } -// ListByCluster 列出指定集群的所有实例 func (r *InstanceRepository) ListByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { - query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, last_operation, last_error, - revision, created_at, updated_at - FROM instances - WHERE 
cluster_id = $1 - ORDER BY created_at DESC - ` - - rows, err := r.db.conn.QueryContext(ctx, query, clusterID) - if err != nil { - return nil, fmt.Errorf("failed to list instances: %w", err) - } - defer rows.Close() - - instances := make([]*entity.Instance, 0) - for rows.Next() { - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := rows.Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - if err != nil { - return nil, fmt.Errorf("failed to scan instance: %w", err) - } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - instances = append(instances, instance) - } - - if err := rows.Err(); err != nil { - return nil, fmt.Errorf("rows iteration error: %w", err) - } - - return instances, nil + return r.list(ctx, "WHERE cluster_id = $1", clusterID) } -// List 列出所有实例 func (r *InstanceRepository) List(ctx context.Context) ([]*entity.Instance, error) { + return r.list(ctx, "", nil) +} + +func (r *InstanceRepository) list(ctx context.Context, where string, arg interface{}) ([]*entity.Instance, error) { query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, 
registry_id, repository, chart, version, description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at FROM instances + ` + where + ` ORDER BY created_at DESC ` - - rows, err := r.db.conn.QueryContext(ctx, query) + var rows *sql.Rows + var err error + if where == "" { + rows, err = r.db.conn.QueryContext(ctx, query) + } else { + rows, err = r.db.conn.QueryContext(ctx, query, arg) + } if err != nil { return nil, fmt.Errorf("failed to list instances: %w", err) } defer rows.Close() - instances := make([]*entity.Instance, 0) for rows.Next() { - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := rows.Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) + instance, err := r.scanInstance(rows) if err != nil { - return nil, fmt.Errorf("failed to scan instance: %w", err) + return nil, err } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - instances = append(instances, instance) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return instances, nil } + +type instanceScanner interface { + Scan(dest ...interface{}) error +} + +func (r *InstanceRepository) 
scanInstance(scanner instanceScanner) (*entity.Instance, error) { + instance := &entity.Instance{} + var ( + valuesJSON []byte + statusReason sql.NullString + lastOperation sql.NullString + lastError sql.NullString + ) + err := scanner.Scan( + &instance.ID, + &instance.WorkspaceID, + &instance.OwnerID, + &instance.ClusterID, + &instance.Name, + &instance.Namespace, + &instance.RegistryID, + &instance.Repository, + &instance.Chart, + &instance.Version, + &instance.Description, + &valuesJSON, + &instance.ValuesYAML, + &instance.Status, + &statusReason, + &lastOperation, + &lastError, + &instance.Revision, + &instance.CreatedAt, + &instance.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan instance: %w", err) + } + if len(valuesJSON) > 0 { + if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { + return nil, fmt.Errorf("failed to unmarshal values: %w", err) + } + } + if statusReason.Valid { + instance.StatusReason = statusReason.String + } + if lastOperation.Valid { + instance.LastOperation = entity.InstanceOperation(lastOperation.String) + } + if lastError.Valid { + instance.LastError = lastError.String + } + return instance, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/registry_repository.go b/backend/internal/adapter/output/persistence/postgres/registry_repository.go index 78fb6fd..f791e49 100644 --- a/backend/internal/adapter/output/persistence/postgres/registry_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/registry_repository.go @@ -12,39 +12,32 @@ import ( "github.com/ocdp/cluster-service/internal/pkg/crypto" ) -// RegistryRepository PostgreSQL Registry 仓储实现 type RegistryRepository struct { db *DB encryptor crypto.Encryptor } -// NewRegistryRepository 创建 PostgreSQL Registry 仓储 func NewRegistryRepository(db *DB, encryptor crypto.Encryptor) repository.RegistryRepository { - return &RegistryRepository{ - db: db, - encryptor: encryptor, - } + return &RegistryRepository{db: 
db, encryptor: encryptor} } -// Create 创建 Registry func (r *RegistryRepository) Create(ctx context.Context, registry *entity.Registry) error { if registry.ID == "" { registry.ID = uuid.New().String() } - - // 加密密码 encryptedPassword, err := r.encryptor.Encrypt(registry.Password) if err != nil { return fmt.Errorf("failed to encrypt password: %w", err) } - query := ` - INSERT INTO registries (id, name, url, description, username, password, insecure, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) + INSERT INTO registries (id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12) ` - _, err = r.db.conn.ExecContext(ctx, query, registry.ID, + registry.WorkspaceID, + registry.OwnerID, + registry.Visibility, registry.Name, registry.URL, registry.Description, @@ -54,110 +47,57 @@ func (r *RegistryRepository) Create(ctx context.Context, registry *entity.Regist registry.CreatedAt, registry.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create registry: %w", err) } - return nil } -// GetByID 根据 ID 获取 Registry func (r *RegistryRepository) GetByID(ctx context.Context, id string) (*entity.Registry, error) { - query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at - FROM registries - WHERE id = $1 - ` - - registry := &entity.Registry{} - var encryptedPassword string - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - ®istry.ID, - ®istry.Name, - ®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrRegistryNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get registry: %w", err) - } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: 
%w", err) - } - - return registry, nil + return r.get(ctx, "id = $1", id) } -// GetByName 根据名称获取 Registry func (r *RegistryRepository) GetByName(ctx context.Context, name string) (*entity.Registry, error) { - query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at + return r.get(ctx, "name = $1", name) +} + +func (r *RegistryRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Registry, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at FROM registries - WHERE name = $1 - ` - - registry := &entity.Registry{} - var encryptedPassword string - - err := r.db.conn.QueryRowContext(ctx, query, name).Scan( - ®istry.ID, - ®istry.Name, - ®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrRegistryNotFound - } + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) if err != nil { return nil, fmt.Errorf("failed to get registry: %w", err) } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: %w", err) + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrRegistryNotFound + } + registry, err := r.scanRegistry(rows) + if err != nil { + return nil, err } - return registry, nil } -// Update 更新 Registry func (r *RegistryRepository) Update(ctx context.Context, registry *entity.Registry) error { registry.UpdatedAt = time.Now() - - // 加密密码 encryptedPassword, err := r.encryptor.Encrypt(registry.Password) if err != nil { return fmt.Errorf("failed to encrypt password: %w", err) } - query := ` UPDATE registries - SET name = $1, url = $2, description = $3, username = $4, password = $5, - insecure = $6, updated_at = $7 - WHERE id = $8 + SET 
workspace_id = $1, owner_id = $2, visibility = $3, name = $4, url = $5, + description = $6, username = $7, password = $8, insecure = $9, updated_at = $10 + WHERE id = $11 ` - result, err := r.db.conn.ExecContext(ctx, query, + registry.WorkspaceID, + registry.OwnerID, + registry.Visibility, registry.Name, registry.URL, registry.Description, @@ -167,91 +107,86 @@ func (r *RegistryRepository) Update(ctx context.Context, registry *entity.Regist registry.UpdatedAt, registry.ID, ) - if err != nil { return fmt.Errorf("failed to update registry: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrRegistryNotFound } - return nil } -// Delete 删除 Registry func (r *RegistryRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM registries WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM registries WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete registry: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrRegistryNotFound } - return nil } -// List 列出所有 Registries func (r *RegistryRepository) List(ctx context.Context) ([]*entity.Registry, error) { query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at + SELECT id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at FROM registries ORDER BY created_at DESC ` - rows, err := r.db.conn.QueryContext(ctx, query) if err != nil { return nil, fmt.Errorf("failed to list registries: %w", err) } defer rows.Close() - registries := make([]*entity.Registry, 0) for rows.Next() { - registry := &entity.Registry{} - var encryptedPassword string - - err := rows.Scan( - ®istry.ID, - ®istry.Name, - 
®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) + registry, err := r.scanRegistry(rows) if err != nil { - return nil, fmt.Errorf("failed to scan registry: %w", err) + return nil, err } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: %w", err) - } - registries = append(registries, registry) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return registries, nil } +type registryScanner interface { + Scan(dest ...interface{}) error +} + +func (r *RegistryRepository) scanRegistry(scanner registryScanner) (*entity.Registry, error) { + registry := &entity.Registry{} + var encryptedPassword sql.NullString + err := scanner.Scan( + ®istry.ID, + ®istry.WorkspaceID, + ®istry.OwnerID, + ®istry.Visibility, + ®istry.Name, + ®istry.URL, + ®istry.Description, + ®istry.Username, + &encryptedPassword, + ®istry.Insecure, + ®istry.CreatedAt, + ®istry.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan registry: %w", err) + } + registry.Password, err = decryptMaybe(r.encryptor, encryptedPassword.String) + if err != nil { + return nil, fmt.Errorf("failed to decrypt password: %w", err) + } + return registry, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/user_repository.go b/backend/internal/adapter/output/persistence/postgres/user_repository.go index ea766ee..7af0378 100644 --- a/backend/internal/adapter/output/persistence/postgres/user_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/user_repository.go @@ -28,8 +28,8 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { } query := ` - INSERT INTO users (id, username, password_hash, email, revoked_after, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7) + INSERT INTO users (id, username, 
password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) ` _, err := r.db.conn.ExecContext(ctx, query, @@ -37,6 +37,10 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { user.Username, user.PasswordHash, user.Email, + user.Role, + user.WorkspaceID, + user.IsActive, + user.MustChangePassword, user.RevokedAfter, user.CreatedAt, user.UpdatedAt, @@ -52,7 +56,7 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { // GetByID 根据 ID 获取用户 func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users WHERE id = $1 ` @@ -63,6 +67,10 @@ func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, &user.UpdatedAt, @@ -81,7 +89,7 @@ func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, // GetByUsername 根据用户名获取用户 func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users WHERE username = $1 ` @@ -92,6 +100,10 @@ func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*e &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, 
&user.UpdatedAt, @@ -113,14 +125,19 @@ func (r *UserRepository) Update(ctx context.Context, user *entity.User) error { query := ` UPDATE users - SET username = $1, password_hash = $2, email = $3, revoked_after = $4, updated_at = $5 - WHERE id = $6 + SET username = $1, password_hash = $2, email = $3, role = $4, workspace_id = $5, + is_active = $6, must_change_password = $7, revoked_after = $8, updated_at = $9 + WHERE id = $10 ` result, err := r.db.conn.ExecContext(ctx, query, user.Username, user.PasswordHash, user.Email, + user.Role, + user.WorkspaceID, + user.IsActive, + user.MustChangePassword, user.RevokedAfter, user.UpdatedAt, user.ID, @@ -166,7 +183,7 @@ func (r *UserRepository) Delete(ctx context.Context, id string) error { // List 列出所有用户 func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users ORDER BY created_at DESC ` @@ -185,6 +202,10 @@ func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, &user.UpdatedAt, @@ -201,4 +222,3 @@ func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { return users, nil } - diff --git a/backend/internal/adapter/output/persistence/postgres/workspace_repository.go b/backend/internal/adapter/output/persistence/postgres/workspace_repository.go new file mode 100644 index 0000000..3c1d062 --- /dev/null +++ b/backend/internal/adapter/output/persistence/postgres/workspace_repository.go @@ -0,0 +1,345 @@ +package postgres + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "time" + + "github.com/google/uuid" + 
"github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type WorkspaceRepository struct { + db *DB +} + +func NewWorkspaceRepository(db *DB) repository.WorkspaceRepository { + return &WorkspaceRepository{db: db} +} + +func (r *WorkspaceRepository) Create(ctx context.Context, workspace *entity.Workspace) error { + if workspace.ID == "" { + workspace.ID = uuid.New().String() + } + query := ` + INSERT INTO workspaces (id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + ` + _, err := r.db.conn.ExecContext(ctx, query, + workspace.ID, + workspace.Name, + workspace.Status, + workspace.K8sNamespace, + workspace.K8sSAName, + workspace.DefaultClusterID, + workspace.QuotaCPU, + workspace.QuotaMemory, + workspace.QuotaGPU, + workspace.QuotaGPUMem, + workspace.CreatedBy, + workspace.CreatedAt, + workspace.UpdatedAt, + ) + if err != nil { + return fmt.Errorf("failed to create workspace: %w", err) + } + return nil +} + +func (r *WorkspaceRepository) GetByID(ctx context.Context, id string) (*entity.Workspace, error) { + return r.get(ctx, "id = $1", id) +} + +func (r *WorkspaceRepository) GetByName(ctx context.Context, name string) (*entity.Workspace, error) { + return r.get(ctx, "name = $1", name) +} + +func (r *WorkspaceRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Workspace, error) { + query := fmt.Sprintf(` + SELECT id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at + FROM workspaces + WHERE %s + `, where) + workspace := &entity.Workspace{} + var createdBy, defaultClusterID, quotaCPU, quotaMemory, quotaGPU, quotaGPUMem sql.NullString + err := r.db.conn.QueryRowContext(ctx, query, arg).Scan( + &workspace.ID, 
+ &workspace.Name, + &workspace.Status, + &workspace.K8sNamespace, + &workspace.K8sSAName, + &defaultClusterID, + "aCPU, + "aMemory, + "aGPU, + "aGPUMem, + &createdBy, + &workspace.CreatedAt, + &workspace.UpdatedAt, + ) + if err == sql.ErrNoRows { + return nil, entity.ErrWorkspaceNotFound + } + if err != nil { + return nil, fmt.Errorf("failed to get workspace: %w", err) + } + workspace.CreatedBy = createdBy.String + workspace.DefaultClusterID = defaultClusterID.String + workspace.QuotaCPU = quotaCPU.String + workspace.QuotaMemory = quotaMemory.String + workspace.QuotaGPU = quotaGPU.String + workspace.QuotaGPUMem = quotaGPUMem.String + return workspace, nil +} + +func (r *WorkspaceRepository) Update(ctx context.Context, workspace *entity.Workspace) error { + workspace.UpdatedAt = time.Now() + query := ` + UPDATE workspaces + SET name = $1, status = $2, k8s_namespace = $3, k8s_sa_name = $4, + default_cluster_id = $5, + quota_cpu = $6, quota_memory = $7, quota_gpu = $8, quota_gpu_memory = $9, + created_by = $10, updated_at = $11 + WHERE id = $12 + ` + result, err := r.db.conn.ExecContext(ctx, query, + workspace.Name, + workspace.Status, + workspace.K8sNamespace, + workspace.K8sSAName, + workspace.DefaultClusterID, + workspace.QuotaCPU, + workspace.QuotaMemory, + workspace.QuotaGPU, + workspace.QuotaGPUMem, + workspace.CreatedBy, + workspace.UpdatedAt, + workspace.ID, + ) + if err != nil { + return fmt.Errorf("failed to update workspace: %w", err) + } + rows, err := result.RowsAffected() + if err != nil { + return fmt.Errorf("failed to get affected rows: %w", err) + } + if rows == 0 { + return entity.ErrWorkspaceNotFound + } + return nil +} + +func (r *WorkspaceRepository) List(ctx context.Context) ([]*entity.Workspace, error) { + query := ` + SELECT id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at + FROM workspaces + ORDER BY created_at DESC + ` + rows, err := 
r.db.conn.QueryContext(ctx, query) + if err != nil { + return nil, fmt.Errorf("failed to list workspaces: %w", err) + } + defer rows.Close() + workspaces := make([]*entity.Workspace, 0) + for rows.Next() { + workspace := &entity.Workspace{} + var createdBy, defaultClusterID, quotaCPU, quotaMemory, quotaGPU, quotaGPUMem sql.NullString + if err := rows.Scan( + &workspace.ID, + &workspace.Name, + &workspace.Status, + &workspace.K8sNamespace, + &workspace.K8sSAName, + &defaultClusterID, + "aCPU, + "aMemory, + "aGPU, + "aGPUMem, + &createdBy, + &workspace.CreatedAt, + &workspace.UpdatedAt, + ); err != nil { + return nil, fmt.Errorf("failed to scan workspace: %w", err) + } + workspace.CreatedBy = createdBy.String + workspace.DefaultClusterID = defaultClusterID.String + workspace.QuotaCPU = quotaCPU.String + workspace.QuotaMemory = quotaMemory.String + workspace.QuotaGPU = quotaGPU.String + workspace.QuotaGPUMem = quotaGPUMem.String + workspaces = append(workspaces, workspace) + } + return workspaces, rows.Err() +} + +type WorkspaceClusterBindingRepository struct { + db *DB +} + +func NewWorkspaceClusterBindingRepository(db *DB) repository.WorkspaceClusterBindingRepository { + return &WorkspaceClusterBindingRepository{db: db} +} + +func (r *WorkspaceClusterBindingRepository) Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error { + if binding.ID == "" { + binding.ID = uuid.New().String() + } + now := time.Now() + if binding.CreatedAt.IsZero() { + binding.CreatedAt = now + } + binding.UpdatedAt = now + query := ` + INSERT INTO workspace_cluster_bindings + (id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12) + ON CONFLICT (workspace_id, cluster_id) + DO UPDATE SET namespace = EXCLUDED.namespace, + service_account = EXCLUDED.service_account, + quota_cpu = EXCLUDED.quota_cpu, + quota_memory = EXCLUDED.quota_memory, 
+ quota_gpu = EXCLUDED.quota_gpu, + quota_gpu_memory = EXCLUDED.quota_gpu_memory, + status = EXCLUDED.status, + updated_at = EXCLUDED.updated_at + ` + _, err := r.db.conn.ExecContext(ctx, query, + binding.ID, + binding.WorkspaceID, + binding.ClusterID, + binding.Namespace, + binding.ServiceAccount, + binding.QuotaCPU, + binding.QuotaMemory, + binding.QuotaGPU, + binding.QuotaGPUMem, + binding.Status, + binding.CreatedAt, + binding.UpdatedAt, + ) + if err != nil { + return fmt.Errorf("failed to upsert workspace cluster binding: %w", err) + } + return nil +} + +func (r *WorkspaceClusterBindingRepository) Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + query := ` + SELECT id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at + FROM workspace_cluster_bindings + WHERE workspace_id = $1 AND cluster_id = $2 + ` + binding := &entity.WorkspaceClusterBinding{} + err := r.db.conn.QueryRowContext(ctx, query, workspaceID, clusterID).Scan( + &binding.ID, + &binding.WorkspaceID, + &binding.ClusterID, + &binding.Namespace, + &binding.ServiceAccount, + &binding.QuotaCPU, + &binding.QuotaMemory, + &binding.QuotaGPU, + &binding.QuotaGPUMem, + &binding.Status, + &binding.CreatedAt, + &binding.UpdatedAt, + ) + if err == sql.ErrNoRows { + return nil, entity.ErrWorkspaceNotFound + } + if err != nil { + return nil, fmt.Errorf("failed to get workspace cluster binding: %w", err) + } + return binding, nil +} + +func (r *WorkspaceClusterBindingRepository) Delete(ctx context.Context, workspaceID, clusterID string) error { + _, err := r.db.conn.ExecContext(ctx, `DELETE FROM workspace_cluster_bindings WHERE workspace_id = $1 AND cluster_id = $2`, workspaceID, clusterID) + return err +} + +type AuditLogRepository struct { + db *DB +} + +func NewAuditLogRepository(db *DB) repository.AuditLogRepository { + return &AuditLogRepository{db: db} +} + +func (r 
*AuditLogRepository) Create(ctx context.Context, logEntry *entity.AuditLog) error { + if logEntry.ID == "" { + logEntry.ID = uuid.New().String() + } + details, err := json.Marshal(logEntry.Details) + if err != nil { + return fmt.Errorf("failed to marshal audit details: %w", err) + } + if logEntry.CreatedAt.IsZero() { + logEntry.CreatedAt = time.Now() + } + query := ` + INSERT INTO audit_logs (id, workspace_id, user_id, action, resource_type, resource_id, resource_name, details, ip_address, user_agent, created_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + ` + _, err = r.db.conn.ExecContext(ctx, query, + logEntry.ID, + logEntry.WorkspaceID, + logEntry.UserID, + logEntry.Action, + logEntry.ResourceType, + logEntry.ResourceID, + logEntry.ResourceName, + string(details), + logEntry.IPAddress, + logEntry.UserAgent, + logEntry.CreatedAt, + ) + if err != nil { + return fmt.Errorf("failed to create audit log: %w", err) + } + return nil +} + +func (r *AuditLogRepository) ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) { + if limit <= 0 || limit > 500 { + limit = 100 + } + query := ` + SELECT id, workspace_id, user_id, action, resource_type, resource_id, resource_name, details, ip_address, user_agent, created_at + FROM audit_logs + WHERE workspace_id = $1 + ORDER BY created_at DESC + LIMIT $2 + ` + rows, err := r.db.conn.QueryContext(ctx, query, workspaceID, limit) + if err != nil { + return nil, fmt.Errorf("failed to list audit logs: %w", err) + } + defer rows.Close() + result := make([]*entity.AuditLog, 0) + for rows.Next() { + logEntry := &entity.AuditLog{} + var details []byte + if err := rows.Scan( + &logEntry.ID, + &logEntry.WorkspaceID, + &logEntry.UserID, + &logEntry.Action, + &logEntry.ResourceType, + &logEntry.ResourceID, + &logEntry.ResourceName, + &details, + &logEntry.IPAddress, + &logEntry.UserAgent, + &logEntry.CreatedAt, + ); err != nil { + return nil, fmt.Errorf("failed to scan audit log: %w", err) + } + _ 
= json.Unmarshal(details, &logEntry.Details) + result = append(result, logEntry) + } + return result, rows.Err() +} diff --git a/backend/internal/bootstrap/config.go b/backend/internal/bootstrap/config.go index 1d2e7d0..73270f9 100644 --- a/backend/internal/bootstrap/config.go +++ b/backend/internal/bootstrap/config.go @@ -5,14 +5,17 @@ import ( "fmt" "os" "path/filepath" + "sort" + "strconv" + "strings" ) // BootstrapConfig 预注入配置 type BootstrapConfig struct { - Enabled bool `json:"enabled"` - Users []UserSeed `json:"users"` - Registries []RegistrySeed `json:"registries"` - Clusters []ClusterSeed `json:"clusters"` + Enabled bool `json:"enabled"` + Users []UserSeed `json:"users"` + Registries []RegistrySeed `json:"registries"` + Clusters []ClusterSeed `json:"clusters"` } // UserSeed 用户预注入数据 @@ -20,6 +23,7 @@ type UserSeed struct { Username string `json:"username"` Password string `json:"password"` Email string `json:"email"` + Role string `json:"role"` } // RegistrySeed Registry 预注入数据 @@ -45,11 +49,12 @@ type ClusterSeed struct { // LoadBootstrapConfig 加载预注入配置 // 支持从文件或环境变量加载 -// +// // 加载优先级: // 1. 环境变量 BOOTSTRAP_CONFIG_JSON (最高优先级) -// 2. Mock 模式: 配置文件 config/bootstrap.json -// 3. 真实模式: GetDefaultBootstrapConfig() 中的真实数据 +// 2. 环境变量 BOOTSTRAP_* (root .env / container env) +// 3. Mock 模式: 配置文件 config/bootstrap.json +// 4. 未提供任何 bootstrap 配置时禁用预注入 func LoadBootstrapConfig() (*BootstrapConfig, error) { // 1. 优先从环境变量加载 if configJSON := os.Getenv("BOOTSTRAP_CONFIG_JSON"); configJSON != "" { @@ -60,9 +65,13 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { return &config, nil } + if config, ok := loadBootstrapConfigFromEnv(); ok { + return config, nil + } + // 2. 
检查适配器模式 adapterMode := os.Getenv("ADAPTER_MODE") - + // Mock 模式: 使用配置文件(假数据) if adapterMode == "mock" { configPath := os.Getenv("BOOTSTRAP_CONFIG_FILE") @@ -72,7 +81,7 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { // 检查文件是否存在 if _, err := os.Stat(configPath); os.IsNotExist(err) { - // 配置文件不存在,使用默认配置 + // 配置文件不存在,不预注入任何数据 return GetDefaultBootstrapConfig(), nil } @@ -89,49 +98,142 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { return &config, nil } - // 3. 真实模式 (mode 1, mode 2): 使用代码中的真实预注入数据 + // 3. 真实模式: 未显式配置时不预注入任何数据 return GetDefaultBootstrapConfig(), nil } -// GetDefaultBootstrapConfig 获取默认的预注入配置(示例) -func GetDefaultBootstrapConfig() *BootstrapConfig { - return &BootstrapConfig{ - Enabled: true, - Users: []UserSeed{ - { - Username: "admin", - Password: "admin123", - Email: "admin@example.com", - }, - }, - Registries: []RegistrySeed{ - { - Name: "harbor-bwgdi", - URL: "https://harbor.bwgdi.com", - Description: "BWGDI Harbor Registry", - Username: "admin", - Password: "BWGDIP@ssw0rd1401#", - Insecure: false, - }, - }, - Clusters: []ClusterSeed{ - { - Name: "cluster1", - Host: "https://10.6.14.123:6443", - Description: "K3s Cluster 1", - CAData: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkekNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTlRVME9ETTJOemt3SGhjTk1qVXdPREU0TURJeU1URTVXaGNOTXpVd09ERTJNREl5TVRFNQpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTlRVME9ETTJOemt3V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFTaVBJUW5LZXR2VjQ3cHUyLytMV1lZaGJjbUY3V3RZQnArOGxDaUVKdkcKaFAyaE5BWVVmZDUrRnN5VVN3bDBTV3NoT3BORmRMc0NzY3pISkhycUpWYUVvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVTlCa3lhSGpPVG1RM29LYWlOaXFmCjVwZTF4L293Q2dZSUtvWkl6ajBFQXdJRFNBQXdSUUlnTzR4M3EyNmhhL1Z0NTRCT1Awc1hVNGt5ckVpNDR6TUcKc0d0Z25LY0NLbk1DSVFEcVhsSzBqSGNKSVE2bTRWanRub0VQWGdzQ2JrdW45WmxvVmxhbWtPNXAzZz09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - CertData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJrakNDQVRlZ0F3SUJBZ0lJVjVQT1FRblJoSGd3Q2dZSUtvWkl6ajBFQXdJd0l6RWhNQjhHQTFVRUF3d1kKYXpOekxXTnNhV1Z1ZEMxallVQXhOelUxTkRnek5qYzVNQjRYRFRJMU1EZ3hPREF5TWpFeE9Wb1hEVEkyTURneApPREF5TWpFeE9Wb3dNREVYTUJVR0ExVUVDaE1PYzNsemRHVnRPbTFoYzNSbGNuTXhGVEFUQmdOVkJBTVRESE41CmMzUmxiVHBoWkcxcGJqQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJMTjcrbjNXRDY0TThTMEEKT1Bpd2hReFZRNWdLTStRTk11REFzSlM1UVZFdTIyajZwaFlQYTNyQWFLU1hnZE1EdVYvbTRUamxTQmxCM2dJQwpnZW5wdTc2alNEQkdNQTRHQTFVZER3RUIvd1FFQXdJRm9EQVRCZ05WSFNVRUREQUtCZ2dyQmdFRkJRY0RBakFmCkJnTlZIU01FR0RBV2dCVGlxTWRFM0xYbElwVHRiREJnN0ZVcmV1NHVVREFLQmdncWhrak9QUVFEQWdOSkFEQkcKQWlFQXRPQ0s4ZmdzZmxhaTczcXdXMkhQbWM2bDVXNmR2L1BzNGhHNDZFRkV0VlFDSVFDenFkQitkZnFiWkJ5cwpNUm0zbDU1N3pNOFBNcDhRUE5lVFdiM0VoOEdtVGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCi0tLS0tQkVHSU4gQ0VSVElGSUNBVEUtLS0tLQpNSUlCZGpDQ0FSMmdBd0lCQWdJQkFEQUtCZ2dxaGtqT1BRUURBakFqTVNFd0h3WURWUVFEREJock0zTXRZMnhwClpXNTBMV05oUURFM05UVTBPRE0yTnprd0hoY05NalV3T0RFNE1ESXlNVEU1V2hjTk16VXdPREUyTURJeU1URTUKV2pBak1TRXdId1lEVlFRRERCaHJNM010WTJ4cFpXNTBMV05oUURFM05UVTBPRE0yTnprd1dUQVRCZ2NxaGtqTwpQUUlCQmdncWhrak9QUU1CQndOQ0FBU3JxQzd2RUhKYzQzUThIWG5MT0VQeXkyM0tYZzlHOVkycTJUaVFLMGhoCkJvNnh1WUxDMTFSWkhGNC85NGZJZitZa3BCcmRpcFFNTjRSaVVrUGZzM28zbzBJd1FEQU9CZ05WSFE4QkFmOEUKQkFNQ0FxUXdEd1lEVlIwVEFRSC9CQVV3QXdFQi96QWRCZ05WSFE0RUZnUVU0cWpIUk55MTVTS1U3V3d3WU94VgpLM3J1TGxBd0NnWUlLb1pJemowRUF3SURSd0F3UkFJZ041WmJQaEs4YkwxWllmcStGTVNNbkFCdEgzRSsxcnFoClpRUHY4UWM3S09nQ0lCMWhBclM5SXhKU1dYYlV3ZWE4WU0yVUNEMlplYTVxMHJMQnd4SHFqb3RjCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - KeyData: "LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1IY0NBUUVFSUpuM2dPd0lBNzJGMXE2dkhvMHdDRk1RS0VXVmVnejlQYy9NRFhVVDU5c3pvQW9HQ0NxR1NNNDkKQXdFSG9VUURRZ0FFczN2NmZkWVByZ3p4TFFBNCtMQ0ZERlZEbUFvejVBMHk0TUN3bExsQlVTN2JhUHFtRmc5cgplc0JvcEplQjB3TzVYK2JoT09WSUdVSGVBZ0tCNmVtN3ZnPT0KLS0tLS1FTkQgRUMgUFJJVkFURSBLRVktLS0tLQo=", - }, - { - Name: "cluster2", - Host: "https://10.6.80.12:6443", - Description: "Kubernetes Cluster 2", - CAData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURCVENDQWUyZ0F3SUJBZ0lJWCtGQVJITzJWdVl3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TlRFd016QXdNelEyTlROYUZ3MHpOVEV3TWpnd016VXhOVE5hTUJVeApFekFSQmdOVkJBTVRDbXQxWW1WeWJtVjBaWE13Z2dFaU1BMEdDU3FHU0liM0RRRUJBUVVBQTRJQkR3QXdnZ0VLCkFvSUJBUUROdFJSeG5JYVU2MS93UHVWNkpiR0hLaWtaZWVmYXlNOEFzVHRQeXQwaU5BaFgvVWNUT1pSVWYyZmUKTXBKSFNDdy9QQjJ2d1dCZDB2OVBEVWZ6RTYxL0lKcmhWZU54NmRxK0VPdVFqRmI2TlMvbkpiWmpXVFoyRFhBRQpkS1lwaGpXWGV3dWVuK0htTjlyK2tIZGlORVdmc0xDb1hWOFFMSmVRZXF4NHY2eTFkaEE1Ly9sdGxRV0ZsN2ZFCkRzeUpQb05tQmhzSy9SNEpYVDZ4Q0NqYmJmRFF6OE1hTXA0aWZnRW9ac0R6T2RlK3ZDL3diMEcxVmlpL1FjOEEKSCtSb2tJUkI2MTZqM0VjOWhsd1V4UjNyZThqOGFFdDJob1BkbTVhekt1YjQ0LzlKc3VaU1BWR0FYVXVjekQyawpYUU5UOWErOVl4RXZJZ0psdFpuRGVYSjZmeTFqQWdNQkFBR2pXVEJYTUE0R0ExVWREd0VCL3dRRUF3SUNwREFQCkJnTlZIUk1CQWY4RUJUQURBUUgvTUIwR0ExVWREZ1FXQkJSVEo2WWgwQ3lWVDRGNEhJUSszYWVhQzZzMUlUQVYKQmdOVkhSRUVEakFNZ2dwcmRXSmxjbTVsZEdWek1BMEdDU3FHU0liM0RRRUJDd1VBQTRJQkFRQ1pZM0xuUDl4Qgp1MjJaMENtazdiNUI2T1RtRS9obWlNRDNXY3kyb3RpcVhvZUE1VENRWnZxUk1PTk1NR3NCZFYza3FRRFhyaVR1CkQ4MDdaL3Q3SlAvOGo1RmRncDBCbkpoOUtlQkhaeVBybWFQNW9veFg4VWhFZHF0bWdsTUtBSk0xVmpKTExZNUwKMUcyRVNWa09NKytTSkV5MGJMbU9LM3M2YUI1L05pK3BVVS82Z1ZFNDFIZnh1SEJVYUtrRXNJR1d0WnNxbEY1cwp1RVAzZnY0ZmJRZVAxTmEvRlNaSmh4NlBybEdjZlE2Vmh6a1haY2Q1RExKMHZHbHZoTGdwREowdUVsUEd6NU5KCldFelVJZ3BGV25UMUd4TlhuNm02Sm9oMmNoWU5oQ25KOGZCS0Q4elozei9LdExCa2JwMDdMRlgwbzhXQUhEQmcKK1A4cjUwTm5IT3FHCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - CertData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURLVENDQWhHZ0F3SUJBZ0lJWUlIcnhuOXYvOTR3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TlRFd016QXdNelEyTlROYUZ3MHlOakV3TXpBd016VXhOVE5hTUR3eApIekFkQmdOVkJBb1RGbXQxWW1WaFpHMDZZMngxYzNSbGNpMWhaRzFwYm5NeEdUQVhCZ05WQkFNVEVHdDFZbVZ5CmJtVjBaWE10WVdSdGFXNHdnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEd0NGWW0KY1JldG5xWjJBR21FUGJ2L1pRVzdrSzFKNHlBUmI2ODVlNEl5QjQ2OXdKOFVtd1crOXB2OWNsVm5YV3pnQkY3WQpnbkIyNi9DTWtqOVpnRkhOaWFPK3RXcXg3cHJKTkdDaHhiY29VMDZzQUIwR3MvUkVHK3VYMnFZa3RnVHpRNWFrCitGKzZrZElRek5VdnpwWFUzUFlHcDFEcGlzNWxZNFYzMkhnSkRaZkMrRzlpT1ROd1dtTzV3bGF1K1lsQkRGTVIKS2tnVFo1MDY5OXl5NWxnUlRoaTczSG1hUCtLWGdIT0QrNkNmeUZ6Ty80KzdLaExjanZpTGFUVjBjNGkzYkxidQo0K0llU2pwMEpxU2lxQlFtRHhHRitYMndCSkNiRVZObWJrd0hCVlh5eXlxdGJWV2dibEN6SWJ0UDBadHE3RUMwClo0WkNDemc5RFNqRGQwZWZBZ01CQUFHalZqQlVNQTRHQTFVZER3RUIvd1FFQXdJRm9EQVRCZ05WSFNVRUREQUsKQmdnckJnRUZCUWNEQWpBTUJnTlZIUk1CQWY4RUFqQUFNQjhHQTFVZEl3UVlNQmFBRkZNbnBpSFFMSlZQZ1hnYwpoRDdkcDVvTHF6VWhNQTBHQ1NxR1NJYjNEUUVCQ3dVQUE0SUJBUUFzTHJBMEhFOVNGNHAvSzBQejlVdFZLdk9rCjNUaEZ0ODZGTGlWNEJMcTZ5RSt1aHdHazk0b3p1Y3c1T2h1WEduTWFaUlFMYnliS3pJcjQvUUNqQVQ5eFVURWQKSFQ4c1c1UEhHMm5lbGJRckFNdVhRaFpXdlZTRmZ6Tk5GZG0rNStzdnVXajVtMklyNXNYRURlV2dBdmNLd3k2cwpVUjIxSmdtVXZHSFFtTVVZYWpnYW8wS3NjQmtNOEpZekFKdXZWdkJtTytwdzN5T2hVVmMyY0JnV0gybmx3L3RLCjZRR0Y0ZUZPRnJaYzM5UHp2NmlVOHFBYnNrQlVTVlhuaXg3dTNZUzFwTHNuZitSY0U0MmR1RzV4Nll3UFBlb28KRXBwWVluZ1R5TlpKKzVGaHVZdTUwMDJsQm1DV3JrSkxEek5NWlR3ai9DeG52ekVnSWJPWFpndnRpSXhpCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - KeyData: 
"LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcFFJQkFBS0NBUUVBOEFoV0puRVhyWjZtZGdCcGhEMjcvMlVGdTVDdFNlTWdFVyt2T1h1Q01nZU92Y0NmCkZKc0Z2dmFiL1hKVloxMXM0QVJlMklKd2R1dndqSkkvV1lCUnpZbWp2clZxc2U2YXlUUmdvY1czS0ZOT3JBQWQKQnJQMFJCdnJsOXFtSkxZRTgwT1dwUGhmdXBIU0VNelZMODZWMU56MkJxZFE2WXJPWldPRmQ5aDRDUTJYd3ZodgpZamt6Y0ZwanVjSldydm1KUVF4VEVTcElFMmVkT3ZmY3N1WllFVTRZdTl4NW1qL2lsNEJ6Zy91Z244aGN6ditQCnV5b1MzSTc0aTJrMWRIT0l0MnkyN3VQaUhrbzZkQ2Frb3FnVUpnOFJoZmw5c0FTUW14RlRabTVNQndWVjhzc3EKclcxVm9HNVFzeUc3VDlHYmF1eEF0R2VHUWdzNFBRMG93M2RIbndJREFRQUJBb0lCQUFxSWt4OUV2MEZEUVJMVQptY3pQMkx3d2RydndjV3BZcVVPYW54bnFyWi84Yk9zdTFNeFdzVDNjSEtSV3JDREpITW9INXhHaFI4WXdQSEl1CnlORG9ySzVVWi9jcWh2QWdCSExuOVlXajQ1SEZkaUplTHVmb1pjUEhaZU5ZR1FwclluUTZkeFh1UUdVem1RQmIKdk05SVJaTDl6MTRqWVkyZUpjaVZRWG9zNmJlYjUxYjgxNGljMTg1RHNtK2RhekRuNG14M2tNT0lueFR2K01pNQpxSWx5OU8vQURIaWpNd2taNVY5K3grSlpxM3Exc09SeTBKcUUwd1czbFcwQnFxSWRGRFRSelAvMFdiVGZZdDU3CmlRNjJySnhEN1RGNzR3Ni8xc3VqalU3Y2VsK1ltdTRvRFZjb05pOGdoTE1UZXE1OWpPMk1xR1FqMU5HUHRuTHkKb0hFOUs4RUNnWUVBOVRiQ3VEUlBtVDFmN0MwUldYUkJnejlENWhhRExkaS82aitjMGx5amR0TjkyR2JHdFNFMQozVVIvc2dsRit3bVliWmJmNExqUnpibnNZTGFleHRtakpzWXdFK0t4SSt3SEloSElPRFFaSTBaT08vMTJYdm1oCjB4dDdUNmNTVTZZSHZEbkp4WkpFaGt3TjBwL1ZoSHZMZFZMWmd3ZnNtQWlVekNTTVBmaUkySmtDZ1lFQStwYzcKTUJ0ZFNBZnd5cElMaUR6dis2WjFBQnVrWUphWnFQTk9IRGdLeElRNVJEQVZ5K3hSQXJWQ1V3RE5WdDJtTGJHUQpHZysvWXl4ZllEd2dSYTIxMUJDL0pUU3E4S1dHYVdXM0h2Z0VmMk54cVVIckNkT3VGZGhqdWkrMlRBdEdBb0w1CjluSGx3TXBZVVpydjF6dENCRmx4L1ZYd3NxUGZ6K2l5ZG1CVUxQY0NnWUVBcFM5Q2RMd29jdDQ1WSt2b0tBNTgKbzJGVzZBUjZVY1FWWkVOOTdPZWk1a1VLSFdEK3NyMndmMkhKYzdGemh1eXIxZ2N3d1QwL2VBcXJCV3VBQWd4UwpMNmlLY3ByZklZZTZObVVzTDFCSkxzNEpuYmZjcVpZWVFSSGVPNFljZm1UMkNRSVV2aGNPT2ptNWhnMU4xSFZnClZhUitDaHFvY3JJMUtsL2thVXFuUk9FQ2dZRUF5ZWx0RVhnYkUxMENrZFpYWUhEcFZUVnNkS2ZSTE5wcitZd0IKMWc3NTdobzBJbE0wWE5tTzlNV2tLVWt1S3QzeGRrUHFQbldOMnBUNFRJeGwzSDc1VVdRbEFBK041TlVhbG5ZVQp0T2xXaG1aVVFQTVNOUnJRM0YwOURkby80c242b1M5enhUVkUwTEM1dFJkSVJYNUQxVWxVNWJHSGZnazQzMGM1CjlOUHRQMFVDZ1lFQXk1L05hZXJlZDlQSDcyVzNDNW1UQy9jbEQ
xdUdmZXdPVkFkdko1eldlMDh4Q01CcEpya1QKU3dKM3NZOXYyaEdwSUxYZnU5YnppL0RWaW1sZk5MNkZBV2VaR3BCYm1qTHBEcUxWRzdhcUNHQVcvRG9iNmVlWApweEFiQTBLaUhoaE9sdUdONHdkbFdQRzNWdTlZNXZIb3RBNW1iZlRpaHhUYTlEZWRkZXlkNC9RPQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=", - }, - }, +func loadBootstrapConfigFromEnv() (*BootstrapConfig, bool) { + if !hasBootstrapEnv() { + return nil, false } + + config := &BootstrapConfig{ + Enabled: true, + Users: make([]UserSeed, 0, 1), + Registries: make([]RegistrySeed, 0, 1), + Clusters: make([]ClusterSeed, 0), + } + + adminUser := strings.TrimSpace(os.Getenv("BOOTSTRAP_ADMIN_USER")) + adminPass := strings.TrimSpace(os.Getenv("BOOTSTRAP_ADMIN_PASS")) + if adminUser != "" && adminPass != "" { + config.Users = append(config.Users, UserSeed{ + Username: adminUser, + Password: adminPass, + Email: getEnv("BOOTSTRAP_ADMIN_EMAIL", adminUser+"@example.local"), + Role: "admin", + }) + } + + if registryURL := os.Getenv("BOOTSTRAP_REGISTRY_URL"); registryURL != "" { + registryUser := getEnv("BOOTSTRAP_REGISTRY_ROBOT_USER", getEnv("BOOTSTRAP_REGISTRY_USER", "")) + registryPass := getEnv("BOOTSTRAP_REGISTRY_ROBOT_PASS", getEnv("BOOTSTRAP_REGISTRY_PASS", "")) + config.Registries = append(config.Registries, RegistrySeed{ + Name: getEnv("BOOTSTRAP_REGISTRY_NAME", "harbor"), + URL: registryURL, + Description: getEnv("BOOTSTRAP_REGISTRY_DESC", ""), + Username: registryUser, + Password: registryPass, + Insecure: parseBoolEnv("BOOTSTRAP_REGISTRY_INSECURE", false), + }) + } + + if parseBoolEnv("BOOTSTRAP_ENABLE_CLUSTERS", false) { + for _, clusterName := range discoverBootstrapClusters() { + prefix := "BOOTSTRAP_CLUSTER_" + normalizeEnvName(clusterName) + "_" + host := os.Getenv(prefix + "HOST") + if host == "" { + continue + } + + config.Clusters = append(config.Clusters, ClusterSeed{ + Name: strings.ToLower(clusterName), + Host: host, + Description: os.Getenv(prefix + "DESC"), + CAData: os.Getenv(prefix + "CA"), + CertData: os.Getenv(prefix + "CERT"), + KeyData: 
os.Getenv(prefix + "KEY"), + Token: os.Getenv(prefix + "TOKEN"), + }) + } + } + + return config, true } +func hasBootstrapEnv() bool { + for _, env := range os.Environ() { + if strings.HasPrefix(env, "BOOTSTRAP_") { + return true + } + } + return false +} + +func discoverBootstrapClusters() []string { + names := make(map[string]struct{}) + + if configured := os.Getenv("BOOTSTRAP_CLUSTERS"); configured != "" { + for _, name := range strings.Split(configured, ",") { + name = strings.TrimSpace(name) + if name != "" { + names[normalizeEnvName(name)] = struct{}{} + } + } + } + + for _, env := range os.Environ() { + key, _, ok := strings.Cut(env, "=") + if !ok || !strings.HasPrefix(key, "BOOTSTRAP_CLUSTER_") || !strings.HasSuffix(key, "_HOST") { + continue + } + name := strings.TrimSuffix(strings.TrimPrefix(key, "BOOTSTRAP_CLUSTER_"), "_HOST") + if name != "" { + names[name] = struct{}{} + } + } + + result := make([]string, 0, len(names)) + for name := range names { + result = append(result, name) + } + sort.Strings(result) + return result +} + +func normalizeEnvName(name string) string { + replacer := strings.NewReplacer("-", "_", ".", "_", " ", "_") + return strings.ToUpper(replacer.Replace(strings.TrimSpace(name))) +} + +func parseBoolEnv(key string, defaultValue bool) bool { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return defaultValue + } + parsed, err := strconv.ParseBool(value) + if err != nil { + return defaultValue + } + return parsed +} + +func getEnv(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} + +// GetDefaultBootstrapConfig 返回安全的空默认配置。 +// +// 这里不能包含真实或示例账号密码、Registry 或集群凭据。预注入数据必须来自 +// BOOTSTRAP_CONFIG_JSON、BOOTSTRAP_* 环境变量,或显式提供的 bootstrap 配置文件。 +func GetDefaultBootstrapConfig() *BootstrapConfig { + return &BootstrapConfig{ + Enabled: false, + Users: []UserSeed{}, + Registries: []RegistrySeed{}, + Clusters: []ClusterSeed{}, + } +} diff --git 
a/backend/internal/bootstrap/config_test.go b/backend/internal/bootstrap/config_test.go new file mode 100644 index 0000000..595a3fd --- /dev/null +++ b/backend/internal/bootstrap/config_test.go @@ -0,0 +1,103 @@ +package bootstrap + +import "testing" + +func TestDefaultBootstrapConfigIsEmptyAndDisabled(t *testing.T) { + config := GetDefaultBootstrapConfig() + if config.Enabled { + t.Fatal("default bootstrap config must be disabled") + } + if len(config.Users) != 0 || len(config.Registries) != 0 || len(config.Clusters) != 0 { + t.Fatalf("default bootstrap config must not include seeded data: %#v", config) + } +} + +func TestLoadBootstrapConfigFromEnv(t *testing.T) { + t.Setenv("BOOTSTRAP_ADMIN_USER", "root") + t.Setenv("BOOTSTRAP_ADMIN_PASS", "secret") + t.Setenv("BOOTSTRAP_ADMIN_EMAIL", "root@example.com") + t.Setenv("BOOTSTRAP_REGISTRY_NAME", "harbor") + t.Setenv("BOOTSTRAP_REGISTRY_URL", "https://harbor.example.com") + t.Setenv("BOOTSTRAP_REGISTRY_DESC", "test registry") + t.Setenv("BOOTSTRAP_REGISTRY_USER", "robot") + t.Setenv("BOOTSTRAP_REGISTRY_PASS", "robot-secret") + t.Setenv("BOOTSTRAP_REGISTRY_ROBOT_USER", "robot$ocdp") + t.Setenv("BOOTSTRAP_REGISTRY_ROBOT_PASS", "robot-token") + t.Setenv("BOOTSTRAP_REGISTRY_INSECURE", "true") + t.Setenv("BOOTSTRAP_ENABLE_CLUSTERS", "true") + t.Setenv("BOOTSTRAP_CLUSTERS", "cluster1,gpu-prod") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_HOST", "https://cluster1.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_DESC", "cluster one") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_CA", "ca-data") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_CERT", "cert-data") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_KEY", "key-data") + t.Setenv("BOOTSTRAP_CLUSTER_GPU_PROD_HOST", "https://gpu.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_GPU_PROD_TOKEN", "bearer-token") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from environment") + } + + if len(config.Users) != 1 || config.Users[0].Username != "root" 
|| config.Users[0].Password != "secret" { + t.Fatalf("unexpected users: %#v", config.Users) + } + + if len(config.Registries) != 1 { + t.Fatalf("expected one registry, got %d", len(config.Registries)) + } + registry := config.Registries[0] + if registry.Name != "harbor" || registry.URL != "https://harbor.example.com" || !registry.Insecure { + t.Fatalf("unexpected registry: %#v", registry) + } + if registry.Username != "robot$ocdp" || registry.Password != "robot-token" { + t.Fatalf("expected robot registry credentials, got %#v", registry) + } + + if len(config.Clusters) != 2 { + t.Fatalf("expected two clusters, got %d: %#v", len(config.Clusters), config.Clusters) + } + + clusterByName := map[string]ClusterSeed{} + for _, cluster := range config.Clusters { + clusterByName[cluster.Name] = cluster + } + + if clusterByName["cluster1"].Host != "https://cluster1.example.com:6443" { + t.Fatalf("unexpected cluster1: %#v", clusterByName["cluster1"]) + } + if clusterByName["gpu_prod"].Token != "bearer-token" { + t.Fatalf("unexpected gpu_prod: %#v", clusterByName["gpu_prod"]) + } +} + +func TestBootstrapClustersRequireExplicitEnable(t *testing.T) { + t.Setenv("BOOTSTRAP_ADMIN_USER", "root") + t.Setenv("BOOTSTRAP_ADMIN_PASS", "secret") + t.Setenv("BOOTSTRAP_CLUSTERS", "cluster1") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_HOST", "https://cluster1.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_TOKEN", "token") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from environment") + } + if len(config.Clusters) != 0 { + t.Fatalf("bootstrap clusters must be disabled unless BOOTSTRAP_ENABLE_CLUSTERS=true, got %#v", config.Clusters) + } +} + +func TestBootstrapEnvDoesNotCreateDefaultAdmin(t *testing.T) { + t.Setenv("BOOTSTRAP_REGISTRY_URL", "https://harbor.example.com") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from environment") + } + if len(config.Users) != 0 { + 
t.Fatalf("expected no users without explicit admin credentials, got %#v", config.Users) + } + if len(config.Registries) != 1 { + t.Fatalf("expected one registry, got %d", len(config.Registries)) + } +} diff --git a/backend/internal/bootstrap/seeder.go b/backend/internal/bootstrap/seeder.go index 1047c14..de3896e 100644 --- a/backend/internal/bootstrap/seeder.go +++ b/backend/internal/bootstrap/seeder.go @@ -84,6 +84,12 @@ func (s *Seeder) seedUsers(ctx context.Context) error { // 创建用户 user := entity.NewUser(userSeed.Username, passwordHash, userSeed.Email) user.ID = uuid.New().String() + if userSeed.Role != "" { + user.Role = userSeed.Role + } + if user.Role == "admin" { + user.WorkspaceID = entity.DefaultWorkspaceID + } if err := s.repos.UserRepo.Create(ctx, user); err != nil { log.Printf(" ✗ Failed to create user '%s': %v", userSeed.Username, err) @@ -105,6 +111,7 @@ func (s *Seeder) seedRegistries(ctx context.Context) error { log.Printf(" ↳ Seeding %d registry(ies)...", len(s.config.Registries)) + ownerID := s.bootstrapOwnerID(ctx) for _, registrySeed := range s.config.Registries { // 检查 Registry 是否已存在 existingRegistry, _ := s.repos.RegistryRepo.GetByName(ctx, registrySeed.Name) @@ -117,6 +124,9 @@ func (s *Seeder) seedRegistries(ctx context.Context) error { registry := &entity.Registry{ ID: uuid.New().String(), Name: registrySeed.Name, + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: ownerID, + Visibility: "global_shared", URL: registrySeed.URL, Description: registrySeed.Description, Username: registrySeed.Username, @@ -146,6 +156,7 @@ func (s *Seeder) seedClusters(ctx context.Context) error { log.Printf(" ↳ Seeding %d cluster(s)...", len(s.config.Clusters)) + ownerID := s.bootstrapOwnerID(ctx) for _, clusterSeed := range s.config.Clusters { // 检查 Cluster 是否已存在 existingCluster, _ := s.repos.ClusterRepo.GetByName(ctx, clusterSeed.Name) @@ -158,6 +169,9 @@ func (s *Seeder) seedClusters(ctx context.Context) error { cluster := &entity.Cluster{ ID: 
uuid.New().String(), Name: clusterSeed.Name, + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: ownerID, + Visibility: "global_shared", Host: clusterSeed.Host, Description: clusterSeed.Description, CAData: clusterSeed.CAData, @@ -179,3 +193,22 @@ func (s *Seeder) seedClusters(ctx context.Context) error { return nil } +func (s *Seeder) bootstrapOwnerID(ctx context.Context) string { + for _, userSeed := range s.config.Users { + if userSeed.Role == "admin" { + if user, err := s.repos.UserRepo.GetByUsername(ctx, userSeed.Username); err == nil && user != nil { + return user.ID + } + } + } + users, err := s.repos.UserRepo.List(ctx) + if err != nil { + return "" + } + for _, user := range users { + if user.Role == "admin" { + return user.ID + } + } + return "" +} diff --git a/backend/internal/domain/entity/artifact.go b/backend/internal/domain/entity/artifact.go index f80b936..3b097c0 100644 --- a/backend/internal/domain/entity/artifact.go +++ b/backend/internal/domain/entity/artifact.go @@ -1,8 +1,8 @@ package entity import ( - "strings" - "time" + "strings" + "time" ) // ArtifactType Artifact 类型 @@ -16,16 +16,16 @@ const ( // Artifact OCI Artifact 领域实体 type Artifact struct { - RegistryID string - Repository string - Tag string - Digest string - Type ArtifactType - Size int64 - MediaType string - ConfigType string // Config layer 的 mediaType (用于更准确的类型判断) - Annotations map[string]string - CreatedAt time.Time + RegistryID string + Repository string + Tag string + Digest string + Type ArtifactType + Size int64 + MediaType string + ConfigType string // Config layer 的 mediaType (用于更准确的类型判断) + Annotations map[string]string + CreatedAt time.Time } // Repository 仓库信息 @@ -50,34 +50,34 @@ func NewArtifact(registryID, repository, tag, digest string) *Artifact { // SetType 设置 Artifact 类型(根据 mediaType 识别为 chart | image | other) // 已废弃:请使用 DetermineType() 方法,它提供更准确的类型判断 func (a *Artifact) SetType(mediaType string) { - lowerMediaType := strings.ToLower(strings.TrimSpace(mediaType)) + 
lowerMediaType := strings.ToLower(strings.TrimSpace(mediaType)) - containsAny := func(target string, keywords ...string) bool { - for _, keyword := range keywords { - if keyword != "" && strings.Contains(target, keyword) { - return true - } - } - return false - } + containsAny := func(target string, keywords ...string) bool { + for _, keyword := range keywords { + if keyword != "" && strings.Contains(target, keyword) { + return true + } + } + return false + } - switch { - case lowerMediaType == "": - a.Type = ArtifactTypeOther - case containsAny(lowerMediaType, - "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", - ): - a.Type = ArtifactTypeChart - case containsAny(lowerMediaType, - "docker", "vnd.docker", "docker.distribution", "docker.container.image", - "vnd.oci", "oci.image", "opencontainers", "container.image", - ): - a.Type = ArtifactTypeImage - case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest") || strings.Contains(lowerMediaType, "container"): - a.Type = ArtifactTypeImage - default: - a.Type = ArtifactTypeOther - } + switch { + case lowerMediaType == "": + a.Type = ArtifactTypeOther + case containsAny(lowerMediaType, + "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", + ): + a.Type = ArtifactTypeChart + case containsAny(lowerMediaType, + "docker", "vnd.docker", "docker.distribution", "docker.container.image", + "vnd.oci", "oci.image", "opencontainers", "container.image", + ): + a.Type = ArtifactTypeImage + case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest") || strings.Contains(lowerMediaType, "container"): + a.Type = ArtifactTypeImage + default: + a.Type = ArtifactTypeOther + } } // DetermineType 智能判断 Artifact 类型(综合多种信息) @@ -87,85 +87,84 @@ func (a *Artifact) SetType(mediaType string) { // 3. Repository 名称 - charts/ 前缀暗示 // 4. 
MediaType - 兜底判断 func (a *Artifact) DetermineType() { - containsAny := func(target string, keywords ...string) bool { - for _, keyword := range keywords { - if keyword != "" && strings.Contains(target, keyword) { - return true - } - } - return false - } - - // 1. 优先检查 ConfigType(最准确的判断方式) - if a.ConfigType != "" { - lowerConfigType := strings.ToLower(strings.TrimSpace(a.ConfigType)) - - // Helm Chart 的 config.mediaType - if containsAny(lowerConfigType, - "helm.config", "cncf.helm", "helm.chart", "chart.content", - ) { - a.Type = ArtifactTypeChart - return - } - - // Docker/OCI Image 的 config.mediaType - if containsAny(lowerConfigType, - "docker.container.image", "oci.image.config", - ) { - a.Type = ArtifactTypeImage - return - } - } - - // 2. 检查 Annotations - for key, value := range a.Annotations { - lowerKey := strings.ToLower(key) - lowerValue := strings.ToLower(value) - - if containsAny(lowerKey, "helm", "chart") || - containsAny(lowerValue, "helm", "chart") { - a.Type = ArtifactTypeChart - return - } - } - - // 3. 检查 Repository 名称(辅助判断) - if strings.HasPrefix(strings.ToLower(a.Repository), "charts/") { - // charts/ 开头的仓库很可能是 Helm Chart - // 但需要结合 MediaType 进一步确认 - lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) - - // 如果是 OCI manifest 格式,很可能是以 OCI 格式存储的 Helm Chart - if strings.Contains(lowerMediaType, "oci.image.manifest") || - strings.Contains(lowerMediaType, "vnd.oci") { - a.Type = ArtifactTypeChart - return - } - } - - // 4. 
回退到基于 MediaType 的判断(兜底逻辑) - lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) - - switch { - case lowerMediaType == "": - a.Type = ArtifactTypeOther - case containsAny(lowerMediaType, - "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", - ): - a.Type = ArtifactTypeChart - case containsAny(lowerMediaType, - "docker", "vnd.docker", "docker.distribution", "docker.container.image", - ): - a.Type = ArtifactTypeImage - case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest"): - a.Type = ArtifactTypeImage - default: - a.Type = ArtifactTypeOther - } + containsAny := func(target string, keywords ...string) bool { + for _, keyword := range keywords { + if keyword != "" && strings.Contains(target, keyword) { + return true + } + } + return false + } + + // 1. 优先检查 ConfigType(最准确的判断方式) + if a.ConfigType != "" { + lowerConfigType := strings.ToLower(strings.TrimSpace(a.ConfigType)) + + // Helm Chart 的 config.mediaType + if containsAny(lowerConfigType, + "helm.config", "cncf.helm", "helm.chart", "chart.content", + ) { + a.Type = ArtifactTypeChart + return + } + + // Docker/OCI Image 的 config.mediaType + if containsAny(lowerConfigType, + "docker.container.image", "oci.image.config", + ) { + a.Type = ArtifactTypeImage + return + } + } + + // 2. 检查 Annotations + for key, value := range a.Annotations { + lowerKey := strings.ToLower(key) + lowerValue := strings.ToLower(value) + + if containsAny(lowerKey, "helm", "chart") || + containsAny(lowerValue, "helm", "chart") { + a.Type = ArtifactTypeChart + return + } + } + + // 3. 
检查 Repository 名称(辅助判断) + if strings.HasPrefix(strings.ToLower(a.Repository), "charts/") { + // charts/ 开头的仓库很可能是 Helm Chart + // 但需要结合 MediaType 进一步确认 + lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) + + // 如果是 OCI manifest 格式,很可能是以 OCI 格式存储的 Helm Chart + if strings.Contains(lowerMediaType, "oci.image.manifest") || + strings.Contains(lowerMediaType, "vnd.oci") { + a.Type = ArtifactTypeChart + return + } + } + + // 4. 回退到基于 MediaType 的判断(兜底逻辑) + lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) + + switch { + case lowerMediaType == "": + a.Type = ArtifactTypeOther + case containsAny(lowerMediaType, + "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", + ): + a.Type = ArtifactTypeChart + case containsAny(lowerMediaType, + "docker", "vnd.docker", "docker.distribution", "docker.container.image", + ): + a.Type = ArtifactTypeImage + case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest"): + a.Type = ArtifactTypeImage + default: + a.Type = ArtifactTypeOther + } } // IsChart 判断是否为 Helm Chart func (a *Artifact) IsChart() bool { return a.Type == ArtifactTypeChart } - diff --git a/backend/internal/domain/entity/cluster.go b/backend/internal/domain/entity/cluster.go index 486031a..307b559 100644 --- a/backend/internal/domain/entity/cluster.go +++ b/backend/internal/domain/entity/cluster.go @@ -6,26 +6,31 @@ import ( // Cluster Kubernetes 集群领域实体 type Cluster struct { - ID string - Name string - Host string // Kubernetes API Server URL - CAData string // Base64 encoded CA certificate - CertData string // Base64 encoded client certificate - KeyData string // Base64 encoded client key - Token string // Bearer token (alternative to cert auth) - Description string - CreatedAt time.Time - UpdatedAt time.Time + ID string + WorkspaceID string + OwnerID string + Visibility string + Name string + Host string // Kubernetes API Server URL + CAData string // Base64 
encoded CA certificate + CertData string // Base64 encoded client certificate + KeyData string // Base64 encoded client key + Token string // Bearer token (alternative to cert auth) + Description string + DefaultNamespace string + CreatedAt time.Time + UpdatedAt time.Time } // NewCluster 创建新集群 func NewCluster(name, host string) *Cluster { now := time.Now() return &Cluster{ - Name: name, - Host: host, - CreatedAt: now, - UpdatedAt: now, + Name: name, + Host: host, + Visibility: "private", + CreatedAt: now, + UpdatedAt: now, } } @@ -63,6 +68,9 @@ func (c *Cluster) Validate() error { if c.Host == "" { return ErrInvalidClusterHost } + if c.Visibility == "" { + c.Visibility = "private" + } // 必须有认证方式:证书或 Token if (c.CertData == "" || c.KeyData == "") && c.Token == "" { return ErrInvalidClusterAuth @@ -100,4 +108,3 @@ users: return kubeconfig } - diff --git a/backend/internal/domain/entity/errors.go b/backend/internal/domain/entity/errors.go index 91a65ad..71504c5 100644 --- a/backend/internal/domain/entity/errors.go +++ b/backend/internal/domain/entity/errors.go @@ -5,11 +5,15 @@ import "errors" // 领域错误定义 var ( // User errors - ErrInvalidUsername = errors.New("invalid username") - ErrInvalidPassword = errors.New("invalid password") - ErrUserNotFound = errors.New("user not found") - ErrUserExists = errors.New("user already exists") - ErrTokenRevoked = errors.New("token has been revoked") + ErrInvalidUsername = errors.New("invalid username") + ErrInvalidPassword = errors.New("invalid password") + ErrUserNotFound = errors.New("user not found") + ErrUserExists = errors.New("user already exists") + ErrTokenRevoked = errors.New("token has been revoked") + ErrUnauthorized = errors.New("authentication required") + ErrForbidden = errors.New("permission denied") + ErrUserInactive = errors.New("user is inactive") + ErrWorkspaceSuspended = errors.New("workspace is suspended") // Cluster errors ErrInvalidClusterName = errors.New("invalid cluster name") @@ -37,4 +41,8 @@ var ( 
ErrArtifactNotFound = errors.New("artifact not found") ErrRepositoryNotFound = errors.New("repository not found") ErrValuesSchemaNotFound = errors.New("values schema not found") + + // Workspace errors + ErrWorkspaceNotFound = errors.New("workspace not found") + ErrWorkspaceExists = errors.New("workspace already exists") ) diff --git a/backend/internal/domain/entity/instance.go b/backend/internal/domain/entity/instance.go index 1bc3c37..3e422d3 100644 --- a/backend/internal/domain/entity/instance.go +++ b/backend/internal/domain/entity/instance.go @@ -34,6 +34,8 @@ const ( // Instance Helm 应用实例领域实体 type Instance struct { ID string + WorkspaceID string + OwnerID string ClusterID string Name string // Helm Release Name Namespace string diff --git a/backend/internal/domain/entity/instance_diagnostics.go b/backend/internal/domain/entity/instance_diagnostics.go new file mode 100644 index 0000000..587186d --- /dev/null +++ b/backend/internal/domain/entity/instance_diagnostics.go @@ -0,0 +1,70 @@ +package entity + +import "time" + +type InstanceDiagnostics struct { + InstanceName string + Namespace string + Pods []InstancePodDiagnostics + Services []InstanceServiceDiagnostics + Events []InstanceEventDiagnostics + Logs []InstancePodLog + CollectedAt time.Time +} + +type InstancePodDiagnostics struct { + Name string + Namespace string + Phase string + NodeName string + PodIP string + HostIP string + RestartCount int32 + Containers []InstanceContainerDiagnostics + Conditions []InstanceConditionDiagnostics + CreationTimestamp time.Time +} + +type InstanceContainerDiagnostics struct { + Name string + Image string + Ready bool + RestartCount int32 + State string + Reason string + Message string +} + +type InstanceConditionDiagnostics struct { + Type string + Status string + Reason string + Message string +} + +type InstanceServiceDiagnostics struct { + Name string + Namespace string + Type string + ClusterIP string + Ports []InstanceEntryPort +} + +type InstanceEventDiagnostics 
struct { + Type string + Reason string + Message string + InvolvedKind string + InvolvedName string + Count int32 + FirstTimestamp time.Time + LastTimestamp time.Time +} + +type InstancePodLog struct { + Pod string + Container string + TailLines int64 + Log string + Error string +} diff --git a/backend/internal/domain/entity/metrics.go b/backend/internal/domain/entity/metrics.go index ad39546..756204f 100644 --- a/backend/internal/domain/entity/metrics.go +++ b/backend/internal/domain/entity/metrics.go @@ -4,70 +4,70 @@ import "time" // ClusterMetrics 集群监控指标 type ClusterMetrics struct { - ClusterID string `json:"cluster_id"` - ClusterName string `json:"cluster_name"` - Status string `json:"status"` // healthy, warning, error, unknown - Uptime string `json:"uptime"` - NodeCount int `json:"node_count"` - PodCount int `json:"pod_count"` - LastCheck time.Time `json:"last_check"` - + ClusterID string `json:"cluster_id"` + ClusterName string `json:"cluster_name"` + Status string `json:"status"` // healthy, warning, error, unknown + Uptime string `json:"uptime"` + NodeCount int `json:"node_count"` + PodCount int `json:"pod_count"` + LastCheck time.Time `json:"last_check"` + // 集群级别资源汇总 - TotalCPU string `json:"total_cpu"` // 如 "8 cores" - TotalMemory string `json:"total_memory"` // 如 "32 GB" - TotalGPU int `json:"total_gpu"` // GPU 总数 - - UsedCPU string `json:"used_cpu"` // 如 "4.5 cores" - UsedMemory string `json:"used_memory"` // 如 "16 GB" - UsedGPU int `json:"used_gpu"` // 使用的 GPU 数 - - CPUUsage float64 `json:"cpu_usage"` // 百分比 - MemoryUsage float64 `json:"memory_usage"` // 百分比 - GPUUsage float64 `json:"gpu_usage"` // 百分比 - + TotalCPU string `json:"total_cpu"` // 如 "8 cores" + TotalMemory string `json:"total_memory"` // 如 "32 GB" + TotalGPU int `json:"total_gpu"` // GPU 总数 + + UsedCPU string `json:"used_cpu"` // 如 "4.5 cores" + UsedMemory string `json:"used_memory"` // 如 "16 GB" + UsedGPU int `json:"used_gpu"` // 使用的 GPU 数 + + CPUUsage float64 `json:"cpu_usage"` // 百分比 
+ MemoryUsage float64 `json:"memory_usage"` // 百分比 + GPUUsage float64 `json:"gpu_usage"` // 百分比 + // 单机资源最大值 - MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores" - MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB" - MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量 - MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率 - MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率 - MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 单机最高GPU使用率 - + MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores" + MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB" + MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量 + MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率 + MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率 + MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 单机最高GPU使用率 + // 节点列表(简化信息) Nodes []NodeMetrics `json:"nodes,omitempty"` } // NodeMetrics 节点监控指标 type NodeMetrics struct { - NodeName string `json:"node_name"` - Status string `json:"status"` // Ready, NotReady - Role string `json:"role"` // control-plane, worker - Age string `json:"age"` - PodCount int `json:"pod_count"` - + NodeName string `json:"node_name"` + Status string `json:"status"` // Ready, NotReady + Role string `json:"role"` // control-plane, worker + Age string `json:"age"` + PodCount int `json:"pod_count"` + // CPU 资源 - CPUCapacity string `json:"cpu_capacity"` // 如 "4 cores" - CPUAllocatable string `json:"cpu_allocatable"` - CPUUsage string `json:"cpu_usage"` + CPUCapacity string `json:"cpu_capacity"` // 如 "4 cores" + CPUAllocatable string `json:"cpu_allocatable"` + CPUUsage string `json:"cpu_usage"` CPUPercent float64 `json:"cpu_percent"` - + // 内存资源 - MemoryCapacity string `json:"memory_capacity"` // 如 "16 GB" + MemoryCapacity string `json:"memory_capacity"` // 如 "16 GB" MemoryAllocatable string `json:"memory_allocatable"` MemoryUsage string `json:"memory_usage"` MemoryPercent 
float64 `json:"memory_percent"` - + // GPU 资源(如果有) GPUCapacity int `json:"gpu_capacity"` // GPU 总数 GPUUsage int `json:"gpu_usage"` // 已使用的 GPU GPUPercent float64 `json:"gpu_percent"` GPUType string `json:"gpu_type,omitempty"` // GPU 型号,如 "NVIDIA-Tesla-T4" - + // 其他信息 - OSImage string `json:"os_image,omitempty"` - KernelVersion string `json:"kernel_version,omitempty"` - ContainerRuntime string `json:"container_runtime,omitempty"` - KubeletVersion string `json:"kubelet_version,omitempty"` + OSImage string `json:"os_image,omitempty"` + KernelVersion string `json:"kernel_version,omitempty"` + ContainerRuntime string `json:"container_runtime,omitempty"` + KubeletVersion string `json:"kubelet_version,omitempty"` } // MonitoringSummary 监控汇总 @@ -80,4 +80,3 @@ type MonitoringSummary struct { TotalPods int `json:"total_pods"` LastUpdate time.Time `json:"last_update"` } - diff --git a/backend/internal/domain/entity/registry.go b/backend/internal/domain/entity/registry.go index 1b7ee56..5abbae4 100644 --- a/backend/internal/domain/entity/registry.go +++ b/backend/internal/domain/entity/registry.go @@ -7,6 +7,9 @@ import ( // Registry OCI Registry 领域实体 type Registry struct { ID string + WorkspaceID string + OwnerID string + Visibility string Name string URL string Description string @@ -21,10 +24,11 @@ type Registry struct { func NewRegistry(name, url string) *Registry { now := time.Now() return &Registry{ - Name: name, - URL: url, - CreatedAt: now, - UpdatedAt: now, + Name: name, + URL: url, + Visibility: "private", + CreatedAt: now, + UpdatedAt: now, } } @@ -55,6 +59,8 @@ func (r *Registry) Validate() error { if r.URL == "" { return ErrInvalidRegistryURL } + if r.Visibility == "" { + r.Visibility = "private" + } return nil } - diff --git a/backend/internal/domain/entity/tenant_binding.go b/backend/internal/domain/entity/tenant_binding.go new file mode 100644 index 0000000..11cd088 --- /dev/null +++ b/backend/internal/domain/entity/tenant_binding.go @@ -0,0 +1,123 @@ +package 
entity + +import ( + "errors" + "fmt" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/validation" +) + +const ( + DefaultTenantServiceAccountName = "tenant-admin" + DefaultTenantRoleBindingName = "tenant-admin" + DefaultTenantClusterRoleName = "admin" + DefaultTenantResourceQuotaName = "tenant-quota" + MaxTenantKubeconfigTTL = 2 * time.Hour +) + +var ( + ErrInvalidTenantNamespace = errors.New("invalid tenant namespace") + ErrInvalidTenantServiceAccount = errors.New("invalid tenant service account") + ErrInvalidTenantRoleBinding = errors.New("invalid tenant role binding") + ErrInvalidTenantClusterRole = errors.New("invalid tenant cluster role") + ErrInvalidTenantResourceQuota = errors.New("invalid tenant resource quota") + ErrInvalidTenantKubeconfigToken = errors.New("invalid tenant kubeconfig token") +) + +// TenantBinding describes the Kubernetes resources that grant a workspace access +// to one tenant namespace. It intentionally excludes credential material. +type TenantBinding struct { + Namespace string + ServiceAccountName string + RoleBindingName string + ClusterRoleName string + ResourceQuotaName string + Labels map[string]string + Annotations map[string]string + ResourceQuotaHard corev1.ResourceList +} + +// TenantKubeconfig contains a short-lived kubeconfig and its expiration time. +// Callers must treat Kubeconfig as secret material and must not persist or log it. +type TenantKubeconfig struct { + Kubeconfig string + ExpiresAt time.Time +} + +// NewTenantBinding returns a tenant binding with production-safe default object names. 
+func NewTenantBinding(namespace string) TenantBinding { + return TenantBinding{ + Namespace: namespace, + ServiceAccountName: DefaultTenantServiceAccountName, + RoleBindingName: DefaultTenantRoleBindingName, + ClusterRoleName: DefaultTenantClusterRoleName, + ResourceQuotaName: DefaultTenantResourceQuotaName, + Labels: map[string]string{ + "ocdp.io/managed-by": "ocdp", + "ocdp.io/tenant": namespace, + }, + } +} + +// WithDefaults fills optional names while preserving explicit caller choices. +func (b TenantBinding) WithDefaults() TenantBinding { + if b.ServiceAccountName == "" { + b.ServiceAccountName = DefaultTenantServiceAccountName + } + if b.RoleBindingName == "" { + b.RoleBindingName = DefaultTenantRoleBindingName + } + if b.ClusterRoleName == "" { + b.ClusterRoleName = DefaultTenantClusterRoleName + } + if b.ResourceQuotaName == "" { + b.ResourceQuotaName = DefaultTenantResourceQuotaName + } + if b.Labels == nil { + b.Labels = map[string]string{} + } + if b.Labels["ocdp.io/managed-by"] == "" { + b.Labels["ocdp.io/managed-by"] = "ocdp" + } + if b.Namespace != "" && b.Labels["ocdp.io/tenant"] == "" { + b.Labels["ocdp.io/tenant"] = b.Namespace + } + return b +} + +// Validate checks the object names required to provision a tenant namespace. 
+func (b TenantBinding) Validate() error { + b = b.WithDefaults() + if strings.TrimSpace(b.Namespace) == "" || len(validation.IsDNS1123Label(b.Namespace)) > 0 { + return ErrInvalidTenantNamespace + } + if strings.TrimSpace(b.ServiceAccountName) == "" || len(validation.IsDNS1123Subdomain(b.ServiceAccountName)) > 0 { + return ErrInvalidTenantServiceAccount + } + if strings.TrimSpace(b.RoleBindingName) == "" || len(validation.IsDNS1123Subdomain(b.RoleBindingName)) > 0 { + return ErrInvalidTenantRoleBinding + } + if strings.TrimSpace(b.ClusterRoleName) == "" || len(validation.IsDNS1123Subdomain(b.ClusterRoleName)) > 0 { + return ErrInvalidTenantClusterRole + } + if strings.TrimSpace(b.ResourceQuotaName) == "" || len(validation.IsDNS1123Subdomain(b.ResourceQuotaName)) > 0 { + return ErrInvalidTenantResourceQuota + } + return nil +} + +// TenantTokenTTL caps requested kubeconfig lifetimes at MaxTenantKubeconfigTTL. +func TenantTokenTTL(requested time.Duration) time.Duration { + if requested <= 0 || requested > MaxTenantKubeconfigTTL { + return MaxTenantKubeconfigTTL + } + return requested +} + +func (b TenantBinding) String() string { + b = b.WithDefaults() + return fmt.Sprintf("tenant namespace %q serviceAccount %q roleBinding %q", b.Namespace, b.ServiceAccountName, b.RoleBindingName) +} diff --git a/backend/internal/domain/entity/tenant_binding_test.go b/backend/internal/domain/entity/tenant_binding_test.go new file mode 100644 index 0000000..6fed41d --- /dev/null +++ b/backend/internal/domain/entity/tenant_binding_test.go @@ -0,0 +1,38 @@ +package entity + +import ( + "testing" + "time" +) + +func TestTenantTokenTTLCapsAtTwoHours(t *testing.T) { + testCases := []struct { + name string + requested time.Duration + want time.Duration + }{ + {name: "uses default for zero", requested: 0, want: MaxTenantKubeconfigTTL}, + {name: "keeps shorter ttl", requested: 30 * time.Minute, want: 30 * time.Minute}, + {name: "caps longer ttl", requested: 24 * time.Hour, want: 
MaxTenantKubeconfigTTL}, + } + + for _, tc := range testCases { + if got := TenantTokenTTL(tc.requested); got != tc.want { + t.Fatalf("%s: expected %s, got %s", tc.name, tc.want, got) + } + } +} + +func TestTenantBindingWithDefaults(t *testing.T) { + binding := NewTenantBinding("tenant-a").WithDefaults() + + if err := binding.Validate(); err != nil { + t.Fatalf("expected valid default binding: %v", err) + } + if binding.ServiceAccountName != DefaultTenantServiceAccountName { + t.Fatalf("expected default service account %q, got %q", DefaultTenantServiceAccountName, binding.ServiceAccountName) + } + if binding.Labels["ocdp.io/tenant"] != "tenant-a" { + t.Fatalf("expected tenant label, got %#v", binding.Labels) + } +} diff --git a/backend/internal/domain/entity/user.go b/backend/internal/domain/entity/user.go index bf6387f..372d4e3 100644 --- a/backend/internal/domain/entity/user.go +++ b/backend/internal/domain/entity/user.go @@ -6,13 +6,17 @@ import ( // User 用户领域实体 type User struct { - ID string - Username string - PasswordHash string - Email string - RevokedAfter time.Time // 全局 Token 撤销时间 - CreatedAt time.Time - UpdatedAt time.Time + ID string + Username string + PasswordHash string + Email string + Role string + WorkspaceID string + IsActive bool + MustChangePassword bool + RevokedAfter time.Time // 全局 Token 撤销时间 + CreatedAt time.Time + UpdatedAt time.Time } // NewUser 创建新用户 @@ -22,6 +26,9 @@ func NewUser(username, passwordHash, email string) *User { Username: username, PasswordHash: passwordHash, Email: email, + Role: "user", + WorkspaceID: DefaultWorkspaceID, + IsActive: true, RevokedAfter: time.Unix(0, 0), // 初始值:1970-01-01 CreatedAt: now, UpdatedAt: now, @@ -49,6 +56,11 @@ func (u *User) Validate() error { if u.PasswordHash == "" { return ErrInvalidPassword } + if u.Role == "" { + u.Role = "user" + } + if u.WorkspaceID == "" && u.Role != "admin" { + u.WorkspaceID = DefaultWorkspaceID + } return nil } - diff --git a/backend/internal/domain/entity/workspace.go 
b/backend/internal/domain/entity/workspace.go new file mode 100644 index 0000000..5dea0eb --- /dev/null +++ b/backend/internal/domain/entity/workspace.go @@ -0,0 +1,150 @@ +package entity + +import ( + "strings" + "time" +) + +const ( + DefaultWorkspaceID = "00000000-0000-0000-0000-000000000010" + DefaultWorkspaceName = "default" +) + +type WorkspaceStatus string + +const ( + WorkspaceActive WorkspaceStatus = "active" + WorkspaceSuspended WorkspaceStatus = "suspended" +) + +type Workspace struct { + ID string + Name string + Status WorkspaceStatus + K8sNamespace string + K8sSAName string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + CreatedBy string + CreatedAt time.Time + UpdatedAt time.Time +} + +func NewWorkspace(name, createdBy string) *Workspace { + now := time.Now() + return &Workspace{ + Name: name, + Status: WorkspaceActive, + K8sNamespace: NamespaceForWorkspace(name), + K8sSAName: ServiceAccountForWorkspace(name), + CreatedBy: createdBy, + CreatedAt: now, + UpdatedAt: now, + } +} + +func NamespaceForWorkspace(name string) string { + if name == "" { + name = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-ws-", name) +} + +func NamespaceForUser(username string) string { + if username == "" { + username = "user" + } + return prefixedDNSLabel("ocdp-u-", username) +} + +func ServiceAccountForWorkspace(name string) string { + if name == "" { + name = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-ws-", name) +} + +func ServiceAccountForNamespace(namespace string) string { + if namespace == "" { + namespace = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-sa-", namespace) +} + +func prefixedDNSLabel(prefix, value string) string { + label := normalizeDNSLabel(value) + maxLabelLen := 63 - len(prefix) + if maxLabelLen < 1 { + maxLabelLen = 1 + } + if len(label) > maxLabelLen { + label = strings.Trim(label[:maxLabelLen], "-") + } + if label == "" { + label = DefaultWorkspaceName 
+ if len(label) > maxLabelLen { + label = label[:maxLabelLen] + } + } + return prefix + label +} + +func normalizeDNSLabel(value string) string { + out := make([]rune, 0, len(value)) + lastDash := false + for _, r := range value { + valid := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') + if r >= 'A' && r <= 'Z' { + r = r + ('a' - 'A') + valid = true + } + if valid { + out = append(out, r) + lastDash = false + continue + } + if !lastDash && len(out) > 0 { + out = append(out, '-') + lastDash = true + } + } + for len(out) > 0 && out[len(out)-1] == '-' { + out = out[:len(out)-1] + } + if len(out) == 0 { + return DefaultWorkspaceName + } + return string(out) +} + +type WorkspaceClusterBinding struct { + ID string + WorkspaceID string + ClusterID string + Namespace string + ServiceAccount string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + Status string + CreatedAt time.Time + UpdatedAt time.Time +} + +type AuditLog struct { + ID string + WorkspaceID string + UserID string + Action string + ResourceType string + ResourceID string + ResourceName string + Details map[string]interface{} + IPAddress string + UserAgent string + CreatedAt time.Time +} diff --git a/backend/internal/domain/repository/cluster_repository.go b/backend/internal/domain/repository/cluster_repository.go index b598025..18ea1ea 100644 --- a/backend/internal/domain/repository/cluster_repository.go +++ b/backend/internal/domain/repository/cluster_repository.go @@ -9,20 +9,19 @@ import ( type ClusterRepository interface { // Create 创建集群 Create(ctx context.Context, cluster *entity.Cluster) error - + // GetByID 根据 ID 获取集群 GetByID(ctx context.Context, id string) (*entity.Cluster, error) - + // GetByName 根据名称获取集群 GetByName(ctx context.Context, name string) (*entity.Cluster, error) - + // Update 更新集群 Update(ctx context.Context, cluster *entity.Cluster) error - + // Delete 删除集群 Delete(ctx context.Context, id string) error - + // List 列出所有集群 List(ctx context.Context) 
([]*entity.Cluster, error) } - diff --git a/backend/internal/domain/repository/helm_client.go b/backend/internal/domain/repository/helm_client.go index 325aef4..7262522 100644 --- a/backend/internal/domain/repository/helm_client.go +++ b/backend/internal/domain/repository/helm_client.go @@ -9,26 +9,25 @@ import ( type HelmClient interface { // Install 安装 Helm Chart Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error - + // Upgrade 升级 Helm Release Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error - + // Uninstall 卸载 Helm Release Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error - + // Rollback 回滚 Helm Release Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error - + // GetStatus 获取 Release 状态 GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) - + // GetHistory 获取 Release 历史 GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) - + // List 列出集群中的所有 Releases List(ctx context.Context, cluster *entity.Cluster, namespace string) ([]*entity.Instance, error) - + // GetValues 获取 Release 的 values GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) } - diff --git a/backend/internal/domain/repository/instance_diagnostics_client.go b/backend/internal/domain/repository/instance_diagnostics_client.go new file mode 100644 index 0000000..f04b98a --- /dev/null +++ b/backend/internal/domain/repository/instance_diagnostics_client.go @@ -0,0 +1,11 @@ +package repository + +import ( + "context" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +type InstanceDiagnosticsClient interface { + GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) 
(*entity.InstanceDiagnostics, error) +} diff --git a/backend/internal/domain/repository/instance_repository.go b/backend/internal/domain/repository/instance_repository.go index 9cd955b..1347db1 100644 --- a/backend/internal/domain/repository/instance_repository.go +++ b/backend/internal/domain/repository/instance_repository.go @@ -9,23 +9,22 @@ import ( type InstanceRepository interface { // Create 创建实例 Create(ctx context.Context, instance *entity.Instance) error - + // GetByID 根据 ID 获取实例 GetByID(ctx context.Context, id string) (*entity.Instance, error) - + // GetByClusterAndName 根据集群 ID 和名称获取实例 GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) - + // Update 更新实例 Update(ctx context.Context, instance *entity.Instance) error - + // Delete 删除实例 Delete(ctx context.Context, id string) error - + // ListByCluster 列出指定集群的所有实例 ListByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) - + // List 列出所有实例 List(ctx context.Context) ([]*entity.Instance, error) } - diff --git a/backend/internal/domain/repository/metrics_client.go b/backend/internal/domain/repository/metrics_client.go index 498e0d3..d01ca11 100644 --- a/backend/internal/domain/repository/metrics_client.go +++ b/backend/internal/domain/repository/metrics_client.go @@ -10,8 +10,7 @@ import ( type MetricsClient interface { // GetClusterMetrics 获取集群的监控指标 GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) - + // GetNodeMetrics 获取集群的节点指标 GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) } - diff --git a/backend/internal/domain/repository/oci_client.go b/backend/internal/domain/repository/oci_client.go index 1cd6c9a..b322be9 100644 --- a/backend/internal/domain/repository/oci_client.go +++ b/backend/internal/domain/repository/oci_client.go @@ -7,26 +7,29 @@ import ( // OCIClient OCI Registry 客户端接口(Output Port) type OCIClient interface { - // ListRepositories 列出 Registry 中的所有 
repositories - ListRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) - + // ListRepositories 列出 Registry 中的 repositories. + // artifactType 支持 "chart" 和 "all",默认由调用方决定。 + ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) + // ListArtifacts 列出指定 repository 的所有 artifacts // mediaTypeFilter: "all", "image", "chart", "other" - 使用模糊匹配过滤 ListArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) - + // GetArtifact 获取指定 artifact 的详细信息 GetArtifact(ctx context.Context, registry *entity.Registry, repository, reference string) (*entity.Artifact, error) - + // GetValuesSchema 获取 Helm Chart 的 values schema GetValuesSchema(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) - + + // GetValuesYAML 获取 Helm Chart 原始 values.yaml + GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) + // PullArtifact 下载 artifact 到本地 PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, destPath string) error - + // PushArtifact 推送 artifact 到 Registry PushArtifact(ctx context.Context, registry *entity.Registry, repository, tag, sourcePath string) error - + // CheckHealth 检查 Registry 健康状态 CheckHealth(ctx context.Context, registry *entity.Registry) error } - diff --git a/backend/internal/domain/repository/registry_repository.go b/backend/internal/domain/repository/registry_repository.go index 7c7d2e5..fa32920 100644 --- a/backend/internal/domain/repository/registry_repository.go +++ b/backend/internal/domain/repository/registry_repository.go @@ -9,20 +9,19 @@ import ( type RegistryRepository interface { // Create 创建 Registry Create(ctx context.Context, registry *entity.Registry) error - + // GetByID 根据 ID 获取 Registry GetByID(ctx context.Context, id string) (*entity.Registry, error) - + // GetByName 根据名称获取 Registry GetByName(ctx 
context.Context, name string) (*entity.Registry, error) - + // Update 更新 Registry Update(ctx context.Context, registry *entity.Registry) error - + // Delete 删除 Registry Delete(ctx context.Context, id string) error - + // List 列出所有 Registries List(ctx context.Context) ([]*entity.Registry, error) } - diff --git a/backend/internal/domain/repository/tenant_kube_client.go b/backend/internal/domain/repository/tenant_kube_client.go new file mode 100644 index 0000000..bfbd100 --- /dev/null +++ b/backend/internal/domain/repository/tenant_kube_client.go @@ -0,0 +1,15 @@ +package repository + +import ( + "context" + "time" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +// TenantKubeClient provisions namespace-scoped Kubernetes access for tenants. +type TenantKubeClient interface { + EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error + IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) + SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error +} diff --git a/backend/internal/domain/repository/user_repository.go b/backend/internal/domain/repository/user_repository.go index eff3479..e787ba4 100644 --- a/backend/internal/domain/repository/user_repository.go +++ b/backend/internal/domain/repository/user_repository.go @@ -9,20 +9,19 @@ import ( type UserRepository interface { // Create 创建用户 Create(ctx context.Context, user *entity.User) error - + // GetByID 根据 ID 获取用户 GetByID(ctx context.Context, id string) (*entity.User, error) - + // GetByUsername 根据用户名获取用户 GetByUsername(ctx context.Context, username string) (*entity.User, error) - + // Update 更新用户 Update(ctx context.Context, user *entity.User) error - + // Delete 删除用户 Delete(ctx context.Context, id string) error - + // List 列出所有用户 List(ctx context.Context) ([]*entity.User, error) } - diff --git 
a/backend/internal/domain/repository/workspace_repository.go b/backend/internal/domain/repository/workspace_repository.go new file mode 100644 index 0000000..eb75079 --- /dev/null +++ b/backend/internal/domain/repository/workspace_repository.go @@ -0,0 +1,26 @@ +package repository + +import ( + "context" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +type WorkspaceRepository interface { + Create(ctx context.Context, workspace *entity.Workspace) error + GetByID(ctx context.Context, id string) (*entity.Workspace, error) + GetByName(ctx context.Context, name string) (*entity.Workspace, error) + Update(ctx context.Context, workspace *entity.Workspace) error + List(ctx context.Context) ([]*entity.Workspace, error) +} + +type WorkspaceClusterBindingRepository interface { + Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error + Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) + Delete(ctx context.Context, workspaceID, clusterID string) error +} + +type AuditLogRepository interface { + Create(ctx context.Context, log *entity.AuditLog) error + ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) +} diff --git a/backend/internal/domain/service/artifact_service.go b/backend/internal/domain/service/artifact_service.go index deb1363..7938cf8 100644 --- a/backend/internal/domain/service/artifact_service.go +++ b/backend/internal/domain/service/artifact_service.go @@ -4,6 +4,7 @@ import ( "context" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // ArtifactService Artifact 浏览领域服务 @@ -25,22 +26,22 @@ func NewArtifactService( // GetRegistry 获取 Registry 信息 func (s *ArtifactService) GetRegistry(ctx context.Context, registryID string) (*entity.Registry, error) { - return s.registryRepo.GetByID(ctx, registryID) + return 
s.visibleRegistry(ctx, registryID) } -// ListRepositories 列出 Registry 中的所有 repositories -func (s *ArtifactService) ListRepositories(ctx context.Context, registryID string) ([]string, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) +// ListRepositories 列出 Registry 中的 repositories +func (s *ArtifactService) ListRepositories(ctx context.Context, registryID, artifactType string) ([]string, error) { + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } - return s.ociClient.ListRepositories(ctx, registry) + return s.ociClient.ListRepositories(ctx, registry, artifactType) } // ListArtifacts 列出 repository 中的所有 artifacts func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } @@ -50,7 +51,7 @@ func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, reposit // GetArtifact 获取 artifact 详情 func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repository, reference string) (*entity.Artifact, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } @@ -60,7 +61,7 @@ func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repositor // GetValuesSchema 获取 Helm Chart 的 values schema func (s *ArtifactService) GetValuesSchema(ctx context.Context, registryID, repository, reference string) (string, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return "", entity.ErrRegistryNotFound } @@ -68,9 +69,19 @@ func (s *ArtifactService) GetValuesSchema(ctx context.Context, registryID, repos return 
s.ociClient.GetValuesSchema(ctx, registry, repository, reference) } +// GetValuesYAML 获取 Helm Chart 的原始 values.yaml +func (s *ArtifactService) GetValuesYAML(ctx context.Context, registryID, repository, reference string) (string, error) { + registry, err := s.visibleRegistry(ctx, registryID) + if err != nil { + return "", entity.ErrRegistryNotFound + } + + return s.ociClient.GetValuesYAML(ctx, registry, repository, reference) +} + // PullArtifact 下载 artifact func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, repository, reference, destPath string) error { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return entity.ErrRegistryNotFound } @@ -78,3 +89,17 @@ func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, reposito return s.ociClient.PullArtifact(ctx, registry, repository, reference, destPath) } +func (s *ArtifactService) visibleRegistry(ctx context.Context, registryID string) (*entity.Registry, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registry, err := s.registryRepo.GetByID(ctx, registryID) + if err != nil { + return nil, entity.ErrRegistryNotFound + } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return nil, entity.ErrRegistryNotFound + } + return registry, nil +} diff --git a/backend/internal/domain/service/auth_service.go b/backend/internal/domain/service/auth_service.go index fd7340a..240308d 100644 --- a/backend/internal/domain/service/auth_service.go +++ b/backend/internal/domain/service/auth_service.go @@ -2,14 +2,22 @@ package service import ( "context" + "strings" + "time" + "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + jwtpkg 
"github.com/ocdp/cluster-service/internal/pkg/jwt" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/validation" ) // AuthService 认证领域服务 type AuthService struct { userRepo repository.UserRepository + workspaceRepo repository.WorkspaceRepository passwordHasher PasswordHasher tokenGenerator TokenGenerator } @@ -22,27 +30,48 @@ type PasswordHasher interface { // TokenGenerator Token 生成器接口 type TokenGenerator interface { - Generate(userID, username string) (accessToken, refreshToken string, err error) + Generate(userID, username, role, workspaceID string) (accessToken, refreshToken string, err error) Verify(token string) (userID, username string, err error) VerifyWithIssuedAt(token string) (userID, username string, issuedAt int64, err error) + VerifyAccess(token string) (*jwtpkg.Claims, error) + VerifyRefresh(token string) (*jwtpkg.Claims, error) Refresh(refreshToken string) (newAccessToken string, err error) } // NewAuthService 创建认证服务 func NewAuthService( userRepo repository.UserRepository, + workspaceRepo repository.WorkspaceRepository, passwordHasher PasswordHasher, tokenGenerator TokenGenerator, ) *AuthService { return &AuthService{ userRepo: userRepo, + workspaceRepo: workspaceRepo, passwordHasher: passwordHasher, tokenGenerator: tokenGenerator, } } -// Register 注册新用户(仅需用户名和密码,邮箱将自动补全) -func (s *AuthService) Register(ctx context.Context, username, password string) (*entity.User, error) { +// Register 注册新用户。业务入口只允许 admin 调用;初始 admin 由 bootstrap seeder 创建。 +type UserWorkspaceOptions struct { + Namespace string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string +} + +func (s *AuthService) Register(ctx context.Context, username, password, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return 
nil, entity.ErrForbidden + } + // 检查用户是否已存在 existingUser, _ := s.userRepo.GetByUsername(ctx, username) if existingUser != nil { @@ -54,6 +83,10 @@ func (s *AuthService) Register(ctx context.Context, username, password string) ( if err != nil { return nil, err } + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } // 默认生成占位邮箱,避免数据库约束失败 email := username + "@local.ocdp" @@ -61,6 +94,27 @@ func (s *AuthService) Register(ctx context.Context, username, password string) ( // 创建用户 user := entity.NewUser(username, passwordHash, email) user.ID = uuid.New().String() + user.Role = normalizeUserRole(role) + user.WorkspaceID = workspaceID + if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) { + workspace, err := s.createUserWorkspace(ctx, username, principal.UserID, normalizedOpts) + if err != nil { + return nil, err + } + user.WorkspaceID = workspace.ID + } + if user.WorkspaceID == "" { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if user.Role == authz.RoleAdmin { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if isActive != nil { + user.IsActive = *isActive + } + if mustChangePassword != nil { + user.MustChangePassword = *mustChangePassword + } if err := user.Validate(); err != nil { return nil, err @@ -73,31 +127,241 @@ func (s *AuthService) Register(ctx context.Context, username, password string) ( return user, nil } -// Login 用户登录 -func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, err error) { - // 查找用户 - user, err := s.userRepo.GetByUsername(ctx, username) +func (s *AuthService) createUserWorkspace(ctx context.Context, username, createdBy string, opts UserWorkspaceOptions) (*entity.Workspace, error) { + if s.workspaceRepo == nil { + return nil, entity.ErrWorkspaceNotFound + } + name := strings.TrimPrefix(entity.NamespaceForUser(username), "ocdp-u-") + workspace := entity.NewWorkspace(name, createdBy) + 
workspace.ID = uuid.New().String() + workspace.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID) + namespace := strings.TrimSpace(opts.Namespace) + if namespace == "" { + namespace = entity.NamespaceForUser(username) + } + if namespace != "" { + if len(validation.IsDNS1123Label(namespace)) > 0 { + return nil, entity.ErrInvalidNamespace + } + workspace.K8sNamespace = namespace + workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + workspace.QuotaCPU = strings.TrimSpace(opts.QuotaCPU) + workspace.QuotaMemory = strings.TrimSpace(opts.QuotaMemory) + workspace.QuotaGPU = strings.TrimSpace(opts.QuotaGPU) + workspace.QuotaGPUMem = strings.TrimSpace(opts.QuotaGPUMem) + if err := s.workspaceRepo.Create(ctx, workspace); err != nil { + return nil, err + } + return workspace, nil +} + +func normalizeQuotaOptions(opts UserWorkspaceOptions) (UserWorkspaceOptions, error) { + opts.Namespace = strings.TrimSpace(opts.Namespace) + opts.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID) + opts.QuotaCPU = normalizeStandardQuotaQuantity(opts.QuotaCPU) + opts.QuotaMemory = normalizeStandardQuotaQuantity(opts.QuotaMemory) + opts.QuotaGPU = normalizeStandardQuotaQuantity(opts.QuotaGPU) + gpuMem, err := normalizeGPUMemoryQuota(opts.QuotaGPUMem) if err != nil { - return "", "", entity.ErrUserNotFound + return opts, err + } + opts.QuotaGPUMem = gpuMem + for _, value := range []string{opts.QuotaCPU, opts.QuotaMemory, opts.QuotaGPU} { + if value == "" { + continue + } + if _, err := resource.ParseQuantity(value); err != nil { + return opts, entity.ErrInvalidTenantResourceQuota + } + } + if opts.Namespace != "" && len(validation.IsDNS1123Label(opts.Namespace)) > 0 { + return opts, entity.ErrInvalidNamespace + } + return opts, nil +} + +func (s *AuthService) ListUsers(ctx context.Context) ([]*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return 
nil, entity.ErrForbidden + } + return s.userRepo.List(ctx) +} + +func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + user, err := s.userRepo.GetByID(ctx, userID) + if err != nil { + return nil, entity.ErrUserNotFound + } + if role != "" { + user.Role = normalizeUserRole(role) + } + if workspaceID != "" { + user.WorkspaceID = workspaceID + } + if user.Role == authz.RoleAdmin { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) { + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } + workspace, err := s.createUserWorkspace(ctx, user.Username, principal.UserID, normalizedOpts) + if err != nil { + return nil, err + } + user.WorkspaceID = workspace.ID + } + if isActive != nil { + if user.ID == principal.UserID && !*isActive { + return nil, entity.ErrForbidden + } + user.IsActive = *isActive + } + if mustChangePassword != nil { + user.MustChangePassword = *mustChangePassword + } + if user.Role != authz.RoleAdmin && hasWorkspaceUpdates(opts) { + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } + workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID) + if err != nil { + return nil, err + } + applyWorkspaceOptions(workspace, normalizedOpts) + if err := s.workspaceRepo.Update(ctx, workspace); err != nil { + return nil, err + } + } + user.RevokedAfter = time.Now() + user.UpdatedAt = time.Now() + if err := user.Validate(); err != nil { + return nil, err + } + if err := s.userRepo.Update(ctx, user); err != nil { + return nil, err + } + return user, nil +} + +func hasWorkspaceUpdates(opts 
UserWorkspaceOptions) bool { + return strings.TrimSpace(opts.Namespace) != "" || + strings.TrimSpace(opts.DefaultClusterID) != "" || + strings.TrimSpace(opts.QuotaCPU) != "" || + strings.TrimSpace(opts.QuotaMemory) != "" || + strings.TrimSpace(opts.QuotaGPU) != "" || + strings.TrimSpace(opts.QuotaGPUMem) != "" +} + +func applyWorkspaceOptions(workspace *entity.Workspace, opts UserWorkspaceOptions) { + if namespace := strings.TrimSpace(opts.Namespace); namespace != "" { + workspace.K8sNamespace = namespace + workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + if value := strings.TrimSpace(opts.DefaultClusterID); value != "" { + workspace.DefaultClusterID = value + } + if value := strings.TrimSpace(opts.QuotaCPU); value != "" { + workspace.QuotaCPU = value + } + if value := strings.TrimSpace(opts.QuotaMemory); value != "" { + workspace.QuotaMemory = value + } + if value := strings.TrimSpace(opts.QuotaGPU); value != "" { + workspace.QuotaGPU = value + } + if value := strings.TrimSpace(opts.QuotaGPUMem); value != "" { + workspace.QuotaGPUMem = value + } +} + +func (s *AuthService) DeleteUser(ctx context.Context, userID string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return entity.ErrForbidden + } + if userID == principal.UserID { + return entity.ErrForbidden + } + return s.userRepo.Delete(ctx, userID) +} + +func normalizeUserRole(role string) string { + if role == authz.RoleAdmin { + return authz.RoleAdmin + } + return authz.RoleUser +} + +// Login 用户登录 +func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, user *entity.User, err error) { + // 查找用户 + user, err = s.userRepo.GetByUsername(ctx, username) + if err != nil { + return "", "", nil, entity.ErrUserNotFound + } + if !user.IsActive { + return "", "", nil, entity.ErrUserInactive + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil 
{ + return "", "", nil, err } // 验证密码 if err := s.passwordHasher.Verify(password, user.PasswordHash); err != nil { - return "", "", entity.ErrInvalidPassword + return "", "", nil, entity.ErrInvalidPassword } // 生成 Token - accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username) + accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID) if err != nil { - return "", "", err + return "", "", nil, err } - return accessToken, refreshToken, nil + return accessToken, refreshToken, user, nil } // RefreshToken 刷新 Token -func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, error) { - return s.tokenGenerator.Refresh(refreshToken) +func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, *entity.User, error) { + claims, err := s.tokenGenerator.VerifyRefresh(refreshToken) + if err != nil { + return "", nil, err + } + user, err := s.userRepo.GetByID(ctx, claims.UserID) + if err != nil { + return "", nil, entity.ErrUserNotFound + } + if !user.IsActive { + return "", nil, entity.ErrUserInactive + } + if claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() { + return "", nil, entity.ErrTokenRevoked + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil { + return "", nil, err + } + accessToken, _, err := s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID) + if err != nil { + return "", nil, err + } + return accessToken, user, nil } // GetUserByID 根据 ID 获取用户 @@ -106,25 +370,84 @@ func (s *AuthService) GetUserByID(ctx context.Context, id string) (*entity.User, } // VerifyAccessToken 验证 Access Token(包括 revoked_after 检查) -func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (userID, username string, err error) { +func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (*authz.Principal, error) { // 1. 
JWT 自验证 - userID, username, issuedAt, err := s.tokenGenerator.VerifyWithIssuedAt(token) + claims, err := s.tokenGenerator.VerifyAccess(token) if err != nil { - return "", "", err + return nil, err } // 2. 检查用户级别的撤销时间 - user, err := s.userRepo.GetByID(ctx, userID) + user, err := s.userRepo.GetByID(ctx, claims.UserID) if err != nil { - return "", "", entity.ErrUserNotFound + return nil, entity.ErrUserNotFound + } + if !user.IsActive { + return nil, entity.ErrUserInactive } // 3. 如果 Token 签发时间早于 revoked_after,则失效 - if issuedAt < user.RevokedAfter.Unix() { - return "", "", entity.ErrTokenRevoked + if claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() { + return nil, entity.ErrTokenRevoked + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil { + return nil, err + } + workspaceName := "" + namespace := "" + defaultClusterID := "" + quotaCPU := "" + quotaMemory := "" + quotaGPU := "" + quotaGPUMem := "" + if s.workspaceRepo != nil && user.WorkspaceID != "" { + if workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID); err == nil && workspace != nil { + workspaceName = workspace.Name + namespace = workspace.K8sNamespace + defaultClusterID = workspace.DefaultClusterID + quotaCPU = workspace.QuotaCPU + quotaMemory = workspace.QuotaMemory + quotaGPU = workspace.QuotaGPU + quotaGPUMem = workspace.QuotaGPUMem + } } - return userID, username, nil + return &authz.Principal{ + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName, + Namespace: namespace, + DefaultClusterID: defaultClusterID, + QuotaCPU: quotaCPU, + QuotaMemory: quotaMemory, + QuotaGPU: quotaGPU, + QuotaGPUMem: quotaGPUMem, + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, + }, nil +} + +func (s *AuthService) GetWorkspaceByID(ctx context.Context, id string) (*entity.Workspace, error) { + if s.workspaceRepo == nil || id == "" { + return nil, entity.ErrWorkspaceNotFound + } + 
return s.workspaceRepo.GetByID(ctx, id) +} + +func (s *AuthService) ensureWorkspaceActive(ctx context.Context, user *entity.User) error { + if user.Role == authz.RoleAdmin || user.WorkspaceID == "" || s.workspaceRepo == nil { + return nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID) + if err != nil { + return entity.ErrWorkspaceNotFound + } + if workspace.Status == entity.WorkspaceSuspended { + return entity.ErrWorkspaceSuspended + } + return nil } // ChangePassword 修改密码(会触发全局登出) diff --git a/backend/internal/domain/service/cluster_service.go b/backend/internal/domain/service/cluster_service.go index a6986b8..aeaa182 100644 --- a/backend/internal/domain/service/cluster_service.go +++ b/backend/internal/domain/service/cluster_service.go @@ -5,6 +5,7 @@ import ( "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // ClusterService 集群管理领域服务 @@ -21,8 +22,21 @@ func NewClusterService(clusterRepo repository.ClusterRepository) *ClusterService // CreateCluster 创建新集群 func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Cluster) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID cluster.ID = uuid.New().String() + cluster.OwnerID = principal.UserID + cluster.WorkspaceID = principal.WorkspaceID + if principal.IsAdmin() && cluster.WorkspaceID == "" { + cluster.WorkspaceID = entity.DefaultWorkspaceID + } + if !principal.IsAdmin() && cluster.Visibility == authz.VisibilityGlobalShared { + return entity.ErrForbidden + } + cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility) // 验证 if err := cluster.Validate(); err != nil { @@ -30,9 +44,11 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus } // 检查是否已存在 - existingCluster, _ := s.clusterRepo.GetByName(ctx, 
cluster.Name) - if existingCluster != nil { - return entity.ErrClusterExists + clusters, _ := s.clusterRepo.List(ctx) + for _, existingCluster := range clusters { + if existingCluster.Name == cluster.Name && existingCluster.WorkspaceID == cluster.WorkspaceID && existingCluster.OwnerID == cluster.OwnerID { + return entity.ErrClusterExists + } } return s.clusterRepo.Create(ctx, cluster) @@ -40,16 +56,41 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus // GetCluster 获取集群 func (s *ClusterService) GetCluster(ctx context.Context, id string) (*entity.Cluster, error) { - return s.clusterRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + return cluster, nil } // UpdateCluster 更新集群 func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Cluster) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.clusterRepo.GetByID(ctx, cluster.ID) + existing, err := s.clusterRepo.GetByID(ctx, cluster.ID) if err != nil { return entity.ErrClusterNotFound } + if !authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) { + return entity.ErrForbidden + } + cluster.WorkspaceID = existing.WorkspaceID + cluster.OwnerID = existing.OwnerID + if principal.IsAdmin() { + cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility) + } else { + cluster.Visibility = existing.Visibility + } // 验证 if err := cluster.Validate(); err != nil { @@ -61,17 +102,37 @@ func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Clus // DeleteCluster 删除集群 func (s *ClusterService) DeleteCluster(ctx 
context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.clusterRepo.GetByID(ctx, id) + cluster, err := s.clusterRepo.GetByID(ctx, id) if err != nil { return entity.ErrClusterNotFound } + if !authz.CanWriteResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return entity.ErrForbidden + } return s.clusterRepo.Delete(ctx, id) } // ListClusters 列出所有集群 func (s *ClusterService) ListClusters(ctx context.Context) ([]*entity.Cluster, error) { - return s.clusterRepo.List(ctx) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + clusters, err := s.clusterRepo.List(ctx) + if err != nil { + return nil, err + } + visible := make([]*entity.Cluster, 0, len(clusters)) + for _, cluster := range clusters { + if authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + visible = append(visible, cluster) + } + } + return visible, nil } - diff --git a/backend/internal/domain/service/instance_service.go b/backend/internal/domain/service/instance_service.go index 6e5ec7f..9532648 100644 --- a/backend/internal/domain/service/instance_service.go +++ b/backend/internal/domain/service/instance_service.go @@ -11,16 +11,23 @@ import ( "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" ) // InstanceService Helm 实例管理领域服务 type InstanceService struct { - instanceRepo repository.InstanceRepository - clusterRepo repository.ClusterRepository - registryRepo repository.RegistryRepository - helmClient repository.HelmClient - ociClient repository.OCIClient - entryClient repository.InstanceEntryClient + instanceRepo repository.InstanceRepository + clusterRepo 
repository.ClusterRepository + registryRepo repository.RegistryRepository + bindingRepo repository.WorkspaceClusterBindingRepository + helmClient repository.HelmClient + ociClient repository.OCIClient + entryClient repository.InstanceEntryClient + diagClient repository.InstanceDiagnosticsClient + workspaceRepo repository.WorkspaceRepository + tenantClient repository.TenantKubeClient } // NewInstanceService 创建实例服务 @@ -31,17 +38,32 @@ func NewInstanceService( helmClient repository.HelmClient, ociClient repository.OCIClient, entryClient repository.InstanceEntryClient, + bindingRepo ...repository.WorkspaceClusterBindingRepository, ) *InstanceService { + var workspaceBindingRepo repository.WorkspaceClusterBindingRepository + if len(bindingRepo) > 0 { + workspaceBindingRepo = bindingRepo[0] + } return &InstanceService{ instanceRepo: instanceRepo, clusterRepo: clusterRepo, registryRepo: registryRepo, + bindingRepo: workspaceBindingRepo, helmClient: helmClient, ociClient: ociClient, entryClient: entryClient, } } +func (s *InstanceService) SetDiagnosticsClient(client repository.InstanceDiagnosticsClient) { + s.diagClient = client +} + +func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.WorkspaceRepository, tenantClient repository.TenantKubeClient) { + s.workspaceRepo = workspaceRepo + s.tenantClient = tenantClient +} + const chartCacheDir = "/tmp/charts" func (s *InstanceService) chartArchivePath(instance *entity.Instance) string { @@ -62,8 +84,14 @@ func (s *InstanceService) downloadChart(ctx context.Context, registry *entity.Re // CreateInstance 创建(安装)新实例 func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.Instance) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID instance.ID = uuid.New().String() + instance.WorkspaceID = principal.WorkspaceID + instance.OwnerID = principal.UserID // 验证 if err := instance.Validate(); err != nil { @@ -75,12 +103,25 @@ func 
(s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I if err != nil { return entity.ErrClusterNotFound } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return entity.ErrClusterNotFound + } // 检查 Registry 是否存在 registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID) if err != nil { return entity.ErrRegistryNotFound } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return entity.ErrRegistryNotFound + } + if err := s.applyNamespacePolicy(ctx, principal, cluster, instance); err != nil { + return err + } + enforceNamespaceValues(instance) + if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil { + return err + } // 检查实例是否已存在 existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name) @@ -111,13 +152,24 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I // GetInstance 获取实例 func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.Instance, error) { - return s.instanceRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + instance, err := s.instanceRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !s.canReadInstance(principal, instance) { + return nil, entity.ErrInstanceNotFound + } + return instance, nil } // GetInstanceStatus 获取实例实时状态 func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*entity.Instance, error) { // 从数据库获取基本信息 - instance, err := s.instanceRepo.GetByID(ctx, id) + instance, err := s.GetInstance(ctx, id) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -143,11 +195,20 @@ func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*en // UpdateInstance 更新(升级)实例 func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.Instance) error { + 
principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 existingInstance, err := s.instanceRepo.GetByID(ctx, instance.ID) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, existingInstance) { + return entity.ErrForbidden + } + instance.WorkspaceID = existingInstance.WorkspaceID + instance.OwnerID = existingInstance.OwnerID // 获取集群信息 cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID) @@ -161,6 +222,8 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I return entity.ErrRegistryNotFound } + instance.Namespace = existingInstance.Namespace + enforceNamespaceValues(instance) instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade") if err := s.instanceRepo.Update(ctx, instance); err != nil { return err @@ -182,11 +245,18 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I // DeleteInstance 删除(卸载)实例 func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 instance, err := s.instanceRepo.GetByID(ctx, id) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, instance) { + return entity.ErrForbidden + } // 获取集群信息 cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID) @@ -208,11 +278,18 @@ func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error { // RollbackInstance 回滚实例 func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revision int) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 instance, err := s.instanceRepo.GetByID(ctx, id) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, instance) { + return entity.ErrForbidden + } // 获取集群信息 cluster, err := 
s.clusterRepo.GetByID(ctx, instance.ClusterID) @@ -235,7 +312,7 @@ func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revis // GetInstanceHistory 获取实例历史 func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]*entity.ReleaseHistory, error) { // 检查实例是否存在 - instance, err := s.instanceRepo.GetByID(ctx, id) + instance, err := s.GetInstance(ctx, id) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -252,18 +329,35 @@ func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([] // ListInstancesByCluster 列出集群的所有实例 func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } // 检查集群是否存在 - _, err := s.clusterRepo.GetByID(ctx, clusterID) + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) if err != nil { return nil, entity.ErrClusterNotFound } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } - return s.instanceRepo.ListByCluster(ctx, clusterID) + instances, err := s.instanceRepo.ListByCluster(ctx, clusterID) + if err != nil { + return nil, err + } + visible := make([]*entity.Instance, 0, len(instances)) + for _, instance := range instances { + if s.canReadInstance(principal, instance) { + visible = append(visible, instance) + } + } + return visible, nil } // ListInstanceEntries 列出实例关联的入口信息(Service / Ingress) func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) { - instance, err := s.instanceRepo.GetByID(ctx, instanceID) + instance, err := s.GetInstance(ctx, instanceID) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -283,6 +377,187 @@ func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, in return s.entryClient.ListEntries(ctx, 
cluster, instance) } +func (s *InstanceService) GetInstanceDiagnostics(ctx context.Context, clusterID, instanceID string, tailLines int64) (*entity.InstanceDiagnostics, error) { + instance, err := s.GetInstance(ctx, instanceID) + if err != nil { + return nil, entity.ErrInstanceNotFound + } + if instance.ClusterID != clusterID { + return nil, entity.ErrInstanceNotFound + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if s.diagClient == nil { + return nil, fmt.Errorf("instance diagnostics client is not configured") + } + return s.diagClient.GetDiagnostics(ctx, cluster, instance, tailLines) +} + +func (s *InstanceService) canReadInstance(principal *authz.Principal, instance *entity.Instance) bool { + if principal.IsAdmin() { + return true + } + return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID +} + +func (s *InstanceService) canWriteInstance(principal *authz.Principal, instance *entity.Instance) bool { + if principal.IsAdmin() { + return true + } + return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID +} + +func enforceNamespaceValues(instance *entity.Instance) { + if instance == nil || instance.Namespace == "" { + return + } + if instance.Values == nil { + instance.Values = map[string]interface{}{} + } + instance.Values["namespace"] = instance.Namespace + setExistingStringValue(instance.Values, "namespaceOverride", instance.Namespace) + setExistingStringValue(instance.Values, "targetNamespace", instance.Namespace) + setExistingNestedStringValue(instance.Values, "global", "namespace", instance.Namespace) + setExistingNestedStringValue(instance.Values, "global", "namespaceOverride", instance.Namespace) +} + +func setExistingStringValue(values map[string]interface{}, key, namespace string) { + if _, ok := values[key]; ok { + values[key] = namespace + } +} + +func setExistingNestedStringValue(values 
map[string]interface{}, parent, key, namespace string) { + child, ok := values[parent].(map[string]interface{}) + if !ok { + return + } + if _, ok := child[key]; ok { + child[key] = namespace + } +} + +func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error { + if principal.IsAdmin() { + if isProtectedSystemNamespace(instance.Namespace) { + return entity.ErrInvalidNamespace + } + return nil + } + if isReservedNamespace(instance.Namespace) { + return entity.ErrInvalidNamespace + } + if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID { + namespace := principal.Namespace + if namespace == "" { + namespace = entity.NamespaceForWorkspace(principal.WorkspaceName) + } + if s.bindingRepo != nil { + if binding, err := s.bindingRepo.Get(ctx, principal.WorkspaceID, cluster.ID); err == nil && binding != nil && binding.Namespace != "" { + namespace = binding.Namespace + } + } + instance.Namespace = namespace + return nil + } + if instance.Namespace == "" { + if cluster.DefaultNamespace != "" { + instance.Namespace = cluster.DefaultNamespace + } else if principal.Namespace != "" { + instance.Namespace = principal.Namespace + } else { + instance.Namespace = entity.NamespaceForWorkspace(principal.Username) + } + } + return nil +} + +func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error { + if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil { + return nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return err + } + if workspace.Status == entity.WorkspaceSuspended { + return entity.ErrWorkspaceSuspended + } + binding := entity.NewTenantBinding(instance.Namespace) + binding.ServiceAccountName = workspace.K8sSAName + binding.ResourceQuotaHard = 
instanceResourceQuotaHard(workspace) + if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil { + return err + } + if s.bindingRepo != nil { + _ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{ + ID: uuid.New().String(), + WorkspaceID: workspace.ID, + ClusterID: cluster.ID, + Namespace: instance.Namespace, + ServiceAccount: workspace.K8sSAName, + QuotaCPU: workspace.QuotaCPU, + QuotaMemory: workspace.QuotaMemory, + QuotaGPU: workspace.QuotaGPU, + QuotaGPUMem: workspace.QuotaGPUMem, + Status: "active", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + }) + } + return nil +} + +func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList { + hard := corev1.ResourceList{} + addQuantity := func(name corev1.ResourceName, value string) { + value = normalizeStandardQuotaQuantity(value) + if value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[name] = quantity + } + } + addGPUMemoryQuantity := func(value string) { + value, err := normalizeGPUMemoryQuota(value) + if err != nil || value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity + } + } + if workspace == nil { + return hard + } + addQuantity(corev1.ResourceName("requests.cpu"), workspace.QuotaCPU) + addQuantity(corev1.ResourceName("requests.memory"), workspace.QuotaMemory) + addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU) + addGPUMemoryQuantity(workspace.QuotaGPUMem) + return hard +} + +func isReservedNamespace(namespace string) bool { + switch namespace { + case "default", "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + +func isProtectedSystemNamespace(namespace string) bool { + switch namespace { + case "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + // executeAndSyncInstall 异步执行安装并监控状态 
func (s *InstanceService) executeAndSyncInstall(ctx context.Context, instanceID string, cluster *entity.Cluster, registry *entity.Registry, instance *entity.Instance) { // 执行 Helm 安装 @@ -338,7 +613,7 @@ func (s *InstanceService) executeAndSyncRollback(ctx context.Context, instanceID func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string) { // 执行 Helm 卸载 err := s.helmClient.Uninstall(ctx, cluster, releaseName, namespace) - + // 获取实例 instance, getErr := s.instanceRepo.GetByID(ctx, instanceID) if getErr != nil { @@ -360,7 +635,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI // 卸载成功,标记为已卸载 instance.MarkSuccess(entity.StatusUninstalled, instance.Revision, "Instance uninstalled successfully") _ = s.instanceRepo.Update(ctx, instance) - + // 验证卸载是否完成:尝试获取状态,如果获取不到说明已卸载 time.Sleep(3 * time.Second) _, statusErr := s.helmClient.GetStatus(ctx, cluster, releaseName, namespace) @@ -377,7 +652,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI // syncInstanceStatus 同步实例状态(定期检查 Helm 状态并更新数据库) func (s *InstanceService) syncInstanceStatus(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string, operation entity.InstanceOperation) { - maxAttempts := 30 // 最多尝试30次(约5分钟) + maxAttempts := 30 // 最多尝试30次(约5分钟) interval := 10 * time.Second // 每10秒检查一次 for i := 0; i < maxAttempts; i++ { diff --git a/backend/internal/domain/service/instance_service_test.go b/backend/internal/domain/service/instance_service_test.go index ae9d53c..d7b1e43 100644 --- a/backend/internal/domain/service/instance_service_test.go +++ b/backend/internal/domain/service/instance_service_test.go @@ -4,21 +4,26 @@ import ( "context" "errors" "testing" + "time" persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock" "github.com/ocdp/cluster-service/internal/domain/entity" 
"github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) { - ctx := context.Background() + principal := &authz.Principal{UserID: "user-1", Username: "tester", Role: authz.RoleUser, WorkspaceID: entity.DefaultWorkspaceID} + ctx := authz.WithPrincipal(context.Background(), principal) instanceRepo := persistencemock.NewInstanceRepositoryMock() instance := &entity.Instance{ - ID: "inst-1", - ClusterID: "cluster-1", - Name: "demo", - Namespace: "default", + ID: "inst-1", + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: "user-1", + ClusterID: "cluster-1", + Name: "demo", + Namespace: "default", } if err := instanceRepo.Create(ctx, instance); err != nil { t.Fatalf("failed to seed instance: %v", err) @@ -40,8 +45,63 @@ func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) { t.Fatalf("DeleteInstance returned error: %v", err) } - if _, err := instanceRepo.GetByID(ctx, instance.ID); !errors.Is(err, entity.ErrInstanceNotFound) { - t.Fatalf("expected instance removed, got err=%v", err) + waitForInstanceDeleted(t, ctx, instanceRepo, instance.ID) +} + +func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) { + instance := &entity.Instance{ + Namespace: "ocdp-u-alice", + Values: map[string]interface{}{ + "namespace": "default", + "namespaceOverride": "default", + "targetNamespace": "default", + "global": map[string]interface{}{ + "namespace": "default", + "namespaceOverride": "default", + }, + "image": map[string]interface{}{ + "repository": "nginx", + }, + }, + } + + enforceNamespaceValues(instance) + + if instance.Values["namespace"] != "ocdp-u-alice" { + t.Fatalf("expected top-level namespace to be enforced, got %#v", instance.Values["namespace"]) + } + if instance.Values["namespaceOverride"] != "ocdp-u-alice" { + t.Fatalf("expected namespaceOverride to be enforced, got %#v", instance.Values["namespaceOverride"]) + } + if 
instance.Values["targetNamespace"] != "ocdp-u-alice" { + t.Fatalf("expected targetNamespace to be enforced, got %#v", instance.Values["targetNamespace"]) + } + global, ok := instance.Values["global"].(map[string]interface{}) + if !ok { + t.Fatalf("expected global map, got %#v", instance.Values["global"]) + } + if global["namespace"] != "ocdp-u-alice" || global["namespaceOverride"] != "ocdp-u-alice" { + t.Fatalf("expected global namespace keys to be enforced, got %#v", global) + } +} + +func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) { + t.Helper() + + deadline := time.After(2 * time.Second) + ticker := time.NewTicker(10 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-deadline: + _, err := repo.GetByID(ctx, id) + t.Fatalf("expected instance removed, got err=%v", err) + case <-ticker.C: + if _, err := repo.GetByID(ctx, id); errors.Is(err, entity.ErrInstanceNotFound) { + return + } + } } } diff --git a/backend/internal/domain/service/monitoring_service.go b/backend/internal/domain/service/monitoring_service.go index b07c1fb..1677ee1 100644 --- a/backend/internal/domain/service/monitoring_service.go +++ b/backend/internal/domain/service/monitoring_service.go @@ -6,11 +6,12 @@ import ( "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // MonitoringService 监控服务 type MonitoringService struct { - clusterRepo repository.ClusterRepository + clusterRepo repository.ClusterRepository metricsClient repository.MetricsClient } @@ -20,13 +21,24 @@ func NewMonitoringService( metricsClient repository.MetricsClient, ) *MonitoringService { return &MonitoringService{ - clusterRepo: clusterRepo, + clusterRepo: clusterRepo, metricsClient: metricsClient, } } // GetClusterMonitoring 获取单个集群的监控信息 func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID string) 
(*entity.ClusterMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } metrics, err := s.metricsClient.GetClusterMetrics(ctx, clusterID) if err != nil { return nil, fmt.Errorf("failed to get cluster metrics: %w", err) @@ -36,6 +48,10 @@ func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID // ListClusterMonitoring 获取所有集群的监控信息 func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entity.ClusterMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } // 获取所有集群 clusters, err := s.clusterRepo.List(ctx) if err != nil { @@ -45,6 +61,9 @@ func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entit // 获取每个集群的监控数据 result := make([]*entity.ClusterMetrics, 0, len(clusters)) for _, cluster := range clusters { + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + continue + } metrics, err := s.metricsClient.GetClusterMetrics(ctx, cluster.ID) if err != nil { // 如果某个集群获取失败,记录错误但继续 @@ -93,10 +112,20 @@ func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.M // GetNodeMetrics 获取集群的节点指标 func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, 
entity.ErrClusterNotFound + } nodes, err := s.metricsClient.GetNodeMetrics(ctx, clusterID) if err != nil { return nil, fmt.Errorf("failed to get node metrics: %w", err) } return nodes, nil } - diff --git a/backend/internal/domain/service/quota_quantity.go b/backend/internal/domain/service/quota_quantity.go new file mode 100644 index 0000000..232ede2 --- /dev/null +++ b/backend/internal/domain/service/quota_quantity.go @@ -0,0 +1,54 @@ +package service + +import ( + "strconv" + "strings" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +func normalizeStandardQuotaQuantity(value string) string { + value = strings.TrimSpace(value) + upper := strings.ToUpper(value) + switch { + case strings.HasSuffix(upper, "MB"): + return strings.TrimSpace(value[:len(value)-2]) + "M" + case strings.HasSuffix(upper, "GB"): + return strings.TrimSpace(value[:len(value)-2]) + "G" + default: + return value + } +} + +func normalizeGPUMemoryQuota(value string) (string, error) { + value = strings.TrimSpace(value) + if value == "" { + return "", nil + } + upper := strings.ToUpper(value) + multiplier := int64(1) + number := value + switch { + case strings.HasSuffix(upper, "MB"): + number = strings.TrimSpace(value[:len(value)-2]) + case strings.HasSuffix(upper, "M"): + number = strings.TrimSpace(value[:len(value)-1]) + case strings.HasSuffix(upper, "GB"): + number = strings.TrimSpace(value[:len(value)-2]) + multiplier = 1000 + case strings.HasSuffix(upper, "G"): + number = strings.TrimSpace(value[:len(value)-1]) + multiplier = 1000 + case strings.HasSuffix(upper, "GIB"): + number = strings.TrimSpace(value[:len(value)-3]) + multiplier = 1024 + case strings.HasSuffix(upper, "GI"): + number = strings.TrimSpace(value[:len(value)-2]) + multiplier = 1024 + } + parsed, err := strconv.ParseInt(number, 10, 64) + if err != nil || parsed < 0 { + return "", entity.ErrInvalidTenantResourceQuota + } + return strconv.FormatInt(parsed*multiplier, 10), nil +} diff --git 
a/backend/internal/domain/service/registry_service.go b/backend/internal/domain/service/registry_service.go index 92e7f80..99e66ab 100644 --- a/backend/internal/domain/service/registry_service.go +++ b/backend/internal/domain/service/registry_service.go @@ -5,6 +5,7 @@ import ( "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // RegistryService Registry 管理领域服务 @@ -26,8 +27,21 @@ func NewRegistryService( // CreateRegistry 创建新 Registry func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.Registry) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID registry.ID = uuid.New().String() + registry.OwnerID = principal.UserID + registry.WorkspaceID = principal.WorkspaceID + if principal.IsAdmin() && registry.WorkspaceID == "" { + registry.WorkspaceID = entity.DefaultWorkspaceID + } + if !principal.IsAdmin() && registry.Visibility == authz.VisibilityGlobalShared { + return entity.ErrForbidden + } + registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility) // 验证 if err := registry.Validate(); err != nil { @@ -35,9 +49,11 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R } // 检查是否已存在 - existingRegistry, _ := s.registryRepo.GetByName(ctx, registry.Name) - if existingRegistry != nil { - return entity.ErrRegistryExists + registries, _ := s.registryRepo.List(ctx) + for _, existingRegistry := range registries { + if existingRegistry.Name == registry.Name && existingRegistry.WorkspaceID == registry.WorkspaceID && existingRegistry.OwnerID == registry.OwnerID { + return entity.ErrRegistryExists + } } return s.registryRepo.Create(ctx, registry) @@ -45,16 +61,41 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R // GetRegistry 获取 Registry func (s 
*RegistryService) GetRegistry(ctx context.Context, id string) (*entity.Registry, error) { - return s.registryRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registry, err := s.registryRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return nil, entity.ErrRegistryNotFound + } + return registry, nil } // UpdateRegistry 更新 Registry func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.Registry) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.registryRepo.GetByID(ctx, registry.ID) + existing, err := s.registryRepo.GetByID(ctx, registry.ID) if err != nil { return entity.ErrRegistryNotFound } + if !authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) { + return entity.ErrForbidden + } + registry.WorkspaceID = existing.WorkspaceID + registry.OwnerID = existing.OwnerID + if principal.IsAdmin() { + registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility) + } else { + registry.Visibility = existing.Visibility + } // 验证 if err := registry.Validate(); err != nil { @@ -66,27 +107,47 @@ func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.R // DeleteRegistry 删除 Registry func (s *RegistryService) DeleteRegistry(ctx context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.registryRepo.GetByID(ctx, id) + registry, err := s.registryRepo.GetByID(ctx, id) if err != nil { return entity.ErrRegistryNotFound } + if !authz.CanWriteResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return entity.ErrForbidden + } return s.registryRepo.Delete(ctx, id) } // 
ListRegistries 列出所有 Registries func (s *RegistryService) ListRegistries(ctx context.Context) ([]*entity.Registry, error) { - return s.registryRepo.List(ctx) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registries, err := s.registryRepo.List(ctx) + if err != nil { + return nil, err + } + visible := make([]*entity.Registry, 0, len(registries)) + for _, registry := range registries { + if authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + visible = append(visible, registry) + } + } + return visible, nil } // CheckHealth 检查 Registry 健康状态 func (s *RegistryService) CheckHealth(ctx context.Context, id string) error { - registry, err := s.registryRepo.GetByID(ctx, id) + registry, err := s.GetRegistry(ctx, id) if err != nil { return entity.ErrRegistryNotFound } return s.ociClient.CheckHealth(ctx, registry) } - diff --git a/backend/internal/domain/service/workspace_service.go b/backend/internal/domain/service/workspace_service.go new file mode 100644 index 0000000..98d1122 --- /dev/null +++ b/backend/internal/domain/service/workspace_service.go @@ -0,0 +1,308 @@ +package service + +import ( + "context" + "sort" + "time" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +type WorkspaceService struct { + workspaceRepo repository.WorkspaceRepository + bindingRepo repository.WorkspaceClusterBindingRepository + clusterRepo repository.ClusterRepository + tenantClient repository.TenantKubeClient + auditRepo repository.AuditLogRepository +} + +func NewWorkspaceService( + workspaceRepo repository.WorkspaceRepository, + bindingRepo repository.WorkspaceClusterBindingRepository, + clusterRepo repository.ClusterRepository, + tenantClient 
repository.TenantKubeClient, + auditRepo repository.AuditLogRepository, +) *WorkspaceService { + return &WorkspaceService{ + workspaceRepo: workspaceRepo, + bindingRepo: bindingRepo, + clusterRepo: clusterRepo, + tenantClient: tenantClient, + auditRepo: auditRepo, + } +} + +func (s *WorkspaceService) ListWorkspaces(ctx context.Context) ([]*entity.Workspace, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if principal.IsAdmin() { + return s.workspaceRepo.List(ctx) + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return nil, err + } + return []*entity.Workspace{workspace}, nil +} + +func (s *WorkspaceService) CreateWorkspace(ctx context.Context, name string) (*entity.Workspace, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + workspace := entity.NewWorkspace(name, principal.UserID) + workspace.ID = uuid.New().String() + if err := s.workspaceRepo.Create(ctx, workspace); err != nil { + return nil, err + } + s.audit(ctx, principal, "create", "workspace", workspace.ID, workspace.Name, nil) + return workspace, nil +} + +func (s *WorkspaceService) EnsureClusterBinding(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() && workspaceID != principal.WorkspaceID { + return nil, entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return nil, err + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !principal.IsAdmin() && !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, 
entity.ErrClusterNotFound + } + binding := &entity.WorkspaceClusterBinding{ + ID: uuid.New().String(), + WorkspaceID: workspace.ID, + ClusterID: cluster.ID, + Namespace: workspace.K8sNamespace, + ServiceAccount: workspace.K8sSAName, + QuotaCPU: workspace.QuotaCPU, + QuotaMemory: workspace.QuotaMemory, + QuotaGPU: workspace.QuotaGPU, + QuotaGPUMem: workspace.QuotaGPUMem, + Status: "active", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace) + if s.tenantClient != nil { + if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil { + return nil, err + } + } + if err := s.bindingRepo.Upsert(ctx, binding); err != nil { + return nil, err + } + s.audit(ctx, principal, "init", "workspace_cluster_binding", binding.ID, binding.Namespace, map[string]interface{}{"cluster_id": clusterID}) + return binding, nil +} + +func (s *WorkspaceService) IssueKubeconfig(ctx context.Context, workspaceID, clusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() && workspaceID != principal.WorkspaceID { + return nil, entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return nil, err + } + if workspace.Status == entity.WorkspaceSuspended { + return nil, entity.ErrWorkspaceSuspended + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !principal.IsAdmin() && !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + binding, err := s.bindingRepo.Get(ctx, workspaceID, clusterID) + if err != nil { + binding, err = 
s.EnsureClusterBinding(ctx, workspaceID, clusterID) + if err != nil { + return nil, err + } + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace) + kubeconfig, err := s.tenantClient.IssueKubeconfig(ctx, cluster, tenantBinding, ttl) + if err != nil { + return nil, err + } + s.audit(ctx, principal, "issue_kubeconfig", "workspace_cluster_binding", binding.ID, binding.Namespace, map[string]interface{}{"cluster_id": clusterID, "ttl_seconds": int64(entity.TenantTokenTTL(ttl).Seconds())}) + return kubeconfig, nil +} + +func resourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList { + hard := corev1.ResourceList{} + addQuantity := func(name corev1.ResourceName, value string) { + value = normalizeStandardQuotaQuantity(value) + if value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[name] = quantity + } + } + addGPUMemoryQuantity := func(value string) { + value, err := normalizeGPUMemoryQuota(value) + if err != nil || value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity + } + } + if workspace == nil { + return hard + } + addQuantity(corev1.ResourceName("requests.cpu"), workspace.QuotaCPU) + addQuantity(corev1.ResourceName("requests.memory"), workspace.QuotaMemory) + addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU) + addGPUMemoryQuantity(workspace.QuotaGPUMem) + return hard +} + +func (s *WorkspaceService) IssueCurrentKubeconfig(ctx context.Context, requestedClusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if requestedClusterID != "" { + return s.IssueKubeconfig(ctx, principal.WorkspaceID, requestedClusterID, ttl) + } 
+ workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return nil, err + } + if workspace.DefaultClusterID != "" { + return s.IssueKubeconfig(ctx, principal.WorkspaceID, workspace.DefaultClusterID, ttl) + } + return s.IssueDefaultKubeconfig(ctx, ttl) +} + +func (s *WorkspaceService) IssueDefaultKubeconfig(ctx context.Context, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + clusters, err := s.clusterRepo.List(ctx) + if err != nil { + return nil, err + } + candidates := make([]*entity.Cluster, 0, len(clusters)) + for _, cluster := range clusters { + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + continue + } + switch cluster.Visibility { + case authz.VisibilityGlobalShared: + candidates = append(candidates, cluster) + case authz.VisibilityWorkspaceShared: + if cluster.WorkspaceID == principal.WorkspaceID { + candidates = append(candidates, cluster) + } + } + } + sort.SliceStable(candidates, func(i, j int) bool { + leftRank := defaultKubeconfigClusterRank(candidates[i]) + rightRank := defaultKubeconfigClusterRank(candidates[j]) + if leftRank != rightRank { + return leftRank < rightRank + } + return candidates[i].Name < candidates[j].Name + }) + var firstIssueErr error + for _, cluster := range candidates { + if kubeconfig, err := s.IssueKubeconfig(ctx, principal.WorkspaceID, cluster.ID, ttl); err == nil { + return kubeconfig, nil + } else if firstIssueErr == nil { + firstIssueErr = err + } + } + if firstIssueErr != nil { + return nil, firstIssueErr + } + return nil, entity.ErrClusterNotFound +} + +func defaultKubeconfigClusterRank(cluster *entity.Cluster) int { + switch cluster.Visibility { + case authz.VisibilityGlobalShared: + return 0 + case authz.VisibilityWorkspaceShared: + return 1 + default: + return 2 + } +} + +func (s *WorkspaceService) SuspendWorkspace(ctx 
context.Context, workspaceID string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return err + } + workspace.Status = entity.WorkspaceSuspended + if err := s.workspaceRepo.Update(ctx, workspace); err != nil { + return err + } + clusters, _ := s.clusterRepo.List(ctx) + for _, cluster := range clusters { + binding, err := s.bindingRepo.Get(ctx, workspaceID, cluster.ID) + if err != nil { + continue + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + _ = s.tenantClient.SuspendTenant(ctx, cluster, tenantBinding) + } + s.audit(ctx, principal, "suspend", "workspace", workspace.ID, workspace.Name, nil) + return nil +} + +func (s *WorkspaceService) audit(ctx context.Context, principal *authz.Principal, action, resourceType, resourceID, resourceName string, details map[string]interface{}) { + if s.auditRepo == nil || principal == nil { + return + } + _ = s.auditRepo.Create(ctx, &entity.AuditLog{ + WorkspaceID: principal.WorkspaceID, + UserID: principal.UserID, + Action: action, + ResourceType: resourceType, + ResourceID: resourceID, + ResourceName: resourceName, + Details: details, + CreatedAt: time.Now(), + }) +} diff --git a/backend/internal/pkg/authz/authz.go b/backend/internal/pkg/authz/authz.go new file mode 100644 index 0000000..b8e2d1e --- /dev/null +++ b/backend/internal/pkg/authz/authz.go @@ -0,0 +1,144 @@ +package authz + +import ( + "context" + "errors" +) + +type contextKey string + +const principalKey contextKey = "principal" + +const ( + RoleAdmin = "admin" + RoleUser = "user" +) + +const ( + VisibilityPrivate = "private" + VisibilityWorkspaceShared = "workspace_shared" + VisibilityGlobalShared = "global_shared" +) + +var ( + ErrUnauthenticated = errors.New("authentication 
required") + ErrForbidden = errors.New("permission denied") +) + +type Principal struct { + UserID string + Username string + Role string + WorkspaceID string + WorkspaceName string + Namespace string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + Permissions []string + PermissionVersion int +} + +func WithPrincipal(ctx context.Context, principal *Principal) context.Context { + return context.WithValue(ctx, principalKey, principal) +} + +func PrincipalFromContext(ctx context.Context) (*Principal, bool) { + principal, ok := ctx.Value(principalKey).(*Principal) + return principal, ok && principal != nil +} + +func RequirePrincipal(ctx context.Context) (*Principal, error) { + principal, ok := PrincipalFromContext(ctx) + if !ok { + return nil, ErrUnauthenticated + } + return principal, nil +} + +func (p *Principal) IsAdmin() bool { + return p != nil && p.Role == RoleAdmin +} + +func CanReadResource(p *Principal, workspaceID, ownerID, visibility string) bool { + if p == nil { + return false + } + if p.IsAdmin() { + return true + } + switch visibility { + case VisibilityGlobalShared: + return true + case VisibilityWorkspaceShared: + return workspaceID != "" && workspaceID == p.WorkspaceID + default: + return ownerID != "" && ownerID == p.UserID + } +} + +func CanWriteResource(p *Principal, workspaceID, ownerID, visibility string) bool { + if p == nil { + return false + } + if p.IsAdmin() { + return true + } + if visibility == VisibilityGlobalShared { + return false + } + return workspaceID != "" && workspaceID == p.WorkspaceID && ownerID != "" && ownerID == p.UserID +} + +func NormalizeVisibility(role, requested string) string { + switch requested { + case VisibilityWorkspaceShared: + if role == RoleAdmin { + return requested + } + return VisibilityPrivate + case VisibilityGlobalShared: + if role == RoleAdmin { + return requested + } + return VisibilityPrivate + case VisibilityPrivate: + return requested + 
default: + return VisibilityPrivate + } +} + +func PermissionsForRole(role string) []string { + if role == RoleAdmin { + return []string{ + "*", + "home:view", + "workspaces:manage", + "users:manage", + "configuration:clusters:manage", + "configuration:registries:manage", + "artifact:registries:view", + "artifact:instances:manage", + "monitoring:clusters:view", + "clusters:manage:any", + "registries:manage:any", + "instances:manage:any", + "kubeconfig:issue:any", + } + } + return []string{ + "home:view", + "configuration:clusters:manage_own", + "configuration:registries:manage_own", + "artifact:registries:view", + "artifact:instances:manage_own", + "monitoring:clusters:view", + "clusters:manage:own", + "registries:manage:own", + "instances:manage:own", + "kubeconfig:issue:own", + } +} diff --git a/backend/internal/pkg/crypto/crypto_test.go b/backend/internal/pkg/crypto/crypto_test.go index 9b5dd8e..fef1b8f 100644 --- a/backend/internal/pkg/crypto/crypto_test.go +++ b/backend/internal/pkg/crypto/crypto_test.go @@ -12,7 +12,7 @@ func TestAESEncryptor(t *testing.T) { plaintext string }{ {"simple password", "password123"}, - {"harbor password", "BWGDIP@ssw0rd1401#"}, + {"registry password", "registry-password-example"}, {"empty string", ""}, {"long certificate", "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkekNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pP"}, {"unicode", "密码123!@#"}, @@ -121,4 +121,3 @@ func TestEncryptionConsistency(t *testing.T) { t.Error("Decryption should produce original plaintext") } } - diff --git a/backend/internal/pkg/jwt/jwt.go b/backend/internal/pkg/jwt/jwt.go index 5133c73..1ca221c 100644 --- a/backend/internal/pkg/jwt/jwt.go +++ b/backend/internal/pkg/jwt/jwt.go @@ -3,13 +3,13 @@ package jwt import ( "fmt" "time" - + "github.com/golang-jwt/jwt/v5" ) const ( - AccessTokenDuration = 24 * time.Hour // Access Token 有效期 - RefreshTokenDuration = 7 * 24 * time.Hour // Refresh Token 有效期 + AccessTokenDuration = 24 * time.Hour // Access Token 有效期 + 
RefreshTokenDuration = 7 * 24 * time.Hour // Refresh Token 有效期 ) // JWTManager JWT 管理器 @@ -26,98 +26,133 @@ func NewJWTManager(secretKey string) *JWTManager { // Claims JWT Claims type Claims struct { - UserID string `json:"user_id"` - Username string `json:"username"` + UserID string `json:"user_id"` + Username string `json:"username"` + Role string `json:"role"` + WorkspaceID string `json:"workspace_id"` + TokenType string `json:"token_type"` jwt.RegisteredClaims } // Generate 生成 Access Token 和 Refresh Token -func (m *JWTManager) Generate(userID, username string) (accessToken, refreshToken string, err error) { +func (m *JWTManager) Generate(userID, username, role, workspaceID string) (accessToken, refreshToken string, err error) { // 生成 Access Token accessClaims := &Claims{ - UserID: userID, - Username: username, + UserID: userID, + Username: username, + Role: role, + WorkspaceID: workspaceID, + TokenType: "access", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(AccessTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + accessTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, accessClaims) accessToken, err = accessTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", "", fmt.Errorf("failed to sign access token: %w", err) } - + // 生成 Refresh Token refreshClaims := &Claims{ - UserID: userID, - Username: username, + UserID: userID, + Username: username, + Role: role, + WorkspaceID: workspaceID, + TokenType: "refresh", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(RefreshTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + refreshTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, refreshClaims) refreshToken, err = refreshTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", "", fmt.Errorf("failed to sign refresh token: %w", err) } - + return accessToken, refreshToken, nil } // Verify 验证 Token func (m *JWTManager) 
Verify(tokenString string) (userID, username string, err error) { - userID, username, _, err = m.VerifyWithIssuedAt(tokenString) - return userID, username, err + claims, err := m.VerifyClaims(tokenString, "") + if err != nil { + return "", "", err + } + return claims.UserID, claims.Username, nil +} + +func (m *JWTManager) VerifyAccess(tokenString string) (*Claims, error) { + return m.VerifyClaims(tokenString, "access") +} + +func (m *JWTManager) VerifyRefresh(tokenString string) (*Claims, error) { + return m.VerifyClaims(tokenString, "refresh") } -// VerifyWithIssuedAt 验证 Token 并返回签发时间 func (m *JWTManager) VerifyWithIssuedAt(tokenString string) (userID, username string, issuedAt int64, err error) { + claims, err := m.VerifyClaims(tokenString, "access") + if err != nil { + return "", "", 0, err + } + return claims.UserID, claims.Username, claims.IssuedAt.Unix(), nil +} + +func (m *JWTManager) VerifyClaims(tokenString, expectedType string) (*Claims, error) { token, err := jwt.ParseWithClaims(tokenString, &Claims{}, func(token *jwt.Token) (interface{}, error) { if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) } return []byte(m.secretKey), nil }) - + if err != nil { - return "", "", 0, fmt.Errorf("failed to parse token: %w", err) + return nil, fmt.Errorf("failed to parse token: %w", err) } - - if claims, ok := token.Claims.(*Claims); ok && token.Valid { - return claims.UserID, claims.Username, claims.IssuedAt.Unix(), nil + + claims, ok := token.Claims.(*Claims) + if !ok || !token.Valid { + return nil, fmt.Errorf("invalid token") } - - return "", "", 0, fmt.Errorf("invalid token") + if expectedType != "" && claims.TokenType != expectedType { + return nil, fmt.Errorf("invalid token type") + } + if claims.IssuedAt == nil { + return nil, fmt.Errorf("token missing issued_at") + } + return claims, nil } // Refresh 刷新 Token func (m *JWTManager) Refresh(refreshToken string) (string, error) { 
// 验证 Refresh Token - userID, username, err := m.Verify(refreshToken) + claims, err := m.VerifyRefresh(refreshToken) if err != nil { return "", fmt.Errorf("invalid refresh token: %w", err) } - + // 生成新的 Access Token accessClaims := &Claims{ - UserID: userID, - Username: username, + UserID: claims.UserID, + Username: claims.Username, + Role: claims.Role, + WorkspaceID: claims.WorkspaceID, + TokenType: "access", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(AccessTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + accessTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, accessClaims) newAccessToken, err := accessTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", fmt.Errorf("failed to sign new access token: %w", err) } - + return newAccessToken, nil } - diff --git a/backend/scripts/docker-quick-start.sh b/backend/scripts/docker-quick-start.sh index ac761e1..2f0174f 100755 --- a/backend/scripts/docker-quick-start.sh +++ b/backend/scripts/docker-quick-start.sh @@ -197,8 +197,8 @@ start_pgadmin() { echo "" print_info "访问地址: http://localhost:5050" print_info "登录信息:" - echo " 📧 邮箱: admin@ocdp.local" - echo " 🔑 密码: admin" + echo " 📧 邮箱: ${PGADMIN_EMAIL:-admin@ocdp.local}" + echo " 🔑 密码: ${PGADMIN_PASSWORD:-change-me}" echo "" print_info "连接数据库配置:" echo " 📍 Host: postgres" @@ -270,4 +270,3 @@ main() { # 运行主函数 main - diff --git a/backend/scripts/generate-bootstrap-config.sh b/backend/scripts/generate-bootstrap-config.sh index cb9c7b6..569b434 100755 --- a/backend/scripts/generate-bootstrap-config.sh +++ b/backend/scripts/generate-bootstrap-config.sh @@ -23,13 +23,7 @@ TMP_FILE=$(mktemp) cat > "$TMP_FILE" <<'EOF' { "enabled": true, - "users": [ - { - "username": "admin", - "password": "admin123", - "email": "admin@example.com" - } - ], + "users": [], "registries": [], "clusters": [] } @@ -38,6 +32,38 @@ EOF echo "📋 请按提示输入信息..." 
echo "" +# ===== Admin 用户配置 ===== +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "👤 Admin 用户配置" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +read -p "是否添加初始管理员用户? (y/n) [y]: " ADD_ADMIN +ADD_ADMIN=${ADD_ADMIN:-y} + +if [[ "$ADD_ADMIN" == "y" ]]; then + read -p "Admin 用户名: " ADMIN_USER + read -sp "Admin 密码: " ADMIN_PASS + echo "" + read -p "Admin 邮箱 [${ADMIN_USER}@example.local]: " ADMIN_EMAIL + ADMIN_EMAIL=${ADMIN_EMAIL:-"${ADMIN_USER}@example.local"} + + if [[ -z "$ADMIN_USER" || -z "$ADMIN_PASS" ]]; then + echo "❌ Admin 用户名和密码不能为空" + exit 1 + fi + + TMP_USER=$(jq -n \ + --arg username "$ADMIN_USER" \ + --arg password "$ADMIN_PASS" \ + --arg email "$ADMIN_EMAIL" \ + '{username: $username, password: $password, email: $email}') + + jq ".users += [$TMP_USER]" "$TMP_FILE" > "${TMP_FILE}.tmp" && mv "${TMP_FILE}.tmp" "$TMP_FILE" + echo "✅ Admin 用户 '$ADMIN_USER' 已添加" +fi + +echo "" + # ===== Registries 配置 ===== echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "📦 Registry 配置" @@ -47,20 +73,23 @@ read -p "是否添加 Registry? 
(y/n) [y]: " ADD_REGISTRY ADD_REGISTRY=${ADD_REGISTRY:-y} if [[ "$ADD_REGISTRY" == "y" ]]; then - read -p "Registry 名称 [harbor-bwgdi]: " REGISTRY_NAME - REGISTRY_NAME=${REGISTRY_NAME:-harbor-bwgdi} + read -p "Registry 名称 [harbor]: " REGISTRY_NAME + REGISTRY_NAME=${REGISTRY_NAME:-harbor} - read -p "Registry URL [https://harbor.bwgdi.com]: " REGISTRY_URL - REGISTRY_URL=${REGISTRY_URL:-https://harbor.bwgdi.com} + read -p "Registry URL: " REGISTRY_URL - read -p "Registry 描述 [BWGDI Harbor Registry]: " REGISTRY_DESC - REGISTRY_DESC=${REGISTRY_DESC:-"BWGDI Harbor Registry"} + read -p "Registry 描述 [Harbor Registry]: " REGISTRY_DESC + REGISTRY_DESC=${REGISTRY_DESC:-"Harbor Registry"} - read -p "Registry 用户名 [admin]: " REGISTRY_USER - REGISTRY_USER=${REGISTRY_USER:-admin} + read -p "Registry 用户名(推荐 Harbor robot 账号): " REGISTRY_USER read -sp "Registry 密码: " REGISTRY_PASS echo "" + + if [[ -z "$REGISTRY_URL" ]]; then + echo "❌ Registry URL 不能为空" + exit 1 + fi read -p "是否跳过 SSL 验证? (y/n) [n]: " REGISTRY_INSECURE REGISTRY_INSECURE=${REGISTRY_INSECURE:-n} @@ -72,17 +101,14 @@ if [[ "$ADD_REGISTRY" == "y" ]]; then fi # 添加 Registry 到配置 - TMP_REGISTRY=$(cat < "${TMP_FILE}.tmp" && mv "${TMP_FILE}.tmp" "$TMP_FILE" echo "✅ Registry '$REGISTRY_NAME' 已添加" @@ -232,4 +258,3 @@ echo " curl http://localhost:8080/api/v1/clusters" echo "" echo "✨ 完成!" 
- diff --git a/backend/scripts/quick-start-production.sh b/backend/scripts/quick-start-production.sh index b73274e..a568177 100755 --- a/backend/scripts/quick-start-production.sh +++ b/backend/scripts/quick-start-production.sh @@ -75,11 +75,10 @@ echo " - Health: http://localhost:8080/health" echo "" echo "📍 数据库管理:" echo " - pgAdmin: http://localhost:5050" -echo " Email: admin@ocdp.local" -echo " Password: admin" +echo " Email: ${PGADMIN_EMAIL:-admin@ocdp.local}" +echo " Password: ${PGADMIN_PASSWORD:-change-me}" echo "" echo "✨ 按 Ctrl+C 停止服务" echo "" ./bin/ocdp-backend - diff --git a/backend/scripts/test-all-modes.sh b/backend/scripts/test-all-modes.sh index 0366159..79d4a96 100755 --- a/backend/scripts/test-all-modes.sh +++ b/backend/scripts/test-all-modes.sh @@ -87,9 +87,11 @@ test_api() { log_info "测试 API..." # 测试注册 + local test_username="testuser$RANDOM" + local test_password="test123" register_response=$(curl -s -X POST http://localhost:8080/api/v1/auth/register \ -H "Content-Type: application/json" \ - -d '{"username":"testuser'"$RANDOM"'","password":"test123","email":"test@example.com"}') + -d '{"username":"'"$test_username"'","password":"'"$test_password"'","email":"test@example.com"}') if echo "$register_response" | grep -q "id"; then log_success "$mode 模式 API 注册测试通过" @@ -100,7 +102,7 @@ test_api() { # 测试登录 login_response=$(curl -s -X POST http://localhost:8080/api/v1/auth/login \ -H "Content-Type: application/json" \ - -d '{"username":"admin","password":"admin123"}') + -d '{"username":"'"$test_username"'","password":"'"$test_password"'"}') if echo "$login_response" | grep -q "accessToken"; then log_success "$mode 模式 API 登录测试通过" @@ -392,4 +394,3 @@ main() { # 执行主函数 main - diff --git a/database.md b/database.md new file mode 100644 index 0000000..e2291e0 --- /dev/null +++ b/database.md @@ -0,0 +1,598 @@ +# OCDP 数据库结构说明 + +## 概述 + +OCDP (Open Container Deployment Platform) 是一个多租户容器部署平台,支持: +- 多 Workspace 隔离 +- RBAC 权限控制 (Admin / User) +- Kubernetes 集群管理 +- 
OCI Registry 集成 (Harbor) +- Helm Chart 部署 +- Values 模板版本管理 +- 资源配额控制 +- 审计日志 + +## 数据库配置 + +```yaml +# PostgreSQL 连接信息 +Host: localhost +Port: 5430 (docker) / 5432 (local) +Database: ocdp +User: ocdp +Password: ocdp_password +``` + +--- + +## 表结构 + +### 1. users - 用户表 + +存储用户账户信息,支持多租户和角色管理。 + +```sql +CREATE TABLE users ( + id VARCHAR(36) PRIMARY KEY, + username VARCHAR(255) NOT NULL UNIQUE, + password_hash TEXT NOT NULL, + email VARCHAR(255) NOT NULL, + role VARCHAR(20) NOT NULL DEFAULT 'user', -- 'admin' | 'user' + workspace_id VARCHAR(36), -- 所属工作空间,admin 为 NULL 表示全局 + is_active BOOLEAN NOT NULL DEFAULT TRUE, -- 账户是否激活 + must_change_password BOOLEAN NOT NULL DEFAULT FALSE, -- 首次登录必须修改密码 + revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00', -- 全局 Token 撤销时间 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | 550e8400-e29b-41d4-a716-446655440000 | +| username | VARCHAR(255) | 用户名,唯一 | admin | +| password_hash | TEXT | bcrypt 密码哈希 | $2a$10$... | +| email | VARCHAR(255) | 邮箱 | admin@ocdp.local | +| role | VARCHAR(20) | 角色:admin/user | admin | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| is_active | BOOLEAN | 账户是否激活 | true | +| must_change_password | BOOLEAN | 首次登录必须修改密码 | false | +| revoked_after | TIMESTAMP | Token 撤销时间(修改密码后自动撤销旧 Token) | 2024-01-01 10:00:00 | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**索引**: +- `idx_users_username` - 用户名查询 +- `idx_users_role` - 角色筛选 +- `idx_users_workspace_id` - 工作空间筛选 +- `idx_users_is_active` - 激活状态筛选 + +**角色说明**: +- `admin`: 管理员,可管理所有 Workspace 和资源,workspace_id 为 NULL +- `user`: 普通用户,仅可访问自己 Workspace 内的资源 + +--- + +### 2. 
workspaces - 工作空间表 + +租户/团队隔离单元。 + +```sql +CREATE TABLE workspaces ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(255) NOT NULL UNIQUE, + description TEXT, + created_by VARCHAR(36), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | workspace-uuid | +| name | VARCHAR(255) | 工作空间名称,唯一 | team-alpha | +| description | TEXT | 描述 | Alpha 团队工作空间 | +| created_by | VARCHAR(36) | 创建者用户 ID | user-uuid | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**索引**: +- `idx_workspaces_name` - 名称查询 + +--- + +### 3. workspace_quotas - 工作空间配额表 + +每个 Workspace 的资源配额限制。 + +```sql +CREATE TABLE workspace_quotas ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + resource_type VARCHAR(50) NOT NULL, -- 'cpu' | 'gpu' | 'gpu_memory' + hard_limit DECIMAL(10,2) NOT NULL, -- 硬限制(0 表示无限制) + soft_limit DECIMAL(10,2) NOT NULL, -- 软限制(警告阈值) + used DECIMAL(10,2) NOT NULL DEFAULT 0, -- 当前使用量 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, resource_type) +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | quota-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| resource_type | VARCHAR(50) | 资源类型:cpu/gpu/gpu_memory | cpu | +| hard_limit | DECIMAL(10,2) | 硬限制(0=无限制) | 10.00 | +| soft_limit | DECIMAL(10,2) | 软限制(警告阈值) | 8.00 | +| used | DECIMAL(10,2) | 当前使用量 | 5.00 | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**配额检查逻辑**: +1. 部署实例前检查 `used + new_request <= hard_limit` +2. 超过硬限制返回 403 Forbidden +3. 超过软限制发送警告通知 +4. 实例删除后释放配额 + +--- + +### 4. 
clusters - Kubernetes 集群表 + +管理 Kubernetes 集群连接信息。 + +```sql +CREATE TABLE clusters ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间,NULL 表示全局共享 + owner_id VARCHAR(36), -- 创建者用户 ID + name VARCHAR(255) NOT NULL UNIQUE, + host TEXT NOT NULL, -- Kubernetes API Server URL + ca_data TEXT, -- CA 证书(Base64 编码) + cert_data TEXT, -- 客户端证书(Base64 编码) + key_data TEXT, -- 客户端密钥(Base64 编码) + token TEXT, -- Bearer Token(与证书认证二选一) + description TEXT, + isolation_mode VARCHAR(20) NOT NULL DEFAULT 'namespace', -- 'namespace' | 'cluster' + default_namespace VARCHAR(255), -- 默认 namespace 前缀 + is_shared BOOLEAN NOT NULL DEFAULT FALSE, -- 是否为共享集群 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | cluster-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| name | VARCHAR(255) | 集群名称,唯一 | prod-k8s | +| host | TEXT | Kubernetes API URL | https://k8s.example.com:6443 | +| ca_data | TEXT | CA 证书 Base64 | LS0tLS1... | +| cert_data | TEXT | 客户端证书 Base64 | LS0tLS1... | +| key_data | TEXT | 客户端密钥 Base64 | LS0tLS1... | +| token | TEXT | Bearer Token | eyJhbGci... | +| description | TEXT | 描述 | 生产环境集群 | +| isolation_mode | VARCHAR(20) | 隔离模式:namespace/cluster | namespace | +| default_namespace | VARCHAR(255) | 默认 namespace 前缀 | team-alpha | +| is_shared | BOOLEAN | 是否共享(admin 创建供多 Workspace 使用) | false | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**隔离模式说明**: +- `namespace`: 共享集群模式,多个 Workspace 使用不同 namespace + - 部署时自动分配:`{default_namespace}-{instance_name}` +- `cluster`: 私有集群模式,每个 Workspace 独立集群或独立凭证 + +**认证方式**: +1. 证书认证:`ca_data` + `cert_data` + `key_data` +2. Token 认证:`token` + +--- + +### 5. 
registries - OCI Registry 表 + +管理 Docker/OCI 镜像仓库(支持 Harbor)。 + +```sql +CREATE TABLE registries ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间,NULL 表示全局共享 + owner_id VARCHAR(36), -- 创建者用户 ID + name VARCHAR(255) NOT NULL UNIQUE, + url TEXT NOT NULL, -- Registry URL + description TEXT, + username VARCHAR(255), -- 认证用户名 + password TEXT, -- 认证密码(加密存储) + insecure BOOLEAN DEFAULT FALSE, -- 是否跳过 TLS 验证 + is_shared BOOLEAN DEFAULT FALSE, -- 是否为共享 Registry + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | registry-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| name | VARCHAR(255) | Registry 名称,唯一 | harbor-prod | +| url | TEXT | Registry URL | https://harbor.example.com | +| description | TEXT | 描述 | 生产环境 Harbor | +| username | VARCHAR(255) | 认证用户名 | admin | +| password | TEXT | 认证密码(加密) | encrypted... | +| insecure | BOOLEAN | 跳过 TLS 验证 | false | +| is_shared | BOOLEAN | 是否共享 | false | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +--- + +### 6. 
instances - Helm 实例表 + +部署的 Helm Release 管理。 + +```sql +CREATE TABLE instances ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间 + owner_id VARCHAR(36), -- 创建者用户 ID + cluster_id VARCHAR(36) NOT NULL, + registry_id VARCHAR(36) NOT NULL, + chart_reference_id VARCHAR(36), -- 引用的 Chart 引用 + values_template_id VARCHAR(36), -- 使用的 Values 模板 + + name VARCHAR(255) NOT NULL, -- Helm Release 名称 + namespace VARCHAR(255) NOT NULL, -- Kubernetes 命名空间 + repository TEXT NOT NULL, -- OCI Repository (e.g., charts/app) + chart VARCHAR(255) NOT NULL, -- Chart 名称 + version VARCHAR(255) NOT NULL, -- Chart 版本 + description TEXT, + values JSONB, -- Helm Values (JSON) + values_yaml TEXT, -- Helm Values (YAML) + user_override_yaml TEXT, -- 用户额外覆盖配置 + + status VARCHAR(50) NOT NULL, -- 实例状态 + status_reason TEXT, -- 状态说明 + last_operation VARCHAR(50), -- 最后操作类型 + last_error TEXT, -- 最近错误 + revision INTEGER NOT NULL DEFAULT 1, -- Helm Release Revision + + cpu_requested DECIMAL(10,2) NOT NULL DEFAULT 0, -- CPU 请求量 (cores) + memory_requested VARCHAR(50) NOT NULL DEFAULT '0Mi', -- 内存请求量 + gpu_requested DECIMAL(10,2) NOT NULL DEFAULT 0, -- GPU 请求量 (cards) + gpu_memory_requested VARCHAR(50) NOT NULL DEFAULT '0Mi', -- GPU 内存请求量 + + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT fk_cluster FOREIGN KEY (cluster_id) REFERENCES clusters(id) ON DELETE CASCADE, + CONSTRAINT fk_registry FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE, + UNIQUE (cluster_id, name, namespace) +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | instance-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| cluster_id | VARCHAR(36) | 所属集群 ID | cluster-uuid | +| registry_id | VARCHAR(36) | 所属 Registry ID | registry-uuid | +| chart_reference_id | VARCHAR(36) | Chart 引用 ID | chart-ref-uuid | +| 
values_template_id | VARCHAR(36) | Values 模板 ID | template-uuid | +| name | VARCHAR(255) | Release 名称(RFC 1123) | my-app | +| namespace | VARCHAR(255) | Kubernetes 命名空间 | team-alpha-my-app | +| repository | TEXT | OCI Repository | harbor.example.com/charts/nginx | +| chart | VARCHAR(255) | Chart 名称 | nginx | +| version | VARCHAR(255) | Chart 版本 | 1.0.0 | +| description | TEXT | 描述 | Nginx 应用 | +| values | JSONB | Values JSON | {"replicas": 2} | +| values_yaml | TEXT | Values YAML | replicas: 2 | +| user_override_yaml | TEXT | 用户覆盖配置 | replicas: 3 | +| status | VARCHAR(50) | 状态 | deployed | +| status_reason | TEXT | 状态说明 | Install complete | +| last_operation | VARCHAR(50) | 最后操作 | install | +| last_error | TEXT | 错误信息 | - | +| revision | INTEGER | Helm Revision | 1 | +| cpu_requested | DECIMAL(10,2) | CPU 请求 | 2.00 | +| memory_requested | VARCHAR(50) | 内存请求 | 1Gi | +| gpu_requested | DECIMAL(10,2) | GPU 请求 | 0 | +| gpu_memory_requested | VARCHAR(50) | GPU 内存 | 0Mi | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**状态说明**: +| 状态 | 说明 | +|------|------| +| deployed | 部署成功 | +| failed | 部署失败 | +| pending-install | 安装中 | +| pending-upgrade | 升级中 | +| pending-rollback | 回滚中 | +| pending-delete | 删除中 | +| uninstalled | 已卸载 | +| superseded | 已被取代 | +| unknown | 未知 | + +--- + +### 7. 
storage_backends - 存储后端表 + +NFS/PV/HostPath 存储配置。 + +```sql +CREATE TABLE storage_backends ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + owner_id VARCHAR(36), + name VARCHAR(255) NOT NULL, + type VARCHAR(50) NOT NULL, -- 'nfs' | 'pv' | 'hostPath' + config JSONB NOT NULL, -- 存储配置 + description TEXT, + is_default BOOLEAN NOT NULL DEFAULT FALSE, -- 是否默认存储 + is_shared BOOLEAN NOT NULL DEFAULT FALSE, -- 是否共享 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, name) +); +``` + +**Config 结构**: +```json +// NFS +{"nfs": {"server": "192.168.1.100", "path": "/data"}} + +// PV +{"pv": {"storageClassName": "nfs", "capacity": "10Gi", "accessModes": ["ReadWriteMany"]}} + +// HostPath +{"hostPath": {"path": "/mnt/data"}} +``` + +--- + +### 8. chart_references - Chart 引用表 + +管理可用的 Helm Chart 引用。 + +```sql +CREATE TABLE chart_references ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + registry_id VARCHAR(36), + repository VARCHAR(500) NOT NULL, -- OCI repository path + chart_name VARCHAR(255) NOT NULL, + description TEXT, + is_enabled BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, registry_id, repository) +); +``` + +--- + +### 9. 
values_templates - Values 模板表 + +Helm Values 模板,支持版本管理。 + +```sql +CREATE TABLE values_templates ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + owner_id VARCHAR(36), + chart_reference_id VARCHAR(36), + name VARCHAR(255) NOT NULL, + description TEXT, + values_yaml TEXT NOT NULL, + version INTEGER NOT NULL DEFAULT 1, -- 模板版本号 + is_default BOOLEAN NOT NULL DEFAULT FALSE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, chart_reference_id, name) +); +``` + +**版本管理**: +- 每次更新创建新版本(version + 1) +- 支持回滚到历史版本 + +--- + +### 10. user_config_overrides - 用户配置覆盖表 + +用户个人配置覆盖。 + +```sql +CREATE TABLE user_config_overrides ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + target_type VARCHAR(50) NOT NULL, -- 'storage' | 'template' | 'global' + target_id VARCHAR(36), + config JSONB NOT NULL, -- 覆盖配置 + priority INTEGER NOT NULL DEFAULT 0, -- 优先级 + is_active BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +### 11. audit_logs - 审计日志表 + +记录所有操作行为。 + +```sql +CREATE TABLE audit_logs ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + action VARCHAR(100) NOT NULL, -- 'create' | 'update' | 'delete' | 'deploy' | 'scale' + resource_type VARCHAR(50) NOT NULL, -- 'cluster' | 'registry' | 'instance' | ... + resource_id VARCHAR(36), + resource_name VARCHAR(255), + details JSONB, + ip_address VARCHAR(50), + user_agent TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +### 12. 
schema_migrations - 迁移版本表 + +数据库版本记录。 + +```sql +CREATE TABLE schema_migrations ( + version VARCHAR(50) PRIMARY KEY, + applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +## ER 关系图 + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ workspaces │ +│ (id, name, description, created_by, created_at, updated_at) │ +└────────────────────────────────────┬────────────────────────────────────┘ + │ 1:N + ┌────────────────────────────┼────────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ +│ workspace_quotas│ │ clusters │ │ registries │ +│ (workspace_id, │ │ (workspace_id, │ │ (workspace_id, │ +│ resource_type, │ │ owner_id, name, │ │ owner_id, name, │ +│ hard_limit, │ │ host, is_shared) │ │ url, is_shared) │ +│ soft_limit, used)│ └─────────┬─────────┘ └────────┬─────────┘ +└───────────────────┘ │ │ + │ │ + ┌───────────────────────────┼───────────────────────┘ + │ │ + ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ +│ instances │ │ storage_backends│ +│ (workspace_id, │ │ (workspace_id, │ +│ owner_id, │ │ owner_id, name, │ +│ cluster_id, │ │ type, config) │ +│ registry_id, │ └───────────────────┘ +│ values_template) │ +└───────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────┐ +│ users │ +│ (id, username, password_hash, email, role, workspace_id, is_active) │ +└────────────────────────────────────┬────────────────────────────────────┘ + │ + ┌────────────────────────────┼────────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ +│ chart_references│ │ values_templates │ │ audit_logs │ +│ (workspace_id, │ │ (workspace_id, │ │ (user_id, action,│ +│ registry_id, │ │ owner_id, │ │ resource_type) │ +│ repository) │ │ chart_ref_id) │ └───────────────────┘ +└───────────────────┘ └───────────────────┘ +``` + +--- + +## 资源可见性规则 + +| 用户角色 | 可见范围 | +|---------|---------| +| Admin 
| 所有 Workspace 的所有资源(workspace_id 为 NULL 或有值都能看到) | +| User | 仅自己 Workspace 的资源 | +| 共享资源 | `is_shared=TRUE` 时,对所有 Workspace 的用户可见(跨 Workspace 共享) | + +--- + +## 常用 SQL 操作 + +### 查询用户及其 Workspace +```sql +SELECT u.id, u.username, u.role, w.name as workspace_name +FROM users u +LEFT JOIN workspaces w ON u.workspace_id = w.id +WHERE u.is_active = TRUE; +``` + +### 查询 Workspace 配额使用情况 +```sql +SELECT w.name as workspace, + q.resource_type, + q.hard_limit, + q.soft_limit, + q.used, + CASE WHEN q.hard_limit > 0 THEN ROUND(q.used / q.hard_limit * 100, 2) ELSE 0 END as usage_percent +FROM workspace_quotas q +JOIN workspaces w ON q.workspace_id = w.id; +``` + +### 查询用户可用的集群 +```sql +-- Admin: 所有集群 +SELECT * FROM clusters; + +-- User: 自己 Workspace 的集群 + 共享集群 +SELECT * FROM clusters +WHERE workspace_id = 'user-workspace-id' + OR is_shared = TRUE; +``` + +### 查询实例状态统计 +```sql +SELECT status, COUNT(*) as count +FROM instances +WHERE workspace_id = 'workspace-id' +GROUP BY status; +``` + +### 查询审计日志 +```sql +SELECT a.created_at, u.username, a.action, a.resource_type, a.resource_name +FROM audit_logs a +JOIN users u ON a.user_id = u.id +WHERE a.workspace_id = 'workspace-id' +ORDER BY a.created_at DESC +LIMIT 50; +``` + +--- + +## 迁移历史 + +| 版本 | 说明 | 日期 | +|------|------|------| +| v1.0.0 | 初始版本(单租户) | 2024-01 | +| v2.0.0-multi-tenant | 多租户迁移:添加 workspaces, quotas, 扩展 users/clusters/registries/instances | 2025-04 | + +--- + +## 初始数据 + +### 创建 Admin 用户 +```sql +-- 默认密码: admin123 (bcrypt hash 需由应用设置) +INSERT INTO users (id, username, password_hash, email, role, workspace_id, is_active, must_change_password) +VALUES ( + '00000000-0000-0000-0000-000000000001', + 'admin', + '$2a$10$placeholder', -- 由应用初始化时设置 + 'admin@ocdp.local', + 'admin', + NULL, -- admin 的 workspace_id 为 NULL,表示全局 + TRUE, + TRUE -- 首次登录必须修改密码 +); +``` \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index b7ce80d..f111f1b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,19 +1,82 @@ # 
================================================== -# OCDP Docker Compose (frontend + gateway layer) +# OCDP Docker Compose (complete local stack) # ================================================== # 使用方式: -# docker compose -f docker-compose.yml \ -# -f ./backend/docker-compose.yml \ -# --profile backend up --build -d +# docker compose up --build # # 说明: -# - 本文件只负责前端构建和 Nginx。 -# - Backend / PostgreSQL / pgAdmin 由 backend/docker-compose.yml 提供。 -# - Nginx 统一监听 80/443(默认映射 WEB_HTTP_PORT=80、WEB_HTTPS_PORT=443), +# - 本文件是本地部署主入口,包含 PostgreSQL、Backend、前端构建和 Nginx。 +# - 默认使用高位宿主端口,避免和本机其他项目冲突。 +# - Nginx 统一监听容器内 80/443(默认映射 WEB_HTTP_PORT=18080、WEB_HTTPS_PORT=18443), # 根据路径转发:/api/* → backend,其他路径 → 前端静态文件。 # ================================================== services: + # -------------------------------------------------- + # PostgreSQL 数据库 + # -------------------------------------------------- + postgres: + image: postgres:17-alpine + container_name: ocdp-postgres + restart: unless-stopped + environment: + POSTGRES_DB: ${POSTGRES_DB:-ocdp} + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C" + ports: + - "${POSTGRES_PORT:-15432}:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-ocdp}"] + interval: 10s + timeout: 5s + retries: 30 + start_period: 60s + networks: + - ocdp-network + + # -------------------------------------------------- + # Backend API + # -------------------------------------------------- + backend: + build: + context: ./backend + dockerfile: Dockerfile + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} + image: ocdp-backend:latest + container_name: ocdp-backend + restart: unless-stopped + env_file: + - path: ./.env + required: false + format: raw + environment: + ADAPTER_MODE: 
${ADAPTER_MODE:-production} + PORT: 8080 + JWT_SECRET: ${JWT_SECRET:-change-me-in-production} + ENCRYPTION_KEY: ${ENCRYPTION_KEY:-change-me-32-bytes-long-key-here} + DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-ocdp}?sslmode=disable + ports: + - "${BACKEND_PORT:-18081}:8080" + volumes: + - ./config:/app/config:ro + - ./data:/app/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + depends_on: + postgres: + condition: service_healthy + networks: + - ocdp-network + # -------------------------------------------------- # 构建前端静态资源 (一次性 Job) # -------------------------------------------------- @@ -57,15 +120,17 @@ services: depends_on: frontend-build: condition: service_completed_successfully + backend: + condition: service_healthy ports: - - "${WEB_HTTP_PORT:-80}:80" - - "${WEB_HTTPS_PORT:-443}:443" + - "${WEB_HTTP_PORT:-18080}:80" + - "${WEB_HTTPS_PORT:-18443}:443" volumes: - frontend_dist:/usr/share/nginx/html:ro - ./infra/nginx/default.conf:/etc/nginx/conf.d/default.conf:ro - ./infra/nginx/certs:/etc/nginx/certs:ro healthcheck: - test: ["CMD-SHELL", "wget -qO- http://localhost/healthz || exit 1"] + test: ["CMD-SHELL", "wget -qO- http://127.0.0.1/healthz || exit 1"] interval: 30s timeout: 5s retries: 5 @@ -84,6 +149,8 @@ networks: # Volumes # ================================================== volumes: + postgres_data: + name: ocdp-postgres-data frontend_dist: driver: local frontend_node_modules: diff --git a/docs/bug-report.md b/docs/bug-report.md new file mode 100644 index 0000000..4be45a3 --- /dev/null +++ b/docs/bug-report.md @@ -0,0 +1,164 @@ +# OCDP 系统测试 Bug 报告 + +**测试日期:** 2026-05-11 +**测试环境:** http://10.6.80.114:18080 +**集群:** k3s (dbf824f1-9962-4d8e-881e-870c75fdb6f5), k8s (23880994-dfe4-48d0-abc0-b49692cc630a) +**Harbor:** harbor.bwgdi.com (83b823af-873b-457c-912c-9ccde3cb12e6) + +--- + +## 测试团队 +| 
Agent | 角色 | 账号 | +|-------|------|------| +| user-a-agent | 前端 UI 测试 | test-user-a / TestUserA123! | +| user-b-agent | API/部署测试 | test-user-b / TestUserB123! | +| user-c-agent | 权限隔离测试 | test-user-c / TestUserC123! | +| security-agent | 安全测试 | admin + 普通用户 | + +--- + +## Bug 列表 (按严重度排序) + +### P0 - Blocker (核心功能不可用) + +| ID | 标题 | 发现者 | 页面/端点 | 描述 | +|----|------|--------|-----------|------| +| BUG-001 | **Launch 按钮点击无任何反应** | user-a | `/artifact/registries` (TagCard) | Chart Browser 中 TagCard 的 "Launch" 按钮显示为可用状态 (`is_enabled() == True`),但点击后无任何效果:不弹出 Launch Modal,无 URL 变化,无控制台错误。**核心"一键部署"流程完全阻塞** | +| BUG-002 | **SPA 直接路由返回空白页面** | user-a | `/clusters`, `/registries`, `/monitoring`, `/launch` | 直接访问 SPA 旧路由时只渲染 `<div id="root"></div>` 空壳,React SPA 无法挂载。代码中已定义 redirect 映射但未生效(如 `/clusters` → `/configuration/clusters`) | + +### P1 - 高 (High) + +| ID | 标题 | 发现者 | 页面/端点 | 描述 | +|----|------|--------|-----------|------| +| BUG-003 | DELETE 实例返回 404 但实际成功删除 | user-b, user-c | `DELETE /clusters/{id}/instances/{id}` | 删除操作正确触发 `pending-delete` 状态转换,但 HTTP 返回 **404**(空 body),非预期 202/204。客户端误判为失败 | +| BUG-004 | DELETE 实例返回空响应体 | user-b | `DELETE /clusters/{id}/instances/{id}` | 用正确的 token 和 ID 请求,返回空 body(无 JSON),前端解析会失败 | + +### P2 - 中 (Medium) + +| ID | 标题 | 发现者 | 页面/端点 | 描述 | +|----|------|--------|-----------|------| +| BUG-005 | Tags 专用端点缺失 | user-b | `GET /registries/{id}/repositories/{repo}/tags` | 端点未实现,返回纯文本 "404 page not found"。虽可通过 `/artifacts` 获取 tag,但 API 不完整 | +| BUG-006 | 跨用户 namespace 部署时静默覆盖 | user-c | `POST /clusters/{id}/instances` | 用户请求部署到其他用户的 namespace 时,服务端静默使用自己的 namespace,返回 200 且无任何警告或提示 | +| BUG-007 | Clusters Metrics API 缺失 | user-b | `GET /monitoring/clusters/{id}/metrics` | 监控页面可能需要的数据端点未实现(404) | +| BUG-008 | Cluster Stats API 缺失 | user-b | `GET /clusters/{id}/stats` | 统计端点未实现(404) | +| BUG-009 | Kubeconfig API 缺失 | user-b | `GET /clusters/{id}/kubeconfig` | kubeconfig 签发端点未实现(404) | +| BUG-010 | "Launch" 按钮缺乏可访问性标识 | user-a | TagCard "Launch" | Chart 上的 "Launch" 按钮无 `aria-label`,与侧边栏 "Launch Instance" 导航项标签冲突,屏幕阅读器用户无法区分 | + +### P3 - 低 (Low) + +| ID | 标题 | 发现者 | 页面/端点 | 描述 | +|----|------|--------|-----------|------| +| BUG-011 | API 响应格式不一致 | user-b | 列表 API | Clusters/Registries 返回裸数组,Instances 返回 `{ "instances": [...], "total": N }` 包装对象 | +| BUG-012 | `/auth/me` 返回空的 token 字段 | user-b | `GET /auth/me` | 响应中包含 `"accessToken": ""` 和 `"refreshToken": ""` 空字段,复用了 login 响应 DTO 未清理 | +| BUG-013 | 登录接口存在用户枚举漏洞 | security | `POST /auth/login` | 不存在用户返回 "user not found",存在用户返回 "invalid password",攻击者可枚举有效用户名 | +| BUG-014 | 登录接口无速率限制 | security | `POST /auth/login` | 10 次连续请求全部返回 401,无 429 限流或锁定 | +| BUG-015 | Nginx 版本信息泄露 | security | HTTP Headers | `Server: nginx/1.27.5` 
暴露精确版本号 | +| BUG-016 | CORS 配置过于宽松 | security | All API | `Access-Control-Allow-Origin: *` 允许任意跨域请求 | +| BUG-017 | 缺少安全响应头 | security | All pages | 缺少 HSTS、X-Frame-Options、Content-Security-Policy 等 | +| BUG-018 | `/health` 端点返回 SPA HTML | security | `GET /health` | 健康检查返回完整 index.html,非 JSON 状态响应 | + +--- + +## 分类汇总 + +### 前端 Bug +| ID | 描述 | 严重度 | +|----|------|--------| +| BUG-001 | Launch 按钮无反应(核心功能阻塞) | P0 🔴 | +| BUG-002 | SPA 路由空白页 | P0 🔴 | +| BUG-010 | Launch 按钮缺少 aria-label | P2 🟡 | + +### 后端 API Bug +| ID | 描述 | 严重度 | +|----|------|--------| +| BUG-003 | DELETE 返回 404 | P1 🟠 | +| BUG-004 | DELETE 空 body | P1 🟠 | +| BUG-005 | Tags 端点缺失 | P2 🟡 | +| BUG-007 | Metrics API 缺失 | P2 🟡 | +| BUG-008 | Stats API 缺失 | P2 🟡 | +| BUG-009 | Kubeconfig API 缺失 | P2 🟡 | +| BUG-011 | 响应格式不一致 | P3 🔵 | +| BUG-012 | auth/me 空 token 字段 | P3 🔵 | +| BUG-018 | /health 返回 HTML | P3 🔵 | + +### 安全/权限 Bug +| ID | 描述 | 严重度 | +|----|------|--------| +| BUG-006 | Namespace 静默覆盖(安全但令人困惑) | P2 🟡 | +| BUG-013 | 用户枚举(错误消息差异) | P3 🔵 | +| BUG-014 | 无速率限制 | P3 🔵 | +| BUG-015 | Nginx 版本泄露 | P3 🔵 | +| BUG-016 | CORS Origin: * | P3 🔵 | +| BUG-017 | 缺少安全响应头 | P3 🔵 | + +### 严重度分布 +| 级别 | 数量 | +|------|------| +| P0 (Blocker) | 2 | +| P1 (High) | 2 | +| P2 (Medium) | 6 | +| P3 (Low) | 8 | +| **合计** | **18** | + +--- + +## 测试通过项 + +### 认证 +- [x] 有效凭据登录 (admin + 所有 test-user) +- [x] 无效凭据返回 401 +- [x] 无 token 访问被保护 API 返回 401 +- [x] 无效/篡改 JWT token 全部被拒绝 +- [x] /auth/me 返回正确的用户信息 +- [x] JWT payload 包含角色、权限、namespace + +### Cluster / Registry API +- [x] 集群列表正常返回 +- [x] 集群健康检查正常 +- [x] Registry 列表正常返回 +- [x] 通过 artifacts 端点浏览 repository 正常 +- [x] 无效 registry/repository 返回恰当错误 + +### 权限隔离 +- [x] GET /users 返回 403 (普通用户) +- [x] POST /auth/register 返回 403 (普通用户) +- [x] 用户无法访问其他用户的 workspace 资源 +- [x] 用户无法部署到其他用户的 Kubernetes namespace +- [x] 安全架构:核心认证/授权/脱敏/隔离控制均正确实现 + +### 实例部署生命周期 +- [x] 实例创建操作成功(pending-install) +- [x] 实例状态正确追踪(pending-install → deployed) +- [x] 实例删除正确转换状态(pending-delete → 消失) +- [x] 实例列表按 
clusterId 正确过滤 + +### 安全测试通过项 +- [x] XSS/SQLi 注入安全处理 +- [x] 路径遍历攻击被阻止 +- [x] JWT alg=none/无效格式被拒绝 +- [x] 集群凭据和 Registry 密码脱敏显示 (••••••••) +- [x] 自注册端点需认证 (401) + +--- + +## 建议修复优先级 + +### 立即修复 (P0) +1. **BUG-001**: 调查 Launch 按钮 onClick handler — TagCard 组件中 `onLaunch` prop 未正确传递给 LaunchModal,或 launch 状态 / artifactType 检查阻止了 modal 打开 +2. **BUG-002**: 检查 React Router `` 组件和 SPA 的 index.html 配置,确保旧路由正确重定向 + +### 尽快修复 (P1) +3. **BUG-003/004**: InstanceHandler.Delete 应返回 202 Accepted + `{"status":"deleting"}` 而非 404+空 body + +### 短期修复 (P2) +4. 实现 `/metrics`, `/stats` 等缺失 API +5. Launch 按钮添加 `aria-label` 属性 +6. Namespace 覆盖时返回警告或 403 + +### 安全加固 (P3) +7. 登录错误消息统一为 "Invalid username or password" +8. 实现速率限制 +9. Nginx 安全加固:`server_tokens off` + 安全响应头 +10. CORS 收紧为具体域名 +11. 修复 `/health` 端点 +12. 统一 API 响应格式 diff --git a/docs/bugs-user-a.md b/docs/bugs-user-a.md new file mode 100644 index 0000000..a3e4a66 --- /dev/null +++ b/docs/bugs-user-a.md @@ -0,0 +1,92 @@ +# OCDP Platform QA Report - test-user-a + +**Date:** 2026-05-11 +**Environment:** http://10.6.80.114:18080 +**User:** test-user-a (non-admin) + +## Summary + +- **Total Bugs Found:** 3 +- **Screenshots Taken:** 12 +- **Test Status:** 7/8 areas covered, 1 blocked (Launch button non-functional) + +--- + +## Bug List + +### Bug #1: Direct SPA Routes Return Empty Pages (🔴 HIGH) + +- **Page:** Multiple — `/clusters`, `/registries`, `/monitoring`, `/launch` +- **Action:** Navigate directly to these URLs +- **Actual:** Returns only the React `
` shell with no rendered content (~0 chars body text). The SPA fails to mount when hitting these routes directly. +- **Expected:** Should either render content or redirect to correct working routes: + - `/clusters` → `/configuration/clusters` + - `/registries` → `/configuration/registries` + - `/monitoring` → `/monitoring/clusters` + - `/launch` → `/artifact/registries` +- **Severity:** HIGH — Users who bookmark or type these URLs see blank pages +- **Screenshot:** `01-login` (representative of empty state) + +**Working routes for reference:** +- `/configuration/clusters` ✅ +- `/configuration/registries` ✅ +- `/monitoring/clusters` ✅ +- `/artifact/registries` ✅ +- `/artifact/instances` ✅ + +--- + +### Bug #2: Launch Button Does Nothing When Clicked (🔴 HIGH) + +- **Page:** Chart Browser (`/artifact/registries`) +- **Action:** + 1. Navigate to `/artifact/registries` + 2. Registry `harbor-bwgdi` loads with 13 charts + 3. Expand `charts/chromadb` folder + 4. Tag `0.1.4` appears with "Launch" and "Copy" buttons + 5. Click the "Launch" button +- **Actual:** No visible reaction — no modal opens, no URL change, no console error. The button is not disabled (no `disabled` attribute, no `aria-disabled`), is visibly styled as active (`bg-blue-50 text-blue-700 border-blue-200 shadow-sm`), and Playwright confirms `is_enabled() == True`. The React onClick handler produces no observable effect. +- **Expected:** Clicking "Launch" on a chart tag should open a deployment form/dialog with cluster selector, instance name, namespace, and values configuration fields. +- **Severity:** HIGH — Core platform feature (deploying Helm charts) is completely blocked +- **Screenshot:** `04-chart-expanded` + +--- + +### Bug #3: Ambiguous "Launch" Button Labels (🟡 MEDIUM) + +- **Page:** Chart Browser (`/artifact/registries`) +- **Action:** Inspect button accessible names +- **Actual:** Both the sidebar navigation item "Launch Instance" and the chart action button "Launch" appear on the same page. 
The chart action button has no distinguishing `aria-label` or accessible description. The "Copy" button next to it has a `title="Copy pull command"` attribute, but "Launch" does not. +- **Expected:** The chart action should have a descriptive label like `aria-label="Launch chart chromadb version 0.1.4"` to differentiate from the nav item. +- **Severity:** MEDIUM — Accessibility concern; minor confusion for sighted users with multiple "Launch" targets + +--- + +## Test Results by Area + +| Area | Status | Notes | +|------|--------|-------| +| Login | ✅ PASS | test-user-a login successful, redirect to `/home` | +| Home Page | ✅ PASS | All cards visible, nav clicks work, no Users section | +| Sidebar Nav | ✅ PASS | All 6 items navigate correctly, Users hidden | +| Chart Browser | ❌ BLOCKED | Registry loads, charts expand, but **Launch button dead** | +| Instances | ✅ PASS | Empty state, filter, refresh all work | +| Monitoring | ✅ PASS | 2 clusters, health data, CPU/Memory/GPU stats all load | +| Config - Clusters | ✅ PASS | Both clusters listed, Add form opens | +| Config - Registries | ✅ PASS | Harbor registry listed, Add form opens | +| Direct Routes | ❌ FAIL | 4 routes return empty pages | + +## Screenshots + +- `01-login` → `/tmp/ocdp-qa-screenshots/01-login.png` +- `02-home` → `/tmp/ocdp-qa-screenshots/02-home.png` +- `02-home-full` → `/tmp/ocdp-qa-screenshots/02-home-full.png` +- `04-chart-browser` → `/tmp/ocdp-qa-screenshots/04-chart-browser.png` +- `04-chart-expanded` → `/tmp/ocdp-qa-screenshots/04-chart-expanded.png` +- `04-launch-modal` → `/tmp/ocdp-qa-screenshots/04-launch-modal.png` +- `05-instances` → `/tmp/ocdp-qa-screenshots/05-instances.png` +- `06-monitoring` → `/tmp/ocdp-qa-screenshots/06-monitoring.png` +- `07-clusters` → `/tmp/ocdp-qa-screenshots/07-clusters.png` +- `07-add-cluster-form` → `/tmp/ocdp-qa-screenshots/07-add-cluster-form.png` +- `08-registries` → `/tmp/ocdp-qa-screenshots/08-registries.png` +- `08-add-registry-form` → 
`/tmp/ocdp-qa-screenshots/08-add-registry-form.png` diff --git a/docs/bugs-user-b.md b/docs/bugs-user-b.md new file mode 100644 index 0000000..093ed1c --- /dev/null +++ b/docs/bugs-user-b.md @@ -0,0 +1,149 @@ +# Bug Report: test-user-b QA Test + +**Tester:** test-user-b (user role) +**Date:** 2026-05-11 +**Environment:** http://10.6.80.114:18080 + +--- + +## Bug 1: Repository Tags Endpoint Returns 404 + +**Endpoint:** `GET /api/v1/registries/{registryId}/repositories/{repository}/tags` +**Status Code:** 404 +**Response Body:** `404 page not found` (plain text, not JSON) + +**Expected:** Should return a list of tags for the chart/artifact. +**Actual:** The dedicated tags endpoint is not implemented or routes incorrectly. The artifacts endpoint (`/repositories/{repository}/artifacts`) does work and returns tag info. + +**Severity:** Medium — tags are still discoverable via artifacts endpoint but the dedicated tags API is broken. + +--- + +## Bug 2: DELETE Instance Returns Empty Response Body + +**Endpoint:** `DELETE /api/v1/clusters/{clusterId}/instances/{instanceId}` +**Status Code:** 200 +**Response Body:** (empty — no content at all) + +**Expected:** Should return a confirmation JSON body (e.g., `{"message": "Instance deletion initiated", "id": "..."}`) or at minimum a 202 Accepted with status details. + +**Actual:** Returns a completely empty body. The instance does transition to `pending-delete` state, but the API consumer receives no feedback. + +**Severity:** Medium — operation works but API consumer gets no confirmation. + +--- + +## Bug 3: Cluster Stats Endpoint Returns 404 + +**Endpoint:** `GET /api/v1/clusters/{clusterId}/stats` +**Status Code:** 404 +**Response Body:** `404 page not found` (plain text) + +**Expected:** Should return cluster resource statistics (CPU, memory, pod counts, etc.) or a proper JSON error if not implemented. + +**Actual:** Endpoint is not implemented — returns a raw 404 with no JSON error structure. 
+ +**Severity:** Low — but given the user has `monitoring:clusters:view` permission, this is a missing feature. + +--- + +## Bug 4: Kubeconfig Endpoint Returns 404 + +**Endpoint:** `GET /api/v1/clusters/{clusterId}/kubeconfig` +**Status Code:** 404 +**Response Body:** `404 page not found` (plain text) + +**Expected:** Should return a kubeconfig file content or JSON error. User has `kubeconfig:issue:own` permission. + +**Actual:** Endpoint is not implemented. + +**Severity:** Low — the permission exists but the endpoint does nothing. + +--- + +## Bug 5: Monitoring Metrics Endpoint Returns 404 + +**Endpoint:** `GET /api/v1/monitoring/clusters/{clusterId}/metrics` +**Status Code:** 404 +**Response Body:** `404 page not found` (plain text) + +**Expected:** Monitoring metrics data. User has `monitoring:clusters:view` permission. + +**Actual:** Endpoint not found. + +**Severity:** Low — monitoring permissions exist but backend endpoints missing. + +--- + +## Bug 6: Inconsistent API Response Format (Array vs Object Wrapper) + +**Clusters and Registries** return bare arrays: +```json +[ + { "id": "...", "name": "k3s", ... } +] +``` + +**Instances** returns an object wrapper: +```json +{ + "instances": [ + { "id": "...", "name": "test-nginx-b", ... } + ], + "total": 1 +} +``` + +**Expected:** Consistent response format across all list endpoints. Either all return bare arrays or all use the `{ "items": [...], "total": N }` wrapper pattern. + +**Severity:** Low — API consistency issue. Makes client code harder to write generically. + +--- + +## Bug 7: auth/me Returns Empty Token Fields + +**Endpoint:** `GET /api/v1/auth/me` +**Response includes empty/unpopulated fields:** +```json +{ + "accessToken": "", + "refreshToken": "", + ... +} +``` + +**Expected:** Either remove these fields from the `/auth/me` response (they are only meaningful in login/refresh responses) or populate them with valid values. 
+ +**Actual:** Empty string values for both token fields create confusion about whether they should be present. + +**Severity:** Low — cosmetic issue, but suggests the DTO is reusing the login response struct without clearing token fields. + +--- + +## Summary + +| # | Bug | Severity | Category | +|---|-----|----------|----------| +| 1 | Tags endpoint 404 | Medium | Missing Implementation | +| 2 | DELETE returns empty body | Medium | API Response Quality | +| 3 | Cluster stats endpoint 404 | Low | Missing Implementation | +| 4 | Kubeconfig endpoint 404 | Low | Missing Implementation | +| 5 | Monitoring metrics endpoint 404 | Low | Missing Implementation | +| 6 | Inconsistent list response format | Low | API Consistency | +| 7 | auth/me returns empty tokens | Low | API Response Quality | + +**Passed Tests:** +- Login/authentication ✓ +- Auth/me user info ✓ +- Cluster listing ✓ +- Cluster health check ✓ +- Registry listing ✓ +- Repository browsing (artifacts) ✓ +- Instance deployment (nginx chart) ✓ +- Instance status tracking (pending-install → deployed) ✓ +- Instance deletion (async, transitions to pending-delete then removed) ✓ +- Error handling for invalid repository ✓ +- Error handling for missing required fields ✓ +- Auth rejects invalid tokens ✓ +- Auth rejects missing tokens ✓ +- Instance cleanup confirmed ✓ diff --git a/docs/bugs-user-c.md b/docs/bugs-user-c.md new file mode 100644 index 0000000..45164f9 --- /dev/null +++ b/docs/bugs-user-c.md @@ -0,0 +1,109 @@ +# QA Report: Permission Isolation & Multi-Tenancy Testing — test-user-c + +**Tester:** test-user-c (role: `user`) +**Date:** 2026-05-11 +**Environment:** http://10.6.80.114:18080 + +## Summary + +Test-user-c is a standard `user` role with namespace `ocdp-u-test-c`, workspace `71459030-7166-4c79-b53c-81c61da4c313`. Permissions follow the `manage_own` / `view` pattern — no admin-level permissions. + +--- + +## Test Results + +### 1. 
Login & Basic Access ✅ + +| Test | Result | Notes | +|------|--------|-------| +| POST /auth/login | ✅ Pass | Token issued, role=`user`, workspace/namespace correctly assigned | +| GET /auth/me | ✅ Pass | Returns correct user profile with permissions | +| GET /clusters | ✅ Pass | Sees all `global_shared` clusters (k8s, k3s) | +| GET /registries | ✅ Pass | Sees all `global_shared` registries (harbor) | + +### 2. Admin Endpoint Protection + +| Test | Result | Notes | +|------|--------|-------| +| GET /api/v1/users | ✅ **403 Forbidden** | Properly blocked — `permission denied` | +| POST /auth/register | ✅ **403 Forbidden** | Cannot register new users as non-admin | +| GET /api/v1/admin/* | ✅ **404** | Admin route prefix doesn't exist (not a bypass risk) | + +### 3. Frontend Access + +| Test | Result | Notes | +|------|--------|-------| +| GET /configuration/users | ⚠️ **200 (OK)** | SPA returns index.html — expected. Auth is enforced via API, not routes. | +| GET /configuration/clusters | ⚠️ **200 (OK)** | Same — SPA behavior. | +| GET /configuration/registries | ⚠️ **200 (OK)** | Same. | + +**Risk: Low.** This is standard SPA behavior. Authorization is enforced at the API level. However, if the frontend relies solely on hiding UI elements rather than checking permissions, users who manually navigate could see empty/error states. + +### 4. Namespace Isolation Enforcement + +| Test | Result | Notes | +|------|--------|-------| +| Deploy with `namespace: ocdp-u-test-a` | ⚠️ **Silently overridden** | Server ignored requested namespace and used `ocdp-u-test-c` instead. **No warning or error returned.** | +| PATCH to change namespace | ✅ **404** | PATCH endpoint doesn't exist — namespace cannot be changed after creation | + +🔴 **Bug: Silent namespace override (Low severity)** +When a user specifies a namespace that doesn't belong to them in the instance creation request, the server silently overrides it with the user's own namespace. 
This is secure (prevents cross-namespace deployment) but: +- The user receives HTTP 200 with the overridden value — no indication that their request was modified +- The response does not differentiate between "user's own namespace" and "requested namespace" +- This could lead to user confusion about where their resources were actually deployed +- It's unclear whether the user's Helm values also get silently overridden (e.g., the `values.namespace` field) + +### 5. Resource Isolation + +| Test | Result | Notes | +|------|--------|-------| +| GET instances with other workspaceId query param | ✅ **Isolated** | Returns only own instances (workspaceId filter is server-enforced) | +| DELETE on own instance | ⚠️ **Async deletion** | Returns HTTP 404 on DELETE itself, but instance transitions to `pending-delete` then disappears | + +🔴 **Bug: DELETE returns 404 on success (Medium severity)** +When deleting an instance via `DELETE /clusters/{clusterId}/instances/{instanceId}`: +- The instance transitions to `pending-delete` status +- But the HTTP response status code is **404** rather than 200/202/204 +- The first raw DELETE call returns an empty body (causing JSON parse errors) +- This is an API inconsistency — async deletions should return HTTP 202 Accepted + +### 6. Monitoring & Other Endpoints + +| Test | Result | Notes | +|------|--------|-------| +| GET /monitoring/clusters/.../pods | ✅ **404** | Monitoring endpoints not implemented for this cluster type | +| POST /kubeconfig | ✅ **404** | Kubeconfig endpoint not implemented | + +These endpoints return 404 which is acceptable behavior for features not yet implemented. 
+ +--- + +## Security Assessment + +### Works as Intended ✅ +- Admin endpoints (`/users`, `/auth/register`) properly return 403 +- User cannot access other users' instances via workspaceId manipulation +- User cannot deploy into other users' Kubernetes namespaces +- No PATCH/PUT verbs available to modify existing instance namespaces +- No admin-specific route paths leak data + +### Bugs Found + +1. **DELETE returns 404 on successful async deletion** (Medium) + - Endpoint: `DELETE /clusters/{id}/instances/{id}` + - After call, instance status becomes `pending-delete` and eventually disappears + - But the HTTP response is `404` with empty body + - Expected: `202 Accepted` with a `status: "deleted"` or similar response + - Risk: Clients interpreting HTTP 404 as "not found" will retry or report errors incorrectly + +2. **Silent namespace override without user feedback** (Low) + - Endpoint: `POST /clusters/{id}/instances` + - When requesting deployment into another user's namespace, the server silently uses the caller's namespace + - No warning, no error, no indication in the response + - Expected: Either `403 Forbidden` with "cannot deploy into namespace owned by another user" or a response field indicating the override occurred + - Risk: Low for security (the override correctly prevents cross-tenant deployment), but could cause user confusion + +### No Critical Vulnerabilities Found +- No privilege escalation vectors identified +- No data leakage across workspaces +- No ability to access or manipulate other users' resources diff --git a/docs/security/bugs-security.md b/docs/security/bugs-security.md new file mode 100644 index 0000000..bdba1a2 --- /dev/null +++ b/docs/security/bugs-security.md @@ -0,0 +1,284 @@ +# OCDP Security Audit Report + +**Date:** 2026-05-11 +**Target:** http://10.6.80.114:18080 +**API Base:** http://10.6.80.114:18080/api/v1 + +--- + +## Finding 1: User Enumeration via Login Error Messages + +| Field | Value | +|-------|-------| +| **Test** | 
Authentication Error Disclosure | +| **Severity** | **Medium** | +| **Endpoint** | `POST /api/v1/auth/login` | +| **Status** | Confirmed | + +### What I Did + +```bash +# Non-existent user +curl -s -X POST http://10.6.80.114:18080/api/v1/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"nonexistent_user_xyz","password":"test123"}' + +# Existing user with wrong password +curl -s -X POST http://10.6.80.114:18080/api/v1/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"wrongpassword"}' +``` + +### Expected + +Both requests should return the same generic error message (e.g., "Invalid credentials") to prevent username enumeration. + +### Actual + +- Non-existent user: `{"error":"Login failed","message":"user not found","code":401}` +- Existing user: `{"error":"Login failed","message":"invalid password","code":401}` + +The error messages are different, allowing an attacker to determine whether a username exists in the system. + +### Impact + +An attacker can enumerate valid usernames by observing the error message difference. This is the first step in a targeted brute force or credential stuffing attack. + +### Recommendation + +Return identical error messages for both cases, e.g., `"Invalid username or password"`. + +--- + +## Finding 2: No Rate Limiting on Login Endpoint + +| Field | Value | +|-------|-------| +| **Test** | Brute Force Protection | +| **Severity** | **Medium** | +| **Endpoint** | `POST /api/v1/auth/login` | +| **Status** | Confirmed | + +### What I Did + +```bash +for i in $(seq 1 10); do + curl -s -o /dev/null -w "%{http_code}" \ + -X POST http://10.6.80.114:18080/api/v1/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"wrongpassword"}' +done +``` + +### Expected + +After a threshold (e.g., 5 failed attempts), the server should return HTTP 429 Too Many Requests or temporarily lock the account. 
+ +### Actual + +All 10 rapid sequential attempts returned HTTP 401. No rate limiting, no account lockout, no progressive delay. + +### Impact + +An attacker can brute force passwords without restriction. Combined with Finding 1 (user enumeration), the attack surface is increased. + +### Recommendation + +- Implement rate limiting on the login endpoint (e.g., max 5 attempts per minute per IP). +- Consider account lockout after N failed attempts. +- Add progressive response delays after repeated failures. + +--- + +## Finding 3: Server Version Disclosure + +| Field | Value | +|-------|-------| +| **Test** | Information Disclosure | +| **Severity** | **Low** | +| **Endpoint** | All (HTTP response headers) | +| **Status** | Confirmed | + +### What I Did + +```bash +curl -s -D - http://10.6.80.114:18080/ | head -10 +``` + +### Expected + +Server header should be generic (e.g., `Server: nginx`) or removed entirely. + +### Actual + +```http +Server: nginx/1.27.5 +``` + +### Impact + +Knowing the exact nginx version helps attackers target known vulnerabilities for that specific version. + +### Recommendation + +Disable or obfuscate the Server header in nginx configuration: + +```nginx +server_tokens off; +``` + +--- + +## Finding 4: Permissive CORS Policy + +| Field | Value | +|-------|-------| +| **Test** | CORS Misconfiguration | +| **Severity** | **Low** | +| **Endpoint** | All API endpoints | +| **Status** | Confirmed | + +### What I Did + +```bash +curl -s -D - http://10.6.80.114:18080/api/v1/auth/login \ + -X POST -H "Content-Type: application/json" \ + -d '{"username":"test","password":"test"}' +``` + +### Expected + +CORS `Access-Control-Allow-Origin` should be restricted to the application's origin (e.g., `http://10.6.80.114:18080`) rather than allowing all origins. 
+ +### Actual + +```http +Access-Control-Allow-Origin: * +Access-Control-Allow-Credentials: true +Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS +Access-Control-Allow-Headers: Content-Type, Authorization, X-Requested-With +Access-Control-Max-Age: 86400 +``` + +### Impact + +Any website can make cross-origin requests to the API. If a user is logged in, a malicious site could potentially make authenticated API calls on their behalf (CSRF-style attack, though mitigated by the Bearer token requirement). + +### Recommendation + +Restrict `Access-Control-Allow-Origin` to the specific frontend origin(s) instead of `*`. + +--- + +## Finding 5: Missing Security Headers + +| Field | Value | +|-------|-------| +| **Test** | Security Headers Audit | +| **Severity** | **Low** | +| **Endpoint** | All | +| **Status** | Confirmed | + +### What I Did + +```bash +curl -s -D - http://10.6.80.114:18080/ | head -20 +``` + +### Expected + +Security headers should include: +- `Strict-Transport-Security` +- `X-Content-Type-Options: nosniff` +- `X-Frame-Options: DENY` +- `Content-Security-Policy` + +### Actual + +None of these security headers are present in responses. + +### Impact + +Increases attack surface for clickjacking, MIME-type confusion, and XSS attacks. 
+ +### Recommendation + +Add the following headers to nginx configuration: + +```nginx +add_header X-Frame-Options "DENY" always; +add_header X-Content-Type-Options "nosniff" always; +add_header X-XSS-Protection "0" always; +add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; +add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline';" always; +``` + +--- + +## Finding 6: `/health` Endpoint Returns HTML Instead of Health Status + +| Field | Value | +|-------|-------| +| **Test** | Health Endpoint Behavior | +| **Severity** | **Low** | +| **Endpoint** | `GET /health` | +| **Status** | Confirmed | + +### What I Did + +```bash +curl -s http://10.6.80.114:18080/health +``` + +### Expected + +A health check endpoint should return a structured JSON response (e.g., `{"status":"healthy"}`) with HTTP 200. + +### Actual + +Returns the full `index.html` SPA page with HTTP 200: + +```html +<!DOCTYPE html> +<html> +<head> + <title>OCDP Platform</title> + ... +``` + +### Impact + +Not a direct vulnerability, but misconfigured health checks can cause false positives in monitoring/load balancer health checks. It also means the SPA is served at `/health`, which is unexpected. + +### Recommendation + +Implement a dedicated health endpoint that returns `{"status":"ok"}` with appropriate content type, or remove the `/health` route if not needed. + +--- + +## Tests Passed (No Issues Found) + +| Test | Result | +|------|--------| +| **1. Unauthenticated Access** | **PASS** - All business endpoints return 401 | +| **2. JWT Token Manipulation** | **PASS** - Tampered tokens, alg=none, invalid formats all rejected (401) | +| **3. XSS/SQLi Testing** | **PASS** - Script injection, SQLi patterns safely handled | +| **4. IDOR - Instance Access** | **PASS** - No instances deployed to test; cluster/registry isolation confirmed working | +| **5. 
Sensitive Data Masking** | **PASS** - Cluster certs/keys and registry passwords masked as `••••••••` | +| **6. 
Self-Registration** | **PASS** - Registration endpoint requires authentication (401) | +| **7. Path Traversal** | **PASS** - Path traversal attempts return index.html (not /etc/passwd) | +| **8. Admin Permission Escalation** | **PASS** - Regular users blocked from admin endpoints (403) | + +--- + +## Summary + +| Severity | Count | Findings | +|----------|-------|----------| +| Critical | 0 | — | +| High | 0 | — | +| **Medium** | **2** | User enumeration, No rate limiting | +| **Low** | **4** | Server version disclosure, Permissive CORS, Missing security headers, `/health` returns HTML | +| **Total** | **6** | | + +The platform's core security controls (authentication, JWT validation, authorization, sensitive data masking) are properly implemented. The main areas for improvement are authentication hardening (rate limiting, user enumeration) and HTTP security hardening (headers, CORS). diff --git a/docs/test-scenarios.md b/docs/test-scenarios.md new file mode 100644 index 0000000..dccdfa0 --- /dev/null +++ b/docs/test-scenarios.md @@ -0,0 +1,1640 @@ +# OCDP Test Scenarios + +> **Platform**: OCDP (Open Cloud Deployment Platform) +> **Deployed at**: http://10.6.80.114:18080 +> **Scope**: Full-stack test scenarios covering authentication, configuration, artifact browser, instance lifecycle, monitoring, user management, multi-tenancy, UI/UX, data persistence, security, and edge cases. + +--- + +## Category 1: Authentication & Authorization (25+ cases) + +### AUTH-001 — Login with valid credentials +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin account exists in the system | +| **Steps** | 1. Navigate to `/`
2. Enter valid username and password
3. Click "Login" | +| **Expected Result** | User is authenticated, redirected to `/home`, token stored in localStorage/session, toast "Welcome, [username]!" displayed | + +### AUTH-002 — Login with incorrect password +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid username exists | +| **Steps** | 1. Enter valid username with wrong password
2. Click "Login" | +| **Expected Result** | Login fails with 401 error, red error message displayed, user stays on login page | + +### AUTH-003 — Login with non-existent username +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Enter username that does not exist
2. Enter any password
3. Click "Login" | +| **Expected Result** | 401 returned with the same generic error message as for a wrong password (AUTH-002), so the response does not reveal whether the username exists | + +### AUTH-004 — Login with empty credentials +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Leave username and password empty
2. Click "Login" | +| **Expected Result** | HTML5 form validation prevents submission, or backend returns validation error | + +### AUTH-005 — Login with special characters in username +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter username with SQL injection patterns: `admin' OR '1'='1`
2. Enter password
3. Click "Login" | +| **Expected Result** | Login fails with 401; the injection payload is treated as a literal username, authentication is not bypassed, and no database error or data is leaked in the response | + +### AUTH-006 — Successful login response contains expected fields +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid credentials | +| **Steps** | 1. Call `POST /api/v1/auth/login`
2. Inspect response body | +| **Expected Result** | Response contains `accessToken`, `refreshToken`, `username`, `role`, `permissions`, `userId`, `workspaceId` | + +### AUTH-007 — JWT token sent in Authorization header +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid token obtained | +| **Steps** | 1. Capture XHR request to any protected API
2. Inspect Authorization header | +| **Expected Result** | Header contains `Bearer <accessToken>` | + +### AUTH-008 — Access protected route without token +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clear all auth tokens | +| **Steps** | 1. Navigate directly to `/home`
2. Navigate to `/artifact/instances`
3. Call `GET /api/v1/clusters` without a token | +| **Expected Result** | Frontend redirects both page navigations to `/`; the API call returns 401 | + +### AUTH-009 — Access protected API without token +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/clusters` without Authorization header | +| **Expected Result** | 401 Unauthorized returned | + +### AUTH-010 — Token expiry handling +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Use a token near expiry or manipulate expiry | +| **Steps** | 1. Make API call with expired token | +| **Expected Result** | Backend returns 401, frontend should redirect to login page or attempt token refresh | + +### AUTH-011 — Token refresh flow +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid refresh token exists | +| **Steps** | 1. Call `POST /api/v1/auth/refresh` with valid refresh token
2. Call with expired/invalid refresh token | +| **Expected Result** | Valid refresh returns new access token; invalid returns 401 | + +### AUTH-012 — Logout behavior +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Click logout/sign out button
2. Try to navigate to previously visited protected page | +| **Expected Result** | Token cleared from storage, redirected to login page, protected routes inaccessible | + +### AUTH-013 — Logout clears token from localStorage +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Inspect localStorage for auth tokens after login
2. Logout
3. Inspect localStorage again | +| **Expected Result** | Tokens removed after logout | + +### AUTH-014 — Role-based page access: admin +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/configuration/clusters`
3. Navigate to `/artifact/instances` | +| **Expected Result** | All pages accessible | + +### AUTH-015 — Role-based page access: regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in (non-admin) | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/admin` | +| **Expected Result** | Redirected to `/forbidden` or access denied page | + +### AUTH-016 — Regular user can access own resources pages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/home`
2. Navigate to `/configuration/clusters`
3. Navigate to `/configuration/registries`
4. Navigate to `/artifact/registries`
5. Navigate to `/artifact/instances` | +| **Expected Result** | All pages accessible (user sees own resources) | + +### AUTH-017 — Login page redirect when already authenticated +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Navigate to `/`
2. Observe behavior | +| **Expected Result** | Redirected to `/home` instead of showing login form | + +### AUTH-018 — Login page UI elements +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Not authenticated | +| **Steps** | 1. Observe login page
2. Check for OCDP Console branding, username input, password input, Login button | +| **Expected Result** | Page displays brand icon, "OCDP Console" title, username/password fields with correct autocomplete attributes, Login button | + +### AUTH-019 — Login button loading state +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter credentials and click Login
2. Observe button state during API call | +| **Expected Result** | Button shows spinner/loading state, text changes to "Logging in...", button disabled during request | + +### AUTH-020 — Login error display +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter wrong credentials and submit
2. Observe error message | +| **Expected Result** | Red error text appears below the login button, message is user-friendly (not a raw stack trace) | + +### AUTH-021 — Password change flow (mustChangePassword) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User created with `mustChangePassword: true` | +| **Steps** | 1. Login as that user
2. Observe redirect/behavior
3. Change password
4. Login again with new password | +| **Expected Result** | First login forces password change, old password rejected after change | + +### AUTH-022 — Refresh token expiry logout +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Both access and refresh tokens expired | +| **Steps** | 1. Wait for full token expiry
2. Make any API call that triggers refresh | +| **Expected Result** | User is logged out, redirected to login page | + +### AUTH-023 — Concurrent login sessions +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User account exists | +| **Steps** | 1. Login in browser tab 1
2. Login in browser tab 2 with same credentials
3. Perform operations in both tabs | +| **Expected Result** | Both sessions work independently, no cross-tab interference | + +### AUTH-024 — Admin login shows "Admin only" badge on User Management +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Check for admin badge | +| **Expected Result** | "Admin only" badge visible in the User Management page header | + +### AUTH-025 — Token manipulation (tampered JWT) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Get valid token, modify its payload | +| **Steps** | 1. Decode JWT, change `role` to "admin" for a regular user token
2. Re-encode with modified payload and send API request | +| **Expected Result** | Backend rejects tampered token (signature verification fails), returns 401 | + +--- + +## Category 2: Cluster CRUD (15+ cases) + +### CLU-001 — Create cluster with all required fields +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Logged in as admin/user with cluster permissions | +| **Steps** | 1. Navigate to `/configuration/clusters`
2. Click "Add Cluster"
3. Fill in name, API Server URL, CA cert, client cert, client key
4. Click "Save" | +| **Expected Result** | Cluster created successfully, success toast shown, cluster appears in the list | + +### CLU-002 — Create cluster with token auth +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Click "Add Cluster"
2. Fill name, API Server URL, Bearer Token (leave cert fields empty)
3. Click "Save" | +| **Expected Result** | Cluster created using token authentication | + +### CLU-003 — Create cluster with empty name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Leave name empty
2. Fill all other required fields
3. Click "Save" | +| **Expected Result** | Validation error "Cluster name is required" displayed near the name field | + +### CLU-004 — Create cluster with invalid API Server URL +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter name
2. Enter invalid URL (e.g., `not-a-url`, `ftp://...`)
3. Click "Save" | +| **Expected Result** | Validation error "Invalid URL format" displayed | + +### CLU-005 — Create cluster without auth credentials +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter name and URL
2. Leave all cert/key/token fields empty
3. Click "Save" | +| **Expected Result** | Validation errors on CA/Client Cert/Client Key fields | + +### CLU-006 — Edit cluster name and URL +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing cluster present | +| **Steps** | 1. Click edit on existing cluster
2. Change name and API Server URL
3. Click "Save" | +| **Expected Result** | Cluster updated, changes reflected in list | + +### CLU-007 — Edit cluster with new certificate (overwrite) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing cluster with cert auth | +| **Steps** | 1. Edit cluster
2. Enter new CA cert, client cert, client key in the "new" fields
3. Click "Save" | +| **Expected Result** | Certificate updated, "hasCaData" still appears as configured | + +### CLU-008 — Edit cluster leaving cert fields empty (no change) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing cluster with cert auth | +| **Steps** | 1. Edit cluster
2. Leave the "new" cert fields empty
3. Click "Save" | +| **Expected Result** | Cluster updated, existing certs retained | + +### CLU-009 — Delete cluster +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing cluster with no running instances (or expected behavior defined) | +| **Steps** | 1. Click delete icon on a cluster
2. Confirm deletion in browser confirm dialog | +| **Expected Result** | Cluster removed from list, success toast shown | + +### CLU-010 — Delete cluster cancellation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Existing cluster | +| **Steps** | 1. Click delete on a cluster
2. Click "Cancel" in the confirmation dialog | +| **Expected Result** | Cluster not deleted, still visible in the list | + +### CLU-011 — Health check on reachable cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | A reachable Kubernetes cluster configured | +| **Steps** | 1. Click health check / test button on the cluster row | +| **Expected Result** | Success toast with connection healthy message | + +### CLU-012 — Health check on unreachable cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster with invalid host/cert configured | +| **Steps** | 1. Click health check / test button on the cluster | +| **Expected Result** | Error toast with connection failure message | + +### CLU-013 — Empty clusters state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Navigate to `/configuration/clusters` | +| **Expected Result** | Empty state message displayed, add cluster action available | + +### CLU-014 — Cluster list with multiple clusters +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | 3+ clusters configured | +| **Steps** | 1. Navigate to `/configuration/clusters`
2. Scroll list | +| **Expected Result** | All clusters listed with name, URL, status indicators | + +### CLU-015 — Cluster description display in list +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Cluster with description exists | +| **Steps** | 1. View cluster list
2. Check if description is visible | +| **Expected Result** | Description shown as subtitle or tooltip in the cluster row | + +### CLU-016 — Cluster CRUD as regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Create a new cluster
2. Edit the cluster
3. Delete the cluster | +| **Expected Result** | User can manage their own clusters, or see appropriate empty/permission state | + +### CLU-017 — Cluster form modal close/reset +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Create modal open with partially filled form | +| **Steps** | 1. Fill partial data
2. Click Cancel | +| **Expected Result** | Modal closes, form data cleared when reopened | + +--- + +## Category 3: Registry CRUD (15+ cases) + +### REG-001 — Create registry with all required fields +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Logged in | +| **Steps** | 1. Navigate to `/configuration/registries`
2. Click "Add Registry"
3. Fill name, URL, username, password
4. Click "Save" | +| **Expected Result** | Registry created, success toast shown, appears in list | + +### REG-002 — Create registry with insecure flag +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Open add registry modal
2. Fill required fields
3. Check "Allow insecure connection"
4. Click "Save" | +| **Expected Result** | Registry created with `insecure: true`, works for HTTP/self-signed registries | + +### REG-003 — Create registry without name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Leave name empty
2. Fill other fields
3. Click "Save" | +| **Expected Result** | HTML5 form validation prevents submission (required attribute) | + +### REG-004 — Create registry with invalid URL +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter non-URL string for URL field (type=url)
2. Fill other fields
3. Click "Save" | +| **Expected Result** | HTML5 form validation prevents submission (type=url validation) | + +### REG-005 — Test registry connection +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Saved registry exists and is reachable | +| **Steps** | 1. Edit an existing registry
2. Click "Test Connection" button | +| **Expected Result** | Connection test runs, success/error toast based on connectivity | + +### REG-006 — Test registry connection without saving first +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Creating new registry (unsaved) | +| **Steps** | 1. Fill registry form but do not save
2. Check if "Test Connection" is available | +| **Expected Result** | "Test Connection" button is not shown (only visible for saved registries) | + +### REG-007 — Edit registry name and URL +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit a registry
2. Change its name and URL
3. Save | +| **Expected Result** | Registry updated, changes reflected | + +### REG-008 — Edit registry with new password +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing registry with password set | +| **Steps** | 1. Edit registry
2. Enter new password in the "New Password" field
3. Save | +| **Expected Result** | Password updated, "hasPassword" indicator shows as configured | + +### REG-009 — Edit registry leaving password empty +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit registry
2. Leave new password field empty
3. Save | +| **Expected Result** | Registry updated, existing password retained | + +### REG-010 — Delete registry +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing registry with no active dependencies | +| **Steps** | 1. Click delete on a registry
2. Confirm deletion | +| **Expected Result** | Registry removed from list, success toast | + +### REG-011 — Delete registry with existing instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry has active instances deployed from it | +| **Steps** | 1. Try to delete the registry that has active instances deployed from it | +| **Expected Result** | Backend should return error preventing deletion, or handle cascading gracefully | + +### REG-012 — Empty registries state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No registries configured | +| **Steps** | 1. Navigate to `/configuration/registries` | +| **Expected Result** | Empty state message displayed | + +### REG-013 — Registry toggle insecure flag +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit registry
2. Toggle insecure checkbox
3. Save | +| **Expected Result** | Insecure flag updated | + +### REG-014 — Registry list display +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Multiple registries exist | +| **Steps** | 1. View the registries page
2. Check each row | +| **Expected Result** | Each registry shows name, URL, username, insecure badge (if enabled) | + +### REG-015 — Registry CRUD as regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Create a new registry
2. Edit the registry
3. Delete the registry | +| **Expected Result** | User can manage their own registries | + +--- + +## Category 4: Chart Browser / Launch Instance (20+ cases) + +### CHT-001 — Browse registries in chart browser +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registries configured with Helm charts | +| **Steps** | 1. Navigate to `/artifact/registries`
2. Observe left panel | +| **Expected Result** | Registries listed with expand/collapse toggle, count badge | + +### CHT-002 — Expand registry tree and list repositories +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registry has chart repositories | +| **Steps** | 1. Click on a registry to expand it
2. Observe repositories listed underneath | +| **Expected Result** | Repositories displayed as clickable items, each showing name | + +### CHT-003 — Empty repository list message +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry exists but has no chart repositories | +| **Steps** | 1. Expand registry
2. Observe sub-items | +| **Expected Result** | "No chart repositories found." message shown | + +### CHT-004 — Select repository and view artifacts +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Repository with chart artifacts exists | +| **Steps** | 1. Click on a repository in the left panel
2. Observe right panel | +| **Expected Result** | Repository name displayed in header, artifact tags shown as cards | + +### CHT-005 — Filter artifacts by Charts / All tags +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Repository has both chart and non-chart artifacts | +| **Steps** | 1. Select a repository
2. Click "Charts" filter button
3. Click "All tags" filter button | +| **Expected Result** | "Charts" filter shows only chart artifacts, "All tags" shows all | + +### CHT-006 — Filter toggle active state +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Repository selected | +| **Steps** | 1. Toggle between Charts and All tags | +| **Expected Result** | Active filter button has blue highlight, inactive has default styling | + +### CHT-007 — Tag card displays correct info +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Artifact loaded | +| **Steps** | 1. Observe a tag card | +| **Expected Result** | Card shows tag name, artifact type badge (chart/image), repository path, size | + +### CHT-008 — Launch button visible only for chart tags +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Chart and non-chart artifacts exist | +| **Steps** | 1. Observe a chart tag card
2. Observe a non-chart tag card | +| **Expected Result** | Chart tag card has blue "Launch" button; non-chart card does not | + +### CHT-009 — Copy pull command from tag card +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Tag card displayed | +| **Steps** | 1. Click "Copy" on a tag card | +| **Expected Result** | Helm pull command copied to clipboard, success toast shown | + +### CHT-010 — Search registries/repositories +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple registries with repositories exist | +| **Steps** | 1. Type in the search box in the left panel
2. Observe filtering | +| **Expected Result** | List filters to matching registries and repositories; non-matching entries hidden | + +### CHT-011 — Open Launch modal +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Chart tag selected | +| **Steps** | 1. Click "Launch" on a chart tag | +| **Expected Result** | Launch modal opens with repository:tag header, cluster selector, instance name, namespace, values options | + +### CHT-012 — Launch modal loads clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters exist in the system | +| **Steps** | 1. Open Launch modal
2. Observe cluster dropdown | +| **Expected Result** | Cluster dropdown populated with available clusters | + +### CHT-013 — Launch modal: no clusters available +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Open Launch modal
2. Observe cluster section | +| **Expected Result** | Warning message "No clusters available. Please add a cluster first." displayed, Launch button disabled | + +### CHT-014 — Launch modal: instance name validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open with cluster selected | +| **Steps** | 1. Leave instance name empty
2. Click Launch | +| **Expected Result** | Toast error "Instance name is required" | + +### CHT-015 — Launch modal: namespace validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster with namespace policy configured | +| **Steps** | 1. Select a disallowed namespace (not in allowedNamespaces)
2. Click Launch | +| **Expected Result** | Toast error "Selected namespace is not allowed for this cluster." | + +### CHT-016 — Launch modal: Quick / Guided / YAML input modes +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Click each mode button (Quick, Guided, YAML)
2. Observe content changes | +| **Expected Result** | Quick: info panel about chart defaults. Guided: schema form (if schema exists). YAML: textarea for YAML input. Active mode highlighted. | + +### CHT-017 — Launch modal: YAML validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | YAML input mode selected | +| **Steps** | 1. Enter invalid YAML (e.g., `key: [invalid`)
2. Observe error state | +| **Expected Result** | Red error text below textarea, Launch button disabled | + +### CHT-018 — Launch modal: Load Defaults from values.yaml +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Chart has values.yaml with defaults | +| **Steps** | 1. Switch to YAML mode
2. Click "Load Defaults from values.yaml" | +| **Expected Result** | values.yaml content loaded into the textarea | + +### CHT-019 — Submit launch and navigate to instances +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | All required fields filled with valid data | +| **Steps** | 1. Fill cluster, instance name, namespace
2. Click Launch
3. Wait for redirect | +| **Expected Result** | Instance creation API called, success toast, redirected to `/artifact/instances`, instance shown with "Pending Install" status | + +### CHT-020 — Launch modal: namespace controlled by workspace policy +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has namespace readOnly policy | +| **Steps** | 1. Open Launch modal
2. Select the cluster with the readOnly namespace policy
3. Check namespace field | +| **Expected Result** | Namespace field is disabled with blue info message: "Namespace is controlled by your workspace policy." | + +### CHT-021 — Launch modal: namespace dropdown (allowed namespaces) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has allowedNamespaces configured | +| **Steps** | 1. Select such cluster
2. Observe namespace field | +| **Expected Result** | Namespace becomes a dropdown with only allowed values | + +### CHT-022 — Launch modal: user's default cluster pre-selected +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User has defaultClusterId set | +| **Steps** | 1. Open Launch modal | +| **Expected Result** | Default cluster auto-selected in the dropdown | + +--- + +## Category 5: Instance Management (20+ cases) + +### INS-001 — View instances (all clusters) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instances exist across clusters | +| **Steps** | 1. Navigate to `/artifact/instances` | +| **Expected Result** | All instances listed grouped by cluster, stats cards show totals | + +### INS-002 — Filter instances by cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple clusters with instances | +| **Steps** | 1. Navigate to instances page
2. Select a specific cluster from dropdown | +| **Expected Result** | Only instances from that cluster displayed | + +### INS-003 — Instance status: Deployed +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instance in deployed state | +| **Steps** | 1. Look for a deployed instance card | +| **Expected Result** | Green "DEPLOYED" badge with checkmark icon, status reason shown | + +### INS-004 — Instance status: Failed +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance in failed state | +| **Steps** | 1. Look for a failed instance card | +| **Expected Result** | Red "FAILED" badge, error details visible (lastError section appears) | + +### INS-005 — Instance status: Pending (Install/Upgrade/Rollback/Delete) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance in transition state | +| **Steps** | 1. Look for pending instance card | +| **Expected Result** | Amber/yellow "PENDING INSTALL/UPGRADE/ROLLBACK/DELETE" badge | + +### INS-006 — Instance status: Unknown +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Instance with unknown status | +| **Steps** | 1. Look for unknown instance card | +| **Expected Result** | Gray "UNKNOWN" badge | + +### INS-007 — Refresh instance status +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. Click "Refresh" button on the instance card | +| **Expected Result** | Instance status re-fetched, card updates with latest status | + +### INS-008 — Instance card displays metadata correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. 
Examine instance card content | +| **Expected Result** | Card shows: instance name, repository, version tag, namespace, revision, launch date, status reason | + +### INS-009 — Instance action buttons visibility +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. Check the action bar at bottom of instance card | +| **Expected Result** | Five buttons visible: Refresh, Entries, Diagnostics, Modify, Delete | + +### INS-010 — View entries (Services) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with services | +| **Steps** | 1. Click "Entries" on the instance card
2. Observe modal | +| **Expected Result** | Modal shows Services with name, type, cluster IP, ports; source badge visible | + +### INS-011 — View entries (Ingresses) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with ingresses | +| **Steps** | 1. Open entries modal
2. Check for Ingresses section | +| **Expected Result** | Ingresses listed with host, paths, TLS status | + +### INS-012 — View diagnostics (Describe) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Click "Diagnostics" on instance card
2. Observe Describe tab | +| **Expected Result** | Modal shows Pods (with status, node, restarts, containers) and Services summary | + +### INS-013 — View diagnostics (Events) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Open diagnostics
2. Click "Events" tab | +| **Expected Result** | Kubernetes events listed with type badge, reason, message, timestamp, count | + +### INS-014 — View diagnostics (Pod Logs) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with running pods | +| **Steps** | 1. Open diagnostics
2. Click "Pod Logs" tab | +| **Expected Result** | Pod logs displayed in dark terminal-style blocks, copy button available | + +### INS-015 — Copy pod logs +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Diagnostics with logs loaded | +| **Steps** | 1. Open pod logs
2. Click "Copy Logs" | +| **Expected Result** | Combined logs copied to clipboard, success toast shown | + +### INS-016 — Modify instance version tag +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance, new chart version available | +| **Steps** | 1. Click "Modify" on instance
2. Change version tag
3. Confirm | +| **Expected Result** | Instance upgrade initiated, instance moves to "Pending Upgrade" status | + +### INS-017 — Modify instance with values changes +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Open modify modal
2. Switch to YAML input
3. Update values
4. Confirm | +| **Expected Result** | Instance upgraded with modified values | + +### INS-018 — Terminate/delete instance with confirmation +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Deployed instance exists | +| **Steps** | 1. Click "Delete" on instance card
2. Confirm in browser dialog | +| **Expected Result** | Deletion initiated, instance enters "Pending Delete" status, eventually disappears | + +### INS-019 — Terminate instance cancellation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Click "Delete"
2. Click "Cancel" in the confirmation dialog | +| **Expected Result** | Instance not deleted, dialog dismissed | + +### INS-020 — Empty instances state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No instances deployed | +| **Steps** | 1. Navigate to `/artifact/instances` | +| **Expected Result** | Empty state displayed: "No instances found. Launch your first service instance from Artifact Registries" | + +### INS-021 — Instances auto-refresh +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Instances page open | +| **Steps** | 1. Stay on instances page
2. Observe network requests for 30+ seconds | +| **Expected Result** | Background auto-refresh fires every 30 seconds without user interaction | + +--- + +## Category 6: Cluster Monitoring (10+ cases) + +### MON-001 — View cluster health monitoring +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters configured | +| **Steps** | 1. Navigate to `/monitoring/clusters` | +| **Expected Result** | Cluster monitoring cards displayed with health status badges, metrics grid | + +### MON-002 — Stats cards display summary +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | 3+ clusters with varying health | +| **Steps** | 1. Navigate to monitoring page | +| **Expected Result** | Stats cards show: Total Clusters, Healthy count, Warning count, Error count | + +### MON-003 — Monitoring card shows cluster metrics +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Healthy cluster | +| **Steps** | 1. Observe a cluster monitoring card | +| **Expected Result** | Card shows: cluster name, uptime, node count, pod count, GPU usage, CPU usage bar, memory usage bar, last checked time | + +### MON-004 — Expand node details +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has nodes | +| **Steps** | 1. Click "Show Nodes" button on a cluster card
2. Observe node list | +| **Expected Result** | Node list expands showing individual node metrics (CPU, memory, GPU per node) | + +### MON-005 — Healthy cluster status display +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster is healthy | +| **Steps** | 1. Check card header | +| **Expected Result** | Green "Healthy" badge, green checkmark icon | + +### MON-006 — Error cluster status display +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster is unhealthy/error | +| **Steps** | 1. Check card header | +| **Expected Result** | Red "Error" badge, red X icon | + +### MON-007 — Auto-refresh monitoring +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Monitoring page open | +| **Steps** | 1. Stay on page
2. Observe metrics updates over time | +| **Expected Result** | Page auto-refreshes every 30 seconds, "Auto-refresh every 30 seconds" text visible | + +### MON-008 — Manual refresh +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Monitoring page open | +| **Steps** | 1. Click "Refresh" button | +| **Expected Result** | Data reloaded, loading state shown during refresh | + +### MON-009 — Empty monitoring state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Navigate to monitoring page | +| **Expected Result** | "No Clusters Available" empty state displayed | + +### MON-010 — Error state when cluster unreachable +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster monitoring API returns error | +| **Steps** | 1. Simulate API failure
2. Observe page | +| **Expected Result** | Error state with retry button shown, error message displayed | + +--- + +## Category 7: User Management (Admin) (15+ cases) + +### USR-001 — Create user with role "user" +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Fill username, password, role=User
3. Set namespace, default cluster, resource limits
4. Click "Create User" | +| **Expected Result** | User created, appears in accounts table | + +### USR-002 — Create user with role "admin" +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Open create user form
2. Select Role=Admin
3. Fill username and password only (namespace/limits hidden for admin)
4. Click "Create User" | +| **Expected Result** | Admin user created, namespace/limits not required, role badge shows "admin" | + +### USR-003 — Create user with mustChangePassword +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create user modal | +| **Steps** | 1. Ensure "Require password change after first login" checkbox is checked
2. Create user | +| **Expected Result** | User created and must change password on first login | + +### USR-004 — Create user without required fields +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create form open | +| **Steps** | 1. Leave username or password empty
2. Click "Create User" | +| **Expected Result** | Validation error toast "Username and initial password are required." | + +### USR-005 — Edit user resource limits +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Non-admin user exists | +| **Steps** | 1. Click "Limits" on a user row
2. Change CPU, Memory, GPU, GPU Mem values
3. Click "Save Limits" | +| **Expected Result** | Limits modal closes, success toast, updated values shown in table | + +### USR-006 — Toggle user role (user ↔ admin) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User exists | +| **Steps** | 1. Click "Make Admin" on a user row
2. Observe role change
3. Click "Make User" to revert | +| **Expected Result** | Role toggled, badge updates, admin users can access all pages after re-login | + +### USR-007 — Enable/disable user +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Active user exists | +| **Steps** | 1. Click "Disable" on an active user
2. Observe badge change
3. Try to login as that user | +| **Expected Result** | Badge changes to "Disabled", disabled user cannot login (returns 401) | + +### USR-008 — Delete user +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Non-self user exists | +| **Steps** | 1. Click "Delete" on a user
2. Confirm deletion | +| **Expected Result** | User removed from table | + +### USR-009 — Cannot delete own admin account +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Current admin logged in | +| **Steps** | 1. Look at own user row | +| **Expected Result** | Delete button is disabled (or not rendered) for the current user | + +### USR-010 — Cannot disable own admin account +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Current admin logged in | +| **Steps** | 1. Look at own user row
2. Check Disable button state | +| **Expected Result** | Disable button is disabled for current user | + +### USR-011 — User Management page admin-only badge +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Observe page header | +| **Expected Result** | "Admin only" badge visible near the title | + +### USR-012 — User table displays all columns +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Users exist | +| **Steps** | 1. Observe the accounts table | +| **Expected Result** | Columns: User (username+email), Role (badge), Status (Active/Disabled), Namespace, Quota (CPU/Mem/GPU), Actions | + +### USR-013 — Namespace auto-generation for user +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Creating user with role=user | +| **Steps** | 1. Enter username
2. Check namespace field (before user edits it) | +| **Expected Result** | Namespace auto-populated as `ocdp-u-<username>` | + +### USR-014 — Create user with resource limits +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create form open, role=User | +| **Steps** | 1. Set specific CPU, Memory, GPU, GPU memory limits
2. Create user
3. View user in table | +| **Expected Result** | Limits stored and displayed in the quota column | + +### USR-015 — User list refresh +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Users page open | +| **Steps** | 1. Click "Refresh" button | +| **Expected Result** | User list reloaded, loading state shown | + +--- + +## Category 8: Multi-tenancy & Permissions (15+ cases) + +### MTN-001 — User A cannot see User B's clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users (A, B), each with their own cluster | +| **Steps** | 1. Login as User A
2. List clusters via API
3. Login as User B
4. List clusters via API | +| **Expected Result** | User A sees only their clusters, User B sees only their clusters. No cross-tenant leakage | + +### MTN-002 — User A cannot see User B's registries +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users with separate registries | +| **Steps** | 1. List registries as User A
2. List registries as User B | +| **Expected Result** | Each user sees only their own registries | + +### MTN-003 — User A cannot delete User B's instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User A and B each have instances | +| **Steps** | 1. As User A, try to call DELETE on User B's instance | +| **Expected Result** | Backend returns 403 Forbidden or 404 Not Found | + +### MTN-004 — User A cannot modify User B's instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User B has an instance | +| **Steps** | 1. As User A, try to update User B's instance | +| **Expected Result** | Backend returns 403 Forbidden | + +### MTN-005 — Admin can see all clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user, clusters belonging to multiple users exist | +| **Steps** | 1. Login as admin
2. List clusters | +| **Expected Result** | Admin sees all clusters across all users | + +### MTN-006 — Admin can see all registries +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user, registries belonging to multiple users exist | +| **Steps** | 1. Login as admin
2. List registries | +| **Expected Result** | Admin sees all registries across all users | + +### MTN-007 — Admin can see all instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instances exist across different users | +| **Steps** | 1. Login as admin
2. List instances per cluster | +| **Expected Result** | Admin sees instances from all users' releases | + +### MTN-008 — ResourceQuota enforcement (CPU) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User with CPU quota set, deploying | +| **Steps** | 1. As user with CPU quota=4, try to deploy chart requesting >4 CPU
2. Check deployment outcome | +| **Expected Result** | Deployment fails, or the namespace ResourceQuota blocks creation of the over-quota pods | + +### MTN-009 — ResourceQuota enforcement (GPU) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User with GPU quota=0 | +| **Steps** | 1. Try to deploy a chart requiring GPU | +| **Expected Result** | Deployment fails due to quota enforcement | + +### MTN-010 — Namespace isolation across users +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Users A and B configured with different namespaces | +| **Steps** | 1. User A deploys instance to their namespace
2. User B deploys instance to their namespace
3. Verify User A cannot see User B's pods/instances | +| **Expected Result** | Instances isolated by namespace, no cross-tenant visibility | + +### MTN-011 — Regular user cannot access admin pages +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/admin` | +| **Expected Result** | Redirected to `/forbidden`, access denied page shown | + +### MTN-012 — Regular user does not see "Users" in home setup card +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/home`
2. Check "Setup" section | +| **Expected Result** | "Users" card is not rendered for non-admin users | + +### MTN-013 — Default user permissions match expected set +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Regular user created without custom permissions | +| **Steps** | 1. Get user info from `/api/v1/auth/me` or similar
2. Inspect permissions array | +| **Expected Result** | Default permissions include: home:view, configuration:clusters:manage_own, configuration:registries:manage_own, artifact:registries:view, artifact:instances:manage_own | + +### MTN-014 — User workspace metadata stored and returned +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User exists | +| **Steps** | 1. Login and inspect user response
2. Check workspaceId, workspaceName, namespace, defaultClusterId | +| **Expected Result** | Workspace metadata present and consistent | + +### MTN-015 — Admin can create resources under any user scope +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Check if admin can create clusters/registries without ownership restriction | +| **Expected Result** | Admin-created resources are accessible to admin (global scope) | + +--- + +## Category 9: UI/UX Bugs (20+ cases) + +### UI-001 — Page layout does not overflow horizontally +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page at 1440px viewport width
2. Check for horizontal scrollbar | +| **Expected Result** | No horizontal overflow, all content fits within viewport | + +### UI-002 — Responsive layout at mobile breakpoint (768px) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Resize browser to 768px width
2. Navigate through all pages | +| **Expected Result** | Navigation collapses, content stacks vertically, no broken layout | + +### UI-003 — Responsive layout at tablet breakpoint (1024px) +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Resize to 1024px width
2. Check all pages | +| **Expected Result** | Content reflows gracefully, no overlap | + +### UI-004 — Loading state displays correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Slow network (simulated) | +| **Steps** | 1. Enable network throttling
2. Navigate to each page | +| **Expected Result** | Loading spinner/message appears while data is being fetched, content appears after load | + +### UI-005 — No flickering during page transitions +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate between pages rapidly
2. Observe visual transitions | +| **Expected Result** | Smooth transitions, no white flash or layout shift | + +### UI-006 — Empty states show informative messages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Fresh/empty system | +| **Steps** | 1. Check clusters page (empty)
2. Check registries page (empty)
3. Check instances page (empty) | +| **Expected Result** | Each page has a distinct, informative empty state message with relevant icon | + +### UI-007 — Error states show retry action +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | API returns error | +| **Steps** | 1. Simulate backend error
2. Observe error state | +| **Expected Result** | Error message displayed with a "Retry" button | + +### UI-008 — Form validation feedback is visible +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Submit forms with invalid data | +| **Expected Result** | Red error text appears near the invalid field, or toast notification with specific message | + +### UI-009 — Toast notifications appear and disappear +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Perform actions that trigger toasts (save, delete, error)
2. Observe toast behavior | +| **Expected Result** | Toast appears at expected position, auto-dismisses after timeout, can be dismissed manually | + +### UI-010 — Button states: disabled +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Find disabled buttons (Launch when no clusters, Submit with invalid YAML) | +| **Expected Result** | Disabled buttons have reduced opacity, no pointer cursor, cannot be clicked | + +### UI-011 — Button states: loading +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Action in progress | +| **Steps** | 1. Click a button that triggers an API call
2. Observe button during request | +| **Expected Result** | Button shows spinner/loading indicator, disabled during request | + +### UI-012 — Truncation of long text labels +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Long names exist | +| **Steps** | 1. Create resources with very long names
2. Observe display in cards and lists | +| **Expected Result** | Long text is truncated with ellipsis, no layout breakage | + +### UI-013 — Sidebar navigation highlight matches current page +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page
2. Check sidebar nav item highlight | +| **Expected Result** | Current page's nav item is highlighted/active | + +### UI-014 — Page header shows correct title and icon +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page | +| **Expected Result** | Page header displays correct title, icon, and description | + +### UI-015 — Color contrast meets readability +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Inspect text colors against backgrounds using DevTools | +| **Expected Result** | All text meets WCAG AA contrast ratio (4.5:1 for normal text, 3:1 for large text) | + +### UI-016 — Access denied page renders correctly +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User with insufficient permissions | +| **Steps** | 1. Access a restricted page | +| **Expected Result** | Access denied page shown with "Back Home" button | + +### UI-017 — Cluster list shows health status indicator +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Clusters exist | +| **Steps** | 1. Navigate to cluster config page
2. Check each cluster row | +| **Expected Result** | Each cluster shows a health status indicator (green/yellow/red dot or similar) | + +### UI-018 — Search/filter in chart browser works correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple registries/repositories exist | +| **Steps** | 1. Type partial name in search box
2. Type a query that matches no results | +| **Expected Result** | Matching entries remain visible, non-matching hidden. "No registries" state when nothing matches. | + +### UI-019 — Modal backdrop click behavior +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Any modal open | +| **Steps** | 1. Open a modal (e.g., Launch modal, Cluster form, Modify modal)
2. Click on the dark backdrop | +| **Expected Result** | Modal closes (or stays open depending on design). Should not cause errors. | + +### UI-020 — Home page displays all sections +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Logged in as admin | +| **Steps** | 1. Navigate to `/home` | +| **Expected Result** | Three sections visible: primary actions (Launch Instance, Instances, Cluster Monitoring), runtime focus sidebar, setup section | + +--- + +## Category 10: Data Persistence (10+ cases) + +### PER-001 — Data survives page refresh (clusters) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters exist | +| **Steps** | 1. Navigate to clusters page
2. Refresh the page (F5) | +| **Expected Result** | Clusters still displayed after refresh | + +### PER-002 — Data survives page refresh (registries) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registries exist | +| **Steps** | 1. Navigate to registries page
2. Refresh | +| **Expected Result** | Registries still displayed | + +### PER-003 — Data survives page refresh (instances) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instances exist | +| **Steps** | 1. Navigate to instances page
2. Refresh | +| **Expected Result** | Instances still displayed | + +### PER-004 — Data survives browser tab close/reopen +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Resources exist | +| **Steps** | 1. Close browser tab
2. Open new tab and navigate to app
3. Login
4. Check all pages | +| **Expected Result** | All data intact after session restoration | + +### PER-005 — Created cluster persists after logout/login +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster was created | +| **Steps** | 1. Logout
2. Login again
3. Check cluster list | +| **Expected Result** | Cluster still present | + +### PER-006 — Created registry persists after logout/login +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry was created | +| **Steps** | 1. Logout
2. Login
3. Check registry list | +| **Expected Result** | Registry still present | + +### PER-007 — Instance deployment persists across page navigation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance was launched | +| **Steps** | 1. Navigate away from instances page
2. Navigate back to instances page | +| **Expected Result** | Instance still listed with its status | + +### PER-008 — Cache consistency: new cluster appears in Launch dropdown +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Clusters page and artifact browser cached | +| **Steps** | 1. Add a new cluster
2. Navigate to chart browser
3. Open Launch modal
4. Check cluster dropdown | +| **Expected Result** | New cluster visible in dropdown (cache refreshed properly) | + +### PER-009 — Cache consistency: new registry appears in chart browser +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry page open, then chart browser | +| **Steps** | 1. Add a new registry
2. Navigate to chart browser
3. Check left panel | +| **Expected Result** | New registry visible (after refresh or auto-reload) | + +### PER-010 — Delete data persists (no phantom data) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Item deleted earlier | +| **Steps** | 1. Delete a cluster/registry
2. Refresh page
3. Check list | +| **Expected Result** | Deleted item does not reappear | + +--- + +## Category 11: Security (15+ cases) + +### SEC-001 — XSS via form inputs (cluster name) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Create cluster with name `<script>alert(1)</script>`
2. Observe if script executes on the list page | +| **Expected Result** | Script tag is escaped/rendered as text, no XSS execution | + +### SEC-002 — XSS via form inputs (registry description) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Create registry with description containing HTML/script tags
2. Observe rendering | +| **Expected Result** | HTML is escaped, no script execution | + +### SEC-003 — XSS via instance name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Launch instance with name `<img src=x onerror=alert(1)>`
2. Navigate to instances page | +| **Expected Result** | Name is rendered safely, no XSS | + +### SEC-004 — IDOR: access another user's instance detail +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User A has an instance, User B knows its ID | +| **Steps** | 1. Login as User B
2. Try to access User A's instance detail by ID | +| **Expected Result** | Backend returns 403 Forbidden or 404 | + +### SEC-005 — IDOR: modify another user's instance +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User B has an instance; User A knows its ID | +| **Steps** | 1. Login as User A
2. Attempt PUT on User B's instance | +| **Expected Result** | 403 Forbidden | + +### SEC-006 — IDOR: delete another user's cluster +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users exist | +| **Steps** | 1. User A creates a cluster
2. User B attempts to delete it using cluster ID | +| **Expected Result** | 403 Forbidden | + +### SEC-007 — Sensitive data in API responses +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/clusters`
2. Inspect response for raw certs/keys/tokens | +| **Expected Result** | Sensitive fields are masked or encrypted (e.g., `hasCaData: true` instead of raw cert) | + +### SEC-008 — Sensitive data in registry responses +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/registries`
2. Check response for password exposure | +| **Expected Result** | Password not returned in plain text; `hasPassword` boolean used instead | + +### SEC-009 — JWT token manipulation: signature removed +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid JWT obtained | +| **Steps** | 1. Strip JWT signature, keep base64 payload
2. Send API request with tampered token | +| **Expected Result** | Backend rejects token, returns 401 | + +### SEC-010 — JWT token manipulation: alg changed to "none" +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid JWT obtained | +| **Steps** | 1. Change JWT header `alg` to `none`
2. Send modified token | +| **Expected Result** | Backend rejects, returns 401 | + +### SEC-011 — Directory traversal in repository name +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Try to access artifacts with `../../etc/passwd` as repository name | +| **Expected Result** | Returns 400 Bad Request or 404, no directory traversal occurs | + +### SEC-012 — Rate limiting on login endpoint +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Send rapid login requests (20+ in 1 second) with wrong passwords | +| **Expected Result** | After threshold, rate limiting kicks in (429 Too Many Requests) | + +### SEC-013 — Brute force protection +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Attempt login with wrong password 10+ times in succession | +| **Expected Result** | Account should be temporarily locked or delayed responses introduced | + +### SEC-014 — Session fixation test +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Capture pre-auth token
2. Login
3. Check if pre-auth token is still valid | +| **Expected Result** | Pre-auth token invalidated, new token issued on login | + +### SEC-015 — No sensitive data in error messages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Trigger various API errors (invalid auth, bad request, server error)
2. Inspect error responses | +| **Expected Result** | Error messages do not reveal stack traces, SQL queries, or system internals | + +--- + +## Category 12: Edge Cases (10+ cases) + +### EDG-001 — Rapid double-click on submit buttons +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Click "Save" or "Create" button rapidly multiple times | +| **Expected Result** | Button is disabled after first click (loading state), duplicate submissions prevented | + +### EDG-002 — Very long instance name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Enter instance name of 253+ characters
2. Submit | +| **Expected Result** | Backend validates Kubernetes naming constraints, returns error if too long | + +### EDG-003 — Special characters in namespace +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Enter namespace with uppercase letters or special characters
2. Submit | +| **Expected Result** | Backend validates DNS-1123 label constraints, returns error | + +### EDG-004 — Browser back/forward navigation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Authenticated | +| **Steps** | 1. Navigate to page A, then page B
2. Click browser back button
3. Click browser forward button | +| **Expected Result** | Navigation works correctly, no infinite redirects, no blank pages | + +### EDG-005 — Concurrent operations: launch instance in two tabs +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Same user, same cluster, same namespace | +| **Steps** | 1. Tab 1: Launch instance "test-a"
2. Tab 2 (simultaneously): Launch instance "test-b" | +| **Expected Result** | Both instances created, no data race or corruption | + +### EDG-006 — Delete cluster with running instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has active Helm releases | +| **Steps** | 1. Attempt to delete a cluster that has running instances | +| **Expected Result** | Backend should reject deletion or return a warning about active instances | + +### EDG-007 — Instance name collision (same namespace) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance "test" already exists in namespace "default" on cluster X | +| **Steps** | 1. Try to create another instance named "test" in the same namespace and cluster | +| **Expected Result** | Backend returns conflict error, instance not created | + +### EDG-008 — Rapid create/delete/create same resource name +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Create cluster named "test-cluster"
2. Delete it
3. Immediately create another cluster named "test-cluster" | +| **Expected Result** | Second creation succeeds after deletion completes | + +### EDG-009 — Helm release name collision across namespaces +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Helm release exists in namespace-a | +| **Steps** | 1. Launch instance with same name in namespace-b on the same cluster | +| **Expected Result** | Helm releases are namespaced, so creation should succeed in different namespace | + +### EDG-010 — YAML values with non-object top-level structure +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Launch modal open, YAML mode | +| **Steps** | 1. Enter just a string `"hello"` or array `[1,2,3]` as YAML values
2. Click Launch | +| **Expected Result** | YAML validation error: "Values YAML must be an object" | + +--- + +## Priority Distribution Summary + +| Priority | Count | +|----------|-------| +| P0 (Critical) | 26 | +| P1 (High) | 87 | +| P2 (Medium) | 34 | +| **Total** | **147** | + +## Existing Test Coverage Reference + +The following test scripts already exist in `test/` and cover portions of these scenarios: + +| Test Script | Coverage | +|-------------|----------| +| `current-platform-smoke.sh` | Login, registry health, chart browsing, optional deploy/cleanup | +| `frontend-playwright-smoke.py` | Login UI, chart browser rendering, instance page, mobile layout | +| `frontend-interactions-audit.py` | Auth, navigation, config modals, health buttons, launch modes | +| `multitenant_rbac_api_contract.py` | Auth denial, RBAC differences, resource isolation, admin cleanup | +| `multitenant_rbac_ui_playwright.py` | Multi-tenant UI isolation tests | +| `vllm_k3s_deploy_smoke.py` | Real k3s deployment, GPU quota, ResourceQuota, diagnostics | +| `chart_values_yaml_api_contract.py` | Values YAML API contract validation | +| `user_namespace_quota_api_contract.py` | User namespace and quota API contract | +| `instance_card_action_layout_playwright.py` | Instance card action button layout | diff --git a/docs/test-users.json b/docs/test-users.json new file mode 100644 index 0000000..7aec55d --- /dev/null +++ b/docs/test-users.json @@ -0,0 +1,79 @@ +{ + "meta": { + "createdAt": "2026-05-11T09:58:00Z", + "apiBase": "http://10.6.80.114:18080/api/v1", + "adminUsername": "admin", + "adminPassword": "admin123" + }, + "existingResources": { + "clusters": { + "k8s": { + "id": "23880994-dfe4-48d0-abc0-b49692cc630a", + "host": "https://10.6.80.12:6443" + }, + "k3s": { + "id": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "host": "https://10.6.80.23:6443" + } + }, + "registries": { + "harbor-bwgdi": { + "id": "83b823af-873b-457c-912c-9ccde3cb12e6", + "url": "https://harbor.bwgdi.com" + } + } + 
}, + "testUsers": [ + { + "id": "0c70fce6-fa69-4231-979a-5970ff9b854b", + "username": "test-user-a", + "password": "TestUserA123!", + "email": "test-user-a@local.ocdp", + "role": "user", + "purpose": "Frontend UI testing", + "namespace": "ocdp-u-test-a", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "4", + "quotaMemory": "8Gi", + "quotaGpu": "1", + "quotaGpuMemory": "5000" + }, + { + "id": "819b12ec-718e-48be-92bc-0cd1f7205926", + "username": "test-user-b", + "password": "TestUserB123!", + "email": "test-user-b@local.ocdp", + "role": "user", + "purpose": "API/deploy testing", + "namespace": "ocdp-u-test-b", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "2", + "quotaMemory": "4Gi", + "quotaGpu": "0", + "quotaGpuMemory": "0" + }, + { + "id": "04ef67ba-49c2-44e2-87b4-b71b5d9f36dc", + "username": "test-user-c", + "password": "TestUserC123!", + "email": "test-user-c@local.ocdp", + "role": "user", + "purpose": "Permission isolation testing", + "namespace": "ocdp-u-test-c", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "4", + "quotaMemory": "8Gi", + "quotaGpu": "1", + "quotaGpuMemory": "5000" + }, + { + "id": "8bcffd0e-4e7a-4e9a-a47b-bfdb463698c2", + "username": "test-admin-d", + "password": "TestAdminD123!", + "email": "test-admin-d@local.ocdp", + "role": "admin", + "purpose": "Admin features testing", + "namespace": "ocdp-ws-default" + } + ] +} diff --git a/docs/test2-quota.md b/docs/test2-quota.md new file mode 100644 index 0000000..2479c77 --- /dev/null +++ b/docs/test2-quota.md @@ -0,0 +1,156 @@ +# Resource Quota Enforcement Test Report + +**Date:** 2026-05-11 +**Tester:** test-user-b +**Namespace:** ocdp-u-test-b +**User Quota:** cpu=2, memory=4Gi, gpu=0, gpumem=0 + +--- + +## Test Summary + +| Test | Description | Expected | Actual | Result | +|------|-------------|----------|--------|--------| +| A | Deploy nginx (default, within quota) | Success | Deployed (status: 
`deployed`) | ✅ PASS | +| B | Deploy nginx (cpu=4, mem=8Gi, replicas=5, exceeds quota) | Blocked by quota | Helm release created, Service created, all pods blocked by ResourceQuota (status: `pending-install`) | ⚠️ PARTIAL | +| C | Deploy vllm-serve with gpu=1 (gpu quota = 0) | Blocked by quota | Helm release created, all pods blocked by ResourceQuota (status: `pending-install`) | ⚠️ PARTIAL | + +--- + +## Detailed Results + +### Test A: Deploy nginx within quota limits + +- **Instance:** `quota-test-nginx` (ed846c33-3631-4d54-adce-c7f00210176f) +- **Chart:** charts/nginx:22.1.1 +- **Values:** defaults +- **API Response:** HTTP 200, status: `pending-install` +- **Final Status after 21s:** `deployed` ("Instance deployed successfully") +- **K8s Resource Usage:** requests.cpu=100m/2, requests.memory=128Mi/4Gi + +### Test B: Deploy nginx exceeding quota + +- **Instance:** `quota-test-nginx-2` (36c0350f-089c-41c2-a66e-e93539c00d52) +- **Chart:** charts/nginx:22.1.1 +- **Values:** replicaCount=5, resources.limits.cpu=4/memory=8Gi, resources.requests.cpu=2/memory=4Gi +- **API Response:** HTTP 200, status: `pending-install` +- **Final Status (observed for 90s+):** `pending-install` (never transitioned to `deployed` or `failed`) +- **K8s Behavior:** + - Helm release created: `sh.helm.release.v1.quota-test-nginx-2.v1` + - TLS secret created + - Service created, IP assigned + - Deployment created, ReplicaSet scaled up + - **All pod creations FAILED** with: `Error creating: pods "..." 
is forbidden: exceeded quota: tenant-quota, requested: requests.cpu=2,requests.memory=4Gi, used: requests.cpu=100m,requests.memory=128Mi, limited: requests.cpu=2,requests.memory=4Gi` + +### Test C: Deploy GPU instance (gpu quota = 0) + +- **Instance:** `quota-test-gpu` (a0d692c8-cdf8-4248-a6d4-1468ad4a7cc7) +- **Chart:** charts/vllm-serve:0.6.0 +- **Values:** resources.gpuLimit=1, resources.gpuMem=5000 +- **API Response:** HTTP 200, status: `pending-install` +- **Final Status (observed for 30s+):** `pending-install` +- **K8s Behavior:** + - vllm-serve chart defaults: requests.cpu=8, requests.memory=16Gi, requests.nvidia.com/gpu=1, requests.nvidia.com/gpumem=5k + - All pods blocked: `exceeded quota: tenant-quota, requested: requests.cpu=8,requests.memory=16Gi,requests.nvidia.com/gpu=1,..., limited: requests.cpu=2,requests.memory=4Gi,requests.nvidia.com/gpu=0` + +--- + +## Key Findings + +### 1. No API-Level (Pre-flight) Quota Enforcement + +The backend API accepts **all** deployment requests regardless of whether they exceed the user's quota. There is no validation at the API layer that checks: + +- Whether the requested resources exceed the user's quota limits +- Whether the user's quota is already fully consumed by existing deployments + +**Evidence:** All three deployments returned HTTP 200 with `status: pending-install`. The backend logs contain zero quota-related entries. + +### 2. Kubernetes ResourceQuota Enforces at Pod Level + +The Kubernetes `ResourceQuota` object `tenant-quota` in namespace `ocdp-u-test-b` does enforce limits, but only at the **pod creation** level: + +```yaml +spec: + hard: + requests.cpu: "2" + requests.memory: 4Gi + requests.nvidia.com/gpu: "0" + requests.nvidia.com/gpumem: "0" +``` + +When pods exceed quota, Kubernetes explicitly refuses to create them with a clear error message. +However, Helm releases, Services, Deployments, and ReplicaSets are **still created** even when pods are blocked. + +### 3. 
Stuck at "pending-install" + +Instances that exceed quota remain stuck in `pending-install` status **indefinitely** — they never transition to `deployed`, `failed`, or any error status. The OCDP platform does not detect the ResourceQuota rejection and update the instance status accordingly. The only way to know about the failure is to check Kubernetes events directly: + +```bash +kubectl get events -n ocdp-u-test-b +``` + +### 4. GPU Quota Enforcement + +Users with `gpu=0` quota **can** submit deployments referencing GPU-enabled charts. The API does not reject them. Only the K8s ResourceQuota blocks pod creation at runtime. This could lead to: +- Unnecessary Helm releases and resource overhead in the cluster +- Confusion for users whose deployments appear to hang at `pending-install` + +### 5. Quota Exposed in Login Response + +The login response includes quota information: +```json +{ + "quotaCpu": "2", + "quotaMemory": "4Gi", + "quotaGpu": "0", + "quotaGpuMemory": "0" +} +``` +This could be used by the frontend to show usage limits, but no pre-flight check uses it server-side. + +--- + +## Recommendations + +1. **Add pre-flight quota validation** in the backend API: before accepting a deployment, check whether the requested resources (from chart values) would exceed the user's quota. Return HTTP 4xx with a clear error message. + +2. **Handle "pending-install" timeout**: implement a watcher that detects when a Helm release has been created but pods remain stuck (e.g., due to ResourceQuota) and: + - Update instance status to `failed` with a descriptive `statusReason` + - Clean up the Helm release, Service, etc. + - Optionally surface the K8s error message via the API + +3. **GPU quota pre-check**: if a chart requests GPU resources and the user's `gpu=0`, reject the deployment at the API level before creating any Kubernetes resources. + +4. **UI quota indicator**: show remaining quota (used vs. 
hard limit) on the deployment form so users know their limits before submitting. + +--- + +## ResourceQuota YAML (for reference) + +```yaml +apiVersion: v1 +kind: ResourceQuota +metadata: + name: tenant-quota + namespace: ocdp-u-test-b + labels: + ocdp.io/managed-by: ocdp + ocdp.io/tenant: ocdp-u-test-b +spec: + hard: + requests.cpu: "2" + requests.memory: 4Gi + requests.nvidia.com/gpu: "0" + requests.nvidia.com/gpumem: "0" +``` + +--- + +## Cleanup Verification + +All test instances were removed after testing: +- `quota-test-nginx` ✅ deleted (pods terminated, helm release removed, quota back to 0) +- `quota-test-nginx-2` ✅ cleaned up (no pods created, resources released) +- `quota-test-gpu` ✅ cleaned up (no pods created, resources released) +- ResourceQuota used: all resources at 0 diff --git a/docs/test2-report.md b/docs/test2-report.md new file mode 100644 index 0000000..09f674f --- /dev/null +++ b/docs/test2-report.md @@ -0,0 +1,141 @@ +# OCDP 第二次测试报告 + +**测试日期:** 2026-05-11 +**测试环境:** http://10.6.80.114:18080 + +--- + +## 测试1: 资源配额限额 + +### 测试方法 +使用 test-user-b(quota: cpu=2, mem=4Gi, gpu=0, gpumem=0)在 k3s 集群部署 nginx chart + +### 测试结果 + +| 测试 | 操作 | 预期 | 实际 | 结论 | +|------|------|------|------|------| +| Test A | 部署 nginx(默认值,在配额内) | 成功 | 部署完成,状态 deployed | ✅ | +| Test B | 部署 nginx(requests.cpu=2, mem=4Gi, replica=5,超配额) | 被配额阻止 | Helm release 创建成功,所有 Pod 被 ResourceQuota 阻塞,状态永远 stuck 在 pending-install | ⚠️ 部分通过 | +| Test C | 部署 vllm-serve(gpuLimit=1,gpu配额=0) | 被配额阻止 | Helm release 创建成功,Pod 被 ResourceQuota 阻塞,状态 pending-install | ⚠️ 部分通过 | + +### 关键发现 + +**1. 没有 API 层的预检查配额验证** +- 后端 API 无条件接受所有部署请求,不检查是否超配额 +- 所有超配额请求返回 HTTP 200 + status: pending-install +- 后端日志中**没有任何配额相关的条目** + +**2. K8s ResourceQuota 在 Pod 级别执行** +- `tenant-quota` ResourceQuota 对象确实存在并执行限制 +- 当 Pod 超配额时,K8s 明确拒绝创建并给出错误消息 +- 但 Helm release、Service、Deployment、ReplicaSet **仍然被创建** + +**3. 
实例永远 stuck 在 "pending-install"** +- 超配额的实例永远不会转换到 deployed/failed/error +- OCDP 平台不检测 ResourceQuota 拒绝事件 +- 唯一知道失败的方式是直接查 K8s events + +**4. GPU 配额绕过** +- gpu=0 的用户可以提交需要 GPU 的 chart 部署 +- K8s ResourceQuota 最终会阻止,但 Helm release 等资源已被创建 + +**5. 有效的 ResourceQuota 配置** +```yaml +apiVersion: v1 +kind: ResourceQuota +metadata: + name: tenant-quota + namespace: ocdp-u-test-b +spec: + hard: + requests.cpu: "2" + requests.memory: 4Gi + requests.nvidia.com/gpu: "0" + requests.nvidia.com/gpumem: "0" +``` + +### 建议 +1. **添加 API 层预检查配额验证** — 在接受部署前检查请求资源是否超过用户配额 +2. **处理 pending-install 超时** — 监控 Helm release 创建后 Pod 是否 stuck,更新状态为 failed +3. **GPU 配额预检查** — 如果 chart 需要 GPU 而用户 gpu=0,在 API 层拒绝 +4. **UI 配额指示器** — 在部署表单上显示剩余配额 + +--- + +## 测试2: values.yaml 覆盖优先级 + +### 测试方法 +使用 test-user-c(quota: cpu=4, mem=8Gi, gpu=1, gpumem=5000)部署 vllm-serve:0.6.0 chart + +### 测试结果 + +| 方法 | 提交方式 | 是否部署成功 | 存储的值 | 结论 | +|------|----------|-------------|---------|------| +| 方法1 | `values` JSON 字段 | ✅ | cpuRequest=2, gpuMem=10000 | JSON 值被准确接受和存储 | +| 方法2 | `valuesYaml` 字符串 | ✅ | cpuRequest=4, gpuMem=10000 | YAML 被正确解析为结构化 values | +| 方法3 | 同时提供 `values` + `valuesYaml`(冲突) | ✅ 无任何错误/警告 | **values JSON 全胜** | `values` JSON 静默覆盖 `valuesYaml` | +| 方法4 | 不提供任何 values(使用 chart 默认) | ✅ | 仅 namespace | chart 默认值不存储在 API 响应中 | + +### 优先级最终结论 + +| 优先级 | 来源 | 说明 | +|--------|------|------| +| **最高** | `values` JSON 字段 | 请求体中的结构化 JSON | +| **中** | `valuesYaml` 字符串 | 请求体中的 YAML 字符串 | +| **最低** | Chart 内置 values.yaml | Helm chart 打包的默认值 | + +### 冲突测试详细结果 + +当同时提供 `values` 和 `valuesYaml` 且值冲突时: +- `values` JSON 字段**完全覆盖** `valuesYaml` +- **没有任何错误或警告**返回给用户 +- 两者被合并到统一的 DB `values` 字段 + +### gpuMem=10000 行为 +- 整数值 `10000` 在 `values` JSON 和 `valuesYaml` 中都被**正确接受** +- 无单位转换(作为整数 MB 标量存储) +- 符合项目规范 + +### 建议 +1. **记录优先级顺序** — 用户需知道同时提供两者时 values JSON 优先 +2. **添加冲突警告** — 当两个字段存在冲突值时应返回警告 +3. 
**考虑废弃一个字段** — values 和 valuesYaml 语义重复易混淆 + +--- + +## 测试3: 前端 UI 溢出/滚动/刷新 + +### 测试方法 +Playwright + 源码分析,测试 1920/768/375 三个视口 + +### 测试结果 +**总体结论: PASS** — 没有导致功能问题的关键溢出问题 + +| 测试项 | 结果 | 详情 | +|--------|------|------| +| 水平溢出 | ✅ 无问题 | 所有视口均无水平溢出 | +| 文本截断 | ⚠️ 1 个低风险 | InstanceCard h3 标题 truncate 无 title tooltip | +| 响应式设计 | ✅ 正确 | sm/md/lg/xl 断点覆盖完整 | +| 滚动行为 | ✅ 流畅 | Sidebar 和内容区独立滚动,overscroll-contain 防滚动穿透 | +| 模态框布局 | ✅ 正确 | body scroll lock + 内容独立滚动 | +| 页面刷新 | ✅ 正常 | 受保护路由正确重定向到登录页 | +| 颜色对比度 | ⚠️ 1 个中风险 | 登录页错误文本 red-400 在白色背景上仅 2.5:1 (WCAG AA 要求 4.5:1) | + +### 通过的细分项 +- Chart Browser 全高 + overflow-y-auto 布局 ✅ +- InstanceCard 操作按钮网格 grid-cols-2/3/5 响应正确 ✅ +- Tabs 支持 overflow-x-auto 水平滚动 ✅ +- 用户管理表格 overflow-x-auto ✅ +- iOS 触摸滚动 (`-webkit-overflow-scrolling: touch`) 已配置 ✅ + +### 建议 +1. 将登录页错误文本从 text-red-400 改为 text-red-600/700 +2. InstanceCard h3 标题添加 title 属性 + +--- + +## 综合建议 +1. 添加 API 层配额预检查 +2. 处理 pending-install 超时 + 状态更新 +3. 记录 values 覆盖优先级并添加冲突警告 +4. 统一 values JSON/YAML 的 API 设计 diff --git a/docs/test2-ui-overflow.md b/docs/test2-ui-overflow.md new file mode 100644 index 0000000..12c25b8 --- /dev/null +++ b/docs/test2-ui-overflow.md @@ -0,0 +1,271 @@ +# QA Report: UI Layout Overflow & Responsiveness Test + +**Date:** 2026-05-11 +**Environment:** http://10.6.80.114:18080 +**Browser:** Chromium (Playwright headless) +**Test Credentials:** test-user-a / TestUserA123! + +--- + +## Test Results Summary + +| # | Test | Status | Issues Found | +|----|------|--------|-------------| +| 1 | Login Page Layout | ✅ Pass | 1 Low | +| 2 | Home Page | ✅ Pass | 0 | +| 3 | Chart Browser (Registries) | ✅ Pass | 0 | +| 4 | Instances Page | ✅ Pass | 0 | +| 5 | Monitoring Page | ✅ Pass | 0 | +| 6 | Tablet Responsive (768px) | ✅ Pass | 0 | +| 7 | Mobile Responsive (375px) | ✅ Pass | 0 | +| 8 | Deep DOM Overflow Analysis | ✅ Pass | 0 | +| 9 | Source Code CSS Pattern Audit | ✅ Pass | 2 Info | +| 10 | Text Visibility & Contrast | ⚠️ 1 Issue | 1 Medium | + +--- + +## 1. 
Login Page (AuthPage.tsx) + +**Location:** `frontend/src/features/auth/pages/AuthPage.tsx` + +**Layout:** +- Form card is `max-w-md` (448px), horizontally centered via `flex items-center justify-center` +- Desktop viewport (1920×1080): card is perfectly centered (checked via bounding rect) +- Background: `bg-slate-50` with gradient overlay +- Card: `bg-white/95 backdrop-blur-xl` with `shadow-2xl` + +**Responsive:** +- Padding: `px-4 sm:px-6` — increases from 16px → 24px on `sm:` breakpoint +- Card padding: `p-6 sm:p-7` +- Icon: `w-11 h-11` — fixed size, not responsive + +### ✅ Issue #1-LOW: Login error text color contrast +- **File:** `AuthPage.tsx:96` +- **Pattern:** `

` +- **Problem:** `text-red-400` (`#f87171`) on white background has a contrast ratio of ~2.5:1, which fails WCAG AA (minimum 4.5:1 for normal text). Error messages may be hard to read for users with visual impairments. +- **Recommendation:** Use `text-red-600` or `text-red-700` for error text on white backgrounds. + +--- + +## 2. Home Page + +**Location:** `frontend/src/features/home/pages/HomePage.tsx` + +**Layout:** +- Main container: `min-h-full bg-slate-50 px-4 py-6 sm:px-6 lg:px-8` +- Two-column layout on large screens: `lg:grid-cols-[1.4fr_0.8fr]` +- Feature cards: `md:grid-cols-3` +- Quick actions: `md:grid-cols-3` + +**Scroll:** ScrollHeight=1080, Viewport=1080 — content fits exactly without scrolling on 1080p. + +**Overflow:** No horizontal overflow detected. Proper use of responsive padding and grid columns. + +### Passing — no issues found. + +--- + +## 3. Chart Browser / Registries + +**Location:** `frontend/src/features/artifact/registries/pages/ArtifactBrowserPage.tsx` + +**Layout (Desktop):** +- Main layout: `flex-1 flex overflow-hidden bg-slate-50` (sidebar + detail panes) +- Sidebar tree: `flex-1 overflow-y-auto custom-scrollbar` +- Detail pane: `flex-1 flex flex-col bg-white overflow-hidden` +- Tag grid: `grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4` + +**Tablet (768px):** No overflow. Grid collapses to 2 columns. + +**Mobile (375px):** No overflow. Grid collapses to 1 column. + +### Key Patterns Found: +- `RepositoryItem.tsx:212` — `` — proper truncation with `title` tooltip +- `ArtifactBrowserPage.tsx:336` — `

` — uses 11px text with truncation +- `TagCard.tsx` — uses truncation with `title` attribute for long names + +### Passing — no overflow issues found. + +--- + +## 4. Instances Page + +**Location:** `frontend/src/features/artifact/instances/pages/InstancesManagementPage.tsx` +**Component:** `InstanceCard.tsx` + +**Layout:** +- Cluster cards: responsive grid `clusters.length > 1 ? 'md:grid-cols-3' : 'md:grid-cols-2'` +- Instance cards listed in single column then `lg:grid-cols-2 gap-6` +- Action buttons grid: `grid-cols-2 gap-2 md:grid-cols-3 xl:grid-cols-5` + +### ✅ Issue #2-INFO: Action button text truncation on InstanceCards +- **File:** `InstanceCard.tsx:285-327` +- **Pattern:** + ``` +

+ +
+ ``` +- **Analysis:** At `grid-cols-2` (small screens), two buttons share each row. The buttons use `min-w-0` which allows them to shrink, and `truncate` on the text span. However, the button text is short ("Refresh", "Entries", "Diagnostics", "Modify", "Delete"), so truncation is unlikely to occur in practice. +- **Mitigation:** Each `` has `title` attribute on parent button, providing tooltip fallback. +- **Verdict:** Acceptable — button labels are intentionally short and tooltips are present. + +### ✅ Issue #3-INFO: Header text truncation without tooltip +- **File:** `InstanceCard.tsx:185` +- **Pattern:** `

{instanceName}

` +- **Analysis:** Instance names can be long, and `truncate` clips them with an ellipsis. This element has no `title` attribute — unlike the repository text below it — so the full name is not available on hover. +- **Recommendation:** Add `title={instanceName}` to the `

` element so the full name appears as a tooltip when the text is truncated. + +### Passing — no critical overflow issues found. + +--- + +## 5. Monitoring / Clusters + +**Location:** `frontend/src/features/monitoring/clusters/` + +**Layout:** +- Cluster cards grid: `grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4` +- Card header: `

` with cluster name +- Metrics: `grid-cols-2 sm:grid-cols-4 gap-4 mb-3` +- Resource bars: `overflow-hidden` for proper progress bar clipping +- Node details: `grid-cols-1 lg:grid-cols-2 gap-3` + +**Overflow Check:** ScrollWidth = clientWidth at all tested viewports — no horizontal overflow. + +**Responsive:** +- 1920px: 4 columns of cluster cards +- 768px: 2 columns +- 375px: 1 column + +### Passing — no issues found. + +--- + +## 6. Sidebar Layout + +**Location:** `frontend/src/shared/components/layout/SidebarLayout/` + +**Layout:** +- Parent: `min-h-screen flex bg-dark text-primary overflow-hidden` +- Nav: `flex-1 p-3 space-y-1 overflow-y-auto` — independently scrollable +- Footer: Fixed at bottom, `p-3 text-xs text-muted` + +**Scroll Analysis:** +- Content area has `overflow-y-auto`, so sidebar nav items scroll independently when they exceed viewport height +- The footer anchors to the bottom of the sidebar (not the scroll area) +- At 1080px viewport, sidebar content fits without scrolling + +**Potential Concern:** If many nav items are added, the footer will push below the fold and the user must scroll the nav to see it. The `overflow-y-auto` on the `

+); diff --git a/frontend/src/app/routes/AppRoutes.tsx b/frontend/src/app/routes/AppRoutes.tsx index f191b70..b4e11c0 100644 --- a/frontend/src/app/routes/AppRoutes.tsx +++ b/frontend/src/app/routes/AppRoutes.tsx @@ -4,17 +4,22 @@ */ import { Routes, Route, Navigate } from "react-router-dom"; +import type { ReactNode } from "react"; import { ProtectedRoute } from "./RouteGuard"; +import { AccessDeniedPage } from "./AccessDeniedPage"; import AppShell from "@/shared/components/layout/AppShell"; import { getPageInfo, type NavItem } from "../constants/navigation"; -import { useLocation } from "react-router-dom"; +import { useLocation, useNavigate } from "react-router-dom"; import type { AuthResponse } from "@/api"; +import type { User } from "../providers/AuthContext"; +import { canAccessRoute } from "../providers/auth-model"; // Feature pages import AuthPage from "@/features/auth/pages/AuthPage"; import HomePage from "@/features/home/pages/HomePage"; import ClusterConfigPage from "@/features/configuration/clusters/pages/ClusterConfigPage"; import RegistryConfigPage from "@/features/configuration/registries/pages/RegistryConfigPage"; +import UserManagementPage from "@/features/configuration/users/pages/UserManagementPage"; import ArtifactBrowserPage from "@/features/artifact/registries/pages/ArtifactBrowserPage"; import InstancesManagementPage from "@/features/artifact/instances/pages/InstancesManagementPage"; import MonitoringClustersPage from "@/features/monitoring/clusters/pages/MonitoringClustersPage"; @@ -23,6 +28,7 @@ import { ApiTest } from "@/components/ApiTest"; interface AppRoutesProps { isAuthenticated: boolean; userName?: string; + user: User | null; navItems: NavItem[]; onLogin: (tokens: AuthResponse) => void; onLogout: () => void; @@ -34,12 +40,31 @@ interface AppRoutesProps { export const AppRoutes = ({ isAuthenticated, userName = "User", + user, navItems, onLogin, onLogout, }: AppRoutesProps) => { const location = useLocation(); + const navigate = 
useNavigate(); const pageInfo = getPageInfo(location.pathname); + const shell = (children: ReactNode) => ( + + {children} + + ); + const protectedPage = (path: string, children: ReactNode) => ( + + {shell(children)} + + ); return ( @@ -58,102 +83,54 @@ export const AppRoutes = ({ {/* Protected routes - wrapped in AppShell */} - - - - - } + element={protectedPage("/home", )} /> - - - - - } + element={protectedPage("/configuration/clusters", )} /> - - - - - } + element={protectedPage("/configuration/registries", )} /> - - - - - } + element={protectedPage("/artifact/registries", )} /> - - - - - } + element={protectedPage("/artifact/instances", )} /> )} + /> + + navigate("/home")} />)} + /> + + )} + /> + + navigate("/home")} />)} + /> + + - - - + {shell( navigate("/home")} />)} } /> @@ -178,5 +155,3 @@ export const AppRoutes = ({ ); }; - - diff --git a/frontend/src/app/routes/RouteGuard.tsx b/frontend/src/app/routes/RouteGuard.tsx index 8aaf42a..abb71c6 100644 --- a/frontend/src/app/routes/RouteGuard.tsx +++ b/frontend/src/app/routes/RouteGuard.tsx @@ -5,9 +5,11 @@ import { Navigate } from "react-router-dom"; import type { ReactNode } from "react"; +import type { User } from "../providers/AuthContext"; interface RouteGuardProps { isAuthenticated: boolean; + isAllowed?: boolean; redirectTo?: string; children: ReactNode; } @@ -17,11 +19,16 @@ interface RouteGuardProps { * Redirects to auth page if not authenticated */ export const ProtectedRoute = ({ - isAuthenticated, + isAuthenticated, + isAllowed = true, redirectTo = "/", children }: RouteGuardProps) => { - return isAuthenticated ? <>{children} : ; + if (!isAuthenticated) { + return ; + } + + return isAllowed ? <>{children} : ; }; /** @@ -36,4 +43,6 @@ export const PublicRoute = ({ return !isAuthenticated ? <>{children} : ; }; +export const canUseRoute = (user: User | null, predicate?: (user: User) => boolean): boolean => + !predicate || (user ? 
predicate(user) : false); diff --git a/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx b/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx new file mode 100644 index 0000000..9d15eb8 --- /dev/null +++ b/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx @@ -0,0 +1,244 @@ +import React, { useEffect, useMemo, useState } from "react"; +import { Activity, AlertTriangle, Box, Copy, FileText, RotateCw, Server, Terminal, X } from "lucide-react"; +import { getInstanceDiagnostics, type InstanceDiagnosticsResponse, type InstanceResponse } from "@/api"; +import { Button, Badge, LoadingState } from "@/shared/components"; +import { formatApiError } from "@/shared/utils"; +import { useToast } from "@/shared"; + +type TabKey = "summary" | "events" | "logs"; + +interface DiagnosticsModalProps { + instance: InstanceResponse; + onClose: () => void; +} + +export const DiagnosticsModal: React.FC = ({ instance, onClose }) => { + const { success, error: toastError } = useToast(); + const [data, setData] = useState(null); + const [loading, setLoading] = useState(true); + const [activeTab, setActiveTab] = useState("summary"); + + const loadDiagnostics = async () => { + if (!instance.clusterId || !instance.id) return; + setLoading(true); + try { + setData(await getInstanceDiagnostics({ clusterId: instance.clusterId, instanceId: instance.id }, { tailLines: 300 })); + } catch (err) { + toastError(formatApiError(err) || "Failed to load diagnostics"); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + void loadDiagnostics(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [instance.clusterId, instance.id]); + + const combinedLogs = useMemo( + () => + (data?.logs ?? 
[]) + .map((entry) => `# ${entry.pod || "pod"} / ${entry.container || "container"}\n${entry.error || entry.log || ""}`) + .join("\n\n"), + [data?.logs] + ); + + const copyLogs = async () => { + await navigator.clipboard.writeText(combinedLogs); + success("Logs copied"); + }; + + return ( +
+
+
+
+
+ + Runtime diagnostics +
+

{instance.name}

+

+ {instance.namespace} · {data?.collectedAt ? new Date(data.collectedAt).toLocaleString() : "live Kubernetes data"} +

+
+
+ + +
+
+ +
+
+ setActiveTab("summary")} icon={Box} label="Describe" /> + setActiveTab("events")} icon={AlertTriangle} label="Events" /> + setActiveTab("logs")} icon={Terminal} label="Pod Logs" /> +
+
+ +
+ {loading ? ( + + ) : !data ? ( +
+ Diagnostics data is not available. +
+ ) : activeTab === "summary" ? ( + + ) : activeTab === "events" ? ( + + ) : ( + + )} +
+
+
+ ); +}; + +const TabButton: React.FC<{ + active: boolean; + icon: React.ComponentType<{ className?: string }>; + label: string; + onClick: () => void; +}> = ({ active, icon: Icon, label, onClick }) => ( + +); + +const SummaryTab = ({ data }: { data: InstanceDiagnosticsResponse }) => ( +
+
+ + + +
+
+

Pods

+ {(data.pods ?? []).length === 0 ? ( + + ) : ( + (data.pods ?? []).map((pod) => ( +
+
+
+

{pod.name}

+

+ {pod.nodeName || "unscheduled"} · podIP {pod.podIp || "-"} · restarts {pod.restartCount ?? 0} +

+
+ + {pod.phase || "Unknown"} + +
+
+ {(pod.containers ?? []).map((container) => ( +
+
+ {container.name} + + {container.state || "unknown"} + +
+

+ {container.image} +

+ {(container.reason || container.message) && ( +

{container.reason || container.message}

+ )} +
+ ))} +
+
+ )) + )} +
+
+

Services

+ {(data.services ?? []).length === 0 ? : null} + {(data.services ?? []).map((svc) => ( +
+
+ {svc.name} + {svc.type} +
+

ClusterIP {svc.clusterIP || "-"}

+
+ {(svc.ports ?? []).map((port) => ( + + {port.name || "port"} {port.port}:{port.targetPort} + + ))} +
+
+ ))} +
+
+); + +const EventsTab = ({ data }: { data: InstanceDiagnosticsResponse }) => ( +
+ {(data.events ?? []).length === 0 ? : null} + {(data.events ?? []).map((event, index) => ( +
+
+
+ {event.type || "Normal"} + {event.reason} +
+ {event.lastTimestamp ? new Date(event.lastTimestamp).toLocaleString() : ""} +
+

{event.message}

+

+ {event.involvedKind}/{event.involvedName} · count {event.count ?? 1} +

+
+ ))} +
+); + +const LogsTab = ({ data, combinedLogs, onCopy }: { data: InstanceDiagnosticsResponse; combinedLogs: string; onCopy: () => void }) => ( +
+
+ +
+ {(data.logs ?? []).length === 0 ? : null} + {(data.logs ?? []).map((entry) => ( +
+
+ + {entry.pod}/{entry.container} +
+
{entry.error || entry.log || ""}
+
+ ))} +
+); + +const MetricCard = ({ icon: Icon, label, value }: { icon: React.ComponentType<{ className?: string }>; label: string; value: number }) => ( +
+
+ {label} + +
+

{value}

+
+); + +const EmptyLine = ({ text }: { text: string }) => ( +
{text}
+); diff --git a/frontend/src/features/artifact/instances/components/EntriesModal.tsx b/frontend/src/features/artifact/instances/components/EntriesModal.tsx index f567146..6e70d3b 100644 --- a/frontend/src/features/artifact/instances/components/EntriesModal.tsx +++ b/frontend/src/features/artifact/instances/components/EntriesModal.tsx @@ -321,7 +321,7 @@ export const EntriesModal: React.FC = ({ instance, onClose }) kubernetes: { color: "bg-green-600/20 text-green-400 border-green-500/30", label: "Live from Kubernetes" }, manifest: { color: "bg-blue-600/20 text-blue-400 border-blue-500/30", label: "From Helm Manifest" }, notes: { color: "bg-yellow-600/20 text-yellow-400 border-yellow-500/30", label: "From Helm Notes" }, - none: { color: "bg-gray-600/20 text-gray-400 border-gray-500/30", label: "No Data Available" }, + none: { color: "bg-slate-200/20 text-slate-500 border-gray-500/30", label: "No Data Available" }, }; const badge = badges[source as keyof typeof badges] || badges.none; @@ -335,11 +335,11 @@ export const EntriesModal: React.FC = ({ instance, onClose }) }; const renderService = (service: ServiceEntry, index: number) => ( -
+
-

{service.name || `Service ${index + 1}`}

-

Type: {service.type || 'Unknown'}

+

{service.name || `Service ${index + 1}`}

+

Type: {service.type || 'Unknown'}

{service.type || 'Unknown'} @@ -349,18 +349,18 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Cluster IP */} {service.cluster_ip && ( -
- Cluster IP: +
+ Cluster IP:
- {service.cluster_ip} + {service.cluster_ip}
@@ -369,10 +369,10 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {/* Ports */} {service.ports && service.ports.length > 0 && service.ports.map((port, idx) => ( -
- {port.name || `Port ${idx + 1}`}: +
+ {port.name || `Port ${idx + 1}`}:
- + {port.port} → {port.target_port} {port.protocol || 'TCP'} {port.node_port && ` (NodePort: ${port.node_port})`} @@ -386,7 +386,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })

LoadBalancer Entries:

{service.loadBalancer.ingress.map((ing, idx) => (
- + {ing.ip || ing.hostname}
@@ -396,19 +396,19 @@ export const EntriesModal: React.FC = ({ instance, onClose }) href={`http://${ing.ip}:${service.ports?.[0]?.port || 80}`} target="_blank" rel="noopener noreferrer" - className="p-1 hover:bg-gray-700 rounded transition" + className="p-1 hover:bg-slate-100 rounded transition" title="Open in browser" > @@ -423,12 +423,12 @@ export const EntriesModal: React.FC = ({ instance, onClose }) ); const renderIngress = (ingress: IngressEntry, index: number) => ( -
+
-

{ingress.name || `Ingress ${index + 1}`}

+

{ingress.name || `Ingress ${index + 1}`}

{ingress.class_name && ( -

Class: {ingress.class_name}

+

Class: {ingress.class_name}

)}
@@ -436,30 +436,30 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{ingress.rules?.map((rule, ruleIdx) => ( -
+
{(() => { const host = rule.host; if (!host) return null; return (
- {host} + {host}
@@ -470,7 +470,7 @@ export const EntriesModal: React.FC = ({ instance, onClose }) const serviceName = path.backend?.service?.name || "service"; const servicePort = path.backend?.service?.port ?? "-"; return ( -
+
• {path.path || '/'} → {serviceName}:{servicePort}
); @@ -489,20 +489,20 @@ export const EntriesModal: React.FC = ({ instance, onClose }) return (
-
+
{/* Header */} -
+
-

Instance Entries

-

+

Instance Entries

+

{instance.name} ({instance.namespace})

@@ -511,14 +511,14 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {loading ? (
- Loading entries... + Loading entries...
) : error ? (

{error}

@@ -527,7 +527,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Source Badge */}
-

Data Source:

+

Data Source:

{getSourceBadge(entries.source)}
@@ -536,7 +536,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
-

+

Services ({entries.services.length})

@@ -551,7 +551,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
-

+

Ingresses ({entries.ingresses.length})

@@ -564,9 +564,9 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {/* Helm Notes (as fallback) */} {entries.notes && entries.source === "notes" && (
-

Helm Notes

-
-
+                  

Helm Notes

+
+
                       {entries.notes}
                     
@@ -579,8 +579,8 @@ export const EntriesModal: React.FC = ({ instance, onClose }) !entries.notes && (
-

No entries found for this instance

-

Data source: {entries.source || 'unknown'}

+

No entries found for this instance

+

Data source: {entries.source || 'unknown'}

)}
@@ -588,10 +588,10 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Footer */} -
+
diff --git a/frontend/src/features/artifact/instances/components/InstanceCard.tsx b/frontend/src/features/artifact/instances/components/InstanceCard.tsx index 2ab0565..bc77572 100644 --- a/frontend/src/features/artifact/instances/components/InstanceCard.tsx +++ b/frontend/src/features/artifact/instances/components/InstanceCard.tsx @@ -12,6 +12,7 @@ import { XCircle, Clock, Network, + Activity, Box, Calendar, GitBranch, @@ -29,6 +30,7 @@ interface InstanceCardProps { onTerminate: (instance: InstanceResponse) => void; onRefresh: (instance: InstanceResponse) => void; onViewEntries: (instance: InstanceResponse) => void; + onViewDiagnostics: (instance: InstanceResponse) => void; } type StatusVisual = { @@ -99,16 +101,16 @@ const STATUS_INFO_MAP: Record = { }, [INSTANCE_STATUS.uninstalled]: { icon: StopCircle, - color: "text-slate-300", - bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-500/40", + color: "text-slate-700", + bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-300/40", glow: "shadow-slate-500/20", label: "Uninstalled", defaultReason: "Instance has been removed from the cluster.", }, [INSTANCE_STATUS.unknown]: { icon: HelpCircle, - color: "text-slate-300", - bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-500/40", + color: "text-slate-700", + bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-300/40", glow: "shadow-slate-500/20", label: "Unknown", defaultReason: "Awaiting next state update.", @@ -136,6 +138,7 @@ export const InstanceCard: React.FC = ({ onTerminate, onRefresh, onViewEntries, + onViewDiagnostics, }) => { const normalizedStatus = (instance.status ?? INSTANCE_STATUS.unknown) as InstanceStatus; const statusInfo = @@ -164,12 +167,12 @@ export const InstanceCard: React.FC = ({ typeof instance.lastError === "string" ? instance.lastError.trim() : ""; return ( -
+
{/* Decorative gradient overlay */}
{/* Header with enhanced design */} -
+
{/* Enhanced icon with glow effect */} @@ -179,12 +182,12 @@ export const InstanceCard: React.FC = ({
-

+

{instanceName}

- -

+ +

{repository}

@@ -206,10 +209,10 @@ export const InstanceCard: React.FC = ({
-
- {statusReason} +
+ {statusReason} {lastOperationLabel && ( - + Operation: {lastOperationLabel} )} @@ -217,48 +220,48 @@ export const InstanceCard: React.FC = ({
{/* Enhanced Content Grid */} -
+
{/* Namespace */} -
+
-

Namespace

+

Namespace

-

+

{namespace}

{/* Revision */} -
+
-

Revision

+

Revision

-

+

{revision}

{/* Repository - Full Width */} -
+
-

Repository

+

Repository

-

+

{repository}

{/* Launched Date - Full Width */} -
+
-

Launched

+

Launched

-

+

{createdAtText}

@@ -267,7 +270,7 @@ export const InstanceCard: React.FC = ({ {lastError && (
- +

Last error

@@ -278,47 +281,51 @@ export const InstanceCard: React.FC = ({
{/* Enhanced Actions Bar */} -
-
-
+
+
+ -
-
-
diff --git a/frontend/src/features/artifact/instances/components/ModifyModal.tsx b/frontend/src/features/artifact/instances/components/ModifyModal.tsx index d06df2f..10beb20 100644 --- a/frontend/src/features/artifact/instances/components/ModifyModal.tsx +++ b/frontend/src/features/artifact/instances/components/ModifyModal.tsx @@ -5,6 +5,7 @@ */ import React, { useState, useEffect } from "react"; import { Settings } from "lucide-react"; +import { parse as parseYaml, stringify as stringifyYaml } from "yaml"; import type { InstanceResponse, UpdateInstanceRequest } from "@/api"; import { getValuesSchema } from "@/api"; import { @@ -13,7 +14,6 @@ import { FormField, Input, Textarea, - Checkbox, ErrorState, LoadingState, Badge, @@ -35,8 +35,6 @@ export const ModifyModal: React.FC = ({ const [tag, setTag] = useState(""); const [description, setDescription] = useState(""); const [valuesYaml, setValuesYaml] = useState(""); - const [wait, setWait] = useState(true); - const [timeout, setTimeout_] = useState(300); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); @@ -58,7 +56,7 @@ export const ModifyModal: React.FC = ({ ? JSON.parse(instance.values) : instance.values; setFormValues(parsedValues); - setValuesYaml(typeof parsedValues === 'object' ? JSON.stringify(parsedValues, null, 2) : String(parsedValues)); + setValuesYaml(typeof parsedValues === 'object' ? 
stringifyYaml(parsedValues) : String(parsedValues)); } catch (err) { console.error('[ModifyModal] Failed to parse existing values:', err); setValuesYaml(String(instance.values) || ""); @@ -104,8 +102,7 @@ export const ModifyModal: React.FC = ({ const handleFormValuesChange = (values: Record) => { setFormValues(values); - // Also update YAML representation - setValuesYaml(JSON.stringify(values, null, 2)); + setValuesYaml(stringifyYaml(values)); }; const handleSubmit = async (e: React.FormEvent) => { @@ -116,7 +113,9 @@ export const ModifyModal: React.FC = ({ try { const payload: UpdateInstanceRequest = { version: tag && tag !== instance.version ? tag : undefined, - values: valuesYaml.trim() ? JSON.parse(valuesYaml) : undefined, + description: description.trim() || undefined, + values: valuesYaml.trim() ? parseValuesYaml(valuesYaml) : undefined, + valuesYaml: valuesYaml.trim() || undefined, }; if (!instance.clusterId || !instance.id) { @@ -128,8 +127,8 @@ export const ModifyModal: React.FC = ({ await onConfirm(instance.clusterId, instance.id, payload); onClose(); } catch (err: unknown) { - if (err instanceof SyntaxError) { - setError("Invalid JSON/YAML values. Please fix the configuration."); + if (err instanceof Error && err.message.includes("YAML")) { + setError(err.message); } else { setError((err as Error).message || "Failed to modify instance"); } @@ -144,7 +143,7 @@ export const ModifyModal: React.FC = ({ onClose={onClose} title={`Modify Instance - ${instance.name || "Unnamed"}`} icon={Settings} - iconColor="text-blue-400" + iconColor="text-blue-600" size="lg" footer={ <> @@ -175,15 +174,15 @@ export const ModifyModal: React.FC = ({ )} {/* Current Info */} -
-

- Current Version: {instance.version || "N/A"} +

+

+ Current Version: {instance.version || "N/A"}

-

- Cluster: {instance.clusterId || "N/A"} +

+ Cluster: {instance.clusterId || "N/A"}

-

- Repository: {instance.repository || "N/A"} +

+ Repository: {instance.repository || "N/A"}

@@ -215,12 +214,13 @@ export const ModifyModal: React.FC = ({ {/* Values Configuration */}
-