diff --git a/.gitignore b/.gitignore index fea6c0e..639c669 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,19 @@ redis_data/ tmp/ temp/ *.tmp +.fuse_hidden* + +# Debug scripts +debug_*.py +test_*.py + +# Next.js build output (including stale caches) +frontend/.next*/ +frontend/next-env.d.ts + +# Compiled binary +backend/ocdp-backend + +# IDE / AI temp +.claude/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..bbe281f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,47 @@ +# Project Overview + + +# 🤖 Claude Code Agentic Workflow (Strictly Follow) + +作为本项目的资深 AI 研发工程师,你在执行任何指令时,必须严格遵守以下核心原则与工作流。 + +## Ⅰ. 核心原则 (Core Principles) +1. **No Laziness (拒绝偷懒):** 必须找到问题的根本原因 (Root Causes)。禁止使用临时补丁 (Hack/Temporary fixes)。保持高级工程师的标准。 +2. **Demand Elegance (苛求优雅):** 对于非琐碎的修改,停下来问自己:“有更优雅的实现方式吗?”如果你发现之前的代码很 Hacky,在掌握全局上下文后,用优雅的方式重构它(但不要过度设计)。 +3. **Test-Driven Quality (测试驱动质量):** 在项目根目录维护 `test/` 文件夹,存放结构化测试脚本。每个脚本顶部必须用注释注明其覆盖的功能范围。当代码发生重大变更时,必须执行 `test/` 下所有相关测试脚本并确保通过,方可视为任务完成。 + +## Ⅱ. 任务管理闭环 (Task Management Protocol) +你必须通过读写 `tasks/` 目录下的文件来管理你的工作状态: +1. **Plan First:** 在开始实现前,将计划写入 `tasks/todo.md`,必须是可勾选的 Checkbox 列表。 +2. **Verify Plan:** 在动手写代码前,先和我(User)确认这个计划是否合理。 +3. **Track Progress:** 边做边在 `todo.md` 中打勾标记完成状态。 +4. **Explain Changes:** 在每执行完一个步骤时,给出高层次的代码修改总结。 +5. **Document Results:** 任务完成后,在 `todo.md` 中补充 Review 总结。 +6. **Capture Lessons:** 如果被我纠正了错误,立刻更新 `tasks/lessons.md`。 + +## Ⅲ. 工作流编排 (Workflow Orchestration) + +### 1. 强制规划模式 (Plan Node Default) +- 对于任何非琐碎任务(涉及 3 个以上步骤或架构决策),必须进入规划模式。 +- 提前写好详细的 Spec 以减少歧义。 +- **一旦情况不对劲(报错连连),立即停止盲目推进**,重新评估并制定新计划。 + +### 2. 经验自我迭代 (Self-Improvement Loop) +- 在每次会话开始时,主动读取 `tasks/lessons.md`,复习该项目的历史教训。 +- 针对犯过的错误,为自己制定防止再次踩坑的规则。 +- 无情地迭代这些经验,直到你的错误率显著下降。 + +### 3. 自主修复 Bug (Autonomous Bug Fixing) +- 当我给你一个 Bug 报告时:**直接去修。不要等我手把手教你。** +- 主动利用 CLI 权限去查看日志、定位错误代码、运行失败的测试用例,然后解决它。 +- 要求对用户“零上下文切换”——你去修复 CI 测试,不需要我告诉你具体该怎么做。 + +### 4. 
交付前绝对验证 (Verification Before Done) +- **永远不要在没有证明代码能跑的情况下,把任务标记为“完成”。** +- 问自己:“Staff Engineer(主任工程师)会批准这段代码吗?” +- 必须主动运行测试(例如 `go test`, `npm run build`),检查日志,并向我证明正确性。 +- 对比修改前后的 Diff,确保行为符合预期。 + +### 5. 复杂问题拆解 (Agentic Strategy) +- 遇到极其复杂的问题时,不要试图在一个终端窗口内硬扛。 +- 拆解子任务,主动进行探索性研究,针对焦点问题逐一击破。 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..bbe281f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,47 @@ +# Project Overview + + +# 🤖 Claude Code Agentic Workflow (Strictly Follow) + +作为本项目的资深 AI 研发工程师,你在执行任何指令时,必须严格遵守以下核心原则与工作流。 + +## Ⅰ. 核心原则 (Core Principles) +1. **No Laziness (拒绝偷懒):** 必须找到问题的根本原因 (Root Causes)。禁止使用临时补丁 (Hack/Temporary fixes)。保持高级工程师的标准。 +2. **Demand Elegance (苛求优雅):** 对于非琐碎的修改,停下来问自己:“有更优雅的实现方式吗?”如果你发现之前的代码很 Hacky,在掌握全局上下文后,用优雅的方式重构它(但不要过度设计)。 +3. **Test-Driven Quality (测试驱动质量):** 在项目根目录维护 `test/` 文件夹,存放结构化测试脚本。每个脚本顶部必须用注释注明其覆盖的功能范围。当代码发生重大变更时,必须执行 `test/` 下所有相关测试脚本并确保通过,方可视为任务完成。 + +## Ⅱ. 任务管理闭环 (Task Management Protocol) +你必须通过读写 `tasks/` 目录下的文件来管理你的工作状态: +1. **Plan First:** 在开始实现前,将计划写入 `tasks/todo.md`,必须是可勾选的 Checkbox 列表。 +2. **Verify Plan:** 在动手写代码前,先和我(User)确认这个计划是否合理。 +3. **Track Progress:** 边做边在 `todo.md` 中打勾标记完成状态。 +4. **Explain Changes:** 在每执行完一个步骤时,给出高层次的代码修改总结。 +5. **Document Results:** 任务完成后,在 `todo.md` 中补充 Review 总结。 +6. **Capture Lessons:** 如果被我纠正了错误,立刻更新 `tasks/lessons.md`。 + +## Ⅲ. 工作流编排 (Workflow Orchestration) + +### 1. 强制规划模式 (Plan Node Default) +- 对于任何非琐碎任务(涉及 3 个以上步骤或架构决策),必须进入规划模式。 +- 提前写好详细的 Spec 以减少歧义。 +- **一旦情况不对劲(报错连连),立即停止盲目推进**,重新评估并制定新计划。 + +### 2. 经验自我迭代 (Self-Improvement Loop) +- 在每次会话开始时,主动读取 `tasks/lessons.md`,复习该项目的历史教训。 +- 针对犯过的错误,为自己制定防止再次踩坑的规则。 +- 无情地迭代这些经验,直到你的错误率显著下降。 + +### 3. 自主修复 Bug (Autonomous Bug Fixing) +- 当我给你一个 Bug 报告时:**直接去修。不要等我手把手教你。** +- 主动利用 CLI 权限去查看日志、定位错误代码、运行失败的测试用例,然后解决它。 +- 要求对用户“零上下文切换”——你去修复 CI 测试,不需要我告诉你具体该怎么做。 + +### 4. 
交付前绝对验证 (Verification Before Done) +- **永远不要在没有证明代码能跑的情况下,把任务标记为“完成”。** +- 问自己:“Staff Engineer(主任工程师)会批准这段代码吗?” +- 必须主动运行测试(例如 `go test`, `npm run build`),检查日志,并向我证明正确性。 +- 对比修改前后的 Diff,确保行为符合预期。 + +### 5. 复杂问题拆解 (Agentic Strategy) +- 遇到极其复杂的问题时,不要试图在一个终端窗口内硬扛。 +- 拆解子任务,主动进行探索性研究,针对焦点问题逐一击破。 diff --git a/Makefile b/Makefile index e181899..2298b12 100644 --- a/Makefile +++ b/Makefile @@ -1,56 +1,85 @@ # ============================================================ -# OCDP stack orchestration Makefile -# run-2: 构建前端静态资源 + 启动 nginx(统一入口)和 backend 栈 -# clean-2: 清理 run-2 产生的容器 / 卷 / 网络 +# OCDP root orchestration Makefile # ============================================================ SHELL := /bin/bash COMPOSE_BIN ?= docker compose - ROOT_COMPOSE := docker-compose.yml -BACKEND_COMPOSE := backend/docker-compose.yml -BACKEND_PROFILE := backend +COMPOSE := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -COMPOSE_STACK := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -f $(BACKEND_COMPOSE) --profile $(BACKEND_PROFILE) -COMPOSE_STACK_ALL := $(COMPOSE_BIN) -f $(ROOT_COMPOSE) -f $(BACKEND_COMPOSE) -STACK_ENV := ADAPTER_MODE=production BACKEND_BUILD_CONTEXT=$(abspath backend) BACKEND_BUILD_DOCKERFILE=$(abspath backend/Dockerfile) BACKEND_MOCK_BUILD_DOCKERFILE=$(abspath backend/Dockerfile.mock) INIT_DB_SQL_PATH=$(abspath backend/scripts/init-db.sql) +.PHONY: help install up restart stop clean run-2 clean-2 docker-dev docker-prod docker-up docker-down docker-logs docker-ps test -STACK_SERVICES := postgres backend nginx +.DEFAULT_GOAL := help -.PHONY: run-2 clean-2 build-backend - -run-2: - @echo "═══════════════════════════════════════════════" - @echo "🚀 run-2: rebuild static assets + start web gateway stack" - @echo "═══════════════════════════════════════════════" +help: @echo "" - @export COMPOSE_PROJECT_NAME=ocdp && \ - export ADAPTER_MODE=production && \ - export BACKEND_BUILD_CONTEXT=$(abspath backend) && \ - export BACKEND_BUILD_DOCKERFILE=$(abspath backend/Dockerfile) && \ - export 
BACKEND_MOCK_BUILD_DOCKERFILE=$(abspath backend/Dockerfile.mock) && \ - export INIT_DB_SQL_PATH=$(abspath backend/scripts/init-db.sql) && \ - echo "→ Rebuilding frontend static assets" && \ - $(COMPOSE_STACK) run --rm frontend-build && \ - echo "" && \ - echo "→ Rebuilding backend image" && \ - $(COMPOSE_STACK) build backend && \ - echo "" && \ - echo "→ Bringing up backend + nginx services" && \ - $(COMPOSE_STACK) up -d $(STACK_SERVICES) + @echo "OCDP commands" + @echo "────────────────────────────────────────" + @echo " make install Install local Go / frontend dependencies" + @echo " make up Build and start the complete platform: DB + API + web gateway" + @echo " make restart Restart the complete platform without removing volumes" + @echo " make stop Stop containers, keep volumes" + @echo " make clean Stop containers and remove project volumes" + @echo " make run-2 Alias of up, kept for old docs / muscle memory" + @echo " make docker-dev Alias of up" + @echo " make docker-prod Alias of up" + @echo " make docker-up Alias of up" + @echo " make docker-down Stop containers, keep volumes" + @echo " make clean-2 Stop containers and remove project volumes" + @echo " make docker-logs Follow Compose logs" + @echo " make docker-ps Show Compose service status" + @echo " make test Run structured verification script" @echo "" - @echo "✅ Services online:" - @echo "═══════════════════════════════════════════════" + @echo "Default local ports: web=18080, https=18443, backend=18081, postgres=15432" + @echo "Override with WEB_HTTP_PORT / WEB_HTTPS_PORT / BACKEND_PORT / POSTGRES_PORT." 
+ @echo "" + +install: + @echo "→ Downloading backend modules" + @cd backend && go mod download + @echo "→ Installing frontend dependencies" + @cd frontend && npm ci + +up: + @echo "→ Building and starting OCDP stack" + @$(COMPOSE) up --build -d + @echo "" + @$(COMPOSE) ps -a + @echo "" + @echo "Web: http://localhost:$${WEB_HTTP_PORT:-18080}" + @echo "Backend: http://localhost:$${BACKEND_PORT:-18081}/health" + +restart: + @echo "→ Restarting OCDP stack" + @$(COMPOSE) up --build -d --force-recreate + @$(COMPOSE) ps -a + +stop: + @$(COMPOSE) down --remove-orphans + +clean: + @$(COMPOSE) down -v --remove-orphans + +run-2: up + +docker-dev: up + +docker-prod: up + +docker-up: up + +docker-down: + @$(MAKE) stop clean-2: - @echo "═══════════════════════════════════════════════" - @echo "🧹 clean-2: tearing down run-2 stack" - @echo "═══════════════════════════════════════════════" - @$(COMPOSE_STACK_ALL) down --remove-orphans || true - @$(COMPOSE_STACK_ALL) down -v --remove-orphans || true - @$(COMPOSE_BIN) -f $(BACKEND_COMPOSE) down -v --remove-orphans || true - @echo "✅ Environment cleaned" - @echo "═══════════════════════════════════════════════" + @$(MAKE) clean +docker-logs: + @$(COMPOSE) logs -f +docker-ps: + @$(COMPOSE) ps -a + +test: + @test/readme-deployment-refresh.sh diff --git a/Multi-Tenant Kubeconfig.md b/Multi-Tenant Kubeconfig.md new file mode 100644 index 0000000..4363f4d --- /dev/null +++ b/Multi-Tenant Kubeconfig.md @@ -0,0 +1,127 @@ +# Technical Specification: Multi-Tenant Kubeconfig & Auth Gateway + +## 1. System Overview & Goals +- **Objective**: Develop a backend API service that automates Kubernetes multi-tenant onboarding (Namespace + Quota isolation) and securely distributes short-lived, dynamic `kubeconfig` files using the Kubernetes `TokenRequest` API. +- **Architecture Independence**: This backend service acts as a standalone control plane. 
It is **not** strictly bound to a BFF pattern and does **not** need to run inside the target Kubernetes cluster (it supports Out-of-Cluster execution). +- **Out of Scope**: This spec does NOT cover the frontend UI implementation or the downstream workload deployment. It focuses strictly on identity, tenant provisioning, and credential brokering. +- **Security Principles**: Adhere strictly to Zero-Knowledge architecture (no token storage in DB), Ephemeral Credentials (short-lived tokens only), and Least Privilege (the Gateway must NOT be a `cluster-admin`). + +## 2. Architecture & Topology +- **Tech Stack**: Go `net/http` (or FastAPI), utilizing the official Kubernetes Client SDK (`client-go` or `kubernetes-client/python`). +- **Control Plane Flow**: + 1. Client/Frontend -> Gateway: User requests environment access. + 2. Gateway -> K8s API: Gateway authenticates to the target K8s cluster using its own master credentials (e.g., an Out-of-Cluster `kubeconfig`). + 3. Gateway -> K8s API: Executes Namespace/SA creation (if new) or calls `TokenRequest` API (if existing). + 4. Gateway -> Client/Frontend: Returns a generated `kubeconfig` YAML string with the short-lived JWT token. + +## 3. Core Business Logic Workflows + +### Phase 1: Tenant Initialization (Onboarding) +Triggered when a new user registers or requests a workspace for the first time. The Gateway must execute a K8s transaction creating four resources: +1. **Namespace**: `tenant-{user_uuid}` +2. **ServiceAccount**: `sa-tenant-admin` (Created inside the tenant's namespace). +3. **RoleBinding**: Bind `sa-tenant-admin` to the `admin` (or custom) ClusterRole, strictly isolated within `tenant-{user_uuid}`. +4. **ResourceQuota**: Enforce limits (e.g., `requests.cpu: "4"`, `limits.memory: "16Gi"`) to prevent noisy neighbors. + +### Phase 2: Credential Distribution (Dynamic Token) +Triggered when the user requests CLI access or downloads a kubeconfig. +1. 
Locate the user's associated Namespace and ServiceAccount, verifying the user's ownership of the workspace. +2. Audit Logging: Record the credential issuance event (User, IP, Workspace) into the database. +3. Call the `authentication.k8s.io/v1 TokenRequest` API targeting `sa-tenant-admin` in the specific tenant's namespace. +4. Set `expirationSeconds: 7200` (2 hours). Hard limit; cannot be extended. +5. Retrieve the generated JWT token and inject it into a pre-defined `kubeconfig` text template. + +### Phase 3: Automated Renewal & Emergency Suspension +- **Session Management**: If accessed via a Web UI, the Gateway intercepts requests, attaches the dynamic token, and forwards them. If the token is within 10 minutes of expiration, the Gateway automatically issues a new TokenRequest. +- **Emergency Suspension**: If a workspace is marked compromised, the Gateway deletes its K8s `RoleBinding`, instantly revoking access for all currently active tokens of that tenant. + +## 4. API Contracts + +### 4.1. Initialize Tenant Workspace +- **Route**: `POST /api/v1/workspaces/init` +- **Auth**: Gateway Session / Bearer Token +- **Rate Limit**: Strictly rate-limited per user to prevent Namespace exhaustion. +- **Request Payload**: + ```json + { + "tier": "basic" // Determines the ResourceQuota template + } +- **Response Payload (201 Created)**: + ```json + { + "namespace": "tenant-a1b2c3d4", + "status": "provisioned", + "quota": {"cpu": "4", "memory": "8Gi"} + } + ``` +### 4.2. Generate Dynamic Kubeconfig +- **Route**: `GET /api/v1/workspaces/credentials/kubeconfig` +- **Auth**: Gateway Session / Bearer Token +- **Response Payload (200 OK)**: Returns raw `application/x-yaml` content. 
+ ```yaml + apiVersion: v1 + clusters: + - cluster: + server: https:// + certificate-authority-data: + name: internal-cluster + contexts: + - context: + cluster: internal-cluster + namespace: tenant-a1b2c3d4 # Default context locked to their namespace + user: sa-tenant-admin + name: tenant-context + current-context: tenant-context + kind: Config + users: + - name: sa-tenant-admin + user: + token: "eyJhbGciOiJSUzI1NiIs..." # Short-lived token injected here + ``` + +### 4.3. Suspend Workspace (Emergency Kill Switch) +- **Route**: POST /api/v1/workspaces/{id}/suspend +- **Auth**: Admin Only +- **Behavior**: Updates DB status to suspended and deletes the associated K8s RoleBinding. + + +### 5. Data Architecture & Persistence +- **Database**: PostgreSQL (Relational mapping between Users and K8s Namespaces). +- **Table**: `users` + - `id` (UUID, PK),`email`,`password_hash`,`status` +- **Table**: `workspaces` + + - `id` (UUID, PK) + + - `user_id` (UUID, FK to Users table) + + - `k8s_namespace` (String, unique) + + - `k8s_sa_name` (String) + + - `tier` (String) + + - `created_at` (Timestamp) +- **Table**: `audit_logs`(Security Compliance) + - `id` (UUID, PK), `user_id` (UUID), `workspace_id` (UUID), `action` (e.g., IssueKubeconfig), `ip_address`, `created_at` +- **Constraint**: We do NOT store the K8s Token in the database. Tokens are ephemeral and generated on-the-fly. + +## 6. Security, Threat Mitigation & Infrastructure Constraints + +### 6.1 Threat Model +| Threat | Mitigation Strategy | +| :--- | :--- | +| **Gateway Compromise** | The Gateway uses a strictly restricted K8s role. It cannot read existing `Secrets` or interfere with other tenants' running Pods. | +| **Token Theft (XSS)** | Application-level Auth must use `HttpOnly, Secure` Cookies. Generated Kubeconfigs expire in 2 hours. | +| **Resource Abuse (Mining)** | Hardcoded `ResourceQuota` per tenant upon creation. Global `LimitRange` enforced at the cluster level. 
| + +### 6.2 Restricted Gateway Credentials (Crucial) +The Gateway requires a K8s credential (Out-of-Cluster `kubeconfig` or Cloud IAM Role) to operate. **This credential MUST NOT have `cluster-admin` privileges.** It should be bound to a custom `ClusterRole` with ONLY the following permissions: +- `create`, `get`, `list` on `namespaces`, `resourcequotas`. +- `create`, `get`, `list` on `serviceaccounts`, `rolebindings`; `delete` on `rolebindings` (required by the Emergency Suspension kill switch). +- `create` on `serviceaccounts/token` (CRITICAL for TokenRequest API). +- *Strictly prohibited*: `get` or `list` on `secrets`, `pods`, or `deployments`. + +### 6.3 Deployment & Networking +- **Deployment Agnostic**: The application will be packaged as a Docker image and can be deployed via Docker Compose, standalone VMs, or within a Kubernetes cluster. +- **CORS/CSP**: Since this might not be a single-origin BFF, explicit CORS policies (`Access-Control-Allow-Origin`) must be tightly defined if the frontend is hosted on a separate domain. Wildcards (`*`) are prohibited. \ No newline at end of file diff --git a/README.md b/README.md index 20b9f9c..74dbae3 100644 --- a/README.md +++ b/README.md @@ -1,336 +1,290 @@ -# OCDP - Open Cloud Development Platform +# OCDP - One Click Deployment Platform -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) -[![Go Version](https://img.shields.io/badge/go-1.24+-00ADD8?logo=go)](https://go.dev/) -[![Node Version](https://img.shields.io/badge/node-20+-339933?logo=node.js)](https://nodejs.org/) -[![Docker](https://img.shields.io/badge/docker-20.10+-2496ED?logo=docker)](https://www.docker.com/) +OCDP 是一个面向 Kubernetes 的大模型推理部署平台。当前核心场景是:用户在页面选择 Harbor 中的 `vllm-serve` Helm Chart,填写实例名称、命名空间和 values 后,后端从 Harbor 拉取封装好的 OCI Helm Chart,并通过 Helm SDK 部署到已配置好的 Kubernetes 集群。 -开源云原生开发平台,用于管理 Kubernetes 集群、OCI Registry 和 Helm Charts 部署。 +## 当前能力 ---- +- Registry 管理:保存 Harbor / OCI Registry 地址与凭据,敏感字段加密入库。 +- Artifact 浏览:通过 Harbor v2.0 API 浏览当前凭据可见的项目、repositories 和 chart tags,避免依赖 `/v2/_catalog` 全局 catalog 
权限。 +- 一键部署:从前端发起实例创建,后端拉取 Chart 并在目标集群执行 Helm install/upgrade/uninstall。 +- 集群管理:保存 Kubernetes API Server、CA、客户端证书或 token,用于后端连接集群。 +- 实例管理:查看部署状态、Helm revision、Service/Ingress 入口信息。 +- 认证:内置 JWT 登录,首次启动可通过 bootstrap 注入管理员账号。 -## ✨ 特性 +## 技术栈 -- 🎯 **Registry 管理** - 支持 Harbor、Docker Registry、OCI 标准仓库 -- 📦 **Artifact 浏览** - 浏览和管理 Helm Charts、容器镜像 -- 🚀 **一键部署** - 可视化部署 Helm Charts 到 Kubernetes 集群 -- 🔍 **智能过滤** - 按 MediaType 过滤 artifacts(chart、image、other) -- 🎨 **现代 UI** - 响应式设计,基于 React + TypeScript -- 🔐 **安全认证** - JWT 认证,加密存储敏感信息 -- 🐳 **容器化** - 完整的 Docker 支持,多种运行模式 -- 🔄 **热重载** - 开发模式支持代码热重载 +- 后端:Go 1.24,Gorilla Mux,Hexagonal Architecture,PostgreSQL,ORAS SDK,Helm SDK,Kubernetes client-go。 +- 前端:React 18,TypeScript,Vite,TailwindCSS。 +- 部署:Docker Compose,Nginx 静态文件与 `/api` 反向代理,PostgreSQL 持久化。 ---- +## 项目结构 -## 🚀 快速开始 +```text +ocdp-go/ +├── backend/ # Go 后端 +│ ├── cmd/api/ # API 入口 +│ ├── internal/adapter/input/ # HTTP REST handlers / DTO +│ ├── internal/adapter/output/ # PostgreSQL / ORAS / Helm / K8s 实现 +│ ├── internal/domain/ # Entity / Repository interface / Service +│ └── internal/bootstrap/ # 首次启动数据注入 +├── frontend/ # React + Vite 前端 +├── infra/nginx/ # Nginx 网关配置和 TLS 证书 +├── docker-compose.yml # 本地完整部署入口:PostgreSQL + Backend + 前端 build job + Nginx +├── backend/docker-compose.yml # PostgreSQL + Backend + pgAdmin +├── Makefile # 推荐入口:up / restart / stop / logs / ps +└── tasks/ # Agent 工作记录 +``` -### 前置要求 +## 后端部署链路 -- Docker 20.10+ -- Docker Compose 2.0+ -- (可选) Make 工具 +1. 前端调用 `POST /api/v1/clusters/{clusterId}/instances`,提交 `name`、`namespace`、`registryId`、`repository`、`tag` 和可选 `values`。 +2. 后端 `InstanceService.CreateInstance` 校验集群、Registry 和实例名唯一性,创建 pending 记录。 +3. Chart 浏览使用 Harbor v2.0 API;实际部署时后端使用 ORAS SDK 访问 Harbor,将指定 repository/tag 的 Helm Chart layer 下载到 `/tmp/charts/{chart}-{version}.tgz`。 +4. 后端用数据库中保存的集群凭据生成临时 kubeconfig。 +5. Helm SDK 加载本地 chart 包,并对目标集群执行 `install`;后续通过 Helm status 同步实例状态。 +6. 
删除、升级和回滚实例同样通过 Helm SDK 操作目标集群。 -### 5分钟快速体验 +## 部署前准备 + +需要本机已安装: + +- Docker +- Docker Compose v2 或更高版本 +- Make,可选;没有 Make 时可直接执行 Compose 命令 + +根目录 `.env` 用于开发环境启动时注入端口、数据库、初始账号、Harbor 和 Kubernetes 集群。它是开发/测试 bootstrap 数据,不是长期配置中心;系统启动后建议在页面里维护 Registry 和 Cluster。不要提交真实 `.env`。 + +关键变量如下,实际值以你的 `.env` 为准: + +```dotenv +# 登录账号 bootstrap +BOOTSTRAP_ADMIN_USER=admin +BOOTSTRAP_ADMIN_PASS=change-me +BOOTSTRAP_ADMIN_EMAIL=admin@example.com + +# Harbor bootstrap +BOOTSTRAP_REGISTRY_NAME=harbor +BOOTSTRAP_REGISTRY_URL=https://harbor.example.com +BOOTSTRAP_REGISTRY_DESC=Harbor Registry +# 推荐使用 Harbor robot 账号,只授予目标项目 pull/read 权限 +BOOTSTRAP_REGISTRY_ROBOT_USER='robot$project+ocdp' +BOOTSTRAP_REGISTRY_ROBOT_PASS='robot-token' + +# 可选 fallback;未配置 ROBOT 变量时才会使用 +BOOTSTRAP_REGISTRY_USER=admin-or-user +BOOTSTRAP_REGISTRY_PASS=change-me +BOOTSTRAP_REGISTRY_INSECURE=false + +# Kubernetes 集群 bootstrap,需要显式启用并设置名称列表 +BOOTSTRAP_ENABLE_CLUSTERS=true +BOOTSTRAP_CLUSTERS=cluster1,cluster2 +BOOTSTRAP_CLUSTER_CLUSTER1_HOST=https://x.x.x.x:6443 +BOOTSTRAP_CLUSTER_CLUSTER1_DESC=GPU Cluster 1 +BOOTSTRAP_CLUSTER_CLUSTER1_CA=base64-ca-data +BOOTSTRAP_CLUSTER_CLUSTER1_CERT=base64-client-cert-data +BOOTSTRAP_CLUSTER_CLUSTER1_KEY=base64-client-key-data + +# 如使用 token,可配置 TOKEN;CERT/KEY 可按实际鉴权方式留空 +BOOTSTRAP_CLUSTER_CLUSTER2_HOST=https://x.x.x.x:6443 +BOOTSTRAP_CLUSTER_CLUSTER2_TOKEN=token-value + +# 服务端口,默认使用高位端口避免和本机其他项目冲突 +WEB_HTTP_PORT=18080 +WEB_HTTPS_PORT=18443 +BACKEND_PORT=18081 +POSTGRES_PORT=15432 + +# 安全与数据库 +JWT_SECRET=replace-with-a-strong-secret +ENCRYPTION_KEY=replace-with-32-byte-key +POSTGRES_DB=ocdp +POSTGRES_USER=postgres +POSTGRES_PASSWORD=replace-me + +# 可选:Docker 构建后端时使用的 Go module proxy。 +# 国内网络建议保留默认值;如公司网络要求,也可改回 https://proxy.golang.org,direct。 +GOPROXY=https://goproxy.cn,direct +GOSUMDB=sum.golang.google.cn +``` + +说明: + +- `BOOTSTRAP_CONFIG_JSON` 优先级最高,适合把完整 bootstrap 配置作为 JSON 注入。 +- 没有 `BOOTSTRAP_CONFIG_JSON` 时,后端会读取 `BOOTSTRAP_*` 变量生成初始账号、Registry 和 Cluster。 +- 
没有任何显式 bootstrap 配置时,后端不会预注入用户、Registry 或 Cluster;代码中不再保留真实 Harbor、admin 或集群 fallback。 +- 初始管理员必须显式配置 `BOOTSTRAP_ADMIN_USER` 和 `BOOTSTRAP_ADMIN_PASS`。如果只配置 Registry/Cluster 而未配置管理员账号,系统不会自动创建默认账号。 +- Registry bootstrap 凭据优先级为 `BOOTSTRAP_REGISTRY_ROBOT_USER/PASS`,然后才是 `BOOTSTRAP_REGISTRY_USER/PASS`。Harbor robot 账号需要能访问目标项目的 repositories 和 artifacts。 +- Harbor robot 用户名通常包含 `$`。本项目 Compose 已使用 raw `env_file` 传给后端;如果你在 shell 里临时 `export BOOTSTRAP_REGISTRY_ROBOT_USER=...`,请用单引号包住值,避免 shell 展开 `$project`。 +- 已存在同名用户、Registry 或 Cluster 时,bootstrap 会跳过,不会覆盖数据库里的记录。 +- `ENCRYPTION_KEY` 用于加密保存 Harbor 密码和集群凭据;生产环境首次启动后不要随意更换,否则旧数据无法解密。 + +## 推荐部署流程 + +`.env` 文件为可选配置。不提供 `.env` 时,系统以空白状态启动,首次访问时展示管理员注册页面(Setup),第一个注册用户即为管理员。 ```bash -# 1. 克隆项目 -git clone +# 1. 克隆代码 +git clone https://gitea.bwgdi.com/OCDP/ocdp-go.git cd ocdp-go -# 2. 启动开发环境(Mock 模式,无需数据库) +# 2. 构建并后台启动完整平台(无需 .env) +make up + +# 3. 打开浏览器访问 http://localhost:18080 +# 首次访问会看到 Initial Setup 页面,创建管理员账号和密码即可开始使用 +``` + +有 `.env` 时,可以预注入初始管理员账号、Registry 和 Cluster(用于开发/测试): +```bash +# 4. 查看服务;postgres/backend/nginx 应为 Up,frontend-build Exited(0) 正常 +make docker-ps +``` + +访问地址: + +- 前端入口:http://localhost:${WEB_HTTP_PORT:-18080} +- 后端健康检查:http://localhost:${BACKEND_PORT:-18081}/health +- Swagger UI:http://localhost:${BACKEND_PORT:-18081}/api/docs +- Nginx 健康检查:http://localhost:${WEB_HTTP_PORT:-18080}/healthz + +兼容旧文档的命令仍可用,但只是 `make up` 的别名: + +```bash +make run-2 make docker-dev - -# 3. 
访问应用 -# - 前端:http://localhost:5173 -# - 后端:http://localhost:8080 -# - 默认账号:admin / admin123 -``` - -**详细指南**:查看 [快速开始指南](./QUICK_START.md) - ---- - -## 📚 文档导航 - -### 📖 核心文档(必读) -- 🚀 [快速开始](./QUICK_START.md) - 5分钟快速上手 -- 📋 [使用指南](./USAGE_GUIDE.md) - 详细使用说明(推荐) -- 💡 [命令速查表](./COMMANDS_CHEATSHEET.md) - 常用命令快速参考 -- 📚 [文档中心](./docs/README.md) - 完整文档索引 - -### 🔧 专业文档 -- 📐 [开发规范](./docs/development/specification.md) - 代码规范和架构 -- 🚢 [部署指南](./docs/deployment/docker-guide.md) - 生产环境部署 -- 🔒 [安全实践](./docs/security/security-implementation.md) - 安全配置 -- 🎨 [功能文档](./docs/features/) - 详细功能说明 - -### 🔗 其他资源 -- 📋 [OpenAPI 规范](./backend/docs/openapi.yaml) - RESTful API 定义 -- 📦 [历史文档](./docs/archive/) - 项目演进历史 - ---- - -## 🏗️ 技术架构 - -### 技术栈 - -**后端**: -- Go 1.24+ (Hexagonal Architecture) -- PostgreSQL 16 -- Redis 7 - -**前端**: -- React 18 -- TypeScript 5 -- Vite 6 -- TailwindCSS 3 - -**容器化**: -- Docker -- Docker Compose -- Multi-stage builds - -### 架构图 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Frontend │ -│ React + TypeScript + Vite │ -└──────────────────────────┬──────────────────────────────────┘ - │ HTTP/REST -┌──────────────────────────┼──────────────────────────────────┐ -│ │ Backend API │ -│ ▼ │ -│ ┌─────────────────────┐ │ -│ │ Input Adapters │ │ -│ │ (REST/GraphQL) │ │ -│ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ Domain Services │ │ -│ │ (Business Logic) │ │ -│ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ Output Adapters │ │ -│ │ (Repos/Clients) │ │ -│ └──────────┬──────────┘ │ -└───────────────────────┼─┴────────────────────────────────┘ - │ - ┌───────────────┼───────────────┐ - │ │ │ - ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ - │ PG DB │ │ Redis │ │ OCI │ - │ │ │ │ │ Registry│ - └─────────┘ └─────────┘ └─────────┘ -``` - -### 运行模式 - -| 模式 | 特点 | 适用场景 | 命令 | -|------|------|----------|------| -| **开发模式** | Mock 数据,热重载 | 日常开发 | `make docker-dev` | -| **生产模式** | 真实数据库,完整功能 | 生产部署 | `make docker-prod` | -| 
**Mock 模式** | 独立测试单个服务 | 单元测试 | `make docker-test-backend` | - ---- - -## 🛠️ 开发指南 - -### 项目结构 - -``` -ocdp-go/ -├── backend/ # Go 后端服务 -│ ├── cmd/api/ # 应用入口 -│ ├── internal/ # 内部代码 -│ │ ├── adapter/ # 适配器层 -│ │ ├── domain/ # 领域层 -│ │ └── bootstrap/ # 启动配置 -│ ├── Dockerfile # 生产环境 -│ ├── Dockerfile.dev # 开发环境 -│ └── Dockerfile.mock # Mock 测试 -│ -├── frontend/ # React 前端应用 -│ ├── src/ -│ │ ├── core/ # 核心功能 -│ │ ├── features/ # 功能模块 -│ │ └── shared/ # 共享组件 -│ ├── Dockerfile # 生产环境 -│ ├── Dockerfile.dev # 开发环境 -│ └── Dockerfile.mock # Mock 测试 -│ -├── api/ # API 规范 -│ └── openapi.yaml # OpenAPI 定义 -│ -├── docs/ # 项目文档 -│ ├── features/ # 功能文档 -│ ├── deployment/ # 部署文档 -│ └── development/ # 开发文档 -│ -├── docker-compose.yml # 统一配置(使用 profiles) -└── Makefile # 便捷命令 -``` - -### 常用命令 - -```bash -# Docker 服务(推荐) -make docker-dev # 启动开发环境 -make docker-prod # 启动生产环境 -make docker-test-backend # 测试后端 -make docker-test-frontend # 测试前端 -make docker-logs # 查看日志 -make docker-down # 停止服务 - -# OpenAPI 工作流 -make openapi-validate # 验证 API 规范 -make openapi-gen # 生成代码 -make openapi-docs # 生成文档 - -# 本地开发(不使用 Docker) -make install # 安装依赖 -make dev-local # 启动本地开发 -make test # 运行测试 -``` - -### 开发工作流 - -1. **启动开发环境**: - ```bash - make docker-dev - ``` - -2. **修改代码**(自动热重载): - - 后端:编辑 `backend/` 下的 Go 文件 - - 前端:编辑 `frontend/src/` 下的 React 组件 - -3. **查看日志**: - ```bash - make docker-logs - ``` - -4. **测试功能**: - - 前端:http://localhost:5173 - - 后端:http://localhost:8080 - -5. **提交代码**: - ```bash - git add . 
- git commit -m "feat: add new feature" - git push - ``` - ---- - -## 🧪 测试 - -### 后端测试 - -```bash -# 启动后端 Mock -make docker-test-backend-bg - -# 测试健康检查 -curl http://localhost:8080/health - -# 测试登录 -curl -X POST http://localhost:8080/api/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"username":"admin","password":"admin123"}' - -# 测试 API -curl http://localhost:8080/api/v1/registries -curl http://localhost:8080/api/v1/clusters -``` - -### 前端测试 - -```bash -# 启动前端 Mock -make docker-test-frontend-bg - -# 访问前端 -open http://localhost:3000 -``` - -### 集成测试 - -```bash -# 启动完整环境 make docker-prod - -# 运行测试套件 -make test +make docker-up ``` ---- - -## 📦 部署 - -### Docker Compose 部署(推荐) +没有 Make 时,直接用根目录 Compose 文件: ```bash -# 1. 配置环境变量 -export JWT_SECRET="your-production-secret" -export ENCRYPTION_KEY="your-32-byte-encryption-key" - -# 2. 启动服务 -docker compose up -d - -# 3. 查看状态 -docker compose ps +docker compose up --build -d +docker compose ps -a ``` -### Kubernetes 部署 +代码、Dockerfile、前端资源变更后都建议使用 `make up` 或 `docker compose up --build -d`,避免复用旧镜像或旧前端静态资源。 -查看 [Kubernetes 部署指南](./docs/deployment/kubernetes-guide.md) +## 验证部署 ---- +```bash +# 健康检查 +curl http://localhost:${BACKEND_PORT:-18081}/health +curl http://localhost:${WEB_HTTP_PORT:-18080}/healthz -## 🤝 贡献 +# 检查是否需要初始化管理员(无 .env 部署时返回 needsSetup: true) +curl http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/status -欢迎贡献代码!请遵循以下步骤: +# 初始化管理员账号(仅限尚无管理员时可用) +curl -s -X POST http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/setup \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"your-password"}' -1. Fork 项目 -2. 创建功能分支 (`git checkout -b feature/amazing-feature`) -3. 提交更改 (`git commit -m 'feat: add amazing feature'`) -4. 推送分支 (`git push origin feature/amazing-feature`) -5. 
创建 Pull Request +# 登录 +curl -s -X POST http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"your-password"}' -### 开发规范 +# 查看 bootstrap 是否生效,需要带 Bearer token +curl http://localhost:${BACKEND_PORT:-18081}/api/v1/registries \ + -H "Authorization: Bearer " -- **代码风格**:Go (gofmt),TypeScript (ESLint + Prettier) -- **提交规范**:遵循 [Conventional Commits](https://www.conventionalcommits.org/) -- **测试覆盖**:新功能必须包含测试 +curl http://localhost:${BACKEND_PORT:-18081}/api/v1/clusters \ + -H "Authorization: Bearer " +``` ---- +页面验证: -## 📄 许可证 +1. 打开前端入口并登录。 +2. 进入 Chart Browser,确认能看到 Harbor 中的 `vllm-serve` 或 nginx chart repository。当前默认只展示可部署 Helm chart。 +3. 选择 chart tag,点击 Launch。 +4. 选择目标集群、命名空间,填写实例名和 values。values 支持 schema 表单或 YAML;YAML 会在前端校验,并由后端解析为 Helm values map。 +5. 提交后到实例页面查看状态;后端会异步安装并同步 Helm 状态。 -本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情 +命令行 smoke test: ---- +```bash +# 只验证登录、Registry health、Harbor chart 浏览和 values schema +BASE_URL=http://localhost:${BACKEND_PORT:-18081}/api/v1 \ +ADMIN_USER="${BOOTSTRAP_ADMIN_USER:-admin}" \ +ADMIN_PASS="" \ +./test/current-platform-smoke.sh -## 🙏 致谢 +# 允许真实部署时,会创建测试 release 并在结束后调用平台删除 +RUN_DEPLOY_TEST=true \ +TEST_NAMESPACE=ocdp-smoke \ +TEST_RELEASE=ocdp-smoke-nginx \ +BASE_URL=http://localhost:${BACKEND_PORT:-18081}/api/v1 \ +ADMIN_PASS="" \ +./test/current-platform-smoke.sh +``` -- [Go](https://go.dev/) - 后端开发语言 -- [React](https://react.dev/) - 前端框架 -- [Vite](https://vitejs.dev/) - 构建工具 -- [Docker](https://www.docker.com/) - 容器化平台 -- [Kubernetes](https://kubernetes.io/) - 容器编排 -- [Harbor](https://goharbor.io/) - OCI Registry +## 常用运维命令 ---- +```bash +# 一条命令启动/更新完整平台 +make up -## 📞 联系方式 +# 强制重建并重启完整平台 +make restart -- **项目主页**:https://github.com/your-org/ocdp-go -- **问题反馈**:https://github.com/your-org/ocdp-go/issues -- **文档网站**:https://docs.ocdp.example.com +# 查看当前状态;需要关注 postgres/backend/nginx 是否 Up +make docker-ps +docker compose ps -a ---- +# 
查看日志 +make docker-logs -
- Built with ❤️ by the OCDP Team -
+# 只重启后端 +docker compose restart backend + +# 只重启 Web 网关 +docker compose restart nginx + +# 停止本项目服务,保留数据库和前端构建卷 +make stop + +# 清理本项目容器和数据卷,谨慎使用,会删除 PostgreSQL 数据 +make clean +``` + +## 本地开发与测试 + +后端: + +```bash +cd backend +go test ./... +go run cmd/api/main.go +``` + +前端: + +```bash +cd frontend +npm ci +npm run build +``` + +Mock 后端仍可通过 `backend/docker-compose.yml` 的 `mock` profile 启动: + +```bash +docker compose -f backend/docker-compose.yml --profile mock up -d backend-mock +``` + +## 注意事项 + +- 不要为了端口冲突停止其他项目;优先通过 `WEB_HTTP_PORT`、`WEB_HTTPS_PORT`、`BACKEND_PORT`、`POSTGRES_PORT` 换端口。当前默认端口已经是 `18080/18443/18081/15432`。 +- `frontend-build` 是一次性构建任务,退出码 `0` 是正常状态;前端页面由 `nginx` 容器提供。若只看到 backend/postgres 在运行,请执行 `make up` 或 `docker compose up --build -d` 恢复完整栈。 +- 如果旧文档提到 `make docker-dev`、`make docker-prod`,现在这些命令仍可用,都会调用 `make up` 启动同一套 Docker 栈。 +- 如果之前用旧配置启动失败过,PostgreSQL 卷里可能残留旧的加密数据,表现为 `/api/v1/clusters` 或 `/api/v1/registries` 解密失败。开发/重装环境可执行 `make clean-2 && make docker-dev` 重新初始化;生产环境不要直接删卷,应先备份数据库。 +- `vllm-serve` 必须以 Helm Chart OCI artifact 的形式存在于 Harbor 中;后端会寻找 Helm Chart layer 并保存为 `.tgz`。 +- Harbor 浏览使用 `/api/v2.0/projects`、project repositories 和 artifacts API。若 robot 账号无法列项目或 artifacts,页面会显示明确错误;请检查 Harbor 项目成员/robot 权限,而不是给普通用户开放全局 catalog。 +- values YAML 已按 YAML 解析;顶层必须是 mapping,例如 `replicaCount: 1`。 +- Nginx 默认同时监听 HTTP 和 HTTPS,证书位于 `infra/nginx/certs/`,生产环境应替换为正式证书。 +- `make clean-2` 会删除本项目 Compose 卷,包括 PostgreSQL 数据;只想停服务时使用 `docker compose ... 
down --remove-orphans`。 + +## API 文档 + +- OpenAPI YAML:[backend/docs/openapi.yaml](./backend/docs/openapi.yaml) +- 运行后 Swagger UI:`/api/docs` diff --git a/backend/Dockerfile b/backend/Dockerfile index ae98b1f..f4f6dae 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -4,12 +4,17 @@ # ================================================== FROM golang:1.24-alpine AS builder +ARG GOPROXY=https://goproxy.cn,direct +ARG GOSUMDB=sum.golang.google.cn +ENV GOPROXY=${GOPROXY} +ENV GOSUMDB=${GOSUMDB} + RUN apk add --no-cache git make WORKDIR /build COPY go.mod go.sum ./ -RUN go mod download +RUN sh -c 'for i in 1 2 3; do go mod download && exit 0; echo "go mod download failed, retrying ($i/3)" >&2; sleep 5; done; go mod download' COPY . . RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o ocdp-backend cmd/api/main.go diff --git a/backend/cmd/api/main.go b/backend/cmd/api/main.go index d32c592..f23b13d 100644 --- a/backend/cmd/api/main.go +++ b/backend/cmd/api/main.go @@ -27,14 +27,17 @@ import ( "log" "net/http" "os" + "strings" "time" "github.com/gorilla/mux" "github.com/ocdp/cluster-service/internal/adapter/input/http/rest" "github.com/ocdp/cluster-service/internal/adapter/output" + "github.com/ocdp/cluster-service/internal/adapter/output/k8s" "github.com/ocdp/cluster-service/internal/bootstrap" "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" "github.com/ocdp/cluster-service/internal/pkg/crypto" "github.com/ocdp/cluster-service/internal/pkg/jwt" "github.com/ocdp/cluster-service/internal/pkg/password" @@ -72,9 +75,16 @@ func main() { // ===== 5. 
创建 Domain Services ===== authService := service.NewAuthService( repos.UserRepo, + repos.WorkspaceRepo, passwordHasher, tokenGenerator, ) + authService.SetUserLifecycleCleanup( + repos.InstanceRepo, + repos.ClusterRepo, + repos.BindingRepo, + repos.TenantKubeClient, + ) clusterService := service.NewClusterService( repos.ClusterRepo, @@ -97,11 +107,26 @@ func main() { repos.HelmClient, repos.OCIClient, repos.EntryClient, + repos.BindingRepo, ) + instanceService.SetDiagnosticsClient(repos.DiagnosticsClient) + instanceService.SetTenantProvisioning(repos.WorkspaceRepo, repos.TenantKubeClient) + instanceService.SetScaleClient(k8s.NewScaleClient()) + instanceService.SetUserRepository(repos.UserRepo) monitoringService := service.NewMonitoringService( repos.ClusterRepo, repos.MetricsClient, + repos.InstanceRepo, + repos.UserRepo, + ) + + workspaceService := service.NewWorkspaceService( + repos.WorkspaceRepo, + repos.BindingRepo, + repos.ClusterRepo, + repos.TenantKubeClient, + repos.AuditRepo, ) log.Println("✅ Domain Services initialized") @@ -110,7 +135,7 @@ func main() { bootstrapConfig, err := bootstrap.LoadBootstrapConfig() if err != nil { log.Printf("⚠️ Warning: Failed to load bootstrap config: %v", err) - // 使用默认配置 + // 使用安全的空配置,避免在配置错误时写入任何预置账号或集群凭据。 bootstrapConfig = bootstrap.GetDefaultBootstrapConfig() } @@ -126,6 +151,7 @@ func main() { artifactHandler := rest.NewArtifactHandler(artifactService) instanceHandler := rest.NewInstanceHandler(instanceService) monitoringHandler := rest.NewMonitoringHandler(monitoringService) + workspaceHandler := rest.NewWorkspaceHandler(workspaceService) swaggerHandler := rest.NewSwaggerHandler() log.Println("✅ Input Adapters (REST handlers) initialized") @@ -133,11 +159,13 @@ func main() { // ===== 8. 
设置路由 ===== router := setupRouter( authHandler, + authService, clusterHandler, registryHandler, artifactHandler, instanceHandler, monitoringHandler, + workspaceHandler, swaggerHandler, ) @@ -191,11 +219,13 @@ func getEnv(key, defaultValue string) string { // setupRouter 设置路由 func setupRouter( authHandler *rest.AuthHandler, + authService *service.AuthService, clusterHandler *rest.ClusterHandler, registryHandler *rest.RegistryHandler, artifactHandler *rest.ArtifactHandler, instanceHandler *rest.InstanceHandler, monitoringHandler *rest.MonitoringHandler, + workspaceHandler *rest.WorkspaceHandler, swaggerHandler *rest.SwaggerHandler, ) *mux.Router { router := mux.NewRouter().StrictSlash(true) @@ -222,45 +252,73 @@ func setupRouter( api := router.PathPrefix("/api/v1").Subrouter() // ===== 认证路由 ===== - api.HandleFunc("/auth/register", authHandler.Register) - api.HandleFunc("/auth/login", authHandler.Login) - api.HandleFunc("/auth/refresh", authHandler.RefreshToken) + api.HandleFunc("/auth/login", authHandler.Login).Methods(http.MethodPost) + api.HandleFunc("/auth/refresh", authHandler.RefreshToken).Methods(http.MethodPost) + api.HandleFunc("/auth/status", authHandler.AuthStatus).Methods(http.MethodGet) + api.HandleFunc("/auth/setup", authHandler.Setup).Methods(http.MethodPost) + + protected := api.PathPrefix("").Subrouter() + protected.Use(authMiddleware(authService)) + protected.HandleFunc("/auth/me", authHandler.Me).Methods(http.MethodGet) + protected.HandleFunc("/auth/register", authHandler.Register).Methods(http.MethodPost) + protected.HandleFunc("/users", authHandler.ListUsers).Methods(http.MethodGet) + protected.HandleFunc("/users", authHandler.Register).Methods(http.MethodPost) + protected.HandleFunc("/users/{user_id}", authHandler.UpdateUser).Methods(http.MethodPut) + protected.HandleFunc("/users/{user_id}", authHandler.DeleteUser).Methods(http.MethodDelete) // ===== 集群路由 ===== - api.HandleFunc("/clusters", clusterHandler.CreateCluster).Methods(http.MethodPost) - 
api.HandleFunc("/clusters", clusterHandler.GetAllClusters).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.GetCluster).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.UpdateCluster).Methods(http.MethodPut) - api.HandleFunc("/clusters/{cluster_id}", clusterHandler.DeleteCluster).Methods(http.MethodDelete) - api.HandleFunc("/clusters/{cluster_id}/health", clusterHandler.GetClusterHealth).Methods(http.MethodGet) + protected.HandleFunc("/clusters", clusterHandler.CreateCluster).Methods(http.MethodPost) + protected.HandleFunc("/clusters", clusterHandler.GetAllClusters).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.GetCluster).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.UpdateCluster).Methods(http.MethodPut) + protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.DeleteCluster).Methods(http.MethodDelete) + protected.HandleFunc("/clusters/{cluster_id}/health", clusterHandler.GetClusterHealth).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/stats", monitoringHandler.GetClusterStats).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/kubeconfig", workspaceHandler.IssueClusterKubeconfig).Methods(http.MethodGet) // ===== Registry 路由 ===== - api.HandleFunc("/registries", registryHandler.CreateRegistry).Methods(http.MethodPost) - api.HandleFunc("/registries", registryHandler.GetAllRegistries).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}", registryHandler.GetRegistry).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}", registryHandler.UpdateRegistry).Methods(http.MethodPut) - api.HandleFunc("/registries/{registry_id}", registryHandler.DeleteRegistry).Methods(http.MethodDelete) - api.HandleFunc("/registries/{registry_id}/health", registryHandler.GetRegistryHealth).Methods(http.MethodGet) + protected.HandleFunc("/registries", 
registryHandler.CreateRegistry).Methods(http.MethodPost) + protected.HandleFunc("/registries", registryHandler.GetAllRegistries).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}", registryHandler.GetRegistry).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}", registryHandler.UpdateRegistry).Methods(http.MethodPut) + protected.HandleFunc("/registries/{registry_id}", registryHandler.DeleteRegistry).Methods(http.MethodDelete) + protected.HandleFunc("/registries/{registry_id}/health", registryHandler.GetRegistryHealth).Methods(http.MethodGet) // ===== Artifact 路由 ===== - api.HandleFunc("/registries/{registry_id}/repositories", artifactHandler.ListRepositories).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts", artifactHandler.ListArtifacts).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}", artifactHandler.GetArtifact).Methods(http.MethodGet) - api.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-schema", artifactHandler.GetArtifactValuesSchema).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories", artifactHandler.ListRepositories).Methods(http.MethodGet) + protected.HandleFunc("/repositories/{repository_name:.+}/tags", artifactHandler.ListRepositoryTags).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts", artifactHandler.ListArtifacts).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/tags", artifactHandler.ListRepositoryTags).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}", artifactHandler.GetArtifact).Methods(http.MethodGet) + 
protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-schema", artifactHandler.GetArtifactValuesSchema).Methods(http.MethodGet) + protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-yaml", artifactHandler.GetArtifactValuesYAML).Methods(http.MethodGet) // ===== Instance 路由 ===== - api.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.CreateInstance).Methods(http.MethodPost) - api.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.ListInstances).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.GetInstance).Methods(http.MethodGet) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.UpdateInstance).Methods(http.MethodPut) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.DeleteInstance).Methods(http.MethodDelete) - api.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/entries", instanceHandler.ListInstanceEntries).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.CreateInstance).Methods(http.MethodPost) + protected.HandleFunc("/clusters/{cluster_id}/instances", instanceHandler.ListInstances).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.GetInstance).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.UpdateInstance).Methods(http.MethodPut) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}", instanceHandler.DeleteInstance).Methods(http.MethodDelete) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/entries", instanceHandler.ListInstanceEntries).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/diagnostics", 
instanceHandler.GetInstanceDiagnostics).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/logs/stream", instanceHandler.StreamInstanceLogs).Methods(http.MethodGet) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/scale", instanceHandler.ScaleInstance).Methods(http.MethodPost) + protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/values-diff", instanceHandler.GetInstanceValuesDiff).Methods(http.MethodGet) // ===== Monitoring 路由 ===== - api.HandleFunc("/monitoring/clusters", monitoringHandler.ListClusterMonitoring).Methods(http.MethodGet) - api.HandleFunc("/monitoring/clusters/{cluster_id}", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet) - api.HandleFunc("/monitoring/clusters/{cluster_id}/nodes", monitoringHandler.GetNodeMetrics).Methods(http.MethodGet) - api.HandleFunc("/monitoring/summary", monitoringHandler.GetMonitoringSummary).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters", monitoringHandler.ListClusterMonitoring).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters/{cluster_id}", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters/{cluster_id}/metrics", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/clusters/{cluster_id}/nodes", monitoringHandler.GetNodeMetrics).Methods(http.MethodGet) + protected.HandleFunc("/monitoring/summary", monitoringHandler.GetMonitoringSummary).Methods(http.MethodGet) + + // ===== Workspace 路由 ===== + protected.HandleFunc("/workspaces", workspaceHandler.ListWorkspaces).Methods(http.MethodGet) + protected.HandleFunc("/workspaces", workspaceHandler.CreateWorkspace).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/credentials/kubeconfig", workspaceHandler.IssueCurrentKubeconfig).Methods(http.MethodGet) + protected.HandleFunc("/workspaces/{workspace_id}/clusters", 
workspaceHandler.InitClusterBinding).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/{workspace_id}/kubeconfig", workspaceHandler.IssueKubeconfig).Methods(http.MethodPost) + protected.HandleFunc("/workspaces/{workspace_id}/suspend", workspaceHandler.SuspendWorkspace).Methods(http.MethodPost) // 处理 MethodNotAllowed 错误(OPTIONS 请求会触发) router.MethodNotAllowedHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -275,6 +333,35 @@ func setupRouter( return router } +func authMiddleware(authService *service.AuthService) mux.MiddlewareFunc { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + header := r.Header.Get("Authorization") + if !strings.HasPrefix(header, "Bearer ") { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + token := strings.TrimSpace(strings.TrimPrefix(header, "Bearer ")) + if token == "" { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + principal, err := authService.VerifyAccessToken(r.Context(), token) + if err != nil { + writeJSONError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + return + } + next.ServeHTTP(w, r.WithContext(authz.WithPrincipal(r.Context(), principal))) + }) + } +} + +func writeJSONError(w http.ResponseWriter, status int, code, message string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = w.Write([]byte(fmt.Sprintf(`{"error":%q,"message":%q}`, code, message))) +} + // loggingMiddleware 日志中间件 func loggingMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -287,15 +374,16 @@ func loggingMiddleware(next http.Handler) http.Handler { // corsMiddleware CORS 中间件 func corsMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // 设置 CORS 头 origin := 
r.Header.Get("Origin") - if origin == "" { - origin = "*" + if origin != "" { + w.Header().Add("Vary", "Origin") + if corsOriginAllowed(origin) { + w.Header().Set("Access-Control-Allow-Origin", origin) + w.Header().Set("Access-Control-Allow-Credentials", "true") + } } - w.Header().Set("Access-Control-Allow-Origin", origin) w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With") - w.Header().Set("Access-Control-Allow-Credentials", "true") w.Header().Set("Access-Control-Max-Age", "86400") // 处理 OPTIONS 预检请求 @@ -307,3 +395,47 @@ func corsMiddleware(next http.Handler) http.Handler { next.ServeHTTP(w, r) }) } + +func corsOriginAllowed(origin string) bool { + origin = strings.TrimSpace(origin) + if origin == "" { + return false + } + for _, allowed := range corsAllowedOrigins() { + if origin == allowed { + return true + } + } + return false +} + +func corsAllowedOrigins() []string { + configured := strings.TrimSpace(os.Getenv("CORS_ALLOWED_ORIGINS")) + if configured == "" { + configured = strings.TrimSpace(os.Getenv("ALLOWED_ORIGINS")) + } + if configured == "" { + return []string{ + "http://localhost:3000", + "http://localhost:5173", + "http://localhost:8080", + "http://localhost:18080", + "http://localhost:18081", + "http://127.0.0.1:3000", + "http://127.0.0.1:5173", + "http://127.0.0.1:8080", + "http://127.0.0.1:18080", + "http://127.0.0.1:18081", + "http://10.6.80.114:18080", + } + } + origins := make([]string, 0) + for _, origin := range strings.Split(configured, ",") { + origin = strings.TrimSpace(origin) + if origin == "" || origin == "*" { + continue + } + origins = append(origins, origin) + } + return origins +} diff --git a/backend/cmd/api/main_test.go b/backend/cmd/api/main_test.go new file mode 100644 index 0000000..8d3459d --- /dev/null +++ b/backend/cmd/api/main_test.go @@ -0,0 +1,50 @@ +package main + +import ( + "net/http" + 
"net/http/httptest" + "testing" +) + +func TestCORSMiddlewareAllowsDefaultLocalhostOrigin(t *testing.T) { + t.Setenv("CORS_ALLOWED_ORIGINS", "") + t.Setenv("ALLOWED_ORIGINS", "") + + req := httptest.NewRequest(http.MethodGet, "/health", nil) + req.Header.Set("Origin", "http://localhost:5173") + rec := httptest.NewRecorder() + + corsMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })).ServeHTTP(rec, req) + + if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "http://localhost:5173" { + t.Fatalf("expected localhost origin to be allowed, got %q", got) + } + if got := rec.Header().Get("Access-Control-Allow-Credentials"); got != "true" { + t.Fatalf("expected credentials header for allowed origin, got %q", got) + } +} + +func TestCORSMiddlewareDoesNotReflectDisallowedOrigin(t *testing.T) { + t.Setenv("CORS_ALLOWED_ORIGINS", "https://app.example.com") + t.Setenv("ALLOWED_ORIGINS", "") + + req := httptest.NewRequest(http.MethodOptions, "/api/v1/auth/login", nil) + req.Header.Set("Origin", "https://evil.example.com") + rec := httptest.NewRecorder() + + corsMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Fatal("preflight should not call next handler") + })).ServeHTTP(rec, req) + + if got := rec.Code; got != http.StatusNoContent { + t.Fatalf("expected preflight status %d, got %d", http.StatusNoContent, got) + } + if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "" { + t.Fatalf("expected disallowed origin not to be reflected, got %q", got) + } + if got := rec.Header().Get("Access-Control-Allow-Credentials"); got != "" { + t.Fatalf("expected credentials header to be omitted for disallowed origin, got %q", got) + } +} diff --git a/backend/config/bootstrap.example.json b/backend/config/bootstrap.example.json index fab493e..55f3ab2 100644 --- a/backend/config/bootstrap.example.json +++ b/backend/config/bootstrap.example.json @@ -2,9 +2,9 @@ "enabled": true, 
"users": [ { - "username": "admin", - "password": "change-me-in-production", - "email": "admin@example.com" + "username": "bootstrap-admin", + "password": "replace-with-a-strong-password", + "email": "bootstrap-admin@example.local" } ], "registries": [ @@ -12,8 +12,8 @@ "name": "my-harbor", "url": "https://harbor.example.com", "description": "Harbor Registry", - "username": "admin", - "password": "change-me", + "username": "robot$project+ocdp", + "password": "replace-with-robot-token", "insecure": false } ], @@ -28,4 +28,3 @@ } ] } - diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 4dab3fd..71a3071 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -37,7 +37,7 @@ services: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C" ports: - - "${POSTGRES_PORT:-5432}:5432" + - "${POSTGRES_PORT:-15432}:5432" volumes: - postgres_data:/var/lib/postgresql/data - ${INIT_DB_SQL_PATH:-./scripts/init-db.sql}:/docker-entrypoint-initdb.d/01-init.sql:ro @@ -58,9 +58,16 @@ services: build: context: ${BACKEND_BUILD_CONTEXT:-.} dockerfile: ${BACKEND_BUILD_DOCKERFILE:-Dockerfile} + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} image: ocdp-backend:latest container_name: ocdp-backend restart: unless-stopped + env_file: + - path: ../.env + required: false + format: raw environment: ADAPTER_MODE: ${ADAPTER_MODE:-production} PORT: 8080 @@ -68,12 +75,12 @@ services: ENCRYPTION_KEY: ${ENCRYPTION_KEY:-change-me-32-bytes-long-key-here} DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-ocdp}?sslmode=disable ports: - - "${BACKEND_PORT:-8080}:8080" + - "${BACKEND_PORT:-18081}:8080" volumes: - ./config:/app/config:ro - ./data:/app/data healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"] 
interval: 30s timeout: 10s retries: 3 @@ -94,6 +101,9 @@ services: build: context: ${BACKEND_BUILD_CONTEXT:-.} dockerfile: ${BACKEND_MOCK_BUILD_DOCKERFILE:-Dockerfile.mock} + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} container_name: ocdp-backend-mock restart: unless-stopped environment: @@ -102,9 +112,9 @@ services: JWT_SECRET: ${JWT_SECRET:-test-jwt-secret-key} ENCRYPTION_KEY: ${ENCRYPTION_KEY:-test-encryption-key-32-bytes-long} ports: - - "${BACKEND_PORT:-8080}:8080" + - "${BACKEND_PORT:-18081}:8080" healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"] interval: 30s timeout: 10s retries: 3 @@ -124,7 +134,7 @@ services: restart: unless-stopped environment: PGADMIN_DEFAULT_EMAIL: ${PGADMIN_EMAIL:-admin@ocdp.local} - PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-admin} + PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-change-me} PGADMIN_CONFIG_SERVER_MODE: "False" PGADMIN_CONFIG_MASTER_PASSWORD_REQUIRED: "False" ports: diff --git a/backend/internal/adapter/input/http/dto/artifact_dto.go b/backend/internal/adapter/input/http/dto/artifact_dto.go index cd71cca..bb20702 100644 --- a/backend/internal/adapter/input/http/dto/artifact_dto.go +++ b/backend/internal/adapter/input/http/dto/artifact_dto.go @@ -6,9 +6,9 @@ type RepositoryListResponse struct { RegistryURL string `json:"registryUrl"` Repositories []string `json:"repositories"` Total int `json:"total"` - CatalogSupported bool `json:"catalogSupported"` // Whether _catalog API is supported - Source string `json:"source"` // Data source: "catalog" | "preconfigured" | "unavailable" - Message string `json:"message,omitempty"` // User-friendly message + CatalogSupported bool `json:"catalogSupported"` // Whether _catalog API is supported + Source string `json:"source"` // Data source: "catalog" | "preconfigured" | "unavailable" + Message string `json:"message,omitempty"` // 
User-friendly message } // ArtifactResponse Artifact 响应(简化版本,只包含核心字段) @@ -23,11 +23,11 @@ type ArtifactResponse struct { // TagResponse Tag 响应(前端期望的扁平化结构) type TagResponse struct { - RepositoryName string `json:"repositoryName"` // Repository name - Tag string `json:"tag"` // Tag name (e.g. "1.0.0", "latest") - Type string `json:"type"` // Artifact type: chart, image, other + RepositoryName string `json:"repositoryName"` // Repository name + Tag string `json:"tag"` // Tag name (e.g. "1.0.0", "latest") + Type string `json:"type"` // Artifact type: chart, image, other MediaType string `json:"mediaType,omitempty"` - Size int64 `json:"size"` // Artifact size (bytes) + Size int64 `json:"size"` // Artifact size (bytes) } // ArtifactListResponse Artifact 列表响应(包装格式,用于详细接口) @@ -42,3 +42,7 @@ type ValuesSchemaResponse struct { Schema string `json:"schema"` } +// ValuesYAMLResponse Helm Chart 默认 values.yaml 响应 +type ValuesYAMLResponse struct { + ValuesYAML string `json:"valuesYaml"` +} diff --git a/backend/internal/adapter/input/http/dto/auth_dto.go b/backend/internal/adapter/input/http/dto/auth_dto.go index cb2c823..38d56e1 100644 --- a/backend/internal/adapter/input/http/dto/auth_dto.go +++ b/backend/internal/adapter/input/http/dto/auth_dto.go @@ -1,9 +1,47 @@ package dto +import "strings" + // RegisterRequest 用户注册请求 type RegisterRequest struct { - Username string `json:"username" binding:"required"` - Password string `json:"password" binding:"required,min=6"` + Username string `json:"username" binding:"required"` + Password string `json:"password" binding:"required,min=6"` + Role string `json:"role,omitempty"` + WorkspaceID string `json:"workspaceId,omitempty"` + WorkspaceIDSnake string `json:"workspace_id,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + DefaultClusterIDSnake string `json:"default_cluster_id,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaCPUSnake string 
`json:"quota_cpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaMemorySnake string `json:"quota_memory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUSnake string `json:"quota_gpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + QuotaGPUMemSnake string `json:"quota_gpu_memory,omitempty"` + IsActive *bool `json:"isActive,omitempty"` + IsActiveSnake *bool `json:"is_active,omitempty"` + MustChangePassword *bool `json:"mustChangePassword,omitempty"` + MustChangePasswordSnake *bool `json:"must_change_password,omitempty"` +} + +func (r *RegisterRequest) Normalize() { + if r == nil { + return + } + r.WorkspaceID = firstNonBlank(r.WorkspaceID, r.WorkspaceIDSnake) + r.DefaultClusterID = firstNonBlank(r.DefaultClusterID, r.DefaultClusterIDSnake) + r.QuotaCPU = firstNonBlank(r.QuotaCPU, r.QuotaCPUSnake) + r.QuotaMemory = firstNonBlank(r.QuotaMemory, r.QuotaMemorySnake) + r.QuotaGPU = firstNonBlank(r.QuotaGPU, r.QuotaGPUSnake) + r.QuotaGPUMem = firstNonBlank(r.QuotaGPUMem, r.QuotaGPUMemSnake) + if r.IsActive == nil { + r.IsActive = r.IsActiveSnake + } + if r.MustChangePassword == nil { + r.MustChangePassword = r.MustChangePasswordSnake + } } // LoginRequest 用户登录请求 @@ -12,6 +50,13 @@ type LoginRequest struct { Password string `json:"password" binding:"required"` } +// SetupRequest 初始管理员注册请求 +type SetupRequest struct { + Username string `json:"username" binding:"required"` + Password string `json:"password" binding:"required"` + Email string `json:"email,omitempty"` +} + // RefreshTokenRequest 刷新 Token 请求 type RefreshTokenRequest struct { RefreshToken string `json:"refreshToken" binding:"required"` @@ -19,17 +64,86 @@ type RefreshTokenRequest struct { // AuthResponse 认证响应 type AuthResponse struct { - AccessToken string `json:"accessToken"` - RefreshToken string `json:"refreshToken"` - UserID string `json:"userId"` - Username string `json:"username"` + AccessToken string `json:"accessToken"` + RefreshToken string 
`json:"refreshToken"` + UserID string `json:"userId"` + Username string `json:"username"` + Role string `json:"role"` + WorkspaceID string `json:"workspaceId"` + WorkspaceName string `json:"workspaceName,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + Permissions []string `json:"permissions,omitempty"` + PermissionVersion int `json:"permissionVersion"` } // UserResponse 用户信息响应 type UserResponse struct { - ID string `json:"id"` - Username string `json:"username"` - Email string `json:"email"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` + ID string `json:"id"` + Username string `json:"username"` + Email string `json:"email"` + Role string `json:"role"` + WorkspaceID string `json:"workspaceId"` + WorkspaceName string `json:"workspaceName,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + IsActive bool `json:"isActive"` + MustChangePassword bool `json:"mustChangePassword"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` +} + +// UpdateUserRequest 管理员更新用户状态/角色请求 +type UpdateUserRequest struct { + Role string `json:"role,omitempty"` + WorkspaceID string `json:"workspaceId,omitempty"` + WorkspaceIDSnake string `json:"workspace_id,omitempty"` + Namespace string `json:"namespace,omitempty"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + DefaultClusterIDSnake string `json:"default_cluster_id,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + 
QuotaCPUSnake string `json:"quota_cpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaMemorySnake string `json:"quota_memory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUSnake string `json:"quota_gpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + QuotaGPUMemSnake string `json:"quota_gpu_memory,omitempty"` + IsActive *bool `json:"isActive,omitempty"` + IsActiveSnake *bool `json:"is_active,omitempty"` + MustChangePassword *bool `json:"mustChangePassword,omitempty"` + MustChangePasswordSnake *bool `json:"must_change_password,omitempty"` +} + +func (r *UpdateUserRequest) Normalize() { + if r == nil { + return + } + r.WorkspaceID = firstNonBlank(r.WorkspaceID, r.WorkspaceIDSnake) + r.DefaultClusterID = firstNonBlank(r.DefaultClusterID, r.DefaultClusterIDSnake) + r.QuotaCPU = firstNonBlank(r.QuotaCPU, r.QuotaCPUSnake) + r.QuotaMemory = firstNonBlank(r.QuotaMemory, r.QuotaMemorySnake) + r.QuotaGPU = firstNonBlank(r.QuotaGPU, r.QuotaGPUSnake) + r.QuotaGPUMem = firstNonBlank(r.QuotaGPUMem, r.QuotaGPUMemSnake) + if r.IsActive == nil { + r.IsActive = r.IsActiveSnake + } + if r.MustChangePassword == nil { + r.MustChangePassword = r.MustChangePasswordSnake + } +} + +func firstNonBlank(primary, alternate string) string { + if strings.TrimSpace(primary) != "" { + return primary + } + return alternate } diff --git a/backend/internal/adapter/input/http/dto/auth_dto_test.go b/backend/internal/adapter/input/http/dto/auth_dto_test.go new file mode 100644 index 0000000..cbe8626 --- /dev/null +++ b/backend/internal/adapter/input/http/dto/auth_dto_test.go @@ -0,0 +1,51 @@ +package dto + +import "testing" + +func TestRegisterRequestNormalizeUsesSnakeCaseAlternates(t *testing.T) { + active := false + mustChange := true + req := RegisterRequest{ + WorkspaceIDSnake: "workspace-1", + DefaultClusterIDSnake: "cluster-1", + QuotaCPUSnake: "2", + QuotaMemorySnake: "4Gi", + QuotaGPUSnake: "1", + QuotaGPUMemSnake: "10000", + 
IsActiveSnake: &active, + MustChangePasswordSnake: &mustChange, + } + + req.Normalize() + + if req.WorkspaceID != "workspace-1" || req.DefaultClusterID != "cluster-1" { + t.Fatalf("expected snake case workspace/cluster fields to normalize, got %#v", req) + } + if req.QuotaCPU != "2" || req.QuotaMemory != "4Gi" || req.QuotaGPU != "1" || req.QuotaGPUMem != "10000" { + t.Fatalf("expected snake case quota fields to normalize, got %#v", req) + } + if req.IsActive == nil || *req.IsActive { + t.Fatalf("expected is_active=false to normalize, got %#v", req.IsActive) + } + if req.MustChangePassword == nil || !*req.MustChangePassword { + t.Fatalf("expected must_change_password=true to normalize, got %#v", req.MustChangePassword) + } +} + +func TestUpdateUserRequestNormalizeKeepsCamelCasePrimary(t *testing.T) { + req := UpdateUserRequest{ + DefaultClusterID: "camel-cluster", + DefaultClusterIDSnake: "snake-cluster", + QuotaCPU: "3", + QuotaCPUSnake: "4", + } + + req.Normalize() + + if req.DefaultClusterID != "camel-cluster" { + t.Fatalf("expected camelCase defaultClusterId to win, got %q", req.DefaultClusterID) + } + if req.QuotaCPU != "3" { + t.Fatalf("expected camelCase quotaCpu to win, got %q", req.QuotaCPU) + } +} diff --git a/backend/internal/adapter/input/http/dto/cluster_dto.go b/backend/internal/adapter/input/http/dto/cluster_dto.go index d84a816..d764221 100644 --- a/backend/internal/adapter/input/http/dto/cluster_dto.go +++ b/backend/internal/adapter/input/http/dto/cluster_dto.go @@ -2,30 +2,38 @@ package dto // CreateClusterRequest 创建集群请求 type CreateClusterRequest struct { - Name string `json:"name" binding:"required"` - Host string `json:"host" binding:"required"` - CAData string `json:"caData"` - CADataAlt string `json:"ca_data"` - CertData string `json:"certData"` - CertDataAlt string `json:"cert_data"` - KeyData string `json:"keyData"` - KeyDataAlt string `json:"key_data"` - Token string `json:"token"` - Description string `json:"description"` + Name string 
`json:"name" binding:"required"` + Host string `json:"host" binding:"required"` + CAData string `json:"caData"` + CADataAlt string `json:"ca_data"` + CertData string `json:"certData"` + CertDataAlt string `json:"cert_data"` + KeyData string `json:"keyData"` + KeyDataAlt string `json:"key_data"` + Token string `json:"token"` + Description string `json:"description"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` + DefaultNamespace string `json:"defaultNamespace"` } // UpdateClusterRequest 更新集群请求 type UpdateClusterRequest struct { - Name string `json:"name"` - Host string `json:"host"` - CAData string `json:"caData"` - CADataAlt string `json:"ca_data"` - CertData string `json:"certData"` - CertDataAlt string `json:"cert_data"` - KeyData string `json:"keyData"` - KeyDataAlt string `json:"key_data"` - Token string `json:"token"` - Description string `json:"description"` + Name string `json:"name"` + Host string `json:"host"` + CAData string `json:"caData"` + CADataAlt string `json:"ca_data"` + CertData string `json:"certData"` + CertDataAlt string `json:"cert_data"` + KeyData string `json:"keyData"` + KeyDataAlt string `json:"key_data"` + Token string `json:"token"` + Description string `json:"description"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` + DefaultNamespace string `json:"defaultNamespace"` } // Normalize 将多种命名风格的字段合并到统一字段 @@ -56,10 +64,15 @@ func (r *UpdateClusterRequest) Normalize() { // ClusterResponse 集群响应(敏感数据已脱敏) type ClusterResponse struct { - ID string `json:"id"` - Name string `json:"name"` - Host string `json:"host"` - Description string `json:"description"` + ID string `json:"id"` + Name string `json:"name"` + Host string `json:"host"` + Description string `json:"description"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + Visibility string 
`json:"visibility"` + DefaultNamespace string `json:"defaultNamespace,omitempty"` + AllowedActions []string `json:"allowedActions,omitempty"` // 认证配置状态(不返回实际证书数据,仅返回是否已配置) HasCAData bool `json:"hasCaData"` HasCertData bool `json:"hasCertData"` diff --git a/backend/internal/adapter/input/http/dto/converter.go b/backend/internal/adapter/input/http/dto/converter.go index 40fa9b6..ce87571 100644 --- a/backend/internal/adapter/input/http/dto/converter.go +++ b/backend/internal/adapter/input/http/dto/converter.go @@ -9,6 +9,9 @@ import ( func ToRegistryResponse(registry *entity.Registry) *RegistryResponse { response := &RegistryResponse{ ID: registry.ID, + WorkspaceID: registry.WorkspaceID, + OwnerID: registry.OwnerID, + Visibility: registry.Visibility, Name: registry.Name, URL: registry.URL, Description: registry.Description, @@ -17,33 +20,37 @@ func ToRegistryResponse(registry *entity.Registry) *RegistryResponse { CreatedAt: registry.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), UpdatedAt: registry.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), } - + // 脱敏处理密码 if registry.Password != "" { response.HasPassword = true response.Password = crypto.MaskSensitiveData(registry.Password) } - + return response } // ToClusterResponse 转换 Cluster 实体为响应 DTO(脱敏) func ToClusterResponse(cluster *entity.Cluster) *ClusterResponse { response := &ClusterResponse{ - ID: cluster.ID, - Name: cluster.Name, - Host: cluster.Host, - Description: cluster.Description, - CreatedAt: cluster.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: cluster.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + ID: cluster.ID, + WorkspaceID: cluster.WorkspaceID, + OwnerID: cluster.OwnerID, + Visibility: cluster.Visibility, + Name: cluster.Name, + Host: cluster.Host, + Description: cluster.Description, + DefaultNamespace: cluster.DefaultNamespace, + CreatedAt: cluster.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: cluster.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), } - + // 设置认证配置状态标志 
response.HasCAData = cluster.CAData != "" response.HasCertData = cluster.CertData != "" response.HasKeyData = cluster.KeyData != "" response.HasToken = cluster.Token != "" - + // 脱敏处理敏感数据(仅显示掩码) if cluster.CAData != "" { response.CAData = crypto.MaskSensitiveData(cluster.CAData) @@ -57,7 +64,6 @@ func ToClusterResponse(cluster *entity.Cluster) *ClusterResponse { if cluster.Token != "" { response.Token = crypto.MaskSensitiveData(cluster.Token) } - + return response } - diff --git a/backend/internal/adapter/input/http/dto/error_dto.go b/backend/internal/adapter/input/http/dto/error_dto.go index 6b33da6..990e830 100644 --- a/backend/internal/adapter/input/http/dto/error_dto.go +++ b/backend/internal/adapter/input/http/dto/error_dto.go @@ -12,4 +12,3 @@ type SuccessResponse struct { Message string `json:"message"` Data interface{} `json:"data,omitempty"` } - diff --git a/backend/internal/adapter/input/http/dto/instance_dto.go b/backend/internal/adapter/input/http/dto/instance_dto.go index 76aca25..17ddb79 100644 --- a/backend/internal/adapter/input/http/dto/instance_dto.go +++ b/backend/internal/adapter/input/http/dto/instance_dto.go @@ -11,14 +11,16 @@ type CreateInstanceRequest struct { Description string `json:"description"` Values map[string]interface{} `json:"values"` ValuesYAML string `json:"valuesYaml"` + ValuesYAMLAlt string `json:"values_yaml"` } // UpdateInstanceRequest 更新实例请求 type UpdateInstanceRequest struct { - Version string `json:"version"` - Description string `json:"description"` - Values map[string]interface{} `json:"values"` - ValuesYAML string `json:"valuesYaml"` + Version string `json:"version"` + Description string `json:"description"` + Values map[string]interface{} `json:"values"` + ValuesYAML string `json:"valuesYaml"` + ValuesYAMLAlt string `json:"values_yaml"` } // Normalize 将多种命名风格的字段合并到统一字段 @@ -26,6 +28,16 @@ func (r *CreateInstanceRequest) Normalize() { if r.RegistryID == "" { r.RegistryID = r.RegistryIDAlt } + if r.ValuesYAML == "" { + 
r.ValuesYAML = r.ValuesYAMLAlt + } +} + +// Normalize 将多种命名风格的字段合并到统一字段 +func (r *UpdateInstanceRequest) Normalize() { + if r.ValuesYAML == "" { + r.ValuesYAML = r.ValuesYAMLAlt + } } // RollbackInstanceRequest 回滚实例请求 @@ -43,23 +55,28 @@ type DeleteInstanceRequest struct { // InstanceResponse 实例响应 type InstanceResponse struct { - ID string `json:"id"` - ClusterID string `json:"clusterId"` - Name string `json:"name"` - Namespace string `json:"namespace"` - RegistryID string `json:"registryId"` - Repository string `json:"repository"` - Chart string `json:"chart"` - Version string `json:"version"` - Description string `json:"description"` - Status string `json:"status"` - StatusReason string `json:"statusReason,omitempty"` - LastOperation string `json:"lastOperation,omitempty"` - LastError string `json:"lastError,omitempty"` - Revision int `json:"revision"` - Values map[string]interface{} `json:"values,omitempty"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` + ID string `json:"id"` + ClusterID string `json:"clusterId"` + Name string `json:"name"` + Namespace string `json:"namespace"` + RegistryID string `json:"registryId"` + Repository string `json:"repository"` + Chart string `json:"chart"` + Version string `json:"version"` + Description string `json:"description"` + Status string `json:"status"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + OwnerUsername string `json:"ownerUsername,omitempty"` + AllowedActions []string `json:"allowedActions,omitempty"` + StatusReason string `json:"statusReason,omitempty"` + LastOperation string `json:"lastOperation,omitempty"` + LastError string `json:"lastError,omitempty"` + Revision int `json:"revision"` + Values map[string]interface{} `json:"values,omitempty"` + Replicas int `json:"replicas"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` } // InstanceStatusResponse 实例状态响应 @@ -131,3 +148,89 @@ type InstanceEntryResponse struct { Hosts 
[]InstanceEntryHostResponse `json:"hosts,omitempty"` TLS []InstanceEntryTLSResponse `json:"tls,omitempty"` } + +type InstanceDiagnosticsResponse struct { + InstanceName string `json:"instanceName"` + Namespace string `json:"namespace"` + Pods []InstancePodDiagnostics `json:"pods"` + Services []InstanceServiceDiagnostics `json:"services"` + Events []InstanceEventDiagnostics `json:"events"` + Logs []InstancePodLogResponse `json:"logs"` + CollectedAt string `json:"collectedAt"` +} + +type InstancePodDiagnostics struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + Phase string `json:"phase"` + NodeName string `json:"nodeName,omitempty"` + PodIP string `json:"podIp,omitempty"` + HostIP string `json:"hostIp,omitempty"` + RestartCount int32 `json:"restartCount"` + Containers []InstanceContainerDiagnostics `json:"containers"` + Conditions []InstanceConditionDiagnostics `json:"conditions"` + CreationTimestamp string `json:"creationTimestamp,omitempty"` +} + +type InstanceContainerDiagnostics struct { + Name string `json:"name"` + Image string `json:"image"` + Ready bool `json:"ready"` + RestartCount int32 `json:"restartCount"` + State string `json:"state"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` +} + +type InstanceConditionDiagnostics struct { + Type string `json:"type"` + Status string `json:"status"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` +} + +type InstanceServiceDiagnostics struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + Type string `json:"type"` + ClusterIP string `json:"clusterIP,omitempty"` + Ports []InstanceEntryPortResponse `json:"ports,omitempty"` +} + +type InstanceEventDiagnostics struct { + Type string `json:"type"` + Reason string `json:"reason"` + Message string `json:"message"` + InvolvedKind string `json:"involvedKind"` + InvolvedName string `json:"involvedName"` + Count int32 `json:"count"` + FirstTimestamp 
string `json:"firstTimestamp,omitempty"` + LastTimestamp string `json:"lastTimestamp,omitempty"` +} + +// ScaleInstanceRequest 扩缩容实例请求 +type ScaleInstanceRequest struct { + Replicas int `json:"replicas" binding:"required"` + Workload string `json:"workload"` +} + +// ScaleInstanceResponse 扩缩容实例响应 +type ScaleInstanceResponse struct { + Instance *InstanceResponse `json:"instance"` + Replicas int `json:"replicas"` + Message string `json:"message"` +} + +// InstanceValuesDiffResponse 实例 values 差异响应 +type InstanceValuesDiffResponse struct { + Current map[string]interface{} `json:"current"` + Defaults map[string]interface{} `json:"defaults"` +} + +type InstancePodLogResponse struct { + Pod string `json:"pod"` + Container string `json:"container"` + TailLines int64 `json:"tailLines"` + Log string `json:"log,omitempty"` + Error string `json:"error,omitempty"` +} diff --git a/backend/internal/adapter/input/http/dto/monitoring_dto.go b/backend/internal/adapter/input/http/dto/monitoring_dto.go index bcb3496..1aa2d69 100644 --- a/backend/internal/adapter/input/http/dto/monitoring_dto.go +++ b/backend/internal/adapter/input/http/dto/monitoring_dto.go @@ -8,29 +8,56 @@ import ( // ClusterMetricsResponse 集群监控响应 type ClusterMetricsResponse struct { - ClusterID string `json:"clusterId"` - ClusterName string `json:"clusterName"` - Status string `json:"status"` - Uptime string `json:"uptime"` - NodeCount int `json:"nodeCount"` - PodCount int `json:"podCount"` - LastCheck time.Time `json:"lastCheck"` - TotalCPU string `json:"totalCpu"` - TotalMemory string `json:"totalMemory"` - TotalGPU int `json:"totalGpu"` - UsedCPU string `json:"usedCpu"` - UsedMemory string `json:"usedMemory"` - UsedGPU int `json:"usedGpu"` - CPUUsage float64 `json:"cpuUsage"` - MemoryUsage float64 `json:"memoryUsage"` - GPUUsage float64 `json:"gpuUsage"` - MaxNodeCPU string `json:"maxNodeCpu"` - MaxNodeMemory string `json:"maxNodeMemory"` - MaxNodeGPU int `json:"maxNodeGpu"` - MaxNodeCPUUsage float64 
`json:"maxNodeCpuUsage"` - MaxNodeMemUsage float64 `json:"maxNodeMemUsage"` - MaxNodeGPUUsage float64 `json:"maxNodeGpuUsage"` - Nodes []NodeMetricsResponse `json:"nodes,omitempty"` + ClusterID string `json:"clusterId"` + ClusterName string `json:"clusterName"` + Status string `json:"status"` + Uptime string `json:"uptime"` + NodeCount int `json:"nodeCount"` + PodCount int `json:"podCount"` + LastCheck time.Time `json:"lastCheck"` + TotalCPU string `json:"totalCpu"` + TotalMemory string `json:"totalMemory"` + TotalGPU int `json:"totalGpu"` + UsedCPU string `json:"usedCpu"` + UsedMemory string `json:"usedMemory"` + UsedGPU int `json:"usedGpu"` + CPUUsage float64 `json:"cpuUsage"` + MemoryUsage float64 `json:"memoryUsage"` + GPUUsage float64 `json:"gpuUsage"` + CPURequests string `json:"cpuRequests,omitempty"` + CPULimits string `json:"cpuLimits,omitempty"` + MemoryRequests string `json:"memoryRequests,omitempty"` + MemoryLimits string `json:"memoryLimits,omitempty"` + GPURequests int64 `json:"gpuRequests,omitempty"` + GPULimits int64 `json:"gpuLimits,omitempty"` + GPUMemoryRequestsMB int64 `json:"gpuMemoryRequestsMb,omitempty"` + GPUMemoryLimitsMB int64 `json:"gpuMemoryLimitsMb,omitempty"` + AllocatedGPU int64 `json:"allocatedGpu,omitempty"` + AllocatedGPUMemoryMB int64 `json:"allocatedGpuMemoryMb,omitempty"` + ResourceUsageByUser []UserResourceUsageResponse `json:"resourceUsageByUser,omitempty"` + MaxNodeCPU string `json:"maxNodeCpu"` + MaxNodeMemory string `json:"maxNodeMemory"` + MaxNodeGPU int `json:"maxNodeGpu"` + MaxNodeCPUUsage float64 `json:"maxNodeCpuUsage"` + MaxNodeMemUsage float64 `json:"maxNodeMemUsage"` + MaxNodeGPUUsage float64 `json:"maxNodeGpuUsage"` + Nodes []NodeMetricsResponse `json:"nodes,omitempty"` +} + +type UserResourceUsageResponse struct { + UserID string `json:"userId"` + Username string `json:"username"` + WorkspaceID string `json:"workspaceId"` + InstanceCount int `json:"instanceCount"` + PodCount int `json:"podCount"` + CPURequests 
string `json:"cpuRequests"` + CPULimits string `json:"cpuLimits"` + MemoryRequests string `json:"memoryRequests"` + MemoryLimits string `json:"memoryLimits"` + GPURequests int64 `json:"gpuRequests"` + GPULimits int64 `json:"gpuLimits"` + GPUMemoryRequestsMB int64 `json:"gpuMemoryRequestsMb"` + GPUMemoryLimitsMB int64 `json:"gpuMemoryLimitsMb"` } // NodeMetricsResponse 节点监控响应 @@ -72,28 +99,59 @@ type MonitoringSummaryResponse struct { // ToClusterMetricsResponse 转换为响应 func ToClusterMetricsResponse(m *entity.ClusterMetrics) *ClusterMetricsResponse { resp := &ClusterMetricsResponse{ - ClusterID: m.ClusterID, - ClusterName: m.ClusterName, - Status: m.Status, - Uptime: m.Uptime, - NodeCount: m.NodeCount, - PodCount: m.PodCount, - LastCheck: m.LastCheck, - TotalCPU: m.TotalCPU, - TotalMemory: m.TotalMemory, - TotalGPU: m.TotalGPU, - UsedCPU: m.UsedCPU, - UsedMemory: m.UsedMemory, - UsedGPU: m.UsedGPU, - CPUUsage: m.CPUUsage, - MemoryUsage: m.MemoryUsage, - GPUUsage: m.GPUUsage, - MaxNodeCPU: m.MaxNodeCPU, - MaxNodeMemory: m.MaxNodeMemory, - MaxNodeGPU: m.MaxNodeGPU, - MaxNodeCPUUsage: m.MaxNodeCPUUsage, - MaxNodeMemUsage: m.MaxNodeMemUsage, - MaxNodeGPUUsage: m.MaxNodeGPUUsage, + ClusterID: m.ClusterID, + ClusterName: m.ClusterName, + Status: m.Status, + Uptime: m.Uptime, + NodeCount: m.NodeCount, + PodCount: m.PodCount, + LastCheck: m.LastCheck, + TotalCPU: m.TotalCPU, + TotalMemory: m.TotalMemory, + TotalGPU: m.TotalGPU, + UsedCPU: m.UsedCPU, + UsedMemory: m.UsedMemory, + UsedGPU: m.UsedGPU, + CPUUsage: m.CPUUsage, + MemoryUsage: m.MemoryUsage, + GPUUsage: m.GPUUsage, + CPURequests: m.CPURequests, + CPULimits: m.CPULimits, + MemoryRequests: m.MemoryRequests, + MemoryLimits: m.MemoryLimits, + GPURequests: m.GPURequests, + GPULimits: m.GPULimits, + GPUMemoryRequestsMB: m.GPUMemoryRequestsMB, + GPUMemoryLimitsMB: m.GPUMemoryLimitsMB, + AllocatedGPU: m.AllocatedGPU, + AllocatedGPUMemoryMB: m.AllocatedGPUMemoryMB, + MaxNodeCPU: m.MaxNodeCPU, + MaxNodeMemory: 
m.MaxNodeMemory, + MaxNodeGPU: m.MaxNodeGPU, + MaxNodeCPUUsage: m.MaxNodeCPUUsage, + MaxNodeMemUsage: m.MaxNodeMemUsage, + MaxNodeGPUUsage: m.MaxNodeGPUUsage, + } + + if len(m.ResourceUsageByUser) > 0 { + resp.ResourceUsageByUser = make([]UserResourceUsageResponse, len(m.ResourceUsageByUser)) + for i, usage := range m.ResourceUsageByUser { + resp.ResourceUsageByUser[i] = UserResourceUsageResponse{ + UserID: usage.UserID, + Username: usage.Username, + WorkspaceID: usage.WorkspaceID, + InstanceCount: usage.InstanceCount, + PodCount: usage.PodCount, + CPURequests: usage.CPURequests, + CPULimits: usage.CPULimits, + MemoryRequests: usage.MemoryRequests, + MemoryLimits: usage.MemoryLimits, + GPURequests: usage.GPURequests, + GPULimits: usage.GPULimits, + GPUMemoryRequestsMB: usage.GPUMemoryRequestsMB, + GPUMemoryLimitsMB: usage.GPUMemoryLimitsMB, + } + } } if len(m.Nodes) > 0 { @@ -140,4 +198,3 @@ func ToMonitoringSummaryResponse(s *entity.MonitoringSummary) *MonitoringSummary LastUpdate: s.LastUpdate, } } - diff --git a/backend/internal/adapter/input/http/dto/registry_dto.go b/backend/internal/adapter/input/http/dto/registry_dto.go index 25de40a..0c53b69 100644 --- a/backend/internal/adapter/input/http/dto/registry_dto.go +++ b/backend/internal/adapter/input/http/dto/registry_dto.go @@ -2,36 +2,46 @@ package dto // CreateRegistryRequest 创建 Registry 请求 type CreateRegistryRequest struct { - Name string `json:"name" binding:"required"` - URL string `json:"url" binding:"required"` - Username string `json:"username"` - Password string `json:"password"` - Description string `json:"description"` - Insecure bool `json:"insecure"` + Name string `json:"name" binding:"required"` + URL string `json:"url" binding:"required"` + Username string `json:"username"` + Password string `json:"password"` + Description string `json:"description"` + Insecure bool `json:"insecure"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool 
`json:"global_shared"` } // UpdateRegistryRequest 更新 Registry 请求 type UpdateRegistryRequest struct { - Name string `json:"name"` - URL string `json:"url"` - Username string `json:"username"` - Password string `json:"password"` - Description string `json:"description"` - Insecure bool `json:"insecure"` + Name string `json:"name"` + URL string `json:"url"` + Username string `json:"username"` + Password string `json:"password"` + Description string `json:"description"` + Insecure bool `json:"insecure"` + Visibility string `json:"visibility"` + GlobalShared bool `json:"globalShared"` + GlobalSharedAlt bool `json:"global_shared"` } // RegistryResponse Registry 响应(敏感数据已脱敏) type RegistryResponse struct { - ID string `json:"id"` - Name string `json:"name"` - URL string `json:"url"` - Description string `json:"description"` - Username string `json:"username,omitempty"` // 明文返回用户名(不敏感) - Password string `json:"password,omitempty"` // 脱敏显示(••••••••) - HasPassword bool `json:"hasPassword"` // 是否已设置密码 - Insecure bool `json:"insecure"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` + ID string `json:"id"` + Name string `json:"name"` + URL string `json:"url"` + Description string `json:"description"` + WorkspaceID string `json:"workspaceId"` + OwnerID string `json:"ownerId"` + Visibility string `json:"visibility"` + AllowedActions []string `json:"allowedActions,omitempty"` + Username string `json:"username,omitempty"` // 明文返回用户名(不敏感) + Password string `json:"password,omitempty"` // 脱敏显示(••••••••) + HasPassword bool `json:"hasPassword"` // 是否已设置密码 + Insecure bool `json:"insecure"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` } // RegistryHealthResponse Registry 健康状态响应 @@ -39,4 +49,3 @@ type RegistryHealthResponse struct { Healthy bool `json:"healthy"` Message string `json:"message,omitempty"` } - diff --git a/backend/internal/adapter/input/http/rest/artifact_handler.go 
b/backend/internal/adapter/input/http/rest/artifact_handler.go index d04bdbc..75aeb69 100644 --- a/backend/internal/adapter/input/http/rest/artifact_handler.go +++ b/backend/internal/adapter/input/http/rest/artifact_handler.go @@ -29,14 +29,19 @@ func NewArtifactHandler(artifactService *service.ArtifactService) *ArtifactHandl // @Accept json // @Produce json // @Param registry_id path string true "Registry ID" +// @Param artifact_type query string false "Artifact type filter (chart, all)" default(chart) // @Success 200 {object} dto.RepositoryListResponse // @Failure 500 {object} dto.ErrorResponse // @Router /registries/{registry_id}/repositories [get] func (h *ArtifactHandler) ListRepositories(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) registryID := vars["registry_id"] + artifactType := r.URL.Query().Get("artifact_type") + if artifactType == "" { + artifactType = "chart" + } - repositories, err := h.artifactService.ListRepositories(r.Context(), registryID) + repositories, err := h.artifactService.ListRepositories(r.Context(), registryID, artifactType) if err != nil { respondError(w, http.StatusInternalServerError, "Failed to list repositories", err.Error()) return @@ -50,13 +55,17 @@ func (h *ArtifactHandler) ListRepositories(w http.ResponseWriter, r *http.Reques } // Determine source and message based on repository count - source := "catalog" + source := "harbor-api" catalogSupported := true message := "" if len(repositories) == 0 { source = "unavailable" - message = "No repositories found in this registry" + if artifactType == "chart" { + message = "No chart repositories found in this registry" + } else { + message = "No repositories found in this registry" + } } response := &dto.RepositoryListResponse{ @@ -117,6 +126,25 @@ func (h *ArtifactHandler) ListArtifacts(w http.ResponseWriter, r *http.Request) respondJSON(w, http.StatusOK, tagResponses) } +// ListRepositoryTags is a compatibility alias for clients that request tags +// directly instead 
of the canonical artifacts endpoint. +func (h *ArtifactHandler) ListRepositoryTags(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + if vars["registry_id"] == "" { + registryID := r.URL.Query().Get("registry_id") + if registryID == "" { + registryID = r.URL.Query().Get("registryId") + } + if registryID == "" { + respondError(w, http.StatusBadRequest, "Missing registry ID", "registry_id query parameter is required") + return + } + vars["registry_id"] = registryID + r = mux.SetURLVars(r, vars) + } + h.ListArtifacts(w, r) +} + // GetArtifact 获取 artifact 详情 // @Summary 获取 Artifact 详情 // @Description 获取指定 Artifact 的详细信息 @@ -191,3 +219,37 @@ func (h *ArtifactHandler) GetArtifactValuesSchema(w http.ResponseWriter, r *http respondJSON(w, http.StatusOK, response) } + +// GetArtifactValuesYAML 获取 Helm Chart 的默认 values.yaml +// @Summary 获取 Helm Chart 默认 Values YAML +// @Description 获取 Helm Chart 包内原始 values.yaml,用于高级覆盖编辑 +// @Tags Artifacts +// @Accept json +// @Produce json +// @Param registry_id path string true "Registry ID" +// @Param repository_name path string true "Repository Name (URL encoded)" +// @Param reference path string true "Artifact Reference (tag or digest)" +// @Success 200 {object} dto.ValuesYAMLResponse +// @Failure 500 {object} dto.ErrorResponse +// @Router /registries/{registry_id}/repositories/{repository_name}/artifacts/{reference}/values-yaml [get] +func (h *ArtifactHandler) GetArtifactValuesYAML(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + registryID := vars["registry_id"] + repositoryName := vars["repository_name"] + reference := vars["reference"] + + valuesYAML, err := h.artifactService.GetValuesYAML(r.Context(), registryID, repositoryName, reference) + if err != nil { + switch { + case errors.Is(err, entity.ErrRegistryNotFound), + errors.Is(err, entity.ErrRepositoryNotFound), + errors.Is(err, entity.ErrArtifactNotFound): + respondError(w, http.StatusNotFound, "Values YAML not found", err.Error()) + default: + 
respondError(w, http.StatusInternalServerError, "Failed to get values YAML", err.Error()) + } + return + } + + respondJSON(w, http.StatusOK, &dto.ValuesYAMLResponse{ValuesYAML: valuesYAML}) +} diff --git a/backend/internal/adapter/input/http/rest/auth_handler.go b/backend/internal/adapter/input/http/rest/auth_handler.go index f67acda..969ca33 100644 --- a/backend/internal/adapter/input/http/rest/auth_handler.go +++ b/backend/internal/adapter/input/http/rest/auth_handler.go @@ -1,11 +1,19 @@ package rest import ( + "context" "encoding/json" + "net" "net/http" + "strings" + "sync" + "time" + "github.com/gorilla/mux" "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // AuthHandler 认证 Handler @@ -13,6 +21,74 @@ type AuthHandler struct { authService *service.AuthService } +const ( + loginRateLimitWindow = time.Minute + loginRateLimitFailures = 5 +) + +var defaultLoginRateLimiter = newLoginRateLimiter(loginRateLimitWindow, loginRateLimitFailures) + +type loginRateLimiter struct { + mu sync.Mutex + window time.Duration + limit int + failures map[string]loginFailureState + now func() time.Time +} + +type loginFailureState struct { + count int + windowEnds time.Time +} + +func newLoginRateLimiter(window time.Duration, limit int) *loginRateLimiter { + return &loginRateLimiter{ + window: window, + limit: limit, + failures: make(map[string]loginFailureState), + now: time.Now, + } +} + +func (l *loginRateLimiter) Allow(key string) bool { + if l == nil || key == "" { + return true + } + l.mu.Lock() + defer l.mu.Unlock() + state, ok := l.failures[key] + now := l.now() + if !ok || now.After(state.windowEnds) { + return true + } + return state.count < l.limit +} + +func (l *loginRateLimiter) RecordFailure(key string) { + if l == nil || key == "" { + return + } + l.mu.Lock() + defer l.mu.Unlock() 
+ now := l.now() + state, ok := l.failures[key] + if !ok || now.After(state.windowEnds) { + l.failures[key] = loginFailureState{count: 1, windowEnds: now.Add(l.window)} + return + } + state.count++ + l.failures[key] = state +} + +func (l *loginRateLimiter) Reset(key string) { + if l == nil || key == "" { + return + } + l.mu.Lock() + defer l.mu.Unlock() + delete(l.failures, key) +} + // NewAuthHandler 创建认证 Handler func NewAuthHandler(authService *service.AuthService) *AuthHandler { return &AuthHandler{ @@ -20,9 +96,9 @@ func NewAuthHandler(authService *service.AuthService) *AuthHandler { } } -// Register 用户注册 -// @Summary 用户注册 -// @Description 创建一个新的后台用户 +// Register 管理员创建用户 +// @Summary 管理员创建用户 +// @Description 创建一个新的后台用户。公开自注册已禁用,只允许 admin 调用。 // @Tags Auth // @Accept json // @Produce json @@ -36,24 +112,68 @@ func (h *AuthHandler) Register(w http.ResponseWriter, r *http.Request) { respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) return } + req.Normalize() // 调用领域服务 - user, err := h.authService.Register(r.Context(), req.Username, req.Password) + user, err := h.authService.Register(r.Context(), req.Username, req.Password, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{ + Namespace: req.Namespace, + DefaultClusterID: req.DefaultClusterID, + QuotaCPU: req.QuotaCPU, + QuotaMemory: req.QuotaMemory, + QuotaGPU: req.QuotaGPU, + QuotaGPUMem: req.QuotaGPUMem, + }, req.IsActive, req.MustChangePassword) if err != nil { - respondError(w, http.StatusBadRequest, "Registration failed", err.Error()) + respondServiceError(w, err, "Registration failed") return } - // 返回响应 - response := &dto.UserResponse{ - ID: user.ID, - Username: user.Username, - Email: user.Email, - CreatedAt: user.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: user.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } + respondJSON(w, http.StatusCreated, h.convertUserResponse(r.Context(), user)) +} - respondJSON(w, http.StatusCreated, response) +func (h *AuthHandler) 
ListUsers(w http.ResponseWriter, r *http.Request) { + users, err := h.authService.ListUsers(r.Context()) + if err != nil { + respondServiceError(w, err, "Failed to list users") + return + } + responses := make([]*dto.UserResponse, 0, len(users)) + for _, user := range users { + responses = append(responses, h.convertUserResponse(r.Context(), user)) + } + respondJSON(w, http.StatusOK, responses) +} + +func (h *AuthHandler) UpdateUser(w http.ResponseWriter, r *http.Request) { + userID := mux.Vars(r)["user_id"] + var req dto.UpdateUserRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + req.Normalize() + user, err := h.authService.UpdateUser(r.Context(), userID, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{ + Namespace: req.Namespace, + DefaultClusterID: req.DefaultClusterID, + QuotaCPU: req.QuotaCPU, + QuotaMemory: req.QuotaMemory, + QuotaGPU: req.QuotaGPU, + QuotaGPUMem: req.QuotaGPUMem, + }, req.IsActive, req.MustChangePassword) + if err != nil { + respondServiceError(w, err, "Failed to update user") + return + } + respondJSON(w, http.StatusOK, h.convertUserResponse(r.Context(), user)) +} + +func (h *AuthHandler) DeleteUser(w http.ResponseWriter, r *http.Request) { + userID := mux.Vars(r)["user_id"] + if err := h.authService.DeleteUser(r.Context(), userID); err != nil { + respondServiceError(w, err, "Failed to delete user") + return + } + w.WriteHeader(http.StatusNoContent) } // Login 用户登录 @@ -73,26 +193,122 @@ func (h *AuthHandler) Login(w http.ResponseWriter, r *http.Request) { return } - // 调用领域服务 - accessToken, refreshToken, err := h.authService.Login(r.Context(), req.Username, req.Password) - if err != nil { - respondError(w, http.StatusUnauthorized, "Login failed", err.Error()) + rateLimitKey := loginRateLimitKey(r, req.Username) + if !defaultLoginRateLimiter.Allow(rateLimitKey) { + w.Header().Set("Retry-After", "60") + respondError(w, 
http.StatusTooManyRequests, "Too many login attempts", "too many login attempts; retry later") return } - // 获取用户信息 - // TODO: 从 token 解析用户信息或从服务获取 + // 调用领域服务 + accessToken, refreshToken, user, err := h.authService.Login(r.Context(), req.Username, req.Password) + if err != nil { + defaultLoginRateLimiter.RecordFailure(rateLimitKey) + respondError(w, http.StatusUnauthorized, "Invalid username or password", "invalid username or password") + return + } + defaultLoginRateLimiter.Reset(rateLimitKey) + + workspace, _ := h.authService.GetWorkspaceByID(r.Context(), user.WorkspaceID) // 返回响应 response := &dto.AuthResponse{ - AccessToken: accessToken, - RefreshToken: refreshToken, - Username: req.Username, + AccessToken: accessToken, + RefreshToken: refreshToken, + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, } respondJSON(w, http.StatusOK, response) } +func loginRateLimitKey(r *http.Request, username string) string { + client := strings.TrimSpace(r.Header.Get("X-Forwarded-For")) + if idx := strings.Index(client, ","); idx >= 0 { + client = strings.TrimSpace(client[:idx]) + } + if client == "" { + client = strings.TrimSpace(r.Header.Get("X-Real-IP")) + } + if client == "" { + client = r.RemoteAddr + if host, _, err := net.SplitHostPort(client); err == nil { + client = host + } + } + return strings.ToLower(strings.TrimSpace(username)) + "|" + client +} + +// AuthStatus returns whether the system needs initial setup (no admin exists). 
+func (h *AuthHandler) AuthStatus(w http.ResponseWriter, r *http.Request) { + hasAdmin, err := h.authService.IsAdminExists(r.Context()) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to check status", err.Error()) + return + } + respondJSON(w, http.StatusOK, map[string]any{ + "needsSetup": !hasAdmin, + "hasUsers": hasAdmin, + }) +} + +// Setup creates the first admin user. Only works when no admin exists. +func (h *AuthHandler) Setup(w http.ResponseWriter, r *http.Request) { + var req dto.SetupRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + if strings.TrimSpace(req.Username) == "" || strings.TrimSpace(req.Password) == "" { + respondError(w, http.StatusBadRequest, "Missing fields", "username and password are required") + return + } + + _, accessToken, refreshToken, err := h.authService.SetupInitialAdmin(r.Context(), req.Username, req.Password, req.Email) + if err != nil { + respondServiceError(w, err, "Failed to create initial admin") + return + } + + respondJSON(w, http.StatusCreated, map[string]string{ + "accessToken": accessToken, + "refreshToken": refreshToken, + }) +} + +func (h *AuthHandler) convertUserResponse(ctx context.Context, user *entity.User) *dto.UserResponse { + workspace, _ := h.authService.GetWorkspaceByID(ctx, user.WorkspaceID) + return &dto.UserResponse{ + ID: user.ID, + Username: user.Username, + Email: user.Email, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + IsActive: user.IsActive, + MustChangePassword: user.MustChangePassword, + CreatedAt: 
user.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: user.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + } +} + // RefreshToken 刷新 Token // @Summary 刷新访问令牌 // @Description 使用刷新令牌获取新的访问令牌 @@ -111,17 +327,109 @@ func (h *AuthHandler) RefreshToken(w http.ResponseWriter, r *http.Request) { } // 调用领域服务 - newAccessToken, err := h.authService.RefreshToken(r.Context(), req.RefreshToken) + newAccessToken, user, err := h.authService.RefreshToken(r.Context(), req.RefreshToken) if err != nil { respondError(w, http.StatusUnauthorized, "Token refresh failed", err.Error()) return } + workspace, _ := h.authService.GetWorkspaceByID(r.Context(), user.WorkspaceID) // 返回响应 response := &dto.AuthResponse{ - AccessToken: newAccessToken, - RefreshToken: req.RefreshToken, + AccessToken: newAccessToken, + RefreshToken: req.RefreshToken, + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName(workspace), + Namespace: workspaceNamespace(workspace), + DefaultClusterID: workspaceDefaultClusterID(workspace), + QuotaCPU: workspaceQuotaCPU(workspace), + QuotaMemory: workspaceQuotaMemory(workspace), + QuotaGPU: workspaceQuotaGPU(workspace), + QuotaGPUMem: workspaceQuotaGPUMem(workspace), + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, } respondJSON(w, http.StatusOK, response) } + +func (h *AuthHandler) Me(w http.ResponseWriter, r *http.Request) { + header := r.Header.Get("Authorization") + token := strings.TrimSpace(strings.TrimPrefix(header, "Bearer ")) + if token == "" || token == header { + respondError(w, http.StatusUnauthorized, "Unauthorized", "missing bearer token") + return + } + principal, err := h.authService.VerifyAccessToken(r.Context(), token) + if err != nil { + respondError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + return + } + respondJSON(w, http.StatusOK, &dto.AuthResponse{ + UserID: principal.UserID, + Username: principal.Username, + Role: 
principal.Role, + WorkspaceID: principal.WorkspaceID, + WorkspaceName: principal.WorkspaceName, + Namespace: principal.Namespace, + DefaultClusterID: principal.DefaultClusterID, + QuotaCPU: principal.QuotaCPU, + QuotaMemory: principal.QuotaMemory, + QuotaGPU: principal.QuotaGPU, + QuotaGPUMem: principal.QuotaGPUMem, + Permissions: principal.Permissions, + PermissionVersion: principal.PermissionVersion, + }) +} + +func workspaceName(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.Name +} + +func workspaceNamespace(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.K8sNamespace +} + +func workspaceDefaultClusterID(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.DefaultClusterID +} + +func workspaceQuotaCPU(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaCPU +} + +func workspaceQuotaMemory(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaMemory +} + +func workspaceQuotaGPU(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaGPU +} + +func workspaceQuotaGPUMem(workspace *entity.Workspace) string { + if workspace == nil { + return "" + } + return workspace.QuotaGPUMem +} diff --git a/backend/internal/adapter/input/http/rest/auth_handler_test.go b/backend/internal/adapter/input/http/rest/auth_handler_test.go new file mode 100644 index 0000000..d4a4119 --- /dev/null +++ b/backend/internal/adapter/input/http/rest/auth_handler_test.go @@ -0,0 +1,44 @@ +package rest + +import ( + "testing" + "time" +) + +func TestLoginRateLimiterBlocksAfterConfiguredFailures(t *testing.T) { + now := time.Date(2026, 5, 14, 12, 0, 0, 0, time.UTC) + limiter := newLoginRateLimiter(time.Minute, 2) + limiter.now = func() time.Time { return now } + + key := "user|127.0.0.1" + if !limiter.Allow(key) { + 
t.Fatal("expected first attempt to be allowed") + } + limiter.RecordFailure(key) + if !limiter.Allow(key) { + t.Fatal("expected second attempt to be allowed") + } + limiter.RecordFailure(key) + if limiter.Allow(key) { + t.Fatal("expected third attempt inside the window to be blocked") + } + + now = now.Add(time.Minute + time.Second) + if !limiter.Allow(key) { + t.Fatal("expected attempts to be allowed after the window expires") + } +} + +func TestLoginRateLimiterResetClearsFailures(t *testing.T) { + limiter := newLoginRateLimiter(time.Minute, 1) + key := "user|127.0.0.1" + + limiter.RecordFailure(key) + if limiter.Allow(key) { + t.Fatal("expected key to be blocked after one failure") + } + limiter.Reset(key) + if !limiter.Allow(key) { + t.Fatal("expected reset key to be allowed") + } +} diff --git a/backend/internal/adapter/input/http/rest/cluster_handler.go b/backend/internal/adapter/input/http/rest/cluster_handler.go index c887f8e..b68e882 100644 --- a/backend/internal/adapter/input/http/rest/cluster_handler.go +++ b/backend/internal/adapter/input/http/rest/cluster_handler.go @@ -45,6 +45,11 @@ func (h *ClusterHandler) CreateCluster(w http.ResponseWriter, r *http.Request) { // 创建实体 cluster := entity.NewCluster(req.Name, req.Host) cluster.Description = req.Description + cluster.Visibility = req.Visibility + if req.GlobalShared || req.GlobalSharedAlt { + cluster.Visibility = "global_shared" + } + cluster.DefaultNamespace = req.DefaultNamespace if req.CertData != "" && req.KeyData != "" { cluster.SetCertAuth(req.CAData, req.CertData, req.KeyData) @@ -147,6 +152,15 @@ func (h *ClusterHandler) UpdateCluster(w http.ResponseWriter, r *http.Request) { // 更新字段 cluster.Update(req.Name, req.Host, req.Description) + if req.Visibility != "" { + cluster.Visibility = req.Visibility + } + if req.GlobalShared || req.GlobalSharedAlt { + cluster.Visibility = "global_shared" + } + if req.DefaultNamespace != "" { + cluster.DefaultNamespace = req.DefaultNamespace + } if req.CertData 
!= "" && req.KeyData != "" { cluster.SetCertAuth(req.CAData, req.CertData, req.KeyData) diff --git a/backend/internal/adapter/input/http/rest/instance_handler.go b/backend/internal/adapter/input/http/rest/instance_handler.go index 777e965..e55baae 100644 --- a/backend/internal/adapter/input/http/rest/instance_handler.go +++ b/backend/internal/adapter/input/http/rest/instance_handler.go @@ -2,13 +2,18 @@ package rest import ( "encoding/json" + "fmt" "net/http" + "reflect" + "strconv" "strings" + "time" "github.com/gorilla/mux" "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/service" + "gopkg.in/yaml.v3" ) // InstanceHandler 实例 Handler @@ -45,6 +50,11 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) return } req.Normalize() + parsedYAML, hasValuesYAML, err := parseAndCompareValues(req.Values, req.ValuesYAML) + if err != nil { + respondError(w, http.StatusBadRequest, "Invalid values", err.Error()) + return + } // Extract chart name from repository (e.g., "charts/nginx" -> "nginx") chart := req.Repository @@ -67,38 +77,20 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) if req.Values != nil { instance.SetValues(req.Values) } - if req.ValuesYAML != "" { + if hasValuesYAML { instance.SetValuesYAML(req.ValuesYAML) + if req.Values == nil { + instance.SetValues(parsedYAML) + } } // 调用领域服务 if err := h.instanceService.CreateInstance(r.Context(), instance); err != nil { - respondError(w, http.StatusBadRequest, "Failed to create instance", err.Error()) + respondServiceError(w, err, "Failed to create instance") return } - // 返回响应 - response := &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: 
instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } - - respondJSON(w, http.StatusCreated, response) + respondJSON(w, http.StatusCreated, convertInstanceResponse(instance, true)) } // GetInstance 获取实例详情 @@ -113,6 +105,7 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request) // @Router /clusters/{cluster_id}/instances/{instance_id} [get] func (h *InstanceHandler) GetInstance(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) + clusterID := vars["cluster_id"] instanceID := vars["instance_id"] instance, err := h.instanceService.GetInstance(r.Context(), instanceID) @@ -120,28 +113,13 @@ func (h *InstanceHandler) GetInstance(w http.ResponseWriter, r *http.Request) { respondError(w, http.StatusNotFound, "Instance not found", err.Error()) return } - - response := &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + if instance.ClusterID != clusterID { + respondError(w, http.StatusNotFound, "Instance not found", "resource does not belong to cluster") + return } + 
h.instanceService.EnrichReplicas(r.Context(), clusterID, []*entity.Instance{instance}) - respondJSON(w, http.StatusOK, response) + respondJSON(w, http.StatusOK, convertInstanceResponse(instance, true)) } // ListInstances 列出集群的所有实例 @@ -159,30 +137,16 @@ func (h *InstanceHandler) ListInstances(w http.ResponseWriter, r *http.Request) instances, err := h.instanceService.ListInstancesByCluster(r.Context(), clusterID) if err != nil { - respondError(w, http.StatusInternalServerError, "Failed to list instances", err.Error()) + respondServiceError(w, err, "Failed to list instances") return } + // Enrich with running replicas from K8s + instances = h.instanceService.EnrichReplicas(r.Context(), clusterID, instances) + responses := make([]*dto.InstanceResponse, 0, len(instances)) for _, instance := range instances { - responses = append(responses, &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - }) + responses = append(responses, convertInstanceResponse(instance, true)) } response := &dto.InstanceListResponse{ @@ -214,6 +178,12 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request) respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) return } + req.Normalize() + parsedYAML, hasValuesYAML, err := parseAndCompareValues(req.Values, req.ValuesYAML) + if err != nil { + respondError(w, http.StatusBadRequest, "Invalid values", err.Error()) + return + } // 获取现有实例 instance, 
err := h.instanceService.GetInstance(r.Context(), instanceID) @@ -225,41 +195,26 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request) // 更新字段 if req.Version != "" { instance.Upgrade(req.Version, req.Values) + } else if req.Values != nil { + instance.SetValues(req.Values) } if req.Description != "" { instance.Description = req.Description } - if req.ValuesYAML != "" { + if hasValuesYAML { instance.SetValuesYAML(req.ValuesYAML) + if req.Values == nil { + instance.SetValues(parsedYAML) + } } // 调用领域服务 if err := h.instanceService.UpdateInstance(r.Context(), instance); err != nil { - respondError(w, http.StatusBadRequest, "Failed to update instance", err.Error()) + respondServiceError(w, err, "Failed to update instance") return } - response := &dto.InstanceResponse{ - ID: instance.ID, - ClusterID: instance.ClusterID, - Name: instance.Name, - Namespace: instance.Namespace, - RegistryID: instance.RegistryID, - Repository: instance.Repository, - Chart: instance.Chart, - Version: instance.Version, - Description: instance.Description, - Status: string(instance.Status), - StatusReason: instance.StatusReason, - LastOperation: string(instance.LastOperation), - LastError: instance.LastError, - Revision: instance.Revision, - Values: instance.Values, - CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), - UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), - } - - respondJSON(w, http.StatusOK, response) + respondJSON(w, http.StatusOK, convertInstanceResponse(instance, true)) } // DeleteInstance 删除实例 @@ -320,6 +275,152 @@ func (h *InstanceHandler) ListInstanceEntries(w http.ResponseWriter, r *http.Req respondJSON(w, http.StatusOK, responses) } +func (h *InstanceHandler) GetInstanceDiagnostics(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + clusterID := vars["cluster_id"] + instanceID := vars["instance_id"] + tailLines := int64(200) + if raw := strings.TrimSpace(r.URL.Query().Get("tailLines")); raw != "" 
{ + parsed, err := strconv.ParseInt(raw, 10, 64) + if err != nil || parsed < 0 { + respondError(w, http.StatusBadRequest, "Invalid tailLines", "tailLines must be a positive integer") + return + } + tailLines = parsed + } else if raw := strings.TrimSpace(r.URL.Query().Get("tail_lines")); raw != "" { + parsed, err := strconv.ParseInt(raw, 10, 64) + if err != nil || parsed < 0 { + respondError(w, http.StatusBadRequest, "Invalid tail_lines", "tail_lines must be a positive integer") + return + } + tailLines = parsed + } + + diagnostics, err := h.instanceService.GetInstanceDiagnostics(r.Context(), clusterID, instanceID, tailLines) + if err != nil { + status := http.StatusInternalServerError + switch err { + case entity.ErrInstanceNotFound, entity.ErrClusterNotFound: + status = http.StatusNotFound + case entity.ErrForbidden: + status = http.StatusForbidden + } + respondError(w, status, "Failed to collect instance diagnostics", err.Error()) + return + } + respondJSON(w, http.StatusOK, convertInstanceDiagnostics(diagnostics)) +} + +func (h *InstanceHandler) StreamInstanceLogs(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + clusterID := vars["cluster_id"] + instanceID := vars["instance_id"] + + podName := strings.TrimSpace(r.URL.Query().Get("pod")) + containerName := strings.TrimSpace(r.URL.Query().Get("container")) + if podName == "" || containerName == "" { + respondError(w, http.StatusBadRequest, "Missing required query parameter", "both 'pod' and 'container' are required") + return + } + + tailLines := int64(200) + if raw := strings.TrimSpace(r.URL.Query().Get("tailLines")); raw != "" { + parsed, err := strconv.ParseInt(raw, 10, 64) + if err != nil || parsed < 0 { + respondError(w, http.StatusBadRequest, "Invalid tailLines", "tailLines must be a positive integer") + return + } + tailLines = parsed + } + + lines, errs, err := h.instanceService.StreamInstanceLogs(r.Context(), clusterID, instanceID, podName, containerName, tailLines) + if err != nil { + 
status := http.StatusInternalServerError + switch err { + case entity.ErrInstanceNotFound, entity.ErrClusterNotFound: + status = http.StatusNotFound + } + respondError(w, status, "Failed to stream instance logs", err.Error()) + return + } + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + flusher, ok := w.(http.Flusher) + if !ok { + respondError(w, http.StatusInternalServerError, "Streaming not supported", "server does not support response flushing") + return + } + + for { + select { + case <-r.Context().Done(): + return + case line, open := <-lines: + if !open { + fmt.Fprintf(w, "data: [DONE]\n\n") + flusher.Flush() + return + } + fmt.Fprintf(w, "data: %s\n\n", line) + flusher.Flush() + case err, open := <-errs: + if open && err != nil { + fmt.Fprintf(w, "data: [ERROR] %s\n\n", err.Error()) + flusher.Flush() + } + } + } +} + +// ScaleInstance 扩缩容实例 +func (h *InstanceHandler) ScaleInstance(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + clusterID := vars["cluster_id"] + instanceID := vars["instance_id"] + + var req dto.ScaleInstanceRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + if req.Replicas < 0 { + respondError(w, http.StatusBadRequest, "Invalid replicas", "replicas must be >= 0") + return + } + + result, err := h.instanceService.ScaleInstance(r.Context(), clusterID, instanceID, req.Replicas, req.Workload) + if err != nil { + respondServiceError(w, err, "Failed to scale instance") + return + } + + instResp := convertInstanceResponse(result, true) + instResp.Replicas = req.Replicas + + respondJSON(w, http.StatusOK, dto.ScaleInstanceResponse{ + Instance: instResp, + Replicas: req.Replicas, + Message: fmt.Sprintf("Scaled to %d replicas", req.Replicas), + }) +} + +// GetInstanceValuesDiff 获取实例 values 差异 +func (h *InstanceHandler) 
GetInstanceValuesDiff(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + clusterID := vars["cluster_id"] + instanceID := vars["instance_id"] + + diff, err := h.instanceService.GetInstanceValuesDiff(r.Context(), clusterID, instanceID) + if err != nil { + respondServiceError(w, err, "Failed to get values diff") + return + } + respondJSON(w, http.StatusOK, diff) +} + func convertInstanceEntry(entry *entity.InstanceEntry) *dto.InstanceEntryResponse { portResponses := make([]dto.InstanceEntryPortResponse, 0, len(entry.Ports)) for _, port := range entry.Ports { @@ -369,3 +470,234 @@ func convertInstanceEntry(entry *entity.InstanceEntry) *dto.InstanceEntryRespons TLS: tlsResponses, } } + +func convertInstanceDiagnostics(diagnostics *entity.InstanceDiagnostics) *dto.InstanceDiagnosticsResponse { + if diagnostics == nil { + return &dto.InstanceDiagnosticsResponse{} + } + pods := make([]dto.InstancePodDiagnostics, 0, len(diagnostics.Pods)) + for _, pod := range diagnostics.Pods { + containers := make([]dto.InstanceContainerDiagnostics, 0, len(pod.Containers)) + for _, container := range pod.Containers { + containers = append(containers, dto.InstanceContainerDiagnostics{ + Name: container.Name, + Image: container.Image, + Ready: container.Ready, + RestartCount: container.RestartCount, + State: container.State, + Reason: container.Reason, + Message: container.Message, + }) + } + conditions := make([]dto.InstanceConditionDiagnostics, 0, len(pod.Conditions)) + for _, condition := range pod.Conditions { + conditions = append(conditions, dto.InstanceConditionDiagnostics{ + Type: condition.Type, + Status: condition.Status, + Reason: condition.Reason, + Message: condition.Message, + }) + } + pods = append(pods, dto.InstancePodDiagnostics{ + Name: pod.Name, + Namespace: pod.Namespace, + Phase: pod.Phase, + NodeName: pod.NodeName, + PodIP: pod.PodIP, + HostIP: pod.HostIP, + RestartCount: pod.RestartCount, + Containers: containers, + Conditions: conditions, + 
CreationTimestamp: formatTime(pod.CreationTimestamp), + }) + } + services := make([]dto.InstanceServiceDiagnostics, 0, len(diagnostics.Services)) + for _, svc := range diagnostics.Services { + ports := make([]dto.InstanceEntryPortResponse, 0, len(svc.Ports)) + for _, port := range svc.Ports { + ports = append(ports, dto.InstanceEntryPortResponse{ + Name: port.Name, + Protocol: port.Protocol, + Port: port.Port, + TargetPort: port.TargetPort, + NodePort: port.NodePort, + }) + } + services = append(services, dto.InstanceServiceDiagnostics{ + Name: svc.Name, + Namespace: svc.Namespace, + Type: svc.Type, + ClusterIP: svc.ClusterIP, + Ports: ports, + }) + } + events := make([]dto.InstanceEventDiagnostics, 0, len(diagnostics.Events)) + for _, event := range diagnostics.Events { + events = append(events, dto.InstanceEventDiagnostics{ + Type: event.Type, + Reason: event.Reason, + Message: event.Message, + InvolvedKind: event.InvolvedKind, + InvolvedName: event.InvolvedName, + Count: event.Count, + FirstTimestamp: formatTime(event.FirstTimestamp), + LastTimestamp: formatTime(event.LastTimestamp), + }) + } + logs := make([]dto.InstancePodLogResponse, 0, len(diagnostics.Logs)) + for _, logEntry := range diagnostics.Logs { + logs = append(logs, dto.InstancePodLogResponse{ + Pod: logEntry.Pod, + Container: logEntry.Container, + TailLines: logEntry.TailLines, + Log: logEntry.Log, + Error: logEntry.Error, + }) + } + return &dto.InstanceDiagnosticsResponse{ + InstanceName: diagnostics.InstanceName, + Namespace: diagnostics.Namespace, + Pods: pods, + Services: services, + Events: events, + Logs: logs, + CollectedAt: formatTime(diagnostics.CollectedAt), + } +} + +func formatTime(value time.Time) string { + if value.IsZero() { + return "" + } + return value.Format(time.RFC3339) +} + +func convertInstanceResponse(instance *entity.Instance, includeValues bool) *dto.InstanceResponse { + response := &dto.InstanceResponse{ + ID: instance.ID, + ClusterID: instance.ClusterID, + Name: 
instance.Name, + Namespace: instance.Namespace, + RegistryID: instance.RegistryID, + Repository: instance.Repository, + Chart: instance.Chart, + Version: instance.Version, + Description: instance.Description, + Status: string(instance.Status), + WorkspaceID: instance.WorkspaceID, + OwnerID: instance.OwnerID, + OwnerUsername: instance.OwnerUsername, + StatusReason: instance.StatusReason, + LastOperation: string(instance.LastOperation), + LastError: instance.LastError, + Revision: instance.Revision, + Replicas: instance.Replicas, + AllowedActions: []string{"view", "update", "delete"}, + CreatedAt: instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"), + UpdatedAt: instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"), + } + if includeValues { + response.Values = instance.Values + } + return response +} + +func parseValuesYAML(valuesYAML string) (map[string]interface{}, error) { + valuesYAML = strings.TrimSpace(valuesYAML) + if valuesYAML == "" { + return map[string]interface{}{}, nil + } + + var decoded interface{} + if err := yaml.Unmarshal([]byte(valuesYAML), &decoded); err != nil { + return nil, err + } + + normalized, err := normalizeYAMLValue(decoded) + if err != nil { + return nil, err + } + values, ok := normalized.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("values YAML must be a mapping at the top level") + } + return values, nil +} + +func parseAndCompareValues(values map[string]interface{}, valuesYAML string) (map[string]interface{}, bool, error) { + if strings.TrimSpace(valuesYAML) == "" { + return nil, false, nil + } + parsed, err := parseValuesYAML(valuesYAML) + if err != nil { + return nil, true, fmt.Errorf("invalid values YAML: %w", err) + } + if values == nil { + return parsed, true, nil + } + normalizedValues, err := normalizeJSONComparable(values) + if err != nil { + return nil, true, fmt.Errorf("invalid values: %w", err) + } + normalizedYAML, err := normalizeJSONComparable(parsed) + if err != nil { + return nil, true, 
fmt.Errorf("invalid values YAML: %w", err) + } + if !reflect.DeepEqual(normalizedValues, normalizedYAML) { + return nil, true, fmt.Errorf("values and valuesYaml conflict") + } + return parsed, true, nil +} + +func normalizeJSONComparable(value interface{}) (interface{}, error) { + data, err := json.Marshal(value) + if err != nil { + return nil, err + } + var normalized interface{} + if err := json.Unmarshal(data, &normalized); err != nil { + return nil, err + } + return normalized, nil +} + +func normalizeYAMLValue(value interface{}) (interface{}, error) { + switch typed := value.(type) { + case map[string]interface{}: + normalized := make(map[string]interface{}, len(typed)) + for key, child := range typed { + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized[key] = normalizedChild + } + return normalized, nil + case map[interface{}]interface{}: + normalized := make(map[string]interface{}, len(typed)) + for key, child := range typed { + keyString, ok := key.(string) + if !ok { + return nil, fmt.Errorf("values YAML contains non-string key %v", key) + } + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized[keyString] = normalizedChild + } + return normalized, nil + case []interface{}: + normalized := make([]interface{}, 0, len(typed)) + for _, child := range typed { + normalizedChild, err := normalizeYAMLValue(child) + if err != nil { + return nil, err + } + normalized = append(normalized, normalizedChild) + } + return normalized, nil + default: + return typed, nil + } +} diff --git a/backend/internal/adapter/input/http/rest/monitoring_handler.go b/backend/internal/adapter/input/http/rest/monitoring_handler.go index 2834168..2eb1783 100644 --- a/backend/internal/adapter/input/http/rest/monitoring_handler.go +++ b/backend/internal/adapter/input/http/rest/monitoring_handler.go @@ -43,6 +43,12 @@ func (h *MonitoringHandler) GetClusterMonitoring(w http.ResponseWriter, 
r *http. respondJSON(w, http.StatusOK, response) } +// GetClusterStats is a compatibility alias for cluster detail dashboards that +// historically read stats from /clusters/{id}/stats. +func (h *MonitoringHandler) GetClusterStats(w http.ResponseWriter, r *http.Request) { + h.GetClusterMonitoring(w, r) +} + // ListClusterMonitoring 获取所有集群的监控信息 // @Summary 列出集群监控 // @Tags Monitoring diff --git a/backend/internal/adapter/input/http/rest/registry_handler.go b/backend/internal/adapter/input/http/rest/registry_handler.go index f6d1a03..b413ce0 100644 --- a/backend/internal/adapter/input/http/rest/registry_handler.go +++ b/backend/internal/adapter/input/http/rest/registry_handler.go @@ -44,6 +44,10 @@ func (h *RegistryHandler) CreateRegistry(w http.ResponseWriter, r *http.Request) registry := entity.NewRegistry(req.Name, req.URL) registry.Description = req.Description registry.Insecure = req.Insecure + registry.Visibility = req.Visibility + if req.GlobalShared || req.GlobalSharedAlt { + registry.Visibility = "global_shared" + } registry.SetCredentials(req.Username, req.Password) // 调用领域服务 @@ -136,6 +140,12 @@ func (h *RegistryHandler) UpdateRegistry(w http.ResponseWriter, r *http.Request) // 更新字段 registry.Update(req.Name, req.URL, req.Description) registry.Insecure = req.Insecure + if req.Visibility != "" { + registry.Visibility = req.Visibility + } + if req.GlobalShared || req.GlobalSharedAlt { + registry.Visibility = "global_shared" + } if req.Username != "" || req.Password != "" { registry.SetCredentials(req.Username, req.Password) } diff --git a/backend/internal/adapter/input/http/rest/utils.go b/backend/internal/adapter/input/http/rest/utils.go index 6e6fcd5..82ecf4c 100644 --- a/backend/internal/adapter/input/http/rest/utils.go +++ b/backend/internal/adapter/input/http/rest/utils.go @@ -3,7 +3,7 @@ package rest import ( "encoding/json" "net/http" - + "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" ) @@ -32,4 +32,3 @@ func respondSuccess(w 
http.ResponseWriter, message string, data interface{}) { } respondJSON(w, http.StatusOK, response) } - diff --git a/backend/internal/adapter/input/http/rest/workspace_handler.go b/backend/internal/adapter/input/http/rest/workspace_handler.go new file mode 100644 index 0000000..ac5b648 --- /dev/null +++ b/backend/internal/adapter/input/http/rest/workspace_handler.go @@ -0,0 +1,183 @@ +package rest + +import ( + "encoding/json" + "errors" + "net/http" + "time" + + "github.com/gorilla/mux" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/service" + "github.com/ocdp/cluster-service/internal/pkg/authz" +) + +type WorkspaceHandler struct { + workspaceService *service.WorkspaceService +} + +func NewWorkspaceHandler(workspaceService *service.WorkspaceService) *WorkspaceHandler { + return &WorkspaceHandler{workspaceService: workspaceService} +} + +type createWorkspaceRequest struct { + Name string `json:"name"` +} + +type workspaceResponse struct { + ID string `json:"id"` + Name string `json:"name"` + Status string `json:"status"` + K8sNamespace string `json:"k8sNamespace"` + K8sSAName string `json:"k8sSaName"` + DefaultClusterID string `json:"defaultClusterId,omitempty"` + QuotaCPU string `json:"quotaCpu,omitempty"` + QuotaMemory string `json:"quotaMemory,omitempty"` + QuotaGPU string `json:"quotaGpu,omitempty"` + QuotaGPUMem string `json:"quotaGpuMemory,omitempty"` + CreatedBy string `json:"createdBy"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` +} + +type bindClusterRequest struct { + ClusterID string `json:"clusterId"` +} + +type kubeconfigRequest struct { + ClusterID string `json:"clusterId"` + TTLSeconds int64 `json:"ttlSeconds"` +} + +func (h *WorkspaceHandler) ListWorkspaces(w http.ResponseWriter, r *http.Request) { + workspaces, err := h.workspaceService.ListWorkspaces(r.Context()) + if err != nil { + respondServiceError(w, err, "Failed to list workspaces") + return + } + 
response := make([]workspaceResponse, 0, len(workspaces)) + for _, workspace := range workspaces { + response = append(response, toWorkspaceResponse(workspace)) + } + respondJSON(w, http.StatusOK, response) +} + +func (h *WorkspaceHandler) CreateWorkspace(w http.ResponseWriter, r *http.Request) { + var req createWorkspaceRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + workspace, err := h.workspaceService.CreateWorkspace(r.Context(), req.Name) + if err != nil { + respondServiceError(w, err, "Failed to create workspace") + return + } + respondJSON(w, http.StatusCreated, toWorkspaceResponse(workspace)) +} + +func (h *WorkspaceHandler) InitClusterBinding(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + var req bindClusterRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + binding, err := h.workspaceService.EnsureClusterBinding(r.Context(), workspaceID, req.ClusterID) + if err != nil { + respondServiceError(w, err, "Failed to initialize workspace cluster binding") + return + } + respondJSON(w, http.StatusOK, binding) +} + +func (h *WorkspaceHandler) IssueKubeconfig(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + var req kubeconfigRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body", err.Error()) + return + } + kubeconfig, err := h.workspaceService.IssueKubeconfig(r.Context(), workspaceID, req.ClusterID, time.Duration(req.TTLSeconds)*time.Second) + if err != nil { + respondServiceError(w, err, "Failed to issue kubeconfig") + return + } + respondJSON(w, http.StatusOK, map[string]interface{}{ + "kubeconfig": kubeconfig.Kubeconfig, + "expiresAt": kubeconfig.ExpiresAt.Format(time.RFC3339), + 
}) +} + +func (h *WorkspaceHandler) IssueCurrentKubeconfig(w http.ResponseWriter, r *http.Request) { + clusterID := r.URL.Query().Get("clusterId") + if clusterID == "" { + clusterID = r.URL.Query().Get("cluster_id") + } + h.issueCurrentKubeconfigForCluster(w, r, clusterID) +} + +func (h *WorkspaceHandler) IssueClusterKubeconfig(w http.ResponseWriter, r *http.Request) { + clusterID := mux.Vars(r)["cluster_id"] + h.issueCurrentKubeconfigForCluster(w, r, clusterID) +} + +func (h *WorkspaceHandler) issueCurrentKubeconfigForCluster(w http.ResponseWriter, r *http.Request, clusterID string) { + kubeconfig, err := h.workspaceService.IssueCurrentKubeconfig(r.Context(), clusterID, 2*time.Hour) + if err != nil { + respondServiceError(w, err, "Failed to issue kubeconfig") + return + } + w.Header().Set("Content-Type", "application/x-yaml") + w.Header().Set("X-OCDP-Kubeconfig-Expires-At", kubeconfig.ExpiresAt.Format(time.RFC3339)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(kubeconfig.Kubeconfig)) +} + +func (h *WorkspaceHandler) SuspendWorkspace(w http.ResponseWriter, r *http.Request) { + workspaceID := mux.Vars(r)["workspace_id"] + if err := h.workspaceService.SuspendWorkspace(r.Context(), workspaceID); err != nil { + respondServiceError(w, err, "Failed to suspend workspace") + return + } + w.WriteHeader(http.StatusNoContent) +} + +func toWorkspaceResponse(workspace *entity.Workspace) workspaceResponse { + return workspaceResponse{ + ID: workspace.ID, + Name: workspace.Name, + Status: string(workspace.Status), + K8sNamespace: workspace.K8sNamespace, + K8sSAName: workspace.K8sSAName, + DefaultClusterID: workspace.DefaultClusterID, + QuotaCPU: workspace.QuotaCPU, + QuotaMemory: workspace.QuotaMemory, + QuotaGPU: workspace.QuotaGPU, + QuotaGPUMem: workspace.QuotaGPUMem, + CreatedBy: workspace.CreatedBy, + CreatedAt: workspace.CreatedAt.Format(time.RFC3339), + UpdatedAt: workspace.UpdatedAt.Format(time.RFC3339), + } +} + +func respondServiceError(w http.ResponseWriter, 
err error, fallback string) { + if errors.Is(err, service.ErrQuotaExceeded) { + respondError(w, http.StatusUnprocessableEntity, "Quota exceeded", err.Error()) + return + } + switch err { + case entity.ErrUnauthorized, authz.ErrUnauthenticated: + respondError(w, http.StatusUnauthorized, "Unauthorized", err.Error()) + case entity.ErrForbidden, authz.ErrForbidden, entity.ErrUserInactive, entity.ErrWorkspaceSuspended: + respondError(w, http.StatusForbidden, "Forbidden", err.Error()) + case entity.ErrWorkspaceNamespaceConflict, entity.ErrUserHasInstances, entity.ErrWorkspaceExists, entity.ErrInstanceExists: + respondError(w, http.StatusConflict, "Conflict", err.Error()) + case entity.ErrProtectedNamespace: + respondError(w, http.StatusForbidden, "Forbidden", err.Error()) + case entity.ErrClusterNotFound, entity.ErrRegistryNotFound, entity.ErrInstanceNotFound, entity.ErrWorkspaceNotFound: + respondError(w, http.StatusNotFound, fallback, err.Error()) + default: + respondError(w, http.StatusBadRequest, fallback, err.Error()) + } +} diff --git a/backend/internal/adapter/output/factory.go b/backend/internal/adapter/output/factory.go index 8565112..321f280 100644 --- a/backend/internal/adapter/output/factory.go +++ b/backend/internal/adapter/output/factory.go @@ -96,6 +96,36 @@ func (f *AdapterFactory) CreateInstanceRepository() (repository.InstanceReposito return postgres.NewInstanceRepository(f.db), nil } +func (f *AdapterFactory) CreateWorkspaceRepository() (repository.WorkspaceRepository, error) { + if f.mode == ModeMock { + return mock.NewWorkspaceRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return nil, err + } + return postgres.NewWorkspaceRepository(f.db), nil +} + +func (f *AdapterFactory) CreateWorkspaceClusterBindingRepository() (repository.WorkspaceClusterBindingRepository, error) { + if f.mode == ModeMock { + return mock.NewWorkspaceClusterBindingRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return 
nil, err + } + return postgres.NewWorkspaceClusterBindingRepository(f.db), nil +} + +func (f *AdapterFactory) CreateAuditLogRepository() (repository.AuditLogRepository, error) { + if f.mode == ModeMock { + return mock.NewAuditLogRepositoryMock(), nil + } + if err := f.ensureDBConnection(); err != nil { + return nil, err + } + return postgres.NewAuditLogRepository(f.db), nil +} + // CreateOCIClient 创建 OCI 客户端 func (f *AdapterFactory) CreateOCIClient() (repository.OCIClient, error) { if f.mode == ModeMock { @@ -127,6 +157,20 @@ func (f *AdapterFactory) CreateEntryClient() repository.InstanceEntryClient { return k8s.NewEntryClient() } +func (f *AdapterFactory) CreateDiagnosticsClient() repository.InstanceDiagnosticsClient { + if f.mode == ModeMock { + return k8s.NewMockDiagnosticsClient() + } + return k8s.NewDiagnosticsClient() +} + +func (f *AdapterFactory) CreateTenantKubeClient() repository.TenantKubeClient { + if f.mode == ModeMock { + return k8s.NewMockTenantClient() + } + return k8s.NewTenantClient() +} + // CreateAllRepositories 一次性创建所有 Repositories func (f *AdapterFactory) CreateAllRepositories() (*Repositories, error) { userRepo, err := f.CreateUserRepository() @@ -149,6 +193,21 @@ func (f *AdapterFactory) CreateAllRepositories() (*Repositories, error) { return nil, fmt.Errorf("failed to create instance repository: %w", err) } + workspaceRepo, err := f.CreateWorkspaceRepository() + if err != nil { + return nil, fmt.Errorf("failed to create workspace repository: %w", err) + } + + bindingRepo, err := f.CreateWorkspaceClusterBindingRepository() + if err != nil { + return nil, fmt.Errorf("failed to create workspace cluster binding repository: %w", err) + } + + auditRepo, err := f.CreateAuditLogRepository() + if err != nil { + return nil, fmt.Errorf("failed to create audit log repository: %w", err) + } + ociClient, err := f.CreateOCIClient() if err != nil { return nil, fmt.Errorf("failed to create OCI client: %w", err) @@ -162,29 +221,41 @@ func (f 
*AdapterFactory) CreateAllRepositories() (*Repositories, error) { // 创建 Metrics client(依赖 clusterRepo) metricsClient := f.CreateMetricsClient(clusterRepo) entryClient := f.CreateEntryClient() + diagnosticsClient := f.CreateDiagnosticsClient() + tenantClient := f.CreateTenantKubeClient() return &Repositories{ - UserRepo: userRepo, - ClusterRepo: clusterRepo, - RegistryRepo: registryRepo, - InstanceRepo: instanceRepo, - OCIClient: ociClient, - HelmClient: helmClient, - MetricsClient: metricsClient, - EntryClient: entryClient, + UserRepo: userRepo, + WorkspaceRepo: workspaceRepo, + BindingRepo: bindingRepo, + AuditRepo: auditRepo, + ClusterRepo: clusterRepo, + RegistryRepo: registryRepo, + InstanceRepo: instanceRepo, + OCIClient: ociClient, + HelmClient: helmClient, + MetricsClient: metricsClient, + EntryClient: entryClient, + DiagnosticsClient: diagnosticsClient, + TenantKubeClient: tenantClient, }, nil } // Repositories 所有仓储的集合 type Repositories struct { - UserRepo repository.UserRepository - ClusterRepo repository.ClusterRepository - RegistryRepo repository.RegistryRepository - InstanceRepo repository.InstanceRepository - OCIClient repository.OCIClient - HelmClient repository.HelmClient - MetricsClient repository.MetricsClient - EntryClient repository.InstanceEntryClient + UserRepo repository.UserRepository + WorkspaceRepo repository.WorkspaceRepository + BindingRepo repository.WorkspaceClusterBindingRepository + AuditRepo repository.AuditLogRepository + ClusterRepo repository.ClusterRepository + RegistryRepo repository.RegistryRepository + InstanceRepo repository.InstanceRepository + OCIClient repository.OCIClient + HelmClient repository.HelmClient + MetricsClient repository.MetricsClient + EntryClient repository.InstanceEntryClient + DiagnosticsClient repository.InstanceDiagnosticsClient + TenantKubeClient repository.TenantKubeClient } // ensureDBConnection 确保数据库连接已建立 diff --git a/backend/internal/adapter/output/helm/mock/helm_client_mock.go 
b/backend/internal/adapter/output/helm/mock/helm_client_mock.go index ce00280..cd80ae8 100644 --- a/backend/internal/adapter/output/helm/mock/helm_client_mock.go +++ b/backend/internal/adapter/output/helm/mock/helm_client_mock.go @@ -4,7 +4,7 @@ import ( "context" "fmt" "time" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -12,38 +12,47 @@ import ( // HelmClientMock Helm 客户端 Mock 实现 type HelmClientMock struct { // Mock 数据存储 - releases map[string]map[string]*entity.Instance // clusterID -> releaseName -> instance - history map[string]map[string][]*entity.ReleaseHistory // clusterID -> releaseName -> []history + releases map[string]map[string]*entity.Instance // clusterID -> releaseName -> instance + history map[string]map[string][]*entity.ReleaseHistory // clusterID -> releaseName -> []history + estimates map[string]map[string]*repository.ResourceEstimate // clusterID -> releaseName -> estimate } // NewHelmClientMock 创建 Mock 实现 func NewHelmClientMock() repository.HelmClient { return &HelmClientMock{ - releases: make(map[string]map[string]*entity.Instance), - history: make(map[string]map[string][]*entity.ReleaseHistory), + releases: make(map[string]map[string]*entity.Instance), + history: make(map[string]map[string][]*entity.ReleaseHistory), + estimates: make(map[string]map[string]*repository.ResourceEstimate), } } +func (c *HelmClientMock) SetResourceEstimate(clusterID, namespace, releaseName string, estimate *repository.ResourceEstimate) { + if c.estimates[clusterID] == nil { + c.estimates[clusterID] = make(map[string]*repository.ResourceEstimate) + } + c.estimates[clusterID][fmt.Sprintf("%s/%s", namespace, releaseName)] = estimate +} + func (c *HelmClientMock) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { // 初始化集群数据 if c.releases[cluster.ID] == nil { c.releases[cluster.ID] = make(map[string]*entity.Instance) c.history[cluster.ID] = 
make(map[string][]*entity.ReleaseHistory) } - + // 检查是否已存在 key := fmt.Sprintf("%s/%s", instance.Namespace, instance.Name) if _, exists := c.releases[cluster.ID][key]; exists { return entity.ErrInstanceExists } - + // Mock 安装 instance.Status = entity.StatusDeployed instance.Revision = 1 instance.UpdatedAt = time.Now() - + c.releases[cluster.ID][key] = instance - + // 添加历史记录 c.history[cluster.ID][key] = []*entity.ReleaseHistory{ { @@ -55,25 +64,25 @@ func (c *HelmClientMock) Install(ctx context.Context, cluster *entity.Cluster, i Description: "Install complete", }, } - + return nil } func (c *HelmClientMock) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { key := fmt.Sprintf("%s/%s", instance.Namespace, instance.Name) - + existing, exists := c.releases[cluster.ID][key] if !exists { return entity.ErrInstanceNotFound } - + // Mock 升级 instance.Revision = existing.Revision + 1 instance.Status = entity.StatusDeployed instance.UpdatedAt = time.Now() - + c.releases[cluster.ID][key] = instance - + // 添加历史记录 history := &entity.ReleaseHistory{ Revision: instance.Revision, @@ -84,44 +93,44 @@ func (c *HelmClientMock) Upgrade(ctx context.Context, cluster *entity.Cluster, i Description: "Upgrade complete", } c.history[cluster.ID][key] = append(c.history[cluster.ID][key], history) - + return nil } func (c *HelmClientMock) Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error { key := fmt.Sprintf("%s/%s", namespace, releaseName) - + if _, exists := c.releases[cluster.ID][key]; !exists { return entity.ErrInstanceNotFound } - + // Mock 卸载 delete(c.releases[cluster.ID], key) - + return nil } func (c *HelmClientMock) Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error { key := fmt.Sprintf("%s/%s", namespace, releaseName) - + instance, exists := c.releases[cluster.ID][key] if !exists { return entity.ErrInstanceNotFound } - + // 检查历史记录是否存在 histories := 
c.history[cluster.ID][key] if revision > len(histories) || revision < 1 { return fmt.Errorf("revision %d not found", revision) } - + // Mock 回滚 instance.Revision = len(histories) + 1 instance.Status = entity.StatusDeployed instance.UpdatedAt = time.Now() - + c.releases[cluster.ID][key] = instance - + // 添加回滚历史记录 history := &entity.ReleaseHistory{ Revision: instance.Revision, @@ -132,33 +141,33 @@ func (c *HelmClientMock) Rollback(ctx context.Context, cluster *entity.Cluster, Description: fmt.Sprintf("Rollback to revision %d", revision), } c.history[cluster.ID][key] = append(c.history[cluster.ID][key], history) - + return nil } func (c *HelmClientMock) GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) { key := fmt.Sprintf("%s/%s", namespace, releaseName) - + instance, exists := c.releases[cluster.ID][key] if !exists { return nil, entity.ErrInstanceNotFound } - + return instance, nil } func (c *HelmClientMock) GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) { key := fmt.Sprintf("%s/%s", namespace, releaseName) - + if _, exists := c.releases[cluster.ID][key]; !exists { return nil, entity.ErrInstanceNotFound } - + histories := c.history[cluster.ID][key] if histories == nil { return []*entity.ReleaseHistory{}, nil } - + return histories, nil } @@ -167,7 +176,7 @@ func (c *HelmClientMock) List(ctx context.Context, cluster *entity.Cluster, name if clusterReleases == nil { return []*entity.Instance{}, nil } - + instances := make([]*entity.Instance, 0) for key, instance := range clusterReleases { // 如果指定了 namespace,只返回该 namespace 的 @@ -179,18 +188,41 @@ func (c *HelmClientMock) List(ctx context.Context, cluster *entity.Cluster, name } instances = append(instances, c.releases[cluster.ID][key]) } - + return instances, nil } func (c *HelmClientMock) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) 
(map[string]interface{}, error) { key := fmt.Sprintf("%s/%s", namespace, releaseName) - + instance, exists := c.releases[cluster.ID][key] if !exists { return nil, entity.ErrInstanceNotFound } - + return instance.Values, nil } +func (c *HelmClientMock) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) { + return map[string]interface{}{ + "replicaCount": 1, + "image": map[string]interface{}{ + "repository": "nginx", + "tag": "latest", + }, + }, nil +} + +func (c *HelmClientMock) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) { + clusterID := "" + if cluster != nil { + clusterID = cluster.ID + } + key := fmt.Sprintf("%s/%s", instance.Namespace, instance.Name) + if c.estimates[clusterID] != nil { + if estimate := c.estimates[clusterID][key]; estimate != nil { + return estimate, nil + } + } + return &repository.ResourceEstimate{}, nil +} diff --git a/backend/internal/adapter/output/helm/real/helm_client.go b/backend/internal/adapter/output/helm/real/helm_client.go index 4d95fda..0599283 100644 --- a/backend/internal/adapter/output/helm/real/helm_client.go +++ b/backend/internal/adapter/output/helm/real/helm_client.go @@ -10,6 +10,7 @@ import ( "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + domainservice "github.com/ocdp/cluster-service/internal/domain/service" "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/cli" @@ -21,6 +22,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/restmapper" "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" ) // HelmClient 真实的 Helm 客户端实现 @@ -36,39 +38,45 @@ func NewHelmClient() repository.HelmClient { } // getActionConfig 获取 Helm action configuration -func (h *HelmClient) getActionConfig(cluster *entity.Cluster, namespace string) (*action.Configuration, error) { +func (h 
*HelmClient) getActionConfig(cluster *entity.Cluster, namespace string) (*action.Configuration, func(), error) { actionConfig := new(action.Configuration) // 创建临时 kubeconfig 文件 kubeconfigContent := cluster.GetKubeConfig() tmpDir, err := os.MkdirTemp("", "helm-kubeconfig-*") if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) + return nil, nil, fmt.Errorf("failed to create temp dir: %w", err) + } + cleanup := func() { + _ = os.RemoveAll(tmpDir) } kubeconfigPath := filepath.Join(tmpDir, "kubeconfig") if err := os.WriteFile(kubeconfigPath, []byte(kubeconfigContent), 0600); err != nil { - return nil, fmt.Errorf("failed to write kubeconfig: %w", err) + cleanup() + return nil, nil, fmt.Errorf("failed to write kubeconfig: %w", err) } // 使用 kubeconfig 初始化 action config if err := actionConfig.Init( - &kubeconfigGetter{kubeconfigPath: kubeconfigPath}, + &kubeconfigGetter{kubeconfigPath: kubeconfigPath, namespace: namespace}, namespace, os.Getenv("HELM_DRIVER"), // storage driver: configmap, secret, memory func(format string, v ...interface{}) { // Log function }, ); err != nil { - return nil, fmt.Errorf("failed to initialize action config: %w", err) + cleanup() + return nil, nil, fmt.Errorf("failed to initialize action config: %w", err) } - return actionConfig, nil + return actionConfig, cleanup, nil } // kubeconfigGetter implements RESTClientGetter type kubeconfigGetter struct { kubeconfigPath string + namespace string } func (k *kubeconfigGetter) ToRESTConfig() (*rest.Config, error) { @@ -95,25 +103,30 @@ func (k *kubeconfigGetter) ToRESTMapper() (meta.RESTMapper, error) { } func (k *kubeconfigGetter) ToRawKubeConfigLoader() clientcmd.ClientConfig { + overrides := &clientcmd.ConfigOverrides{} + if k.namespace != "" { + overrides.Context = clientcmdapi.Context{Namespace: k.namespace} + } return clientcmd.NewNonInteractiveDeferredLoadingClientConfig( &clientcmd.ClientConfigLoadingRules{ExplicitPath: k.kubeconfigPath}, - 
&clientcmd.ConfigOverrides{}, + overrides, ) } // Install 安装 Helm Chart func (h *HelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { - actionConfig, err := h.getActionConfig(cluster, instance.Namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, instance.Namespace) if err != nil { return err } + defer cleanup() install := action.NewInstall(actionConfig) install.ReleaseName = instance.Name install.Namespace = instance.Namespace install.CreateNamespace = true install.Wait = true - install.Timeout = 5 * time.Minute + install.Timeout = helmOperationTimeout() // 加载 Chart(从本地路径或 OCI registry) // 这里简化处理,假设 chart 已经被拉取到本地 @@ -139,15 +152,17 @@ func (h *HelmClient) Install(ctx context.Context, cluster *entity.Cluster, insta // Upgrade 升级 Helm Release func (h *HelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { - actionConfig, err := h.getActionConfig(cluster, instance.Namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, instance.Namespace) if err != nil { return err } + defer cleanup() upgrade := action.NewUpgrade(actionConfig) upgrade.Namespace = instance.Namespace + upgrade.ReuseValues = true upgrade.Wait = true - upgrade.Timeout = 5 * time.Minute + upgrade.Timeout = helmOperationTimeout() // 加载 Chart chartPath := fmt.Sprintf("/tmp/charts/%s-%s.tgz", instance.Chart, instance.Version) @@ -172,14 +187,15 @@ func (h *HelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, insta // Uninstall 卸载 Helm Release func (h *HelmClient) Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return err } + defer cleanup() uninstall := action.NewUninstall(actionConfig) uninstall.Wait = true - uninstall.Timeout = 5 * time.Minute + uninstall.Timeout = 
helmOperationTimeout() _, err = uninstall.Run(releaseName) if err != nil { @@ -194,15 +210,16 @@ func (h *HelmClient) Uninstall(ctx context.Context, cluster *entity.Cluster, rel // Rollback 回滚 Helm Release func (h *HelmClient) Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return err } + defer cleanup() rollback := action.NewRollback(actionConfig) rollback.Version = revision rollback.Wait = true - rollback.Timeout = 5 * time.Minute + rollback.Timeout = helmOperationTimeout() if err := rollback.Run(releaseName); err != nil { return fmt.Errorf("failed to rollback release: %w", err) @@ -211,12 +228,25 @@ func (h *HelmClient) Rollback(ctx context.Context, cluster *entity.Cluster, rele return nil } +func helmOperationTimeout() time.Duration { + raw := os.Getenv("HELM_OPERATION_TIMEOUT") + if raw == "" { + return 15 * time.Minute + } + timeout, err := time.ParseDuration(raw) + if err != nil || timeout <= 0 { + return 15 * time.Minute + } + return timeout +} + // GetStatus 获取 Release 状态 func (h *HelmClient) GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() status := action.NewStatus(actionConfig) rel, err := status.Run(releaseName) @@ -229,10 +259,11 @@ func (h *HelmClient) GetStatus(ctx context.Context, cluster *entity.Cluster, rel // GetHistory 获取 Release 历史 func (h *HelmClient) GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, 
namespace) if err != nil { return nil, err } + defer cleanup() history := action.NewHistory(actionConfig) history.Max = 256 @@ -259,10 +290,11 @@ func (h *HelmClient) GetHistory(ctx context.Context, cluster *entity.Cluster, re // List 列出集群中的所有 Releases func (h *HelmClient) List(ctx context.Context, cluster *entity.Cluster, namespace string) ([]*entity.Instance, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() list := action.NewList(actionConfig) if namespace == "" { @@ -284,12 +316,14 @@ func (h *HelmClient) List(ctx context.Context, cluster *entity.Cluster, namespac // GetValues 获取 Release 的 values func (h *HelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) { - actionConfig, err := h.getActionConfig(cluster, namespace) + actionConfig, cleanup, err := h.getActionConfig(cluster, namespace) if err != nil { return nil, err } + defer cleanup() getValues := action.NewGetValues(actionConfig) + getValues.AllValues = true values, err := getValues.Run(releaseName) if err != nil { return nil, fmt.Errorf("failed to get values: %w", err) @@ -298,6 +332,56 @@ func (h *HelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, rel return values, nil } +// GetChartDefaultValues 从 chart 包中读取默认 values +func (h *HelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) { + chart, err := loader.Load(chartPath) + if err != nil { + return nil, fmt.Errorf("failed to load chart: %w", err) + } + vals := make(map[string]interface{}) + if chart.Values != nil { + for k, v := range chart.Values { + vals[k] = v + } + } + return vals, nil +} + +func (h *HelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) { + chartPath := 
fmt.Sprintf("/tmp/charts/%s-%s.tgz", instance.Chart, instance.Version) + chart, err := loader.Load(chartPath) + if err != nil { + return nil, fmt.Errorf("failed to load chart: %w", err) + } + actionConfig := new(action.Configuration) + actionConfig.Log = func(format string, v ...interface{}) {} + + install := action.NewInstall(actionConfig) + install.ReleaseName = instance.Name + if install.ReleaseName == "" { + install.ReleaseName = "quota-precheck" + } + install.Namespace = instance.Namespace + if install.Namespace == "" { + install.Namespace = "default" + } + install.DryRun = true + install.DryRunOption = "client" + install.ClientOnly = true + install.Replace = true + install.SkipSchemaValidation = true + + values := instance.Values + if values == nil { + values = map[string]interface{}{} + } + release, err := install.RunWithContext(ctx, chart, values) + if err != nil { + return nil, fmt.Errorf("failed to render chart for quota estimate: %w", err) + } + return domainservice.EstimateRenderedManifestResources(release.Manifest) +} + // convertReleaseToInstance 转换 Helm Release 为 Instance func (h *HelmClient) convertReleaseToInstance(rel *release.Release) *entity.Instance { return &entity.Instance{ diff --git a/backend/internal/adapter/output/helm/real/helm_client_test.go b/backend/internal/adapter/output/helm/real/helm_client_test.go new file mode 100644 index 0000000..249f7d2 --- /dev/null +++ b/backend/internal/adapter/output/helm/real/helm_client_test.go @@ -0,0 +1,45 @@ +package real + +import ( + "os" + "path/filepath" + "testing" +) + +func TestKubeconfigGetterOverridesNamespace(t *testing.T) { + t.Parallel() + + kubeconfigPath := filepath.Join(t.TempDir(), "kubeconfig") + kubeconfig := `apiVersion: v1 +kind: Config +clusters: +- cluster: + server: https://127.0.0.1:6443 + name: test +contexts: +- context: + cluster: test + user: test + name: test +current-context: test +users: +- name: test + user: + token: test +` + if err := os.WriteFile(kubeconfigPath, 
[]byte(kubeconfig), 0600); err != nil { + t.Fatalf("failed to write kubeconfig: %v", err) + } + getter := &kubeconfigGetter{ + kubeconfigPath: kubeconfigPath, + namespace: "ocdp-u-alice", + } + + namespace, _, err := getter.ToRawKubeConfigLoader().Namespace() + if err != nil { + t.Fatalf("Namespace returned error: %v", err) + } + if namespace != "ocdp-u-alice" { + t.Fatalf("expected namespace override %q, got %q", "ocdp-u-alice", namespace) + } +} diff --git a/backend/internal/adapter/output/k8s/diagnostics_client.go b/backend/internal/adapter/output/k8s/diagnostics_client.go new file mode 100644 index 0000000..8ff484f --- /dev/null +++ b/backend/internal/adapter/output/k8s/diagnostics_client.go @@ -0,0 +1,374 @@ +package k8s + +import ( + "bufio" + "context" + "fmt" + "io" + "sort" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type DiagnosticsClient struct{} + +func NewDiagnosticsClient() repository.InstanceDiagnosticsClient { + return &DiagnosticsClient{} +} + +type MockDiagnosticsClient struct{} + +func NewMockDiagnosticsClient() repository.InstanceDiagnosticsClient { + return &MockDiagnosticsClient{} +} + +func (*MockDiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) { + return &entity.InstanceDiagnostics{ + InstanceName: instance.Name, + Namespace: instance.Namespace, + CollectedAt: time.Now(), + }, nil +} + +func (*MockDiagnosticsClient) StreamPodLogs(ctx context.Context, cluster *entity.Cluster, namespace, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) { + lines := make(chan string, 10) + errs := make(chan error, 1) + go func() { + defer close(lines) + defer close(errs) + select { + case 
<-ctx.Done(): + return + case lines <- "[mock] Streaming pod logs...": + case lines <- "[mock] Container started successfully": + case lines <- "[mock] Listening on :8080": + } + }() + return lines, errs, nil +} + +func (c *DiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) { + clientset, err := diagnosticsClientset(cluster) + if err != nil { + return nil, err + } + if tailLines <= 0 { + tailLines = 200 + } + if tailLines > 2000 { + tailLines = 2000 + } + + pods, err := listInstancePods(ctx, clientset, instance) + if err != nil { + return nil, err + } + services, err := listInstanceServices(ctx, clientset, instance) + if err != nil { + return nil, err + } + events, err := listInstanceEvents(ctx, clientset, instance, pods, services) + if err != nil { + return nil, err + } + logs := collectPodLogs(ctx, clientset, pods, tailLines) + + return &entity.InstanceDiagnostics{ + InstanceName: instance.Name, + Namespace: instance.Namespace, + Pods: convertPodsToDiagnostics(pods), + Services: convertServicesToDiagnostics(services), + Events: convertEventsToDiagnostics(events), + Logs: logs, + CollectedAt: time.Now(), + }, nil +} + +func (c *DiagnosticsClient) StreamPodLogs(ctx context.Context, cluster *entity.Cluster, namespace, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) { + clientset, err := diagnosticsClientset(cluster) + if err != nil { + return nil, nil, err + } + if tailLines <= 0 { + tailLines = 200 + } + if tailLines > 2000 { + tailLines = 2000 + } + + req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{ + Container: containerName, + Follow: true, + TailLines: &tailLines, + }) + + stream, err := req.Stream(ctx) + if err != nil { + return nil, nil, fmt.Errorf("failed to open log stream for %s/%s: %w", podName, containerName, err) + } + + lines := make(chan string, 64) + errs := make(chan 
error, 1) + + go func() { + defer close(lines) + defer close(errs) + defer func() { _ = stream.Close() }() + + scanner := bufio.NewScanner(stream) + // Allow long lines; Kubernetes log entries can exceed the default 64 KiB + scanner.Buffer(make([]byte, 0, 64*1024), 2*1024*1024) + + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + line := scanner.Text() + if line == "" { + continue + } + select { + case lines <- line: + case <-ctx.Done(): + return + } + } + if err := scanner.Err(); err != nil { + select { + case errs <- err: + case <-ctx.Done(): + } + } + }() + + return lines, errs, nil +} + +func diagnosticsClientset(cluster *entity.Cluster) (kubernetes.Interface, error) { + config, err := restConfigFromCluster(cluster) + if err != nil { + return nil, err + } + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create diagnostics kubernetes client: %w", err) + } + return clientset, nil +} + +func listInstancePods(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance) ([]corev1.Pod, error) { + selector := fmt.Sprintf("app.kubernetes.io/instance=%s", instance.Name) + pods, err := clientset.CoreV1().Pods(instance.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return nil, fmt.Errorf("failed to list instance pods: %w", err) + } + if len(pods.Items) > 0 { + return pods.Items, nil + } + all, err := clientset.CoreV1().Pods(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list namespace pods: %w", err) + } + filtered := make([]corev1.Pod, 0) + for _, pod := range all.Items { + if resourceMatchesInstance(pod.ObjectMeta, instance) { + filtered = append(filtered, pod) + } + } + return filtered, nil +} + +func listInstanceServices(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance) ([]corev1.Service, error) { + selector := 
fmt.Sprintf("app.kubernetes.io/instance=%s", instance.Name) + services, err := clientset.CoreV1().Services(instance.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return nil, fmt.Errorf("failed to list instance services: %w", err) + } + if len(services.Items) > 0 { + return services.Items, nil + } + all, err := clientset.CoreV1().Services(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list namespace services: %w", err) + } + filtered := make([]corev1.Service, 0) + for _, svc := range all.Items { + if resourceMatchesInstance(svc.ObjectMeta, instance) { + filtered = append(filtered, svc) + } + } + return filtered, nil +} + +func listInstanceEvents(ctx context.Context, clientset kubernetes.Interface, instance *entity.Instance, pods []corev1.Pod, services []corev1.Service) ([]corev1.Event, error) { + events, err := clientset.CoreV1().Events(instance.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list instance events: %w", err) + } + names := map[string]bool{instance.Name: true} + for _, pod := range pods { + names[pod.Name] = true + } + for _, svc := range services { + names[svc.Name] = true + } + filtered := make([]corev1.Event, 0) + for _, event := range events.Items { + if names[event.InvolvedObject.Name] || strings.Contains(event.Message, instance.Name) { + filtered = append(filtered, event) + } + } + sort.SliceStable(filtered, func(i, j int) bool { + return filtered[i].LastTimestamp.Time.After(filtered[j].LastTimestamp.Time) + }) + if len(filtered) > 100 { + filtered = filtered[:100] + } + return filtered, nil +} + +func collectPodLogs(ctx context.Context, clientset kubernetes.Interface, pods []corev1.Pod, tailLines int64) []entity.InstancePodLog { + logs := make([]entity.InstancePodLog, 0) + for _, pod := range pods { + for _, container := range pod.Spec.Containers { + item := entity.InstancePodLog{Pod: pod.Name, 
Container: container.Name, TailLines: tailLines} + req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + Container: container.Name, + TailLines: &tailLines, + }) + stream, err := req.Stream(ctx) + if err != nil { + item.Error = err.Error() + logs = append(logs, item) + continue + } + data, err := io.ReadAll(io.LimitReader(stream, 1<<20)) + _ = stream.Close() + if err != nil { + item.Error = err.Error() + } else { + item.Log = string(data) + } + logs = append(logs, item) + } + } + return logs +} + +func convertPodsToDiagnostics(pods []corev1.Pod) []entity.InstancePodDiagnostics { + out := make([]entity.InstancePodDiagnostics, 0, len(pods)) + for _, pod := range pods { + containers := make([]entity.InstanceContainerDiagnostics, 0, len(pod.Status.ContainerStatuses)) + var restarts int32 + for _, status := range pod.Status.ContainerStatuses { + restarts += status.RestartCount + containers = append(containers, entity.InstanceContainerDiagnostics{ + Name: status.Name, + Image: status.Image, + Ready: status.Ready, + RestartCount: status.RestartCount, + State: containerStateName(status.State), + Reason: containerStateReason(status.State), + Message: containerStateMessage(status.State), + }) + } + conditions := make([]entity.InstanceConditionDiagnostics, 0, len(pod.Status.Conditions)) + for _, condition := range pod.Status.Conditions { + conditions = append(conditions, entity.InstanceConditionDiagnostics{ + Type: string(condition.Type), + Status: string(condition.Status), + Reason: condition.Reason, + Message: condition.Message, + }) + } + out = append(out, entity.InstancePodDiagnostics{ + Name: pod.Name, + Namespace: pod.Namespace, + Phase: string(pod.Status.Phase), + NodeName: pod.Spec.NodeName, + PodIP: pod.Status.PodIP, + HostIP: pod.Status.HostIP, + RestartCount: restarts, + Containers: containers, + Conditions: conditions, + CreationTimestamp: pod.CreationTimestamp.Time, + }) + } + return out +} + +func 
convertServicesToDiagnostics(services []corev1.Service) []entity.InstanceServiceDiagnostics { + out := make([]entity.InstanceServiceDiagnostics, 0, len(services)) + for _, svc := range services { + entry := convertServiceToEntry(&svc) + out = append(out, entity.InstanceServiceDiagnostics{ + Name: svc.Name, + Namespace: svc.Namespace, + Type: string(svc.Spec.Type), + ClusterIP: svc.Spec.ClusterIP, + Ports: entry.Ports, + }) + } + return out +} + +func convertEventsToDiagnostics(events []corev1.Event) []entity.InstanceEventDiagnostics { + out := make([]entity.InstanceEventDiagnostics, 0, len(events)) + for _, event := range events { + out = append(out, entity.InstanceEventDiagnostics{ + Type: event.Type, + Reason: event.Reason, + Message: event.Message, + InvolvedKind: event.InvolvedObject.Kind, + InvolvedName: event.InvolvedObject.Name, + Count: event.Count, + FirstTimestamp: event.FirstTimestamp.Time, + LastTimestamp: event.LastTimestamp.Time, + }) + } + return out +} + +func containerStateName(state corev1.ContainerState) string { + switch { + case state.Running != nil: + return "running" + case state.Waiting != nil: + return "waiting" + case state.Terminated != nil: + return "terminated" + default: + return "unknown" + } +} + +func containerStateReason(state corev1.ContainerState) string { + switch { + case state.Waiting != nil: + return state.Waiting.Reason + case state.Terminated != nil: + return state.Terminated.Reason + default: + return "" + } +} + +func containerStateMessage(state corev1.ContainerState) string { + switch { + case state.Waiting != nil: + return state.Waiting.Message + case state.Terminated != nil: + return state.Terminated.Message + default: + return "" + } +} diff --git a/backend/internal/adapter/output/k8s/metrics_client.go b/backend/internal/adapter/output/k8s/metrics_client.go index 4499d99..38efe18 100644 --- a/backend/internal/adapter/output/k8s/metrics_client.go +++ b/backend/internal/adapter/output/k8s/metrics_client.go @@ -63,7 
+63,7 @@ func (c *MetricsClient) GetClusterMetrics(ctx context.Context, clusterID string) // 计算集群级别汇总 metrics := c.aggregateClusterMetrics(cluster, nodes.Items, pods.Items, nodeMetrics) - + return metrics, nil } @@ -87,6 +87,37 @@ func (c *MetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([ return c.getNodeMetricsData(ctx, clientset, metricsClient, nodes.Items) } +// GetPodResourceAllocations returns Kubernetes Pod requests/limits without +// inventing utilization values. GPU memory is treated as vendor integer MB. +func (c *MetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) { + cluster, err := c.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, fmt.Errorf("failed to get cluster: %w", err) + } + + clientset, _, err := c.createK8sClients(cluster) + if err != nil { + return nil, fmt.Errorf("failed to create k8s client: %w", err) + } + + pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list pods: %w", err) + } + + result := make([]*entity.PodResourceAllocation, 0, len(pods.Items)) + for _, pod := range pods.Items { + result = append(result, &entity.PodResourceAllocation{ + ClusterID: clusterID, + Namespace: pod.Namespace, + PodName: pod.Name, + InstanceName: inferHelmReleaseName(pod.Labels), + Allocation: podResourceAllocation(&pod), + }) + } + return result, nil +} + // createK8sClients 创建 Kubernetes 客户端 func (c *MetricsClient) createK8sClients(cluster *entity.Cluster) (*kubernetes.Clientset, *metricsv.Clientset, error) { config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.GetKubeConfig())) @@ -127,14 +158,14 @@ func (c *MetricsClient) getNodeMetricsData( for _, node := range nodes { nodeMetric := &entity.NodeMetrics{ - NodeName: node.Name, - Status: getNodeStatus(&node), - Role: getNodeRole(&node), - Age: getNodeAge(&node), - OSImage: node.Status.NodeInfo.OSImage, - 
KernelVersion: node.Status.NodeInfo.KernelVersion, - ContainerRuntime: node.Status.NodeInfo.ContainerRuntimeVersion, - KubeletVersion: node.Status.NodeInfo.KubeletVersion, + NodeName: node.Name, + Status: getNodeStatus(&node), + Role: getNodeRole(&node), + Age: getNodeAge(&node), + OSImage: node.Status.NodeInfo.OSImage, + KernelVersion: node.Status.NodeInfo.KernelVersion, + ContainerRuntime: node.Status.NodeInfo.ContainerRuntimeVersion, + KubeletVersion: node.Status.NodeInfo.KubeletVersion, } // CPU @@ -213,7 +244,7 @@ func (c *MetricsClient) aggregateClusterMetrics( var totalCPU, totalMem, usedCPU, usedMem int64 var totalGPU, usedGPU int healthyNodes := 0 - + // 单机最大值 var maxNodeCPU, maxNodeMem int64 var maxNodeGPU int @@ -251,7 +282,7 @@ func (c *MetricsClient) aggregateClusterMetrics( // 从 nodeMetrics 获取使用情况 if i < len(nodeMetrics) && nodeMetrics[i] != nil { metrics.Nodes = append(metrics.Nodes, *nodeMetrics[i]) - + // 更新单机最大使用率 if nodeMetrics[i].CPUPercent > maxNodeCPUUsage { maxNodeCPUUsage = nodeMetrics[i].CPUPercent @@ -274,7 +305,7 @@ func (c *MetricsClient) aggregateClusterMetrics( metrics.TotalCPU = fmt.Sprintf("%.2f cores", float64(totalCPU)/1000.0) metrics.TotalMemory = formatBytes(totalMem) metrics.TotalGPU = totalGPU - + // 格式化单机最大值 metrics.MaxNodeCPU = fmt.Sprintf("%.2f cores", float64(maxNodeCPU)/1000.0) metrics.MaxNodeMemory = formatBytes(maxNodeMem) @@ -292,7 +323,7 @@ func (c *MetricsClient) aggregateClusterMetrics( usedMem += int64(nm.MemoryPercent * float64(totalMem) / 100.0) usedGPU += nm.GPUUsage } - + if totalCPU > 0 { metrics.CPUUsage = float64(usedCPU) / float64(totalCPU) * 100 } @@ -302,7 +333,7 @@ func (c *MetricsClient) aggregateClusterMetrics( if totalGPU > 0 { metrics.GPUUsage = float64(usedGPU) / float64(totalGPU) * 100 } - + metrics.UsedCPU = fmt.Sprintf("%.2f cores", float64(usedCPU)/1000.0) metrics.UsedMemory = formatBytes(usedMem) metrics.UsedGPU = usedGPU @@ -348,7 +379,7 @@ func getNodeAge(node *corev1.Node) string { age := 
time.Since(node.CreationTimestamp.Time) days := int(age.Hours() / 24) hours := int(age.Hours()) % 24 - + if days > 0 { return fmt.Sprintf("%dd %dh", days, hours) } @@ -368,3 +399,110 @@ func formatBytes(bytes int64) string { return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp]) } +func inferHelmReleaseName(labels map[string]string) string { + if labels == nil { + return "" + } + for _, key := range []string{ + "app.kubernetes.io/instance", + "release", + "helm.sh/release", + "meta.helm.sh/release-name", + "app", + } { + if value := labels[key]; value != "" { + return value + } + } + return "" +} + +func podResourceAllocation(pod *corev1.Pod) entity.ResourceAllocation { + if pod == nil { + return entity.ResourceAllocation{} + } + sum := entity.ResourceAllocation{} + for _, container := range pod.Spec.Containers { + sum = addContainerAllocation(sum, container) + } + initMax := entity.ResourceAllocation{} + for _, container := range pod.Spec.InitContainers { + initMax = maxAllocation(initMax, containerAllocation(container)) + } + return maxAllocation(sum, initMax) +} + +func addContainerAllocation(base entity.ResourceAllocation, container corev1.Container) entity.ResourceAllocation { + return addAllocation(base, containerAllocation(container)) +} + +func containerAllocation(container corev1.Container) entity.ResourceAllocation { + requests := container.Resources.Requests + limits := container.Resources.Limits + return entity.ResourceAllocation{ + CPURequestsMilli: quantityMilliValue(requests, corev1.ResourceCPU), + CPULimitsMilli: quantityMilliValue(limits, corev1.ResourceCPU), + MemoryRequestsBytes: quantityValue(requests, corev1.ResourceMemory), + MemoryLimitsBytes: quantityValue(limits, corev1.ResourceMemory), + GPURequests: quantityValue(requests, corev1.ResourceName("nvidia.com/gpu")), + GPULimits: quantityValue(limits, corev1.ResourceName("nvidia.com/gpu")), + GPUMemoryRequestsMB: quantityValueAny(requests, 
corev1.ResourceName("nvidia.com/gpumem"), corev1.ResourceName("requests.nvidia.com/gpumem")), + GPUMemoryLimitsMB: quantityValueAny(limits, corev1.ResourceName("nvidia.com/gpumem"), corev1.ResourceName("requests.nvidia.com/gpumem")), + } +} + +func addAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation { + return entity.ResourceAllocation{ + CPURequestsMilli: left.CPURequestsMilli + right.CPURequestsMilli, + CPULimitsMilli: left.CPULimitsMilli + right.CPULimitsMilli, + MemoryRequestsBytes: left.MemoryRequestsBytes + right.MemoryRequestsBytes, + MemoryLimitsBytes: left.MemoryLimitsBytes + right.MemoryLimitsBytes, + GPURequests: left.GPURequests + right.GPURequests, + GPULimits: left.GPULimits + right.GPULimits, + GPUMemoryRequestsMB: left.GPUMemoryRequestsMB + right.GPUMemoryRequestsMB, + GPUMemoryLimitsMB: left.GPUMemoryLimitsMB + right.GPUMemoryLimitsMB, + } +} + +func maxAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation { + return entity.ResourceAllocation{ + CPURequestsMilli: maxInt64(left.CPURequestsMilli, right.CPURequestsMilli), + CPULimitsMilli: maxInt64(left.CPULimitsMilli, right.CPULimitsMilli), + MemoryRequestsBytes: maxInt64(left.MemoryRequestsBytes, right.MemoryRequestsBytes), + MemoryLimitsBytes: maxInt64(left.MemoryLimitsBytes, right.MemoryLimitsBytes), + GPURequests: maxInt64(left.GPURequests, right.GPURequests), + GPULimits: maxInt64(left.GPULimits, right.GPULimits), + GPUMemoryRequestsMB: maxInt64(left.GPUMemoryRequestsMB, right.GPUMemoryRequestsMB), + GPUMemoryLimitsMB: maxInt64(left.GPUMemoryLimitsMB, right.GPUMemoryLimitsMB), + } +} + +func quantityMilliValue(resources corev1.ResourceList, name corev1.ResourceName) int64 { + if quantity, ok := resources[name]; ok { + return quantity.MilliValue() + } + return 0 +} + +func quantityValue(resources corev1.ResourceList, name corev1.ResourceName) int64 { + if quantity, ok := resources[name]; ok { + return quantity.Value() + } + return 0 +} + +func 
quantityValueAny(resources corev1.ResourceList, names ...corev1.ResourceName) int64 { + for _, name := range names { + if quantity, ok := resources[name]; ok { + return quantity.Value() + } + } + return 0 +} + +func maxInt64(left, right int64) int64 { + if left > right { + return left + } + return right +} diff --git a/backend/internal/adapter/output/k8s/metrics_client_test.go b/backend/internal/adapter/output/k8s/metrics_client_test.go new file mode 100644 index 0000000..b210842 --- /dev/null +++ b/backend/internal/adapter/output/k8s/metrics_client_test.go @@ -0,0 +1,29 @@ +package k8s + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func TestContainerAllocationCountsVendorGPUMemoryKey(t *testing.T) { + container := corev1.Container{ + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceName("nvidia.com/gpumem"): resource.MustParse("10000"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceName("nvidia.com/gpumem"): resource.MustParse("12000"), + }, + }, + } + + allocation := containerAllocation(container) + if allocation.GPUMemoryRequestsMB != 10000 { + t.Fatalf("expected GPU memory requests 10000 MB, got %d", allocation.GPUMemoryRequestsMB) + } + if allocation.GPUMemoryLimitsMB != 12000 { + t.Fatalf("expected GPU memory limits 12000 MB, got %d", allocation.GPUMemoryLimitsMB) + } +} diff --git a/backend/internal/adapter/output/k8s/scale_client.go b/backend/internal/adapter/output/k8s/scale_client.go new file mode 100644 index 0000000..f2d6a96 --- /dev/null +++ b/backend/internal/adapter/output/k8s/scale_client.go @@ -0,0 +1,134 @@ +package k8s + +import ( + "context" + "fmt" + + "github.com/ocdp/cluster-service/internal/domain/entity" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// ScaleClient provides K8s-native workload scaling (bypasses Helm) +type ScaleClient struct{} + +// NewScaleClient 
creates a ScaleClient +func NewScaleClient() *ScaleClient { + return &ScaleClient{} +} + +// findDeployment searches for a deployment matching the release name using various label strategies. +func (c *ScaleClient) findDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, releaseName string) (*appsv1.Deployment, error) { + labelQueries := []string{ + fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName), + fmt.Sprintf("release=%s", releaseName), + fmt.Sprintf("app=%s", releaseName), + fmt.Sprintf("app.kubernetes.io/name=%s", releaseName), + } + + for _, query := range labelQueries { + deployments, err := clientset.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: query, + }) + if err != nil { + continue + } + if len(deployments.Items) > 0 { + return &deployments.Items[0], nil + } + } + + // Fallback: get by name directly + dep, err := clientset.AppsV1().Deployments(namespace).Get(ctx, releaseName, metav1.GetOptions{}) + if err == nil && dep != nil { + return dep, nil + } + + return nil, nil +} + +// GetDeploymentReplicas returns the current replicas count for a deployment. 
+func (c *ScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) { + clientset, err := c.clientsetForCluster(cluster) + if err != nil { + return 0, fmt.Errorf("failed to create k8s client: %w", err) + } + + dep, err := c.findDeployment(ctx, clientset, namespace, releaseName) + if err != nil { + return 0, err + } + if dep != nil && dep.Spec.Replicas != nil { + return *dep.Spec.Replicas, nil + } + + // Fallback to statefulsets + return c.getStatefulSetReplicas(ctx, clientset, namespace, releaseName) +} + +func (c *ScaleClient) getStatefulSetReplicas(ctx context.Context, clientset *kubernetes.Clientset, namespace, releaseName string) (int32, error) { + stsList, err := clientset.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName), + }) + if err != nil { + return 0, err + } + if len(stsList.Items) == 0 { + return 0, nil // No replicable workload found + } + sts := stsList.Items[0] + if sts.Spec.Replicas != nil { + return *sts.Spec.Replicas, nil + } + return 0, nil +} + +// ScaleDeployment scales the K8s deployment directly (bypasses Helm). 
+func (c *ScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error { + clientset, err := c.clientsetForCluster(cluster) + if err != nil { + return fmt.Errorf("failed to create k8s client: %w", err) + } + + dep, err := c.findDeployment(ctx, clientset, namespace, releaseName) + if err != nil { + return err + } + if dep != nil { + dep.Spec.Replicas = &replicas + _, err = clientset.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to scale deployment %s: %w", dep.Name, err) + } + return nil + } + + // Try StatefulSets + stsList, err := clientset.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName), + }) + if err == nil && len(stsList.Items) > 0 { + sts := stsList.Items[0] + sts.Spec.Replicas = &replicas + _, err = clientset.AppsV1().StatefulSets(namespace).Update(ctx, &sts, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to scale statefulset %s: %w", sts.Name, err) + } + return nil + } + + return fmt.Errorf("no deployment or statefulset found for release %s in namespace %s", releaseName, namespace) +} + +func (c *ScaleClient) clientsetForCluster(cluster *entity.Cluster) (*kubernetes.Clientset, error) { + restConfig, err := restConfigFromCluster(cluster) + if err != nil { + return nil, fmt.Errorf("failed to create rest config: %w", err) + } + clientset, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + return clientset, nil +} diff --git a/backend/internal/adapter/output/k8s/tenant_client.go b/backend/internal/adapter/output/k8s/tenant_client.go new file mode 100644 index 0000000..8109b45 --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_client.go @@ -0,0 +1,483 @@ +package k8s + +import ( + "context" + "encoding/base64" + "fmt" + 
"strings" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +// TenantClient provisions namespace-scoped tenant Kubernetes resources. +type TenantClient struct { + clientset kubernetes.Interface +} + +// NewTenantClient creates a tenant provisioning client that builds Kubernetes +// clients from the supplied cluster entity for each call. +func NewTenantClient() repository.TenantKubeClient { + return &TenantClient{} +} + +// NewTenantClientForClientset creates a tenant provisioning client for tests or +// callers that already own a Kubernetes client. +func NewTenantClientForClientset(clientset kubernetes.Interface) repository.TenantKubeClient { + return &TenantClient{clientset: clientset} +} + +// EnsureTenant idempotently ensures Namespace, ServiceAccount, RoleBinding, and +// ResourceQuota resources for the tenant binding. 
+func (c *TenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return err + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return err + } + if err := c.ensureNamespace(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureServiceAccount(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureRoleBinding(ctx, clientset, binding); err != nil { + return err + } + if err := c.ensureResourceQuota(ctx, clientset, binding); err != nil { + return err + } + return nil +} + +// IssueKubeconfig returns a short-lived kubeconfig backed by a Kubernetes +// TokenRequest. The token exists only in the returned value and is never stored. +func (c *TenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return nil, err + } + clientset, restConfig, err := c.clientsetForCluster(cluster) + if err != nil { + return nil, err + } + + cappedTTL := entity.TenantTokenTTL(ttl) + expirationSeconds := int64(cappedTTL.Seconds()) + tokenRequest, err := clientset.CoreV1(). + ServiceAccounts(binding.Namespace). 
+ CreateToken(ctx, binding.ServiceAccountName, &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: &expirationSeconds, + }, + }, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to request tenant service account token: %w", err) + } + if tokenRequest.Status.Token == "" { + return nil, entity.ErrInvalidTenantKubeconfigToken + } + + expiresAt := tokenRequest.Status.ExpirationTimestamp.Time + if expiresAt.IsZero() { + expiresAt = time.Now().Add(cappedTTL) + } + kubeconfig, err := buildTenantKubeconfig(cluster, restConfig, binding, tokenRequest.Status.Token) + if err != nil { + return nil, err + } + return &entity.TenantKubeconfig{ + Kubeconfig: kubeconfig, + ExpiresAt: expiresAt, + }, nil +} + +func (c *TenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return nil, err + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return nil, err + } + quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get tenant resource quota usage: %w", err) + } + return &repository.ResourceQuotaUsage{ + Hard: resourceVectorFromList(quota.Status.Hard), + Used: resourceVectorFromList(quota.Status.Used), + }, nil +} + +// SuspendTenant revokes tenant API access by deleting only the RoleBinding. +func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return err + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return err + } + err = clientset.RbacV1(). + RoleBindings(binding.Namespace). 
+ Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("failed to delete tenant role binding: %w", err) + } + return nil +} + +func (c *TenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + binding = binding.WithDefaults() + if err := binding.Validate(); err != nil { + return err + } + if isProtectedTenantNamespace(binding.Namespace) { + return entity.ErrProtectedNamespace + } + clientset, _, err := c.clientsetForCluster(cluster) + if err != nil { + return err + } + if err := deleteIgnoringNotFound(ctx, func() error { + return clientset.RbacV1().RoleBindings(binding.Namespace).Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{}) + }); err != nil { + return fmt.Errorf("failed to delete tenant role binding: %w", err) + } + if err := deleteIgnoringNotFound(ctx, func() error { + return clientset.CoreV1().ResourceQuotas(binding.Namespace).Delete(ctx, binding.ResourceQuotaName, metav1.DeleteOptions{}) + }); err != nil { + return fmt.Errorf("failed to delete tenant resource quota: %w", err) + } + if err := deleteIgnoringNotFound(ctx, func() error { + return clientset.CoreV1().ServiceAccounts(binding.Namespace).Delete(ctx, binding.ServiceAccountName, metav1.DeleteOptions{}) + }); err != nil { + return fmt.Errorf("failed to delete tenant service account: %w", err) + } + namespace, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("failed to get tenant namespace before deletion: %w", err) + } + if namespace.Labels["ocdp.io/managed-by"] != "ocdp" || namespace.Labels["ocdp.io/tenant"] != binding.Namespace { + return fmt.Errorf("refusing to delete unmanaged namespace %q", binding.Namespace) + } + if err := deleteIgnoringNotFound(ctx, func() error { + return 
clientset.CoreV1().Namespaces().Delete(ctx, binding.Namespace, metav1.DeleteOptions{}) + }); err != nil { + return fmt.Errorf("failed to delete tenant namespace: %w", err) + } + return nil +} + +func deleteIgnoringNotFound(ctx context.Context, deleteFn func() error) error { + if err := ctx.Err(); err != nil { + return err + } + err := deleteFn() + if apierrors.IsNotFound(err) { + return nil + } + return err +} + +func isProtectedTenantNamespace(namespace string) bool { + switch strings.TrimSpace(namespace) { + case "", "default", "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + +func resourceVectorFromList(values corev1.ResourceList) repository.ResourceVector { + gpu := values[corev1.ResourceName("requests.nvidia.com/gpu")] + gpuMem := values[corev1.ResourceName("requests.nvidia.com/gpumem")] + return repository.ResourceVector{ + CPU: values[corev1.ResourceName("requests.cpu")], + Memory: values[corev1.ResourceName("requests.memory")], + GPU: gpu.Value(), + GPUMemoryMB: gpuMem.Value(), + } +} + +func (c *TenantClient) clientsetForCluster(cluster *entity.Cluster) (kubernetes.Interface, *rest.Config, error) { + if c.clientset != nil { + config := &rest.Config{Host: "https://kubernetes.default.svc"} + if cluster != nil { + clusterConfig, err := restConfigFromCluster(cluster) + if err == nil { + config = clusterConfig + } + } + return c.clientset, config, nil + } + + config, err := restConfigFromCluster(cluster) + if err != nil { + return nil, nil, err + } + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, nil, fmt.Errorf("failed to create tenant kubernetes client: %w", err) + } + return clientset, config, nil +} + +func restConfigFromCluster(cluster *entity.Cluster) (*rest.Config, error) { + if cluster == nil { + return nil, entity.ErrInvalidClusterHost + } + if looksLikeKubeconfig(cluster.CAData) { + config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.CAData)) + if err != 
nil { + return nil, fmt.Errorf("failed to parse tenant kubeconfig: %w", err) + } + return config, nil + } + if strings.TrimSpace(cluster.Host) == "" { + return nil, entity.ErrInvalidClusterHost + } + return &rest.Config{ + Host: cluster.Host, + TLSClientConfig: rest.TLSClientConfig{ + CAData: decodePossiblyBase64(cluster.CAData), + CertData: decodePossiblyBase64(cluster.CertData), + KeyData: decodePossiblyBase64(cluster.KeyData), + }, + BearerToken: cluster.Token, + }, nil +} + +func (c *TenantClient) ensureNamespace(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + } + _, err := clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant namespace: %w", getErr) + } + mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) + if _, updateErr := clientset.CoreV1().Namespaces().Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant namespace: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant namespace: %w", err) + } + return nil +} + +func (c *TenantClient) ensureServiceAccount(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + serviceAccount := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.ServiceAccountName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + } + _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Create(ctx, serviceAccount, 
metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant service account: %w", getErr) + } + mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) + if _, updateErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant service account: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant service account: %w", err) + } + return nil +} + +func (c *TenantClient) ensureRoleBinding(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + roleBinding := desiredRoleBinding(binding) + _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Create(ctx, roleBinding, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant role binding: %w", getErr) + } + mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) + current.Subjects = roleBinding.Subjects + current.RoleRef = roleBinding.RoleRef + if _, updateErr := clientset.RbacV1().RoleBindings(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant role binding: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant role binding: %w", err) + } + return nil +} + +func (c *TenantClient) ensureResourceQuota(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { + resourceQuota := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
binding.ResourceQuotaName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: binding.ResourceQuotaHard.DeepCopy(), + }, + } + _, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Create(ctx, resourceQuota, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + current, getErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if getErr != nil { + return fmt.Errorf("failed to get tenant resource quota: %w", getErr) + } + mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) + current.Spec.Hard = binding.ResourceQuotaHard.DeepCopy() + if _, updateErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { + return fmt.Errorf("failed to update tenant resource quota: %w", updateErr) + } + return nil + } + if err != nil { + return fmt.Errorf("failed to create tenant resource quota: %w", err) + } + return nil +} + +func desiredRoleBinding(binding entity.TenantBinding) *rbacv1.RoleBinding { + return &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: binding.RoleBindingName, + Namespace: binding.Namespace, + Labels: copyStringMap(binding.Labels), + Annotations: copyStringMap(binding.Annotations), + }, + Subjects: []rbacv1.Subject{{ + Kind: rbacv1.ServiceAccountKind, + Name: binding.ServiceAccountName, + Namespace: binding.Namespace, + }}, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: binding.ClusterRoleName, + }, + } +} + +func buildTenantKubeconfig(cluster *entity.Cluster, restConfig *rest.Config, binding entity.TenantBinding, token string) (string, error) { + host := "" + var caData []byte + if restConfig != nil { + host = restConfig.Host + caData = append([]byte{}, restConfig.CAData...) 
+ } + if host == "" && cluster != nil { + host = cluster.Host + } + if len(caData) == 0 && cluster != nil { + caData = decodePossiblyBase64(cluster.CAData) + } + if host == "" { + return "", entity.ErrInvalidClusterHost + } + + clusterName := "tenant-cluster" + if cluster != nil && cluster.Name != "" { + clusterName = cluster.Name + } + userName := binding.ServiceAccountName + contextName := fmt.Sprintf("%s/%s", clusterName, binding.Namespace) + config := clientcmdapi.NewConfig() + config.Clusters[clusterName] = &clientcmdapi.Cluster{ + Server: host, + CertificateAuthorityData: caData, + } + config.AuthInfos[userName] = &clientcmdapi.AuthInfo{ + Token: token, + } + config.Contexts[contextName] = &clientcmdapi.Context{ + Cluster: clusterName, + AuthInfo: userName, + Namespace: binding.Namespace, + } + config.CurrentContext = contextName + + bytes, err := clientcmd.Write(*config) + if err != nil { + return "", fmt.Errorf("failed to build tenant kubeconfig: %w", err) + } + return string(bytes), nil +} + +func mergeObjectMetadata(meta *metav1.ObjectMeta, labels, annotations map[string]string) { + if len(labels) > 0 && meta.Labels == nil { + meta.Labels = map[string]string{} + } + for key, value := range labels { + meta.Labels[key] = value + } + if len(annotations) > 0 && meta.Annotations == nil { + meta.Annotations = map[string]string{} + } + for key, value := range annotations { + meta.Annotations[key] = value + } +} + +func copyStringMap(values map[string]string) map[string]string { + if len(values) == 0 { + return nil + } + copied := make(map[string]string, len(values)) + for key, value := range values { + copied[key] = value + } + return copied +} + +func decodePossiblyBase64(value string) []byte { + decoded, err := base64.StdEncoding.DecodeString(value) + if err == nil { + return decoded + } + return []byte(value) +} + +func looksLikeKubeconfig(value string) bool { + trimmed := strings.TrimSpace(value) + return strings.HasPrefix(trimmed, "apiVersion:") || 
strings.HasPrefix(trimmed, "kind: Config") +} diff --git a/backend/internal/adapter/output/k8s/tenant_client_test.go b/backend/internal/adapter/output/k8s/tenant_client_test.go new file mode 100644 index 0000000..0e82cdd --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_client_test.go @@ -0,0 +1,214 @@ +package k8s + +import ( + "context" + "errors" + "strings" + "testing" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +func TestTenantClientEnsureTenantCreatesResources(t *testing.T) { + ctx := context.Background() + clientset := fake.NewSimpleClientset() + client := NewTenantClientForClientset(clientset) + binding := tenantBinding() + + if err := client.EnsureTenant(ctx, nil, binding); err != nil { + t.Fatalf("EnsureTenant returned error: %v", err) + } + + if _, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}); err != nil { + t.Fatalf("expected namespace: %v", err) + } + if _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}); err != nil { + t.Fatalf("expected service account: %v", err) + } + roleBinding, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected role binding: %v", err) + } + if roleBinding.RoleRef.Kind != "ClusterRole" || roleBinding.RoleRef.Name != binding.ClusterRoleName { + t.Fatalf("unexpected role ref: %#v", roleBinding.RoleRef) + } + if len(roleBinding.Subjects) != 1 || roleBinding.Subjects[0].Name != binding.ServiceAccountName { + 
t.Fatalf("unexpected role binding subjects: %#v", roleBinding.Subjects) + } + quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected resource quota: %v", err) + } + if quota.Spec.Hard.Cpu().String() != "2" { + t.Fatalf("expected cpu quota 2, got %s", quota.Spec.Hard.Cpu().String()) + } +} + +func TestTenantClientEnsureTenantUpdatesExistingResources(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}}, + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}}, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: binding.RoleBindingName, Namespace: binding.Namespace}, + RoleRef: rbacv1.RoleRef{APIGroup: rbacv1.GroupName, Kind: "ClusterRole", Name: "view"}, + }, + &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{Name: binding.ResourceQuotaName, Namespace: binding.Namespace}, + Spec: corev1.ResourceQuotaSpec{Hard: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + }}, + }, + ) + client := NewTenantClientForClientset(clientset) + + if err := client.EnsureTenant(ctx, nil, binding); err != nil { + t.Fatalf("EnsureTenant returned error: %v", err) + } + + roleBinding, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected updated role binding: %v", err) + } + if roleBinding.RoleRef.Name != binding.ClusterRoleName { + t.Fatalf("expected role ref %q, got %q", binding.ClusterRoleName, roleBinding.RoleRef.Name) + } + if roleBinding.Labels["ocdp.io/tenant"] != binding.Namespace { + t.Fatalf("expected tenant label on updated role binding, got %#v", roleBinding.Labels) + } + quota, err := 
clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("expected updated quota: %v", err) + } + if quota.Spec.Hard.Cpu().String() != "2" { + t.Fatalf("expected updated cpu quota 2, got %s", quota.Spec.Hard.Cpu().String()) + } +} + +func TestTenantClientSuspendTenantDeletesOnlyRoleBinding(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}}, + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}}, + desiredRoleBinding(binding), + ) + client := NewTenantClientForClientset(clientset) + + if err := client.SuspendTenant(ctx, nil, binding); err != nil { + t.Fatalf("SuspendTenant returned error: %v", err) + } + if _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected deleted role binding, got err %v", err) + } + if _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}); err != nil { + t.Fatalf("service account should remain: %v", err) + } +} + +func TestTenantClientDeleteTenantDeletesTenantResources(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}}, + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}}, + desiredRoleBinding(binding), + &corev1.ResourceQuota{ObjectMeta: metav1.ObjectMeta{Name: binding.ResourceQuotaName, Namespace: binding.Namespace}}, + ) + client := NewTenantClientForClientset(clientset) + + if err := client.DeleteTenant(ctx, nil, binding); err != 
nil { + t.Fatalf("DeleteTenant returned error: %v", err) + } + if _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected role binding deleted, got %v", err) + } + if _, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected resource quota deleted, got %v", err) + } + if _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected service account deleted, got %v", err) + } + if _, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("expected namespace deleted, got %v", err) + } +} + +func TestTenantClientDeleteTenantRejectsProtectedNamespace(t *testing.T) { + ctx := context.Background() + client := NewTenantClientForClientset(fake.NewSimpleClientset( + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "default"}}, + )) + binding := entity.NewTenantBinding("default") + + err := client.DeleteTenant(ctx, nil, binding) + if !errors.Is(err, entity.ErrProtectedNamespace) { + t.Fatalf("expected protected namespace error, got %v", err) + } +} + +func TestTenantClientIssueKubeconfigCapsTokenTTL(t *testing.T) { + ctx := context.Background() + binding := tenantBinding() + clientset := fake.NewSimpleClientset(&corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}, + }) + var requestedExpirationSeconds int64 + expiresAt := time.Now().Add(entity.MaxTenantKubeconfigTTL).UTC() + clientset.Fake.PrependReactor("create", "serviceaccounts", func(action k8stesting.Action) (bool, runtime.Object, error) { + if action.GetSubresource() != "token" { + return false, nil, nil + } + createAction := 
action.(k8stesting.CreateAction) + tokenRequest := createAction.GetObject().(*authenticationv1.TokenRequest) + if tokenRequest.Spec.ExpirationSeconds != nil { + requestedExpirationSeconds = *tokenRequest.Spec.ExpirationSeconds + } + return true, &authenticationv1.TokenRequest{ + Status: authenticationv1.TokenRequestStatus{ + Token: "short-lived-token", + ExpirationTimestamp: metav1.NewTime(expiresAt), + }, + }, nil + }) + client := NewTenantClientForClientset(clientset) + + kubeconfig, err := client.IssueKubeconfig(ctx, &entity.Cluster{Name: "test", Host: "https://example.invalid"}, binding, 24*time.Hour) + if err != nil { + t.Fatalf("IssueKubeconfig returned error: %v", err) + } + + if requestedExpirationSeconds != int64(entity.MaxTenantKubeconfigTTL.Seconds()) { + t.Fatalf("expected capped ttl %d, got %d", int64(entity.MaxTenantKubeconfigTTL.Seconds()), requestedExpirationSeconds) + } + if !kubeconfig.ExpiresAt.Equal(expiresAt) { + t.Fatalf("expected expiration %s, got %s", expiresAt, kubeconfig.ExpiresAt) + } + if !strings.Contains(kubeconfig.Kubeconfig, "short-lived-token") { + t.Fatal("expected kubeconfig to contain issued token") + } + if !strings.Contains(kubeconfig.Kubeconfig, "namespace: tenant-a") { + t.Fatalf("expected kubeconfig namespace, got:\n%s", kubeconfig.Kubeconfig) + } +} + +func tenantBinding() entity.TenantBinding { + binding := entity.NewTenantBinding("tenant-a") + binding.ResourceQuotaHard = corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + } + return binding +} diff --git a/backend/internal/adapter/output/k8s/tenant_mock.go b/backend/internal/adapter/output/k8s/tenant_mock.go new file mode 100644 index 0000000..f741887 --- /dev/null +++ b/backend/internal/adapter/output/k8s/tenant_mock.go @@ -0,0 +1,58 @@ +package k8s + +import ( + "context" + "fmt" + "time" + + "github.com/ocdp/cluster-service/internal/domain/entity" + 
"github.com/ocdp/cluster-service/internal/domain/repository" +) + +type MockTenantClient struct{} + +func NewMockTenantClient() repository.TenantKubeClient { + return &MockTenantClient{} +} + +func (c *MockTenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return binding.Validate() +} + +func (c *MockTenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { + if err := binding.Validate(); err != nil { + return nil, err + } + expiresAt := time.Now().Add(entity.TenantTokenTTL(ttl)) + return &entity.TenantKubeconfig{ + Kubeconfig: fmt.Sprintf("apiVersion: v1\nkind: Config\nclusters:\n- name: %s\n cluster:\n server: %s\ncontexts:\n- name: %s\n context:\n cluster: %s\n namespace: %s\n user: %s\ncurrent-context: %s\nusers:\n- name: %s\n user:\n token: mock-ephemeral-token\n", + cluster.Name, cluster.Host, binding.Namespace, cluster.Name, binding.Namespace, binding.ServiceAccountName, binding.Namespace, binding.ServiceAccountName), + ExpiresAt: expiresAt, + }, nil +} + +func (c *MockTenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) { + if err := binding.Validate(); err != nil { + return nil, err + } + return &repository.ResourceQuotaUsage{ + Hard: resourceVectorFromList(binding.ResourceQuotaHard), + Used: repository.ResourceVector{}, + }, nil +} + +func (c *MockTenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return binding.Validate() +} + +func (c *MockTenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + if err := binding.Validate(); err != nil { + return err + } + switch binding.Namespace { + case "", "default", "kube-system", "kube-public", "kube-node-lease": + return 
entity.ErrProtectedNamespace + default: + return nil + } +} diff --git a/backend/internal/adapter/output/oci/mock/oci_client_mock.go b/backend/internal/adapter/output/oci/mock/oci_client_mock.go index a31baaa..1556489 100644 --- a/backend/internal/adapter/output/oci/mock/oci_client_mock.go +++ b/backend/internal/adapter/output/oci/mock/oci_client_mock.go @@ -5,7 +5,7 @@ import ( "fmt" "strings" "time" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -13,7 +13,7 @@ import ( // OCIClientMock OCI Registry 客户端 Mock 实现 type OCIClientMock struct { // Mock 数据存储 - repositories map[string][]string // registryID -> []repositoryName + repositories map[string][]string // registryID -> []repositoryName artifacts map[string]map[string][]*entity.Artifact // registryID -> repository -> []artifact } @@ -23,10 +23,10 @@ func NewOCIClientMock() repository.OCIClient { repositories: make(map[string][]string), artifacts: make(map[string]map[string][]*entity.Artifact), } - + // 初始化一些测试数据 mock.initMockData() - + return mock } @@ -38,18 +38,18 @@ func (c *OCIClientMock) initMockData() { // initArtifactsForRegistry initializes mock artifacts for a given registry ID func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { c.artifacts[registryID] = make(map[string][]*entity.Artifact) - + // vllm-serve artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/vllm-serve"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/vllm-serve", - Tag: "0.1.0", - Digest: "sha256:abc123def456", - Type: entity.ArtifactTypeChart, - Size: 12345678, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/vllm-serve", + Tag: "0.1.0", + Digest: "sha256:abc123def456", + Type: entity.ArtifactTypeChart, + Size: 12345678, + MediaType: 
"application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "vllm-serve", "org.opencontainers.image.version": "0.1.0", @@ -57,14 +57,14 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-24 * time.Hour), }, { - RegistryID: registryID, - Repository: "charts/vllm-serve", - Tag: "0.2.0", - Digest: "sha256:xyz789uvw012", - Type: entity.ArtifactTypeChart, - Size: 13456789, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/vllm-serve", + Tag: "0.2.0", + Digest: "sha256:xyz789uvw012", + Type: entity.ArtifactTypeChart, + Size: 13456789, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "vllm-serve", "org.opencontainers.image.version": "0.2.0", @@ -72,36 +72,36 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now(), }, } - + // nginx artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/nginx"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/nginx", - Tag: "1.0.0", - Digest: "sha256:nginx123456", - Type: entity.ArtifactTypeChart, - Size: 5678901, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/nginx", + Tag: "1.0.0", + Digest: "sha256:nginx123456", + Type: entity.ArtifactTypeChart, + Size: 5678901, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ 
"org.opencontainers.image.title": "nginx", }, CreatedAt: time.Now().Add(-48 * time.Hour), }, } - + // redis artifacts (OCI 格式的 Helm Chart) c.artifacts[registryID]["charts/redis"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "charts/redis", - Tag: "6.2.0", - Digest: "sha256:redis789abc", - Type: entity.ArtifactTypeChart, - Size: 8901234, - MediaType: "application/vnd.oci.image.manifest.v1+json", - ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type + RegistryID: registryID, + Repository: "charts/redis", + Tag: "6.2.0", + Digest: "sha256:redis789abc", + Type: entity.ArtifactTypeChart, + Size: 8901234, + MediaType: "application/vnd.oci.image.manifest.v1+json", + ConfigType: "application/vnd.cncf.helm.config.v1+json", // Helm Chart 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "redis", "org.opencontainers.image.version": "6.2.0", @@ -109,18 +109,18 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-72 * time.Hour), }, } - + // alpine artifacts (Docker Image) c.artifacts[registryID]["library/alpine"] = []*entity.Artifact{ { - RegistryID: registryID, - Repository: "library/alpine", - Tag: "3.18", - Digest: "sha256:alpine123", - Type: entity.ArtifactTypeImage, - Size: 2345678, - MediaType: "application/vnd.docker.distribution.manifest.v2+json", - ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type + RegistryID: registryID, + Repository: "library/alpine", + Tag: "3.18", + Digest: "sha256:alpine123", + Type: entity.ArtifactTypeImage, + Size: 2345678, + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "alpine", "org.opencontainers.image.version": "3.18", @@ -128,14 +128,14 @@ func (c *OCIClientMock) 
initArtifactsForRegistry(registryID string) { CreatedAt: time.Now().Add(-96 * time.Hour), }, { - RegistryID: registryID, - Repository: "library/alpine", - Tag: "latest", - Digest: "sha256:alpine456", - Type: entity.ArtifactTypeImage, - Size: 2456789, - MediaType: "application/vnd.docker.distribution.manifest.v2+json", - ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type + RegistryID: registryID, + Repository: "library/alpine", + Tag: "latest", + Digest: "sha256:alpine456", + Type: entity.ArtifactTypeImage, + Size: 2456789, + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + ConfigType: "application/vnd.docker.container.image.v1+json", // Docker Image 的 config type Annotations: map[string]string{ "org.opencontainers.image.title": "alpine", }, @@ -144,7 +144,7 @@ func (c *OCIClientMock) initArtifactsForRegistry(registryID string) { } } -func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) { +func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { // Check if we have cached data for this registry repos, exists := c.repositories[registry.ID] if !exists { @@ -156,10 +156,20 @@ func (c *OCIClientMock) ListRepositories(ctx context.Context, registry *entity.R "library/alpine", } c.repositories[registry.ID] = repos - + // Also initialize artifacts for this registry c.initArtifactsForRegistry(registry.ID) } + if strings.EqualFold(strings.TrimSpace(artifactType), "chart") { + chartRepos := make([]string, 0) + for _, repo := range repos { + artifacts, _ := c.ListArtifacts(ctx, registry, repo, "chart") + if len(artifacts) > 0 { + chartRepos = append(chartRepos, repo) + } + } + return chartRepos, nil + } return repos, nil } @@ -170,20 +180,20 @@ func (c *OCIClientMock) ListArtifacts(ctx context.Context, registry *entity.Regi c.initArtifactsForRegistry(registry.ID) regArtifacts = 
c.artifacts[registry.ID] } - + artifacts, exists := regArtifacts[repository] if !exists { return []*entity.Artifact{}, nil } - + // 应用 mediaType 过滤 if mediaTypeFilter == "" || mediaTypeFilter == "all" { return artifacts, nil } - + filtered := make([]*entity.Artifact, 0) filter := strings.ToLower(strings.TrimSpace(mediaTypeFilter)) - + for _, artifact := range artifacts { switch filter { case "chart": @@ -200,7 +210,7 @@ func (c *OCIClientMock) ListArtifacts(ctx context.Context, registry *entity.Regi } } } - + return filtered, nil } @@ -211,19 +221,19 @@ func (c *OCIClientMock) GetArtifact(ctx context.Context, registry *entity.Regist c.initArtifactsForRegistry(registry.ID) regArtifacts = c.artifacts[registry.ID] } - + artifacts, exists := regArtifacts[repository] if !exists { return nil, entity.ErrArtifactNotFound } - + // 根据 tag 或 digest 查找 for _, artifact := range artifacts { if artifact.Tag == reference || artifact.Digest == reference { return artifact, nil } } - + return nil, entity.ErrArtifactNotFound } @@ -232,11 +242,11 @@ func (c *OCIClientMock) GetValuesSchema(ctx context.Context, registry *entity.Re if err != nil { return "", err } - + if !artifact.IsChart() { return "", fmt.Errorf("not a helm chart") } - + // 返回 Mock values schema mockSchema := `{ "$schema": "http://json-schema.org/draft-07/schema#", @@ -262,12 +272,23 @@ func (c *OCIClientMock) GetValuesSchema(ctx context.Context, registry *entity.Re return mockSchema, nil } +func (c *OCIClientMock) GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) { + artifact, err := c.GetArtifact(ctx, registry, repository, reference) + if err != nil { + return "", err + } + if !artifact.IsChart() { + return "", fmt.Errorf("not a helm chart") + } + return "replicaCount: 1\nimage:\n repository: nginx\n tag: latest\nservice:\n type: ClusterIP\n", nil +} + func (c *OCIClientMock) PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, 
destPath string) error { _, err := c.GetArtifact(ctx, registry, repository, reference) if err != nil { return err } - + // Mock 实现,不实际下载 return nil } @@ -281,4 +302,3 @@ func (c *OCIClientMock) CheckHealth(ctx context.Context, registry *entity.Regist // Mock 实现,总是返回健康 return nil } - diff --git a/backend/internal/adapter/output/oci/real/oci_client.go b/backend/internal/adapter/output/oci/real/oci_client.go index f9a9e76..d03b40e 100644 --- a/backend/internal/adapter/output/oci/real/oci_client.go +++ b/backend/internal/adapter/output/oci/real/oci_client.go @@ -8,9 +8,13 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "path/filepath" + "sort" + "strconv" "strings" + "time" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" @@ -25,6 +29,30 @@ type OCIClient struct { httpClient *http.Client } +type harborProject struct { + Name string `json:"name"` +} + +type harborRepository struct { + Name string `json:"name"` + ArtifactCount int `json:"artifact_count"` +} + +type harborTag struct { + Name string `json:"name"` + PushTime string `json:"push_time"` +} + +type harborArtifact struct { + Digest string `json:"digest"` + MediaType string `json:"media_type"` + ArtifactType string `json:"artifact_type"` + Size int64 `json:"size"` + PushTime string `json:"push_time"` + Tags []harborTag `json:"tags"` + Annotations map[string]string `json:"annotations"` +} + // NewOCIClient 创建真实的 OCI 客户端 func NewOCIClient() repository.OCIClient { return &OCIClient{ @@ -60,8 +88,34 @@ func (c *OCIClient) getRegistry(reg *entity.Registry) (*remote.Registry, error) return registry, nil } -// ListRepositories 列出 Registry 中的所有 repositories -func (c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) { +// ListRepositories 列出 Registry 中的 repositories. 
+// Harbor registry 优先使用 Harbor v2.0 API,避免 robot 账号依赖 /v2/_catalog 全局权限。 +func (c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { + repositories, harborErr := c.listHarborRepositories(ctx, registry, artifactType) + if harborErr == nil { + return repositories, nil + } + + repositories, catalogErr := c.listOCIRepositories(ctx, registry) + if catalogErr != nil { + return nil, fmt.Errorf("failed to list repositories via Harbor API: %v; OCI catalog fallback also failed: %w", harborErr, catalogErr) + } + + if strings.EqualFold(strings.TrimSpace(artifactType), "chart") { + chartRepos := make([]string, 0) + for _, repo := range repositories { + artifacts, err := c.ListArtifacts(ctx, registry, repo, "chart") + if err == nil && len(artifacts) > 0 { + chartRepos = append(chartRepos, repo) + } + } + return chartRepos, nil + } + + return repositories, nil +} + +func (c *OCIClient) listOCIRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) { reg, err := c.getRegistry(registry) if err != nil { return nil, err @@ -81,9 +135,278 @@ func (c *OCIClient) ListRepositories(ctx context.Context, registry *entity.Regis return repositories, nil } +func (c *OCIClient) listHarborRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { + projects, err := c.harborListProjects(ctx, registry) + if err != nil { + return nil, err + } + + repositorySet := make(map[string]struct{}) + chartOnly := strings.EqualFold(strings.TrimSpace(artifactType), "chart") || strings.TrimSpace(artifactType) == "" + + for _, project := range projects { + projectName := strings.TrimSpace(project.Name) + if projectName == "" { + continue + } + + repositories, err := c.harborListProjectRepositories(ctx, registry, projectName) + if err != nil { + return nil, err + } + + for _, harborRepo := range repositories { + repoName := normalizeHarborRepositoryName(projectName, 
harborRepo.Name) + if repoName == "" { + continue + } + if chartOnly { + artifacts, err := c.listHarborArtifacts(ctx, registry, repoName, "chart") + if err != nil || len(artifacts) == 0 { + continue + } + } + repositorySet[repoName] = struct{}{} + } + } + + repositories := make([]string, 0, len(repositorySet)) + for repo := range repositorySet { + repositories = append(repositories, repo) + } + sort.Strings(repositories) + return repositories, nil +} + +func (c *OCIClient) harborListProjects(ctx context.Context, registry *entity.Registry) ([]harborProject, error) { + var projects []harborProject + if err := c.harborGetPaged(ctx, registry, "/api/v2.0/projects", url.Values{"member": []string{"true"}}, &projects); err != nil { + return nil, err + } + return projects, nil +} + +func (c *OCIClient) harborListProjectRepositories(ctx context.Context, registry *entity.Registry, projectName string) ([]harborRepository, error) { + var repositories []harborRepository + path := "/api/v2.0/projects/" + url.PathEscape(projectName) + "/repositories" + if err := c.harborGetPaged(ctx, registry, path, nil, &repositories); err != nil { + return nil, err + } + return repositories, nil +} + +func (c *OCIClient) listHarborArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { + projectName, repoName, ok := splitHarborRepository(repository) + if !ok { + return nil, fmt.Errorf("repository %q is not a Harbor project repository path", repository) + } + + var harborArtifacts []harborArtifact + path := "/api/v2.0/projects/" + url.PathEscape(projectName) + "/repositories/" + url.PathEscape(repoName) + "/artifacts" + query := url.Values{ + "with_tag": []string{"true"}, + "with_label": []string{"false"}, + } + if err := c.harborGetPaged(ctx, registry, path, query, &harborArtifacts); err != nil { + return nil, err + } + + artifacts := make([]*entity.Artifact, 0) + for _, harborArtifact := range harborArtifacts { + tags := 
harborArtifact.Tags + if len(tags) == 0 { + continue + } + + for _, tag := range tags { + if strings.TrimSpace(tag.Name) == "" { + continue + } + artifact := &entity.Artifact{ + Repository: repository, + Tag: tag.Name, + Digest: harborArtifact.Digest, + MediaType: harborArtifact.MediaType, + ConfigType: harborArtifact.ArtifactType, + Size: harborArtifact.Size, + Annotations: harborArtifact.Annotations, + CreatedAt: parseHarborTime(firstNonEmpty(tag.PushTime, harborArtifact.PushTime)), + } + if artifact.Annotations == nil { + artifact.Annotations = make(map[string]string) + } + + artifact.DetermineType() + if isHarborChartArtifact(harborArtifact) { + artifact.Type = entity.ArtifactTypeChart + } + + if c.shouldIncludeArtifact(artifact, mediaTypeFilter) { + artifacts = append(artifacts, artifact) + } + } + } + + return artifacts, nil +} + +func (c *OCIClient) harborGetPaged(ctx context.Context, registry *entity.Registry, path string, query url.Values, target interface{}) error { + const pageSize = 100 + + accumulated := make([]json.RawMessage, 0) + for page := 1; ; page++ { + pageQuery := cloneValues(query) + pageQuery.Set("page", fmt.Sprintf("%d", page)) + pageQuery.Set("page_size", fmt.Sprintf("%d", pageSize)) + + body, total, err := c.harborGet(ctx, registry, path, pageQuery) + if err != nil { + return err + } + + var pageItems []json.RawMessage + if err := json.Unmarshal(body, &pageItems); err != nil { + return fmt.Errorf("failed to decode Harbor response for %s: %w", path, err) + } + accumulated = append(accumulated, pageItems...) 
+ + if len(pageItems) < pageSize || (total >= 0 && len(accumulated) >= total) { + break + } + } + + combined, err := json.Marshal(accumulated) + if err != nil { + return fmt.Errorf("failed to combine Harbor pages: %w", err) + } + if err := json.Unmarshal(combined, target); err != nil { + return fmt.Errorf("failed to decode Harbor pages: %w", err) + } + return nil +} + +func (c *OCIClient) harborGet(ctx context.Context, registry *entity.Registry, path string, query url.Values) ([]byte, int, error) { + baseURL, err := harborBaseURL(registry) + if err != nil { + return nil, -1, err + } + + requestURL := strings.TrimRight(baseURL, "/") + path + if len(query) > 0 { + requestURL += "?" + query.Encode() + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return nil, -1, err + } + req.Header.Set("Accept", "application/json") + if registry.Username != "" || registry.Password != "" { + req.SetBasicAuth(registry.Username, registry.Password) + } + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, -1, fmt.Errorf("Harbor API request failed: %w", err) + } + defer resp.Body.Close() + + body, readErr := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) + if readErr != nil { + return nil, -1, fmt.Errorf("failed to read Harbor API response: %w", readErr) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, -1, fmt.Errorf("Harbor API %s returned %d: %s", path, resp.StatusCode, strings.TrimSpace(string(body))) + } + + total := -1 + if value := strings.TrimSpace(resp.Header.Get("X-Total-Count")); value != "" { + if parsed, err := strconv.Atoi(value); err == nil { + total = parsed + } + } + return body, total, nil +} + +func harborBaseURL(registry *entity.Registry) (string, error) { + rawURL := strings.TrimSpace(registry.URL) + if rawURL == "" { + return "", fmt.Errorf("registry URL is empty") + } + if !strings.Contains(rawURL, "://") { + rawURL = "https://" + rawURL + } + parsed, err := 
url.Parse(rawURL) + if err != nil { + return "", fmt.Errorf("invalid registry URL %q: %w", registry.URL, err) + } + if parsed.Scheme == "" || parsed.Host == "" { + return "", fmt.Errorf("invalid registry URL %q", registry.URL) + } + return parsed.Scheme + "://" + parsed.Host, nil +} + +func splitHarborRepository(repository string) (string, string, bool) { + projectName, repoName, ok := strings.Cut(strings.Trim(repository, "/"), "/") + if !ok || projectName == "" || repoName == "" { + return "", "", false + } + return projectName, repoName, true +} + +func normalizeHarborRepositoryName(projectName, repositoryName string) string { + repositoryName = strings.Trim(repositoryName, "/") + if repositoryName == "" { + return "" + } + if strings.HasPrefix(repositoryName, projectName+"/") { + return repositoryName + } + return projectName + "/" + repositoryName +} + +func isHarborChartArtifact(artifact harborArtifact) bool { + typeInfo := strings.ToLower(strings.TrimSpace(artifact.ArtifactType + " " + artifact.MediaType)) + return strings.Contains(typeInfo, "chart") || strings.Contains(typeInfo, "helm") +} + +func cloneValues(values url.Values) url.Values { + cloned := make(url.Values) + for key, items := range values { + cloned[key] = append([]string(nil), items...) 
+ } + return cloned +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if strings.TrimSpace(value) != "" { + return value + } + } + return "" +} + +func parseHarborTime(value string) time.Time { + value = strings.TrimSpace(value) + if value == "" { + return time.Time{} + } + for _, layout := range []string{time.RFC3339Nano, time.RFC3339, "2006-01-02T15:04:05.999999", "2006-01-02T15:04:05"} { + if parsed, err := time.Parse(layout, value); err == nil { + return parsed + } + } + return time.Time{} +} + // ListArtifacts 列出指定 repository 的所有 artifacts // mediaTypeFilter: "all", "image", "chart", "other" - 使用模糊匹配过滤 func (c *OCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { + if artifacts, err := c.listHarborArtifacts(ctx, registry, repository, mediaTypeFilter); err == nil { + return artifacts, nil + } + reg, err := c.getRegistry(registry) if err != nil { return nil, err @@ -370,6 +693,113 @@ func (c *OCIClient) GetValuesSchema(ctx context.Context, registry *entity.Regist return "", entity.ErrValuesSchemaNotFound } +// GetValuesYAML 获取 Helm Chart 包内原始 values.yaml +func (c *OCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) { + data, err := c.readChartFile(ctx, registry, repository, reference, "values.yaml") + if err != nil { + return "", err + } + if strings.TrimSpace(data) == "" { + return "", entity.ErrArtifactNotFound + } + return data, nil +} + +func (c *OCIClient) readChartFile(ctx context.Context, registry *entity.Registry, repository, reference, filename string) (string, error) { + reg, err := c.getRegistry(registry) + if err != nil { + return "", err + } + + repo, err := reg.Repository(ctx, repository) + if err != nil { + return "", fmt.Errorf("failed to get repository: %w", err) + } + + desc, err := repo.Resolve(ctx, reference) + if err != nil { + return "", 
fmt.Errorf("failed to resolve artifact: %w", err) + } + + manifestReader, err := repo.Fetch(ctx, desc) + if err != nil { + return "", fmt.Errorf("failed to fetch manifest: %w", err) + } + defer manifestReader.Close() + + manifestBytes, err := io.ReadAll(manifestReader) + if err != nil { + return "", fmt.Errorf("failed to read manifest: %w", err) + } + + var manifest ocispec.Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + return "", fmt.Errorf("failed to unmarshal manifest: %w", err) + } + + var chartLayer *ocispec.Descriptor + for i := range manifest.Layers { + layer := manifest.Layers[i] + if strings.Contains(layer.MediaType, "cncf.helm.chart") || + strings.Contains(layer.MediaType, "helm.chart.content") { + chartLayer = &manifest.Layers[i] + break + } + } + if chartLayer == nil { + return "", fmt.Errorf("helm chart layer not found in manifest") + } + if chartLayer.Digest == "" { + return "", fmt.Errorf("chart layer digest is empty") + } + if _, err := digest.Parse(string(chartLayer.Digest)); err != nil { + return "", fmt.Errorf("invalid chart layer digest: %w", err) + } + + layerReader, err := repo.Fetch(ctx, *chartLayer) + if err != nil { + return "", fmt.Errorf("failed to fetch chart layer: %w", err) + } + defer layerReader.Close() + + gzipReader, err := gzip.NewReader(layerReader) + if err != nil { + return "", fmt.Errorf("failed to create gzip reader: %w", err) + } + defer gzipReader.Close() + + tarReader := tar.NewReader(gzipReader) + bestDepth := int(^uint(0) >> 1) + var bestData []byte + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return "", fmt.Errorf("failed to read chart archive: %w", err) + } + if header.Typeflag != tar.TypeReg { + continue + } + if strings.HasSuffix(header.Name, filename) { + data, err := io.ReadAll(tarReader) + if err != nil { + return "", fmt.Errorf("failed to read %s: %w", filename, err) + } + depth := strings.Count(strings.Trim(header.Name, "/"), "/") + 
if depth < bestDepth { + bestDepth = depth + bestData = data + } + } + } + if len(bestData) > 0 { + return string(bestData), nil + } + return "", fmt.Errorf("%s not found in chart", filename) +} + // PullArtifact 下载 artifact 到本地 func (c *OCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, destPath string) error { reg, err := c.getRegistry(registry) diff --git a/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go b/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go index 90e031b..0ba1fed 100644 --- a/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/cluster_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" "github.com/ocdp/cluster-service/internal/pkg/crypto" @@ -27,21 +27,21 @@ func NewClusterRepositoryMock(encryptor crypto.Encryptor) repository.ClusterRepo func (r *ClusterRepositoryMock) Create(ctx context.Context, cluster *entity.Cluster) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查名称是否已存在 for _, c := range r.clusters { if c.Name == cluster.Name { return entity.ErrClusterExists } } - + // Mock 模式:如果没有提供认证信息,自动填充默认的 Mock 证书 if (cluster.CertData == "" || cluster.KeyData == "") && cluster.Token == "" { cluster.CAData = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1vY2sgQ0EgQ2VydGlmaWNhdGUKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQ==" cluster.CertData = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1vY2sgQ2xpZW50IENlcnRpZmljYXRlCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0=" cluster.KeyData = "LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNb2NrIFByaXZhdGUgS2V5Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0t" } - + // 加密敏感数据后存储 encryptedCluster := r.encryptCluster(cluster) r.clusters[cluster.ID] = encryptedCluster @@ -51,12 +51,12 @@ func (r *ClusterRepositoryMock) Create(ctx 
context.Context, cluster *entity.Clus func (r *ClusterRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + cluster, exists := r.clusters[id] if !exists { return nil, entity.ErrClusterNotFound } - + // 解密敏感数据后返回 return r.decryptCluster(cluster), nil } @@ -64,25 +64,25 @@ func (r *ClusterRepositoryMock) GetByID(ctx context.Context, id string) (*entity func (r *ClusterRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, cluster := range r.clusters { if cluster.Name == name { // 解密敏感数据后返回 return r.decryptCluster(cluster), nil } } - + return nil, entity.ErrClusterNotFound } func (r *ClusterRepositoryMock) Update(ctx context.Context, cluster *entity.Cluster) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.clusters[cluster.ID]; !exists { return entity.ErrClusterNotFound } - + // 加密敏感数据后存储 encryptedCluster := r.encryptCluster(cluster) r.clusters[cluster.ID] = encryptedCluster @@ -92,11 +92,11 @@ func (r *ClusterRepositoryMock) Update(ctx context.Context, cluster *entity.Clus func (r *ClusterRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.clusters[id]; !exists { return entity.ErrClusterNotFound } - + delete(r.clusters, id) return nil } @@ -104,20 +104,20 @@ func (r *ClusterRepositoryMock) Delete(ctx context.Context, id string) error { func (r *ClusterRepositoryMock) List(ctx context.Context) ([]*entity.Cluster, error) { r.mu.RLock() defer r.mu.RUnlock() - + clusters := make([]*entity.Cluster, 0, len(r.clusters)) for _, cluster := range r.clusters { // 解密敏感数据后返回 clusters = append(clusters, r.decryptCluster(cluster)) } - + return clusters, nil } // encryptCluster 加密 Cluster 的敏感数据 func (r *ClusterRepositoryMock) encryptCluster(cluster *entity.Cluster) *entity.Cluster { encrypted := *cluster // 复制 - + // 加密证书数据 if cluster.CAData != "" && 
!crypto.IsEncrypted(cluster.CAData) { if encryptedData, err := r.encryptor.Encrypt(cluster.CAData); err == nil { @@ -139,14 +139,14 @@ func (r *ClusterRepositoryMock) encryptCluster(cluster *entity.Cluster) *entity. encrypted.Token = encryptedData } } - + return &encrypted } // decryptCluster 解密 Cluster 的敏感数据 func (r *ClusterRepositoryMock) decryptCluster(cluster *entity.Cluster) *entity.Cluster { decrypted := *cluster // 复制 - + // 解密证书数据 if cluster.CAData != "" && crypto.IsEncrypted(cluster.CAData) { if decryptedData, err := r.encryptor.Decrypt(cluster.CAData); err == nil { @@ -168,7 +168,6 @@ func (r *ClusterRepositoryMock) decryptCluster(cluster *entity.Cluster) *entity. decrypted.Token = decryptedData } } - + return &decrypted } - diff --git a/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go b/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go index 907401d..c89e4ea 100644 --- a/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/instance_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -24,14 +24,14 @@ func NewInstanceRepositoryMock() repository.InstanceRepository { func (r *InstanceRepositoryMock) Create(ctx context.Context, instance *entity.Instance) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查同一集群中名称是否已存在 for _, inst := range r.instances { if inst.ClusterID == instance.ClusterID && inst.Name == instance.Name { return entity.ErrInstanceExists } } - + r.instances[instance.ID] = instance return nil } @@ -39,36 +39,36 @@ func (r *InstanceRepositoryMock) Create(ctx context.Context, instance *entity.In func (r *InstanceRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instance, exists := r.instances[id] if 
!exists { return nil, entity.ErrInstanceNotFound } - + return instance, nil } func (r *InstanceRepositoryMock) GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, instance := range r.instances { if instance.ClusterID == clusterID && instance.Name == name { return instance, nil } } - + return nil, entity.ErrInstanceNotFound } func (r *InstanceRepositoryMock) Update(ctx context.Context, instance *entity.Instance) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.instances[instance.ID]; !exists { return entity.ErrInstanceNotFound } - + r.instances[instance.ID] = instance return nil } @@ -76,11 +76,11 @@ func (r *InstanceRepositoryMock) Update(ctx context.Context, instance *entity.In func (r *InstanceRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.instances[id]; !exists { return entity.ErrInstanceNotFound } - + delete(r.instances, id) return nil } @@ -88,26 +88,25 @@ func (r *InstanceRepositoryMock) Delete(ctx context.Context, id string) error { func (r *InstanceRepositoryMock) ListByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instances := make([]*entity.Instance, 0) for _, instance := range r.instances { if instance.ClusterID == clusterID { instances = append(instances, instance) } } - + return instances, nil } func (r *InstanceRepositoryMock) List(ctx context.Context) ([]*entity.Instance, error) { r.mu.RLock() defer r.mu.RUnlock() - + instances := make([]*entity.Instance, 0, len(r.instances)) for _, instance := range r.instances { instances = append(instances, instance) } - + return instances, nil } - diff --git a/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go b/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go index 536b09d..55e77ea 100644 --- 
a/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/registry_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" "github.com/ocdp/cluster-service/internal/pkg/crypto" @@ -27,14 +27,14 @@ func NewRegistryRepositoryMock(encryptor crypto.Encryptor) repository.RegistryRe func (r *RegistryRepositoryMock) Create(ctx context.Context, registry *entity.Registry) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查名称是否已存在 for _, reg := range r.registries { if reg.Name == registry.Name { return entity.ErrRegistryExists } } - + // 加密敏感数据后存储 encryptedRegistry := r.encryptRegistry(registry) r.registries[registry.ID] = encryptedRegistry @@ -44,12 +44,12 @@ func (r *RegistryRepositoryMock) Create(ctx context.Context, registry *entity.Re func (r *RegistryRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + registry, exists := r.registries[id] if !exists { return nil, entity.ErrRegistryNotFound } - + // 解密敏感数据后返回 return r.decryptRegistry(registry), nil } @@ -57,25 +57,25 @@ func (r *RegistryRepositoryMock) GetByID(ctx context.Context, id string) (*entit func (r *RegistryRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, registry := range r.registries { if registry.Name == name { // 解密敏感数据后返回 return r.decryptRegistry(registry), nil } } - + return nil, entity.ErrRegistryNotFound } func (r *RegistryRepositoryMock) Update(ctx context.Context, registry *entity.Registry) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.registries[registry.ID]; !exists { return entity.ErrRegistryNotFound } - + // 加密敏感数据后存储 encryptedRegistry := r.encryptRegistry(registry) r.registries[registry.ID] = encryptedRegistry @@ -85,11 
+85,11 @@ func (r *RegistryRepositoryMock) Update(ctx context.Context, registry *entity.Re func (r *RegistryRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.registries[id]; !exists { return entity.ErrRegistryNotFound } - + delete(r.registries, id) return nil } @@ -97,41 +97,40 @@ func (r *RegistryRepositoryMock) Delete(ctx context.Context, id string) error { func (r *RegistryRepositoryMock) List(ctx context.Context) ([]*entity.Registry, error) { r.mu.RLock() defer r.mu.RUnlock() - + registries := make([]*entity.Registry, 0, len(r.registries)) for _, registry := range r.registries { // 解密敏感数据后返回 registries = append(registries, r.decryptRegistry(registry)) } - + return registries, nil } // encryptRegistry 加密 Registry 的敏感数据 func (r *RegistryRepositoryMock) encryptRegistry(registry *entity.Registry) *entity.Registry { encrypted := *registry // 复制 - + // 加密密码 if registry.Password != "" && !crypto.IsEncrypted(registry.Password) { if encryptedPassword, err := r.encryptor.Encrypt(registry.Password); err == nil { encrypted.Password = encryptedPassword } } - + return &encrypted } // decryptRegistry 解密 Registry 的敏感数据 func (r *RegistryRepositoryMock) decryptRegistry(registry *entity.Registry) *entity.Registry { decrypted := *registry // 复制 - + // 解密密码 if registry.Password != "" && crypto.IsEncrypted(registry.Password) { if decryptedPassword, err := r.encryptor.Decrypt(registry.Password); err == nil { decrypted.Password = decryptedPassword } } - + return &decrypted } - diff --git a/backend/internal/adapter/output/persistence/mock/user_repository_mock.go b/backend/internal/adapter/output/persistence/mock/user_repository_mock.go index 3dc5cf5..076070d 100644 --- a/backend/internal/adapter/output/persistence/mock/user_repository_mock.go +++ b/backend/internal/adapter/output/persistence/mock/user_repository_mock.go @@ -3,7 +3,7 @@ package mock import ( "context" "sync" - + 
"github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) @@ -24,14 +24,14 @@ func NewUserRepositoryMock() repository.UserRepository { func (r *UserRepositoryMock) Create(ctx context.Context, user *entity.User) error { r.mu.Lock() defer r.mu.Unlock() - + // 检查是否已存在 for _, u := range r.users { if u.Username == user.Username { return entity.ErrUserExists } } - + r.users[user.ID] = user return nil } @@ -39,36 +39,36 @@ func (r *UserRepositoryMock) Create(ctx context.Context, user *entity.User) erro func (r *UserRepositoryMock) GetByID(ctx context.Context, id string) (*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + user, exists := r.users[id] if !exists { return nil, entity.ErrUserNotFound } - + return user, nil } func (r *UserRepositoryMock) GetByUsername(ctx context.Context, username string) (*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + for _, user := range r.users { if user.Username == username { return user, nil } } - + return nil, entity.ErrUserNotFound } func (r *UserRepositoryMock) Update(ctx context.Context, user *entity.User) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.users[user.ID]; !exists { return entity.ErrUserNotFound } - + r.users[user.ID] = user return nil } @@ -76,24 +76,34 @@ func (r *UserRepositoryMock) Update(ctx context.Context, user *entity.User) erro func (r *UserRepositoryMock) Delete(ctx context.Context, id string) error { r.mu.Lock() defer r.mu.Unlock() - + if _, exists := r.users[id]; !exists { return entity.ErrUserNotFound } - + delete(r.users, id) return nil } +func (r *UserRepositoryMock) AdminExists(ctx context.Context) (bool, error) { + r.mu.RLock() + defer r.mu.RUnlock() + for _, u := range r.users { + if u.Role == "admin" { + return true, nil + } + } + return false, nil +} + func (r *UserRepositoryMock) List(ctx context.Context) ([]*entity.User, error) { r.mu.RLock() defer r.mu.RUnlock() - + users := make([]*entity.User, 0, 
len(r.users)) for _, user := range r.users { users = append(users, user) } - + return users, nil } - diff --git a/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go b/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go new file mode 100644 index 0000000..5d24789 --- /dev/null +++ b/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go @@ -0,0 +1,186 @@ +package mock + +import ( + "context" + "sync" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + +type WorkspaceRepositoryMock struct { + mu sync.RWMutex + workspaces map[string]*entity.Workspace +} + +func NewWorkspaceRepositoryMock() repository.WorkspaceRepository { + repo := &WorkspaceRepositoryMock{workspaces: make(map[string]*entity.Workspace)} + defaultWorkspace := entity.NewWorkspace(entity.DefaultWorkspaceName, "") + defaultWorkspace.ID = entity.DefaultWorkspaceID + repo.workspaces[defaultWorkspace.ID] = defaultWorkspace + return repo +} + +func (r *WorkspaceRepositoryMock) Create(ctx context.Context, workspace *entity.Workspace) error { + r.mu.Lock() + defer r.mu.Unlock() + if workspace.ID == "" { + workspace.ID = uuid.New().String() + } + for _, existing := range r.workspaces { + if existing.Name == workspace.Name { + return entity.ErrWorkspaceExists + } + } + copy := *workspace + r.workspaces[workspace.ID] = © + return nil +} + +func (r *WorkspaceRepositoryMock) GetByID(ctx context.Context, id string) (*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + workspace, ok := r.workspaces[id] + if !ok { + return nil, entity.ErrWorkspaceNotFound + } + copy := *workspace + return ©, nil +} + +func (r *WorkspaceRepositoryMock) GetByName(ctx context.Context, name string) (*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + for _, workspace := range r.workspaces { + if workspace.Name == name { + copy := 
*workspace + return ©, nil + } + } + return nil, entity.ErrWorkspaceNotFound +} + +func (r *WorkspaceRepositoryMock) Update(ctx context.Context, workspace *entity.Workspace) error { + r.mu.Lock() + defer r.mu.Unlock() + if _, ok := r.workspaces[workspace.ID]; !ok { + return entity.ErrWorkspaceNotFound + } + copy := *workspace + r.workspaces[workspace.ID] = © + return nil +} + +func (r *WorkspaceRepositoryMock) Delete(ctx context.Context, id string) error { + r.mu.Lock() + defer r.mu.Unlock() + if _, ok := r.workspaces[id]; !ok { + return entity.ErrWorkspaceNotFound + } + delete(r.workspaces, id) + return nil +} + +func (r *WorkspaceRepositoryMock) List(ctx context.Context) ([]*entity.Workspace, error) { + r.mu.RLock() + defer r.mu.RUnlock() + result := make([]*entity.Workspace, 0, len(r.workspaces)) + for _, workspace := range r.workspaces { + copy := *workspace + result = append(result, ©) + } + return result, nil +} + +type WorkspaceClusterBindingRepositoryMock struct { + mu sync.RWMutex + bindings map[string]*entity.WorkspaceClusterBinding +} + +func NewWorkspaceClusterBindingRepositoryMock() repository.WorkspaceClusterBindingRepository { + return &WorkspaceClusterBindingRepositoryMock{bindings: make(map[string]*entity.WorkspaceClusterBinding)} +} + +func bindingKey(workspaceID, clusterID string) string { + return workspaceID + "/" + clusterID +} + +func (r *WorkspaceClusterBindingRepositoryMock) Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error { + r.mu.Lock() + defer r.mu.Unlock() + if binding.ID == "" { + binding.ID = uuid.New().String() + } + copy := *binding + r.bindings[bindingKey(binding.WorkspaceID, binding.ClusterID)] = © + return nil +} + +func (r *WorkspaceClusterBindingRepositoryMock) Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + r.mu.RLock() + defer r.mu.RUnlock() + binding, ok := r.bindings[bindingKey(workspaceID, clusterID)] + if !ok { + return nil, 
entity.ErrWorkspaceNotFound + } + copy := *binding + return ©, nil +} + +func (r *WorkspaceClusterBindingRepositoryMock) ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error) { + r.mu.RLock() + defer r.mu.RUnlock() + result := make([]*entity.WorkspaceClusterBinding, 0) + for _, binding := range r.bindings { + if binding.WorkspaceID != workspaceID { + continue + } + copy := *binding + result = append(result, ©) + } + return result, nil +} + +func (r *WorkspaceClusterBindingRepositoryMock) Delete(ctx context.Context, workspaceID, clusterID string) error { + r.mu.Lock() + defer r.mu.Unlock() + delete(r.bindings, bindingKey(workspaceID, clusterID)) + return nil +} + +type AuditLogRepositoryMock struct { + mu sync.RWMutex + logs []*entity.AuditLog +} + +func NewAuditLogRepositoryMock() repository.AuditLogRepository { + return &AuditLogRepositoryMock{logs: make([]*entity.AuditLog, 0)} +} + +func (r *AuditLogRepositoryMock) Create(ctx context.Context, logEntry *entity.AuditLog) error { + r.mu.Lock() + defer r.mu.Unlock() + if logEntry.ID == "" { + logEntry.ID = uuid.New().String() + } + copy := *logEntry + r.logs = append(r.logs, ©) + return nil +} + +func (r *AuditLogRepositoryMock) ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) { + r.mu.RLock() + defer r.mu.RUnlock() + result := make([]*entity.AuditLog, 0) + for i := len(r.logs) - 1; i >= 0; i-- { + if r.logs[i].WorkspaceID == workspaceID { + copy := *r.logs[i] + result = append(result, ©) + if limit > 0 && len(result) >= limit { + break + } + } + } + return result, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/cluster_repository.go b/backend/internal/adapter/output/persistence/postgres/cluster_repository.go index fcd9f6d..93cb03f 100644 --- a/backend/internal/adapter/output/persistence/postgres/cluster_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/cluster_repository.go @@ 
-12,54 +12,33 @@ import ( "github.com/ocdp/cluster-service/internal/pkg/crypto" ) -// ClusterRepository PostgreSQL 集群仓储实现 type ClusterRepository struct { db *DB encryptor crypto.Encryptor } -// NewClusterRepository 创建 PostgreSQL 集群仓储 func NewClusterRepository(db *DB, encryptor crypto.Encryptor) repository.ClusterRepository { - return &ClusterRepository{ - db: db, - encryptor: encryptor, - } + return &ClusterRepository{db: db, encryptor: encryptor} } -// Create 创建集群 func (r *ClusterRepository) Create(ctx context.Context, cluster *entity.Cluster) error { if cluster.ID == "" { cluster.ID = uuid.New().String() } - - // 加密敏感数据 - encryptedCAData, err := r.encryptor.Encrypt(cluster.CAData) + encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken, err := r.encryptClusterSecrets(cluster) if err != nil { - return fmt.Errorf("failed to encrypt CA data: %w", err) + return err } - - encryptedCertData, err := r.encryptor.Encrypt(cluster.CertData) - if err != nil { - return fmt.Errorf("failed to encrypt cert data: %w", err) - } - - encryptedKeyData, err := r.encryptor.Encrypt(cluster.KeyData) - if err != nil { - return fmt.Errorf("failed to encrypt key data: %w", err) - } - - encryptedToken, err := r.encryptor.Encrypt(cluster.Token) - if err != nil { - return fmt.Errorf("failed to encrypt token: %w", err) - } - query := ` - INSERT INTO clusters (id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + INSERT INTO clusters + (id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14) ` - _, err = r.db.conn.ExecContext(ctx, query, cluster.ID, + cluster.WorkspaceID, + cluster.OwnerID, + cluster.Visibility, cluster.Name, cluster.Host, encryptedCAData, @@ -67,160 +46,62 @@ func (r *ClusterRepository) Create(ctx context.Context, cluster 
*entity.Cluster) encryptedKeyData, encryptedToken, cluster.Description, + cluster.DefaultNamespace, cluster.CreatedAt, cluster.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create cluster: %w", err) } - return nil } -// GetByID 根据 ID 获取集群 func (r *ClusterRepository) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { - query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at - FROM clusters - WHERE id = $1 - ` - - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrClusterNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get cluster: %w", err) - } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) - } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) - } - - cluster.KeyData, err = r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - cluster.Token, err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - - return cluster, nil + return r.get(ctx, "id = $1", id) } -// GetByName 根据名称获取集群 func (r *ClusterRepository) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { - query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at + return r.get(ctx, "name = $1", name) +} + +func (r 
*ClusterRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Cluster, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at FROM clusters - WHERE name = $1 - ` - - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := r.db.conn.QueryRowContext(ctx, query, name).Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrClusterNotFound - } + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) if err != nil { return nil, fmt.Errorf("failed to get cluster: %w", err) } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrClusterNotFound } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) + cluster, err := r.scanCluster(rows) if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) + return nil, err } - - cluster.KeyData, err = r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - cluster.Token, err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - return cluster, nil } -// Update 更新集群 func (r *ClusterRepository) Update(ctx context.Context, cluster *entity.Cluster) error { cluster.UpdatedAt = time.Now() - - // 加密敏感数据 - encryptedCAData, err := r.encryptor.Encrypt(cluster.CAData) + encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken, err := 
r.encryptClusterSecrets(cluster) if err != nil { - return fmt.Errorf("failed to encrypt CA data: %w", err) + return err } - - encryptedCertData, err := r.encryptor.Encrypt(cluster.CertData) - if err != nil { - return fmt.Errorf("failed to encrypt cert data: %w", err) - } - - encryptedKeyData, err := r.encryptor.Encrypt(cluster.KeyData) - if err != nil { - return fmt.Errorf("failed to encrypt key data: %w", err) - } - - encryptedToken, err := r.encryptor.Encrypt(cluster.Token) - if err != nil { - return fmt.Errorf("failed to encrypt token: %w", err) - } - query := ` UPDATE clusters - SET name = $1, host = $2, ca_data = $3, cert_data = $4, key_data = $5, - token = $6, description = $7, updated_at = $8 - WHERE id = $9 + SET workspace_id = $1, owner_id = $2, visibility = $3, name = $4, host = $5, + ca_data = $6, cert_data = $7, key_data = $8, token = $9, description = $10, + default_namespace = $11, updated_at = $12 + WHERE id = $13 ` - result, err := r.db.conn.ExecContext(ctx, query, + cluster.WorkspaceID, + cluster.OwnerID, + cluster.Visibility, cluster.Name, cluster.Host, encryptedCAData, @@ -228,110 +109,134 @@ func (r *ClusterRepository) Update(ctx context.Context, cluster *entity.Cluster) encryptedKeyData, encryptedToken, cluster.Description, + cluster.DefaultNamespace, cluster.UpdatedAt, cluster.ID, ) - if err != nil { return fmt.Errorf("failed to update cluster: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrClusterNotFound } - return nil } -// Delete 删除集群 func (r *ClusterRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM clusters WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM clusters WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete cluster: %w", err) } - rows, err := result.RowsAffected() if err != nil { return 
fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrClusterNotFound } - return nil } -// List 列出所有集群 func (r *ClusterRepository) List(ctx context.Context) ([]*entity.Cluster, error) { query := ` - SELECT id, name, host, ca_data, cert_data, key_data, token, description, created_at, updated_at + SELECT id, workspace_id, owner_id, visibility, name, host, ca_data, cert_data, key_data, token, description, default_namespace, created_at, updated_at FROM clusters ORDER BY created_at DESC ` - rows, err := r.db.conn.QueryContext(ctx, query) if err != nil { return nil, fmt.Errorf("failed to list clusters: %w", err) } defer rows.Close() - clusters := make([]*entity.Cluster, 0) for rows.Next() { - cluster := &entity.Cluster{} - var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken string - - err := rows.Scan( - &cluster.ID, - &cluster.Name, - &cluster.Host, - &encryptedCAData, - &encryptedCertData, - &encryptedKeyData, - &encryptedToken, - &cluster.Description, - &cluster.CreatedAt, - &cluster.UpdatedAt, - ) + cluster, err := r.scanCluster(rows) if err != nil { - return nil, fmt.Errorf("failed to scan cluster: %w", err) + return nil, err } - - // 解密敏感数据 - cluster.CAData, err = r.encryptor.Decrypt(encryptedCAData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt CA data: %w", err) - } - - cluster.CertData, err = r.encryptor.Decrypt(encryptedCertData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt cert data: %w", err) - } - - cluster.KeyData, err = r.encryptor.Decrypt(encryptedKeyData) - if err != nil { - return nil, fmt.Errorf("failed to decrypt key data: %w", err) - } - - cluster.Token, err = r.encryptor.Decrypt(encryptedToken) - if err != nil { - return nil, fmt.Errorf("failed to decrypt token: %w", err) - } - clusters = append(clusters, cluster) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return clusters, nil } +type clusterScanner interface 
{ + Scan(dest ...interface{}) error +} + +func (r *ClusterRepository) scanCluster(scanner clusterScanner) (*entity.Cluster, error) { + cluster := &entity.Cluster{} + var encryptedCAData, encryptedCertData, encryptedKeyData, encryptedToken sql.NullString + var defaultNamespace sql.NullString + err := scanner.Scan( + &cluster.ID, + &cluster.WorkspaceID, + &cluster.OwnerID, + &cluster.Visibility, + &cluster.Name, + &cluster.Host, + &encryptedCAData, + &encryptedCertData, + &encryptedKeyData, + &encryptedToken, + &cluster.Description, + &defaultNamespace, + &cluster.CreatedAt, + &cluster.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan cluster: %w", err) + } + cluster.DefaultNamespace = defaultNamespace.String + var decryptErr error + cluster.CAData, decryptErr = decryptMaybe(r.encryptor, encryptedCAData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt CA data: %w", decryptErr) + } + cluster.CertData, decryptErr = decryptMaybe(r.encryptor, encryptedCertData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt cert data: %w", decryptErr) + } + cluster.KeyData, decryptErr = decryptMaybe(r.encryptor, encryptedKeyData.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt key data: %w", decryptErr) + } + cluster.Token, decryptErr = decryptMaybe(r.encryptor, encryptedToken.String) + if decryptErr != nil { + return nil, fmt.Errorf("failed to decrypt token: %w", decryptErr) + } + return cluster, nil +} + +func (r *ClusterRepository) encryptClusterSecrets(cluster *entity.Cluster) (string, string, string, string, error) { + ca, err := r.encryptor.Encrypt(cluster.CAData) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt CA data: %w", err) + } + cert, err := r.encryptor.Encrypt(cluster.CertData) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt cert data: %w", err) + } + key, err := r.encryptor.Encrypt(cluster.KeyData) + if err != nil { 
+ return "", "", "", "", fmt.Errorf("failed to encrypt key data: %w", err) + } + token, err := r.encryptor.Encrypt(cluster.Token) + if err != nil { + return "", "", "", "", fmt.Errorf("failed to encrypt token: %w", err) + } + return ca, cert, key, token, nil +} + +func decryptMaybe(encryptor crypto.Encryptor, value string) (string, error) { + if value == "" { + return "", nil + } + return encryptor.Decrypt(value) +} diff --git a/backend/internal/adapter/output/persistence/postgres/db.go b/backend/internal/adapter/output/persistence/postgres/db.go index 67fcc76..b45bd64 100644 --- a/backend/internal/adapter/output/persistence/postgres/db.go +++ b/backend/internal/adapter/output/persistence/postgres/db.go @@ -53,21 +53,69 @@ func (db *DB) GetConn() *sql.DB { // InitSchema 初始化数据库 schema func (db *DB) InitSchema() error { schema := ` + -- Workspaces 表 + CREATE TABLE IF NOT EXISTS workspaces ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(255) NOT NULL UNIQUE, + status VARCHAR(50) NOT NULL DEFAULT 'active', + k8s_namespace VARCHAR(255) NOT NULL, + k8s_sa_name VARCHAR(255) NOT NULL, + default_cluster_id VARCHAR(36), + quota_cpu VARCHAR(50), + quota_memory VARCHAR(50), + quota_gpu VARCHAR(50), + quota_gpu_memory VARCHAR(50), + created_by VARCHAR(36), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ); + + ALTER TABLE workspaces + ADD COLUMN IF NOT EXISTS default_cluster_id VARCHAR(36), + ADD COLUMN IF NOT EXISTS quota_cpu VARCHAR(50), + ADD COLUMN IF NOT EXISTS quota_memory VARCHAR(50), + ADD COLUMN IF NOT EXISTS quota_gpu VARCHAR(50), + ADD COLUMN IF NOT EXISTS quota_gpu_memory VARCHAR(50); + + INSERT INTO workspaces (id, name, status, k8s_namespace, k8s_sa_name, created_at, updated_at) + VALUES ('00000000-0000-0000-0000-000000000010', 'default', 'active', 'ocdp-ws-default', 'ocdp-ws-default', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (id) DO NOTHING; + -- Users 表 CREATE TABLE IF NOT EXISTS 
users ( id VARCHAR(36) PRIMARY KEY, username VARCHAR(255) NOT NULL UNIQUE, password_hash TEXT NOT NULL, email VARCHAR(255) NOT NULL, + role VARCHAR(50) NOT NULL DEFAULT 'user', + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + is_active BOOLEAN NOT NULL DEFAULT TRUE, + must_change_password BOOLEAN NOT NULL DEFAULT FALSE, + revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE users + ADD COLUMN IF NOT EXISTS role VARCHAR(50) NOT NULL DEFAULT 'user', + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS is_active BOOLEAN NOT NULL DEFAULT TRUE, + ADD COLUMN IF NOT EXISTS must_change_password BOOLEAN NOT NULL DEFAULT FALSE, + ADD COLUMN IF NOT EXISTS revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00'; + + UPDATE users SET role = 'admin' WHERE username = 'admin'; + UPDATE users SET workspace_id = '00000000-0000-0000-0000-000000000010' WHERE workspace_id = ''; + CREATE INDEX IF NOT EXISTS idx_users_username ON users(username); + CREATE INDEX IF NOT EXISTS idx_users_workspace ON users(workspace_id); + CREATE INDEX IF NOT EXISTS idx_users_revoked_after ON users(revoked_after); -- Clusters 表 CREATE TABLE IF NOT EXISTS clusters ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + owner_id VARCHAR(36) NOT NULL DEFAULT '', + visibility VARCHAR(50) NOT NULL DEFAULT 'private', name VARCHAR(255) NOT NULL UNIQUE, host TEXT NOT NULL, ca_data TEXT, @@ -75,15 +123,29 @@ func (db *DB) InitSchema() error { key_data TEXT, token TEXT, description TEXT, + default_namespace VARCHAR(255), created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE clusters + ADD COLUMN IF NOT EXISTS 
workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT '', + ADD COLUMN IF NOT EXISTS visibility VARCHAR(50) NOT NULL DEFAULT 'private', + ADD COLUMN IF NOT EXISTS default_namespace VARCHAR(255); + UPDATE clusters SET visibility = 'global_shared' WHERE visibility = 'private' AND owner_id = ''; + CREATE INDEX IF NOT EXISTS idx_clusters_name ON clusters(name); + CREATE INDEX IF NOT EXISTS idx_clusters_workspace ON clusters(workspace_id); + CREATE INDEX IF NOT EXISTS idx_clusters_owner ON clusters(owner_id); + CREATE INDEX IF NOT EXISTS idx_clusters_visibility ON clusters(visibility); -- Registries 表 CREATE TABLE IF NOT EXISTS registries ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + owner_id VARCHAR(36) NOT NULL DEFAULT '', + visibility VARCHAR(50) NOT NULL DEFAULT 'private', name VARCHAR(255) NOT NULL UNIQUE, url TEXT NOT NULL, description TEXT, @@ -94,11 +156,22 @@ func (db *DB) InitSchema() error { updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); + ALTER TABLE registries + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT '', + ADD COLUMN IF NOT EXISTS visibility VARCHAR(50) NOT NULL DEFAULT 'private'; + UPDATE registries SET visibility = 'global_shared' WHERE visibility = 'private' AND owner_id = ''; + CREATE INDEX IF NOT EXISTS idx_registries_name ON registries(name); + CREATE INDEX IF NOT EXISTS idx_registries_workspace ON registries(workspace_id); + CREATE INDEX IF NOT EXISTS idx_registries_owner ON registries(owner_id); + CREATE INDEX IF NOT EXISTS idx_registries_visibility ON registries(visibility); -- Instances 表 CREATE TABLE IF NOT EXISTS instances ( id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + 
owner_id VARCHAR(36) NOT NULL DEFAULT '', cluster_id VARCHAR(36) NOT NULL, name VARCHAR(255) NOT NULL, namespace VARCHAR(255) NOT NULL, @@ -121,9 +194,63 @@ func (db *DB) InitSchema() error { CONSTRAINT unique_cluster_name UNIQUE (cluster_id, name, namespace) ); + ALTER TABLE instances + ADD COLUMN IF NOT EXISTS workspace_id VARCHAR(36) NOT NULL DEFAULT '00000000-0000-0000-0000-000000000010', + ADD COLUMN IF NOT EXISTS owner_id VARCHAR(36) NOT NULL DEFAULT ''; + CREATE INDEX IF NOT EXISTS idx_instances_cluster ON instances(cluster_id); CREATE INDEX IF NOT EXISTS idx_instances_registry ON instances(registry_id); CREATE INDEX IF NOT EXISTS idx_instances_name ON instances(name); + CREATE INDEX IF NOT EXISTS idx_instances_workspace ON instances(workspace_id); + CREATE INDEX IF NOT EXISTS idx_instances_owner ON instances(owner_id); + + CREATE TABLE IF NOT EXISTS workspace_cluster_bindings ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + cluster_id VARCHAR(36) NOT NULL REFERENCES clusters(id) ON DELETE CASCADE, + namespace VARCHAR(255) NOT NULL, + service_account VARCHAR(255) NOT NULL, + quota_cpu VARCHAR(50), + quota_memory VARCHAR(50), + quota_gpu VARCHAR(50), + quota_gpu_memory VARCHAR(50), + status VARCHAR(50) NOT NULL DEFAULT 'active', + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE (workspace_id, cluster_id) + ); + ALTER TABLE workspace_cluster_bindings + ADD COLUMN IF NOT EXISTS quota_gpu_memory VARCHAR(50); + CREATE INDEX IF NOT EXISTS idx_workspace_cluster_bindings_workspace ON workspace_cluster_bindings(workspace_id); + CREATE INDEX IF NOT EXISTS idx_workspace_cluster_bindings_cluster ON workspace_cluster_bindings(cluster_id); + + CREATE TABLE IF NOT EXISTS workspace_quotas ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + resource_type VARCHAR(50) NOT 
NULL, + hard_limit VARCHAR(100) NOT NULL, + soft_limit VARCHAR(100), + used VARCHAR(100), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE (workspace_id, resource_type) + ); + + CREATE TABLE IF NOT EXISTS audit_logs ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + action VARCHAR(100) NOT NULL, + resource_type VARCHAR(50) NOT NULL, + resource_id VARCHAR(36), + resource_name VARCHAR(255), + details JSONB, + ip_address VARCHAR(50), + user_agent TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ); + CREATE INDEX IF NOT EXISTS idx_audit_logs_workspace ON audit_logs(workspace_id); + CREATE INDEX IF NOT EXISTS idx_audit_logs_user ON audit_logs(user_id); ` _, err := db.conn.Exec(schema) diff --git a/backend/internal/adapter/output/persistence/postgres/instance_repository.go b/backend/internal/adapter/output/persistence/postgres/instance_repository.go index d12400b..2d51f98 100644 --- a/backend/internal/adapter/output/persistence/postgres/instance_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/instance_repository.go @@ -12,37 +12,32 @@ import ( "github.com/ocdp/cluster-service/internal/domain/repository" ) -// InstanceRepository PostgreSQL 实例仓储实现 type InstanceRepository struct { db *DB } -// NewInstanceRepository 创建 PostgreSQL 实例仓储 func NewInstanceRepository(db *DB) repository.InstanceRepository { return &InstanceRepository{db: db} } -// Create 创建实例 func (r *InstanceRepository) Create(ctx context.Context, instance *entity.Instance) error { if instance.ID == "" { instance.ID = uuid.New().String() } - - // 将 Values 转换为 JSON valuesJSON, err := json.Marshal(instance.Values) if err != nil { return fmt.Errorf("failed to marshal values: %w", err) } - query := ` - INSERT INTO instances (id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, 
last_operation, last_error, - revision, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + INSERT INTO instances + (id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, + description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20) ` - _, err = r.db.conn.ExecContext(ctx, query, instance.ID, + instance.WorkspaceID, + instance.OwnerID, instance.ClusterID, instance.Name, instance.Namespace, @@ -61,166 +56,71 @@ func (r *InstanceRepository) Create(ctx context.Context, instance *entity.Instan instance.CreatedAt, instance.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create instance: %w", err) } - return nil } -// GetByID 根据 ID 获取实例 func (r *InstanceRepository) GetByID(ctx context.Context, id string) (*entity.Instance, error) { - query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, last_operation, last_error, - revision, created_at, updated_at - FROM instances - WHERE id = $1 - ` - - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrInstanceNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get instance: %w", err) - } - - 
// 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - return instance, nil + return r.get(ctx, "id = $1", id) } -// GetByClusterAndName 根据集群 ID 和名称获取实例 func (r *InstanceRepository) GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) { query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at FROM instances WHERE cluster_id = $1 AND name = $2 ` - - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := r.db.conn.QueryRowContext(ctx, query, clusterID, name).Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrInstanceNotFound - } + rows, err := r.db.conn.QueryContext(ctx, query, clusterID, name) if err != nil { return nil, fmt.Errorf("failed to get instance: %w", err) } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, 
fmt.Errorf("failed to unmarshal values: %w", err) - } + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrInstanceNotFound } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - return instance, nil + return r.scanInstance(rows) +} + +func (r *InstanceRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Instance, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, + description, values, values_yaml, status, status_reason, last_operation, last_error, + revision, created_at, updated_at + FROM instances + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) + if err != nil { + return nil, fmt.Errorf("failed to get instance: %w", err) + } + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrInstanceNotFound + } + return r.scanInstance(rows) } -// Update 更新实例 func (r *InstanceRepository) Update(ctx context.Context, instance *entity.Instance) error { instance.UpdatedAt = time.Now() - - // 将 Values 转换为 JSON valuesJSON, err := json.Marshal(instance.Values) if err != nil { return fmt.Errorf("failed to marshal values: %w", err) } - query := ` UPDATE instances - SET cluster_id = $1, name = $2, namespace = $3, registry_id = $4, repository = $5, - chart = $6, version = $7, description = $8, values = $9, values_yaml = $10, - status = $11, status_reason = $12, last_operation = $13, last_error = $14, - revision = $15, updated_at = $16 - WHERE id = $17 + SET workspace_id = $1, owner_id = $2, cluster_id = $3, name = $4, namespace = $5, + registry_id = $6, repository = $7, chart = $8, version = $9, description = $10, + values = $11, values_yaml = $12, status = $13, status_reason = $14, + last_operation = $15, last_error = $16, 
revision = $17, updated_at = $18 + WHERE id = $19 ` - result, err := r.db.conn.ExecContext(ctx, query, + instance.WorkspaceID, + instance.OwnerID, instance.ClusterID, instance.Name, instance.Namespace, @@ -239,195 +139,126 @@ func (r *InstanceRepository) Update(ctx context.Context, instance *entity.Instan instance.UpdatedAt, instance.ID, ) - if err != nil { return fmt.Errorf("failed to update instance: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrInstanceNotFound } - return nil } -// Delete 删除实例 func (r *InstanceRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM instances WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM instances WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete instance: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrInstanceNotFound } - return nil } -// ListByCluster 列出指定集群的所有实例 func (r *InstanceRepository) ListByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { - query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, - description, values, values_yaml, status, status_reason, last_operation, last_error, - revision, created_at, updated_at - FROM instances - WHERE cluster_id = $1 - ORDER BY created_at DESC - ` - - rows, err := r.db.conn.QueryContext(ctx, query, clusterID) - if err != nil { - return nil, fmt.Errorf("failed to list instances: %w", err) - } - defer rows.Close() - - instances := make([]*entity.Instance, 0) - for rows.Next() { - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := rows.Scan( - &instance.ID, - 
&instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) - if err != nil { - return nil, fmt.Errorf("failed to scan instance: %w", err) - } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - - instances = append(instances, instance) - } - - if err := rows.Err(); err != nil { - return nil, fmt.Errorf("rows iteration error: %w", err) - } - - return instances, nil + return r.list(ctx, "WHERE cluster_id = $1", clusterID) } -// List 列出所有实例 func (r *InstanceRepository) List(ctx context.Context) ([]*entity.Instance, error) { + return r.list(ctx, "", nil) +} + +func (r *InstanceRepository) list(ctx context.Context, where string, arg interface{}) ([]*entity.Instance, error) { query := ` - SELECT id, cluster_id, name, namespace, registry_id, repository, chart, version, + SELECT id, workspace_id, owner_id, cluster_id, name, namespace, registry_id, repository, chart, version, description, values, values_yaml, status, status_reason, last_operation, last_error, revision, created_at, updated_at FROM instances + ` + where + ` ORDER BY created_at DESC ` - - rows, err := r.db.conn.QueryContext(ctx, query) + var rows *sql.Rows + var err error + if where == "" { + rows, err = r.db.conn.QueryContext(ctx, query) + } else { + rows, err = r.db.conn.QueryContext(ctx, query, arg) + } if err != nil { return nil, 
fmt.Errorf("failed to list instances: %w", err) } defer rows.Close() - instances := make([]*entity.Instance, 0) for rows.Next() { - instance := &entity.Instance{} - var ( - valuesJSON []byte - statusReason sql.NullString - lastOperation sql.NullString - lastError sql.NullString - ) - - err := rows.Scan( - &instance.ID, - &instance.ClusterID, - &instance.Name, - &instance.Namespace, - &instance.RegistryID, - &instance.Repository, - &instance.Chart, - &instance.Version, - &instance.Description, - &valuesJSON, - &instance.ValuesYAML, - &instance.Status, - &statusReason, - &lastOperation, - &lastError, - &instance.Revision, - &instance.CreatedAt, - &instance.UpdatedAt, - ) + instance, err := r.scanInstance(rows) if err != nil { - return nil, fmt.Errorf("failed to scan instance: %w", err) + return nil, err } - - // 解析 JSON Values - if len(valuesJSON) > 0 { - if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { - return nil, fmt.Errorf("failed to unmarshal values: %w", err) - } - } - - if statusReason.Valid { - instance.StatusReason = statusReason.String - } - if lastOperation.Valid { - instance.LastOperation = entity.InstanceOperation(lastOperation.String) - } - if lastError.Valid { - instance.LastError = lastError.String - } - instances = append(instances, instance) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return instances, nil } + +type instanceScanner interface { + Scan(dest ...interface{}) error +} + +func (r *InstanceRepository) scanInstance(scanner instanceScanner) (*entity.Instance, error) { + instance := &entity.Instance{} + var ( + valuesJSON []byte + statusReason sql.NullString + lastOperation sql.NullString + lastError sql.NullString + ) + err := scanner.Scan( + &instance.ID, + &instance.WorkspaceID, + &instance.OwnerID, + &instance.ClusterID, + &instance.Name, + &instance.Namespace, + &instance.RegistryID, + &instance.Repository, + &instance.Chart, + &instance.Version, + 
&instance.Description, + &valuesJSON, + &instance.ValuesYAML, + &instance.Status, + &statusReason, + &lastOperation, + &lastError, + &instance.Revision, + &instance.CreatedAt, + &instance.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan instance: %w", err) + } + if len(valuesJSON) > 0 { + if err := json.Unmarshal(valuesJSON, &instance.Values); err != nil { + return nil, fmt.Errorf("failed to unmarshal values: %w", err) + } + } + if statusReason.Valid { + instance.StatusReason = statusReason.String + } + if lastOperation.Valid { + instance.LastOperation = entity.InstanceOperation(lastOperation.String) + } + if lastError.Valid { + instance.LastError = lastError.String + } + return instance, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/registry_repository.go b/backend/internal/adapter/output/persistence/postgres/registry_repository.go index 78fb6fd..f791e49 100644 --- a/backend/internal/adapter/output/persistence/postgres/registry_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/registry_repository.go @@ -12,39 +12,32 @@ import ( "github.com/ocdp/cluster-service/internal/pkg/crypto" ) -// RegistryRepository PostgreSQL Registry 仓储实现 type RegistryRepository struct { db *DB encryptor crypto.Encryptor } -// NewRegistryRepository 创建 PostgreSQL Registry 仓储 func NewRegistryRepository(db *DB, encryptor crypto.Encryptor) repository.RegistryRepository { - return &RegistryRepository{ - db: db, - encryptor: encryptor, - } + return &RegistryRepository{db: db, encryptor: encryptor} } -// Create 创建 Registry func (r *RegistryRepository) Create(ctx context.Context, registry *entity.Registry) error { if registry.ID == "" { registry.ID = uuid.New().String() } - - // 加密密码 encryptedPassword, err := r.encryptor.Encrypt(registry.Password) if err != nil { return fmt.Errorf("failed to encrypt password: %w", err) } - query := ` - INSERT INTO registries (id, name, url, description, username, password, insecure, 
created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) + INSERT INTO registries (id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12) ` - _, err = r.db.conn.ExecContext(ctx, query, registry.ID, + registry.WorkspaceID, + registry.OwnerID, + registry.Visibility, registry.Name, registry.URL, registry.Description, @@ -54,110 +47,57 @@ func (r *RegistryRepository) Create(ctx context.Context, registry *entity.Regist registry.CreatedAt, registry.UpdatedAt, ) - if err != nil { return fmt.Errorf("failed to create registry: %w", err) } - return nil } -// GetByID 根据 ID 获取 Registry func (r *RegistryRepository) GetByID(ctx context.Context, id string) (*entity.Registry, error) { - query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at - FROM registries - WHERE id = $1 - ` - - registry := &entity.Registry{} - var encryptedPassword string - - err := r.db.conn.QueryRowContext(ctx, query, id).Scan( - ®istry.ID, - ®istry.Name, - ®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrRegistryNotFound - } - if err != nil { - return nil, fmt.Errorf("failed to get registry: %w", err) - } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: %w", err) - } - - return registry, nil + return r.get(ctx, "id = $1", id) } -// GetByName 根据名称获取 Registry func (r *RegistryRepository) GetByName(ctx context.Context, name string) (*entity.Registry, error) { - query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at + return r.get(ctx, "name = $1", name) +} + +func (r *RegistryRepository) get(ctx context.Context, where string, arg interface{}) 
(*entity.Registry, error) { + query := fmt.Sprintf(` + SELECT id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at FROM registries - WHERE name = $1 - ` - - registry := &entity.Registry{} - var encryptedPassword string - - err := r.db.conn.QueryRowContext(ctx, query, name).Scan( - ®istry.ID, - ®istry.Name, - ®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) - - if err == sql.ErrNoRows { - return nil, entity.ErrRegistryNotFound - } + WHERE %s + `, where) + rows, err := r.db.conn.QueryContext(ctx, query, arg) if err != nil { return nil, fmt.Errorf("failed to get registry: %w", err) } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: %w", err) + defer rows.Close() + if !rows.Next() { + return nil, entity.ErrRegistryNotFound + } + registry, err := r.scanRegistry(rows) + if err != nil { + return nil, err } - return registry, nil } -// Update 更新 Registry func (r *RegistryRepository) Update(ctx context.Context, registry *entity.Registry) error { registry.UpdatedAt = time.Now() - - // 加密密码 encryptedPassword, err := r.encryptor.Encrypt(registry.Password) if err != nil { return fmt.Errorf("failed to encrypt password: %w", err) } - query := ` UPDATE registries - SET name = $1, url = $2, description = $3, username = $4, password = $5, - insecure = $6, updated_at = $7 - WHERE id = $8 + SET workspace_id = $1, owner_id = $2, visibility = $3, name = $4, url = $5, + description = $6, username = $7, password = $8, insecure = $9, updated_at = $10 + WHERE id = $11 ` - result, err := r.db.conn.ExecContext(ctx, query, + registry.WorkspaceID, + registry.OwnerID, + registry.Visibility, registry.Name, registry.URL, registry.Description, @@ -167,91 +107,86 @@ func (r *RegistryRepository) Update(ctx context.Context, registry *entity.Regist 
registry.UpdatedAt, registry.ID, ) - if err != nil { return fmt.Errorf("failed to update registry: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrRegistryNotFound } - return nil } -// Delete 删除 Registry func (r *RegistryRepository) Delete(ctx context.Context, id string) error { - query := `DELETE FROM registries WHERE id = $1` - - result, err := r.db.conn.ExecContext(ctx, query, id) + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM registries WHERE id = $1`, id) if err != nil { return fmt.Errorf("failed to delete registry: %w", err) } - rows, err := result.RowsAffected() if err != nil { return fmt.Errorf("failed to get affected rows: %w", err) } - if rows == 0 { return entity.ErrRegistryNotFound } - return nil } -// List 列出所有 Registries func (r *RegistryRepository) List(ctx context.Context) ([]*entity.Registry, error) { query := ` - SELECT id, name, url, description, username, password, insecure, created_at, updated_at + SELECT id, workspace_id, owner_id, visibility, name, url, description, username, password, insecure, created_at, updated_at FROM registries ORDER BY created_at DESC ` - rows, err := r.db.conn.QueryContext(ctx, query) if err != nil { return nil, fmt.Errorf("failed to list registries: %w", err) } defer rows.Close() - registries := make([]*entity.Registry, 0) for rows.Next() { - registry := &entity.Registry{} - var encryptedPassword string - - err := rows.Scan( - ®istry.ID, - ®istry.Name, - ®istry.URL, - ®istry.Description, - ®istry.Username, - &encryptedPassword, - ®istry.Insecure, - ®istry.CreatedAt, - ®istry.UpdatedAt, - ) + registry, err := r.scanRegistry(rows) if err != nil { - return nil, fmt.Errorf("failed to scan registry: %w", err) + return nil, err } - - // 解密密码 - registry.Password, err = r.encryptor.Decrypt(encryptedPassword) - if err != nil { - return nil, fmt.Errorf("failed to decrypt password: %w", err) - } - registries 
= append(registries, registry) } - if err := rows.Err(); err != nil { return nil, fmt.Errorf("rows iteration error: %w", err) } - return registries, nil } +type registryScanner interface { + Scan(dest ...interface{}) error +} + +func (r *RegistryRepository) scanRegistry(scanner registryScanner) (*entity.Registry, error) { + registry := &entity.Registry{} + var encryptedPassword sql.NullString + err := scanner.Scan( + ®istry.ID, + ®istry.WorkspaceID, + ®istry.OwnerID, + ®istry.Visibility, + ®istry.Name, + ®istry.URL, + ®istry.Description, + ®istry.Username, + &encryptedPassword, + ®istry.Insecure, + ®istry.CreatedAt, + ®istry.UpdatedAt, + ) + if err != nil { + return nil, fmt.Errorf("failed to scan registry: %w", err) + } + registry.Password, err = decryptMaybe(r.encryptor, encryptedPassword.String) + if err != nil { + return nil, fmt.Errorf("failed to decrypt password: %w", err) + } + return registry, nil +} diff --git a/backend/internal/adapter/output/persistence/postgres/user_repository.go b/backend/internal/adapter/output/persistence/postgres/user_repository.go index ea766ee..839ec89 100644 --- a/backend/internal/adapter/output/persistence/postgres/user_repository.go +++ b/backend/internal/adapter/output/persistence/postgres/user_repository.go @@ -28,8 +28,8 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { } query := ` - INSERT INTO users (id, username, password_hash, email, revoked_after, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7) + INSERT INTO users (id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) ` _, err := r.db.conn.ExecContext(ctx, query, @@ -37,6 +37,10 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { user.Username, user.PasswordHash, user.Email, + user.Role, + user.WorkspaceID, + user.IsActive, + user.MustChangePassword, 
user.RevokedAfter, user.CreatedAt, user.UpdatedAt, @@ -52,7 +56,7 @@ func (r *UserRepository) Create(ctx context.Context, user *entity.User) error { // GetByID 根据 ID 获取用户 func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users WHERE id = $1 ` @@ -63,6 +67,10 @@ func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, &user.UpdatedAt, @@ -81,7 +89,7 @@ func (r *UserRepository) GetByID(ctx context.Context, id string) (*entity.User, // GetByUsername 根据用户名获取用户 func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users WHERE username = $1 ` @@ -92,6 +100,10 @@ func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*e &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, &user.UpdatedAt, @@ -108,19 +120,30 @@ func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*e } // Update 更新用户 +func (r *UserRepository) AdminExists(ctx context.Context) (bool, error) { + var exists bool + err := r.db.conn.QueryRowContext(ctx, `SELECT EXISTS(SELECT 1 FROM users WHERE role = 'admin')`).Scan(&exists) + return exists, err +} + func (r *UserRepository) Update(ctx context.Context, user *entity.User) 
error { user.UpdatedAt = time.Now() query := ` UPDATE users - SET username = $1, password_hash = $2, email = $3, revoked_after = $4, updated_at = $5 - WHERE id = $6 + SET username = $1, password_hash = $2, email = $3, role = $4, workspace_id = $5, + is_active = $6, must_change_password = $7, revoked_after = $8, updated_at = $9 + WHERE id = $10 ` result, err := r.db.conn.ExecContext(ctx, query, user.Username, user.PasswordHash, user.Email, + user.Role, + user.WorkspaceID, + user.IsActive, + user.MustChangePassword, user.RevokedAfter, user.UpdatedAt, user.ID, @@ -166,7 +189,7 @@ func (r *UserRepository) Delete(ctx context.Context, id string) error { // List 列出所有用户 func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { query := ` - SELECT id, username, password_hash, email, revoked_after, created_at, updated_at + SELECT id, username, password_hash, email, role, workspace_id, is_active, must_change_password, revoked_after, created_at, updated_at FROM users ORDER BY created_at DESC ` @@ -185,6 +208,10 @@ func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { &user.Username, &user.PasswordHash, &user.Email, + &user.Role, + &user.WorkspaceID, + &user.IsActive, + &user.MustChangePassword, &user.RevokedAfter, &user.CreatedAt, &user.UpdatedAt, @@ -201,4 +228,3 @@ func (r *UserRepository) List(ctx context.Context) ([]*entity.User, error) { return users, nil } - diff --git a/backend/internal/adapter/output/persistence/postgres/workspace_repository.go b/backend/internal/adapter/output/persistence/postgres/workspace_repository.go new file mode 100644 index 0000000..4185247 --- /dev/null +++ b/backend/internal/adapter/output/persistence/postgres/workspace_repository.go @@ -0,0 +1,404 @@ +package postgres + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "time" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" +) + 
+type WorkspaceRepository struct { + db *DB +} + +func NewWorkspaceRepository(db *DB) repository.WorkspaceRepository { + return &WorkspaceRepository{db: db} +} + +func (r *WorkspaceRepository) Create(ctx context.Context, workspace *entity.Workspace) error { + if workspace.ID == "" { + workspace.ID = uuid.New().String() + } + query := ` + INSERT INTO workspaces (id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + ON CONFLICT (name) DO NOTHING + ` + result, err := r.db.conn.ExecContext(ctx, query, + workspace.ID, + workspace.Name, + workspace.Status, + workspace.K8sNamespace, + workspace.K8sSAName, + workspace.DefaultClusterID, + workspace.QuotaCPU, + workspace.QuotaMemory, + workspace.QuotaGPU, + workspace.QuotaGPUMem, + workspace.CreatedBy, + workspace.CreatedAt, + workspace.UpdatedAt, + ) + if err != nil { + return fmt.Errorf("failed to create workspace: %w", err) + } + rows, err := result.RowsAffected() + if err != nil { + return fmt.Errorf("failed to get affected rows: %w", err) + } + if rows == 0 { + return entity.ErrWorkspaceExists + } + return nil +} + +func (r *WorkspaceRepository) GetByID(ctx context.Context, id string) (*entity.Workspace, error) { + return r.get(ctx, "id = $1", id) +} + +func (r *WorkspaceRepository) GetByName(ctx context.Context, name string) (*entity.Workspace, error) { + return r.get(ctx, "name = $1", name) +} + +func (r *WorkspaceRepository) get(ctx context.Context, where string, arg interface{}) (*entity.Workspace, error) { + query := fmt.Sprintf(` + SELECT id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at + FROM workspaces + WHERE %s + `, where) + workspace := &entity.Workspace{} + var createdBy, defaultClusterID, quotaCPU, quotaMemory, quotaGPU, quotaGPUMem 
sql.NullString + err := r.db.conn.QueryRowContext(ctx, query, arg).Scan( + &workspace.ID, + &workspace.Name, + &workspace.Status, + &workspace.K8sNamespace, + &workspace.K8sSAName, + &defaultClusterID, + "aCPU, + "aMemory, + "aGPU, + "aGPUMem, + &createdBy, + &workspace.CreatedAt, + &workspace.UpdatedAt, + ) + if err == sql.ErrNoRows { + return nil, entity.ErrWorkspaceNotFound + } + if err != nil { + return nil, fmt.Errorf("failed to get workspace: %w", err) + } + workspace.CreatedBy = createdBy.String + workspace.DefaultClusterID = defaultClusterID.String + workspace.QuotaCPU = quotaCPU.String + workspace.QuotaMemory = quotaMemory.String + workspace.QuotaGPU = quotaGPU.String + workspace.QuotaGPUMem = quotaGPUMem.String + return workspace, nil +} + +func (r *WorkspaceRepository) Update(ctx context.Context, workspace *entity.Workspace) error { + workspace.UpdatedAt = time.Now() + query := ` + UPDATE workspaces + SET name = $1, status = $2, k8s_namespace = $3, k8s_sa_name = $4, + default_cluster_id = $5, + quota_cpu = $6, quota_memory = $7, quota_gpu = $8, quota_gpu_memory = $9, + created_by = $10, updated_at = $11 + WHERE id = $12 + ` + result, err := r.db.conn.ExecContext(ctx, query, + workspace.Name, + workspace.Status, + workspace.K8sNamespace, + workspace.K8sSAName, + workspace.DefaultClusterID, + workspace.QuotaCPU, + workspace.QuotaMemory, + workspace.QuotaGPU, + workspace.QuotaGPUMem, + workspace.CreatedBy, + workspace.UpdatedAt, + workspace.ID, + ) + if err != nil { + return fmt.Errorf("failed to update workspace: %w", err) + } + rows, err := result.RowsAffected() + if err != nil { + return fmt.Errorf("failed to get affected rows: %w", err) + } + if rows == 0 { + return entity.ErrWorkspaceNotFound + } + return nil +} + +func (r *WorkspaceRepository) Delete(ctx context.Context, id string) error { + result, err := r.db.conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, id) + if err != nil { + return fmt.Errorf("failed to delete workspace: %w", err) 
+ } + rows, err := result.RowsAffected() + if err != nil { + return fmt.Errorf("failed to get affected rows: %w", err) + } + if rows == 0 { + return entity.ErrWorkspaceNotFound + } + return nil +} + +func (r *WorkspaceRepository) List(ctx context.Context) ([]*entity.Workspace, error) { + query := ` + SELECT id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at + FROM workspaces + ORDER BY created_at DESC + ` + rows, err := r.db.conn.QueryContext(ctx, query) + if err != nil { + return nil, fmt.Errorf("failed to list workspaces: %w", err) + } + defer rows.Close() + workspaces := make([]*entity.Workspace, 0) + for rows.Next() { + workspace := &entity.Workspace{} + var createdBy, defaultClusterID, quotaCPU, quotaMemory, quotaGPU, quotaGPUMem sql.NullString + if err := rows.Scan( + &workspace.ID, + &workspace.Name, + &workspace.Status, + &workspace.K8sNamespace, + &workspace.K8sSAName, + &defaultClusterID, + "aCPU, + "aMemory, + "aGPU, + "aGPUMem, + &createdBy, + &workspace.CreatedAt, + &workspace.UpdatedAt, + ); err != nil { + return nil, fmt.Errorf("failed to scan workspace: %w", err) + } + workspace.CreatedBy = createdBy.String + workspace.DefaultClusterID = defaultClusterID.String + workspace.QuotaCPU = quotaCPU.String + workspace.QuotaMemory = quotaMemory.String + workspace.QuotaGPU = quotaGPU.String + workspace.QuotaGPUMem = quotaGPUMem.String + workspaces = append(workspaces, workspace) + } + return workspaces, rows.Err() +} + +type WorkspaceClusterBindingRepository struct { + db *DB +} + +func NewWorkspaceClusterBindingRepository(db *DB) repository.WorkspaceClusterBindingRepository { + return &WorkspaceClusterBindingRepository{db: db} +} + +func (r *WorkspaceClusterBindingRepository) Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error { + if binding.ID == "" { + binding.ID = uuid.New().String() + } + now := time.Now() + if 
binding.CreatedAt.IsZero() { + binding.CreatedAt = now + } + binding.UpdatedAt = now + query := ` + INSERT INTO workspace_cluster_bindings + (id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12) + ON CONFLICT (workspace_id, cluster_id) + DO UPDATE SET namespace = EXCLUDED.namespace, + service_account = EXCLUDED.service_account, + quota_cpu = EXCLUDED.quota_cpu, + quota_memory = EXCLUDED.quota_memory, + quota_gpu = EXCLUDED.quota_gpu, + quota_gpu_memory = EXCLUDED.quota_gpu_memory, + status = EXCLUDED.status, + updated_at = EXCLUDED.updated_at + ` + _, err := r.db.conn.ExecContext(ctx, query, + binding.ID, + binding.WorkspaceID, + binding.ClusterID, + binding.Namespace, + binding.ServiceAccount, + binding.QuotaCPU, + binding.QuotaMemory, + binding.QuotaGPU, + binding.QuotaGPUMem, + binding.Status, + binding.CreatedAt, + binding.UpdatedAt, + ) + if err != nil { + return fmt.Errorf("failed to upsert workspace cluster binding: %w", err) + } + return nil +} + +func (r *WorkspaceClusterBindingRepository) Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + query := ` + SELECT id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at + FROM workspace_cluster_bindings + WHERE workspace_id = $1 AND cluster_id = $2 + ` + binding := &entity.WorkspaceClusterBinding{} + err := r.db.conn.QueryRowContext(ctx, query, workspaceID, clusterID).Scan( + &binding.ID, + &binding.WorkspaceID, + &binding.ClusterID, + &binding.Namespace, + &binding.ServiceAccount, + &binding.QuotaCPU, + &binding.QuotaMemory, + &binding.QuotaGPU, + &binding.QuotaGPUMem, + &binding.Status, + &binding.CreatedAt, + &binding.UpdatedAt, + ) + if err == sql.ErrNoRows { + return nil, entity.ErrWorkspaceNotFound + } + if err != nil { + return nil, 
fmt.Errorf("failed to get workspace cluster binding: %w", err) + } + return binding, nil +} + +func (r *WorkspaceClusterBindingRepository) ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error) { + query := ` + SELECT id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at + FROM workspace_cluster_bindings + WHERE workspace_id = $1 + ORDER BY created_at ASC + ` + rows, err := r.db.conn.QueryContext(ctx, query, workspaceID) + if err != nil { + return nil, fmt.Errorf("failed to list workspace cluster bindings: %w", err) + } + defer rows.Close() + bindings := make([]*entity.WorkspaceClusterBinding, 0) + for rows.Next() { + binding := &entity.WorkspaceClusterBinding{} + if err := rows.Scan( + &binding.ID, + &binding.WorkspaceID, + &binding.ClusterID, + &binding.Namespace, + &binding.ServiceAccount, + &binding.QuotaCPU, + &binding.QuotaMemory, + &binding.QuotaGPU, + &binding.QuotaGPUMem, + &binding.Status, + &binding.CreatedAt, + &binding.UpdatedAt, + ); err != nil { + return nil, fmt.Errorf("failed to scan workspace cluster binding: %w", err) + } + bindings = append(bindings, binding) + } + return bindings, rows.Err() +} + +func (r *WorkspaceClusterBindingRepository) Delete(ctx context.Context, workspaceID, clusterID string) error { + _, err := r.db.conn.ExecContext(ctx, `DELETE FROM workspace_cluster_bindings WHERE workspace_id = $1 AND cluster_id = $2`, workspaceID, clusterID) + return err +} + +type AuditLogRepository struct { + db *DB +} + +func NewAuditLogRepository(db *DB) repository.AuditLogRepository { + return &AuditLogRepository{db: db} +} + +func (r *AuditLogRepository) Create(ctx context.Context, logEntry *entity.AuditLog) error { + if logEntry.ID == "" { + logEntry.ID = uuid.New().String() + } + details, err := json.Marshal(logEntry.Details) + if err != nil { + return fmt.Errorf("failed to marshal audit details: %w", err) + } 
+ if logEntry.CreatedAt.IsZero() { + logEntry.CreatedAt = time.Now() + } + query := ` + INSERT INTO audit_logs (id, workspace_id, user_id, action, resource_type, resource_id, resource_name, details, ip_address, user_agent, created_at) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + ` + _, err = r.db.conn.ExecContext(ctx, query, + logEntry.ID, + logEntry.WorkspaceID, + logEntry.UserID, + logEntry.Action, + logEntry.ResourceType, + logEntry.ResourceID, + logEntry.ResourceName, + string(details), + logEntry.IPAddress, + logEntry.UserAgent, + logEntry.CreatedAt, + ) + if err != nil { + return fmt.Errorf("failed to create audit log: %w", err) + } + return nil +} + +func (r *AuditLogRepository) ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) { + if limit <= 0 || limit > 500 { + limit = 100 + } + query := ` + SELECT id, workspace_id, user_id, action, resource_type, resource_id, resource_name, details, ip_address, user_agent, created_at + FROM audit_logs + WHERE workspace_id = $1 + ORDER BY created_at DESC + LIMIT $2 + ` + rows, err := r.db.conn.QueryContext(ctx, query, workspaceID, limit) + if err != nil { + return nil, fmt.Errorf("failed to list audit logs: %w", err) + } + defer rows.Close() + result := make([]*entity.AuditLog, 0) + for rows.Next() { + logEntry := &entity.AuditLog{} + var details []byte + if err := rows.Scan( + &logEntry.ID, + &logEntry.WorkspaceID, + &logEntry.UserID, + &logEntry.Action, + &logEntry.ResourceType, + &logEntry.ResourceID, + &logEntry.ResourceName, + &details, + &logEntry.IPAddress, + &logEntry.UserAgent, + &logEntry.CreatedAt, + ); err != nil { + return nil, fmt.Errorf("failed to scan audit log: %w", err) + } + _ = json.Unmarshal(details, &logEntry.Details) + result = append(result, logEntry) + } + return result, rows.Err() +} diff --git a/backend/internal/bootstrap/config.go b/backend/internal/bootstrap/config.go index 1d2e7d0..15d6e76 100644 --- a/backend/internal/bootstrap/config.go +++ 
b/backend/internal/bootstrap/config.go @@ -5,14 +5,17 @@ import ( "fmt" "os" "path/filepath" + "sort" + "strconv" + "strings" ) // BootstrapConfig 预注入配置 type BootstrapConfig struct { - Enabled bool `json:"enabled"` - Users []UserSeed `json:"users"` - Registries []RegistrySeed `json:"registries"` - Clusters []ClusterSeed `json:"clusters"` + Enabled bool `json:"enabled"` + Users []UserSeed `json:"users"` + Registries []RegistrySeed `json:"registries"` + Clusters []ClusterSeed `json:"clusters"` } // UserSeed 用户预注入数据 @@ -20,6 +23,7 @@ type UserSeed struct { Username string `json:"username"` Password string `json:"password"` Email string `json:"email"` + Role string `json:"role"` } // RegistrySeed Registry 预注入数据 @@ -45,11 +49,12 @@ type ClusterSeed struct { // LoadBootstrapConfig 加载预注入配置 // 支持从文件或环境变量加载 -// +// // 加载优先级: // 1. 环境变量 BOOTSTRAP_CONFIG_JSON (最高优先级) -// 2. Mock 模式: 配置文件 config/bootstrap.json -// 3. 真实模式: GetDefaultBootstrapConfig() 中的真实数据 +// 2. 环境变量 BOOTSTRAP_* (root .env / container env) +// 3. Mock 模式: 配置文件 config/bootstrap.json +// 4. 未提供任何 bootstrap 配置时禁用预注入 func LoadBootstrapConfig() (*BootstrapConfig, error) { // 1. 优先从环境变量加载 if configJSON := os.Getenv("BOOTSTRAP_CONFIG_JSON"); configJSON != "" { @@ -60,9 +65,13 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { return &config, nil } + if config, ok := loadBootstrapConfigFromEnv(); ok { + return config, nil + } + // 2. 检查适配器模式 adapterMode := os.Getenv("ADAPTER_MODE") - + // Mock 模式: 使用配置文件(假数据) if adapterMode == "mock" { configPath := os.Getenv("BOOTSTRAP_CONFIG_FILE") @@ -72,7 +81,7 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { // 检查文件是否存在 if _, err := os.Stat(configPath); os.IsNotExist(err) { - // 配置文件不存在,使用默认配置 + // 配置文件不存在,不预注入任何数据 return GetDefaultBootstrapConfig(), nil } @@ -89,49 +98,144 @@ func LoadBootstrapConfig() (*BootstrapConfig, error) { return &config, nil } - // 3. 真实模式 (mode 1, mode 2): 使用代码中的真实预注入数据 + // 3. 
真实模式: 未显式配置时不预注入任何数据 return GetDefaultBootstrapConfig(), nil } -// GetDefaultBootstrapConfig 获取默认的预注入配置(示例) -func GetDefaultBootstrapConfig() *BootstrapConfig { - return &BootstrapConfig{ - Enabled: true, - Users: []UserSeed{ - { - Username: "admin", - Password: "admin123", - Email: "admin@example.com", - }, - }, - Registries: []RegistrySeed{ - { - Name: "harbor-bwgdi", - URL: "https://harbor.bwgdi.com", - Description: "BWGDI Harbor Registry", - Username: "admin", - Password: "BWGDIP@ssw0rd1401#", - Insecure: false, - }, - }, - Clusters: []ClusterSeed{ - { - Name: "cluster1", - Host: "https://10.6.14.123:6443", - Description: "K3s Cluster 1", - CAData: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkekNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTlRVME9ETTJOemt3SGhjTk1qVXdPREU0TURJeU1URTVXaGNOTXpVd09ERTJNREl5TVRFNQpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTlRVME9ETTJOemt3V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFTaVBJUW5LZXR2VjQ3cHUyLytMV1lZaGJjbUY3V3RZQnArOGxDaUVKdkcKaFAyaE5BWVVmZDUrRnN5VVN3bDBTV3NoT3BORmRMc0NzY3pISkhycUpWYUVvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVTlCa3lhSGpPVG1RM29LYWlOaXFmCjVwZTF4L293Q2dZSUtvWkl6ajBFQXdJRFNBQXdSUUlnTzR4M3EyNmhhL1Z0NTRCT1Awc1hVNGt5ckVpNDR6TUcKc0d0Z25LY0NLbk1DSVFEcVhsSzBqSGNKSVE2bTRWanRub0VQWGdzQ2JrdW45WmxvVmxhbWtPNXAzZz09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - CertData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJrakNDQVRlZ0F3SUJBZ0lJVjVQT1FRblJoSGd3Q2dZSUtvWkl6ajBFQXdJd0l6RWhNQjhHQTFVRUF3d1kKYXpOekxXTnNhV1Z1ZEMxallVQXhOelUxTkRnek5qYzVNQjRYRFRJMU1EZ3hPREF5TWpFeE9Wb1hEVEkyTURneApPREF5TWpFeE9Wb3dNREVYTUJVR0ExVUVDaE1PYzNsemRHVnRPbTFoYzNSbGNuTXhGVEFUQmdOVkJBTVRESE41CmMzUmxiVHBoWkcxcGJqQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJMTjcrbjNXRDY0TThTMEEKT1Bpd2hReFZRNWdLTStRTk11REFzSlM1UVZFdTIyajZwaFlQYTNyQWFLU1hnZE1EdVYvbTRUamxTQmxCM2dJQwpnZW5wdTc2alNEQkdNQTRHQTFVZER3RUIvd1FFQXdJRm9EQVRCZ05WSFNVRUREQUtCZ2dyQmdFRkJRY0RBakFmCkJnTlZIU01FR0RBV2dCVGlxTWRFM0xYbElwVHRiREJnN0ZVcmV1NHVVREFLQmdncWhrak9QUVFEQWdOSkFEQkcKQWlFQXRPQ0s4ZmdzZmxhaTczcXdXMkhQbWM2bDVXNmR2L1BzNGhHNDZFRkV0VlFDSVFDenFkQitkZnFiWkJ5cwpNUm0zbDU1N3pNOFBNcDhRUE5lVFdiM0VoOEdtVGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCi0tLS0tQkVHSU4gQ0VSVElGSUNBVEUtLS0tLQpNSUlCZGpDQ0FSMmdBd0lCQWdJQkFEQUtCZ2dxaGtqT1BRUURBakFqTVNFd0h3WURWUVFEREJock0zTXRZMnhwClpXNTBMV05oUURFM05UVTBPRE0yTnprd0hoY05NalV3T0RFNE1ESXlNVEU1V2hjTk16VXdPREUyTURJeU1URTUKV2pBak1TRXdId1lEVlFRRERCaHJNM010WTJ4cFpXNTBMV05oUURFM05UVTBPRE0yTnprd1dUQVRCZ2NxaGtqTwpQUUlCQmdncWhrak9QUU1CQndOQ0FBU3JxQzd2RUhKYzQzUThIWG5MT0VQeXkyM0tYZzlHOVkycTJUaVFLMGhoCkJvNnh1WUxDMTFSWkhGNC85NGZJZitZa3BCcmRpcFFNTjRSaVVrUGZzM28zbzBJd1FEQU9CZ05WSFE4QkFmOEUKQkFNQ0FxUXdEd1lEVlIwVEFRSC9CQVV3QXdFQi96QWRCZ05WSFE0RUZnUVU0cWpIUk55MTVTS1U3V3d3WU94VgpLM3J1TGxBd0NnWUlLb1pJemowRUF3SURSd0F3UkFJZ041WmJQaEs4YkwxWllmcStGTVNNbkFCdEgzRSsxcnFoClpRUHY4UWM3S09nQ0lCMWhBclM5SXhKU1dYYlV3ZWE4WU0yVUNEMlplYTVxMHJMQnd4SHFqb3RjCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - KeyData: "LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1IY0NBUUVFSUpuM2dPd0lBNzJGMXE2dkhvMHdDRk1RS0VXVmVnejlQYy9NRFhVVDU5c3pvQW9HQ0NxR1NNNDkKQXdFSG9VUURRZ0FFczN2NmZkWVByZ3p4TFFBNCtMQ0ZERlZEbUFvejVBMHk0TUN3bExsQlVTN2JhUHFtRmc5cgplc0JvcEplQjB3TzVYK2JoT09WSUdVSGVBZ0tCNmVtN3ZnPT0KLS0tLS1FTkQgRUMgUFJJVkFURSBLRVktLS0tLQo=", - }, - { - Name: "cluster2", - Host: "https://10.6.80.12:6443", - Description: "Kubernetes Cluster 2", - CAData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURCVENDQWUyZ0F3SUJBZ0lJWCtGQVJITzJWdVl3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TlRFd016QXdNelEyTlROYUZ3MHpOVEV3TWpnd016VXhOVE5hTUJVeApFekFSQmdOVkJBTVRDbXQxWW1WeWJtVjBaWE13Z2dFaU1BMEdDU3FHU0liM0RRRUJBUVVBQTRJQkR3QXdnZ0VLCkFvSUJBUUROdFJSeG5JYVU2MS93UHVWNkpiR0hLaWtaZWVmYXlNOEFzVHRQeXQwaU5BaFgvVWNUT1pSVWYyZmUKTXBKSFNDdy9QQjJ2d1dCZDB2OVBEVWZ6RTYxL0lKcmhWZU54NmRxK0VPdVFqRmI2TlMvbkpiWmpXVFoyRFhBRQpkS1lwaGpXWGV3dWVuK0htTjlyK2tIZGlORVdmc0xDb1hWOFFMSmVRZXF4NHY2eTFkaEE1Ly9sdGxRV0ZsN2ZFCkRzeUpQb05tQmhzSy9SNEpYVDZ4Q0NqYmJmRFF6OE1hTXA0aWZnRW9ac0R6T2RlK3ZDL3diMEcxVmlpL1FjOEEKSCtSb2tJUkI2MTZqM0VjOWhsd1V4UjNyZThqOGFFdDJob1BkbTVhekt1YjQ0LzlKc3VaU1BWR0FYVXVjekQyawpYUU5UOWErOVl4RXZJZ0psdFpuRGVYSjZmeTFqQWdNQkFBR2pXVEJYTUE0R0ExVWREd0VCL3dRRUF3SUNwREFQCkJnTlZIUk1CQWY4RUJUQURBUUgvTUIwR0ExVWREZ1FXQkJSVEo2WWgwQ3lWVDRGNEhJUSszYWVhQzZzMUlUQVYKQmdOVkhSRUVEakFNZ2dwcmRXSmxjbTVsZEdWek1BMEdDU3FHU0liM0RRRUJDd1VBQTRJQkFRQ1pZM0xuUDl4Qgp1MjJaMENtazdiNUI2T1RtRS9obWlNRDNXY3kyb3RpcVhvZUE1VENRWnZxUk1PTk1NR3NCZFYza3FRRFhyaVR1CkQ4MDdaL3Q3SlAvOGo1RmRncDBCbkpoOUtlQkhaeVBybWFQNW9veFg4VWhFZHF0bWdsTUtBSk0xVmpKTExZNUwKMUcyRVNWa09NKytTSkV5MGJMbU9LM3M2YUI1L05pK3BVVS82Z1ZFNDFIZnh1SEJVYUtrRXNJR1d0WnNxbEY1cwp1RVAzZnY0ZmJRZVAxTmEvRlNaSmh4NlBybEdjZlE2Vmh6a1haY2Q1RExKMHZHbHZoTGdwREowdUVsUEd6NU5KCldFelVJZ3BGV25UMUd4TlhuNm02Sm9oMmNoWU5oQ25KOGZCS0Q4elozei9LdExCa2JwMDdMRlgwbzhXQUhEQmcKK1A4cjUwTm5IT3FHCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - CertData: 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURLVENDQWhHZ0F3SUJBZ0lJWUlIcnhuOXYvOTR3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TlRFd016QXdNelEyTlROYUZ3MHlOakV3TXpBd016VXhOVE5hTUR3eApIekFkQmdOVkJBb1RGbXQxWW1WaFpHMDZZMngxYzNSbGNpMWhaRzFwYm5NeEdUQVhCZ05WQkFNVEVHdDFZbVZ5CmJtVjBaWE10WVdSdGFXNHdnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEd0NGWW0KY1JldG5xWjJBR21FUGJ2L1pRVzdrSzFKNHlBUmI2ODVlNEl5QjQ2OXdKOFVtd1crOXB2OWNsVm5YV3pnQkY3WQpnbkIyNi9DTWtqOVpnRkhOaWFPK3RXcXg3cHJKTkdDaHhiY29VMDZzQUIwR3MvUkVHK3VYMnFZa3RnVHpRNWFrCitGKzZrZElRek5VdnpwWFUzUFlHcDFEcGlzNWxZNFYzMkhnSkRaZkMrRzlpT1ROd1dtTzV3bGF1K1lsQkRGTVIKS2tnVFo1MDY5OXl5NWxnUlRoaTczSG1hUCtLWGdIT0QrNkNmeUZ6Ty80KzdLaExjanZpTGFUVjBjNGkzYkxidQo0K0llU2pwMEpxU2lxQlFtRHhHRitYMndCSkNiRVZObWJrd0hCVlh5eXlxdGJWV2dibEN6SWJ0UDBadHE3RUMwClo0WkNDemc5RFNqRGQwZWZBZ01CQUFHalZqQlVNQTRHQTFVZER3RUIvd1FFQXdJRm9EQVRCZ05WSFNVRUREQUsKQmdnckJnRUZCUWNEQWpBTUJnTlZIUk1CQWY4RUFqQUFNQjhHQTFVZEl3UVlNQmFBRkZNbnBpSFFMSlZQZ1hnYwpoRDdkcDVvTHF6VWhNQTBHQ1NxR1NJYjNEUUVCQ3dVQUE0SUJBUUFzTHJBMEhFOVNGNHAvSzBQejlVdFZLdk9rCjNUaEZ0ODZGTGlWNEJMcTZ5RSt1aHdHazk0b3p1Y3c1T2h1WEduTWFaUlFMYnliS3pJcjQvUUNqQVQ5eFVURWQKSFQ4c1c1UEhHMm5lbGJRckFNdVhRaFpXdlZTRmZ6Tk5GZG0rNStzdnVXajVtMklyNXNYRURlV2dBdmNLd3k2cwpVUjIxSmdtVXZHSFFtTVVZYWpnYW8wS3NjQmtNOEpZekFKdXZWdkJtTytwdzN5T2hVVmMyY0JnV0gybmx3L3RLCjZRR0Y0ZUZPRnJaYzM5UHp2NmlVOHFBYnNrQlVTVlhuaXg3dTNZUzFwTHNuZitSY0U0MmR1RzV4Nll3UFBlb28KRXBwWVluZ1R5TlpKKzVGaHVZdTUwMDJsQm1DV3JrSkxEek5NWlR3ai9DeG52ekVnSWJPWFpndnRpSXhpCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K", - KeyData: 
"LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcFFJQkFBS0NBUUVBOEFoV0puRVhyWjZtZGdCcGhEMjcvMlVGdTVDdFNlTWdFVyt2T1h1Q01nZU92Y0NmCkZKc0Z2dmFiL1hKVloxMXM0QVJlMklKd2R1dndqSkkvV1lCUnpZbWp2clZxc2U2YXlUUmdvY1czS0ZOT3JBQWQKQnJQMFJCdnJsOXFtSkxZRTgwT1dwUGhmdXBIU0VNelZMODZWMU56MkJxZFE2WXJPWldPRmQ5aDRDUTJYd3ZodgpZamt6Y0ZwanVjSldydm1KUVF4VEVTcElFMmVkT3ZmY3N1WllFVTRZdTl4NW1qL2lsNEJ6Zy91Z244aGN6ditQCnV5b1MzSTc0aTJrMWRIT0l0MnkyN3VQaUhrbzZkQ2Frb3FnVUpnOFJoZmw5c0FTUW14RlRabTVNQndWVjhzc3EKclcxVm9HNVFzeUc3VDlHYmF1eEF0R2VHUWdzNFBRMG93M2RIbndJREFRQUJBb0lCQUFxSWt4OUV2MEZEUVJMVQptY3pQMkx3d2RydndjV3BZcVVPYW54bnFyWi84Yk9zdTFNeFdzVDNjSEtSV3JDREpITW9INXhHaFI4WXdQSEl1CnlORG9ySzVVWi9jcWh2QWdCSExuOVlXajQ1SEZkaUplTHVmb1pjUEhaZU5ZR1FwclluUTZkeFh1UUdVem1RQmIKdk05SVJaTDl6MTRqWVkyZUpjaVZRWG9zNmJlYjUxYjgxNGljMTg1RHNtK2RhekRuNG14M2tNT0lueFR2K01pNQpxSWx5OU8vQURIaWpNd2taNVY5K3grSlpxM3Exc09SeTBKcUUwd1czbFcwQnFxSWRGRFRSelAvMFdiVGZZdDU3CmlRNjJySnhEN1RGNzR3Ni8xc3VqalU3Y2VsK1ltdTRvRFZjb05pOGdoTE1UZXE1OWpPMk1xR1FqMU5HUHRuTHkKb0hFOUs4RUNnWUVBOVRiQ3VEUlBtVDFmN0MwUldYUkJnejlENWhhRExkaS82aitjMGx5amR0TjkyR2JHdFNFMQozVVIvc2dsRit3bVliWmJmNExqUnpibnNZTGFleHRtakpzWXdFK0t4SSt3SEloSElPRFFaSTBaT08vMTJYdm1oCjB4dDdUNmNTVTZZSHZEbkp4WkpFaGt3TjBwL1ZoSHZMZFZMWmd3ZnNtQWlVekNTTVBmaUkySmtDZ1lFQStwYzcKTUJ0ZFNBZnd5cElMaUR6dis2WjFBQnVrWUphWnFQTk9IRGdLeElRNVJEQVZ5K3hSQXJWQ1V3RE5WdDJtTGJHUQpHZysvWXl4ZllEd2dSYTIxMUJDL0pUU3E4S1dHYVdXM0h2Z0VmMk54cVVIckNkT3VGZGhqdWkrMlRBdEdBb0w1CjluSGx3TXBZVVpydjF6dENCRmx4L1ZYd3NxUGZ6K2l5ZG1CVUxQY0NnWUVBcFM5Q2RMd29jdDQ1WSt2b0tBNTgKbzJGVzZBUjZVY1FWWkVOOTdPZWk1a1VLSFdEK3NyMndmMkhKYzdGemh1eXIxZ2N3d1QwL2VBcXJCV3VBQWd4UwpMNmlLY3ByZklZZTZObVVzTDFCSkxzNEpuYmZjcVpZWVFSSGVPNFljZm1UMkNRSVV2aGNPT2ptNWhnMU4xSFZnClZhUitDaHFvY3JJMUtsL2thVXFuUk9FQ2dZRUF5ZWx0RVhnYkUxMENrZFpYWUhEcFZUVnNkS2ZSTE5wcitZd0IKMWc3NTdobzBJbE0wWE5tTzlNV2tLVWt1S3QzeGRrUHFQbldOMnBUNFRJeGwzSDc1VVdRbEFBK041TlVhbG5ZVQp0T2xXaG1aVVFQTVNOUnJRM0YwOURkby80c242b1M5enhUVkUwTEM1dFJkSVJYNUQxVWxVNWJHSGZnazQzMGM1CjlOUHRQMFVDZ1lFQXk1L05hZXJlZDlQSDcyVzNDNW1UQy9jbEQ
xdUdmZXdPVkFkdko1eldlMDh4Q01CcEpya1QKU3dKM3NZOXYyaEdwSUxYZnU5YnppL0RWaW1sZk5MNkZBV2VaR3BCYm1qTHBEcUxWRzdhcUNHQVcvRG9iNmVlWApweEFiQTBLaUhoaE9sdUdONHdkbFdQRzNWdTlZNXZIb3RBNW1iZlRpaHhUYTlEZWRkZXlkNC9RPQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=", - }, - }, +func loadBootstrapConfigFromEnv() (*BootstrapConfig, bool) { + if !hasBootstrapEnv() { + return nil, false } + + config := &BootstrapConfig{ + Enabled: true, + Users: make([]UserSeed, 0, 1), + Registries: make([]RegistrySeed, 0, 1), + Clusters: make([]ClusterSeed, 0), + } + + adminUser := strings.TrimSpace(os.Getenv("BOOTSTRAP_ADMIN_USER")) + adminPass := strings.TrimSpace(os.Getenv("BOOTSTRAP_ADMIN_PASS")) + if adminUser != "" && adminPass != "" { + config.Users = append(config.Users, UserSeed{ + Username: adminUser, + Password: adminPass, + Email: getEnv("BOOTSTRAP_ADMIN_EMAIL", adminUser+"@example.local"), + Role: "admin", + }) + } + + if registryURL := os.Getenv("BOOTSTRAP_REGISTRY_URL"); registryURL != "" { + registryUser := getEnv("BOOTSTRAP_REGISTRY_ROBOT_USER", getEnv("BOOTSTRAP_REGISTRY_USER", "")) + registryPass := getEnv("BOOTSTRAP_REGISTRY_ROBOT_PASS", getEnv("BOOTSTRAP_REGISTRY_PASS", "")) + config.Registries = append(config.Registries, RegistrySeed{ + Name: getEnv("BOOTSTRAP_REGISTRY_NAME", "harbor"), + URL: registryURL, + Description: getEnv("BOOTSTRAP_REGISTRY_DESC", ""), + Username: registryUser, + Password: registryPass, + Insecure: parseBoolEnv("BOOTSTRAP_REGISTRY_INSECURE", false), + }) + } + + enableClusters := parseBoolEnv("BOOTSTRAP_ENABLE_CLUSTERS", false) || + os.Getenv("BOOTSTRAP_CLUSTERS") != "" + if enableClusters { + for _, clusterName := range discoverBootstrapClusters() { + prefix := "BOOTSTRAP_CLUSTER_" + normalizeEnvName(clusterName) + "_" + host := os.Getenv(prefix + "HOST") + if host == "" { + continue + } + + config.Clusters = append(config.Clusters, ClusterSeed{ + Name: strings.ToLower(clusterName), + Host: host, + Description: os.Getenv(prefix + "DESC"), + CAData: 
os.Getenv(prefix + "CA"), + CertData: os.Getenv(prefix + "CERT"), + KeyData: os.Getenv(prefix + "KEY"), + Token: os.Getenv(prefix + "TOKEN"), + }) + } + } + + return config, true } +func hasBootstrapEnv() bool { + for _, env := range os.Environ() { + if strings.HasPrefix(env, "BOOTSTRAP_") { + return true + } + } + return false +} + +func discoverBootstrapClusters() []string { + names := make(map[string]struct{}) + + if configured := os.Getenv("BOOTSTRAP_CLUSTERS"); configured != "" { + for _, name := range strings.Split(configured, ",") { + name = strings.TrimSpace(name) + if name != "" { + names[normalizeEnvName(name)] = struct{}{} + } + } + } + + for _, env := range os.Environ() { + key, _, ok := strings.Cut(env, "=") + if !ok || !strings.HasPrefix(key, "BOOTSTRAP_CLUSTER_") || !strings.HasSuffix(key, "_HOST") { + continue + } + name := strings.TrimSuffix(strings.TrimPrefix(key, "BOOTSTRAP_CLUSTER_"), "_HOST") + if name != "" { + names[name] = struct{}{} + } + } + + result := make([]string, 0, len(names)) + for name := range names { + result = append(result, name) + } + sort.Strings(result) + return result +} + +func normalizeEnvName(name string) string { + replacer := strings.NewReplacer("-", "_", ".", "_", " ", "_") + return strings.ToUpper(replacer.Replace(strings.TrimSpace(name))) +} + +func parseBoolEnv(key string, defaultValue bool) bool { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return defaultValue + } + parsed, err := strconv.ParseBool(value) + if err != nil { + return defaultValue + } + return parsed +} + +func getEnv(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} + +// GetDefaultBootstrapConfig 返回安全的空默认配置。 +// +// 这里不能包含真实或示例账号密码、Registry 或集群凭据。预注入数据必须来自 +// BOOTSTRAP_CONFIG_JSON、BOOTSTRAP_* 环境变量,或显式提供的 bootstrap 配置文件。 +func GetDefaultBootstrapConfig() *BootstrapConfig { + return &BootstrapConfig{ + Enabled: false, + Users: []UserSeed{}, + Registries: 
[]RegistrySeed{}, + Clusters: []ClusterSeed{}, + } +} diff --git a/backend/internal/bootstrap/config_test.go b/backend/internal/bootstrap/config_test.go new file mode 100644 index 0000000..595a3fd --- /dev/null +++ b/backend/internal/bootstrap/config_test.go @@ -0,0 +1,103 @@ +package bootstrap + +import "testing" + +func TestDefaultBootstrapConfigIsEmptyAndDisabled(t *testing.T) { + config := GetDefaultBootstrapConfig() + if config.Enabled { + t.Fatal("default bootstrap config must be disabled") + } + if len(config.Users) != 0 || len(config.Registries) != 0 || len(config.Clusters) != 0 { + t.Fatalf("default bootstrap config must not include seeded data: %#v", config) + } +} + +func TestLoadBootstrapConfigFromEnv(t *testing.T) { + t.Setenv("BOOTSTRAP_ADMIN_USER", "root") + t.Setenv("BOOTSTRAP_ADMIN_PASS", "secret") + t.Setenv("BOOTSTRAP_ADMIN_EMAIL", "root@example.com") + t.Setenv("BOOTSTRAP_REGISTRY_NAME", "harbor") + t.Setenv("BOOTSTRAP_REGISTRY_URL", "https://harbor.example.com") + t.Setenv("BOOTSTRAP_REGISTRY_DESC", "test registry") + t.Setenv("BOOTSTRAP_REGISTRY_USER", "robot") + t.Setenv("BOOTSTRAP_REGISTRY_PASS", "robot-secret") + t.Setenv("BOOTSTRAP_REGISTRY_ROBOT_USER", "robot$ocdp") + t.Setenv("BOOTSTRAP_REGISTRY_ROBOT_PASS", "robot-token") + t.Setenv("BOOTSTRAP_REGISTRY_INSECURE", "true") + t.Setenv("BOOTSTRAP_ENABLE_CLUSTERS", "true") + t.Setenv("BOOTSTRAP_CLUSTERS", "cluster1,gpu-prod") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_HOST", "https://cluster1.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_DESC", "cluster one") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_CA", "ca-data") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_CERT", "cert-data") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_KEY", "key-data") + t.Setenv("BOOTSTRAP_CLUSTER_GPU_PROD_HOST", "https://gpu.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_GPU_PROD_TOKEN", "bearer-token") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from environment") + } + 
+ if len(config.Users) != 1 || config.Users[0].Username != "root" || config.Users[0].Password != "secret" { + t.Fatalf("unexpected users: %#v", config.Users) + } + + if len(config.Registries) != 1 { + t.Fatalf("expected one registry, got %d", len(config.Registries)) + } + registry := config.Registries[0] + if registry.Name != "harbor" || registry.URL != "https://harbor.example.com" || !registry.Insecure { + t.Fatalf("unexpected registry: %#v", registry) + } + if registry.Username != "robot$ocdp" || registry.Password != "robot-token" { + t.Fatalf("expected robot registry credentials, got %#v", registry) + } + + if len(config.Clusters) != 2 { + t.Fatalf("expected two clusters, got %d: %#v", len(config.Clusters), config.Clusters) + } + + clusterByName := map[string]ClusterSeed{} + for _, cluster := range config.Clusters { + clusterByName[cluster.Name] = cluster + } + + if clusterByName["cluster1"].Host != "https://cluster1.example.com:6443" { + t.Fatalf("unexpected cluster1: %#v", clusterByName["cluster1"]) + } + if clusterByName["gpu_prod"].Token != "bearer-token" { + t.Fatalf("unexpected gpu_prod: %#v", clusterByName["gpu_prod"]) + } +} + +func TestBootstrapClustersRequireExplicitEnable(t *testing.T) { + t.Setenv("BOOTSTRAP_ADMIN_USER", "root") + t.Setenv("BOOTSTRAP_ADMIN_PASS", "secret") + t.Setenv("BOOTSTRAP_CLUSTERS", "cluster1") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_HOST", "https://cluster1.example.com:6443") + t.Setenv("BOOTSTRAP_CLUSTER_CLUSTER1_TOKEN", "token") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from environment") + } + if len(config.Clusters) != 0 { + t.Fatalf("bootstrap clusters must be disabled unless BOOTSTRAP_ENABLE_CLUSTERS=true, got %#v", config.Clusters) + } +} + +func TestBootstrapEnvDoesNotCreateDefaultAdmin(t *testing.T) { + t.Setenv("BOOTSTRAP_REGISTRY_URL", "https://harbor.example.com") + + config, ok := loadBootstrapConfigFromEnv() + if !ok { + t.Fatal("expected bootstrap config from 
environment") + } + if len(config.Users) != 0 { + t.Fatalf("expected no users without explicit admin credentials, got %#v", config.Users) + } + if len(config.Registries) != 1 { + t.Fatalf("expected one registry, got %d", len(config.Registries)) + } +} diff --git a/backend/internal/bootstrap/seeder.go b/backend/internal/bootstrap/seeder.go index 1047c14..de3896e 100644 --- a/backend/internal/bootstrap/seeder.go +++ b/backend/internal/bootstrap/seeder.go @@ -84,6 +84,12 @@ func (s *Seeder) seedUsers(ctx context.Context) error { // 创建用户 user := entity.NewUser(userSeed.Username, passwordHash, userSeed.Email) user.ID = uuid.New().String() + if userSeed.Role != "" { + user.Role = userSeed.Role + } + if user.Role == "admin" { + user.WorkspaceID = entity.DefaultWorkspaceID + } if err := s.repos.UserRepo.Create(ctx, user); err != nil { log.Printf(" ✗ Failed to create user '%s': %v", userSeed.Username, err) @@ -105,6 +111,7 @@ func (s *Seeder) seedRegistries(ctx context.Context) error { log.Printf(" ↳ Seeding %d registry(ies)...", len(s.config.Registries)) + ownerID := s.bootstrapOwnerID(ctx) for _, registrySeed := range s.config.Registries { // 检查 Registry 是否已存在 existingRegistry, _ := s.repos.RegistryRepo.GetByName(ctx, registrySeed.Name) @@ -117,6 +124,9 @@ func (s *Seeder) seedRegistries(ctx context.Context) error { registry := &entity.Registry{ ID: uuid.New().String(), Name: registrySeed.Name, + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: ownerID, + Visibility: "global_shared", URL: registrySeed.URL, Description: registrySeed.Description, Username: registrySeed.Username, @@ -146,6 +156,7 @@ func (s *Seeder) seedClusters(ctx context.Context) error { log.Printf(" ↳ Seeding %d cluster(s)...", len(s.config.Clusters)) + ownerID := s.bootstrapOwnerID(ctx) for _, clusterSeed := range s.config.Clusters { // 检查 Cluster 是否已存在 existingCluster, _ := s.repos.ClusterRepo.GetByName(ctx, clusterSeed.Name) @@ -158,6 +169,9 @@ func (s *Seeder) seedClusters(ctx context.Context) 
error { cluster := &entity.Cluster{ ID: uuid.New().String(), Name: clusterSeed.Name, + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: ownerID, + Visibility: "global_shared", Host: clusterSeed.Host, Description: clusterSeed.Description, CAData: clusterSeed.CAData, @@ -179,3 +193,22 @@ func (s *Seeder) seedClusters(ctx context.Context) error { return nil } +func (s *Seeder) bootstrapOwnerID(ctx context.Context) string { + for _, userSeed := range s.config.Users { + if userSeed.Role == "admin" { + if user, err := s.repos.UserRepo.GetByUsername(ctx, userSeed.Username); err == nil && user != nil { + return user.ID + } + } + } + users, err := s.repos.UserRepo.List(ctx) + if err != nil { + return "" + } + for _, user := range users { + if user.Role == "admin" { + return user.ID + } + } + return "" +} diff --git a/backend/internal/domain/entity/artifact.go b/backend/internal/domain/entity/artifact.go index f80b936..3b097c0 100644 --- a/backend/internal/domain/entity/artifact.go +++ b/backend/internal/domain/entity/artifact.go @@ -1,8 +1,8 @@ package entity import ( - "strings" - "time" + "strings" + "time" ) // ArtifactType Artifact 类型 @@ -16,16 +16,16 @@ const ( // Artifact OCI Artifact 领域实体 type Artifact struct { - RegistryID string - Repository string - Tag string - Digest string - Type ArtifactType - Size int64 - MediaType string - ConfigType string // Config layer 的 mediaType (用于更准确的类型判断) - Annotations map[string]string - CreatedAt time.Time + RegistryID string + Repository string + Tag string + Digest string + Type ArtifactType + Size int64 + MediaType string + ConfigType string // Config layer 的 mediaType (用于更准确的类型判断) + Annotations map[string]string + CreatedAt time.Time } // Repository 仓库信息 @@ -50,34 +50,34 @@ func NewArtifact(registryID, repository, tag, digest string) *Artifact { // SetType 设置 Artifact 类型(根据 mediaType 识别为 chart | image | other) // 已废弃:请使用 DetermineType() 方法,它提供更准确的类型判断 func (a *Artifact) SetType(mediaType string) { - lowerMediaType := 
strings.ToLower(strings.TrimSpace(mediaType)) + lowerMediaType := strings.ToLower(strings.TrimSpace(mediaType)) - containsAny := func(target string, keywords ...string) bool { - for _, keyword := range keywords { - if keyword != "" && strings.Contains(target, keyword) { - return true - } - } - return false - } + containsAny := func(target string, keywords ...string) bool { + for _, keyword := range keywords { + if keyword != "" && strings.Contains(target, keyword) { + return true + } + } + return false + } - switch { - case lowerMediaType == "": - a.Type = ArtifactTypeOther - case containsAny(lowerMediaType, - "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", - ): - a.Type = ArtifactTypeChart - case containsAny(lowerMediaType, - "docker", "vnd.docker", "docker.distribution", "docker.container.image", - "vnd.oci", "oci.image", "opencontainers", "container.image", - ): - a.Type = ArtifactTypeImage - case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest") || strings.Contains(lowerMediaType, "container"): - a.Type = ArtifactTypeImage - default: - a.Type = ArtifactTypeOther - } + switch { + case lowerMediaType == "": + a.Type = ArtifactTypeOther + case containsAny(lowerMediaType, + "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", + ): + a.Type = ArtifactTypeChart + case containsAny(lowerMediaType, + "docker", "vnd.docker", "docker.distribution", "docker.container.image", + "vnd.oci", "oci.image", "opencontainers", "container.image", + ): + a.Type = ArtifactTypeImage + case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest") || strings.Contains(lowerMediaType, "container"): + a.Type = ArtifactTypeImage + default: + a.Type = ArtifactTypeOther + } } // DetermineType 智能判断 Artifact 类型(综合多种信息) @@ -87,85 +87,84 @@ func (a *Artifact) SetType(mediaType string) { // 3. Repository 名称 - charts/ 前缀暗示 // 4. 
MediaType - 兜底判断 func (a *Artifact) DetermineType() { - containsAny := func(target string, keywords ...string) bool { - for _, keyword := range keywords { - if keyword != "" && strings.Contains(target, keyword) { - return true - } - } - return false - } - - // 1. 优先检查 ConfigType(最准确的判断方式) - if a.ConfigType != "" { - lowerConfigType := strings.ToLower(strings.TrimSpace(a.ConfigType)) - - // Helm Chart 的 config.mediaType - if containsAny(lowerConfigType, - "helm.config", "cncf.helm", "helm.chart", "chart.content", - ) { - a.Type = ArtifactTypeChart - return - } - - // Docker/OCI Image 的 config.mediaType - if containsAny(lowerConfigType, - "docker.container.image", "oci.image.config", - ) { - a.Type = ArtifactTypeImage - return - } - } - - // 2. 检查 Annotations - for key, value := range a.Annotations { - lowerKey := strings.ToLower(key) - lowerValue := strings.ToLower(value) - - if containsAny(lowerKey, "helm", "chart") || - containsAny(lowerValue, "helm", "chart") { - a.Type = ArtifactTypeChart - return - } - } - - // 3. 检查 Repository 名称(辅助判断) - if strings.HasPrefix(strings.ToLower(a.Repository), "charts/") { - // charts/ 开头的仓库很可能是 Helm Chart - // 但需要结合 MediaType 进一步确认 - lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) - - // 如果是 OCI manifest 格式,很可能是以 OCI 格式存储的 Helm Chart - if strings.Contains(lowerMediaType, "oci.image.manifest") || - strings.Contains(lowerMediaType, "vnd.oci") { - a.Type = ArtifactTypeChart - return - } - } - - // 4. 
回退到基于 MediaType 的判断(兜底逻辑) - lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) - - switch { - case lowerMediaType == "": - a.Type = ArtifactTypeOther - case containsAny(lowerMediaType, - "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", - ): - a.Type = ArtifactTypeChart - case containsAny(lowerMediaType, - "docker", "vnd.docker", "docker.distribution", "docker.container.image", - ): - a.Type = ArtifactTypeImage - case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest"): - a.Type = ArtifactTypeImage - default: - a.Type = ArtifactTypeOther - } + containsAny := func(target string, keywords ...string) bool { + for _, keyword := range keywords { + if keyword != "" && strings.Contains(target, keyword) { + return true + } + } + return false + } + + // 1. 优先检查 ConfigType(最准确的判断方式) + if a.ConfigType != "" { + lowerConfigType := strings.ToLower(strings.TrimSpace(a.ConfigType)) + + // Helm Chart 的 config.mediaType + if containsAny(lowerConfigType, + "helm.config", "cncf.helm", "helm.chart", "chart.content", + ) { + a.Type = ArtifactTypeChart + return + } + + // Docker/OCI Image 的 config.mediaType + if containsAny(lowerConfigType, + "docker.container.image", "oci.image.config", + ) { + a.Type = ArtifactTypeImage + return + } + } + + // 2. 检查 Annotations + for key, value := range a.Annotations { + lowerKey := strings.ToLower(key) + lowerValue := strings.ToLower(value) + + if containsAny(lowerKey, "helm", "chart") || + containsAny(lowerValue, "helm", "chart") { + a.Type = ArtifactTypeChart + return + } + } + + // 3. 
检查 Repository 名称(辅助判断) + if strings.HasPrefix(strings.ToLower(a.Repository), "charts/") { + // charts/ 开头的仓库很可能是 Helm Chart + // 但需要结合 MediaType 进一步确认 + lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) + + // 如果是 OCI manifest 格式,很可能是以 OCI 格式存储的 Helm Chart + if strings.Contains(lowerMediaType, "oci.image.manifest") || + strings.Contains(lowerMediaType, "vnd.oci") { + a.Type = ArtifactTypeChart + return + } + } + + // 4. 回退到基于 MediaType 的判断(兜底逻辑) + lowerMediaType := strings.ToLower(strings.TrimSpace(a.MediaType)) + + switch { + case lowerMediaType == "": + a.Type = ArtifactTypeOther + case containsAny(lowerMediaType, + "helm", "cncf.helm", "helm.chart", "helm+", "chart+json", "chart.v1", "helm-package", "helm.config", + ): + a.Type = ArtifactTypeChart + case containsAny(lowerMediaType, + "docker", "vnd.docker", "docker.distribution", "docker.container.image", + ): + a.Type = ArtifactTypeImage + case strings.Contains(lowerMediaType, "image") || strings.Contains(lowerMediaType, "manifest"): + a.Type = ArtifactTypeImage + default: + a.Type = ArtifactTypeOther + } } // IsChart 判断是否为 Helm Chart func (a *Artifact) IsChart() bool { return a.Type == ArtifactTypeChart } - diff --git a/backend/internal/domain/entity/cluster.go b/backend/internal/domain/entity/cluster.go index 486031a..307b559 100644 --- a/backend/internal/domain/entity/cluster.go +++ b/backend/internal/domain/entity/cluster.go @@ -6,26 +6,31 @@ import ( // Cluster Kubernetes 集群领域实体 type Cluster struct { - ID string - Name string - Host string // Kubernetes API Server URL - CAData string // Base64 encoded CA certificate - CertData string // Base64 encoded client certificate - KeyData string // Base64 encoded client key - Token string // Bearer token (alternative to cert auth) - Description string - CreatedAt time.Time - UpdatedAt time.Time + ID string + WorkspaceID string + OwnerID string + Visibility string + Name string + Host string // Kubernetes API Server URL + CAData string // Base64 
encoded CA certificate + CertData string // Base64 encoded client certificate + KeyData string // Base64 encoded client key + Token string // Bearer token (alternative to cert auth) + Description string + DefaultNamespace string + CreatedAt time.Time + UpdatedAt time.Time } // NewCluster 创建新集群 func NewCluster(name, host string) *Cluster { now := time.Now() return &Cluster{ - Name: name, - Host: host, - CreatedAt: now, - UpdatedAt: now, + Name: name, + Host: host, + Visibility: "private", + CreatedAt: now, + UpdatedAt: now, } } @@ -63,6 +68,9 @@ func (c *Cluster) Validate() error { if c.Host == "" { return ErrInvalidClusterHost } + if c.Visibility == "" { + c.Visibility = "private" + } // 必须有认证方式:证书或 Token if (c.CertData == "" || c.KeyData == "") && c.Token == "" { return ErrInvalidClusterAuth @@ -100,4 +108,3 @@ users: return kubeconfig } - diff --git a/backend/internal/domain/entity/errors.go b/backend/internal/domain/entity/errors.go index 91a65ad..c190c78 100644 --- a/backend/internal/domain/entity/errors.go +++ b/backend/internal/domain/entity/errors.go @@ -5,11 +5,15 @@ import "errors" // 领域错误定义 var ( // User errors - ErrInvalidUsername = errors.New("invalid username") - ErrInvalidPassword = errors.New("invalid password") - ErrUserNotFound = errors.New("user not found") - ErrUserExists = errors.New("user already exists") - ErrTokenRevoked = errors.New("token has been revoked") + ErrInvalidUsername = errors.New("invalid username") + ErrInvalidPassword = errors.New("invalid password") + ErrUserNotFound = errors.New("user not found") + ErrUserExists = errors.New("user already exists") + ErrTokenRevoked = errors.New("token has been revoked") + ErrUnauthorized = errors.New("authentication required") + ErrForbidden = errors.New("permission denied") + ErrUserInactive = errors.New("user is inactive") + ErrWorkspaceSuspended = errors.New("workspace is suspended") // Cluster errors ErrInvalidClusterName = errors.New("invalid cluster name") @@ -37,4 +41,11 @@ var ( 
ErrArtifactNotFound = errors.New("artifact not found") ErrRepositoryNotFound = errors.New("repository not found") ErrValuesSchemaNotFound = errors.New("values schema not found") + + // Workspace errors + ErrWorkspaceNotFound = errors.New("workspace not found") + ErrWorkspaceExists = errors.New("workspace already exists") + ErrWorkspaceNamespaceConflict = errors.New("workspace namespace conflict") + ErrUserHasInstances = errors.New("user has active instances") + ErrProtectedNamespace = errors.New("protected namespace") ) diff --git a/backend/internal/domain/entity/instance.go b/backend/internal/domain/entity/instance.go index 1bc3c37..aa8a1b7 100644 --- a/backend/internal/domain/entity/instance.go +++ b/backend/internal/domain/entity/instance.go @@ -34,6 +34,8 @@ const ( // Instance Helm 应用实例领域实体 type Instance struct { ID string + WorkspaceID string + OwnerID string ClusterID string Name string // Helm Release Name Namespace string @@ -51,6 +53,8 @@ type Instance struct { Revision int // Helm Release Revision CreatedAt time.Time UpdatedAt time.Time + Replicas int // Running K8s replicas (enriched, not persisted) + OwnerUsername string } // NewInstance 创建新实例 diff --git a/backend/internal/domain/entity/instance_diagnostics.go b/backend/internal/domain/entity/instance_diagnostics.go new file mode 100644 index 0000000..587186d --- /dev/null +++ b/backend/internal/domain/entity/instance_diagnostics.go @@ -0,0 +1,70 @@ +package entity + +import "time" + +type InstanceDiagnostics struct { + InstanceName string + Namespace string + Pods []InstancePodDiagnostics + Services []InstanceServiceDiagnostics + Events []InstanceEventDiagnostics + Logs []InstancePodLog + CollectedAt time.Time +} + +type InstancePodDiagnostics struct { + Name string + Namespace string + Phase string + NodeName string + PodIP string + HostIP string + RestartCount int32 + Containers []InstanceContainerDiagnostics + Conditions []InstanceConditionDiagnostics + CreationTimestamp time.Time +} + +type 
InstanceContainerDiagnostics struct { + Name string + Image string + Ready bool + RestartCount int32 + State string + Reason string + Message string +} + +type InstanceConditionDiagnostics struct { + Type string + Status string + Reason string + Message string +} + +type InstanceServiceDiagnostics struct { + Name string + Namespace string + Type string + ClusterIP string + Ports []InstanceEntryPort +} + +type InstanceEventDiagnostics struct { + Type string + Reason string + Message string + InvolvedKind string + InvolvedName string + Count int32 + FirstTimestamp time.Time + LastTimestamp time.Time +} + +type InstancePodLog struct { + Pod string + Container string + TailLines int64 + Log string + Error string +} diff --git a/backend/internal/domain/entity/metrics.go b/backend/internal/domain/entity/metrics.go index ad39546..6909ae5 100644 --- a/backend/internal/domain/entity/metrics.go +++ b/backend/internal/domain/entity/metrics.go @@ -4,70 +4,118 @@ import "time" // ClusterMetrics 集群监控指标 type ClusterMetrics struct { - ClusterID string `json:"cluster_id"` - ClusterName string `json:"cluster_name"` - Status string `json:"status"` // healthy, warning, error, unknown - Uptime string `json:"uptime"` - NodeCount int `json:"node_count"` - PodCount int `json:"pod_count"` - LastCheck time.Time `json:"last_check"` - + ClusterID string `json:"cluster_id"` + ClusterName string `json:"cluster_name"` + Status string `json:"status"` // healthy, warning, error, unknown + Uptime string `json:"uptime"` + NodeCount int `json:"node_count"` + PodCount int `json:"pod_count"` + LastCheck time.Time `json:"last_check"` + // 集群级别资源汇总 - TotalCPU string `json:"total_cpu"` // 如 "8 cores" - TotalMemory string `json:"total_memory"` // 如 "32 GB" - TotalGPU int `json:"total_gpu"` // GPU 总数 - - UsedCPU string `json:"used_cpu"` // 如 "4.5 cores" - UsedMemory string `json:"used_memory"` // 如 "16 GB" - UsedGPU int `json:"used_gpu"` // 使用的 GPU 数 - - CPUUsage float64 `json:"cpu_usage"` // 百分比 - 
MemoryUsage float64 `json:"memory_usage"` // 百分比 - GPUUsage float64 `json:"gpu_usage"` // 百分比 - + TotalCPU string `json:"total_cpu"` // 如 "8 cores" + TotalMemory string `json:"total_memory"` // 如 "32 GB" + TotalGPU int `json:"total_gpu"` // GPU 总数 + + UsedCPU string `json:"used_cpu"` // 如 "4.5 cores" + UsedMemory string `json:"used_memory"` // 如 "16 GB" + UsedGPU int `json:"used_gpu"` // 使用的 GPU 数 + + CPUUsage float64 `json:"cpu_usage"` // 百分比 + MemoryUsage float64 `json:"memory_usage"` // 百分比 + GPUUsage float64 `json:"gpu_usage"` // 百分比 + + CPURequests string `json:"cpu_requests,omitempty"` + CPULimits string `json:"cpu_limits,omitempty"` + MemoryRequests string `json:"memory_requests,omitempty"` + MemoryLimits string `json:"memory_limits,omitempty"` + GPURequests int64 `json:"gpu_requests,omitempty"` + GPULimits int64 `json:"gpu_limits,omitempty"` + GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb,omitempty"` + GPUMemoryLimitsMB int64 `json:"gpu_memory_limits_mb,omitempty"` + AllocatedGPU int64 `json:"allocated_gpu,omitempty"` + AllocatedGPUMemoryMB int64 `json:"allocated_gpu_memory_mb,omitempty"` + ResourceUsageByUser []UserResourceUsage `json:"resource_usage_by_user,omitempty"` + // 单机资源最大值 - MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores" - MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB" - MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量 - MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率 - MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率 - MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 单机最高GPU使用率 - + MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores" + MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB" + MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量 + MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率 + MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率 + MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 
单机最高GPU使用率 + // 节点列表(简化信息) Nodes []NodeMetrics `json:"nodes,omitempty"` } +// ResourceAllocation is derived from Kubernetes Pod resources requests/limits. +type ResourceAllocation struct { + CPURequestsMilli int64 + CPULimitsMilli int64 + MemoryRequestsBytes int64 + MemoryLimitsBytes int64 + GPURequests int64 + GPULimits int64 + GPUMemoryRequestsMB int64 + GPUMemoryLimitsMB int64 +} + +type PodResourceAllocation struct { + ClusterID string + Namespace string + PodName string + InstanceName string + Allocation ResourceAllocation +} + +type UserResourceUsage struct { + UserID string `json:"user_id"` + Username string `json:"username"` + WorkspaceID string `json:"workspace_id"` + InstanceCount int `json:"instance_count"` + PodCount int `json:"pod_count"` + CPURequests string `json:"cpu_requests"` + CPULimits string `json:"cpu_limits"` + MemoryRequests string `json:"memory_requests"` + MemoryLimits string `json:"memory_limits"` + GPURequests int64 `json:"gpu_requests"` + GPULimits int64 `json:"gpu_limits"` + GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb"` + GPUMemoryLimitsMB int64 `json:"gpu_memory_limits_mb"` +} + // NodeMetrics 节点监控指标 type NodeMetrics struct { - NodeName string `json:"node_name"` - Status string `json:"status"` // Ready, NotReady - Role string `json:"role"` // control-plane, worker - Age string `json:"age"` - PodCount int `json:"pod_count"` - + NodeName string `json:"node_name"` + Status string `json:"status"` // Ready, NotReady + Role string `json:"role"` // control-plane, worker + Age string `json:"age"` + PodCount int `json:"pod_count"` + // CPU 资源 - CPUCapacity string `json:"cpu_capacity"` // 如 "4 cores" - CPUAllocatable string `json:"cpu_allocatable"` - CPUUsage string `json:"cpu_usage"` + CPUCapacity string `json:"cpu_capacity"` // 如 "4 cores" + CPUAllocatable string `json:"cpu_allocatable"` + CPUUsage string `json:"cpu_usage"` CPUPercent float64 `json:"cpu_percent"` - + // 内存资源 - MemoryCapacity string `json:"memory_capacity"` // 如 
"16 GB" + MemoryCapacity string `json:"memory_capacity"` // 如 "16 GB" MemoryAllocatable string `json:"memory_allocatable"` MemoryUsage string `json:"memory_usage"` MemoryPercent float64 `json:"memory_percent"` - + // GPU 资源(如果有) GPUCapacity int `json:"gpu_capacity"` // GPU 总数 GPUUsage int `json:"gpu_usage"` // 已使用的 GPU GPUPercent float64 `json:"gpu_percent"` GPUType string `json:"gpu_type,omitempty"` // GPU 型号,如 "NVIDIA-Tesla-T4" - + // 其他信息 - OSImage string `json:"os_image,omitempty"` - KernelVersion string `json:"kernel_version,omitempty"` - ContainerRuntime string `json:"container_runtime,omitempty"` - KubeletVersion string `json:"kubelet_version,omitempty"` + OSImage string `json:"os_image,omitempty"` + KernelVersion string `json:"kernel_version,omitempty"` + ContainerRuntime string `json:"container_runtime,omitempty"` + KubeletVersion string `json:"kubelet_version,omitempty"` } // MonitoringSummary 监控汇总 @@ -80,4 +128,3 @@ type MonitoringSummary struct { TotalPods int `json:"total_pods"` LastUpdate time.Time `json:"last_update"` } - diff --git a/backend/internal/domain/entity/registry.go b/backend/internal/domain/entity/registry.go index 1b7ee56..5abbae4 100644 --- a/backend/internal/domain/entity/registry.go +++ b/backend/internal/domain/entity/registry.go @@ -7,6 +7,9 @@ import ( // Registry OCI Registry 领域实体 type Registry struct { ID string + WorkspaceID string + OwnerID string + Visibility string Name string URL string Description string @@ -21,10 +24,11 @@ type Registry struct { func NewRegistry(name, url string) *Registry { now := time.Now() return &Registry{ - Name: name, - URL: url, - CreatedAt: now, - UpdatedAt: now, + Name: name, + URL: url, + Visibility: "private", + CreatedAt: now, + UpdatedAt: now, } } @@ -55,6 +59,8 @@ func (r *Registry) Validate() error { if r.URL == "" { return ErrInvalidRegistryURL } + if r.Visibility == "" { + r.Visibility = "private" + } return nil } - diff --git a/backend/internal/domain/entity/tenant_binding.go 
b/backend/internal/domain/entity/tenant_binding.go new file mode 100644 index 0000000..11cd088 --- /dev/null +++ b/backend/internal/domain/entity/tenant_binding.go @@ -0,0 +1,123 @@ +package entity + +import ( + "errors" + "fmt" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/validation" +) + +const ( + DefaultTenantServiceAccountName = "tenant-admin" + DefaultTenantRoleBindingName = "tenant-admin" + DefaultTenantClusterRoleName = "admin" + DefaultTenantResourceQuotaName = "tenant-quota" + MaxTenantKubeconfigTTL = 2 * time.Hour +) + +var ( + ErrInvalidTenantNamespace = errors.New("invalid tenant namespace") + ErrInvalidTenantServiceAccount = errors.New("invalid tenant service account") + ErrInvalidTenantRoleBinding = errors.New("invalid tenant role binding") + ErrInvalidTenantClusterRole = errors.New("invalid tenant cluster role") + ErrInvalidTenantResourceQuota = errors.New("invalid tenant resource quota") + ErrInvalidTenantKubeconfigToken = errors.New("invalid tenant kubeconfig token") +) + +// TenantBinding describes the Kubernetes resources that grant a workspace access +// to one tenant namespace. It intentionally excludes credential material. +type TenantBinding struct { + Namespace string + ServiceAccountName string + RoleBindingName string + ClusterRoleName string + ResourceQuotaName string + Labels map[string]string + Annotations map[string]string + ResourceQuotaHard corev1.ResourceList +} + +// TenantKubeconfig contains a short-lived kubeconfig and its expiration time. +// Callers must treat Kubeconfig as secret material and must not persist or log it. +type TenantKubeconfig struct { + Kubeconfig string + ExpiresAt time.Time +} + +// NewTenantBinding returns a tenant binding with production-safe default object names. 
+func NewTenantBinding(namespace string) TenantBinding { + return TenantBinding{ + Namespace: namespace, + ServiceAccountName: DefaultTenantServiceAccountName, + RoleBindingName: DefaultTenantRoleBindingName, + ClusterRoleName: DefaultTenantClusterRoleName, + ResourceQuotaName: DefaultTenantResourceQuotaName, + Labels: map[string]string{ + "ocdp.io/managed-by": "ocdp", + "ocdp.io/tenant": namespace, + }, + } +} + +// WithDefaults fills optional names while preserving explicit caller choices. +func (b TenantBinding) WithDefaults() TenantBinding { + if b.ServiceAccountName == "" { + b.ServiceAccountName = DefaultTenantServiceAccountName + } + if b.RoleBindingName == "" { + b.RoleBindingName = DefaultTenantRoleBindingName + } + if b.ClusterRoleName == "" { + b.ClusterRoleName = DefaultTenantClusterRoleName + } + if b.ResourceQuotaName == "" { + b.ResourceQuotaName = DefaultTenantResourceQuotaName + } + if b.Labels == nil { + b.Labels = map[string]string{} + } + if b.Labels["ocdp.io/managed-by"] == "" { + b.Labels["ocdp.io/managed-by"] = "ocdp" + } + if b.Namespace != "" && b.Labels["ocdp.io/tenant"] == "" { + b.Labels["ocdp.io/tenant"] = b.Namespace + } + return b +} + +// Validate checks the object names required to provision a tenant namespace. 
+func (b TenantBinding) Validate() error { + b = b.WithDefaults() + if strings.TrimSpace(b.Namespace) == "" || len(validation.IsDNS1123Label(b.Namespace)) > 0 { + return ErrInvalidTenantNamespace + } + if strings.TrimSpace(b.ServiceAccountName) == "" || len(validation.IsDNS1123Subdomain(b.ServiceAccountName)) > 0 { + return ErrInvalidTenantServiceAccount + } + if strings.TrimSpace(b.RoleBindingName) == "" || len(validation.IsDNS1123Subdomain(b.RoleBindingName)) > 0 { + return ErrInvalidTenantRoleBinding + } + if strings.TrimSpace(b.ClusterRoleName) == "" || len(validation.IsDNS1123Subdomain(b.ClusterRoleName)) > 0 { + return ErrInvalidTenantClusterRole + } + if strings.TrimSpace(b.ResourceQuotaName) == "" || len(validation.IsDNS1123Subdomain(b.ResourceQuotaName)) > 0 { + return ErrInvalidTenantResourceQuota + } + return nil +} + +// TenantTokenTTL caps requested kubeconfig lifetimes at MaxTenantKubeconfigTTL. +func TenantTokenTTL(requested time.Duration) time.Duration { + if requested <= 0 || requested > MaxTenantKubeconfigTTL { + return MaxTenantKubeconfigTTL + } + return requested +} + +func (b TenantBinding) String() string { + b = b.WithDefaults() + return fmt.Sprintf("tenant namespace %q serviceAccount %q roleBinding %q", b.Namespace, b.ServiceAccountName, b.RoleBindingName) +} diff --git a/backend/internal/domain/entity/tenant_binding_test.go b/backend/internal/domain/entity/tenant_binding_test.go new file mode 100644 index 0000000..6fed41d --- /dev/null +++ b/backend/internal/domain/entity/tenant_binding_test.go @@ -0,0 +1,38 @@ +package entity + +import ( + "testing" + "time" +) + +func TestTenantTokenTTLCapsAtTwoHours(t *testing.T) { + testCases := []struct { + name string + requested time.Duration + want time.Duration + }{ + {name: "uses default for zero", requested: 0, want: MaxTenantKubeconfigTTL}, + {name: "keeps shorter ttl", requested: 30 * time.Minute, want: 30 * time.Minute}, + {name: "caps longer ttl", requested: 24 * time.Hour, want: 
MaxTenantKubeconfigTTL}, + } + + for _, tc := range testCases { + if got := TenantTokenTTL(tc.requested); got != tc.want { + t.Fatalf("%s: expected %s, got %s", tc.name, tc.want, got) + } + } +} + +func TestTenantBindingWithDefaults(t *testing.T) { + binding := NewTenantBinding("tenant-a").WithDefaults() + + if err := binding.Validate(); err != nil { + t.Fatalf("expected valid default binding: %v", err) + } + if binding.ServiceAccountName != DefaultTenantServiceAccountName { + t.Fatalf("expected default service account %q, got %q", DefaultTenantServiceAccountName, binding.ServiceAccountName) + } + if binding.Labels["ocdp.io/tenant"] != "tenant-a" { + t.Fatalf("expected tenant label, got %#v", binding.Labels) + } +} diff --git a/backend/internal/domain/entity/user.go b/backend/internal/domain/entity/user.go index bf6387f..372d4e3 100644 --- a/backend/internal/domain/entity/user.go +++ b/backend/internal/domain/entity/user.go @@ -6,13 +6,17 @@ import ( // User 用户领域实体 type User struct { - ID string - Username string - PasswordHash string - Email string - RevokedAfter time.Time // 全局 Token 撤销时间 - CreatedAt time.Time - UpdatedAt time.Time + ID string + Username string + PasswordHash string + Email string + Role string + WorkspaceID string + IsActive bool + MustChangePassword bool + RevokedAfter time.Time // 全局 Token 撤销时间 + CreatedAt time.Time + UpdatedAt time.Time } // NewUser 创建新用户 @@ -22,6 +26,9 @@ func NewUser(username, passwordHash, email string) *User { Username: username, PasswordHash: passwordHash, Email: email, + Role: "user", + WorkspaceID: DefaultWorkspaceID, + IsActive: true, RevokedAfter: time.Unix(0, 0), // 初始值:1970-01-01 CreatedAt: now, UpdatedAt: now, @@ -49,6 +56,11 @@ func (u *User) Validate() error { if u.PasswordHash == "" { return ErrInvalidPassword } + if u.Role == "" { + u.Role = "user" + } + if u.WorkspaceID == "" && u.Role != "admin" { + u.WorkspaceID = DefaultWorkspaceID + } return nil } - diff --git a/backend/internal/domain/entity/workspace.go 
b/backend/internal/domain/entity/workspace.go new file mode 100644 index 0000000..5dea0eb --- /dev/null +++ b/backend/internal/domain/entity/workspace.go @@ -0,0 +1,150 @@ +package entity + +import ( + "strings" + "time" +) + +const ( + DefaultWorkspaceID = "00000000-0000-0000-0000-000000000010" + DefaultWorkspaceName = "default" +) + +type WorkspaceStatus string + +const ( + WorkspaceActive WorkspaceStatus = "active" + WorkspaceSuspended WorkspaceStatus = "suspended" +) + +type Workspace struct { + ID string + Name string + Status WorkspaceStatus + K8sNamespace string + K8sSAName string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + CreatedBy string + CreatedAt time.Time + UpdatedAt time.Time +} + +func NewWorkspace(name, createdBy string) *Workspace { + now := time.Now() + return &Workspace{ + Name: name, + Status: WorkspaceActive, + K8sNamespace: NamespaceForWorkspace(name), + K8sSAName: ServiceAccountForWorkspace(name), + CreatedBy: createdBy, + CreatedAt: now, + UpdatedAt: now, + } +} + +func NamespaceForWorkspace(name string) string { + if name == "" { + name = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-ws-", name) +} + +func NamespaceForUser(username string) string { + if username == "" { + username = "user" + } + return prefixedDNSLabel("ocdp-u-", username) +} + +func ServiceAccountForWorkspace(name string) string { + if name == "" { + name = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-ws-", name) +} + +func ServiceAccountForNamespace(namespace string) string { + if namespace == "" { + namespace = DefaultWorkspaceName + } + return prefixedDNSLabel("ocdp-sa-", namespace) +} + +func prefixedDNSLabel(prefix, value string) string { + label := normalizeDNSLabel(value) + maxLabelLen := 63 - len(prefix) + if maxLabelLen < 1 { + maxLabelLen = 1 + } + if len(label) > maxLabelLen { + label = strings.Trim(label[:maxLabelLen], "-") + } + if label == "" { + label = DefaultWorkspaceName 
+ if len(label) > maxLabelLen { + label = label[:maxLabelLen] + } + } + return prefix + label +} + +func normalizeDNSLabel(value string) string { + out := make([]rune, 0, len(value)) + lastDash := false + for _, r := range value { + valid := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') + if r >= 'A' && r <= 'Z' { + r = r + ('a' - 'A') + valid = true + } + if valid { + out = append(out, r) + lastDash = false + continue + } + if !lastDash && len(out) > 0 { + out = append(out, '-') + lastDash = true + } + } + for len(out) > 0 && out[len(out)-1] == '-' { + out = out[:len(out)-1] + } + if len(out) == 0 { + return DefaultWorkspaceName + } + return string(out) +} + +type WorkspaceClusterBinding struct { + ID string + WorkspaceID string + ClusterID string + Namespace string + ServiceAccount string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + Status string + CreatedAt time.Time + UpdatedAt time.Time +} + +type AuditLog struct { + ID string + WorkspaceID string + UserID string + Action string + ResourceType string + ResourceID string + ResourceName string + Details map[string]interface{} + IPAddress string + UserAgent string + CreatedAt time.Time +} diff --git a/backend/internal/domain/repository/cluster_repository.go b/backend/internal/domain/repository/cluster_repository.go index b598025..18ea1ea 100644 --- a/backend/internal/domain/repository/cluster_repository.go +++ b/backend/internal/domain/repository/cluster_repository.go @@ -9,20 +9,19 @@ import ( type ClusterRepository interface { // Create 创建集群 Create(ctx context.Context, cluster *entity.Cluster) error - + // GetByID 根据 ID 获取集群 GetByID(ctx context.Context, id string) (*entity.Cluster, error) - + // GetByName 根据名称获取集群 GetByName(ctx context.Context, name string) (*entity.Cluster, error) - + // Update 更新集群 Update(ctx context.Context, cluster *entity.Cluster) error - + // Delete 删除集群 Delete(ctx context.Context, id string) error - + // List 列出所有集群 List(ctx context.Context) 
([]*entity.Cluster, error) } - diff --git a/backend/internal/domain/repository/helm_client.go b/backend/internal/domain/repository/helm_client.go index 325aef4..47e2b58 100644 --- a/backend/internal/domain/repository/helm_client.go +++ b/backend/internal/domain/repository/helm_client.go @@ -3,32 +3,50 @@ package repository import ( "context" "github.com/ocdp/cluster-service/internal/domain/entity" + "k8s.io/apimachinery/pkg/api/resource" ) +type ResourceVector struct { + CPU resource.Quantity + Memory resource.Quantity + GPU int64 + GPUMemoryMB int64 +} + +type ResourceEstimate struct { + Requests ResourceVector + Limits ResourceVector +} + // HelmClient Helm 客户端接口(Output Port) type HelmClient interface { // Install 安装 Helm Chart Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error - + // Upgrade 升级 Helm Release Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error - + // Uninstall 卸载 Helm Release Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error - + // Rollback 回滚 Helm Release Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error - + // GetStatus 获取 Release 状态 GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) - + // GetHistory 获取 Release 历史 GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) - + // List 列出集群中的所有 Releases List(ctx context.Context, cluster *entity.Cluster, namespace string) ([]*entity.Instance, error) - + // GetValues 获取 Release 的 values GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) -} + // GetChartDefaultValues 从 chart 包中读取默认 values + GetChartDefaultValues(chartPath string) (map[string]interface{}, error) + + // EstimateInstanceResources renders an instance chart with final values and 
sums Pod template resources. + EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*ResourceEstimate, error) +} diff --git a/backend/internal/domain/repository/instance_diagnostics_client.go b/backend/internal/domain/repository/instance_diagnostics_client.go new file mode 100644 index 0000000..2f65c2b --- /dev/null +++ b/backend/internal/domain/repository/instance_diagnostics_client.go @@ -0,0 +1,17 @@ +package repository + +import ( + "context" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +type InstanceDiagnosticsClient interface { + GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) +} + +// PodLogStreamer streams pod log lines over channels. The caller reads from the +// lines channel until it is closed; errors are sent to the errs channel. +type PodLogStreamer interface { + StreamPodLogs(ctx context.Context, cluster *entity.Cluster, namespace, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) +} diff --git a/backend/internal/domain/repository/instance_repository.go b/backend/internal/domain/repository/instance_repository.go index 9cd955b..1347db1 100644 --- a/backend/internal/domain/repository/instance_repository.go +++ b/backend/internal/domain/repository/instance_repository.go @@ -9,23 +9,22 @@ import ( type InstanceRepository interface { // Create 创建实例 Create(ctx context.Context, instance *entity.Instance) error - + // GetByID 根据 ID 获取实例 GetByID(ctx context.Context, id string) (*entity.Instance, error) - + // GetByClusterAndName 根据集群 ID 和名称获取实例 GetByClusterAndName(ctx context.Context, clusterID, name string) (*entity.Instance, error) - + // Update 更新实例 Update(ctx context.Context, instance *entity.Instance) error - + // Delete 删除实例 Delete(ctx context.Context, id string) error - + // ListByCluster 列出指定集群的所有实例 ListByCluster(ctx context.Context, clusterID string) 
([]*entity.Instance, error) - + // List 列出所有实例 List(ctx context.Context) ([]*entity.Instance, error) } - diff --git a/backend/internal/domain/repository/metrics_client.go b/backend/internal/domain/repository/metrics_client.go index 498e0d3..c7b5300 100644 --- a/backend/internal/domain/repository/metrics_client.go +++ b/backend/internal/domain/repository/metrics_client.go @@ -10,8 +10,10 @@ import ( type MetricsClient interface { // GetClusterMetrics 获取集群的监控指标 GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) - + // GetNodeMetrics 获取集群的节点指标 GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) -} + // GetPodResourceAllocations returns Pod requests/limits grouped by Pod. + GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) +} diff --git a/backend/internal/domain/repository/oci_client.go b/backend/internal/domain/repository/oci_client.go index 1cd6c9a..b322be9 100644 --- a/backend/internal/domain/repository/oci_client.go +++ b/backend/internal/domain/repository/oci_client.go @@ -7,26 +7,29 @@ import ( // OCIClient OCI Registry 客户端接口(Output Port) type OCIClient interface { - // ListRepositories 列出 Registry 中的所有 repositories - ListRepositories(ctx context.Context, registry *entity.Registry) ([]string, error) - + // ListRepositories 列出 Registry 中的 repositories. 
+ // artifactType 支持 "chart" 和 "all",默认由调用方决定。 + ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) + // ListArtifacts 列出指定 repository 的所有 artifacts // mediaTypeFilter: "all", "image", "chart", "other" - 使用模糊匹配过滤 ListArtifacts(ctx context.Context, registry *entity.Registry, repository, mediaTypeFilter string) ([]*entity.Artifact, error) - + // GetArtifact 获取指定 artifact 的详细信息 GetArtifact(ctx context.Context, registry *entity.Registry, repository, reference string) (*entity.Artifact, error) - + // GetValuesSchema 获取 Helm Chart 的 values schema GetValuesSchema(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) - + + // GetValuesYAML 获取 Helm Chart 原始 values.yaml + GetValuesYAML(ctx context.Context, registry *entity.Registry, repository, reference string) (string, error) + // PullArtifact 下载 artifact 到本地 PullArtifact(ctx context.Context, registry *entity.Registry, repository, reference, destPath string) error - + // PushArtifact 推送 artifact 到 Registry PushArtifact(ctx context.Context, registry *entity.Registry, repository, tag, sourcePath string) error - + // CheckHealth 检查 Registry 健康状态 CheckHealth(ctx context.Context, registry *entity.Registry) error } - diff --git a/backend/internal/domain/repository/registry_repository.go b/backend/internal/domain/repository/registry_repository.go index 7c7d2e5..fa32920 100644 --- a/backend/internal/domain/repository/registry_repository.go +++ b/backend/internal/domain/repository/registry_repository.go @@ -9,20 +9,19 @@ import ( type RegistryRepository interface { // Create 创建 Registry Create(ctx context.Context, registry *entity.Registry) error - + // GetByID 根据 ID 获取 Registry GetByID(ctx context.Context, id string) (*entity.Registry, error) - + // GetByName 根据名称获取 Registry GetByName(ctx context.Context, name string) (*entity.Registry, error) - + // Update 更新 Registry Update(ctx context.Context, registry *entity.Registry) error - + // 
Delete 删除 Registry Delete(ctx context.Context, id string) error - + // List 列出所有 Registries List(ctx context.Context) ([]*entity.Registry, error) } - diff --git a/backend/internal/domain/repository/tenant_kube_client.go b/backend/internal/domain/repository/tenant_kube_client.go new file mode 100644 index 0000000..522fdf6 --- /dev/null +++ b/backend/internal/domain/repository/tenant_kube_client.go @@ -0,0 +1,22 @@ +package repository + +import ( + "context" + "time" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +// TenantKubeClient provisions namespace-scoped Kubernetes access for tenants. +type TenantKubeClient interface { + EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error + IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) + GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*ResourceQuotaUsage, error) + SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error + DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error +} + +type ResourceQuotaUsage struct { + Hard ResourceVector + Used ResourceVector +} diff --git a/backend/internal/domain/repository/user_repository.go b/backend/internal/domain/repository/user_repository.go index eff3479..5a913ed 100644 --- a/backend/internal/domain/repository/user_repository.go +++ b/backend/internal/domain/repository/user_repository.go @@ -9,20 +9,22 @@ import ( type UserRepository interface { // Create 创建用户 Create(ctx context.Context, user *entity.User) error - + // GetByID 根据 ID 获取用户 GetByID(ctx context.Context, id string) (*entity.User, error) - + // GetByUsername 根据用户名获取用户 GetByUsername(ctx context.Context, username string) (*entity.User, error) - + // Update 更新用户 Update(ctx context.Context, user *entity.User) error - + // Delete 删除用户 Delete(ctx 
context.Context, id string) error - + // List 列出所有用户 List(ctx context.Context) ([]*entity.User, error) -} + // AdminExists checks whether any admin user exists (lightweight EXISTS query) + AdminExists(ctx context.Context) (bool, error) +} diff --git a/backend/internal/domain/repository/workspace_repository.go b/backend/internal/domain/repository/workspace_repository.go new file mode 100644 index 0000000..d308777 --- /dev/null +++ b/backend/internal/domain/repository/workspace_repository.go @@ -0,0 +1,28 @@ +package repository + +import ( + "context" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +type WorkspaceRepository interface { + Create(ctx context.Context, workspace *entity.Workspace) error + GetByID(ctx context.Context, id string) (*entity.Workspace, error) + GetByName(ctx context.Context, name string) (*entity.Workspace, error) + Update(ctx context.Context, workspace *entity.Workspace) error + Delete(ctx context.Context, id string) error + List(ctx context.Context) ([]*entity.Workspace, error) +} + +type WorkspaceClusterBindingRepository interface { + Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error + Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) + ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error) + Delete(ctx context.Context, workspaceID, clusterID string) error +} + +type AuditLogRepository interface { + Create(ctx context.Context, log *entity.AuditLog) error + ListByWorkspace(ctx context.Context, workspaceID string, limit int) ([]*entity.AuditLog, error) +} diff --git a/backend/internal/domain/service/artifact_service.go b/backend/internal/domain/service/artifact_service.go index deb1363..7938cf8 100644 --- a/backend/internal/domain/service/artifact_service.go +++ b/backend/internal/domain/service/artifact_service.go @@ -4,6 +4,7 @@ import ( "context" "github.com/ocdp/cluster-service/internal/domain/entity" 
"github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // ArtifactService Artifact 浏览领域服务 @@ -25,22 +26,22 @@ func NewArtifactService( // GetRegistry 获取 Registry 信息 func (s *ArtifactService) GetRegistry(ctx context.Context, registryID string) (*entity.Registry, error) { - return s.registryRepo.GetByID(ctx, registryID) + return s.visibleRegistry(ctx, registryID) } -// ListRepositories 列出 Registry 中的所有 repositories -func (s *ArtifactService) ListRepositories(ctx context.Context, registryID string) ([]string, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) +// ListRepositories 列出 Registry 中的 repositories +func (s *ArtifactService) ListRepositories(ctx context.Context, registryID, artifactType string) ([]string, error) { + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } - return s.ociClient.ListRepositories(ctx, registry) + return s.ociClient.ListRepositories(ctx, registry, artifactType) } // ListArtifacts 列出 repository 中的所有 artifacts func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, repository, mediaTypeFilter string) ([]*entity.Artifact, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } @@ -50,7 +51,7 @@ func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, reposit // GetArtifact 获取 artifact 详情 func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repository, reference string) (*entity.Artifact, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return nil, entity.ErrRegistryNotFound } @@ -60,7 +61,7 @@ func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repositor // GetValuesSchema 获取 Helm Chart 的 values schema func (s *ArtifactService) 
GetValuesSchema(ctx context.Context, registryID, repository, reference string) (string, error) { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return "", entity.ErrRegistryNotFound } @@ -68,9 +69,19 @@ func (s *ArtifactService) GetValuesSchema(ctx context.Context, registryID, repos return s.ociClient.GetValuesSchema(ctx, registry, repository, reference) } +// GetValuesYAML 获取 Helm Chart 的原始 values.yaml +func (s *ArtifactService) GetValuesYAML(ctx context.Context, registryID, repository, reference string) (string, error) { + registry, err := s.visibleRegistry(ctx, registryID) + if err != nil { + return "", entity.ErrRegistryNotFound + } + + return s.ociClient.GetValuesYAML(ctx, registry, repository, reference) +} + // PullArtifact 下载 artifact func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, repository, reference, destPath string) error { - registry, err := s.registryRepo.GetByID(ctx, registryID) + registry, err := s.visibleRegistry(ctx, registryID) if err != nil { return entity.ErrRegistryNotFound } @@ -78,3 +89,17 @@ func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, reposito return s.ociClient.PullArtifact(ctx, registry, repository, reference, destPath) } +func (s *ArtifactService) visibleRegistry(ctx context.Context, registryID string) (*entity.Registry, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registry, err := s.registryRepo.GetByID(ctx, registryID) + if err != nil { + return nil, entity.ErrRegistryNotFound + } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return nil, entity.ErrRegistryNotFound + } + return registry, nil +} diff --git a/backend/internal/domain/service/auth_service.go b/backend/internal/domain/service/auth_service.go index fd7340a..b72305e 100644 --- 
a/backend/internal/domain/service/auth_service.go +++ b/backend/internal/domain/service/auth_service.go @@ -2,14 +2,27 @@ package service import ( "context" + "errors" + "strings" + "time" + "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + jwtpkg "github.com/ocdp/cluster-service/internal/pkg/jwt" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/validation" ) // AuthService 认证领域服务 type AuthService struct { userRepo repository.UserRepository + workspaceRepo repository.WorkspaceRepository + instanceRepo repository.InstanceRepository + clusterRepo repository.ClusterRepository + bindingRepo repository.WorkspaceClusterBindingRepository + tenantClient repository.TenantKubeClient passwordHasher PasswordHasher tokenGenerator TokenGenerator } @@ -22,27 +35,109 @@ type PasswordHasher interface { // TokenGenerator Token 生成器接口 type TokenGenerator interface { - Generate(userID, username string) (accessToken, refreshToken string, err error) + Generate(userID, username, role, workspaceID string) (accessToken, refreshToken string, err error) Verify(token string) (userID, username string, err error) VerifyWithIssuedAt(token string) (userID, username string, issuedAt int64, err error) + VerifyAccess(token string) (*jwtpkg.Claims, error) + VerifyRefresh(token string) (*jwtpkg.Claims, error) Refresh(refreshToken string) (newAccessToken string, err error) } // NewAuthService 创建认证服务 func NewAuthService( userRepo repository.UserRepository, + workspaceRepo repository.WorkspaceRepository, passwordHasher PasswordHasher, tokenGenerator TokenGenerator, ) *AuthService { return &AuthService{ userRepo: userRepo, + workspaceRepo: workspaceRepo, passwordHasher: passwordHasher, tokenGenerator: tokenGenerator, } } -// Register 注册新用户(仅需用户名和密码,邮箱将自动补全) -func (s *AuthService) Register(ctx context.Context, username, password string) 
(*entity.User, error) { +func (s *AuthService) SetUserLifecycleCleanup( + instanceRepo repository.InstanceRepository, + clusterRepo repository.ClusterRepository, + bindingRepo repository.WorkspaceClusterBindingRepository, + tenantClient repository.TenantKubeClient, +) { + s.instanceRepo = instanceRepo + s.clusterRepo = clusterRepo + s.bindingRepo = bindingRepo + s.tenantClient = tenantClient +} + +// Register 注册新用户。业务入口只允许 admin 调用;初始 admin 由 bootstrap seeder 创建。 +type UserWorkspaceOptions struct { + Namespace string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string +} + +func defaultEmail(username string) string { + return username + "@local.ocdp" +} + +// IsAdminExists checks whether any admin user already exists in the database. +func (s *AuthService) IsAdminExists(ctx context.Context) (bool, error) { + return s.userRepo.AdminExists(ctx) +} + +// SetupInitialAdmin creates the first admin user and returns access + refresh tokens. +// Fails if an admin already exists. 
+func (s *AuthService) SetupInitialAdmin(ctx context.Context, username, password, email string) (*entity.User, string, string, error) { + hasAdmin, err := s.IsAdminExists(ctx) + if err != nil { + return nil, "", "", err + } + if hasAdmin { + return nil, "", "", entity.ErrForbidden + } + + passwordHash, err := s.passwordHasher.Hash(password) + if err != nil { + return nil, "", "", err + } + + if email == "" { + email = defaultEmail(username) + } + + user := entity.NewUser(username, passwordHash, email) + user.ID = uuid.New().String() + user.Role = authz.RoleAdmin + user.WorkspaceID = entity.DefaultWorkspaceID + + if err := user.Validate(); err != nil { + return nil, "", "", err + } + if err := s.userRepo.Create(ctx, user); err != nil { + return nil, "", "", err + } + + // Generate tokens directly — avoid a separate login round-trip + accessToken, refreshToken, err := s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID) + if err != nil { + return nil, "", "", err + } + return user, accessToken, refreshToken, nil +} + +func (s *AuthService) Register(ctx context.Context, username, password, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + // 检查用户是否已存在 existingUser, _ := s.userRepo.GetByUsername(ctx, username) if existingUser != nil { @@ -54,13 +149,41 @@ func (s *AuthService) Register(ctx context.Context, username, password string) ( if err != nil { return nil, err } + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } + if normalizeUserRole(role) == authz.RoleUser { + normalizedOpts = defaultUserQuotaOptions(normalizedOpts) + } // 默认生成占位邮箱,避免数据库约束失败 - email := username + "@local.ocdp" + email := defaultEmail(username) // 创建用户 user := entity.NewUser(username, 
passwordHash, email) user.ID = uuid.New().String() + user.Role = normalizeUserRole(role) + user.WorkspaceID = workspaceID + if user.Role == authz.RoleUser { + workspace, err := s.createUserWorkspace(ctx, username, principal.UserID, normalizedOpts) + if err != nil { + return nil, err + } + user.WorkspaceID = workspace.ID + } + if user.WorkspaceID == "" { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if user.Role == authz.RoleAdmin { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if isActive != nil { + user.IsActive = *isActive + } + if mustChangePassword != nil { + user.MustChangePassword = *mustChangePassword + } if err := user.Validate(); err != nil { return nil, err @@ -73,31 +196,538 @@ func (s *AuthService) Register(ctx context.Context, username, password string) ( return user, nil } -// Login 用户登录 -func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, err error) { - // 查找用户 - user, err := s.userRepo.GetByUsername(ctx, username) +func (s *AuthService) createUserWorkspace(ctx context.Context, username, createdBy string, opts UserWorkspaceOptions) (*entity.Workspace, error) { + if s.workspaceRepo == nil { + return nil, entity.ErrWorkspaceNotFound + } + name := userWorkspaceName(username) + namespace := strings.TrimSpace(opts.Namespace) + if namespace == "" { + namespace = entity.NamespaceForUser(username) + } + if namespace != "" { + if len(validation.IsDNS1123Label(namespace)) > 0 { + return nil, entity.ErrInvalidNamespace + } + } + if existing, err := s.workspaceRepo.GetByName(ctx, name); err == nil && existing != nil { + if namespace != "" && existing.K8sNamespace != namespace { + if err := s.ensureNamespaceAvailable(ctx, namespace, existing.ID); err != nil { + return nil, err + } + } + applyWorkspaceOptions(existing, opts) + if namespace != "" { + existing.K8sNamespace = namespace + existing.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + if err := s.workspaceRepo.Update(ctx, 
existing); err != nil { + return nil, err + } + return existing, nil + } else if err != nil && !errors.Is(err, entity.ErrWorkspaceNotFound) { + return nil, err + } + if err := s.ensureNamespaceAvailable(ctx, namespace, ""); err != nil { + return nil, err + } + workspace := entity.NewWorkspace(name, createdBy) + workspace.ID = uuid.New().String() + workspace.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID) + if namespace != "" { + workspace.K8sNamespace = namespace + workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + workspace.QuotaCPU = strings.TrimSpace(opts.QuotaCPU) + workspace.QuotaMemory = strings.TrimSpace(opts.QuotaMemory) + workspace.QuotaGPU = strings.TrimSpace(opts.QuotaGPU) + workspace.QuotaGPUMem = strings.TrimSpace(opts.QuotaGPUMem) + if err := s.workspaceRepo.Create(ctx, workspace); err != nil { + if errors.Is(err, entity.ErrWorkspaceExists) { + existing, getErr := s.workspaceRepo.GetByName(ctx, name) + if getErr != nil { + return nil, err + } + if existing.K8sNamespace != namespace { + return nil, entity.ErrWorkspaceNamespaceConflict + } + return existing, nil + } + return nil, err + } + return workspace, nil +} + +func userWorkspaceName(username string) string { + return strings.TrimPrefix(entity.NamespaceForUser(username), "ocdp-u-") +} + +func (s *AuthService) ensureNamespaceAvailable(ctx context.Context, namespace, allowedWorkspaceID string) error { + if s.workspaceRepo == nil || strings.TrimSpace(namespace) == "" { + return nil + } + workspaces, err := s.workspaceRepo.List(ctx) if err != nil { - return "", "", entity.ErrUserNotFound + return err + } + for _, workspace := range workspaces { + if workspace == nil || workspace.K8sNamespace != namespace { + continue + } + if allowedWorkspaceID != "" && workspace.ID == allowedWorkspaceID { + continue + } + return entity.ErrWorkspaceNamespaceConflict + } + return nil +} + +func normalizeQuotaOptions(opts UserWorkspaceOptions) (UserWorkspaceOptions, error) { + 
opts.Namespace = strings.TrimSpace(opts.Namespace) + opts.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID) + opts.QuotaCPU = normalizeStandardQuotaQuantity(opts.QuotaCPU) + opts.QuotaMemory = normalizeStandardQuotaQuantity(opts.QuotaMemory) + opts.QuotaGPU = normalizeStandardQuotaQuantity(opts.QuotaGPU) + gpuMem, err := normalizeGPUMemoryQuota(opts.QuotaGPUMem) + if err != nil { + return opts, err + } + opts.QuotaGPUMem = gpuMem + for _, value := range []string{opts.QuotaCPU, opts.QuotaMemory, opts.QuotaGPU} { + if value == "" { + continue + } + if _, err := resource.ParseQuantity(value); err != nil { + return opts, entity.ErrInvalidTenantResourceQuota + } + } + if opts.Namespace != "" && len(validation.IsDNS1123Label(opts.Namespace)) > 0 { + return opts, entity.ErrInvalidNamespace + } + return opts, nil +} + +func defaultUserQuotaOptions(opts UserWorkspaceOptions) UserWorkspaceOptions { + if strings.TrimSpace(opts.QuotaGPU) == "" { + opts.QuotaGPU = "0" + } + if strings.TrimSpace(opts.QuotaGPUMem) == "" { + opts.QuotaGPUMem = "0" + } + return opts +} + +func (s *AuthService) ListUsers(ctx context.Context) ([]*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + return s.userRepo.List(ctx) +} + +func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + user, err := s.userRepo.GetByID(ctx, userID) + if err != nil { + return nil, entity.ErrUserNotFound + } + previousRole := user.Role + if role != "" { + user.Role = normalizeUserRole(role) + } + if workspaceID != "" && user.Role != authz.RoleUser { + user.WorkspaceID = 
workspaceID + } + workspaceHandled := false + if user.Role == authz.RoleAdmin { + user.WorkspaceID = entity.DefaultWorkspaceID + } + if user.Role == authz.RoleUser && (role != "" || workspaceID != "" || hasWorkspaceUpdates(opts)) { + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } + normalizedOpts = defaultUserQuotaOptions(normalizedOpts) + currentWorkspace, _ := s.currentUserWorkspace(ctx, user) + if currentWorkspace != nil && shouldCreatePrivateWorkspace(user, previousRole, currentWorkspace) { + if normalizedOpts.Namespace == "" || normalizedOpts.Namespace == currentWorkspace.K8sNamespace { + normalizedOpts.Namespace = "" + } + } + workspace, err := s.ensureUserWorkspaceForUpdate(ctx, user, previousRole, currentWorkspace, opts, normalizedOpts, principal.UserID) + if err != nil { + return nil, err + } + user.WorkspaceID = workspace.ID + workspaceHandled = true + } + if isActive != nil { + if user.ID == principal.UserID && !*isActive { + return nil, entity.ErrForbidden + } + user.IsActive = *isActive + } + if mustChangePassword != nil { + user.MustChangePassword = *mustChangePassword + } + if user.Role != authz.RoleAdmin && !workspaceHandled && hasWorkspaceUpdates(opts) { + normalizedOpts, err := normalizeQuotaOptions(opts) + if err != nil { + return nil, err + } + workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID) + if err != nil { + return nil, err + } + applyWorkspaceOptionsForUpdate(workspace, opts, normalizedOpts) + if err := s.workspaceRepo.Update(ctx, workspace); err != nil { + return nil, err + } + if err := s.syncWorkspaceBindings(ctx, workspace); err != nil { + return nil, err + } + } + user.RevokedAfter = time.Now() + user.UpdatedAt = time.Now() + if err := user.Validate(); err != nil { + return nil, err + } + if err := s.userRepo.Update(ctx, user); err != nil { + return nil, err + } + return user, nil +} + +func hasWorkspaceUpdates(opts UserWorkspaceOptions) bool { + return 
strings.TrimSpace(opts.Namespace) != "" || + strings.TrimSpace(opts.DefaultClusterID) != "" || + strings.TrimSpace(opts.QuotaCPU) != "" || + strings.TrimSpace(opts.QuotaMemory) != "" || + strings.TrimSpace(opts.QuotaGPU) != "" || + strings.TrimSpace(opts.QuotaGPUMem) != "" +} + +func applyWorkspaceOptions(workspace *entity.Workspace, opts UserWorkspaceOptions) { + if namespace := strings.TrimSpace(opts.Namespace); namespace != "" { + workspace.K8sNamespace = namespace + workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + if value := strings.TrimSpace(opts.DefaultClusterID); value != "" { + workspace.DefaultClusterID = value + } + if value := strings.TrimSpace(opts.QuotaCPU); value != "" { + workspace.QuotaCPU = value + } + if value := strings.TrimSpace(opts.QuotaMemory); value != "" { + workspace.QuotaMemory = value + } + if value := strings.TrimSpace(opts.QuotaGPU); value != "" { + workspace.QuotaGPU = value + } + if value := strings.TrimSpace(opts.QuotaGPUMem); value != "" { + workspace.QuotaGPUMem = value + } +} + +func (s *AuthService) currentUserWorkspace(ctx context.Context, user *entity.User) (*entity.Workspace, error) { + if s.workspaceRepo == nil || user == nil || user.WorkspaceID == "" { + return nil, entity.ErrWorkspaceNotFound + } + return s.workspaceRepo.GetByID(ctx, user.WorkspaceID) +} + +func shouldCreatePrivateWorkspace(user *entity.User, previousRole string, current *entity.Workspace) bool { + if user == nil { + return true + } + if previousRole == authz.RoleAdmin || user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID { + return true + } + if current == nil { + return true + } + return current.Name != userWorkspaceName(user.Username) +} + +func (s *AuthService) ensureUserWorkspaceForUpdate(ctx context.Context, user *entity.User, previousRole string, current *entity.Workspace, rawOpts, normalizedOpts UserWorkspaceOptions, createdBy string) (*entity.Workspace, error) { + if s.workspaceRepo == nil { + return 
nil, entity.ErrWorkspaceNotFound + } + if shouldCreatePrivateWorkspace(user, previousRole, current) { + return s.createUserWorkspace(ctx, user.Username, createdBy, normalizedOpts) + } + if rawNamespace := strings.TrimSpace(rawOpts.Namespace); rawNamespace != "" && rawNamespace != current.K8sNamespace { + if err := s.ensureNamespaceAvailable(ctx, rawNamespace, current.ID); err != nil { + return nil, err + } + } + applyWorkspaceOptionsForUpdate(current, rawOpts, normalizedOpts) + if err := s.workspaceRepo.Update(ctx, current); err != nil { + return nil, err + } + if err := s.syncWorkspaceBindings(ctx, current); err != nil { + return nil, err + } + return current, nil +} + +func applyWorkspaceOptionsForUpdate(workspace *entity.Workspace, rawOpts, normalizedOpts UserWorkspaceOptions) { + if namespace := strings.TrimSpace(rawOpts.Namespace); namespace != "" { + workspace.K8sNamespace = namespace + workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace) + } + if strings.TrimSpace(rawOpts.DefaultClusterID) != "" { + workspace.DefaultClusterID = normalizedOpts.DefaultClusterID + } + if strings.TrimSpace(rawOpts.QuotaCPU) != "" { + workspace.QuotaCPU = normalizedOpts.QuotaCPU + } + if strings.TrimSpace(rawOpts.QuotaMemory) != "" { + workspace.QuotaMemory = normalizedOpts.QuotaMemory + } + if strings.TrimSpace(rawOpts.QuotaGPU) != "" { + workspace.QuotaGPU = normalizedOpts.QuotaGPU + } + if strings.TrimSpace(rawOpts.QuotaGPUMem) != "" { + workspace.QuotaGPUMem = normalizedOpts.QuotaGPUMem + } +} + +func (s *AuthService) syncWorkspaceBindings(ctx context.Context, workspace *entity.Workspace) error { + if workspace == nil || s.bindingRepo == nil { + return nil + } + bindings, err := s.bindingRepo.ListByWorkspace(ctx, workspace.ID) + if err != nil { + return err + } + for _, binding := range bindings { + if binding == nil { + continue + } + binding.QuotaCPU = strings.TrimSpace(workspace.QuotaCPU) + binding.QuotaMemory = strings.TrimSpace(workspace.QuotaMemory) + 
binding.QuotaGPU = strings.TrimSpace(workspace.QuotaGPU) + if binding.QuotaGPU == "" { + binding.QuotaGPU = "0" + } + binding.QuotaGPUMem = strings.TrimSpace(workspace.QuotaGPUMem) + if binding.QuotaGPUMem == "" { + binding.QuotaGPUMem = "0" + } + binding.UpdatedAt = time.Now() + if s.tenantClient != nil && s.clusterRepo != nil { + cluster, err := s.clusterRepo.GetByID(ctx, binding.ClusterID) + if err != nil { + if errors.Is(err, entity.ErrClusterNotFound) { + continue + } + return err + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding) + if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil { + return err + } + } + if err := s.bindingRepo.Upsert(ctx, binding); err != nil { + return err + } + } + return nil +} + +func (s *AuthService) DeleteUser(ctx context.Context, userID string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return entity.ErrForbidden + } + if userID == principal.UserID { + return entity.ErrForbidden + } + user, err := s.userRepo.GetByID(ctx, userID) + if err != nil { + return entity.ErrUserNotFound + } + if err := s.ensureUserHasNoInstances(ctx, user); err != nil { + return err + } + if s.isExclusiveUserWorkspace(ctx, user) { + if err := s.cleanupUserWorkspace(ctx, user.WorkspaceID); err != nil { + return err + } + } + return s.userRepo.Delete(ctx, userID) +} + +func (s *AuthService) ensureUserHasNoInstances(ctx context.Context, user *entity.User) error { + if s.instanceRepo == nil || user == nil { + return nil + } + instances, err := s.instanceRepo.List(ctx) + if err != nil { + return err + } + for _, instance := range instances { + if instance == nil { + continue + } + if instance.OwnerID == user.ID { + return entity.ErrUserHasInstances + } + if user.WorkspaceID != "" && user.WorkspaceID != 
entity.DefaultWorkspaceID && instance.WorkspaceID == user.WorkspaceID { + return entity.ErrUserHasInstances + } + } + return nil +} + +func (s *AuthService) isExclusiveUserWorkspace(ctx context.Context, user *entity.User) bool { + if user == nil || user.Role == authz.RoleAdmin || user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID { + return false + } + users, err := s.userRepo.List(ctx) + if err != nil { + return false + } + for _, other := range users { + if other == nil || other.ID == user.ID { + continue + } + if other.WorkspaceID == user.WorkspaceID { + return false + } + } + return true +} + +func (s *AuthService) cleanupUserWorkspace(ctx context.Context, workspaceID string) error { + if s.workspaceRepo == nil || s.bindingRepo == nil { + return nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return err + } + if isProtectedWorkspaceNamespace(workspace.K8sNamespace) { + return entity.ErrProtectedNamespace + } + bindings, err := s.bindingRepo.ListByWorkspace(ctx, workspace.ID) + if err != nil { + return err + } + for _, binding := range bindings { + if binding == nil { + continue + } + if isProtectedWorkspaceNamespace(binding.Namespace) { + return entity.ErrProtectedNamespace + } + if s.tenantClient != nil && s.clusterRepo != nil { + cluster, err := s.clusterRepo.GetByID(ctx, binding.ClusterID) + if err != nil && !errors.Is(err, entity.ErrClusterNotFound) { + return err + } + if err == nil { + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace) + if err := s.tenantClient.DeleteTenant(ctx, cluster, tenantBinding); err != nil { + return err + } + } + } + if err := s.bindingRepo.Delete(ctx, binding.WorkspaceID, binding.ClusterID); err != nil { + return err + } + } + if err := s.workspaceRepo.Delete(ctx, workspace.ID); err != nil && !errors.Is(err, 
entity.ErrWorkspaceNotFound) { + return err + } + return nil +} + +func isProtectedWorkspaceNamespace(namespace string) bool { + switch strings.TrimSpace(namespace) { + case "", "default", "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + +func normalizeUserRole(role string) string { + if role == authz.RoleAdmin { + return authz.RoleAdmin + } + return authz.RoleUser +} + +// Login 用户登录 +func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, user *entity.User, err error) { + // 查找用户 + user, err = s.userRepo.GetByUsername(ctx, username) + if err != nil { + return "", "", nil, entity.ErrUserNotFound + } + if !user.IsActive { + return "", "", nil, entity.ErrUserInactive + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil { + return "", "", nil, err } // 验证密码 if err := s.passwordHasher.Verify(password, user.PasswordHash); err != nil { - return "", "", entity.ErrInvalidPassword + return "", "", nil, entity.ErrInvalidPassword } // 生成 Token - accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username) + accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID) if err != nil { - return "", "", err + return "", "", nil, err } - return accessToken, refreshToken, nil + return accessToken, refreshToken, user, nil } // RefreshToken 刷新 Token -func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, error) { - return s.tokenGenerator.Refresh(refreshToken) +func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, *entity.User, error) { + claims, err := s.tokenGenerator.VerifyRefresh(refreshToken) + if err != nil { + return "", nil, err + } + user, err := s.userRepo.GetByID(ctx, claims.UserID) + if err != nil { + return "", nil, entity.ErrUserNotFound + } + if !user.IsActive { + return "", nil, entity.ErrUserInactive + } + if 
claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() { + return "", nil, entity.ErrTokenRevoked + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil { + return "", nil, err + } + accessToken, _, err := s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID) + if err != nil { + return "", nil, err + } + return accessToken, user, nil } // GetUserByID 根据 ID 获取用户 @@ -106,25 +736,84 @@ func (s *AuthService) GetUserByID(ctx context.Context, id string) (*entity.User, } // VerifyAccessToken 验证 Access Token(包括 revoked_after 检查) -func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (userID, username string, err error) { +func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (*authz.Principal, error) { // 1. JWT 自验证 - userID, username, issuedAt, err := s.tokenGenerator.VerifyWithIssuedAt(token) + claims, err := s.tokenGenerator.VerifyAccess(token) if err != nil { - return "", "", err + return nil, err } // 2. 检查用户级别的撤销时间 - user, err := s.userRepo.GetByID(ctx, userID) + user, err := s.userRepo.GetByID(ctx, claims.UserID) if err != nil { - return "", "", entity.ErrUserNotFound + return nil, entity.ErrUserNotFound + } + if !user.IsActive { + return nil, entity.ErrUserInactive } // 3. 
如果 Token 签发时间早于 revoked_after,则失效 - if issuedAt < user.RevokedAfter.Unix() { - return "", "", entity.ErrTokenRevoked + if claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() { + return nil, entity.ErrTokenRevoked + } + if err := s.ensureWorkspaceActive(ctx, user); err != nil { + return nil, err + } + workspaceName := "" + namespace := "" + defaultClusterID := "" + quotaCPU := "" + quotaMemory := "" + quotaGPU := "" + quotaGPUMem := "" + if s.workspaceRepo != nil && user.WorkspaceID != "" { + if workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID); err == nil && workspace != nil { + workspaceName = workspace.Name + namespace = workspace.K8sNamespace + defaultClusterID = workspace.DefaultClusterID + quotaCPU = workspace.QuotaCPU + quotaMemory = workspace.QuotaMemory + quotaGPU = workspace.QuotaGPU + quotaGPUMem = workspace.QuotaGPUMem + } } - return userID, username, nil + return &authz.Principal{ + UserID: user.ID, + Username: user.Username, + Role: user.Role, + WorkspaceID: user.WorkspaceID, + WorkspaceName: workspaceName, + Namespace: namespace, + DefaultClusterID: defaultClusterID, + QuotaCPU: quotaCPU, + QuotaMemory: quotaMemory, + QuotaGPU: quotaGPU, + QuotaGPUMem: quotaGPUMem, + Permissions: authz.PermissionsForRole(user.Role), + PermissionVersion: 1, + }, nil +} + +func (s *AuthService) GetWorkspaceByID(ctx context.Context, id string) (*entity.Workspace, error) { + if s.workspaceRepo == nil || id == "" { + return nil, entity.ErrWorkspaceNotFound + } + return s.workspaceRepo.GetByID(ctx, id) +} + +func (s *AuthService) ensureWorkspaceActive(ctx context.Context, user *entity.User) error { + if user.Role == authz.RoleAdmin || user.WorkspaceID == "" || s.workspaceRepo == nil { + return nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID) + if err != nil { + return entity.ErrWorkspaceNotFound + } + if workspace.Status == entity.WorkspaceSuspended { + return entity.ErrWorkspaceSuspended + } + return nil } // 
ChangePassword 修改密码(会触发全局登出) diff --git a/backend/internal/domain/service/auth_service_test.go b/backend/internal/domain/service/auth_service_test.go new file mode 100644 index 0000000..ae983ce --- /dev/null +++ b/backend/internal/domain/service/auth_service_test.go @@ -0,0 +1,322 @@ +package service + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + jwtpkg "github.com/ocdp/cluster-service/internal/pkg/jwt" +) + +func TestAuthServiceUpdateUserDowngradeReusesUsernameWorkspace(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + + target := testUser("user-1", "alice", authz.RoleAdmin, entity.DefaultWorkspaceID) + if err := userRepo.Create(ctx, target); err != nil { + t.Fatalf("seed user: %v", err) + } + workspace := entity.NewWorkspace(userWorkspaceName("alice"), "admin") + workspace.ID = "workspace-alice" + workspace.K8sNamespace = entity.NamespaceForUser("alice") + workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace) + if err := workspaceRepo.Create(ctx, workspace); err != nil { + t.Fatalf("seed workspace: %v", err) + } + + updated, err := svc.UpdateUser(ctx, target.ID, authz.RoleUser, "", UserWorkspaceOptions{DefaultClusterID: "cluster-1"}, nil, nil) + if err != nil { + t.Fatalf("UpdateUser returned error: %v", err) + } + + if updated.Role != authz.RoleUser { + t.Fatalf("expected user role, got %q", updated.Role) + } + if updated.WorkspaceID != workspace.ID { + t.Fatalf("expected reused workspace %q, got %q", workspace.ID, updated.WorkspaceID) + } + reused, err := 
workspaceRepo.GetByID(ctx, workspace.ID) + if err != nil { + t.Fatalf("get reused workspace: %v", err) + } + if reused.DefaultClusterID != "cluster-1" { + t.Fatalf("expected updated default cluster, got %q", reused.DefaultClusterID) + } +} + +func TestAuthServiceRegisterUserAlwaysCreatesPrivateWorkspaceWithZeroDefaultQuotas(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + + user, err := svc.Register(ctx, "alice", "password", authz.RoleUser, "shared-workspace", UserWorkspaceOptions{}, nil, nil) + if err != nil { + t.Fatalf("Register returned error: %v", err) + } + if user.WorkspaceID == "shared-workspace" || user.WorkspaceID == entity.DefaultWorkspaceID { + t.Fatalf("expected private user workspace, got %q", user.WorkspaceID) + } + workspace, err := workspaceRepo.GetByID(ctx, user.WorkspaceID) + if err != nil { + t.Fatalf("get user workspace: %v", err) + } + if workspace.K8sNamespace != entity.NamespaceForUser("alice") { + t.Fatalf("expected user namespace %q, got %q", entity.NamespaceForUser("alice"), workspace.K8sNamespace) + } + if workspace.QuotaCPU != "" || workspace.QuotaMemory != "" || workspace.QuotaGPU != "0" || workspace.QuotaGPUMem != "0" { + t.Fatalf("expected omitted CPU/memory to stay unlimited and GPU/gpumem to default zero, got cpu=%q memory=%q gpu=%q gpumem=%q", workspace.QuotaCPU, workspace.QuotaMemory, workspace.QuotaGPU, workspace.QuotaGPUMem) + } +} + +func TestAuthServiceUpdateUserDowngradeRejectsNamespaceConflict(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + + target := testUser("user-1", "alice", authz.RoleAdmin, entity.DefaultWorkspaceID) + if err := userRepo.Create(ctx, target); 
err != nil { + t.Fatalf("seed user: %v", err) + } + conflicting := entity.NewWorkspace("someone-else", "admin") + conflicting.ID = "workspace-other" + conflicting.K8sNamespace = entity.NamespaceForUser("alice") + conflicting.K8sSAName = entity.ServiceAccountForNamespace(conflicting.K8sNamespace) + if err := workspaceRepo.Create(ctx, conflicting); err != nil { + t.Fatalf("seed conflicting workspace: %v", err) + } + + _, err := svc.UpdateUser(ctx, target.ID, authz.RoleUser, "", UserWorkspaceOptions{}, nil, nil) + if !errors.Is(err, entity.ErrWorkspaceNamespaceConflict) { + t.Fatalf("expected namespace conflict, got %v", err) + } +} + +func TestAuthServiceDeleteUserRejectsUserWithInstances(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + instanceRepo := mock.NewInstanceRepositoryMock() + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + svc.SetUserLifecycleCleanup(instanceRepo, nil, nil, nil) + + user := testUser("user-1", "alice", authz.RoleUser, "workspace-alice") + if err := userRepo.Create(ctx, user); err != nil { + t.Fatalf("seed user: %v", err) + } + instance := entity.NewInstance("cluster-1", "app", "ocdp-u-alice", "registry-1", "repo", "chart", "1.0.0") + instance.ID = "instance-1" + instance.OwnerID = user.ID + instance.WorkspaceID = user.WorkspaceID + if err := instanceRepo.Create(ctx, instance); err != nil { + t.Fatalf("seed instance: %v", err) + } + + err := svc.DeleteUser(ctx, user.ID) + if !errors.Is(err, entity.ErrUserHasInstances) { + t.Fatalf("expected user instance conflict, got %v", err) + } + if _, err := userRepo.GetByID(ctx, user.ID); err != nil { + t.Fatalf("user should not be deleted: %v", err) + } +} + +func TestAuthServiceDeleteUserRejectsWorkspaceInstanceEvenWithDifferentOwner(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + 
instanceRepo := mock.NewInstanceRepositoryMock() + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + svc.SetUserLifecycleCleanup(instanceRepo, nil, nil, nil) + + user := testUser("user-1", "alice", authz.RoleUser, "workspace-alice") + if err := userRepo.Create(ctx, user); err != nil { + t.Fatalf("seed user: %v", err) + } + instance := entity.NewInstance("cluster-1", "shared-workspace-app", "ocdp-u-alice", "registry-1", "repo", "chart", "1.0.0") + instance.ID = "instance-1" + instance.OwnerID = "other-user" + instance.WorkspaceID = user.WorkspaceID + if err := instanceRepo.Create(ctx, instance); err != nil { + t.Fatalf("seed workspace instance: %v", err) + } + + err := svc.DeleteUser(ctx, user.ID) + if !errors.Is(err, entity.ErrUserHasInstances) { + t.Fatalf("expected workspace instance conflict, got %v", err) + } + if _, err := userRepo.GetByID(ctx, user.ID); err != nil { + t.Fatalf("user should not be deleted: %v", err) + } +} + +func TestAuthServiceDeleteUserCleansExclusiveWorkspaceBindings(t *testing.T) { + ctx := adminContext() + userRepo := mock.NewUserRepositoryMock() + workspaceRepo := mock.NewWorkspaceRepositoryMock() + instanceRepo := mock.NewInstanceRepositoryMock() + bindingRepo := mock.NewWorkspaceClusterBindingRepositoryMock() + clusterRepo := &testClusterRepo{clusters: map[string]*entity.Cluster{ + "cluster-1": {ID: "cluster-1", Name: "cluster-1", Host: "https://cluster.invalid", Token: "token"}, + }} + tenantClient := &recordingTenantClient{} + svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{}) + svc.SetUserLifecycleCleanup(instanceRepo, clusterRepo, bindingRepo, tenantClient) + + workspace := entity.NewWorkspace(userWorkspaceName("alice"), "admin") + workspace.ID = "workspace-alice" + workspace.K8sNamespace = entity.NamespaceForUser("alice") + workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace) + if err := workspaceRepo.Create(ctx, 
workspace); err != nil { + t.Fatalf("seed workspace: %v", err) + } + user := testUser("user-1", "alice", authz.RoleUser, workspace.ID) + if err := userRepo.Create(ctx, user); err != nil { + t.Fatalf("seed user: %v", err) + } + if err := bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{ + ID: "binding-1", + WorkspaceID: workspace.ID, + ClusterID: "cluster-1", + Namespace: workspace.K8sNamespace, + ServiceAccount: workspace.K8sSAName, + Status: "active", + }); err != nil { + t.Fatalf("seed binding: %v", err) + } + + if err := svc.DeleteUser(ctx, user.ID); err != nil { + t.Fatalf("DeleteUser returned error: %v", err) + } + if _, err := userRepo.GetByID(ctx, user.ID); !errors.Is(err, entity.ErrUserNotFound) { + t.Fatalf("expected user deleted, got %v", err) + } + if bindings, err := bindingRepo.ListByWorkspace(ctx, workspace.ID); err != nil || len(bindings) != 0 { + t.Fatalf("expected bindings cleaned, got len=%d err=%v", len(bindings), err) + } + if len(tenantClient.deleted) != 1 || tenantClient.deleted[0] != workspace.K8sNamespace { + t.Fatalf("expected tenant namespace cleanup, got %#v", tenantClient.deleted) + } + if _, err := workspaceRepo.GetByID(ctx, workspace.ID); !errors.Is(err, entity.ErrWorkspaceNotFound) { + t.Fatalf("expected exclusive workspace deleted, got %v", err) + } +} + +func adminContext() context.Context { + return authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "admin-1", + Username: "admin", + Role: authz.RoleAdmin, + WorkspaceID: entity.DefaultWorkspaceID, + }) +} + +func testUser(id, username, role, workspaceID string) *entity.User { + user := entity.NewUser(username, "hash", username+"@local.ocdp") + user.ID = id + user.Role = role + user.WorkspaceID = workspaceID + return user +} + +type testPasswordHasher struct{} + +func (testPasswordHasher) Hash(password string) (string, error) { return "hash:" + password, nil } +func (testPasswordHasher) Verify(password, hash string) error { return nil } + +type 
testTokenGenerator struct{} + +func (testTokenGenerator) Generate(userID, username, role, workspaceID string) (string, string, error) { + return "access", "refresh", nil +} +func (testTokenGenerator) Verify(token string) (string, string, error) { return "", "", nil } +func (testTokenGenerator) VerifyWithIssuedAt(token string) (string, string, int64, error) { + return "", "", 0, nil +} +func (testTokenGenerator) VerifyAccess(token string) (*jwtpkg.Claims, error) { return nil, nil } +func (testTokenGenerator) VerifyRefresh(token string) (*jwtpkg.Claims, error) { return nil, nil } +func (testTokenGenerator) Refresh(refreshToken string) (string, error) { return "access", nil } + +type testClusterRepo struct { + clusters map[string]*entity.Cluster +} + +func (r *testClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error { + if cluster.ID == "" { + cluster.ID = uuid.New().String() + } + copy := *cluster + r.clusters[cluster.ID] = &copy + return nil +} +func (r *testClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { + cluster, ok := r.clusters[id] + if !ok { + return nil, entity.ErrClusterNotFound + } + copy := *cluster + return &copy, nil +} +func (r *testClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { + for _, cluster := range r.clusters { + if cluster.Name == name { + copy := *cluster + return &copy, nil + } + } + return nil, entity.ErrClusterNotFound +} +func (r *testClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error { + copy := *cluster + r.clusters[cluster.ID] = &copy + return nil +} +func (r *testClusterRepo) Delete(ctx context.Context, id string) error { + delete(r.clusters, id) + return nil +} +func (r *testClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { + result := make([]*entity.Cluster, 0, len(r.clusters)) + for _, cluster := range r.clusters { + copy := *cluster + result = append(result, &copy) + } + return result, nil +} + +type recordingTenantClient struct
{ + deleted []string + usage *repository.ResourceQuotaUsage +} + +func (c *recordingTenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return nil +} +func (c *recordingTenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { + return nil, nil +} +func (c *recordingTenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) { + if c.usage != nil { + return c.usage, nil + } + return &repository.ResourceQuotaUsage{}, nil +} +func (c *recordingTenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + return nil +} +func (c *recordingTenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { + if err := binding.Validate(); err != nil { + return err + } + c.deleted = append(c.deleted, binding.Namespace) + return nil +} diff --git a/backend/internal/domain/service/cluster_service.go b/backend/internal/domain/service/cluster_service.go index a6986b8..aeaa182 100644 --- a/backend/internal/domain/service/cluster_service.go +++ b/backend/internal/domain/service/cluster_service.go @@ -5,6 +5,7 @@ import ( "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // ClusterService 集群管理领域服务 @@ -21,8 +22,21 @@ func NewClusterService(clusterRepo repository.ClusterRepository) *ClusterService // CreateCluster 创建新集群 func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Cluster) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID cluster.ID = uuid.New().String() + cluster.OwnerID = principal.UserID 
+ cluster.WorkspaceID = principal.WorkspaceID + if principal.IsAdmin() && cluster.WorkspaceID == "" { + cluster.WorkspaceID = entity.DefaultWorkspaceID + } + if !principal.IsAdmin() && cluster.Visibility == authz.VisibilityGlobalShared { + return entity.ErrForbidden + } + cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility) // 验证 if err := cluster.Validate(); err != nil { @@ -30,9 +44,11 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus } // 检查是否已存在 - existingCluster, _ := s.clusterRepo.GetByName(ctx, cluster.Name) - if existingCluster != nil { - return entity.ErrClusterExists + clusters, _ := s.clusterRepo.List(ctx) + for _, existingCluster := range clusters { + if existingCluster.Name == cluster.Name && existingCluster.WorkspaceID == cluster.WorkspaceID && existingCluster.OwnerID == cluster.OwnerID { + return entity.ErrClusterExists + } } return s.clusterRepo.Create(ctx, cluster) @@ -40,16 +56,41 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus // GetCluster 获取集群 func (s *ClusterService) GetCluster(ctx context.Context, id string) (*entity.Cluster, error) { - return s.clusterRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + return cluster, nil } // UpdateCluster 更新集群 func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Cluster) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.clusterRepo.GetByID(ctx, cluster.ID) + existing, err := s.clusterRepo.GetByID(ctx, cluster.ID) if err != nil { return entity.ErrClusterNotFound } + if 
!authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) { + return entity.ErrForbidden + } + cluster.WorkspaceID = existing.WorkspaceID + cluster.OwnerID = existing.OwnerID + if principal.IsAdmin() { + cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility) + } else { + cluster.Visibility = existing.Visibility + } // 验证 if err := cluster.Validate(); err != nil { @@ -61,17 +102,37 @@ func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Clus // DeleteCluster 删除集群 func (s *ClusterService) DeleteCluster(ctx context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.clusterRepo.GetByID(ctx, id) + cluster, err := s.clusterRepo.GetByID(ctx, id) if err != nil { return entity.ErrClusterNotFound } + if !authz.CanWriteResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return entity.ErrForbidden + } return s.clusterRepo.Delete(ctx, id) } // ListClusters 列出所有集群 func (s *ClusterService) ListClusters(ctx context.Context) ([]*entity.Cluster, error) { - return s.clusterRepo.List(ctx) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + clusters, err := s.clusterRepo.List(ctx) + if err != nil { + return nil, err + } + visible := make([]*entity.Cluster, 0, len(clusters)) + for _, cluster := range clusters { + if authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + visible = append(visible, cluster) + } + } + return visible, nil } - diff --git a/backend/internal/domain/service/instance_service.go b/backend/internal/domain/service/instance_service.go index 6e5ec7f..7ef4b52 100644 --- a/backend/internal/domain/service/instance_service.go +++ b/backend/internal/domain/service/instance_service.go @@ -6,21 +6,38 @@ import ( "fmt" "os" "path/filepath" + "strings" "time" 
"github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/adapter/input/http/dto" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" ) +// ScaleClient defines the interface for K8s-native workload scaling +type ScaleClient interface { + GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) + ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error +} + // InstanceService Helm 实例管理领域服务 type InstanceService struct { - instanceRepo repository.InstanceRepository - clusterRepo repository.ClusterRepository - registryRepo repository.RegistryRepository - helmClient repository.HelmClient - ociClient repository.OCIClient - entryClient repository.InstanceEntryClient + instanceRepo repository.InstanceRepository + clusterRepo repository.ClusterRepository + registryRepo repository.RegistryRepository + bindingRepo repository.WorkspaceClusterBindingRepository + helmClient repository.HelmClient + ociClient repository.OCIClient + entryClient repository.InstanceEntryClient + diagClient repository.InstanceDiagnosticsClient + workspaceRepo repository.WorkspaceRepository + userRepo repository.UserRepository + tenantClient repository.TenantKubeClient + scaleClient ScaleClient } // NewInstanceService 创建实例服务 @@ -31,17 +48,40 @@ func NewInstanceService( helmClient repository.HelmClient, ociClient repository.OCIClient, entryClient repository.InstanceEntryClient, + bindingRepo ...repository.WorkspaceClusterBindingRepository, ) *InstanceService { + var workspaceBindingRepo repository.WorkspaceClusterBindingRepository + if len(bindingRepo) > 0 { + workspaceBindingRepo = bindingRepo[0] + } return &InstanceService{ instanceRepo: instanceRepo, clusterRepo: clusterRepo, 
registryRepo: registryRepo, + bindingRepo: workspaceBindingRepo, helmClient: helmClient, ociClient: ociClient, entryClient: entryClient, } } +func (s *InstanceService) SetDiagnosticsClient(client repository.InstanceDiagnosticsClient) { + s.diagClient = client +} + +func (s *InstanceService) SetScaleClient(client ScaleClient) { + s.scaleClient = client +} + +func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.WorkspaceRepository, tenantClient repository.TenantKubeClient) { + s.workspaceRepo = workspaceRepo + s.tenantClient = tenantClient +} + +func (s *InstanceService) SetUserRepository(userRepo repository.UserRepository) { + s.userRepo = userRepo +} + const chartCacheDir = "/tmp/charts" func (s *InstanceService) chartArchivePath(instance *entity.Instance) string { @@ -62,8 +102,14 @@ func (s *InstanceService) downloadChart(ctx context.Context, registry *entity.Re // CreateInstance 创建(安装)新实例 func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.Instance) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID instance.ID = uuid.New().String() + instance.WorkspaceID = principal.WorkspaceID + instance.OwnerID = principal.UserID // 验证 if err := instance.Validate(); err != nil { @@ -75,18 +121,37 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I if err != nil { return entity.ErrClusterNotFound } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return entity.ErrClusterNotFound + } // 检查 Registry 是否存在 registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID) if err != nil { return entity.ErrRegistryNotFound } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return entity.ErrRegistryNotFound + } + if err := s.applyNamespacePolicy(ctx, principal, cluster, instance); err != nil { + return err + } + 
enforceNamespaceValues(instance) - // 检查实例是否已存在 existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name) if existingInstance != nil { return entity.ErrInstanceExists } + if err := s.downloadChart(ctx, registry, instance); err != nil { + return err + } + binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance) + if err != nil { + return err + } + if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, nil); err != nil { + return err + } instance.BeginOperation(entity.OperationInstall, "Preparing installation") @@ -95,13 +160,6 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I return err } - // 下载 chart artifact 供 Helm 使用 - if err := s.downloadChart(ctx, registry, instance); err != nil { - instance.MarkFailure("Failed to download chart", err) - _ = s.instanceRepo.Update(ctx, instance) - return err - } - // 异步执行 Helm 安装并监控状态 go s.executeAndSyncInstall(context.Background(), instance.ID, cluster, registry, instance) @@ -111,13 +169,25 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I // GetInstance 获取实例 func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.Instance, error) { - return s.instanceRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + instance, err := s.instanceRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !s.canReadInstance(principal, instance) { + return nil, entity.ErrInstanceNotFound + } + s.enrichOwnerUsernames(ctx, []*entity.Instance{instance}) + return instance, nil } // GetInstanceStatus 获取实例实时状态 func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*entity.Instance, error) { // 从数据库获取基本信息 - instance, err := s.instanceRepo.GetByID(ctx, id) + instance, err := s.GetInstance(ctx, id) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -143,11 +213,34 @@ 
func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*en // UpdateInstance 更新(升级)实例 func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.Instance) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 existingInstance, err := s.instanceRepo.GetByID(ctx, instance.ID) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, existingInstance) { + return entity.ErrForbidden + } + instance.ClusterID = existingInstance.ClusterID + instance.WorkspaceID = existingInstance.WorkspaceID + instance.OwnerID = existingInstance.OwnerID + instance.Name = existingInstance.Name + if instance.RegistryID == "" { + instance.RegistryID = existingInstance.RegistryID + } + if instance.Repository == "" { + instance.Repository = existingInstance.Repository + } + if instance.Chart == "" { + instance.Chart = existingInstance.Chart + } + if instance.Version == "" { + instance.Version = existingInstance.Version + } // 获取集群信息 cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID) @@ -161,15 +254,23 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I return entity.ErrRegistryNotFound } - instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade") - if err := s.instanceRepo.Update(ctx, instance); err != nil { - return err - } + instance.Namespace = existingInstance.Namespace + enforceNamespaceValues(instance) // 下载所需 Chart if err := s.downloadChart(ctx, registry, instance); err != nil { - instance.MarkFailure("Failed to download chart", err) - _ = s.instanceRepo.Update(ctx, instance) + return err + } + binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance) + if err != nil { + return err + } + if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, existingInstance); err != nil { + return err + } + + instance.BeginOperation(entity.OperationUpgrade, 
"Pending upgrade") + if err := s.instanceRepo.Update(ctx, instance); err != nil { return err } @@ -182,11 +283,18 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I // DeleteInstance 删除(卸载)实例 func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 instance, err := s.instanceRepo.GetByID(ctx, id) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, instance) { + return entity.ErrForbidden + } // 获取集群信息 cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID) @@ -208,11 +316,18 @@ func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error { // RollbackInstance 回滚实例 func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revision int) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查实例是否存在 instance, err := s.instanceRepo.GetByID(ctx, id) if err != nil { return entity.ErrInstanceNotFound } + if !s.canWriteInstance(principal, instance) { + return entity.ErrForbidden + } // 获取集群信息 cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID) @@ -235,7 +350,7 @@ func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revis // GetInstanceHistory 获取实例历史 func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]*entity.ReleaseHistory, error) { // 检查实例是否存在 - instance, err := s.instanceRepo.GetByID(ctx, id) + instance, err := s.GetInstance(ctx, id) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -252,18 +367,58 @@ func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([] // ListInstancesByCluster 列出集群的所有实例 func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + 
return nil, entity.ErrUnauthorized + } // 检查集群是否存在 - _, err := s.clusterRepo.GetByID(ctx, clusterID) + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) if err != nil { return nil, entity.ErrClusterNotFound } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } - return s.instanceRepo.ListByCluster(ctx, clusterID) + instances, err := s.instanceRepo.ListByCluster(ctx, clusterID) + if err != nil { + return nil, err + } + visible := make([]*entity.Instance, 0, len(instances)) + for _, instance := range instances { + if s.canReadInstance(principal, instance) { + visible = append(visible, instance) + } + } + s.enrichOwnerUsernames(ctx, visible) + return visible, nil +} + +func (s *InstanceService) enrichOwnerUsernames(ctx context.Context, instances []*entity.Instance) { + if s.userRepo == nil || len(instances) == 0 { + return + } + usernames := make(map[string]string) + for _, instance := range instances { + if instance == nil || instance.OwnerID == "" { + continue + } + if username, ok := usernames[instance.OwnerID]; ok { + instance.OwnerUsername = username + continue + } + user, err := s.userRepo.GetByID(ctx, instance.OwnerID) + if err != nil || user == nil { + continue + } + usernames[instance.OwnerID] = user.Username + instance.OwnerUsername = user.Username + } } // ListInstanceEntries 列出实例关联的入口信息(Service / Ingress) func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) { - instance, err := s.instanceRepo.GetByID(ctx, instanceID) + instance, err := s.GetInstance(ctx, instanceID) if err != nil { return nil, entity.ErrInstanceNotFound } @@ -283,6 +438,480 @@ func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, in return s.entryClient.ListEntries(ctx, cluster, instance) } +func (s *InstanceService) GetInstanceDiagnostics(ctx context.Context, clusterID, instanceID string, 
tailLines int64) (*entity.InstanceDiagnostics, error) { + instance, err := s.GetInstance(ctx, instanceID) + if err != nil { + return nil, entity.ErrInstanceNotFound + } + if instance.ClusterID != clusterID { + return nil, entity.ErrInstanceNotFound + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if s.diagClient == nil { + return nil, fmt.Errorf("instance diagnostics client is not configured") + } + return s.diagClient.GetDiagnostics(ctx, cluster, instance, tailLines) +} + +func (s *InstanceService) StreamInstanceLogs(ctx context.Context, clusterID, instanceID, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) { + instance, err := s.GetInstance(ctx, instanceID) + if err != nil { + return nil, nil, entity.ErrInstanceNotFound + } + if instance.ClusterID != clusterID { + return nil, nil, entity.ErrInstanceNotFound + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, nil, entity.ErrClusterNotFound + } + if s.diagClient == nil { + return nil, nil, fmt.Errorf("instance diagnostics client is not configured") + } + streamer, ok := s.diagClient.(repository.PodLogStreamer) + if !ok { + return nil, nil, fmt.Errorf("diagnostics client does not support log streaming") + } + return streamer.StreamPodLogs(ctx, cluster, instance.Namespace, podName, containerName, tailLines) +} + +// ScaleInstance 扩缩容实例(修改 replicaCount 后执行 Helm upgrade) +func (s *InstanceService) ScaleInstance(ctx context.Context, clusterID, instanceID string, replicas int, workload string) (*entity.Instance, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + instance, err := s.instanceRepo.GetByID(ctx, instanceID) + if err != nil { + return nil, entity.ErrInstanceNotFound + } + if !s.canWriteInstance(principal, instance) { + return nil, entity.ErrForbidden + } + if instance.ClusterID != clusterID { + 
return nil, entity.ErrInstanceNotFound + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + + current := cloneInstanceForQuota(instance) + currentValues, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace) + if err == nil && currentValues != nil { + current.SetValues(currentValues) + } + target := cloneInstanceForQuota(instance) + targetValues := copyValues(current.Values) + if targetValues == nil { + targetValues = copyValues(instance.Values) + } + if targetValues == nil { + targetValues = map[string]interface{}{} + } + targetValues["replicaCount"] = replicas + target.SetValues(targetValues) + registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID) + if err != nil { + return nil, entity.ErrRegistryNotFound + } + if err := s.downloadChart(ctx, registry, target); err != nil { + return nil, err + } + binding, err := s.ensureTenantForInstance(ctx, principal, cluster, target) + if err != nil { + return nil, err + } + if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, target, current); err != nil { + return nil, err + } + + // Scale via K8s API directly (like kubectl scale deploy --replicas=N) + if s.scaleClient != nil { + if err := s.scaleClient.ScaleDeployment(ctx, cluster, instance.Namespace, instance.Name, int32(replicas)); err != nil { + return nil, fmt.Errorf("failed to scale deployment: %w", err) + } + instance.SetValues(targetValues) + instance.Replicas = replicas + if err := s.instanceRepo.Update(ctx, instance); err != nil { + return nil, err + } + } else { + // Fallback: Helm upgrade with replicaCount + instance.SetValues(targetValues) + instance.BeginOperation(entity.OperationUpgrade, fmt.Sprintf("Scaling to %d replicas", replicas)) + if err := s.instanceRepo.Update(ctx, instance); err != nil { + return nil, err + } + go s.executeAndSyncUpgrade(context.Background(), instance.ID, cluster, nil, instance) + } + + return instance, nil +} + 
+// EnrichReplicas 批量获取实例的 K8s 实际副本数并设置到 entity 上 +func (s *InstanceService) EnrichReplicas(ctx context.Context, clusterID string, instances []*entity.Instance) []*entity.Instance { + if s.scaleClient == nil || len(instances) == 0 { + return instances + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return instances + } + for _, inst := range instances { + r, err := s.scaleClient.GetDeploymentReplicas(ctx, cluster, inst.Namespace, inst.Name) + if err == nil { + inst.Replicas = int(r) + } + } + return instances +} + +// GetRunningReplicas returns the actual K8s deployment replicas count. +func (s *InstanceService) GetRunningReplicas(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) int { + if s.scaleClient == nil { + return 0 + } + r, err := s.scaleClient.GetDeploymentReplicas(ctx, cluster, instance.Namespace, instance.Name) + if err != nil { + return 0 + } + return int(r) +} + +// GetInstanceValuesDiff 获取实例当前 values 与 chart 默认 values 的差异 +func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID, instanceID string) (*dto.InstanceValuesDiffResponse, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + instance, err := s.instanceRepo.GetByID(ctx, instanceID) + if err != nil { + return nil, entity.ErrInstanceNotFound + } + if !s.canReadInstance(principal, instance) { + return nil, entity.ErrInstanceNotFound + } + if instance.ClusterID != clusterID { + return nil, entity.ErrInstanceNotFound + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + + current, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace) + if err != nil { + return nil, err + } + + // Get default values from the chart archive + chartPath := s.chartArchivePath(instance) + if _, statErr := os.Stat(chartPath); statErr != nil { + if !errors.Is(statErr, os.ErrNotExist) { + 
return nil, fmt.Errorf("failed to inspect chart defaults: %w", statErr) + } + registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID) + if err != nil { + return nil, entity.ErrRegistryNotFound + } + if err := s.downloadChart(ctx, registry, instance); err != nil { + return nil, err + } + } + defaults, err := s.helmClient.GetChartDefaultValues(chartPath) + if err != nil { + return nil, fmt.Errorf("failed to read chart defaults: %w", err) + } + + return &dto.InstanceValuesDiffResponse{ + Current: current, + Defaults: defaults, + }, nil +} + +func (s *InstanceService) canReadInstance(principal *authz.Principal, instance *entity.Instance) bool { + if principal.IsAdmin() { + return true + } + return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID +} + +func (s *InstanceService) canWriteInstance(principal *authz.Principal, instance *entity.Instance) bool { + if principal.IsAdmin() { + return true + } + return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID +} + +func enforceNamespaceValues(instance *entity.Instance) { + if instance == nil || instance.Namespace == "" { + return + } + if instance.Values == nil { + instance.Values = map[string]interface{}{} + } + instance.Values["namespace"] = instance.Namespace + setExistingStringValue(instance.Values, "namespaceOverride", instance.Namespace) + setExistingStringValue(instance.Values, "namespace_override", instance.Namespace) + setExistingStringValue(instance.Values, "targetNamespace", instance.Namespace) + setExistingStringValue(instance.Values, "target_namespace", instance.Namespace) + setExistingNestedStringValue(instance.Values, "global", "namespace", instance.Namespace) + setExistingNestedStringValue(instance.Values, "global", "namespaceOverride", instance.Namespace) + setExistingNestedStringValue(instance.Values, "global", "namespace_override", instance.Namespace) +} + +func setExistingStringValue(values map[string]interface{}, key, 
namespace string) { + if _, ok := values[key]; ok { + values[key] = namespace + } +} + +func setExistingNestedStringValue(values map[string]interface{}, parent, key, namespace string) { + child, ok := values[parent].(map[string]interface{}) + if !ok { + return + } + if _, ok := child[key]; ok { + child[key] = namespace + } +} + +func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error { + if principal.IsAdmin() { + if isProtectedSystemNamespace(instance.Namespace) { + return entity.ErrInvalidNamespace + } + return nil + } + if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID { + namespace := principal.Namespace + if namespace == "" { + namespace = entity.NamespaceForWorkspace(principal.WorkspaceName) + } + if s.bindingRepo != nil { + if binding, err := s.bindingRepo.Get(ctx, principal.WorkspaceID, cluster.ID); err == nil && binding != nil && binding.Namespace != "" { + namespace = binding.Namespace + } + } + if instance.Namespace != "" && instance.Namespace != namespace { + return entity.ErrForbidden + } + instance.Namespace = namespace + return nil + } + if isReservedNamespace(instance.Namespace) { + return entity.ErrInvalidNamespace + } + if instance.Namespace == "" { + if cluster.DefaultNamespace != "" { + instance.Namespace = cluster.DefaultNamespace + } else if principal.Namespace != "" { + instance.Namespace = principal.Namespace + } else { + instance.Namespace = entity.NamespaceForWorkspace(principal.Username) + } + } + return nil +} + +func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) (*entity.WorkspaceClusterBinding, error) { + if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil { + return nil, nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return nil, 
err + } + if workspace.Status == entity.WorkspaceSuspended { + return nil, entity.ErrWorkspaceSuspended + } + binding := &entity.WorkspaceClusterBinding{ + ID: uuid.New().String(), + WorkspaceID: workspace.ID, + ClusterID: cluster.ID, + Namespace: instance.Namespace, + ServiceAccount: workspace.K8sSAName, + QuotaCPU: strings.TrimSpace(workspace.QuotaCPU), + QuotaMemory: strings.TrimSpace(workspace.QuotaMemory), + QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU), + QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem), + Status: "active", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + if s.bindingRepo != nil { + if existing, err := s.bindingRepo.Get(ctx, workspace.ID, cluster.ID); err == nil && existing != nil { + binding.ID = existing.ID + binding.CreatedAt = existing.CreatedAt + if existing.Namespace != "" { + binding.Namespace = existing.Namespace + instance.Namespace = existing.Namespace + enforceNamespaceValues(instance) + } + if existing.ServiceAccount != "" { + binding.ServiceAccount = existing.ServiceAccount + } + if existing.Status != "" { + binding.Status = existing.Status + } + } + } + tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding) + if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil { + return nil, err + } + if s.bindingRepo != nil { + if err := s.bindingRepo.Upsert(ctx, binding); err != nil { + return nil, err + } + } + return binding, nil +} + +func (s *InstanceService) precheckInstanceQuota(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, target, current *entity.Instance) error { + if principal.IsAdmin() || s.workspaceRepo == nil || s.helmClient == nil { + return nil + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return err + } + if workspace.Status == entity.WorkspaceSuspended { + return entity.ErrWorkspaceSuspended + } + if binding == nil { + binding = 
&entity.WorkspaceClusterBinding{ + WorkspaceID: principal.WorkspaceID, + ClusterID: cluster.ID, + Namespace: target.Namespace, + QuotaCPU: strings.TrimSpace(workspace.QuotaCPU), + QuotaMemory: strings.TrimSpace(workspace.QuotaMemory), + QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU), + QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem), + } + } + var usage *repository.ResourceQuotaUsage + if s.tenantClient != nil { + tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding) + quotaUsage, err := s.tenantClient.GetResourceQuotaUsage(ctx, cluster, tenantBinding) + if err != nil { + return err + } + usage = quotaUsage + } + result, err := NewQuotaPrecheckService(s.helmClient).EstimateAndCompareBinding(ctx, cluster, binding, usage, target, current) + if err == nil { + return nil + } + if errors.Is(err, ErrQuotaExceeded) && result != nil { + return fmt.Errorf("%w: %s", ErrQuotaExceeded, formatQuotaExceeded(result.Exceeded)) + } + return err +} + +func formatQuotaExceeded(exceeded []QuotaExceededResource) string { + if len(exceeded) == 0 { + return "requested resources exceed workspace quota" + } + parts := make([]string, 0, len(exceeded)) + for _, item := range exceeded { + parts = append(parts, fmt.Sprintf("%s required=%s quota=%s", item.Name, item.Required, item.Hard)) + } + return strings.Join(parts, "; ") +} + +func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList { + hard := corev1.ResourceList{} + addQuantity := func(name corev1.ResourceName, value string) { + value = normalizeStandardQuotaQuantity(value) + if value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[name] = quantity + } + } + addGPUMemoryQuantity := func(value string) { + value, err := normalizeGPUMemoryQuota(value) + if err != nil || value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity + } + } + if workspace == 
nil { + return hard + } + addQuantity(corev1.ResourceName("requests.cpu"), workspace.QuotaCPU) + addQuantity(corev1.ResourceName("requests.memory"), workspace.QuotaMemory) + addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU) + addGPUMemoryQuantity(workspace.QuotaGPUMem) + return hard +} + +func tenantBindingFromWorkspaceClusterBinding(binding *entity.WorkspaceClusterBinding) entity.TenantBinding { + namespace := "" + if binding != nil { + namespace = binding.Namespace + } + tenantBinding := entity.NewTenantBinding(namespace) + if binding != nil { + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding) + } + return tenantBinding +} + +func zeroIfEmptyQuota(value string) string { + if strings.TrimSpace(value) == "" { + return "0" + } + return strings.TrimSpace(value) +} + +func cloneInstanceForQuota(instance *entity.Instance) *entity.Instance { + if instance == nil { + return nil + } + cloned := *instance + cloned.SetValues(copyValues(instance.Values)) + return &cloned +} + +func copyValues(values map[string]interface{}) map[string]interface{} { + if values == nil { + return nil + } + copied := make(map[string]interface{}, len(values)) + for key, value := range values { + copied[key] = value + } + return copied +} + +func isReservedNamespace(namespace string) bool { + switch namespace { + case "default", "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + +func isProtectedSystemNamespace(namespace string) bool { + switch namespace { + case "kube-system", "kube-public", "kube-node-lease": + return true + default: + return false + } +} + // executeAndSyncInstall 异步执行安装并监控状态 func (s *InstanceService) executeAndSyncInstall(ctx context.Context, instanceID string, cluster *entity.Cluster, registry *entity.Registry, instance *entity.Instance) { // 执行 Helm 安装 @@ -338,7 +967,7 @@ func (s *InstanceService) executeAndSyncRollback(ctx 
context.Context, instanceID func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string) { // 执行 Helm 卸载 err := s.helmClient.Uninstall(ctx, cluster, releaseName, namespace) - + // 获取实例 instance, getErr := s.instanceRepo.GetByID(ctx, instanceID) if getErr != nil { @@ -360,7 +989,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI // 卸载成功,标记为已卸载 instance.MarkSuccess(entity.StatusUninstalled, instance.Revision, "Instance uninstalled successfully") _ = s.instanceRepo.Update(ctx, instance) - + // 验证卸载是否完成:尝试获取状态,如果获取不到说明已卸载 time.Sleep(3 * time.Second) _, statusErr := s.helmClient.GetStatus(ctx, cluster, releaseName, namespace) @@ -377,7 +1006,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI // syncInstanceStatus 同步实例状态(定期检查 Helm 状态并更新数据库) func (s *InstanceService) syncInstanceStatus(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string, operation entity.InstanceOperation) { - maxAttempts := 30 // 最多尝试30次(约5分钟) + maxAttempts := 30 // 最多尝试30次(约5分钟) interval := 10 * time.Second // 每10秒检查一次 for i := 0; i < maxAttempts; i++ { diff --git a/backend/internal/domain/service/instance_service_test.go b/backend/internal/domain/service/instance_service_test.go index ae9d53c..066b3c1 100644 --- a/backend/internal/domain/service/instance_service_test.go +++ b/backend/internal/domain/service/instance_service_test.go @@ -4,21 +4,27 @@ import ( "context" "errors" "testing" + "time" persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + "k8s.io/apimachinery/pkg/api/resource" ) func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) { - ctx := context.Background() + principal := 
&authz.Principal{UserID: "user-1", Username: "tester", Role: authz.RoleUser, WorkspaceID: entity.DefaultWorkspaceID} + ctx := authz.WithPrincipal(context.Background(), principal) instanceRepo := persistencemock.NewInstanceRepositoryMock() instance := &entity.Instance{ - ID: "inst-1", - ClusterID: "cluster-1", - Name: "demo", - Namespace: "default", + ID: "inst-1", + WorkspaceID: entity.DefaultWorkspaceID, + OwnerID: "user-1", + ClusterID: "cluster-1", + Name: "demo", + Namespace: "default", } if err := instanceRepo.Create(ctx, instance); err != nil { t.Fatalf("failed to seed instance: %v", err) @@ -40,8 +46,267 @@ func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) { t.Fatalf("DeleteInstance returned error: %v", err) } - if _, err := instanceRepo.GetByID(ctx, instance.ID); !errors.Is(err, entity.ErrInstanceNotFound) { - t.Fatalf("expected instance removed, got err=%v", err) + waitForInstanceDeleted(t, ctx, instanceRepo, instance.ID) +} + +func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) { + instance := &entity.Instance{ + Namespace: "ocdp-u-alice", + Values: map[string]interface{}{ + "namespace": "default", + "namespaceOverride": "default", + "targetNamespace": "default", + "global": map[string]interface{}{ + "namespace": "default", + "namespaceOverride": "default", + }, + "image": map[string]interface{}{ + "repository": "nginx", + }, + }, + } + + enforceNamespaceValues(instance) + + if instance.Values["namespace"] != "ocdp-u-alice" { + t.Fatalf("expected top-level namespace to be enforced, got %#v", instance.Values["namespace"]) + } + if instance.Values["namespaceOverride"] != "ocdp-u-alice" { + t.Fatalf("expected namespaceOverride to be enforced, got %#v", instance.Values["namespaceOverride"]) + } + if instance.Values["targetNamespace"] != "ocdp-u-alice" { + t.Fatalf("expected targetNamespace to be enforced, got %#v", instance.Values["targetNamespace"]) + } + global, ok := instance.Values["global"].(map[string]interface{}) + if !ok 
{ + t.Fatalf("expected global map, got %#v", instance.Values["global"]) + } + if global["namespace"] != "ocdp-u-alice" || global["namespaceOverride"] != "ocdp-u-alice" { + t.Fatalf("expected global namespace keys to be enforced, got %#v", global) + } +} + +func TestApplyNamespacePolicyRejectsMismatchedTenantNamespace(t *testing.T) { + principal := &authz.Principal{ + UserID: "user-1", + Username: "alice", + Role: authz.RoleUser, + WorkspaceID: "workspace-1", + WorkspaceName: "alice", + Namespace: "ocdp-u-alice", + } + cluster := &entity.Cluster{ + ID: "cluster-1", + OwnerID: "admin", + Visibility: authz.VisibilityWorkspaceShared, + } + instance := &entity.Instance{Namespace: "other-namespace"} + svc := NewInstanceService(nil, nil, nil, nil, nil, nil) + + if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); !errors.Is(err, entity.ErrForbidden) { + t.Fatalf("expected ErrForbidden for mismatched tenant namespace, got %v", err) + } + if instance.Namespace != "other-namespace" { + t.Fatalf("expected namespace to remain unchanged on rejection, got %q", instance.Namespace) + } +} + +func TestApplyNamespacePolicyAllowsTenantNamespace(t *testing.T) { + principal := &authz.Principal{ + UserID: "user-1", + Username: "alice", + Role: authz.RoleUser, + WorkspaceID: "workspace-1", + WorkspaceName: "alice", + Namespace: "ocdp-u-alice", + } + cluster := &entity.Cluster{ + ID: "cluster-1", + OwnerID: "admin", + Visibility: authz.VisibilityWorkspaceShared, + } + instance := &entity.Instance{Namespace: "ocdp-u-alice"} + svc := NewInstanceService(nil, nil, nil, nil, nil, nil) + + if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); err != nil { + t.Fatalf("expected matching tenant namespace to be allowed, got %v", err) + } + if instance.Namespace != "ocdp-u-alice" { + t.Fatalf("expected namespace to remain the allowed tenant namespace, got %q", instance.Namespace) + } +} + +func 
TestEnrichReplicasSetsLiveReplicaCount(t *testing.T) { + ctx := context.Background() + cluster := &entity.Cluster{ID: "cluster-1", Name: "cluster"} + svc := NewInstanceService(nil, &stubClusterRepo{cluster: cluster}, nil, nil, nil, nil) + svc.SetScaleClient(&stubScaleClient{replicas: 3}) + + instances := []*entity.Instance{{ + ID: "inst-1", + ClusterID: "cluster-1", + Name: "demo", + Namespace: "ocdp-u-alice", + Replicas: 1, + }} + + enriched := svc.EnrichReplicas(ctx, "cluster-1", instances) + if enriched[0].Replicas != 3 { + t.Fatalf("expected live replicas to overwrite stored count, got %d", enriched[0].Replicas) + } +} + +func TestListInstancesByClusterHydratesOwnerUsername(t *testing.T) { + ctx := authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "admin-1", + Username: "admin", + Role: authz.RoleAdmin, + WorkspaceID: "workspace-admin", + }) + instanceRepo := persistencemock.NewInstanceRepositoryMock() + userRepo := persistencemock.NewUserRepositoryMock() + if err := userRepo.Create(ctx, &entity.User{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"}); err != nil { + t.Fatalf("failed to seed user: %v", err) + } + instance := &entity.Instance{ + ID: "inst-1", + WorkspaceID: "workspace-1", + OwnerID: "user-1", + ClusterID: "cluster-1", + Name: "demo", + Namespace: "ocdp-u-alice", + } + if err := instanceRepo.Create(ctx, instance); err != nil { + t.Fatalf("failed to seed instance: %v", err) + } + svc := NewInstanceService( + instanceRepo, + &stubClusterRepo{cluster: &entity.Cluster{ID: "cluster-1", Name: "cluster"}}, + nil, + nil, + nil, + nil, + ) + svc.SetUserRepository(userRepo) + + instances, err := svc.ListInstancesByCluster(ctx, "cluster-1") + if err != nil { + t.Fatalf("ListInstancesByCluster returned error: %v", err) + } + if len(instances) != 1 { + t.Fatalf("expected 1 instance, got %d", len(instances)) + } + if instances[0].OwnerUsername != "alice" { + t.Fatalf("expected owner username 
alice, got %q", instances[0].OwnerUsername) + } +} + +func TestCreateInstanceRejectsGPUWhenWorkspaceQuotaEmptyBeforeCreate(t *testing.T) { + ctx := authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "user-ivanwu", + Username: "ivanwu", + Role: authz.RoleUser, + WorkspaceID: "workspace-ivanwu", + WorkspaceName: "ivanwu", + Namespace: "ocdp-u-ivanwu", + }) + instanceRepo := persistencemock.NewInstanceRepositoryMock() + workspaceRepo := persistencemock.NewWorkspaceRepositoryMock() + bindingRepo := persistencemock.NewWorkspaceClusterBindingRepositoryMock() + workspace := entity.NewWorkspace("ivanwu", "admin") + workspace.ID = "workspace-ivanwu" + workspace.K8sNamespace = "ocdp-u-ivanwu" + workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace) + workspace.QuotaCPU = "8" + workspace.QuotaMemory = "32Gi" + workspace.QuotaGPU = "" + workspace.QuotaGPUMem = "" + if err := workspaceRepo.Create(ctx, workspace); err != nil { + t.Fatalf("seed workspace: %v", err) + } + + cluster := &entity.Cluster{ + ID: "k3s", + Name: "k3s", + Host: "https://k3s.invalid", + Token: "token", + OwnerID: "admin", + Visibility: authz.VisibilityGlobalShared, + } + registry := &entity.Registry{ + ID: "registry-1", + Name: "harbor", + URL: "https://harbor.invalid", + OwnerID: "admin", + Visibility: authz.VisibilityGlobalShared, + } + helm := &stubHelmClient{ + estimate: &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("2"), + Memory: resource.MustParse("8Gi"), + GPU: 1, + GPUMemoryMB: 10000, + }, + }, + } + oci := &stubOCIClient{} + svc := NewInstanceService( + instanceRepo, + &stubClusterRepo{cluster: cluster}, + &stubRegistryRepo{registry: registry}, + helm, + oci, + nil, + bindingRepo, + ) + svc.SetTenantProvisioning(workspaceRepo, &recordingTenantClient{usage: &repository.ResourceQuotaUsage{}}) + + instance := entity.NewInstance("k3s", "vllm-qwen", "ocdp-u-ivanwu", registry.ID, "library/vllm-serve", 
"vllm-serve", "0.1.0") + instance.SetValues(map[string]interface{}{ + "image": map[string]interface{}{ + "repository": "harbor.bwgdi.com/library/vllm-openai", + "tag": "v0.17.1", + }, + "model": "Qwen/Qwen2.5-0.5B", + }) + + err := svc.CreateInstance(ctx, instance) + if !errors.Is(err, ErrQuotaExceeded) { + t.Fatalf("expected GPU quota rejection, got %v", err) + } + instances, listErr := instanceRepo.List(ctx) + if listErr != nil { + t.Fatalf("list instances: %v", listErr) + } + if len(instances) != 0 { + t.Fatalf("expected quota rejection before instance DB create, got %#v", instances) + } + if helm.installCalls != 0 { + t.Fatalf("expected Helm install not to be called, got %d calls", helm.installCalls) + } + if oci.pullCalls != 1 { + t.Fatalf("expected chart pull for quota rendering, got %d pulls", oci.pullCalls) + } +} + +func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) { + t.Helper() + + deadline := time.After(2 * time.Second) + ticker := time.NewTicker(10 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-deadline: + _, err := repo.GetByID(ctx, id) + t.Fatalf("expected instance removed, got err=%v", err) + case <-ticker.C: + if _, err := repo.GetByID(ctx, id); errors.Is(err, entity.ErrInstanceNotFound) { + return + } + } } } @@ -73,13 +338,19 @@ func (*stubClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { r type stubHelmClient struct { uninstallErr error + estimate *repository.ResourceEstimate + values map[string]interface{} + installCalls int + upgradeCalls int } -func (*stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { +func (s *stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { + s.installCalls++ return nil } -func (*stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { +func (s *stubHelmClient) 
Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error { + s.upgradeCalls++ return nil } @@ -103,9 +374,116 @@ func (*stubHelmClient) List(ctx context.Context, cluster *entity.Cluster, namesp return nil, nil } -func (*stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) { +func (s *stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) { + return s.values, nil +} + +func (*stubHelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) { return nil, nil } +func (s *stubHelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) { + if s.estimate != nil { + return s.estimate, nil + } + return &repository.ResourceEstimate{}, nil +} + +type stubRegistryRepo struct { + registry *entity.Registry +} + +func (s *stubRegistryRepo) Create(ctx context.Context, registry *entity.Registry) error { + s.registry = registry + return nil +} + +func (s *stubRegistryRepo) GetByID(ctx context.Context, id string) (*entity.Registry, error) { + if s.registry != nil && s.registry.ID == id { + return s.registry, nil + } + return nil, entity.ErrRegistryNotFound +} + +func (s *stubRegistryRepo) GetByName(ctx context.Context, name string) (*entity.Registry, error) { + if s.registry != nil && s.registry.Name == name { + return s.registry, nil + } + return nil, entity.ErrRegistryNotFound +} + +func (s *stubRegistryRepo) Update(ctx context.Context, registry *entity.Registry) error { + s.registry = registry + return nil +} + +func (s *stubRegistryRepo) Delete(ctx context.Context, id string) error { + if s.registry != nil && s.registry.ID == id { + s.registry = nil + return nil + } + return entity.ErrRegistryNotFound +} + +func (s *stubRegistryRepo) List(ctx context.Context) ([]*entity.Registry, 
error) { + if s.registry == nil { + return nil, nil + } + return []*entity.Registry{s.registry}, nil +} + +type stubOCIClient struct { + pullCalls int +} + +func (*stubOCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) { + return nil, nil +} + +func (*stubOCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repositoryName, mediaTypeFilter string) ([]*entity.Artifact, error) { + return nil, nil +} + +func (*stubOCIClient) GetArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (*entity.Artifact, error) { + return nil, nil +} + +func (*stubOCIClient) GetValuesSchema(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) { + return "", nil +} + +func (*stubOCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) { + return "", nil +} + +func (s *stubOCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference, destPath string) error { + s.pullCalls++ + return nil +} + +func (*stubOCIClient) PushArtifact(ctx context.Context, registry *entity.Registry, repositoryName, tag, sourcePath string) error { + return nil +} + +func (*stubOCIClient) CheckHealth(ctx context.Context, registry *entity.Registry) error { + return nil +} + +type stubScaleClient struct { + replicas int32 +} + +func (s *stubScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) { + return s.replicas, nil +} + +func (s *stubScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error { + s.replicas = replicas + return nil +} + var _ repository.ClusterRepository = (*stubClusterRepo)(nil) +var _ repository.RegistryRepository = (*stubRegistryRepo)(nil) var _ repository.HelmClient = (*stubHelmClient)(nil) +var 
_ repository.OCIClient = (*stubOCIClient)(nil) +var _ ScaleClient = (*stubScaleClient)(nil) diff --git a/backend/internal/domain/service/monitoring_service.go b/backend/internal/domain/service/monitoring_service.go index b07c1fb..513398e 100644 --- a/backend/internal/domain/service/monitoring_service.go +++ b/backend/internal/domain/service/monitoring_service.go @@ -3,39 +3,64 @@ package service import ( "context" "fmt" + "sort" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // MonitoringService 监控服务 type MonitoringService struct { - clusterRepo repository.ClusterRepository + clusterRepo repository.ClusterRepository metricsClient repository.MetricsClient + instanceRepo repository.InstanceRepository + userRepo repository.UserRepository } // NewMonitoringService 创建监控服务 func NewMonitoringService( clusterRepo repository.ClusterRepository, metricsClient repository.MetricsClient, + instanceRepo repository.InstanceRepository, + userRepo repository.UserRepository, ) *MonitoringService { return &MonitoringService{ - clusterRepo: clusterRepo, + clusterRepo: clusterRepo, metricsClient: metricsClient, + instanceRepo: instanceRepo, + userRepo: userRepo, } } // GetClusterMonitoring 获取单个集群的监控信息 func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } metrics, err := s.metricsClient.GetClusterMetrics(ctx, clusterID) if err != nil { return nil, fmt.Errorf("failed to get cluster metrics: %w", err) } + s.enrichResourceUsage(ctx, 
principal, metrics) + s.scopeTenantMetrics(principal, metrics) return metrics, nil } // ListClusterMonitoring 获取所有集群的监控信息 func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entity.ClusterMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } // 获取所有集群 clusters, err := s.clusterRepo.List(ctx) if err != nil { @@ -45,6 +70,9 @@ func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entit // 获取每个集群的监控数据 result := make([]*entity.ClusterMetrics, 0, len(clusters)) for _, cluster := range clusters { + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + continue + } metrics, err := s.metricsClient.GetClusterMetrics(ctx, cluster.ID) if err != nil { // 如果某个集群获取失败,记录错误但继续 @@ -56,12 +84,310 @@ func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entit Status: "unknown", } } + s.enrichResourceUsage(ctx, principal, metrics) + s.scopeTenantMetrics(principal, metrics) result = append(result, metrics) } return result, nil } +func (s *MonitoringService) enrichResourceUsage(ctx context.Context, principal *authz.Principal, metrics *entity.ClusterMetrics) { + if metrics == nil || s.instanceRepo == nil || s.metricsClient == nil { + s.addVisibleUserRows(ctx, principal, metrics) + return + } + instances, err := s.instanceRepo.ListByCluster(ctx, metrics.ClusterID) + if err != nil { + fmt.Printf("Warning: failed to list instances for cluster %s resource usage: %v\n", metrics.ClusterID, err) + s.addVisibleUserRows(ctx, principal, metrics) + return + } + allocations, err := s.metricsClient.GetPodResourceAllocations(ctx, metrics.ClusterID) + if err != nil { + fmt.Printf("Warning: failed to list pod resource allocations for cluster %s: %v\n", metrics.ClusterID, err) + s.addVisibleUserRows(ctx, principal, metrics) + return + } + + visibleInstances := make(map[string]*entity.Instance) + for _, instance := range 
instances { + if instance == nil || !canReadMonitoringInstance(principal, instance) { + continue + } + key := monitoringInstanceKey(instance.Namespace, instance.Name) + visibleInstances[key] = instance + } + + type usageAccumulator struct { + userID string + username string + workspaceID string + allocation entity.ResourceAllocation + podCount int + instances map[string]struct{} + } + byUser := make(map[string]*usageAccumulator) + total := entity.ResourceAllocation{} + + for _, pod := range allocations { + if pod == nil { + continue + } + instance := visibleInstances[monitoringInstanceKey(pod.Namespace, pod.InstanceName)] + if instance == nil { + continue + } + total = addResourceAllocation(total, pod.Allocation) + username := instance.OwnerUsername + if username == "" { + username = s.usernameForOwner(ctx, instance.OwnerID, principal) + } + acc := byUser[instance.OwnerID] + if acc == nil { + acc = &usageAccumulator{ + userID: instance.OwnerID, + username: username, + workspaceID: instance.WorkspaceID, + instances: map[string]struct{}{}, + } + byUser[instance.OwnerID] = acc + } + if acc.username == "" { + acc.username = username + } + acc.allocation = addResourceAllocation(acc.allocation, pod.Allocation) + acc.podCount++ + acc.instances[instance.ID] = struct{}{} + } + + metrics.CPURequests = formatCPUAllocation(total.CPURequestsMilli) + metrics.CPULimits = formatCPUAllocation(total.CPULimitsMilli) + metrics.MemoryRequests = formatMemoryAllocation(total.MemoryRequestsBytes) + metrics.MemoryLimits = formatMemoryAllocation(total.MemoryLimitsBytes) + metrics.GPURequests = total.GPURequests + metrics.GPULimits = total.GPULimits + metrics.GPUMemoryRequestsMB = total.GPUMemoryRequestsMB + metrics.GPUMemoryLimitsMB = total.GPUMemoryLimitsMB + metrics.AllocatedGPU = total.GPURequests + metrics.AllocatedGPUMemoryMB = total.GPUMemoryRequestsMB + + userIDs := make([]string, 0, len(byUser)) + for userID := range byUser { + userIDs = append(userIDs, userID) + } + 
sort.Slice(userIDs, func(i, j int) bool { + left := byUser[userIDs[i]] + right := byUser[userIDs[j]] + if left.username == right.username { + return left.userID < right.userID + } + return left.username < right.username + }) + + usage := make([]entity.UserResourceUsage, 0, len(userIDs)) + for _, userID := range userIDs { + acc := byUser[userID] + usage = append(usage, entity.UserResourceUsage{ + UserID: acc.userID, + Username: acc.username, + WorkspaceID: acc.workspaceID, + InstanceCount: len(acc.instances), + PodCount: acc.podCount, + CPURequests: formatCPUAllocation(acc.allocation.CPURequestsMilli), + CPULimits: formatCPUAllocation(acc.allocation.CPULimitsMilli), + MemoryRequests: formatMemoryAllocation(acc.allocation.MemoryRequestsBytes), + MemoryLimits: formatMemoryAllocation(acc.allocation.MemoryLimitsBytes), + GPURequests: acc.allocation.GPURequests, + GPULimits: acc.allocation.GPULimits, + GPUMemoryRequestsMB: acc.allocation.GPUMemoryRequestsMB, + GPUMemoryLimitsMB: acc.allocation.GPUMemoryLimitsMB, + }) + } + metrics.ResourceUsageByUser = usage + s.addVisibleUserRows(ctx, principal, metrics) +} + +func (s *MonitoringService) addVisibleUserRows(ctx context.Context, principal *authz.Principal, metrics *entity.ClusterMetrics) { + if principal == nil || metrics == nil { + return + } + existing := make(map[string]struct{}, len(metrics.ResourceUsageByUser)) + for _, row := range metrics.ResourceUsageByUser { + if row.UserID != "" { + existing[row.UserID] = struct{}{} + } + } + appendEmpty := func(userID, username, workspaceID string) { + if userID == "" { + return + } + if _, ok := existing[userID]; ok { + return + } + metrics.ResourceUsageByUser = append(metrics.ResourceUsageByUser, entity.UserResourceUsage{ + UserID: userID, + Username: username, + WorkspaceID: workspaceID, + InstanceCount: 0, + PodCount: 0, + CPURequests: "0 cores", + CPULimits: "0 cores", + MemoryRequests: "0 B", + MemoryLimits: "0 B", + }) + existing[userID] = struct{}{} + } + if 
!principal.IsAdmin() { + appendEmpty(principal.UserID, principal.Username, principal.WorkspaceID) + return + } + if s.userRepo == nil { + return + } + users, err := s.userRepo.List(ctx) + if err != nil { + fmt.Printf("Warning: failed to list users for monitoring rows: %v\n", err) + return + } + for _, user := range users { + if user == nil || user.Role != authz.RoleUser || !user.IsActive { + continue + } + appendEmpty(user.ID, user.Username, user.WorkspaceID) + } + sort.Slice(metrics.ResourceUsageByUser, func(i, j int) bool { + left := metrics.ResourceUsageByUser[i] + right := metrics.ResourceUsageByUser[j] + if left.Username == right.Username { + return left.UserID < right.UserID + } + return left.Username < right.Username + }) +} + +func (s *MonitoringService) scopeTenantMetrics(principal *authz.Principal, metrics *entity.ClusterMetrics) { + if principal == nil || principal.IsAdmin() || metrics == nil { + return + } + var total entity.ResourceAllocation + podCount := 0 + instanceCount := 0 + for _, usage := range metrics.ResourceUsageByUser { + if usage.UserID != principal.UserID { + continue + } + podCount += usage.PodCount + instanceCount += usage.InstanceCount + total.GPURequests += usage.GPURequests + total.GPULimits += usage.GPULimits + total.GPUMemoryRequestsMB += usage.GPUMemoryRequestsMB + total.GPUMemoryLimitsMB += usage.GPUMemoryLimitsMB + } + metrics.NodeCount = 0 + metrics.Nodes = nil + metrics.PodCount = podCount + metrics.TotalCPU = "" + metrics.TotalMemory = "" + metrics.TotalGPU = 0 + metrics.UsedCPU = metrics.CPURequests + metrics.UsedMemory = metrics.MemoryRequests + metrics.UsedGPU = int(total.GPURequests) + metrics.CPUUsage = 0 + metrics.MemoryUsage = 0 + metrics.GPUUsage = 0 + metrics.MaxNodeCPU = "" + metrics.MaxNodeMemory = "" + metrics.MaxNodeGPU = 0 + metrics.MaxNodeCPUUsage = 0 + metrics.MaxNodeMemUsage = 0 + metrics.MaxNodeGPUUsage = 0 + metrics.ResourceUsageByUser = filterSelfUsage(principal.UserID, metrics.ResourceUsageByUser) + if 
instanceCount == 0 { + metrics.CPURequests = "" + metrics.CPULimits = "" + metrics.MemoryRequests = "" + metrics.MemoryLimits = "" + metrics.GPURequests = 0 + metrics.GPULimits = 0 + metrics.GPUMemoryRequestsMB = 0 + metrics.GPUMemoryLimitsMB = 0 + metrics.AllocatedGPU = 0 + metrics.AllocatedGPUMemoryMB = 0 + } +} + +func filterSelfUsage(userID string, usage []entity.UserResourceUsage) []entity.UserResourceUsage { + filtered := make([]entity.UserResourceUsage, 0, len(usage)) + for _, row := range usage { + if row.UserID == userID { + filtered = append(filtered, row) + } + } + return filtered +} + +func canReadMonitoringInstance(principal *authz.Principal, instance *entity.Instance) bool { + if principal == nil || instance == nil { + return false + } + if principal.IsAdmin() { + return true + } + return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID +} + +func (s *MonitoringService) usernameForOwner(ctx context.Context, ownerID string, principal *authz.Principal) string { + if ownerID == "" { + return "" + } + if principal != nil && ownerID == principal.UserID { + return principal.Username + } + if s.userRepo == nil { + return "" + } + user, err := s.userRepo.GetByID(ctx, ownerID) + if err != nil || user == nil { + return "" + } + return user.Username +} + +func monitoringInstanceKey(namespace, name string) string { + return namespace + "/" + name +} + +func addResourceAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation { + return entity.ResourceAllocation{ + CPURequestsMilli: left.CPURequestsMilli + right.CPURequestsMilli, + CPULimitsMilli: left.CPULimitsMilli + right.CPULimitsMilli, + MemoryRequestsBytes: left.MemoryRequestsBytes + right.MemoryRequestsBytes, + MemoryLimitsBytes: left.MemoryLimitsBytes + right.MemoryLimitsBytes, + GPURequests: left.GPURequests + right.GPURequests, + GPULimits: left.GPULimits + right.GPULimits, + GPUMemoryRequestsMB: left.GPUMemoryRequestsMB + right.GPUMemoryRequestsMB, 
+ GPUMemoryLimitsMB: left.GPUMemoryLimitsMB + right.GPUMemoryLimitsMB, + } +} + +func formatCPUAllocation(milli int64) string { + return fmt.Sprintf("%.2f cores", float64(milli)/1000.0) +} + +func formatMemoryAllocation(bytes int64) string { + const unit = 1024 + if bytes < unit { + return fmt.Sprintf("%d B", bytes) + } + div, exp := int64(unit), 0 + for n := bytes / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp]) +} + // GetMonitoringSummary 获取监控汇总信息 func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.MonitoringSummary, error) { // 获取所有集群监控数据 @@ -93,10 +419,23 @@ func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.M // GetNodeMetrics 获取集群的节点指标 func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } nodes, err := s.metricsClient.GetNodeMetrics(ctx, clusterID) if err != nil { return nil, fmt.Errorf("failed to get node metrics: %w", err) } return nodes, nil } - diff --git a/backend/internal/domain/service/monitoring_service_test.go b/backend/internal/domain/service/monitoring_service_test.go new file mode 100644 index 0000000..98f3fd7 --- /dev/null +++ b/backend/internal/domain/service/monitoring_service_test.go @@ -0,0 +1,228 @@ +package service + +import ( + "context" + "testing" + "time" + + persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock" + "github.com/ocdp/cluster-service/internal/domain/entity" + 
"github.com/ocdp/cluster-service/internal/pkg/authz" +) + +func TestListClusterMonitoringAggregatesResourceUsageForAdmin(t *testing.T) { + ctx := authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "admin-1", + Username: "admin", + Role: authz.RoleAdmin, + WorkspaceID: "workspace-admin", + }) + instanceRepo, userRepo := seedMonitoringOwners(t, ctx) + svc := NewMonitoringService( + &monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}}, + &stubMetricsClient{allocations: monitoringAllocations()}, + instanceRepo, + userRepo, + ) + + metrics, err := svc.ListClusterMonitoring(ctx) + if err != nil { + t.Fatalf("ListClusterMonitoring returned error: %v", err) + } + if len(metrics) != 1 { + t.Fatalf("expected 1 cluster metric, got %d", len(metrics)) + } + got := metrics[0] + if got.AllocatedGPU != 3 || got.AllocatedGPUMemoryMB != 30000 { + t.Fatalf("expected total GPU/gpumem allocation 3/30000, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB) + } + if len(got.ResourceUsageByUser) != 2 { + t.Fatalf("expected 2 user usage rows, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser) + } + if got.ResourceUsageByUser[0].Username != "alice" || got.ResourceUsageByUser[0].GPURequests != 1 { + t.Fatalf("expected alice GPU request row first, got %#v", got.ResourceUsageByUser[0]) + } + if got.ResourceUsageByUser[1].Username != "bob" || got.ResourceUsageByUser[1].GPURequests != 2 { + t.Fatalf("expected bob GPU request row second, got %#v", got.ResourceUsageByUser[1]) + } +} + +func TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser(t *testing.T) { + ctx := authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "user-1", + Username: "alice", + Role: authz.RoleUser, + WorkspaceID: "workspace-1", + }) + instanceRepo, userRepo := seedMonitoringOwners(t, ctx) + svc := NewMonitoringService( + &monitoringClusterRepo{clusters: []*entity.Cluster{{ID: 
"cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}}, + &stubMetricsClient{allocations: monitoringAllocations()}, + instanceRepo, + userRepo, + ) + + metrics, err := svc.ListClusterMonitoring(ctx) + if err != nil { + t.Fatalf("ListClusterMonitoring returned error: %v", err) + } + got := metrics[0] + if got.AllocatedGPU != 1 || got.AllocatedGPUMemoryMB != 10000 { + t.Fatalf("expected ordinary user allocation to be scoped to alice, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB) + } + if len(got.ResourceUsageByUser) != 1 { + t.Fatalf("expected only alice usage row, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser) + } + if got.ResourceUsageByUser[0].UserID != "user-1" || got.ResourceUsageByUser[0].Username != "alice" { + t.Fatalf("expected alice usage row, got %#v", got.ResourceUsageByUser[0]) + } + if got.NodeCount != 0 || len(got.Nodes) != 0 || got.TotalCPU != "" || got.TotalMemory != "" { + t.Fatalf("expected ordinary user cluster-wide metrics to be sanitized, got nodes=%d/%d totalCPU=%q totalMemory=%q", got.NodeCount, len(got.Nodes), got.TotalCPU, got.TotalMemory) + } + if got.PodCount != 1 { + t.Fatalf("expected ordinary user pod count to be self scoped, got %d", got.PodCount) + } +} + +func TestGetNodeMetricsForbiddenForOrdinaryUser(t *testing.T) { + ctx := authz.WithPrincipal(context.Background(), &authz.Principal{ + UserID: "user-1", + Username: "alice", + Role: authz.RoleUser, + WorkspaceID: "workspace-1", + }) + svc := NewMonitoringService( + &monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}}, + &stubMetricsClient{allocations: monitoringAllocations()}, + nil, + nil, + ) + + _, err := svc.GetNodeMetrics(ctx, "cluster-1") + if err != entity.ErrForbidden { + t.Fatalf("expected ordinary user node metrics to be forbidden, got %v", err) + } +} + +func seedMonitoringOwners(t *testing.T, ctx context.Context) 
(*persistencemock.InstanceRepositoryMock, *persistencemock.UserRepositoryMock) { + t.Helper() + instanceRepo := persistencemock.NewInstanceRepositoryMock().(*persistencemock.InstanceRepositoryMock) + userRepo := persistencemock.NewUserRepositoryMock().(*persistencemock.UserRepositoryMock) + for _, user := range []*entity.User{ + {ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"}, + {ID: "user-2", Username: "bob", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-2"}, + } { + if err := userRepo.Create(ctx, user); err != nil { + t.Fatalf("failed to seed user %s: %v", user.ID, err) + } + } + for _, instance := range []*entity.Instance{ + {ID: "inst-1", ClusterID: "cluster-1", Name: "alice-app", Namespace: "ocdp-u-alice", WorkspaceID: "workspace-1", OwnerID: "user-1"}, + {ID: "inst-2", ClusterID: "cluster-1", Name: "bob-app", Namespace: "ocdp-u-bob", WorkspaceID: "workspace-2", OwnerID: "user-2"}, + } { + if err := instanceRepo.Create(ctx, instance); err != nil { + t.Fatalf("failed to seed instance %s: %v", instance.ID, err) + } + } + return instanceRepo, userRepo +} + +func monitoringAllocations() []*entity.PodResourceAllocation { + return []*entity.PodResourceAllocation{ + { + ClusterID: "cluster-1", + Namespace: "ocdp-u-alice", + PodName: "alice-app-0", + InstanceName: "alice-app", + Allocation: entity.ResourceAllocation{ + CPURequestsMilli: 500, + CPULimitsMilli: 1000, + MemoryRequestsBytes: 1024 * 1024 * 1024, + MemoryLimitsBytes: 2 * 1024 * 1024 * 1024, + GPURequests: 1, + GPULimits: 1, + GPUMemoryRequestsMB: 10000, + GPUMemoryLimitsMB: 10000, + }, + }, + { + ClusterID: "cluster-1", + Namespace: "ocdp-u-bob", + PodName: "bob-app-0", + InstanceName: "bob-app", + Allocation: entity.ResourceAllocation{ + CPURequestsMilli: 2000, + CPULimitsMilli: 4000, + MemoryRequestsBytes: 4 * 1024 * 1024 * 1024, + MemoryLimitsBytes: 8 * 1024 * 1024 * 1024, + GPURequests: 2, + GPULimits: 2, + GPUMemoryRequestsMB: 20000, + 
GPUMemoryLimitsMB: 20000, + }, + }, + } +} + +type monitoringClusterRepo struct { + clusters []*entity.Cluster +} + +func (r *monitoringClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error { + r.clusters = append(r.clusters, cluster) + return nil +} + +func (r *monitoringClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) { + for _, cluster := range r.clusters { + if cluster.ID == id { + return cluster, nil + } + } + return nil, entity.ErrClusterNotFound +} + +func (r *monitoringClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) { + for _, cluster := range r.clusters { + if cluster.Name == name { + return cluster, nil + } + } + return nil, entity.ErrClusterNotFound +} + +func (r *monitoringClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error { + return nil +} + +func (r *monitoringClusterRepo) Delete(ctx context.Context, id string) error { return nil } + +func (r *monitoringClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { + return r.clusters, nil +} + +type stubMetricsClient struct { + allocations []*entity.PodResourceAllocation +} + +func (c *stubMetricsClient) GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) { + return &entity.ClusterMetrics{ + ClusterID: clusterID, + ClusterName: "cluster", + Status: "healthy", + NodeCount: 3, + PodCount: 99, + TotalCPU: "48 cores", + TotalMemory: "256Gi", + Nodes: []entity.NodeMetrics{{NodeName: "node-a"}}, + LastCheck: time.Now(), + }, nil +} + +func (c *stubMetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) { + return nil, nil +} + +func (c *stubMetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) { + return c.allocations, nil +} diff --git a/backend/internal/domain/service/quota_precheck.go b/backend/internal/domain/service/quota_precheck.go new file mode 
100644 index 0000000..4d888ec --- /dev/null +++ b/backend/internal/domain/service/quota_precheck.go @@ -0,0 +1,400 @@ +package service + +import ( + "context" + "errors" + "fmt" + "io" + "sort" + "strconv" + "strings" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/yaml" +) + +var ErrQuotaExceeded = errors.New("quota exceeded") + +type QuotaExceededResource struct { + Name string + Required string + Hard string +} + +type QuotaPrecheckResult struct { + Allowed bool + Required repository.ResourceEstimate + Hard repository.ResourceVector + Exceeded []QuotaExceededResource +} + +type QuotaPrecheckService struct { + helmClient repository.HelmClient +} + +func NewQuotaPrecheckService(helmClient repository.HelmClient) *QuotaPrecheckService { + return &QuotaPrecheckService{helmClient: helmClient} +} + +func (s *QuotaPrecheckService) EstimateAndCompare(ctx context.Context, cluster *entity.Cluster, workspace *entity.Workspace, instance *entity.Instance) (*QuotaPrecheckResult, error) { + if s == nil || s.helmClient == nil { + return nil, errors.New("quota precheck requires helm client") + } + estimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, instance) + if err != nil { + return nil, err + } + result, err := CompareWorkspaceQuota(workspace, estimate) + if err != nil { + return result, err + } + return result, nil +} + +func (s *QuotaPrecheckService) EstimateAndCompareBinding(ctx context.Context, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, target *entity.Instance, current *entity.Instance) (*QuotaPrecheckResult, error) { + if s == nil || s.helmClient == nil { + return nil, errors.New("quota precheck requires helm client") + } + targetEstimate, err := 
s.helmClient.EstimateInstanceResources(ctx, cluster, target) + if err != nil { + return nil, err + } + var currentEstimate *repository.ResourceEstimate + if current != nil { + currentEstimate, err = s.helmClient.EstimateInstanceResources(ctx, cluster, current) + if err != nil { + return nil, err + } + } + result, err := CompareBindingQuota(binding, usage, targetEstimate, currentEstimate) + if err != nil { + return result, err + } + return result, nil +} + +func CompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) { + return compareQuotaList(resourceQuotaHard(workspace), nil, estimate, nil) +} + +func CompareBindingQuota(binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) { + return compareQuotaList(bindingQuotaHard(binding), usage, targetEstimate, currentEstimate) +} + +func compareQuotaList(hardList corev1.ResourceList, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) { + if targetEstimate == nil { + targetEstimate = &repository.ResourceEstimate{} + } + current := effectiveQuotaRequests(currentEstimate) + target := effectiveQuotaRequests(targetEstimate) + used := repository.ResourceVector{} + if usage != nil { + used = usage.Used + } + required := addResourceVector(subtractResourceVectorFloorZero(used, current), target) + hard := resourceVectorFromQuotaHard(hardList) + result := &QuotaPrecheckResult{ + Allowed: true, + Required: repository.ResourceEstimate{ + Requests: required, + }, + Hard: hard, + } + addExceeded := func(name, required, limit string) { + result.Allowed = false + result.Exceeded = append(result.Exceeded, QuotaExceededResource{ + Name: name, + Required: required, + Hard: limit, + }) + } + if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && required.CPU.Cmp(quantity) 
> 0 { + addExceeded("requests.cpu", required.CPU.String(), quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && required.Memory.Cmp(quantity) > 0 { + addExceeded("requests.memory", required.Memory.String(), quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && required.GPU > quantity.Value() { + addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(required.GPU, 10), quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && required.GPUMemoryMB > quantity.Value() { + addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(required.GPUMemoryMB, 10), quantity.String()) + } + sort.Slice(result.Exceeded, func(i, j int) bool { + return result.Exceeded[i].Name < result.Exceeded[j].Name + }) + if !result.Allowed { + return result, ErrQuotaExceeded + } + return result, nil +} + +func legacyCompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) { + if estimate == nil { + estimate = &repository.ResourceEstimate{} + } + hardList := resourceQuotaHard(workspace) + hard := resourceVectorFromQuotaHard(hardList) + result := &QuotaPrecheckResult{ + Allowed: true, + Required: *estimate, + Hard: hard, + } + effectiveRequests := effectiveQuotaRequests(estimate) + addExceeded := func(name, required, limit string) { + result.Allowed = false + result.Exceeded = append(result.Exceeded, QuotaExceededResource{ + Name: name, + Required: required, + Hard: limit, + }) + } + if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && effectiveRequests.CPU.Cmp(quantity) > 0 { + addExceeded("requests.cpu", effectiveRequests.CPU.String(), quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && effectiveRequests.Memory.Cmp(quantity) > 0 { + addExceeded("requests.memory", effectiveRequests.Memory.String(), 
quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && effectiveRequests.GPU > quantity.Value() { + addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(effectiveRequests.GPU, 10), quantity.String()) + } + if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && effectiveRequests.GPUMemoryMB > quantity.Value() { + addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(effectiveRequests.GPUMemoryMB, 10), quantity.String()) + } + sort.Slice(result.Exceeded, func(i, j int) bool { + return result.Exceeded[i].Name < result.Exceeded[j].Name + }) + if !result.Allowed { + return result, ErrQuotaExceeded + } + return result, nil +} + +func effectiveQuotaRequests(estimate *repository.ResourceEstimate) repository.ResourceVector { + if estimate == nil { + return repository.ResourceVector{} + } + return repository.ResourceVector{ + CPU: maxQuantity(estimate.Requests.CPU, estimate.Limits.CPU), + Memory: maxQuantity(estimate.Requests.Memory, estimate.Limits.Memory), + GPU: maxInt64(estimate.Requests.GPU, estimate.Limits.GPU), + GPUMemoryMB: maxInt64(estimate.Requests.GPUMemoryMB, estimate.Limits.GPUMemoryMB), + } +} + +func addResourceVector(left, right repository.ResourceVector) repository.ResourceVector { + out := left + out.CPU.Add(right.CPU) + out.Memory.Add(right.Memory) + out.GPU += right.GPU + out.GPUMemoryMB += right.GPUMemoryMB + return out +} + +func subtractResourceVectorFloorZero(left, right repository.ResourceVector) repository.ResourceVector { + out := left + out.CPU.Sub(right.CPU) + if out.CPU.Sign() < 0 { + out.CPU = resource.Quantity{} + } + out.Memory.Sub(right.Memory) + if out.Memory.Sign() < 0 { + out.Memory = resource.Quantity{} + } + out.GPU -= right.GPU + if out.GPU < 0 { + out.GPU = 0 + } + out.GPUMemoryMB -= right.GPUMemoryMB + if out.GPUMemoryMB < 0 { + out.GPUMemoryMB = 0 + } + return out +} + +func maxQuantity(left, right resource.Quantity) resource.Quantity { + 
if left.Cmp(right) >= 0 { + return left + } + return right +} + +func maxInt64(left, right int64) int64 { + if left >= right { + return left + } + return right +} + +func EstimateRenderedManifestResources(manifest string) (*repository.ResourceEstimate, error) { + decoder := yaml.NewYAMLOrJSONDecoder(strings.NewReader(manifest), 4096) + estimate := &repository.ResourceEstimate{} + for { + var obj unstructured.Unstructured + if err := decoder.Decode(&obj); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("failed to decode rendered manifest: %w", err) + } + if obj.GetKind() == "" { + continue + } + podSpec, replicas, ok := podTemplateSpec(obj.Object) + if !ok { + continue + } + addPodSpecResources(estimate, podSpec, replicas) + } + return estimate, nil +} + +func resourceVectorFromQuotaHard(hard corev1.ResourceList) repository.ResourceVector { + gpu := hard[corev1.ResourceName("requests.nvidia.com/gpu")] + gpuMemory := hard[corev1.ResourceName("requests.nvidia.com/gpumem")] + return repository.ResourceVector{ + CPU: hard[corev1.ResourceName("requests.cpu")], + Memory: hard[corev1.ResourceName("requests.memory")], + GPU: gpu.Value(), + GPUMemoryMB: gpuMemory.Value(), + } +} + +func bindingQuotaHard(binding *entity.WorkspaceClusterBinding) corev1.ResourceList { + hard := corev1.ResourceList{} + if binding == nil { + return hard + } + addQuantity := func(name corev1.ResourceName, value string) { + value = normalizeStandardQuotaQuantity(value) + if value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[name] = quantity + } + } + addGPUMemoryQuantity := func(value string) { + value, err := normalizeGPUMemoryQuota(value) + if err != nil || value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity + } + } + addQuantity(corev1.ResourceName("requests.cpu"), binding.QuotaCPU) + 
addQuantity(corev1.ResourceName("requests.memory"), binding.QuotaMemory) + addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), binding.QuotaGPU) + addGPUMemoryQuantity(binding.QuotaGPUMem) + return hard +} + +func podTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) { + kind, _, _ := unstructured.NestedString(obj, "kind") + switch kind { + case "Pod": + spec, ok := nestedMap(obj, "spec") + return spec, 1, ok + case "Deployment", "ReplicaSet", "StatefulSet", "ReplicationController": + spec, replicas, ok := workloadTemplateSpec(obj) + return spec, replicas, ok + case "DaemonSet", "Job": + spec, ok := nestedMap(obj, "spec", "template", "spec") + return spec, 1, ok + case "CronJob": + spec, ok := nestedMap(obj, "spec", "jobTemplate", "spec", "template", "spec") + return spec, 1, ok + default: + return nil, 0, false + } +} + +func workloadTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) { + spec, ok := nestedMap(obj, "spec", "template", "spec") + if !ok { + return nil, 0, false + } + replicas, _, err := unstructured.NestedInt64(obj, "spec", "replicas") + if err != nil || replicas < 1 { + replicas = 1 + } + return spec, replicas, true +} + +func nestedMap(obj map[string]interface{}, fields ...string) (map[string]interface{}, bool) { + value, ok, err := unstructured.NestedMap(obj, fields...) 
+ return value, ok && err == nil +} + +func addPodSpecResources(estimate *repository.ResourceEstimate, podSpec map[string]interface{}, replicas int64) { + if replicas < 1 { + replicas = 1 + } + for _, field := range []string{"initContainers", "containers"} { + containers, ok, err := unstructured.NestedSlice(podSpec, field) + if err != nil || !ok { + continue + } + for _, item := range containers { + container, ok := item.(map[string]interface{}) + if !ok { + continue + } + addContainerResourceList(&estimate.Requests, replicas, container, "resources", "requests") + addContainerResourceList(&estimate.Limits, replicas, container, "resources", "limits") + } + } +} + +func addContainerResourceList(target *repository.ResourceVector, replicas int64, container map[string]interface{}, fields ...string) { + resources, ok := nestedMap(container, fields...) + if !ok { + return + } + for name, value := range resources { + switch name { + case "cpu": + addQuantity(&target.CPU, value, replicas) + case "memory": + addQuantity(&target.Memory, value, replicas) + case "nvidia.com/gpu", "requests.nvidia.com/gpu", "limits.nvidia.com/gpu": + target.GPU += parseIntegerResource(value) * replicas + case "nvidia.com/gpumem", "requests.nvidia.com/gpumem", "limits.nvidia.com/gpumem": + target.GPUMemoryMB += parseGPUMemoryResource(value) * replicas + } + } +} + +func addQuantity(target *resource.Quantity, value interface{}, replicas int64) { + quantity, err := resource.ParseQuantity(fmt.Sprint(value)) + if err != nil { + return + } + quantity.Mul(replicas) + target.Add(quantity) +} + +func parseIntegerResource(value interface{}) int64 { + quantity, err := resource.ParseQuantity(fmt.Sprint(value)) + if err != nil { + return 0 + } + return quantity.Value() +} + +func parseGPUMemoryResource(value interface{}) int64 { + normalized, err := normalizeGPUMemoryQuota(fmt.Sprint(value)) + if err != nil || normalized == "" { + return 0 + } + parsed, err := strconv.ParseInt(normalized, 10, 64) + if err != 
nil { + return 0 + } + return parsed +} diff --git a/backend/internal/domain/service/quota_precheck_test.go b/backend/internal/domain/service/quota_precheck_test.go new file mode 100644 index 0000000..70bcf47 --- /dev/null +++ b/backend/internal/domain/service/quota_precheck_test.go @@ -0,0 +1,241 @@ +package service + +import ( + "errors" + "testing" + + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func TestCompareWorkspaceQuotaReportsExceededRequests(t *testing.T) { + t.Parallel() + + workspace := &entity.Workspace{ + QuotaCPU: "2", + QuotaMemory: "4Gi", + QuotaGPU: "1", + QuotaGPUMem: "10000", + } + estimate := &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("2500m"), + Memory: resource.MustParse("3Gi"), + GPU: 1, + GPUMemoryMB: 12000, + }, + } + + result, err := CompareWorkspaceQuota(workspace, estimate) + if !errors.Is(err, ErrQuotaExceeded) { + t.Fatalf("expected ErrQuotaExceeded, got %v", err) + } + if result == nil || result.Allowed { + t.Fatalf("expected denied result, got %#v", result) + } + if len(result.Exceeded) != 2 { + t.Fatalf("expected 2 exceeded resources, got %#v", result.Exceeded) + } + if result.Exceeded[0].Name != "requests.cpu" { + t.Fatalf("expected requests.cpu exceeded first, got %#v", result.Exceeded) + } + if result.Exceeded[1].Name != "requests.nvidia.com/gpumem" { + t.Fatalf("expected requests.nvidia.com/gpumem exceeded second, got %#v", result.Exceeded) + } +} + +func TestCompareWorkspaceQuotaUsesLimitsAsEffectiveRequests(t *testing.T) { + t.Parallel() + + workspace := &entity.Workspace{ + QuotaGPU: "0", + QuotaGPUMem: "9999", + } + estimate := &repository.ResourceEstimate{ + Limits: repository.ResourceVector{ + GPU: 1, + GPUMemoryMB: 10000, + }, + } + + result, err := CompareWorkspaceQuota(workspace, estimate) + if !errors.Is(err, 
ErrQuotaExceeded) { + t.Fatalf("expected ErrQuotaExceeded from limits-only GPU resources, got %v", err) + } + if result == nil || len(result.Exceeded) != 2 { + t.Fatalf("expected gpu and gpumem to be exceeded, got %#v", result) + } +} + +func TestCompareBindingQuotaSubtractsCurrentReleaseFromUsedQuota(t *testing.T) { + t.Parallel() + + binding := &entity.WorkspaceClusterBinding{ + QuotaCPU: "1", + QuotaMemory: "2Gi", + QuotaGPU: "1", + QuotaGPUMem: "10000", + } + usage := &repository.ResourceQuotaUsage{ + Used: repository.ResourceVector{ + CPU: resource.MustParse("1"), + Memory: resource.MustParse("2Gi"), + GPU: 1, + GPUMemoryMB: 10000, + }, + } + current := &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("1"), + Memory: resource.MustParse("2Gi"), + GPU: 1, + GPUMemoryMB: 10000, + }, + } + targetSameSize := &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("1"), + Memory: resource.MustParse("2Gi"), + GPU: 1, + GPUMemoryMB: 10000, + }, + } + + result, err := CompareBindingQuota(binding, usage, targetSameSize, current) + if err != nil { + t.Fatalf("expected update with same resource footprint to fit quota, got %v", err) + } + if result.Required.Requests.GPU != 1 || result.Required.Requests.GPUMemoryMB != 10000 { + t.Fatalf("expected required resources to subtract current release before target, got %#v", result.Required.Requests) + } + + targetScaledUp := &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("2"), + Memory: resource.MustParse("4Gi"), + GPU: 2, + GPUMemoryMB: 20000, + }, + } + result, err = CompareBindingQuota(binding, usage, targetScaledUp, current) + if !errors.Is(err, ErrQuotaExceeded) { + t.Fatalf("expected scale-up beyond quota to be rejected, got %v", err) + } + if result == nil || result.Allowed { + t.Fatalf("expected denied quota result, got %#v", result) + } +} + +func 
TestCompareBindingQuotaTreatsExplicitZeroGPUAsNoGPUAllowed(t *testing.T) { + t.Parallel() + + binding := &entity.WorkspaceClusterBinding{ + QuotaCPU: "8", + QuotaMemory: "32Gi", + QuotaGPU: "0", + QuotaGPUMem: "0", + } + vllmLikeEstimate := &repository.ResourceEstimate{ + Requests: repository.ResourceVector{ + CPU: resource.MustParse("2"), + Memory: resource.MustParse("8Gi"), + GPU: 1, + GPUMemoryMB: 10000, + }, + } + + result, err := CompareBindingQuota(binding, &repository.ResourceQuotaUsage{}, vllmLikeEstimate, nil) + if !errors.Is(err, ErrQuotaExceeded) { + t.Fatalf("expected GPU request to exceed explicit zero quota, got %v", err) + } + exceeded := map[string]bool{} + for _, item := range result.Exceeded { + exceeded[item.Name] = true + } + for _, name := range []string{"requests.nvidia.com/gpu", "requests.nvidia.com/gpumem"} { + if !exceeded[name] { + t.Fatalf("expected %s to be exceeded, got %#v", name, result.Exceeded) + } + } +} + +func TestBindingQuotaHardKeepsGPUMemoryAsIntegerMB(t *testing.T) { + t.Parallel() + + hard := bindingQuotaHard(&entity.WorkspaceClusterBinding{QuotaGPU: "1", QuotaGPUMem: "10000"}) + gpuMem := hard[corev1.ResourceName("requests.nvidia.com/gpumem")] + if gpuMem.Value() != 10000 { + t.Fatalf("expected gpumem quota to remain integer MB 10000, got %s value=%d", gpuMem.String(), gpuMem.Value()) + } +} + +func TestEstimateRenderedManifestResourcesSumsPodTemplates(t *testing.T) { + t.Parallel() + + manifest := ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gpu-worker +spec: + replicas: 3 + template: + spec: + initContainers: + - name: init + image: busybox + resources: + requests: + cpu: 100m + memory: 128Mi + containers: + - name: app + image: busybox + resources: + requests: + cpu: 500m + memory: 1Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "10000" + limits: + cpu: "1" + memory: 2Gi + nvidia.com/gpu: "1" + nvidia.com/gpumem: "12000" +--- +apiVersion: v1 +kind: Service +metadata: + name: ignored +` + estimate, err := 
EstimateRenderedManifestResources(manifest) + if err != nil { + t.Fatalf("EstimateRenderedManifestResources returned error: %v", err) + } + if estimate.Requests.CPU.Cmp(resource.MustParse("1800m")) != 0 { + t.Fatalf("expected requests cpu 1800m, got %s", estimate.Requests.CPU.String()) + } + if estimate.Requests.Memory.Cmp(resource.MustParse("3456Mi")) != 0 { + t.Fatalf("expected requests memory 3456Mi, got %s", estimate.Requests.Memory.String()) + } + if estimate.Requests.GPU != 3 { + t.Fatalf("expected requests gpu 3, got %d", estimate.Requests.GPU) + } + if estimate.Requests.GPUMemoryMB != 30000 { + t.Fatalf("expected requests gpumem 30000, got %d", estimate.Requests.GPUMemoryMB) + } + if estimate.Limits.CPU.Cmp(resource.MustParse("3")) != 0 { + t.Fatalf("expected limits cpu 3, got %s", estimate.Limits.CPU.String()) + } + if estimate.Limits.Memory.Cmp(resource.MustParse("6Gi")) != 0 { + t.Fatalf("expected limits memory 6Gi, got %s", estimate.Limits.Memory.String()) + } + if estimate.Limits.GPU != 3 { + t.Fatalf("expected limits gpu 3, got %d", estimate.Limits.GPU) + } + if estimate.Limits.GPUMemoryMB != 36000 { + t.Fatalf("expected limits gpumem 36000, got %d", estimate.Limits.GPUMemoryMB) + } +} diff --git a/backend/internal/domain/service/quota_quantity.go b/backend/internal/domain/service/quota_quantity.go new file mode 100644 index 0000000..b838633 --- /dev/null +++ b/backend/internal/domain/service/quota_quantity.go @@ -0,0 +1,58 @@ +package service + +import ( + "strconv" + "strings" + + "github.com/ocdp/cluster-service/internal/domain/entity" +) + +func normalizeStandardQuotaQuantity(value string) string { + value = strings.TrimSpace(value) + switch strings.ToLower(value) { + case "unlimited", "none", "no-limit", "nolimit": + return "" + } + upper := strings.ToUpper(value) + switch { + case strings.HasSuffix(upper, "MB"): + return strings.TrimSpace(value[:len(value)-2]) + "M" + case strings.HasSuffix(upper, "GB"): + return 
strings.TrimSpace(value[:len(value)-2]) + "G" + default: + return value + } +} + +func normalizeGPUMemoryQuota(value string) (string, error) { + value = strings.TrimSpace(value) + if value == "" { + return "", nil + } + upper := strings.ToUpper(value) + multiplier := int64(1) + number := value + switch { + case strings.HasSuffix(upper, "MB"): + number = strings.TrimSpace(value[:len(value)-2]) + case strings.HasSuffix(upper, "M"): + number = strings.TrimSpace(value[:len(value)-1]) + case strings.HasSuffix(upper, "GB"): + number = strings.TrimSpace(value[:len(value)-2]) + multiplier = 1000 + case strings.HasSuffix(upper, "G"): + number = strings.TrimSpace(value[:len(value)-1]) + multiplier = 1000 + case strings.HasSuffix(upper, "GIB"): + number = strings.TrimSpace(value[:len(value)-3]) + multiplier = 1024 + case strings.HasSuffix(upper, "GI"): + number = strings.TrimSpace(value[:len(value)-2]) + multiplier = 1024 + } + parsed, err := strconv.ParseInt(number, 10, 64) + if err != nil || parsed < 0 { + return "", entity.ErrInvalidTenantResourceQuota + } + return strconv.FormatInt(parsed*multiplier, 10), nil +} diff --git a/backend/internal/domain/service/registry_service.go b/backend/internal/domain/service/registry_service.go index 92e7f80..99e66ab 100644 --- a/backend/internal/domain/service/registry_service.go +++ b/backend/internal/domain/service/registry_service.go @@ -5,6 +5,7 @@ import ( "github.com/google/uuid" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" ) // RegistryService Registry 管理领域服务 @@ -26,8 +27,21 @@ func NewRegistryService( // CreateRegistry 创建新 Registry func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.Registry) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 生成 ID registry.ID = uuid.New().String() + registry.OwnerID = 
principal.UserID + registry.WorkspaceID = principal.WorkspaceID + if principal.IsAdmin() && registry.WorkspaceID == "" { + registry.WorkspaceID = entity.DefaultWorkspaceID + } + if !principal.IsAdmin() && registry.Visibility == authz.VisibilityGlobalShared { + return entity.ErrForbidden + } + registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility) // 验证 if err := registry.Validate(); err != nil { @@ -35,9 +49,11 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R } // 检查是否已存在 - existingRegistry, _ := s.registryRepo.GetByName(ctx, registry.Name) - if existingRegistry != nil { - return entity.ErrRegistryExists + registries, _ := s.registryRepo.List(ctx) + for _, existingRegistry := range registries { + if existingRegistry.Name == registry.Name && existingRegistry.WorkspaceID == registry.WorkspaceID && existingRegistry.OwnerID == registry.OwnerID { + return entity.ErrRegistryExists + } } return s.registryRepo.Create(ctx, registry) @@ -45,16 +61,41 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R // GetRegistry 获取 Registry func (s *RegistryService) GetRegistry(ctx context.Context, id string) (*entity.Registry, error) { - return s.registryRepo.GetByID(ctx, id) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registry, err := s.registryRepo.GetByID(ctx, id) + if err != nil { + return nil, err + } + if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return nil, entity.ErrRegistryNotFound + } + return registry, nil } // UpdateRegistry 更新 Registry func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.Registry) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.registryRepo.GetByID(ctx, registry.ID) + existing, err := s.registryRepo.GetByID(ctx, registry.ID) if err 
!= nil { return entity.ErrRegistryNotFound } + if !authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) { + return entity.ErrForbidden + } + registry.WorkspaceID = existing.WorkspaceID + registry.OwnerID = existing.OwnerID + if principal.IsAdmin() { + registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility) + } else { + registry.Visibility = existing.Visibility + } // 验证 if err := registry.Validate(); err != nil { @@ -66,27 +107,47 @@ func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.R // DeleteRegistry 删除 Registry func (s *RegistryService) DeleteRegistry(ctx context.Context, id string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } // 检查是否存在 - _, err := s.registryRepo.GetByID(ctx, id) + registry, err := s.registryRepo.GetByID(ctx, id) if err != nil { return entity.ErrRegistryNotFound } + if !authz.CanWriteResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + return entity.ErrForbidden + } return s.registryRepo.Delete(ctx, id) } // ListRegistries 列出所有 Registries func (s *RegistryService) ListRegistries(ctx context.Context) ([]*entity.Registry, error) { - return s.registryRepo.List(ctx) + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + registries, err := s.registryRepo.List(ctx) + if err != nil { + return nil, err + } + visible := make([]*entity.Registry, 0, len(registries)) + for _, registry := range registries { + if authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) { + visible = append(visible, registry) + } + } + return visible, nil } // CheckHealth 检查 Registry 健康状态 func (s *RegistryService) CheckHealth(ctx context.Context, id string) error { - registry, err := s.registryRepo.GetByID(ctx, id) + registry, err := s.GetRegistry(ctx, id) if err != nil { return 
entity.ErrRegistryNotFound } return s.ociClient.CheckHealth(ctx, registry) } - diff --git a/backend/internal/domain/service/workspace_service.go b/backend/internal/domain/service/workspace_service.go new file mode 100644 index 0000000..744d1ee --- /dev/null +++ b/backend/internal/domain/service/workspace_service.go @@ -0,0 +1,321 @@ +package service + +import ( + "context" + "sort" + "strings" + "time" + + "github.com/google/uuid" + "github.com/ocdp/cluster-service/internal/domain/entity" + "github.com/ocdp/cluster-service/internal/domain/repository" + "github.com/ocdp/cluster-service/internal/pkg/authz" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +type WorkspaceService struct { + workspaceRepo repository.WorkspaceRepository + bindingRepo repository.WorkspaceClusterBindingRepository + clusterRepo repository.ClusterRepository + tenantClient repository.TenantKubeClient + auditRepo repository.AuditLogRepository +} + +func NewWorkspaceService( + workspaceRepo repository.WorkspaceRepository, + bindingRepo repository.WorkspaceClusterBindingRepository, + clusterRepo repository.ClusterRepository, + tenantClient repository.TenantKubeClient, + auditRepo repository.AuditLogRepository, +) *WorkspaceService { + return &WorkspaceService{ + workspaceRepo: workspaceRepo, + bindingRepo: bindingRepo, + clusterRepo: clusterRepo, + tenantClient: tenantClient, + auditRepo: auditRepo, + } +} + +func (s *WorkspaceService) ListWorkspaces(ctx context.Context) ([]*entity.Workspace, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if principal.IsAdmin() { + return s.workspaceRepo.List(ctx) + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return nil, err + } + return []*entity.Workspace{workspace}, nil +} + +func (s *WorkspaceService) CreateWorkspace(ctx context.Context, name string) (*entity.Workspace, error) { + principal, err := 
authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return nil, entity.ErrForbidden + } + workspace := entity.NewWorkspace(name, principal.UserID) + workspace.ID = uuid.New().String() + if err := s.workspaceRepo.Create(ctx, workspace); err != nil { + return nil, err + } + s.audit(ctx, principal, "create", "workspace", workspace.ID, workspace.Name, nil) + return workspace, nil +} + +func (s *WorkspaceService) EnsureClusterBinding(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() && workspaceID != principal.WorkspaceID { + return nil, entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return nil, err + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !principal.IsAdmin() && !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + binding := &entity.WorkspaceClusterBinding{ + ID: uuid.New().String(), + WorkspaceID: workspace.ID, + ClusterID: cluster.ID, + Namespace: workspace.K8sNamespace, + ServiceAccount: workspace.K8sSAName, + QuotaCPU: strings.TrimSpace(workspace.QuotaCPU), + QuotaMemory: strings.TrimSpace(workspace.QuotaMemory), + QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU), + QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem), + Status: "active", + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding) + if s.tenantClient != nil { + if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil { + return nil, err + 
} + } + if err := s.bindingRepo.Upsert(ctx, binding); err != nil { + return nil, err + } + s.audit(ctx, principal, "init", "workspace_cluster_binding", binding.ID, binding.Namespace, map[string]interface{}{"cluster_id": clusterID}) + return binding, nil +} + +func (s *WorkspaceService) IssueKubeconfig(ctx context.Context, workspaceID, clusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if !principal.IsAdmin() && workspaceID != principal.WorkspaceID { + return nil, entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return nil, err + } + if workspace.Status == entity.WorkspaceSuspended { + return nil, entity.ErrWorkspaceSuspended + } + cluster, err := s.clusterRepo.GetByID(ctx, clusterID) + if err != nil { + return nil, entity.ErrClusterNotFound + } + if !principal.IsAdmin() && !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + return nil, entity.ErrClusterNotFound + } + binding, err := s.bindingRepo.Get(ctx, workspaceID, clusterID) + if err != nil { + binding, err = s.EnsureClusterBinding(ctx, workspaceID, clusterID) + if err != nil { + return nil, err + } + } else { + binding.QuotaCPU = strings.TrimSpace(workspace.QuotaCPU) + binding.QuotaMemory = strings.TrimSpace(workspace.QuotaMemory) + binding.QuotaGPU = zeroIfEmptyQuota(workspace.QuotaGPU) + binding.QuotaGPUMem = zeroIfEmptyQuota(workspace.QuotaGPUMem) + binding.UpdatedAt = time.Now() + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding) + if s.tenantClient != nil { + if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil { + return nil, err + } + } + _ = s.bindingRepo.Upsert(ctx, binding) + kubeconfig, err := 
s.tenantClient.IssueKubeconfig(ctx, cluster, tenantBinding, ttl) + if err != nil { + return nil, err + } + s.audit(ctx, principal, "issue_kubeconfig", "workspace_cluster_binding", binding.ID, binding.Namespace, map[string]interface{}{"cluster_id": clusterID, "ttl_seconds": int64(entity.TenantTokenTTL(ttl).Seconds())}) + return kubeconfig, nil +} + +func resourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList { + hard := corev1.ResourceList{} + addQuantity := func(name corev1.ResourceName, value string) { + value = normalizeStandardQuotaQuantity(value) + if value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[name] = quantity + } + } + addGPUMemoryQuantity := func(value string) { + value, err := normalizeGPUMemoryQuota(value) + if err != nil || value == "" { + return + } + if quantity, err := resource.ParseQuantity(value); err == nil { + hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity + } + } + if workspace == nil { + return hard + } + addQuantity(corev1.ResourceName("requests.cpu"), workspace.QuotaCPU) + addQuantity(corev1.ResourceName("requests.memory"), workspace.QuotaMemory) + addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU) + addGPUMemoryQuantity(workspace.QuotaGPUMem) + return hard +} + +func (s *WorkspaceService) IssueCurrentKubeconfig(ctx context.Context, requestedClusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + if requestedClusterID != "" { + return s.IssueKubeconfig(ctx, principal.WorkspaceID, requestedClusterID, ttl) + } + workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID) + if err != nil { + return nil, err + } + if workspace.DefaultClusterID != "" { + return s.IssueKubeconfig(ctx, principal.WorkspaceID, workspace.DefaultClusterID, ttl) + } + return s.IssueDefaultKubeconfig(ctx, ttl) +} + +func (s 
*WorkspaceService) IssueDefaultKubeconfig(ctx context.Context, ttl time.Duration) (*entity.TenantKubeconfig, error) { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return nil, entity.ErrUnauthorized + } + clusters, err := s.clusterRepo.List(ctx) + if err != nil { + return nil, err + } + candidates := make([]*entity.Cluster, 0, len(clusters)) + for _, cluster := range clusters { + if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) { + continue + } + switch cluster.Visibility { + case authz.VisibilityGlobalShared: + candidates = append(candidates, cluster) + case authz.VisibilityWorkspaceShared: + if cluster.WorkspaceID == principal.WorkspaceID { + candidates = append(candidates, cluster) + } + } + } + sort.SliceStable(candidates, func(i, j int) bool { + leftRank := defaultKubeconfigClusterRank(candidates[i]) + rightRank := defaultKubeconfigClusterRank(candidates[j]) + if leftRank != rightRank { + return leftRank < rightRank + } + return candidates[i].Name < candidates[j].Name + }) + var firstIssueErr error + for _, cluster := range candidates { + if kubeconfig, err := s.IssueKubeconfig(ctx, principal.WorkspaceID, cluster.ID, ttl); err == nil { + return kubeconfig, nil + } else if firstIssueErr == nil { + firstIssueErr = err + } + } + if firstIssueErr != nil { + return nil, firstIssueErr + } + return nil, entity.ErrClusterNotFound +} + +func defaultKubeconfigClusterRank(cluster *entity.Cluster) int { + switch cluster.Visibility { + case authz.VisibilityGlobalShared: + return 0 + case authz.VisibilityWorkspaceShared: + return 1 + default: + return 2 + } +} + +func (s *WorkspaceService) SuspendWorkspace(ctx context.Context, workspaceID string) error { + principal, err := authz.RequirePrincipal(ctx) + if err != nil { + return entity.ErrUnauthorized + } + if !principal.IsAdmin() { + return entity.ErrForbidden + } + workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID) + if err != nil { + return err + 
} + workspace.Status = entity.WorkspaceSuspended + if err := s.workspaceRepo.Update(ctx, workspace); err != nil { + return err + } + clusters, _ := s.clusterRepo.List(ctx) + for _, cluster := range clusters { + binding, err := s.bindingRepo.Get(ctx, workspaceID, cluster.ID) + if err != nil { + continue + } + tenantBinding := entity.NewTenantBinding(binding.Namespace) + tenantBinding.ServiceAccountName = binding.ServiceAccount + _ = s.tenantClient.SuspendTenant(ctx, cluster, tenantBinding) + } + s.audit(ctx, principal, "suspend", "workspace", workspace.ID, workspace.Name, nil) + return nil +} + +func (s *WorkspaceService) audit(ctx context.Context, principal *authz.Principal, action, resourceType, resourceID, resourceName string, details map[string]interface{}) { + if s.auditRepo == nil || principal == nil { + return + } + _ = s.auditRepo.Create(ctx, &entity.AuditLog{ + WorkspaceID: principal.WorkspaceID, + UserID: principal.UserID, + Action: action, + ResourceType: resourceType, + ResourceID: resourceID, + ResourceName: resourceName, + Details: details, + CreatedAt: time.Now(), + }) +} diff --git a/backend/internal/pkg/authz/authz.go b/backend/internal/pkg/authz/authz.go new file mode 100644 index 0000000..b8e2d1e --- /dev/null +++ b/backend/internal/pkg/authz/authz.go @@ -0,0 +1,144 @@ +package authz + +import ( + "context" + "errors" +) + +type contextKey string + +const principalKey contextKey = "principal" + +const ( + RoleAdmin = "admin" + RoleUser = "user" +) + +const ( + VisibilityPrivate = "private" + VisibilityWorkspaceShared = "workspace_shared" + VisibilityGlobalShared = "global_shared" +) + +var ( + ErrUnauthenticated = errors.New("authentication required") + ErrForbidden = errors.New("permission denied") +) + +type Principal struct { + UserID string + Username string + Role string + WorkspaceID string + WorkspaceName string + Namespace string + DefaultClusterID string + QuotaCPU string + QuotaMemory string + QuotaGPU string + QuotaGPUMem string + 
Permissions []string + PermissionVersion int +} + +func WithPrincipal(ctx context.Context, principal *Principal) context.Context { + return context.WithValue(ctx, principalKey, principal) +} + +func PrincipalFromContext(ctx context.Context) (*Principal, bool) { + principal, ok := ctx.Value(principalKey).(*Principal) + return principal, ok && principal != nil +} + +func RequirePrincipal(ctx context.Context) (*Principal, error) { + principal, ok := PrincipalFromContext(ctx) + if !ok { + return nil, ErrUnauthenticated + } + return principal, nil +} + +func (p *Principal) IsAdmin() bool { + return p != nil && p.Role == RoleAdmin +} + +func CanReadResource(p *Principal, workspaceID, ownerID, visibility string) bool { + if p == nil { + return false + } + if p.IsAdmin() { + return true + } + switch visibility { + case VisibilityGlobalShared: + return true + case VisibilityWorkspaceShared: + return workspaceID != "" && workspaceID == p.WorkspaceID + default: + return ownerID != "" && ownerID == p.UserID + } +} + +func CanWriteResource(p *Principal, workspaceID, ownerID, visibility string) bool { + if p == nil { + return false + } + if p.IsAdmin() { + return true + } + if visibility == VisibilityGlobalShared { + return false + } + return workspaceID != "" && workspaceID == p.WorkspaceID && ownerID != "" && ownerID == p.UserID +} + +func NormalizeVisibility(role, requested string) string { + switch requested { + case VisibilityWorkspaceShared: + if role == RoleAdmin { + return requested + } + return VisibilityPrivate + case VisibilityGlobalShared: + if role == RoleAdmin { + return requested + } + return VisibilityPrivate + case VisibilityPrivate: + return requested + default: + return VisibilityPrivate + } +} + +func PermissionsForRole(role string) []string { + if role == RoleAdmin { + return []string{ + "*", + "home:view", + "workspaces:manage", + "users:manage", + "configuration:clusters:manage", + "configuration:registries:manage", + "artifact:registries:view", + 
"artifact:instances:manage", + "monitoring:clusters:view", + "clusters:manage:any", + "registries:manage:any", + "instances:manage:any", + "kubeconfig:issue:any", + } + } + return []string{ + "home:view", + "configuration:clusters:manage_own", + "configuration:registries:manage_own", + "artifact:registries:view", + "artifact:instances:manage_own", + "monitoring:clusters:view", + "clusters:manage:own", + "registries:manage:own", + "instances:manage:own", + "kubeconfig:issue:own", + } +} diff --git a/backend/internal/pkg/crypto/crypto_test.go b/backend/internal/pkg/crypto/crypto_test.go index 9b5dd8e..fef1b8f 100644 --- a/backend/internal/pkg/crypto/crypto_test.go +++ b/backend/internal/pkg/crypto/crypto_test.go @@ -12,7 +12,7 @@ func TestAESEncryptor(t *testing.T) { plaintext string }{ {"simple password", "password123"}, - {"harbor password", "BWGDIP@ssw0rd1401#"}, + {"registry password", "registry-password-example"}, {"empty string", ""}, {"long certificate", "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkekNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pP"}, {"unicode", "密码123!@#"}, @@ -121,4 +121,3 @@ func TestEncryptionConsistency(t *testing.T) { t.Error("Decryption should produce original plaintext") } } - diff --git a/backend/internal/pkg/jwt/jwt.go b/backend/internal/pkg/jwt/jwt.go index 5133c73..1ca221c 100644 --- a/backend/internal/pkg/jwt/jwt.go +++ b/backend/internal/pkg/jwt/jwt.go @@ -3,13 +3,13 @@ package jwt import ( "fmt" "time" - + "github.com/golang-jwt/jwt/v5" ) const ( - AccessTokenDuration = 24 * time.Hour // Access Token 有效期 - RefreshTokenDuration = 7 * 24 * time.Hour // Refresh Token 有效期 + AccessTokenDuration = 24 * time.Hour // Access Token 有效期 + RefreshTokenDuration = 7 * 24 * time.Hour // Refresh Token 有效期 ) // JWTManager JWT 管理器 @@ -26,98 +26,133 @@ func NewJWTManager(secretKey string) *JWTManager { // Claims JWT Claims type Claims struct { - UserID string `json:"user_id"` - Username string `json:"username"` + UserID string `json:"user_id"` + Username 
string `json:"username"` + Role string `json:"role"` + WorkspaceID string `json:"workspace_id"` + TokenType string `json:"token_type"` jwt.RegisteredClaims } // Generate 生成 Access Token 和 Refresh Token -func (m *JWTManager) Generate(userID, username string) (accessToken, refreshToken string, err error) { +func (m *JWTManager) Generate(userID, username, role, workspaceID string) (accessToken, refreshToken string, err error) { // 生成 Access Token accessClaims := &Claims{ - UserID: userID, - Username: username, + UserID: userID, + Username: username, + Role: role, + WorkspaceID: workspaceID, + TokenType: "access", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(AccessTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + accessTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, accessClaims) accessToken, err = accessTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", "", fmt.Errorf("failed to sign access token: %w", err) } - + // 生成 Refresh Token refreshClaims := &Claims{ - UserID: userID, - Username: username, + UserID: userID, + Username: username, + Role: role, + WorkspaceID: workspaceID, + TokenType: "refresh", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(RefreshTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + refreshTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, refreshClaims) refreshToken, err = refreshTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", "", fmt.Errorf("failed to sign refresh token: %w", err) } - + return accessToken, refreshToken, nil } // Verify 验证 Token func (m *JWTManager) Verify(tokenString string) (userID, username string, err error) { - userID, username, _, err = m.VerifyWithIssuedAt(tokenString) - return userID, username, err + claims, err := m.VerifyClaims(tokenString, "") + if err != nil { + return "", "", err + } + return claims.UserID, claims.Username, nil +} + +func (m *JWTManager) 
VerifyAccess(tokenString string) (*Claims, error) { + return m.VerifyClaims(tokenString, "access") +} + +func (m *JWTManager) VerifyRefresh(tokenString string) (*Claims, error) { + return m.VerifyClaims(tokenString, "refresh") } -// VerifyWithIssuedAt 验证 Token 并返回签发时间 func (m *JWTManager) VerifyWithIssuedAt(tokenString string) (userID, username string, issuedAt int64, err error) { + claims, err := m.VerifyClaims(tokenString, "access") + if err != nil { + return "", "", 0, err + } + return claims.UserID, claims.Username, claims.IssuedAt.Unix(), nil +} + +func (m *JWTManager) VerifyClaims(tokenString, expectedType string) (*Claims, error) { token, err := jwt.ParseWithClaims(tokenString, &Claims{}, func(token *jwt.Token) (interface{}, error) { if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) } return []byte(m.secretKey), nil }) - + if err != nil { - return "", "", 0, fmt.Errorf("failed to parse token: %w", err) + return nil, fmt.Errorf("failed to parse token: %w", err) } - - if claims, ok := token.Claims.(*Claims); ok && token.Valid { - return claims.UserID, claims.Username, claims.IssuedAt.Unix(), nil + + claims, ok := token.Claims.(*Claims) + if !ok || !token.Valid { + return nil, fmt.Errorf("invalid token") } - - return "", "", 0, fmt.Errorf("invalid token") + if expectedType != "" && claims.TokenType != expectedType { + return nil, fmt.Errorf("invalid token type") + } + if claims.IssuedAt == nil { + return nil, fmt.Errorf("token missing issued_at") + } + return claims, nil } // Refresh 刷新 Token func (m *JWTManager) Refresh(refreshToken string) (string, error) { // 验证 Refresh Token - userID, username, err := m.Verify(refreshToken) + claims, err := m.VerifyRefresh(refreshToken) if err != nil { return "", fmt.Errorf("invalid refresh token: %w", err) } - + // 生成新的 Access Token accessClaims := &Claims{ - UserID: userID, - Username: username, + UserID: claims.UserID, + Username: 
claims.Username, + Role: claims.Role, + WorkspaceID: claims.WorkspaceID, + TokenType: "access", RegisteredClaims: jwt.RegisteredClaims{ ExpiresAt: jwt.NewNumericDate(time.Now().Add(AccessTokenDuration)), IssuedAt: jwt.NewNumericDate(time.Now()), }, } - + accessTokenObj := jwt.NewWithClaims(jwt.SigningMethodHS256, accessClaims) newAccessToken, err := accessTokenObj.SignedString([]byte(m.secretKey)) if err != nil { return "", fmt.Errorf("failed to sign new access token: %w", err) } - + return newAccessToken, nil } - diff --git a/backend/scripts/docker-quick-start.sh b/backend/scripts/docker-quick-start.sh index ac761e1..2f0174f 100755 --- a/backend/scripts/docker-quick-start.sh +++ b/backend/scripts/docker-quick-start.sh @@ -197,8 +197,8 @@ start_pgadmin() { echo "" print_info "访问地址: http://localhost:5050" print_info "登录信息:" - echo " 📧 邮箱: admin@ocdp.local" - echo " 🔑 密码: admin" + echo " 📧 邮箱: ${PGADMIN_EMAIL:-admin@ocdp.local}" + echo " 🔑 密码: ${PGADMIN_PASSWORD:-change-me}" echo "" print_info "连接数据库配置:" echo " 📍 Host: postgres" @@ -270,4 +270,3 @@ main() { # 运行主函数 main - diff --git a/backend/scripts/generate-bootstrap-config.sh b/backend/scripts/generate-bootstrap-config.sh index cb9c7b6..569b434 100755 --- a/backend/scripts/generate-bootstrap-config.sh +++ b/backend/scripts/generate-bootstrap-config.sh @@ -23,13 +23,7 @@ TMP_FILE=$(mktemp) cat > "$TMP_FILE" <<'EOF' { "enabled": true, - "users": [ - { - "username": "admin", - "password": "admin123", - "email": "admin@example.com" - } - ], + "users": [], "registries": [], "clusters": [] } @@ -38,6 +32,38 @@ EOF echo "📋 请按提示输入信息..." echo "" +# ===== Admin 用户配置 ===== +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "👤 Admin 用户配置" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +read -p "是否添加初始管理员用户? 
(y/n) [y]: " ADD_ADMIN +ADD_ADMIN=${ADD_ADMIN:-y} + +if [[ "$ADD_ADMIN" == "y" ]]; then + read -p "Admin 用户名: " ADMIN_USER + read -sp "Admin 密码: " ADMIN_PASS + echo "" + read -p "Admin 邮箱 [${ADMIN_USER}@example.local]: " ADMIN_EMAIL + ADMIN_EMAIL=${ADMIN_EMAIL:-"${ADMIN_USER}@example.local"} + + if [[ -z "$ADMIN_USER" || -z "$ADMIN_PASS" ]]; then + echo "❌ Admin 用户名和密码不能为空" + exit 1 + fi + + TMP_USER=$(jq -n \ + --arg username "$ADMIN_USER" \ + --arg password "$ADMIN_PASS" \ + --arg email "$ADMIN_EMAIL" \ + '{username: $username, password: $password, email: $email}') + + jq ".users += [$TMP_USER]" "$TMP_FILE" > "${TMP_FILE}.tmp" && mv "${TMP_FILE}.tmp" "$TMP_FILE" + echo "✅ Admin 用户 '$ADMIN_USER' 已添加" +fi + +echo "" + # ===== Registries 配置 ===== echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "📦 Registry 配置" @@ -47,20 +73,23 @@ read -p "是否添加 Registry? (y/n) [y]: " ADD_REGISTRY ADD_REGISTRY=${ADD_REGISTRY:-y} if [[ "$ADD_REGISTRY" == "y" ]]; then - read -p "Registry 名称 [harbor-bwgdi]: " REGISTRY_NAME - REGISTRY_NAME=${REGISTRY_NAME:-harbor-bwgdi} + read -p "Registry 名称 [harbor]: " REGISTRY_NAME + REGISTRY_NAME=${REGISTRY_NAME:-harbor} - read -p "Registry URL [https://harbor.bwgdi.com]: " REGISTRY_URL - REGISTRY_URL=${REGISTRY_URL:-https://harbor.bwgdi.com} + read -p "Registry URL: " REGISTRY_URL - read -p "Registry 描述 [BWGDI Harbor Registry]: " REGISTRY_DESC - REGISTRY_DESC=${REGISTRY_DESC:-"BWGDI Harbor Registry"} + read -p "Registry 描述 [Harbor Registry]: " REGISTRY_DESC + REGISTRY_DESC=${REGISTRY_DESC:-"Harbor Registry"} - read -p "Registry 用户名 [admin]: " REGISTRY_USER - REGISTRY_USER=${REGISTRY_USER:-admin} + read -p "Registry 用户名(推荐 Harbor robot 账号): " REGISTRY_USER read -sp "Registry 密码: " REGISTRY_PASS echo "" + + if [[ -z "$REGISTRY_URL" ]]; then + echo "❌ Registry URL 不能为空" + exit 1 + fi read -p "是否跳过 SSL 验证? 
(y/n) [n]: " REGISTRY_INSECURE REGISTRY_INSECURE=${REGISTRY_INSECURE:-n} @@ -72,17 +101,14 @@ if [[ "$ADD_REGISTRY" == "y" ]]; then fi # 添加 Registry 到配置 - TMP_REGISTRY=$(cat < "${TMP_FILE}.tmp" && mv "${TMP_FILE}.tmp" "$TMP_FILE" echo "✅ Registry '$REGISTRY_NAME' 已添加" @@ -232,4 +258,3 @@ echo " curl http://localhost:8080/api/v1/clusters" echo "" echo "✨ 完成!" - diff --git a/backend/scripts/quick-start-production.sh b/backend/scripts/quick-start-production.sh index b73274e..a568177 100755 --- a/backend/scripts/quick-start-production.sh +++ b/backend/scripts/quick-start-production.sh @@ -75,11 +75,10 @@ echo " - Health: http://localhost:8080/health" echo "" echo "📍 数据库管理:" echo " - pgAdmin: http://localhost:5050" -echo " Email: admin@ocdp.local" -echo " Password: admin" +echo " Email: ${PGADMIN_EMAIL:-admin@ocdp.local}" +echo " Password: ${PGADMIN_PASSWORD:-change-me}" echo "" echo "✨ 按 Ctrl+C 停止服务" echo "" ./bin/ocdp-backend - diff --git a/backend/scripts/test-all-modes.sh b/backend/scripts/test-all-modes.sh index 0366159..79d4a96 100755 --- a/backend/scripts/test-all-modes.sh +++ b/backend/scripts/test-all-modes.sh @@ -87,9 +87,11 @@ test_api() { log_info "测试 API..." 
# 测试注册 + local test_username="testuser$RANDOM" + local test_password="test123" register_response=$(curl -s -X POST http://localhost:8080/api/v1/auth/register \ -H "Content-Type: application/json" \ - -d '{"username":"testuser'"$RANDOM"'","password":"test123","email":"test@example.com"}') + -d '{"username":"'"$test_username"'","password":"'"$test_password"'","email":"test@example.com"}') if echo "$register_response" | grep -q "id"; then log_success "$mode 模式 API 注册测试通过" @@ -100,7 +102,7 @@ test_api() { # 测试登录 login_response=$(curl -s -X POST http://localhost:8080/api/v1/auth/login \ -H "Content-Type: application/json" \ - -d '{"username":"admin","password":"admin123"}') + -d '{"username":"'"$test_username"'","password":"'"$test_password"'"}') if echo "$login_response" | grep -q "accessToken"; then log_success "$mode 模式 API 登录测试通过" @@ -392,4 +394,3 @@ main() { # 执行主函数 main - diff --git a/database.md b/database.md new file mode 100644 index 0000000..e2291e0 --- /dev/null +++ b/database.md @@ -0,0 +1,598 @@ +# OCDP 数据库结构说明 + +## 概述 + +OCDP (Open Container Deployment Platform) 是一个多租户容器部署平台,支持: +- 多 Workspace 隔离 +- RBAC 权限控制 (Admin / User) +- Kubernetes 集群管理 +- OCI Registry 集成 (Harbor) +- Helm Chart 部署 +- Values 模板版本管理 +- 资源配额控制 +- 审计日志 + +## 数据库配置 + +```yaml +# PostgreSQL 连接信息 +Host: localhost +Port: 5430 (docker) / 5432 (local) +Database: ocdp +User: ocdp +Password: ocdp_password +``` + +--- + +## 表结构 + +### 1. 
users - 用户表 + +存储用户账户信息,支持多租户和角色管理。 + +```sql +CREATE TABLE users ( + id VARCHAR(36) PRIMARY KEY, + username VARCHAR(255) NOT NULL UNIQUE, + password_hash TEXT NOT NULL, + email VARCHAR(255) NOT NULL, + role VARCHAR(20) NOT NULL DEFAULT 'user', -- 'admin' | 'user' + workspace_id VARCHAR(36), -- 所属工作空间,admin 为 NULL 表示全局 + is_active BOOLEAN NOT NULL DEFAULT TRUE, -- 账户是否激活 + must_change_password BOOLEAN NOT NULL DEFAULT FALSE, -- 首次登录必须修改密码 + revoked_after TIMESTAMP NOT NULL DEFAULT '1970-01-01 00:00:00', -- 全局 Token 撤销时间 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | 550e8400-e29b-41d4-a716-446655440000 | +| username | VARCHAR(255) | 用户名,唯一 | admin | +| password_hash | TEXT | bcrypt 密码哈希 | $2a$10$... | +| email | VARCHAR(255) | 邮箱 | admin@ocdp.local | +| role | VARCHAR(20) | 角色:admin/user | admin | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| is_active | BOOLEAN | 账户是否激活 | true | +| must_change_password | BOOLEAN | 首次登录必须修改密码 | false | +| revoked_after | TIMESTAMP | Token 撤销时间(修改密码后自动撤销旧 Token) | 2024-01-01 10:00:00 | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**索引**: +- `idx_users_username` - 用户名查询 +- `idx_users_role` - 角色筛选 +- `idx_users_workspace_id` - 工作空间筛选 +- `idx_users_is_active` - 激活状态筛选 + +**角色说明**: +- `admin`: 管理员,可管理所有 Workspace 和资源,workspace_id 为 NULL +- `user`: 普通用户,仅可访问自己 Workspace 内的资源 + +--- + +### 2. 
workspaces - 工作空间表 + +租户/团队隔离单元。 + +```sql +CREATE TABLE workspaces ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(255) NOT NULL UNIQUE, + description TEXT, + created_by VARCHAR(36), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | workspace-uuid | +| name | VARCHAR(255) | 工作空间名称,唯一 | team-alpha | +| description | TEXT | 描述 | Alpha 团队工作空间 | +| created_by | VARCHAR(36) | 创建者用户 ID | user-uuid | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**索引**: +- `idx_workspaces_name` - 名称查询 + +--- + +### 3. workspace_quotas - 工作空间配额表 + +每个 Workspace 的资源配额限制。 + +```sql +CREATE TABLE workspace_quotas ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36) NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, + resource_type VARCHAR(50) NOT NULL, -- 'cpu' | 'gpu' | 'gpu_memory' + hard_limit DECIMAL(10,2) NOT NULL, -- 硬限制(0 表示无限制) + soft_limit DECIMAL(10,2) NOT NULL, -- 软限制(警告阈值) + used DECIMAL(10,2) NOT NULL DEFAULT 0, -- 当前使用量 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, resource_type) +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | quota-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| resource_type | VARCHAR(50) | 资源类型:cpu/gpu/gpu_memory | cpu | +| hard_limit | DECIMAL(10,2) | 硬限制(0=无限制) | 10.00 | +| soft_limit | DECIMAL(10,2) | 软限制(警告阈值) | 8.00 | +| used | DECIMAL(10,2) | 当前使用量 | 5.00 | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**配额检查逻辑**: +1. 部署实例前检查 `used + new_request <= hard_limit` +2. 超过硬限制返回 403 Forbidden +3. 超过软限制发送警告通知 +4. 实例删除后释放配额 + +--- + +### 4. 
clusters - Kubernetes 集群表 + +管理 Kubernetes 集群连接信息。 + +```sql +CREATE TABLE clusters ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间,NULL 表示全局共享 + owner_id VARCHAR(36), -- 创建者用户 ID + name VARCHAR(255) NOT NULL UNIQUE, + host TEXT NOT NULL, -- Kubernetes API Server URL + ca_data TEXT, -- CA 证书(Base64 编码) + cert_data TEXT, -- 客户端证书(Base64 编码) + key_data TEXT, -- 客户端密钥(Base64 编码) + token TEXT, -- Bearer Token(与证书认证二选一) + description TEXT, + isolation_mode VARCHAR(20) NOT NULL DEFAULT 'namespace', -- 'namespace' | 'cluster' + default_namespace VARCHAR(255), -- 默认 namespace 前缀 + is_shared BOOLEAN NOT NULL DEFAULT FALSE, -- 是否为共享集群 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | cluster-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| name | VARCHAR(255) | 集群名称,唯一 | prod-k8s | +| host | TEXT | Kubernetes API URL | https://k8s.example.com:6443 | +| ca_data | TEXT | CA 证书 Base64 | LS0tLS1... | +| cert_data | TEXT | 客户端证书 Base64 | LS0tLS1... | +| key_data | TEXT | 客户端密钥 Base64 | LS0tLS1... | +| token | TEXT | Bearer Token | eyJhbGci... | +| description | TEXT | 描述 | 生产环境集群 | +| isolation_mode | VARCHAR(20) | 隔离模式:namespace/cluster | namespace | +| default_namespace | VARCHAR(255) | 默认 namespace 前缀 | team-alpha | +| is_shared | BOOLEAN | 是否共享(admin 创建供多 Workspace 使用) | false | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**隔离模式说明**: +- `namespace`: 共享集群模式,多个 Workspace 使用不同 namespace + - 部署时自动分配:`{default_namespace}-{instance_name}` +- `cluster`: 私有集群模式,每个 Workspace 独立集群或独立凭证 + +**认证方式**: +1. 证书认证:`ca_data` + `cert_data` + `key_data` +2. Token 认证:`token` + +--- + +### 5. 
registries - OCI Registry 表 + +管理 Docker/OCI 镜像仓库(支持 Harbor)。 + +```sql +CREATE TABLE registries ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间,NULL 表示全局共享 + owner_id VARCHAR(36), -- 创建者用户 ID + name VARCHAR(255) NOT NULL UNIQUE, + url TEXT NOT NULL, -- Registry URL + description TEXT, + username VARCHAR(255), -- 认证用户名 + password TEXT, -- 认证密码(加密存储) + insecure BOOLEAN DEFAULT FALSE, -- 是否跳过 TLS 验证 + is_shared BOOLEAN DEFAULT FALSE, -- 是否为共享 Registry + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | registry-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| name | VARCHAR(255) | Registry 名称,唯一 | harbor-prod | +| url | TEXT | Registry URL | https://harbor.example.com | +| description | TEXT | 描述 | 生产环境 Harbor | +| username | VARCHAR(255) | 认证用户名 | admin | +| password | TEXT | 认证密码(加密) | encrypted... | +| insecure | BOOLEAN | 跳过 TLS 验证 | false | +| is_shared | BOOLEAN | 是否共享 | false | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +--- + +### 6. 
instances - Helm 实例表 + +部署的 Helm Release 管理。 + +```sql +CREATE TABLE instances ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), -- 所属工作空间 + owner_id VARCHAR(36), -- 创建者用户 ID + cluster_id VARCHAR(36) NOT NULL, + registry_id VARCHAR(36) NOT NULL, + chart_reference_id VARCHAR(36), -- 引用的 Chart 引用 + values_template_id VARCHAR(36), -- 使用的 Values 模板 + + name VARCHAR(255) NOT NULL, -- Helm Release 名称 + namespace VARCHAR(255) NOT NULL, -- Kubernetes 命名空间 + repository TEXT NOT NULL, -- OCI Repository (e.g., charts/app) + chart VARCHAR(255) NOT NULL, -- Chart 名称 + version VARCHAR(255) NOT NULL, -- Chart 版本 + description TEXT, + values JSONB, -- Helm Values (JSON) + values_yaml TEXT, -- Helm Values (YAML) + user_override_yaml TEXT, -- 用户额外覆盖配置 + + status VARCHAR(50) NOT NULL, -- 实例状态 + status_reason TEXT, -- 状态说明 + last_operation VARCHAR(50), -- 最后操作类型 + last_error TEXT, -- 最近错误 + revision INTEGER NOT NULL DEFAULT 1, -- Helm Release Revision + + cpu_requested DECIMAL(10,2) NOT NULL DEFAULT 0, -- CPU 请求量 (cores) + memory_requested VARCHAR(50) NOT NULL DEFAULT '0Mi', -- 内存请求量 + gpu_requested DECIMAL(10,2) NOT NULL DEFAULT 0, -- GPU 请求量 (cards) + gpu_memory_requested VARCHAR(50) NOT NULL DEFAULT '0Mi', -- GPU 内存请求量 + + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT fk_cluster FOREIGN KEY (cluster_id) REFERENCES clusters(id) ON DELETE CASCADE, + CONSTRAINT fk_registry FOREIGN KEY (registry_id) REFERENCES registries(id) ON DELETE CASCADE, + UNIQUE (cluster_id, name, namespace) +); +``` + +| 字段 | 类型 | 说明 | 示例 | +|------|------|------|------| +| id | VARCHAR(36) | 主键 UUID | instance-uuid | +| workspace_id | VARCHAR(36) | 所属工作空间 ID | workspace-uuid | +| owner_id | VARCHAR(36) | 创建者用户 ID | user-uuid | +| cluster_id | VARCHAR(36) | 所属集群 ID | cluster-uuid | +| registry_id | VARCHAR(36) | 所属 Registry ID | registry-uuid | +| chart_reference_id | VARCHAR(36) | Chart 引用 ID | chart-ref-uuid | +| 
values_template_id | VARCHAR(36) | Values 模板 ID | template-uuid | +| name | VARCHAR(255) | Release 名称(RFC 1123) | my-app | +| namespace | VARCHAR(255) | Kubernetes 命名空间 | team-alpha-my-app | +| repository | TEXT | OCI Repository | harbor.example.com/charts/nginx | +| chart | VARCHAR(255) | Chart 名称 | nginx | +| version | VARCHAR(255) | Chart 版本 | 1.0.0 | +| description | TEXT | 描述 | Nginx 应用 | +| values | JSONB | Values JSON | {"replicas": 2} | +| values_yaml | TEXT | Values YAML | replicas: 2 | +| user_override_yaml | TEXT | 用户覆盖配置 | replicas: 3 | +| status | VARCHAR(50) | 状态 | deployed | +| status_reason | TEXT | 状态说明 | Install complete | +| last_operation | VARCHAR(50) | 最后操作 | install | +| last_error | TEXT | 错误信息 | - | +| revision | INTEGER | Helm Revision | 1 | +| cpu_requested | DECIMAL(10,2) | CPU 请求 | 2.00 | +| memory_requested | VARCHAR(50) | 内存请求 | 1Gi | +| gpu_requested | DECIMAL(10,2) | GPU 请求 | 0 | +| gpu_memory_requested | VARCHAR(50) | GPU 内存 | 0Mi | +| created_at | TIMESTAMP | 创建时间 | 2024-01-01 10:00:00 | +| updated_at | TIMESTAMP | 更新时间 | 2024-01-01 10:00:00 | + +**状态说明**: +| 状态 | 说明 | +|------|------| +| deployed | 部署成功 | +| failed | 部署失败 | +| pending-install | 安装中 | +| pending-upgrade | 升级中 | +| pending-rollback | 回滚中 | +| pending-delete | 删除中 | +| uninstalled | 已卸载 | +| superseded | 已被取代 | +| unknown | 未知 | + +--- + +### 7. 
storage_backends - 存储后端表 + +NFS/PV/HostPath 存储配置。 + +```sql +CREATE TABLE storage_backends ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + owner_id VARCHAR(36), + name VARCHAR(255) NOT NULL, + type VARCHAR(50) NOT NULL, -- 'nfs' | 'pv' | 'hostPath' + config JSONB NOT NULL, -- 存储配置 + description TEXT, + is_default BOOLEAN NOT NULL DEFAULT FALSE, -- 是否默认存储 + is_shared BOOLEAN NOT NULL DEFAULT FALSE, -- 是否共享 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, name) +); +``` + +**Config 结构**: +```json +// NFS +{"nfs": {"server": "192.168.1.100", "path": "/data"}} + +// PV +{"pv": {"storageClassName": "nfs", "capacity": "10Gi", "accessModes": ["ReadWriteMany"]}} + +// HostPath +{"hostPath": {"path": "/mnt/data"}} +``` + +--- + +### 8. chart_references - Chart 引用表 + +管理可用的 Helm Chart 引用。 + +```sql +CREATE TABLE chart_references ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + registry_id VARCHAR(36), + repository VARCHAR(500) NOT NULL, -- OCI repository path + chart_name VARCHAR(255) NOT NULL, + description TEXT, + is_enabled BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, registry_id, repository) +); +``` + +--- + +### 9. 
values_templates - Values 模板表 + +Helm Values 模板,支持版本管理。 + +```sql +CREATE TABLE values_templates ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + owner_id VARCHAR(36), + chart_reference_id VARCHAR(36), + name VARCHAR(255) NOT NULL, + description TEXT, + values_yaml TEXT NOT NULL, + version INTEGER NOT NULL DEFAULT 1, -- 模板版本号 + is_default BOOLEAN NOT NULL DEFAULT FALSE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(workspace_id, chart_reference_id, name) +); +``` + +**版本管理**: +- 每次更新创建新版本(version + 1) +- 支持回滚到历史版本 + +--- + +### 10. user_config_overrides - 用户配置覆盖表 + +用户个人配置覆盖。 + +```sql +CREATE TABLE user_config_overrides ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + target_type VARCHAR(50) NOT NULL, -- 'storage' | 'template' | 'global' + target_id VARCHAR(36), + config JSONB NOT NULL, -- 覆盖配置 + priority INTEGER NOT NULL DEFAULT 0, -- 优先级 + is_active BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +### 11. audit_logs - 审计日志表 + +记录所有操作行为。 + +```sql +CREATE TABLE audit_logs ( + id VARCHAR(36) PRIMARY KEY, + workspace_id VARCHAR(36), + user_id VARCHAR(36), + action VARCHAR(100) NOT NULL, -- 'create' | 'update' | 'delete' | 'deploy' | 'scale' + resource_type VARCHAR(50) NOT NULL, -- 'cluster' | 'registry' | 'instance' | ... + resource_id VARCHAR(36), + resource_name VARCHAR(255), + details JSONB, + ip_address VARCHAR(50), + user_agent TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +### 12. 
schema_migrations - 迁移版本表 + +数据库版本记录。 + +```sql +CREATE TABLE schema_migrations ( + version VARCHAR(50) PRIMARY KEY, + applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +## ER 关系图 + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ workspaces │ +│ (id, name, description, created_by, created_at, updated_at) │ +└────────────────────────────────────┬────────────────────────────────────┘ + │ 1:N + ┌────────────────────────────┼────────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ +│ workspace_quotas│ │ clusters │ │ registries │ +│ (workspace_id, │ │ (workspace_id, │ │ (workspace_id, │ +│ resource_type, │ │ owner_id, name, │ │ owner_id, name, │ +│ hard_limit, │ │ host, is_shared) │ │ url, is_shared) │ +│ soft_limit, used)│ └─────────┬─────────┘ └────────┬─────────┘ +└───────────────────┘ │ │ + │ │ + ┌───────────────────────────┼───────────────────────┘ + │ │ + ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ +│ instances │ │ storage_backends│ +│ (workspace_id, │ │ (workspace_id, │ +│ owner_id, │ │ owner_id, name, │ +│ cluster_id, │ │ type, config) │ +│ registry_id, │ └───────────────────┘ +│ values_template) │ +└───────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────┐ +│ users │ +│ (id, username, password_hash, email, role, workspace_id, is_active) │ +└────────────────────────────────────┬────────────────────────────────────┘ + │ + ┌────────────────────────────┼────────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ +│ chart_references│ │ values_templates │ │ audit_logs │ +│ (workspace_id, │ │ (workspace_id, │ │ (user_id, action,│ +│ registry_id, │ │ owner_id, │ │ resource_type) │ +│ repository) │ │ chart_ref_id) │ └───────────────────┘ +└───────────────────┘ └───────────────────┘ +``` + +--- + +## 资源可见性规则 + +| 用户角色 | 可见范围 | +|---------|---------| +| Admin 
| 所有 Workspace 的所有资源(workspace_id 为 NULL 或有值都能看到) | +| User | 仅自己 Workspace 的资源 | +| 共享资源 | `is_shared=TRUE` 时,同 Workspace 内可见 | + +--- + +## 常用 SQL 操作 + +### 查询用户及其 Workspace +```sql +SELECT u.id, u.username, u.role, w.name as workspace_name +FROM users u +LEFT JOIN workspaces w ON u.workspace_id = w.id +WHERE u.is_active = TRUE; +``` + +### 查询 Workspace 配额使用情况 +```sql +SELECT w.name as workspace, + q.resource_type, + q.hard_limit, + q.soft_limit, + q.used, + CASE WHEN q.hard_limit > 0 THEN ROUND(q.used / q.hard_limit * 100, 2) ELSE 0 END as usage_percent +FROM workspace_quotas q +JOIN workspaces w ON q.workspace_id = w.id; +``` + +### 查询用户可用的集群 +```sql +-- Admin: 所有集群 +SELECT * FROM clusters; + +-- User: 自己 Workspace 的集群 + 共享集群 +SELECT * FROM clusters +WHERE workspace_id = 'user-workspace-id' + OR is_shared = TRUE; +``` + +### 查询实例状态统计 +```sql +SELECT status, COUNT(*) as count +FROM instances +WHERE workspace_id = 'workspace-id' +GROUP BY status; +``` + +### 查询审计日志 +```sql +SELECT a.created_at, u.username, a.action, a.resource_type, a.resource_name +FROM audit_logs a +JOIN users u ON a.user_id = u.id +WHERE a.workspace_id = 'workspace-id' +ORDER BY a.created_at DESC +LIMIT 50; +``` + +--- + +## 迁移历史 + +| 版本 | 说明 | 日期 | +|------|------|------| +| v1.0.0 | 初始版本(单租户) | 2024-01 | +| v2.0.0-multi-tenant | 多租户迁移:添加 workspaces, quotas, 扩展 users/clusters/registries/instances | 2025-04 | + +--- + +## 初始数据 + +### 创建 Admin 用户 +```sql +-- 默认密码: admin123 (bcrypt hash 需由应用设置) +INSERT INTO users (id, username, password_hash, email, role, workspace_id, is_active, must_change_password) +VALUES ( + '00000000-0000-0000-0000-000000000001', + 'admin', + '$2a$10$placeholder', -- 由应用初始化时设置 + 'admin@ocdp.local', + 'admin', + NULL, -- admin 的 workspace_id 为 NULL,表示全局 + TRUE, + TRUE -- 首次登录必须修改密码 +); +``` \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index b7ce80d..db85436 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,19 +1,83 @@ # 
================================================== -# OCDP Docker Compose (frontend + gateway layer) +# OCDP Docker Compose (complete local stack) # ================================================== # 使用方式: -# docker compose -f docker-compose.yml \ -# -f ./backend/docker-compose.yml \ -# --profile backend up --build -d +# docker compose up --build # # 说明: -# - 本文件只负责前端构建和 Nginx。 -# - Backend / PostgreSQL / pgAdmin 由 backend/docker-compose.yml 提供。 -# - Nginx 统一监听 80/443(默认映射 WEB_HTTP_PORT=80、WEB_HTTPS_PORT=443), +# - 本文件是本地部署主入口,包含 PostgreSQL、Backend、前端构建和 Nginx。 +# - 默认使用高位宿主端口,避免和本机其他项目冲突。 +# - Nginx 统一监听容器内 80/443(默认映射 WEB_HTTP_PORT=18080、WEB_HTTPS_PORT=18443), # 根据路径转发:/api/* → backend,其他路径 → 前端静态文件。 # ================================================== services: + # -------------------------------------------------- + # PostgreSQL 数据库 + # -------------------------------------------------- + postgres: + image: postgres:17-alpine + container_name: ocdp-postgres + restart: unless-stopped + environment: + POSTGRES_DB: ${POSTGRES_DB:-ocdp} + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C" + ports: + - "${POSTGRES_PORT:-15432}:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + - ./backend/scripts/init-db.sql:/docker-entrypoint-initdb.d/01-init.sql:ro + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-ocdp}"] + interval: 10s + timeout: 5s + retries: 30 + start_period: 60s + networks: + - ocdp-network + + # -------------------------------------------------- + # Backend API + # -------------------------------------------------- + backend: + build: + context: ./backend + dockerfile: Dockerfile + args: + GOPROXY: ${GOPROXY:-https://goproxy.cn,direct} + GOSUMDB: ${GOSUMDB:-sum.golang.google.cn} + image: ocdp-backend:latest + container_name: ocdp-backend + restart: unless-stopped + env_file: + - path: 
./.env + required: false + format: raw + environment: + ADAPTER_MODE: ${ADAPTER_MODE:-production} + PORT: 8080 + JWT_SECRET: ${JWT_SECRET:-change-me-in-production} + ENCRYPTION_KEY: ${ENCRYPTION_KEY:-change-me-32-bytes-long-key-here} + DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-ocdp}?sslmode=disable + ports: + - "${BACKEND_PORT:-18081}:8080" + volumes: + - ./config:/app/config:ro + - ./data:/app/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + depends_on: + postgres: + condition: service_healthy + networks: + - ocdp-network + # -------------------------------------------------- # 构建前端静态资源 (一次性 Job) # -------------------------------------------------- @@ -34,6 +98,7 @@ services: sh -c " set -eux; npm ci; + rm -rf node_modules/.tmp; npm run build; mkdir -p /build; rm -rf /build/*; @@ -54,18 +119,21 @@ services: nginx: image: nginx:1.27-alpine container_name: ocdp-nginx + restart: unless-stopped depends_on: frontend-build: condition: service_completed_successfully + backend: + condition: service_healthy ports: - - "${WEB_HTTP_PORT:-80}:80" - - "${WEB_HTTPS_PORT:-443}:443" + - "${WEB_HTTP_PORT:-18080}:80" + - "${WEB_HTTPS_PORT:-18443}:443" volumes: - frontend_dist:/usr/share/nginx/html:ro - ./infra/nginx/default.conf:/etc/nginx/conf.d/default.conf:ro - ./infra/nginx/certs:/etc/nginx/certs:ro healthcheck: - test: ["CMD-SHELL", "wget -qO- http://localhost/healthz || exit 1"] + test: ["CMD-SHELL", "wget -qO- http://127.0.0.1/healthz || exit 1"] interval: 30s timeout: 5s retries: 5 @@ -84,6 +152,8 @@ networks: # Volumes # ================================================== volumes: + postgres_data: + name: ocdp-postgres-data frontend_dist: driver: local frontend_node_modules: diff --git a/docs/UNRESOLVED-BUGS.md b/docs/UNRESOLVED-BUGS.md new file mode 100644 index 0000000..affbe68 --- /dev/null +++ 
b/docs/UNRESOLVED-BUGS.md @@ -0,0 +1,74 @@ +# OCDP 未修复问题清单 + +**最后更新:** 2026-05-14 (Round 3 回归测试) +**测试覆盖:** 3 轮测试 (Round 1: v1 基线, Round 2: 配额+YAML, Round 3: 回归+新功能) + +--- + +## 已知未修复 Bug (15 个) + +### P1 — 高优先级 (1) + +| # | 标题 | 严重度 | 描述 | Round | +|----|------|--------|------|-------| +| 1 | Detail API 返回 replicas: 0 | **P1** | `GET /instances/{id}` 始终返回 `replicas: 0`,与 List API 不一致 | R3 NEW | + +### P2 — 中优先级 (8) + +| # | 标题 | 严重度 | 描述 | Round | +|----|------|--------|------|-------| +| 2 | List API 移除 values 字段 | **P2** | List instances 不再返回 `values`,仅在详情API返回。可能是性能优化,但属于 API 行为变更 | R3 NEW | +| 3 | API 层无配额预检查 | **P2** | 后端接受所有部署请求(返回200),不验证是否超配额。K8s ResourceQuota 在 pod 级阻止,但 Helm release 仍创建 | R2 | +| 4 | Values 冲突时无警告 | **P2** | 同时提供 `values` JSON 和 `valuesYaml` 时,JSON 静默覆盖 YAML,无任何警告 | R2 | +| 5 | Tags 端点缺失 | **P2** | `GET /registries/{id}/repositories/{repo}/tags` 返回 404 | R1 | +| 6 | Metrics API 缺失 | **P2** | `GET /monitoring/clusters/{id}/metrics` 返回 404 | R1 | +| 7 | Stats API 缺失 | **P2** | `GET /clusters/{id}/stats` 返回 404 | R1 | +| 8 | Kubeconfig API 缺失 | **P2** | `GET /clusters/{id}/kubeconfig` 返回 404 | R1 | +| 9 | Namespace 静默覆盖 + HTTP 200 | **P2** | 用户部署到他人的 namespace 时,API 返回 201 但 namespace 被静默改为自己的。应返回 403 | R1 | + +### P3 — 低优先级 (6) + +| # | 标题 | 严重度 | 描述 | Round | +|----|------|--------|------|-------| +| 10 | 用户枚举漏洞 | **P3** | 不存在用户 "user not found" vs 存在用户 "invalid password",错误消息不同 | R1 | +| 11 | 无登录速率限制 | **P3** | 10 次快速失败全部返回 401,无 429 或锁定 | R1 | +| 12 | Nginx 版本泄露 | **P3** | `Server: nginx/1.27.5` 响应头暴露精确版本 | R1 | +| 13 | CORS: * | **P3** | `Access-Control-Allow-Origin: *` 允许任意跨域 | R1 | +| 14 | 缺少安全响应头 | **P3** | 无 HSTS, X-Frame-Options, CSP, X-Content-Type-Options | R1 | +| 15 | `/health` 端点返回 SPA HTML | **P3** | 健康检查返回 index.html 而非 `{"status":"ok"}` | R1 | + +--- + +## 已修复 (Round 3 验证通过) + +| 原 Bug ID | 描述 | 修复后行为 | +|-----------|------|-----------| +| BUG-001 | Launch 按钮无反应 (P0) | ✅ 部署端到端正常 | +| BUG-002 | SPA 旧路由空白页 (P0) | ✅ 所有旧路由返回 
SPA | +| BUG-003 | DELETE 返回 404 (P1) | ✅ 返回 HTTP 204 | +| BUG-004 | DELETE 空响应体 (P1) | ✅ HTTP 204 No Content | +| — | InstanceCard 无 scaling UI | ✅ +/- 按钮 + K8s API | +| — | ModifyModal values 为空 | ✅ Full Helm values + diff | +| — | Per-card Refresh button | ✅ 移除,改为 page-level | + +--- + +## 修复优先级排序 + +``` +立即修复 (P1): + 1. Detail API replicas=0 → 从 K8s live state 同步 + +短期修复 (P2): + 2. API 层配额预检查 → POST instances 时验证 + 3. Values 冲突警告 → 两者同时提供时返回 warning + 4. Namespace 拒绝而非覆盖 → 返回 403 + 5. 缺失端点实现 (tags/stats/metrics/kubeconfig) + +安全加固 (P3): + 6. 登录错误消息统一 → "Invalid username or password" + 7. 速率限制 → max 5/min per IP + 8. Nginx: server_tokens off + 安全头 + 9. CORS 收紧 → 具体域名 + 10. /health → JSON 响应 +``` diff --git a/docs/regression-full-report.md b/docs/regression-full-report.md new file mode 100644 index 0000000..d7e4484 --- /dev/null +++ b/docs/regression-full-report.md @@ -0,0 +1,209 @@ +# OCDP 回归测试完整报告 (Round 3) + +**测试日期:** 2026-05-14 +**环境:** http://10.6.80.114:18080 +**集群:** k3s v1.28.0 (dbf824f1-9962-4d8e-881e-870c75fdb6f5), k8s (23880994-dfe4-48d0-abc0-b49692cc630a) +**Harbor:** harbor.bwgdi.com (83b823af-873b-457c-912c-9ccde3cb12e6) + +--- + +## 代码变更概要 (Commit b88fe24 +) + +| Commit | 变更 | +|--------|------| +| b88fe24 | fix: real K8s replicas in list API, full Helm values in modify YAML editor | +| 96d42ee | fix: scale replicas in response, YAML lineWidth, delta values, modified keys | +| 4441f58 | fix: direct K8s scaling, replicas from K8s API, button labels, modify fetch | +| 49b92e6 | fix: UI redesign — horizontal instance rows, proper scaling, readable tag cards | +| 28ecb2e | feat: scale instances, --reuse-values, values diff, UI redesign, hover animations | +| 87eaaa5 | fix: remove per-card Refresh button, consolidate to page-level refresh | + +--- + +## 测试结果总览 + +### 总评分: 85/100 + +| 测试领域 | 状态 | 问题数 | +|----------|------|--------| +| 前端 UI / Launch / Routes | ✅ ALL PASS | 0 | +| API CRUD / 部署生命周期 | ⚠️ 2 issues | 2 | +| 配额执行 | ⚠️ 1 issue (pre-existing) 
| 1 | +| Values 优先级 / gpuMem | ⚠️ 1 issue | 1 | +| 权限隔离 | ✅ ALL PASS | 0 | +| 安全测试 | ⚠️ 3 issues (pre-existing) | 3 | +| DELETE 行为 | ✅ FIXED | 0 | +| InstanceCard / Scaling UI | ✅ ALL PASS | 0 | +| ModifyModal / Values Diff | ✅ ALL PASS | 0 | + +--- + +## 新发现 Bug + +### 🆕 BUG-R3-001: Detail API replicas=0 与 List API 不一致 (High) + +| 属性 | 值 | +|------|-----| +| 严重度 | **P1 (High)** | +| 端点 | `GET /clusters/{id}/instances/{instance_id}` | +| 现象 | List API 返回正确 replicas(如 1, 5),但 Detail API 始终返回 `replicas: 0` | +| 根因 | Detail endpoint 从数据库读取实例记录,replicas 字段未同步自 K8s 实时状态 | +| 影响 | 前端依赖 Detail API 的页面(如刷新后详情页)显示错误的副本数 | +| 修复建议 | Detail endpoint 也从 K8s live state 填充 replicas,或确保数据库同步 | + +### 🆕 BUG-R3-002: List API 移除 values 字段 + +| 属性 | 值 | +|------|-----| +| 严重度 | **P2 (Medium)** | +| 端点 | `GET /clusters/{id}/instances` | +| 现象 | List API 响应不再包含 `values` 字段(之前版本有);values 仅在单实例 GET 中返回 | +| 影响 | 依赖 list API values 的测试脚本和前端组件会 break | +| 备注 | 可能是故意的性能优化,但属于 API 行为变更 | + +--- + +## 已修复 Bug (验证通过) + +| Bug ID | 描述 | 之前状态 | 现在状态 | 验证 | +|--------|------|----------|----------|------| +| BUG-001 | Launch 按钮无反应 | P0 Blocker | ✅ FIXED | 部署端到端成功 | +| BUG-002 | SPA 旧路由空白页 | P0 Blocker | ✅ FIXED | 所有旧路由返回 SPA | +| BUG-003 | DELETE 返回 404 | P1 High | ✅ FIXED | 返回 HTTP 204 | +| BUG-004 | DELETE 空 body | P1 High | ✅ FIXED | HTTP 204 No Content | +| - | InstanceCard 无 scaling UI | New Feature | ✅ ADDED | +/- 按钮 + API 调用 | +| - | ModifyModal values 为空 | Bug | ✅ FIXED | Full Helm values 返回 | + +--- + +## 仍未修复的已知问题 + +| Bug ID | 描述 | 严重度 | 状态 | +|--------|------|--------|------| +| BUG-013 | 用户枚举(login 错误消息差异) | Medium | 未修复 | +| BUG-014 | 无速率限制 | Medium | 未修复 | +| BUG-015 | Nginx 版本泄露 | Low | 未修复 | +| BUG-016 | CORS: * | Low | 未修复 | +| BUG-017 | 缺少安全响应头 | Low | 未修复 | +| BUG-018 | /health 返回 HTML | Low | 未修复 | +| BUG-005 | Tags 端点 404 | Medium | 未修复 | +| BUG-006 | Namespace 静默覆盖无警告 | Medium | 未修复 | +| BUG-007-009 | Metrics/Stats/Kubeconfig 端点缺失 | Medium | 未修复 | +| BUG-011 | API 响应格式不一致 | 
Low | 未修复 | +| BUG-012 | /auth/me 空 token 字段 | Low | 未修复 | +| - | API 层配额预检查 | New Feature | 未实现 | +| - | Values 冲突时无警告 | UX | 未修复 | + +--- + +## 详细测试结果 + +### 1. 前端 UI (test-user-a) + +| 测试项 | 结果 | +|--------|------| +| 登录页加载 | ✅ HTTP 200, SPA 398 bytes | +| JWT 认证 | ✅ role=user, 10 permissions, quota 字段 | +| 所有页面路由 | ✅ 6 个路由全部返回 398 bytes (非空白) | +| 旧路由重定向 | ✅ /clusters, /registries, /launch, /monitoring 全部返回 SPA | +| Chart Browser | ✅ nginx:22.1.1 可发现,Launch 按钮可用 | +| 部署 Pipeline | ✅ pending-install → deployed (~15s) | +| InstanceCard Scale UI | ✅ +/- 按钮,副本数显示 | +| InstanceCard Actions | ✅ Entries/Diag/Modify/Delete 全部可见 | +| ModifyModal YAML Editor | ✅ full Helm values, lineWidth:0, diff 检测 | +| TagCard UI | ✅ 色标 (chart=blue, image=green), Copy helm pull 命令, LATEST badge | +| DELETE | ✅ HTTP 204 | + +### 2. API 后端 (test-user-b) + +| 测试项 | 结果 | +|--------|------| +| Login + /auth/me | ✅ 完整 profile (quota, namespace, permissions) | +| Cluster list | ✅ 2 clusters | +| Registry list | ✅ 1 registry (harbor-bwgdi) | +| Repository artifacts | ✅ Harbor API proxy 正常 | +| Cluster health | ✅ k3s healthy, v1.28.0 | +| 部署 nginx (default) | ✅ deployed, replicas=1 | +| 部署 nginx (over-quota) | ⚠️ 接受部署 (API 无预检查) | +| 实例状态轮询 | ✅ ~20s 到达 deployed | +| List API replicas | ✅ 正确显示 1/5 | +| Detail API replicas | ❌ 返回 0 (BUG-R3-001) | +| 实例删除 | ✅ HTTP 204 (BUG-003 FIXED) | +| 缺失端点 | ✅ 正常 404 | + +### 3. Values 优先级 (test-user-c) + +| 方法 | 结果 | +|------|------| +| values JSON only | ✅ gpuMem=10000 正确接受 | +| valuesYaml only | ✅ YAML -> JSON 解析正确 | +| 同时提供 (冲突) | ✅ values JSON 覆盖 valuesYaml | +| 冲突警告 | ❌ 无警告(建议添加) | +| 默认值 (空 values) | ✅ 使用 chart 内置 defaults | +| gpuMem=10000 | ✅ 整数 MB scalar 正确 | +| 清理 | ✅ 所有 3 个实例已删除 | + +### 4. 
权限隔离 + +| 测试项 | 结果 | +|--------|------| +| Admin lists users | ✅ 10 users | +| test-admin-d lists users | ✅ 10 users (admin role works) | +| test-user-c GET /users | ✅ 403 Forbidden | +| test-user-c POST /auth/register | ✅ 403 Forbidden | +| Cross-tenant deploy (c→b) | ✅ Silent override to ocdp-u-test-c (secure) | +| Deploy into own namespace | ✅ 成功 | +| Instance visibility | ✅ 仅看到自己的实例 | +| Disable user | ✅ 用户被禁用 | +| Disabled user login | ✅ 401 | +| Re-enable user | ✅ 恢复 | +| Self-registration | ✅ 401 (必须认证) | + +### 5. 安全 (回归) + +| 测试项 | 结果 | +|--------|------| +| 未认证端点 | ✅ 全部 401 | +| JWT 验证 | ✅ 篡改 token 被拒绝 | +| XSS/SQLi | ✅ 安全处理 | +| 敏感数据脱敏 | ✅ creds 显示为 •••••••• | +| 普通用户权限提升 | ✅ 403 blocked | +| 用户枚举 | ❌ 不同错误消息 (未修复) | +| 速率限制 | ❌ 无 429 (未修复) | +| CORS * + 缺失安全头 | ❌ 未修复 | +| Nginx 版本泄露 | ❌ "nginx/1.27.5" (未修复) | +| /auth/me 空 token | ✅ tokens 已正确清空 | + +--- + +## 优先修复清单 + +### 立即修复 (P0/P1) +1. **BUG-R3-001**: Detail API replicas=0 — 从 K8s live state 填充 +2. **配额预检查** — POST instances 时验证请求资源 ≤ 用户配额 + +### 尽快修复 (P2) +3. Values 冲突时添加 API 警告 +4. Tags/Metrics/Stats/Kubeconfig 端点实现 +5. Namespace 静默覆盖返回警告 +6. List API values 字段回归或文档化 + +### 安全加固 (P3) +7. 登录错误消息统一 +8. 速率限制 +9. Nginx 安全头 + 关闭 server_tokens +10. 
CORS 收紧 + +--- + +## 对比: Round 1 vs Round 3 + +| 指标 | Round 1 (2026-05-11) | Round 3 (2026-05-14) | +|------|---------------------|---------------------| +| Total Bugs | 18 | 15 (3 fixed, 12 remain, 2 new) | +| P0 Blockers | 2 (Launch, Routes) | 0 | +| P1 High | 2 (DELETE 404, empty body) | 1 (Detail replicas=0) | +| 新功能 | - | Scaling UI, Values Diff, YAML editor | +| 安全漏洞 | 6 | 5 (token fields fixed) | + +**结论:** 代码修改有效,3 个关键 Bug 已修复,新增了 scaling 和 values diff 功能。仍有 12 个已知问题和 2 个新问题待修复。 diff --git a/docs/test-scenarios.md b/docs/test-scenarios.md new file mode 100644 index 0000000..dccdfa0 --- /dev/null +++ b/docs/test-scenarios.md @@ -0,0 +1,1640 @@ +# OCDP Test Scenarios + +> **Platform**: OCDP (Open Cloud Deployment Platform) +> **Deployed at**: http://10.6.80.114:18080 +> **Scope**: Full-stack test scenarios covering authentication, configuration, artifact browser, instance lifecycle, monitoring, user management, multi-tenancy, UI/UX, data persistence, security, and edge cases. + +--- + +## Category 1: Authentication & Authorization (25+ cases) + +### AUTH-001 — Login with valid credentials +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin account exists in the system | +| **Steps** | 1. Navigate to `/`
2. Enter valid username and password
3. Click "Login" | +| **Expected Result** | User is authenticated, redirected to `/home`, token stored in localStorage/session, toast "Welcome, [username]!" displayed | + +### AUTH-002 — Login with incorrect password +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid username exists | +| **Steps** | 1. Enter valid username with wrong password
2. Click "Login" | +| **Expected Result** | Login fails with 401 error, red error message displayed, user stays on login page | + +### AUTH-003 — Login with non-existent username +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Enter username that does not exist
2. Enter any password
3. Click "Login" | +| **Expected Result** | 401 returned with the same generic error message as a wrong password, so the response does not reveal whether the username exists | + +### AUTH-004 — Login with empty credentials +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Leave username and password empty
2. Click "Login" | +| **Expected Result** | HTML5 form validation prevents submission, or backend returns validation error | + +### AUTH-005 — Login with special characters in username +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter username with SQL injection patterns: `admin' OR '1'='1`
2. Enter password
3. Click "Login" | +| **Expected Result** | Login fails, no SQL injection succeeds, no data leak | + +### AUTH-006 — Successful login response contains expected fields +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid credentials | +| **Steps** | 1. Call `POST /api/v1/auth/login`
2. Inspect response body | +| **Expected Result** | Response contains `accessToken`, `refreshToken`, `username`, `role`, `permissions`, `userId`, `workspaceId` | + +### AUTH-007 — JWT token sent in Authorization header +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid token obtained | +| **Steps** | 1. Capture XHR request to any protected API
2. Inspect Authorization header | +| **Expected Result** | Header contains `Bearer <token>` | + +### AUTH-008 — Access protected route without token +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clear all auth tokens | +| **Steps** | 1. Navigate directly to `/home`
2. Navigate to `/artifact/instances`
3. API call to `/api/v1/clusters` without token | +| **Expected Result** | Frontend redirects to `/`, backend returns 401 | + +### AUTH-009 — Access protected API without token +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/clusters` without Authorization header | +| **Expected Result** | 401 Unauthorized returned | + +### AUTH-010 — Token expiry handling +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Use a token near expiry or manipulate expiry | +| **Steps** | 1. Make API call with expired token | +| **Expected Result** | Backend returns 401, frontend should redirect to login page or attempt token refresh | + +### AUTH-011 — Token refresh flow +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Valid refresh token exists | +| **Steps** | 1. Call `POST /api/v1/auth/refresh` with valid refresh token
2. Call with expired/invalid refresh token | +| **Expected Result** | Valid refresh returns new access token; invalid returns 401 | + +### AUTH-012 — Logout behavior +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Click logout/sign out button
2. Try to navigate to previously visited protected page | +| **Expected Result** | Token cleared from storage, redirected to login page, protected routes inaccessible | + +### AUTH-013 — Logout clears token from localStorage +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Inspect localStorage for auth tokens after login
2. Logout
3. Inspect localStorage again | +| **Expected Result** | Tokens removed after logout | + +### AUTH-014 — Role-based page access: admin +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/configuration/clusters`
3. Navigate to `/artifact/instances` | +| **Expected Result** | All pages accessible | + +### AUTH-015 — Role-based page access: regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in (non-admin) | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/admin` | +| **Expected Result** | Redirected to `/forbidden` or access denied page | + +### AUTH-016 — Regular user can access own resources pages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/home`
2. Navigate to `/configuration/clusters`
3. Navigate to `/configuration/registries`
4. Navigate to `/artifact/registries`
5. Navigate to `/artifact/instances` | +| **Expected Result** | All pages accessible (user sees own resources) | + +### AUTH-017 — Login page redirect when already authenticated +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User is logged in | +| **Steps** | 1. Navigate to `/`
2. Observe behavior | +| **Expected Result** | Redirected to `/home` instead of showing login form | + +### AUTH-018 — Login page UI elements +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Not authenticated | +| **Steps** | 1. Observe login page
2. Check for OCDP Console branding, username input, password input, Login button | +| **Expected Result** | Page displays brand icon, "OCDP Console" title, username/password fields with correct autocomplete attributes, Login button | + +### AUTH-019 — Login button loading state +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter credentials and click Login
2. Observe button state during API call | +| **Expected Result** | Button shows spinner/loading state, text changes to "Logging in...", button disabled during request | + +### AUTH-020 — Login error display +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Enter wrong credentials and submit
2. Observe error message | +| **Expected Result** | Red error text appears below the login button, message is user-friendly (not a raw stack trace) | + +### AUTH-021 — Password change flow (mustChangePassword) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User created with `mustChangePassword: true` | +| **Steps** | 1. Login as that user
2. Observe redirect/behavior
3. Change password
4. Login again with new password | +| **Expected Result** | First login forces password change, old password rejected after change | + +### AUTH-022 — Refresh token expiry logout +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Both access and refresh tokens expired | +| **Steps** | 1. Wait for full token expiry
2. Make any API call that triggers refresh | +| **Expected Result** | User is logged out, redirected to login page | + +### AUTH-023 — Concurrent login sessions +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User account exists | +| **Steps** | 1. Login in browser tab 1
2. Login in browser tab 2 with same credentials
3. Perform operations in both tabs | +| **Expected Result** | Both sessions work independently, no cross-tab interference | + +### AUTH-024 — Admin login shows "Admin only" badge on User Management +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Check for admin badge | +| **Expected Result** | "Admin only" badge visible in the User Management page header | + +### AUTH-025 — Token manipulation (tampered JWT) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Get valid token, modify its payload | +| **Steps** | 1. Decode JWT, change `role` to "admin" for a regular user token
2. Re-encode with modified payload and send API request | +| **Expected Result** | Backend rejects tampered token (signature verification fails), returns 401 | + +--- + +## Category 2: Cluster CRUD (15+ cases) + +### CLU-001 — Create cluster with all required fields +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Logged in as admin/user with cluster permissions | +| **Steps** | 1. Navigate to `/configuration/clusters`
2. Click "Add Cluster"
3. Fill in name, API Server URL, CA cert, client cert, client key
4. Click "Save" | +| **Expected Result** | Cluster created successfully, success toast shown, cluster appears in the list | + +### CLU-002 — Create cluster with token auth +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Click "Add Cluster"
2. Fill name, API Server URL, Bearer Token (leave cert fields empty)
3. Click "Save" | +| **Expected Result** | Cluster created using token authentication | + +### CLU-003 — Create cluster with empty name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Leave name empty
2. Fill all other required fields
3. Click "Save" | +| **Expected Result** | Validation error "Cluster name is required" displayed near the name field | + +### CLU-004 — Create cluster with invalid API Server URL +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter name
2. Enter invalid URL (e.g., `not-a-url`, `ftp://...`)
3. Click "Save" | +| **Expected Result** | Validation error "Invalid URL format" displayed | + +### CLU-005 — Create cluster without auth credentials +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter name and URL
2. Leave all cert/key/token fields empty
3. Click "Save" | +| **Expected Result** | Validation errors on CA/Client Cert/Client Key fields | + +### CLU-006 — Edit cluster name and URL +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing cluster present | +| **Steps** | 1. Click edit on existing cluster
2. Change name and host
3. Click "Save" | +| **Expected Result** | Cluster updated, changes reflected in list | + +### CLU-007 — Edit cluster with new certificate (overwrite) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing cluster with cert auth | +| **Steps** | 1. Edit cluster
2. Enter new CA cert, client cert, client key in the "new" fields
3. Click "Save" | +| **Expected Result** | Certificate updated, "hasCaData" still appears as configured | + +### CLU-008 — Edit cluster leaving cert fields empty (no change) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing cluster with cert auth | +| **Steps** | 1. Edit cluster
2. Leave the "new" cert fields empty
3. Click "Save" | +| **Expected Result** | Cluster updated, existing certs retained | + +### CLU-009 — Delete cluster +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing cluster with no running instances (or expected behavior defined) | +| **Steps** | 1. Click delete icon on a cluster
2. Confirm deletion in browser confirm dialog | +| **Expected Result** | Cluster removed from list, success toast shown | + +### CLU-010 — Delete cluster cancellation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Existing cluster | +| **Steps** | 1. Click delete on a cluster
2. Click "Cancel" in the confirmation dialog | +| **Expected Result** | Cluster not deleted, still visible in the list | + +### CLU-011 — Health check on reachable cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | A reachable Kubernetes cluster configured | +| **Steps** | 1. Click health check / test button on the cluster row | +| **Expected Result** | Success toast with connection healthy message | + +### CLU-012 — Health check on unreachable cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster with invalid host/cert configured | +| **Steps** | 1. Click health check / test button on the cluster | +| **Expected Result** | Error toast with connection failure message | + +### CLU-013 — Empty clusters state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Navigate to `/configuration/clusters` | +| **Expected Result** | Empty state message displayed, add cluster action available | + +### CLU-014 — Cluster list with multiple clusters +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | 3+ clusters configured | +| **Steps** | 1. Navigate to `/configuration/clusters`
2. Scroll list | +| **Expected Result** | All clusters listed with name, URL, status indicators | + +### CLU-015 — Cluster description display in list +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Cluster with description exists | +| **Steps** | 1. View cluster list
2. Check if description is visible | +| **Expected Result** | Description shown as subtitle or tooltip in the cluster row | + +### CLU-016 — Cluster CRUD as regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Create a new cluster
2. Edit the cluster
3. Delete the cluster | +| **Expected Result** | User can manage their own clusters, or see appropriate empty/permission state | + +### CLU-017 — Cluster form modal close/reset +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Create modal open with partially filled form | +| **Steps** | 1. Fill partial data
2. Click Cancel | +| **Expected Result** | Modal closes, form data cleared when reopened | + +--- + +## Category 3: Registry CRUD (15+ cases) + +### REG-001 — Create registry with all required fields +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Logged in | +| **Steps** | 1. Navigate to `/configuration/registries`
2. Click "Add Registry"
3. Fill name, URL, username, password
4. Click "Save" | +| **Expected Result** | Registry created, success toast shown, appears in list | + +### REG-002 — Create registry with insecure flag +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Open add registry modal
2. Fill required fields
3. Check "Allow insecure connection"
4. Click "Save" | +| **Expected Result** | Registry created with `insecure: true`, works for HTTP/self-signed registries | + +### REG-003 — Create registry without name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Leave name empty
2. Fill other fields
3. Click "Save" | +| **Expected Result** | HTML5 form validation prevents submission (required attribute) | + +### REG-004 — Create registry with invalid URL +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Enter non-URL string for URL field (type=url)
2. Fill other fields
3. Click "Save" | +| **Expected Result** | HTML5 form validation prevents submission (type=url validation) | + +### REG-005 — Test registry connection +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Saved registry exists, it's reachable | +| **Steps** | 1. Edit an existing registry
2. Click "Test Connection" button | +| **Expected Result** | Connection test runs, success/error toast based on connectivity | + +### REG-006 — Test registry connection without saving first +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Creating new registry (unsaved) | +| **Steps** | 1. Fill registry form but do not save
2. Check if "Test Connection" is available | +| **Expected Result** | "Test Connection" button is not shown (only visible for saved registries) | + +### REG-007 — Edit registry name and URL +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit a registry
2. Change its name and URL
3. Save | +| **Expected Result** | Registry updated, changes reflected | + +### REG-008 — Edit registry with new password +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing registry with password set | +| **Steps** | 1. Edit registry
2. Enter new password in the "New Password" field
3. Save | +| **Expected Result** | Password updated, "hasPassword" indicator shows as configured | + +### REG-009 — Edit registry leaving password empty +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit registry
2. Leave new password field empty
3. Save | +| **Expected Result** | Registry updated, existing password retained | + +### REG-010 — Delete registry +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Existing registry with no active dependencies | +| **Steps** | 1. Click delete on a registry
2. Confirm deletion | +| **Expected Result** | Registry removed from list, success toast | + +### REG-011 — Delete registry with existing instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry has active instances deployed from it | +| **Steps** | 1. Try to delete a registry that still has active instances deployed from it | +| **Expected Result** | Backend should return error preventing deletion, or handle cascading gracefully | + +### REG-012 — Empty registries state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No registries configured | +| **Steps** | 1. Navigate to `/configuration/registries` | +| **Expected Result** | Empty state message displayed | + +### REG-013 — Registry toggle insecure flag +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Existing registry | +| **Steps** | 1. Edit registry
2. Toggle insecure checkbox
3. Save | +| **Expected Result** | Insecure flag updated | + +### REG-014 — Registry list display +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Multiple registries exist | +| **Steps** | 1. View the registries page
2. Check each row | +| **Expected Result** | Each registry shows name, URL, username, insecure badge (if enabled) | + +### REG-015 — Registry CRUD as regular user +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Create a new registry
2. Edit the registry
3. Delete the registry | +| **Expected Result** | User can manage their own registries | + +--- + +## Category 4: Chart Browser / Launch Instance (20+ cases) + +### CHT-001 — Browse registries in chart browser +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registries configured with Helm charts | +| **Steps** | 1. Navigate to `/artifact/registries`
2. Observe left panel | +| **Expected Result** | Registries listed with expand/collapse toggle, count badge | + +### CHT-002 — Expand registry tree and list repositories +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registry has chart repositories | +| **Steps** | 1. Click on a registry to expand it
2. Observe repositories listed underneath | +| **Expected Result** | Repositories displayed as clickable items, each showing name | + +### CHT-003 — Empty repository list message +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry exists but has no chart repositories | +| **Steps** | 1. Expand registry
2. Observe sub-items | +| **Expected Result** | "No chart repositories found." message shown | + +### CHT-004 — Select repository and view artifacts +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Repository with chart artifacts exists | +| **Steps** | 1. Click on a repository in the left panel
2. Observe right panel | +| **Expected Result** | Repository name displayed in header, artifact tags shown as cards | + +### CHT-005 — Filter artifacts by Charts / All tags +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Repository has both chart and non-chart artifacts | +| **Steps** | 1. Select a repository
2. Click "Charts" filter button
3. Click "All tags" filter button | +| **Expected Result** | "Charts" filter shows only chart artifacts, "All tags" shows all | + +### CHT-006 — Filter toggle active state +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Repository selected | +| **Steps** | 1. Toggle between Charts and All tags | +| **Expected Result** | Active filter button has blue highlight, inactive has default styling | + +### CHT-007 — Tag card displays correct info +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Artifact loaded | +| **Steps** | 1. Observe a tag card | +| **Expected Result** | Card shows tag name, artifact type badge (chart/image), repository path, size | + +### CHT-008 — Launch button visible only for chart tags +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Chart and non-chart artifacts exist | +| **Steps** | 1. Observe a chart tag card
2. Observe a non-chart tag card | +| **Expected Result** | Chart tag card has blue "Launch" button; non-chart card does not | + +### CHT-009 — Copy pull command from tag card +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Tag card displayed | +| **Steps** | 1. Click "Copy" on a tag card | +| **Expected Result** | Helm pull command copied to clipboard, success toast shown | + +### CHT-010 — Search registries/repositories +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple registries with repositories exist | +| **Steps** | 1. Type in the search box in the left panel
2. Observe filtering | +| **Expected Result** | List filters to matching registries and repositories; non-matching entries hidden | + +### CHT-011 — Open Launch modal +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Chart tag selected | +| **Steps** | 1. Click "Launch" on a chart tag | +| **Expected Result** | Launch modal opens with repository:tag header, cluster selector, instance name, namespace, values options | + +### CHT-012 — Launch modal loads clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters exist in the system | +| **Steps** | 1. Open Launch modal
2. Observe cluster dropdown | +| **Expected Result** | Cluster dropdown populated with available clusters | + +### CHT-013 — Launch modal: no clusters available +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Open Launch modal
2. Observe cluster section | +| **Expected Result** | Warning message "No clusters available. Please add a cluster first." displayed, Launch button disabled | + +### CHT-014 — Launch modal: instance name validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open with cluster selected | +| **Steps** | 1. Leave instance name empty
2. Click Launch | +| **Expected Result** | Toast error "Instance name is required" | + +### CHT-015 — Launch modal: namespace validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster with namespace policy configured | +| **Steps** | 1. Select a disallowed namespace (not in allowedNamespaces)
2. Click Launch | +| **Expected Result** | Toast error "Selected namespace is not allowed for this cluster." | + +### CHT-016 — Launch modal: Quick / Guided / YAML input modes +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Click each mode button (Quick, Guided, YAML)
2. Observe content changes | +| **Expected Result** | Quick: info panel about chart defaults. Guided: schema form (if schema exists). YAML: textarea for YAML input. Active mode highlighted. | + +### CHT-017 — Launch modal: YAML validation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | YAML input mode selected | +| **Steps** | 1. Enter invalid YAML (e.g., `key: [invalid`)
2. Observe error state | +| **Expected Result** | Red error text below textarea, Launch button disabled | + +### CHT-018 — Launch modal: Load Defaults from values.yaml +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Chart has values.yaml with defaults | +| **Steps** | 1. Switch to YAML mode
2. Click "Load Defaults from values.yaml" | +| **Expected Result** | values.yaml content loaded into the textarea | + +### CHT-019 — Submit launch and navigate to instances +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | All required fields filled with valid data | +| **Steps** | 1. Fill cluster, instance name, namespace
2. Click Launch
3. Wait for redirect | +| **Expected Result** | Instance creation API called, success toast, redirected to `/artifact/instances`, instance shown with "Pending Install" status | + +### CHT-020 — Launch modal: namespace controlled by workspace policy +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has namespace readOnly policy | +| **Steps** | 1. Open Launch modal
2. Select cluster with readonly namespace policy
3. Check namespace field | +| **Expected Result** | Namespace field is disabled with blue info message: "Namespace is controlled by your workspace policy." | + +### CHT-021 — Launch modal: namespace dropdown (allowed namespaces) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has allowedNamespaces configured | +| **Steps** | 1. Select such cluster
2. Observe namespace field | +| **Expected Result** | Namespace becomes a dropdown with only allowed values | + +### CHT-022 — Launch modal: user's default cluster pre-selected +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User has defaultClusterId set | +| **Steps** | 1. Open Launch modal | +| **Expected Result** | Default cluster auto-selected in the dropdown | + +--- + +## Category 5: Instance Management (20+ cases) + +### INS-001 — View instances (all clusters) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instances exist across clusters | +| **Steps** | 1. Navigate to `/artifact/instances` | +| **Expected Result** | All instances listed grouped by cluster, stats cards show totals | + +### INS-002 — Filter instances by cluster +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple clusters with instances | +| **Steps** | 1. Navigate to instances page
2. Select a specific cluster from dropdown | +| **Expected Result** | Only instances from that cluster displayed | + +### INS-003 — Instance status: Deployed +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instance in deployed state | +| **Steps** | 1. Look for a deployed instance card | +| **Expected Result** | Green "DEPLOYED" badge with checkmark icon, status reason shown | + +### INS-004 — Instance status: Failed +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance in failed state | +| **Steps** | 1. Look for a failed instance card | +| **Expected Result** | Red "FAILED" badge, error details visible (lastError section appears) | + +### INS-005 — Instance status: Pending (Install/Upgrade/Rollback/Delete) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance in transition state | +| **Steps** | 1. Look for pending instance card | +| **Expected Result** | Amber/yellow "PENDING INSTALL/UPGRADE/ROLLBACK/DELETE" badge | + +### INS-006 — Instance status: Unknown +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Instance with unknown status | +| **Steps** | 1. Look for unknown instance card | +| **Expected Result** | Gray "UNKNOWN" badge | + +### INS-007 — Refresh instance status +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. Click "Refresh" button on the instance card | +| **Expected Result** | Instance status re-fetched, card updates with latest status | + +### INS-008 — Instance card displays metadata correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. 
Examine instance card content | +| **Expected Result** | Card shows: instance name, repository, version tag, namespace, revision, launch date, status reason | + +### INS-009 — Instance action buttons visibility +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance exists | +| **Steps** | 1. Check the action bar at bottom of instance card | +| **Expected Result** | Five buttons visible: Refresh, Entries, Diagnostics, Modify, Delete | + +### INS-010 — View entries (Services) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with services | +| **Steps** | 1. Click "Entries" on the instance card
2. Observe modal | +| **Expected Result** | Modal shows Services with name, type, cluster IP, ports; source badge visible | + +### INS-011 — View entries (Ingresses) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with ingresses | +| **Steps** | 1. Open entries modal
2. Check for Ingresses section | +| **Expected Result** | Ingresses listed with host, paths, TLS status | + +### INS-012 — View diagnostics (Describe) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Click "Diagnostics" on instance card
2. Observe Describe tab | +| **Expected Result** | Modal shows Pods (with status, node, restarts, containers) and Services summary | + +### INS-013 — View diagnostics (Events) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Open diagnostics
2. Click "Events" tab | +| **Expected Result** | Kubernetes events listed with type badge, reason, message, timestamp, count | + +### INS-014 — View diagnostics (Pod Logs) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance with running pods | +| **Steps** | 1. Open diagnostics
2. Click "Pod Logs" tab | +| **Expected Result** | Pod logs displayed in dark terminal-style blocks, copy button available | + +### INS-015 — Copy pod logs +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Diagnostics with logs loaded | +| **Steps** | 1. Open pod logs
2. Click "Copy Logs" | +| **Expected Result** | Combined logs copied to clipboard, success toast shown | + +### INS-016 — Modify instance version tag +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance, new chart version available | +| **Steps** | 1. Click "Modify" on instance
2. Change version tag
3. Confirm | +| **Expected Result** | Instance upgrade initiated, instance moves to "Pending Upgrade" status | + +### INS-017 — Modify instance with values changes +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Open modify modal
2. Switch to YAML input
3. Update values
4. Confirm | +| **Expected Result** | Instance upgraded with modified values | + +### INS-018 — Terminate/delete instance with confirmation +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Deployed instance exists | +| **Steps** | 1. Click "Delete" on instance card
2. Confirm in browser dialog | +| **Expected Result** | Deletion initiated, instance enters "Pending Delete" status, eventually disappears | + +### INS-019 — Terminate instance cancellation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Deployed instance | +| **Steps** | 1. Click "Delete"
2. Click "Cancel" in the confirmation dialog | +| **Expected Result** | Instance not deleted, dialog dismissed | + +### INS-020 — Empty instances state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No instances deployed | +| **Steps** | 1. Navigate to `/artifact/instances` | +| **Expected Result** | Empty state displayed: "No instances found. Launch your first service instance from Artifact Registries" | + +### INS-021 — Instances auto-refresh +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Instances page open | +| **Steps** | 1. Stay on instances page
2. Observe network requests for 30+ seconds | +| **Expected Result** | Background auto-refresh fires every 30 seconds without user interaction | + +--- + +## Category 6: Cluster Monitoring (10+ cases) + +### MON-001 — View cluster health monitoring +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters configured | +| **Steps** | 1. Navigate to `/monitoring/clusters` | +| **Expected Result** | Cluster monitoring cards displayed with health status badges, metrics grid | + +### MON-002 — Stats cards display summary +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | 3+ clusters with varying health | +| **Steps** | 1. Navigate to monitoring page | +| **Expected Result** | Stats cards show: Total Clusters, Healthy count, Warning count, Error count | + +### MON-003 — Monitoring card shows cluster metrics +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Healthy cluster | +| **Steps** | 1. Observe a cluster monitoring card | +| **Expected Result** | Card shows: cluster name, uptime, node count, pod count, GPU usage, CPU usage bar, memory usage bar, last checked time | + +### MON-004 — Expand node details +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has nodes | +| **Steps** | 1. Click "Show Nodes" button on a cluster card
2. Observe node list | +| **Expected Result** | Node list expands showing individual node metrics (CPU, memory, GPU per node) | + +### MON-005 — Healthy cluster status display +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster is healthy | +| **Steps** | 1. Check card header | +| **Expected Result** | Green "Healthy" badge, green checkmark icon | + +### MON-006 — Error cluster status display +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster is unhealthy/error | +| **Steps** | 1. Check card header | +| **Expected Result** | Red "Error" badge, red X icon | + +### MON-007 — Auto-refresh monitoring +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Monitoring page open | +| **Steps** | 1. Stay on page
2. Observe metrics updates over time | +| **Expected Result** | Page auto-refreshes every 30 seconds, "Auto-refresh every 30 seconds" text visible | + +### MON-008 — Manual refresh +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Monitoring page open | +| **Steps** | 1. Click "Refresh" button | +| **Expected Result** | Data reloaded, loading state shown during refresh | + +### MON-009 — Empty monitoring state +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | No clusters configured | +| **Steps** | 1. Navigate to monitoring page | +| **Expected Result** | "No Clusters Available" empty state displayed | + +### MON-010 — Error state when cluster unreachable +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster monitoring API returns error | +| **Steps** | 1. Simulate API failure
2. Observe page | +| **Expected Result** | Error state with retry button shown, error message displayed | + +--- + +## Category 7: User Management (Admin) (15+ cases) + +### USR-001 — Create user with role "user" +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Fill username, password, role=User
3. Set namespace, default cluster, resource limits
4. Click "Create User" | +| **Expected Result** | User created, appears in accounts table | + +### USR-002 — Create user with role "admin" +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Open create user form
2. Select Role=Admin
3. Fill username and password only (namespace/limits hidden for admin)
4. Click "Create User" | +| **Expected Result** | Admin user created, namespace/limits not required, role badge shows "admin" | + +### USR-003 — Create user with mustChangePassword +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create user modal | +| **Steps** | 1. Ensure "Require password change after first login" checkbox is checked
2. Create user | +| **Expected Result** | User created and must change password on first login | + +### USR-004 — Create user without required fields +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create form open | +| **Steps** | 1. Leave username or password empty
2. Click "Create User" | +| **Expected Result** | Validation error toast "Username and initial password are required." | + +### USR-005 — Edit user resource limits +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Non-admin user exists | +| **Steps** | 1. Click "Limits" on a user row
2. Change CPU, Memory, GPU, GPU Mem values
3. Click "Save Limits" | +| **Expected Result** | Limits modal closes, success toast, updated values shown in table | + +### USR-006 — Toggle user role (user ↔ admin) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User exists | +| **Steps** | 1. Click "Make Admin" on a user row
2. Observe role change
3. Click "Make User" to revert | +| **Expected Result** | Role toggled, badge updates, admin users can access all pages after re-login | + +### USR-007 — Enable/disable user +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Active user exists | +| **Steps** | 1. Click "Disable" on an active user
2. Observe badge change
3. Try to login as that user | +| **Expected Result** | Badge changes to "Disabled", disabled user cannot login (returns 401) | + +### USR-008 — Delete user +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Non-self user exists | +| **Steps** | 1. Click "Delete" on a user
2. Confirm deletion | +| **Expected Result** | User removed from table | + +### USR-009 — Cannot delete own admin account +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Current admin logged in | +| **Steps** | 1. Look at own user row | +| **Expected Result** | Delete button is disabled (or not rendered) for the current user | + +### USR-010 — Cannot disable own admin account +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Current admin logged in | +| **Steps** | 1. Look at own user row
2. Check Disable button state | +| **Expected Result** | Disable button is disabled for current user | + +### USR-011 — User Management page admin-only badge +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Observe page header | +| **Expected Result** | "Admin only" badge visible near the title | + +### USR-012 — User table displays all columns +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Users exist | +| **Steps** | 1. Observe the accounts table | +| **Expected Result** | Columns: User (username+email), Role (badge), Status (Active/Disabled), Namespace, Quota (CPU/Mem/GPU), Actions | + +### USR-013 — Namespace auto-generation for user +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Creating user with role=user | +| **Steps** | 1. Enter username
2. Check namespace field (before user edits it) | +| **Expected Result** | Namespace auto-populated as `ocdp-u-<username>` | + +### USR-014 — Create user with resource limits +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create form open, role=User | +| **Steps** | 1. Set specific CPU, Memory, GPU, GPU memory limits
2. Create user
3. View user in table | +| **Expected Result** | Limits stored and displayed in the quota column | + +### USR-015 — User list refresh +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Users page open | +| **Steps** | 1. Click "Refresh" button | +| **Expected Result** | User list reloaded, loading state shown | + +--- + +## Category 8: Multi-tenancy & Permissions (15+ cases) + +### MTN-001 — User A cannot see User B's clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users (A, B), each with their own cluster | +| **Steps** | 1. Login as User A
2. List clusters via API
3. Login as User B
4. List clusters via API | +| **Expected Result** | User A sees only their clusters, User B sees only their clusters. No cross-tenant leakage | + +### MTN-002 — User A cannot see User B's registries +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users with separate registries | +| **Steps** | 1. List registries as User A
2. List registries as User B | +| **Expected Result** | Each user sees only their own registries | + +### MTN-003 — User A cannot delete User B's instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User A and B each have instances | +| **Steps** | 1. As User A, try to call DELETE on User B's instance | +| **Expected Result** | Backend returns 403 Forbidden or 404 Not Found | + +### MTN-004 — User A cannot modify User B's instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User B has an instance | +| **Steps** | 1. As User A, try to update User B's instance | +| **Expected Result** | Backend returns 403 Forbidden | + +### MTN-005 — Admin can see all clusters +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user, clusters belonging to multiple users exist | +| **Steps** | 1. Login as admin
2. List clusters | +| **Expected Result** | Admin sees all clusters across all users | + +### MTN-006 — Admin can see all registries +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Admin user, registries belonging to multiple users exist | +| **Steps** | 1. Login as admin
2. List registries | +| **Expected Result** | Admin sees all registries across all users | + +### MTN-007 — Admin can see all instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instances exist across different users | +| **Steps** | 1. Login as admin
2. List instances per cluster | +| **Expected Result** | Admin sees instances from all users' releases | + +### MTN-008 — ResourceQuota enforcement (CPU) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User has a CPU quota set and is able to deploy charts | +| **Steps** | 1. As user with CPU quota=4, try to deploy chart requesting >4 CPU
2. Check deployment outcome | +| **Expected Result** | Deployment fails, or the over-limit workload is blocked by the ResourceQuota enforced in the namespace | + +### MTN-009 — ResourceQuota enforcement (GPU) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | User with GPU quota=0 | +| **Steps** | 1. Try to deploy a chart requiring GPU | +| **Expected Result** | Deployment fails due to quota enforcement | + +### MTN-010 — Namespace isolation across users +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Users A and B configured with different namespaces | +| **Steps** | 1. User A deploys instance to their namespace
2. User B deploys instance to their namespace
3. Verify User A cannot see User B's pods/instances | +| **Expected Result** | Instances isolated by namespace, no cross-tenant visibility | + +### MTN-011 — Regular user cannot access admin pages +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/configuration/users`
2. Navigate to `/admin` | +| **Expected Result** | Redirected to `/forbidden`, access denied page shown | + +### MTN-012 — Regular user does not see "Users" in home setup card +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Regular user logged in | +| **Steps** | 1. Navigate to `/home`
2. Check "Setup" section | +| **Expected Result** | "Users" card is not rendered for non-admin users | + +### MTN-013 — Default user permissions match expected set +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Regular user created without custom permissions | +| **Steps** | 1. Get user info from `/api/v1/auth/me` or similar
2. Inspect permissions array | +| **Expected Result** | Default permissions include: home:view, configuration:clusters:manage_own, configuration:registries:manage_own, artifact:registries:view, artifact:instances:manage_own | + +### MTN-014 — User workspace metadata stored and returned +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User exists | +| **Steps** | 1. Login and inspect user response
2. Check workspaceId, workspaceName, namespace, defaultClusterId | +| **Expected Result** | Workspace metadata present and consistent | + +### MTN-015 — Admin can create resources under any user scope +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Admin logged in | +| **Steps** | 1. Check if admin can create clusters/registries without ownership restriction | +| **Expected Result** | Admin-created resources are accessible to admin (global scope) | + +--- + +## Category 9: UI/UX Bugs (20+ cases) + +### UI-001 — Page layout does not overflow horizontally +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page at 1440px viewport width
2. Check for horizontal scrollbar | +| **Expected Result** | No horizontal overflow, all content fits within viewport | + +### UI-002 — Responsive layout at mobile breakpoint (768px) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Resize browser to 768px width
2. Navigate through all pages | +| **Expected Result** | Navigation collapses, content stacks vertically, no broken layout | + +### UI-003 — Responsive layout at tablet breakpoint (1024px) +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Resize to 1024px width
2. Check all pages | +| **Expected Result** | Content reflows gracefully, no overlap | + +### UI-004 — Loading state displays correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Slow network (simulated) | +| **Steps** | 1. Enable network throttling
2. Navigate to each page | +| **Expected Result** | Loading spinner/message appears while data is being fetched, content appears after load | + +### UI-005 — No flickering during page transitions +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate between pages rapidly
2. Observe visual transitions | +| **Expected Result** | Smooth transitions, no white flash or layout shift | + +### UI-006 — Empty states show informative messages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Fresh/empty system | +| **Steps** | 1. Check clusters page (empty)
2. Check registries page (empty)
3. Check instances page (empty) | +| **Expected Result** | Each page has a distinct, informative empty state message with relevant icon | + +### UI-007 — Error states show retry action +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | API returns error | +| **Steps** | 1. Simulate backend error
2. Observe error state | +| **Expected Result** | Error message displayed with a "Retry" button | + +### UI-008 — Form validation feedback is visible +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Submit forms with invalid data | +| **Expected Result** | Red error text appears near the invalid field, or toast notification with specific message | + +### UI-009 — Toast notifications appear and disappear +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Perform actions that trigger toasts (save, delete, error)
2. Observe toast behavior | +| **Expected Result** | Toast appears at expected position, auto-dismisses after timeout, can be dismissed manually | + +### UI-010 — Button states: disabled +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Find disabled buttons (Launch when no clusters, Submit with invalid YAML) | +| **Expected Result** | Disabled buttons have reduced opacity, no pointer cursor, cannot be clicked | + +### UI-011 — Button states: loading +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Action in progress | +| **Steps** | 1. Click a button that triggers an API call
2. Observe button during request | +| **Expected Result** | Button shows spinner/loading indicator, disabled during request | + +### UI-012 — Truncation of long text labels +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Long names exist | +| **Steps** | 1. Create resources with very long names
2. Observe display in cards and lists | +| **Expected Result** | Long text is truncated with ellipsis, no layout breakage | + +### UI-013 — Sidebar navigation highlight matches current page +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page
2. Check sidebar nav item highlight | +| **Expected Result** | Current page's nav item is highlighted/active | + +### UI-014 — Page header shows correct title and icon +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Navigate to each page | +| **Expected Result** | Page header displays correct title, icon, and description | + +### UI-015 — Color contrast meets readability +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Inspect text colors against backgrounds using DevTools | +| **Expected Result** | All text meets WCAG AA contrast ratio (4.5:1 for normal text, 3:1 for large text) | + +### UI-016 — Access denied page renders correctly +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | User with insufficient permissions | +| **Steps** | 1. Access a restricted page | +| **Expected Result** | Access denied page shown with "Back Home" button | + +### UI-017 — Cluster list shows health status indicator +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Clusters exist | +| **Steps** | 1. Navigate to cluster config page
2. Check each cluster row | +| **Expected Result** | Each cluster shows a health status indicator (green/yellow/red dot or similar) | + +### UI-018 — Search/filter in chart browser works correctly +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Multiple registries/repositories exist | +| **Steps** | 1. Type partial name in search box
2. Type a query that matches no results | +| **Expected Result** | Matching entries remain visible, non-matching hidden. "No registries" state when nothing matches. | + +### UI-019 — Modal backdrop click behavior +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Any modal open | +| **Steps** | 1. Open a modal (e.g., Launch modal, Cluster form, Modify modal)
2. Click on the dark backdrop | +| **Expected Result** | Modal either closes or stays open, depending on the intended design for that modal; in both cases the click does not cause errors. | + +### UI-020 — Home page displays all sections +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Logged in as admin | +| **Steps** | 1. Navigate to `/home` | +| **Expected Result** | Three sections visible: primary actions (Launch Instance, Instances, Cluster Monitoring), runtime focus sidebar, setup section | + +--- + +## Category 10: Data Persistence (10+ cases) + +### PER-001 — Data survives page refresh (clusters) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Clusters exist | +| **Steps** | 1. Navigate to clusters page
2. Refresh the page (F5) | +| **Expected Result** | Clusters still displayed after refresh | + +### PER-002 — Data survives page refresh (registries) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Registries exist | +| **Steps** | 1. Navigate to registries page
2. Refresh | +| **Expected Result** | Registries still displayed | + +### PER-003 — Data survives page refresh (instances) +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Instances exist | +| **Steps** | 1. Navigate to instances page
2. Refresh | +| **Expected Result** | Instances still displayed | + +### PER-004 — Data survives browser tab close/reopen +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Resources exist | +| **Steps** | 1. Close browser tab
2. Open new tab and navigate to app
3. Login
4. Check all pages | +| **Expected Result** | All data intact after session restoration | + +### PER-005 — Created cluster persists after logout/login +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster was created | +| **Steps** | 1. Logout
2. Login again
3. Check cluster list | +| **Expected Result** | Cluster still present | + +### PER-006 — Created registry persists after logout/login +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry was created | +| **Steps** | 1. Logout
2. Login
3. Check registry list | +| **Expected Result** | Registry still present | + +### PER-007 — Instance deployment persists across page navigation +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance was launched | +| **Steps** | 1. Navigate away from instances page
2. Navigate back to instances page | +| **Expected Result** | Instance still listed with its status | + +### PER-008 — Cache consistency: new cluster appears in Launch dropdown +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Clusters page and artifact browser cached | +| **Steps** | 1. Add a new cluster
2. Navigate to chart browser
3. Open Launch modal
4. Check cluster dropdown | +| **Expected Result** | New cluster visible in dropdown (cache refreshed properly) | + +### PER-009 — Cache consistency: new registry appears in chart browser +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Registry page open, then chart browser | +| **Steps** | 1. Add a new registry
2. Navigate to chart browser
3. Check left panel | +| **Expected Result** | New registry visible (after refresh or auto-reload) | + +### PER-010 — Deletion persists after refresh (no phantom data) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Item deleted earlier | +| **Steps** | 1. Delete a cluster/registry
2. Refresh page
3. Check list | +| **Expected Result** | Deleted item does not reappear | + +--- + +## Category 11: Security (15+ cases) + +### SEC-001 — XSS via form inputs (cluster name) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Create cluster with name `<script>alert('xss')</script>`
2. Observe if script executes on the list page | +| **Expected Result** | Script tag is escaped/rendered as text, no XSS execution | + +### SEC-002 — XSS via form inputs (registry description) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Create registry with description containing HTML/script tags
2. Observe rendering | +| **Expected Result** | HTML is escaped, no script execution | + +### SEC-003 — XSS via instance name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Logged in | +| **Steps** | 1. Launch instance with name `<script>alert('xss')</script>`
2. Navigate to instances page | +| **Expected Result** | Name is rendered safely, no XSS | + +### SEC-004 — IDOR: access another user's instance detail +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User A has an instance, User B knows its ID | +| **Steps** | 1. Login as User B
2. Try to access User A's instance detail by ID | +| **Expected Result** | Backend returns 403 Forbidden or 404 | + +### SEC-005 — IDOR: modify another user's instance +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | User A knows the ID of an instance owned by User B | +| **Steps** | 1. Login as User A
2. Attempt PUT on User B's instance | +| **Expected Result** | 403 Forbidden | + +### SEC-006 — IDOR: delete another user's cluster +| Field | Value | +|-------|-------| +| **Priority** | P0 | +| **Preconditions** | Two regular users exist | +| **Steps** | 1. User A creates a cluster
2. User B attempts to delete it using cluster ID | +| **Expected Result** | 403 Forbidden | + +### SEC-007 — Sensitive data in API responses +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/clusters`
2. Inspect response for raw certs/keys/tokens | +| **Expected Result** | Sensitive fields are masked or encrypted (e.g., `hasCaData: true` instead of raw cert) | + +### SEC-008 — Sensitive data in registry responses +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Call `GET /api/v1/registries`
2. Check response for password exposure | +| **Expected Result** | Password not returned in plain text; `hasPassword` boolean used instead | + +### SEC-009 — JWT token manipulation: signature removed +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid JWT obtained | +| **Steps** | 1. Strip JWT signature, keep base64 payload
2. Send API request with tampered token | +| **Expected Result** | Backend rejects token, returns 401 | + +### SEC-010 — JWT token manipulation: alg changed to "none" +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Valid JWT obtained | +| **Steps** | 1. Change JWT header `alg` to `none`
2. Send modified token | +| **Expected Result** | Backend rejects, returns 401 | + +### SEC-011 — Directory traversal in repository name +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Try to access artifacts with `../../etc/passwd` as repository name | +| **Expected Result** | Returns 400 Bad Request or 404, no directory traversal occurs | + +### SEC-012 — Rate limiting on login endpoint +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Send rapid login requests (20+ in 1 second) with wrong passwords | +| **Expected Result** | After threshold, rate limiting kicks in (429 Too Many Requests) | + +### SEC-013 — Brute force protection +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Attempt login with wrong password 10+ times in succession | +| **Expected Result** | Account should be temporarily locked or delayed responses introduced | + +### SEC-014 — Session fixation test +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Capture pre-auth token
2. Login
3. Check if pre-auth token is still valid | +| **Expected Result** | Pre-auth token invalidated, new token issued on login | + +### SEC-015 — No sensitive data in error messages +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | None | +| **Steps** | 1. Trigger various API errors (invalid auth, bad request, server error)
2. Inspect error responses | +| **Expected Result** | Error messages do not reveal stack traces, SQL queries, or system internals | + +--- + +## Category 12: Edge Cases (10+ cases) + +### EDG-001 — Rapid double-click on submit buttons +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Create modal open | +| **Steps** | 1. Click "Save" or "Create" button rapidly multiple times | +| **Expected Result** | Button is disabled after first click (loading state), duplicate submissions prevented | + +### EDG-002 — Very long instance name +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Enter instance name of 253+ characters
2. Submit | +| **Expected Result** | Backend validates Kubernetes naming constraints, returns error if too long | + +### EDG-003 — Special characters in namespace +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Launch modal open | +| **Steps** | 1. Enter namespace with uppercase letters or special characters
2. Submit | +| **Expected Result** | Backend validates DNS-1123 label constraints, returns error | + +### EDG-004 — Browser back/forward navigation +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Authenticated | +| **Steps** | 1. Navigate to page A, then page B
2. Click browser back button
3. Click browser forward button | +| **Expected Result** | Navigation works correctly, no infinite redirects, no blank pages | + +### EDG-005 — Concurrent operations: launch instance in two tabs +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Same user, same cluster, same namespace | +| **Steps** | 1. Tab 1: Launch instance "test-a"
2. Tab 2 (simultaneously): Launch instance "test-b" | +| **Expected Result** | Both instances created, no data race or corruption | + +### EDG-006 — Delete cluster with running instances +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Cluster has active Helm releases | +| **Steps** | 1. Attempt to delete a cluster that has running instances | +| **Expected Result** | Backend should reject deletion or return a warning about active instances | + +### EDG-007 — Instance name collision (same namespace) +| Field | Value | +|-------|-------| +| **Priority** | P1 | +| **Preconditions** | Instance "test" already exists in namespace "default" on cluster X | +| **Steps** | 1. Try to create another instance named "test" in the same namespace and cluster | +| **Expected Result** | Backend returns conflict error, instance not created | + +### EDG-008 — Rapid create/delete/create same resource name +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | None | +| **Steps** | 1. Create cluster named "test-cluster"
2. Delete it
3. Immediately create another cluster named "test-cluster" | +| **Expected Result** | Second creation succeeds after deletion completes | + +### EDG-009 — Helm release name collision across namespaces +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Helm release exists in namespace-a | +| **Steps** | 1. Launch instance with same name in namespace-b on the same cluster | +| **Expected Result** | Helm releases are namespaced, so creation should succeed in different namespace | + +### EDG-010 — YAML values with non-object top-level structure +| Field | Value | +|-------|-------| +| **Priority** | P2 | +| **Preconditions** | Launch modal open, YAML mode | +| **Steps** | 1. Enter just a string `"hello"` or array `[1,2,3]` as YAML values
2. Click Launch | +| **Expected Result** | YAML validation error: "Values YAML must be an object" | + +--- + +## Priority Distribution Summary + +| Priority | Count | +|----------|-------| +| P0 (Critical) | 26 | +| P1 (High) | 87 | +| P2 (Medium) | 34 | +| **Total** | **147** | + +## Existing Test Coverage Reference + +The following test scripts already exist in `test/` and cover portions of these scenarios: + +| Test Script | Coverage | +|-------------|----------| +| `current-platform-smoke.sh` | Login, registry health, chart browsing, optional deploy/cleanup | +| `frontend-playwright-smoke.py` | Login UI, chart browser rendering, instance page, mobile layout | +| `frontend-interactions-audit.py` | Auth, navigation, config modals, health buttons, launch modes | +| `multitenant_rbac_api_contract.py` | Auth denial, RBAC differences, resource isolation, admin cleanup | +| `multitenant_rbac_ui_playwright.py` | Multi-tenant UI isolation tests | +| `vllm_k3s_deploy_smoke.py` | Real k3s deployment, GPU quota, ResourceQuota, diagnostics | +| `chart_values_yaml_api_contract.py` | Values YAML API contract validation | +| `user_namespace_quota_api_contract.py` | User namespace and quota API contract | +| `instance_card_action_layout_playwright.py` | Instance card action button layout | diff --git a/docs/test-users.json b/docs/test-users.json new file mode 100644 index 0000000..7aec55d --- /dev/null +++ b/docs/test-users.json @@ -0,0 +1,79 @@ +{ + "meta": { + "createdAt": "2026-05-11T09:58:00Z", + "apiBase": "http://10.6.80.114:18080/api/v1", + "adminUsername": "admin", + "adminPassword": "admin123" + }, + "existingResources": { + "clusters": { + "k8s": { + "id": "23880994-dfe4-48d0-abc0-b49692cc630a", + "host": "https://10.6.80.12:6443" + }, + "k3s": { + "id": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "host": "https://10.6.80.23:6443" + } + }, + "registries": { + "harbor-bwgdi": { + "id": "83b823af-873b-457c-912c-9ccde3cb12e6", + "url": "https://harbor.bwgdi.com" + } + } + 
}, + "testUsers": [ + { + "id": "0c70fce6-fa69-4231-979a-5970ff9b854b", + "username": "test-user-a", + "password": "TestUserA123!", + "email": "test-user-a@local.ocdp", + "role": "user", + "purpose": "Frontend UI testing", + "namespace": "ocdp-u-test-a", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "4", + "quotaMemory": "8Gi", + "quotaGpu": "1", + "quotaGpuMemory": "5000" + }, + { + "id": "819b12ec-718e-48be-92bc-0cd1f7205926", + "username": "test-user-b", + "password": "TestUserB123!", + "email": "test-user-b@local.ocdp", + "role": "user", + "purpose": "API/deploy testing", + "namespace": "ocdp-u-test-b", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "2", + "quotaMemory": "4Gi", + "quotaGpu": "0", + "quotaGpuMemory": "0" + }, + { + "id": "04ef67ba-49c2-44e2-87b4-b71b5d9f36dc", + "username": "test-user-c", + "password": "TestUserC123!", + "email": "test-user-c@local.ocdp", + "role": "user", + "purpose": "Permission isolation testing", + "namespace": "ocdp-u-test-c", + "defaultClusterId": "dbf824f1-9962-4d8e-881e-870c75fdb6f5", + "quotaCpu": "4", + "quotaMemory": "8Gi", + "quotaGpu": "1", + "quotaGpuMemory": "5000" + }, + { + "id": "8bcffd0e-4e7a-4e9a-a47b-bfdb463698c2", + "username": "test-admin-d", + "password": "TestAdminD123!", + "email": "test-admin-d@local.ocdp", + "role": "admin", + "purpose": "Admin features testing", + "namespace": "ocdp-ws-default" + } + ] +} diff --git a/docs/user-guide.md b/docs/user-guide.md new file mode 100644 index 0000000..06283ab --- /dev/null +++ b/docs/user-guide.md @@ -0,0 +1,752 @@ +# OCDP Platform User Guide + +## Table of Contents + +1. [Overview](#1-overview) +2. [Login / Authentication](#2-login--authentication) +3. [Home Page](#3-home-page) +4. [Launch Instance (Chart Browser)](#4-launch-instance-chart-browser) +5. [Instances Management](#5-instances-management) +6. [Cluster Monitoring](#6-cluster-monitoring) +7. [Setup — Clusters](#7-setup--clusters) +8. 
[Setup — Registries](#8-setup--registries) +9. [Setup — Users (Admin)](#9-setup--users-admin) +10. [Navigation](#10-navigation) + +--- + +## 1. Overview + +**OCDP (Open Cloud Deployment Platform)** is a Kubernetes LLM inference deployment platform. Its primary use case is: a user selects a `vllm-serve` Helm chart from a Harbor registry, fills in the instance name, namespace, and values, and the backend pulls the packaged OCI Helm chart and deploys it to a configured Kubernetes cluster via the Helm SDK. + +### Architecture + +``` +Frontend (React 18 + TypeScript + Vite + TailwindCSS) + | + | HTTP /api/* + v +Nginx (Reverse Proxy / Static File Server) + | + | HTTP /api/* + v +Backend (Go 1.24 + Gorilla Mux + Hexagonal Architecture) + | + +---> PostgreSQL (persistence) + +---> ORAS SDK (OCI chart pull) + +---> Helm SDK (deploy/upgrade/delete) + +---> client-go (Kubernetes API) +``` + +### Tech Stack + +| Layer | Technology | +|-------------|-----------------------------------------------------------------| +| Frontend | React 18, TypeScript, Vite, TailwindCSS, React Router, Lucide icons | +| Backend | Go 1.24, Gorilla Mux, PostgreSQL, ORAS SDK, Helm SDK, client-go | +| Gateway | Nginx (reverse proxy + static file serving) | +| Database | PostgreSQL | +| Deployment | Docker Compose | + +--- + +## 2. Login / Authentication + +### Access + +The frontend is deployed at `http://10.6.80.114:18080`. Navigating to the root URL redirects to the login page. + +### Login Page + +The login page displays: + +- **OCDP Console** title with a shield icon +- Subtitle: "Sign in with an account created by an administrator" +- **Username** text field (required) +- **Password** text field (required, masked) +- **Login** button — blue, centered, full-width + +When you click **Login**: + +1. A toast notification "Logging in..." appears briefly +2. The button shows a spinning loader and "Logging in..." text +3. On success: a "Welcome, {username}!" 
toast appears, and you are redirected to `/home` +4. On failure: a red error message is shown below the button (e.g., "Invalid credentials" or "Network error") + +### Default Admin Credentials + +If the system was bootstrapped via `.env` configuration, the default admin credentials are: + +- **Username:** `admin` (or whatever was set as `BOOTSTRAP_ADMIN_USER`) +- **Password:** The value of `BOOTSTRAP_ADMIN_PASS` in your `.env` file + +### JWT Session Behavior + +- The backend issues JWT tokens upon successful login +- The frontend stores the tokens and sends them as `Authorization: Bearer <token>` headers +- Session persists until the token expires or the user signs out +- Clicking the **logout icon** (top-right corner, person icon with a door arrow) signs the user out + +### Routing When Authenticated + +- Unauthenticated users are always redirected to `/` (login page) +- Authenticated users visiting `/` are redirected to `/home` +- Protected routes are wrapped in a `ProtectedRoute` component; unauthorized access redirects to login +- Route-level access is enforced per user role (admin vs regular user) + +--- + +## 3. Home Page + +The home page at `/home` is the main landing page after login. It has three sections. + +### Section 1: Primary Actions (3 Cards) + +A large card titled "One Click Deployment Platform" / "Operations Workbench" contains three action cards arranged in a row: + +**1. Launch Instance** + - Icon: Rocket (blue background) + - Description: "Browse Helm charts and deploy a new inference service." + - Clicking navigates to `/artifact/registries` + - Shows "Open" with an arrow on hover + +**2. Instances** + - Icon: Package (emerald background) + - Description: "Check release status, entries, upgrades, and deletion." + - Clicking navigates to `/artifact/instances` + - Shows "Open" with an arrow on hover + +**3. Cluster Monitoring** + - Icon: Activity (dark slate background) + - Description: "Inspect cluster health and node resource pressure."
+ - Clicking navigates to `/monitoring/clusters` + - Shows "Open" with an arrow on hover + +Each card: +- Has a subtle border, slate background, and hover effect (lifts slightly, adds blue border) +- Shows a colored icon box at the top +- Shows title and description +- Has an "Open" link with right-arrow at the bottom + +### Section 2: Runtime Focus Sidebar + +On the right side of the primary actions, a smaller card titled "Runtime Focus" with subtitle "High-frequency checks": + +- **Release status** — clickable row that navigates to `/artifact/instances`. Subtitle: "Installed, failed, deleting" +- **Cluster health** — clickable row that navigates to `/monitoring/clusters`. Subtitle: "Nodes, pods, CPU, memory" + +### Section 3: Setup + +A bottom section titled "Setup" with subtitle "Less frequent administrative tasks". Contains three buttons in a row: + +1. **Clusters** — Server icon. Description: "Kubeconfig and namespace policy". Navigates to `/configuration/clusters` +2. **Registries** — Database icon. Description: "Harbor robot account and chart access". Navigates to `/configuration/registries` +3. **Users** — Users icon. Description: "Admin-only account management". Navigates to `/configuration/users`. Only visible to admin users. + +--- + +## 4. Launch Instance (Chart Browser) + +The Artifact Browser page at `/artifact/registries` is the chart browser for selecting and launching Helm charts. + +### Page Layout + +The page is a split-pane layout: +- **Left sidebar (w-80):** Registry tree with search +- **Right main panel:** Repository info, tags, and launch actions + +### Left Panel: Registry Tree + +**Header bar** (top of page): +- Title: "Chart Browser" +- Subtitle: "Select a Harbor chart and launch it into a Kubernetes cluster" +- **Refresh** button (secondary style, refresh icon) — reloads all registries and repositories, clears cache + +**Search bar** at the top of the sidebar: +- Placeholder: "Search registries / repositories..." 
+- Filters the tree as you type (matches registry name and repository name) +- Shows a search icon on the left + +**Registry nodes** listed below the search bar: +- Each registry shows: + - Chevron (down/right) to expand/collapse + - Database icon (blue) + - Registry name + - Registry URL (truncated, small text) + - Badge showing count of repositories +- Registries are expanded by default +- Clicking a registry header toggles expansion + +**Repository items** under each registry: +- Each shows the repository name +- Clicking a repository selects it (highlighted blue background) and loads its artifacts in the right panel +- Shows artifact count if available +- If no repositories: shows "No chart repositories found." +- If loading: shows "Loading repositories..." + +### Right Panel: Repository Details + +When a repository is selected: + +**Repository header:** +- Label: "Chart repository" (uppercase, small) +- Repository name (large, bold) +- Registry name below +- **Filter chips:** Two toggle buttons: "Charts" (default selected, blue) and "All tags" +- "Charts" filter shows only artifacts of type `chart` (i.e., deployable Helm charts) +- "All tags" shows every artifact version regardless of type + +**Artifact grid** (responsive: 1-3 columns): +- Each artifact is displayed in a **TagCard** component (see below) + +When no repository is selected: +- Shows empty state: "Select a repository" with "Choose a chart repository from the left panel." + +### TagCard Component + +Each TagCard shows: + +- **Type icon**: Package (chart), Box (image), or File (other) with color-coded background +- **Tag name** (e.g., `1.0.0`) with a type badge (e.g., "chart" in blue) +- **Repository path** (truncated) +- **Size** in KB or MB (e.g., "12.5 MB") + +**TagCard buttons:** + +1. **Launch button** — Blue button, only visible when the artifact type is `chart`. Shows rocket icon + "Launch". Opens the LaunchModal. +2. **Copy button** — White button with copy icon. 
Copies the `helm pull oci://...` command to the clipboard. Shows a success toast. + +### LaunchModal + +Opens when "Launch" is clicked on a chart artifact. Title: "Launch Instance" with rocket icon. + +**Modal header:** +- Shows the repository name and tag (e.g., `vllm-serve:1.0.0`) + +**Form fields:** + +1. **Target Cluster** (required) + - Dropdown select listing all configured clusters + - Auto-selects the first available cluster (or user's default cluster) + - If no clusters: shows an amber warning "No clusters available. Please add a cluster first." + - Shows loading state while fetching + +2. **Instance Name** (required) + - Text input, placeholder: "my-app" + - Help text: "Lowercase alphanumeric characters, '-' or '.'" + +3. **Namespace** (required) + - If the selected cluster has allowed namespaces: shows a dropdown of allowed namespaces + - If no restrictions: shows a text input, default "default" + - If namespace is controlled by workspace policy: input is disabled with a blue info notice + +4. **Description** (optional) + - Text input, placeholder: "Optional description" + +5. 
**Configuration Values** — Three input modes: + + **a) Quick mode** (default): + - Blue info box explaining "Quick launch uses the chart defaults" + - Shows badges: "No values override" and if available "Chart values.yaml available" + - If `values.yaml` exists: "Load Defaults from values.yaml" button switches to YAML mode with defaults pre-filled + - Best for simple deployments with no custom overrides + + **b) Guided mode** (form): + - Only available when the chart provides a JSON Schema for its values + - Dynamically generates form fields based on the schema + - Supports various schema types: string, number, boolean, object, array + - **"Load Defaults"** button — fills in values from the schema defaults + - Shows schema-generated form in a scrollable container + + **c) YAML mode**: + - A code editor (textarea) for entering custom values in YAML format + - Real-time YAML validation with error display + - "Load Defaults from values.yaml" button + - "Load Schema Defaults" button (if no values.yaml but schema exists) + - "Clear" button to reset the YAML + - Help text changes based on whether schema is available + +6. 
**Artifact Info** (read-only summary): + - Repository name + - Tag (badge) + - Type + +**Footer buttons:** +- **Cancel** (secondary) — closes the modal +- **Launch** (success/green style with rocket icon) — submits the deployment + +**Validation on submit:** +- Cluster must be selected +- Instance name must not be empty +- Namespace must not be empty +- If namespace policy restricts namespaces, the selected namespace must be in the allowed list +- YAML values are parsed and validated before submission + +**After successful submit:** +- Form resets +- Modal closes +- Navigates to `/artifact/instances` to show the deploying instance +- Shows "Instance deployed successfully" toast + +**Error states:** +- Loading clusters fails: error toast +- Missing required fields: validation error toast +- YAML parse error: inline error + toast +- API failure: error toast with message + +--- + +## 5. Instances Management + +The Instances page at `/artifact/instances` manages all deployed Helm releases across clusters. + +### Stats Cards + +Three gradient stat cards at the top (shown when clusters exist): + +1. **Total Instances** (blue) — total count across all clusters +2. **Clusters** (emerald) — number of clusters +3. 
**Showing** (violet) — count of currently displayed instances (only shown when filtering across 2+ clusters) + +### Filter Controls + +When more than one cluster exists, a filter bar appears: +- **Filter by Cluster** dropdown with "All Clusters" and each cluster with instance count +- Selecting a cluster filters the instance list to that cluster only + +### Instance Display + +Instances are grouped by cluster when "All Clusters" is selected, each cluster section showing: +- Cluster name with instance count +- Instances in a responsive 2-column grid + +### InstanceCard Component + +Each card shows: + +**Header:** +- Instance name (bold, large) +- Repository name with version badge (cyan) +- **Status badge** with colored background glow: + +| Status | Badge Color | Description | +|-----------------|-------------|----------------------------------------------------| +| Deployed | Emerald | Deployment completed successfully | +| Failed | Rose/Red | Last operation reported a failure | +| Pending Install | Amber | Installation is in progress | +| Pending Upgrade | Amber | Upgrade is in progress | +| Pending Rollback| Amber | Rollback is in progress | +| Pending Delete | Orange | Deletion is in progress | +| Superseded | Indigo | A newer revision has replaced this instance | +| Uninstalled | Slate | Instance has been removed from the cluster | +| Unknown | Slate | Awaiting next state update | + +- Status reason text (e.g., "Deployment completed successfully." or a custom message) +- Last operation label (Install / Upgrade / Rollback / Delete / Sync) + +**Details grid:** +- **Namespace** — purple icon +- **Revision** — green icon, Helm revision number +- **Repository** — full-width, truncated, monospace +- **Launched** — date the instance was created + +**Last error alert** (conditionally shown): +- Red alert box with warning icon +- Shows the last error message if the instance encountered errors + +**Action buttons (5 buttons in a row):** + +1. **Refresh** — Refresh icon. 
Refreshes the status of this specific instance from the cluster. +2. **Entries** — Network icon (emerald). Opens Entries modal. +3. **Diagnostics** — Activity icon (indigo). Opens Diagnostics modal. +4. **Modify** — Settings icon (blue). Opens Modify modal. +5. **Delete** — Stop icon (rose/red). Prompts confirmation, then deletes. + +### Empty / Loading / Error States + +- **Loading:** Shows spinning indicator with "Loading instances..." +- **Error:** Shows error state with retry button +- **Empty:** "No instances found" with link to launch from registries +- **Auto-refresh:** Data refreshes every 30 seconds silently + +### Entries Modal + +Displays network entry information for the instance. + +**Header:** +- Title: "Instance Entries" +- Instance name and namespace + +**Data Source badge:** +- **Live from Kubernetes** (green) — fetched directly from the cluster +- **From Helm Manifest** (blue) — extracted from Helm manifest +- **From Helm Notes** (yellow) — from Helm release notes +- **No Data Available** (gray) + +**Services section** (if any): +- Lists each Kubernetes Service with: + - Service name and type badge + - Cluster IP (copyable) + - Ports with mapping (e.g., `80 → 8080 TCP`, NodePort) + - LoadBalancer entries (if applicable) with external link and copy + +**Ingresses section** (if any): +- Lists each Kubernetes Ingress with: + - Ingress name and class + - Host with external link and copy buttons + - Path routing (e.g., `/ → service:80`) + - TLS indicator if HTTPS is configured + +**Helm Notes** (as fallback): +- Raw Helm notes text shown in a monospace pre block + +**Footer:** Close button + +### Diagnostics Modal + +Provides Kubernetes-level diagnostics for the instance. + +**Header:** +- "Runtime diagnostics" label +- Instance name +- Namespace and data collection timestamp + +**Refresh button** in the header — reloads diagnostics data from Kubernetes + +**Three tabs:** + +**1. 
Describe tab** (default): +- Summary metrics: Pods count, Services count, Events count +- **Pods section** — each pod shows: + - Pod name, node, pod IP, restart count + - Status badge (Running=success, other=warning) + - Containers with name, state badge, image, and reason/message +- **Services section** — each service with name, type badge, ClusterIP, ports + +**2. Events tab**: +- Kubernetes events sorted by time +- Each event shows: type badge (Warning/ Normal), reason, timestamp, message, involved object, count + +**3. Pod Logs tab**: +- Logs from each container, labeled by pod/container name +- Monospace display on dark background +- **Copy Logs** button copies all logs to clipboard +- Last 300 lines are fetched per container + +**Error states:** +- Loading fails: error toast +- No data: amber info box "Diagnostics data is not available" +- Empty pods/events/logs: relevant empty state message + +### Modify Modal + +Allows modifying an existing instance. + +**Header:** "Modify Instance - {name}" with settings icon + +**Current info section** (read-only): +- Current version +- Cluster ID +- Repository + +**Fields:** +1. **Version Tag** — text input, pre-filled with current version. Help: "Leave unchanged to keep current version" +2. **Description** — text input +3. **Configuration Values** — Form or YAML mode (auto-detects if schema exists): + - **Form mode:** Dynamic form generated from values schema, with real-time sync to YAML + - **YAML mode:** Textarea with monospace font, pre-filled with current values + +**Footer:** Cancel / Modify buttons + +**After submit:** Instance is upgraded via Helm, data refreshes, modal closes. + +--- + +## 6. Cluster Monitoring + +The Monitoring page at `/monitoring/clusters` shows the health and resource usage of all configured Kubernetes clusters. + +### Summary Stats Cards + +Four stat cards at the top: + +1. **Total Clusters** (blue) — total number of clusters +2. **Healthy** (green) — clusters with status "healthy" +3. 
**Warning** (orange) — clusters with status "warning" or "unknown" +4. **Error** (red) — clusters with status "error" or "unhealthy" + +### Auto-Refresh + +- The page auto-refreshes every **30 seconds** +- A small note shows "Auto-refresh every 30 seconds" with a refresh indicator +- Manual **Refresh** button in the page header + +### ClusterMonitorCard + +Each cluster is shown in an expandable card. + +**Card header:** +- Status icon (green check, yellow warning, red X, or gray question mark) +- Cluster name with status badge (Healthy / Warning / Error) + +**Metrics grid** (4 columns): +- **Uptime** — how long the cluster has been running +- **Nodes** — node count +- **Pods** — pod count +- **GPU** — used/total GPU count + +**Resource usage** (3 columns with progress bars): +- **CPU** — used/total, percentage bar, max per node with peak usage +- **Memory** — used/total, percentage bar, max per node with peak usage +- **GPU** (only if GPUs exist) — used/total, percentage bar, max per node with peak usage + +**Last checked** timestamp + +**Show Nodes / Hide Nodes** toggle button (only if nodes exist) + +### NodeMetricCard (Expandable Nodes) + +When nodes are expanded, each node shows: + +- Node name with status icon (Ready green / NotReady red) +- Status badge (Ready / NotReady) and role badge (Control Plane / Worker) +- Age +- **CPU** — usage/allocatable with progress bar +- **Memory** — usage/allocatable with progress bar +- **GPU** — usage/capacity with progress bar (shows "No GPU" if none) +- Additional info: Pod count, Kubelet version + +### States + +- **Loading:** Shows "Loading cluster monitoring data..." +- **Error:** Error state with retry button, "Failed to Load Clusters" +- **Empty:** "No Clusters Available" with suggestion to add clusters in configuration + +--- + +## 7. Setup — Clusters + +The Cluster Configuration page at `/configuration/clusters` manages Kubernetes cluster connections. 
+ +### Page Header + +- Title: "Configuration - Clusters" +- Description changes based on role (admin sees "Manage all...", regular user sees "Manage your private...") +- **Refresh** button (secondary) +- **Add Cluster** button (primary, blue, plus icon) + +### ClusterList Component + +**Loading state:** Spinner with "Loading clusters..." +**Empty state:** Server icon with "No clusters" and "Add your first cluster..." + +**Cluster cards** (2-column grid): +- Cluster name with server icon and visibility label (Private / Workspace / Global) +- Description (if any) +- Three action buttons: + - **Test Connection** (Activity icon, emerald) — performs a health check against the cluster + - **Edit** (pencil icon, blue) — opens edit modal + - **Delete** (trash icon, red) — prompts confirmation then deletes +- API Server URL (monospace, truncated) +- Auth status grid (3 columns): CA Certificate, Client Cert, Client Key — each shows "✅ Configured" or "✗ Not Configured" +- Created date + +Action buttons may be disabled based on user permissions (read-only access). + +### Add / Edit Cluster Modal + +**Form fields:** + +1. **Cluster Name** (required) — text input, e.g., "Production Cluster" +2. **API Server URL** (required) — must start with `https://`, e.g., `https://kubernetes.example.com:6443` +3. **CA Certificate (Base64)** — required for create. Textarea for base64-encoded CA cert. In edit mode: shows current status and optional new input. +4. **Client Certificate (Base64)** — required for create. In edit mode: shows current status. +5. **Client Key (Base64)** — required for create. In edit mode: shows current status. +6. **Bearer Token** — optional alternative to client certificates. Textarea for service account token. +7.
**Description** — optional textarea + +**Validation:** +- Name and API Server URL required +- URL must start with `http://` or `https://` +- Create mode requires either token OR all three certificate fields +- Edit mode: certificate fields are optional (leave blank to keep existing) + +**Footer:** Cancel / Add Cluster or Save + +### Health Check + +Clicking the **Test Connection** (Activity) button on a cluster: +- Shows "Testing cluster..." toast +- If successful: green toast with success message +- If failed: red toast with error message +- Checks connectivity to the Kubernetes API server + +--- + +## 8. Setup — Registries + +The Registry Configuration page at `/configuration/registries` manages OCI registry connections. + +### Page Header + +- Title: "Configuration - Registries" +- **Refresh** button (secondary) +- **Add Registry** button (primary, blue, plus icon) + +### RegistryList Component + +**Loading state:** "Loading registries..." +**Empty state:** Database icon with "No registries" and "Add your first registry..." + +**Registry cards** (vertical list): +- Registry name with database icon and visibility label +- **Insecure** badge (yellow, if insecure flag is on) +- Registry URL (clickable link, opens in new tab) +- Description (if any) +- Username display +- Two action buttons: + - **Edit** (pencil icon, blue) — opens edit modal + - **Delete** (trash icon, red) — prompts confirmation then deletes + +### Add / Edit Registry Modal + +**Form fields:** + +1. **Name** (required) — e.g., "Harbor Production" +2. **Registry URL** (required) — e.g., `https://registry.example.com` +3. **Username** (required) — registry username (Harbor robot account recommended) +4. **Password** — required for create. In edit mode: shows current status ("Password set - encrypted") and optional new password input +5. **Description** — optional textarea +6. **Insecure** — checkbox. 
"Allow insecure connection (skip SSL certificate verification)" — for registries using HTTP or self-signed certs + +**Test Connection button** (in edit mode only, after saving): +- Tests the registry connectivity by calling the backend health endpoint +- Button shows a pulsing test tube icon while testing +- Shows success/failure toast + +**Footer:** Save / Test Connection / Cancel + +--- + +## 9. Setup — Users (Admin) + +The User Management page at `/configuration/users` is **admin-only**. Non-admin users cannot access this route. + +### Page Header + +- "Admin only" label with shield icon +- Title: "User Management" +- Description: "Create accounts, assign roles, and disable access without public self-registration." +- **Refresh** button (secondary) + +### Create User Form (Left Panel) + +**Username** (required) — text input +**Initial password** (required) — masked input +**Role** dropdown — "User" or "Admin" + +When **User** role is selected, additional fields appear: + +**Tenant namespace section:** +- **Namespace** — text input, auto-generated from username as `ocdp-u-{username}` +- **Default cluster** — dropdown of available clusters + +**Resource limits section:** +- **CPU** — default "4" (Kubernetes quantity, e.g., "4" or "500m") +- **Memory** — default "16Gi" +- **GPU** — default "0" (integer count) +- **GPU Mem** — default "0" (integer MB, e.g., 10000) +- Help text explains the units + +**Checkbox:** +- "Require password change after first login" — checked by default + +**Create User button** (primary, full-width, user-plus icon) + +### Accounts Table (Right Panel) + +A table with columns: + +| Column | Content | +|-----------|------------------------------------------------------| +| User | Username + email | +| Role | Badge: "admin" (info blue) or "user" (secondary) | +| Status | Badge: "Active" (green) or "Disabled" (warning) | +| Namespace | Namespace + workspace name + default cluster | +| Quota | CPU, Memory, GPU/GPU Mem (admin shows "default 
workspace") | +| Actions | See below | + +**Actions per row (4 buttons):** + +1. **Make User / Make Admin** — toggles the user's role between admin and user +2. **Limits** (pencil icon) — opens the Edit Limits modal (only for non-admin users) +3. **Enable / Disable** — toggles the user's active status (disabled for own account) +4. **Delete** (trash icon, red) — deletes user after confirmation (disabled for own account) + +### Edit Limits Modal + +Opens when the "Limits" button is clicked for a non-admin user: + +- **Tenant limits** label with gauge icon +- User's name as title +- Description: "Changes are applied to workspace metadata..." +- Fields: Namespace, Default cluster, CPU, Memory, GPU, GPU Memory +- **Cancel** / **Save Limits** buttons + +--- + +## 10. Navigation + +### Left Sidebar + +The sidebar shows the "Operations" branding at the top with the following navigation items: + +| Item | Icon | Route | +|-------------------|-----------------------|----------------------------------| +| Home | Home (gray) | `/home` | +| Launch Instance | Rocket (blue) | `/artifact/registries` | +| Instances | Boxes (emerald) | `/artifact/instances` | +| Cluster Monitoring| LineChart (teal) | `/monitoring/clusters` | +| **Setup** (collapsible) | Settings | | +| └ Clusters | Server (teal) | `/configuration/clusters` | +| └ Registries | Database | `/configuration/registries` | +| └ Users | Users (blue) | `/configuration/users` | + +- The Setup section is expanded by default +- Active nav item is highlighted with a blue background +- Sidebar collapses on mobile with a hamburger menu toggle +- Navigation items are filtered dynamically based on user role: + - "Users" is only shown to admin users + - Routes are protected server-side too + +### Page Header / Breadcrumbs + +Each page shows a header in the top navigation bar with: +- Page icon +- Page title (e.g., "Launch Instance", "Instances", "Setup - Clusters") +- Current user's name and role badge on the right +- **Sign Out** button (door
icon with arrow, top-right) + +The title mapping is: + +| Route | Header Title | +|-------------------------------|-------------------------| +| `/artifact/registries` | Launch Instance | +| `/artifact/instances` | Instances | +| `/configuration/clusters` | Setup - Clusters | +| `/configuration/registries` | Setup - Registries | +| `/configuration/users` | Setup - Users | +| `/monitoring/clusters` | Monitoring - Clusters | +| `/home` | OCDP Platform | + +### Legacy Route Redirects + +Several legacy URL patterns redirect to current routes: +- `/config/*` → `/configuration/clusters` +- `/monitor`, `/cluster`, `/cluster/monitor` → `/monitoring/clusters` +- `/artifact/registry` → `/artifact/registries` +- `/artifact/instance` → `/artifact/instances` +- `/registry` → `/artifact/registries` +- `/register` → `/` + +--- diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 3dcc6fa..831988c 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -14,7 +14,8 @@ "react": "^19.1.1", "react-dom": "^19.1.1", "react-router-dom": "^7.9.4", - "reflect-metadata": "^0.2.2" + "reflect-metadata": "^0.2.2", + "yaml": "^2.8.4" }, "devDependencies": { "@eslint/js": "^9.36.0", @@ -8688,16 +8689,18 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", - "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", - "dev": true, + "version": "2.8.4", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.4.tgz", + "integrity": "sha512-ml/JPOj9fOQK8RNnWojA67GbZ0ApXAUlN2UQclwv2eVgTgn7O9gg9o7paZWKMp4g0H3nTLtS9LVzhkpOFIKzog==", "license": "ISC", "bin": { "yaml": "bin.mjs" }, "engines": { "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" } }, "node_modules/yargs": { diff --git a/frontend/package.json b/frontend/package.json index 00edbe2..6fadcd5 100644 --- a/frontend/package.json +++ 
b/frontend/package.json @@ -18,7 +18,8 @@ "react": "^19.1.1", "react-dom": "^19.1.1", "react-router-dom": "^7.9.4", - "reflect-metadata": "^0.2.2" + "reflect-metadata": "^0.2.2", + "yaml": "^2.8.4" }, "devDependencies": { "@eslint/js": "^9.36.0", diff --git a/frontend/src/api/axios-mutator.ts b/frontend/src/api/axios-mutator.ts index ce47bca..5d8d62e 100644 --- a/frontend/src/api/axios-mutator.ts +++ b/frontend/src/api/axios-mutator.ts @@ -28,9 +28,11 @@ const isTransformablePayload = (payload: unknown) => { return typeof payload === "object"; }; +const SKIP_RECURSE_KEYS = new Set(["values", "valuesYaml"]); + AXIOS_INSTANCE.interceptors.request.use((config) => { if (isTransformablePayload(config.data)) { - config.data = keysToSnake(config.data); + config.data = keysToSnake(config.data, SKIP_RECURSE_KEYS); } if (isTransformablePayload(config.params)) { config.params = keysToSnake(config.params); diff --git a/frontend/src/api/generated-orval/api.schemas.ts b/frontend/src/api/generated-orval/api.schemas.ts index 41c7b7d..64ba63f 100644 --- a/frontend/src/api/generated-orval/api.schemas.ts +++ b/frontend/src/api/generated-orval/api.schemas.ts @@ -271,6 +271,7 @@ export interface GithubComOcdpClusterServiceInternalAdapterInputHttpDtoInstanceR name?: string; namespace?: string; registryId?: string; + replicas?: number; repository?: string; revision?: number; /** 实例当前状态 */ diff --git a/frontend/src/api/index.ts b/frontend/src/api/index.ts index 7ce62ff..da5bf8e 100644 --- a/frontend/src/api/index.ts +++ b/frontend/src/api/index.ts @@ -3,7 +3,7 @@ * Export configured API client, generated functions, and friendly aliases. 
*/ -type AxiosOptions any> = Parameters[2]; +type AxiosOptions unknown> = Parameters[2]; import { deleteClustersClusterId, @@ -76,6 +76,8 @@ import type { PutRegistriesRegistryIdPathParameters, } from './generated-orval/api.schemas'; +import { AXIOS_INSTANCE, customAxiosInstance } from './axios-mutator'; + import { GithubComOcdpClusterServiceInternalAdapterInputHttpDtoInstanceResponseLastOperation as GeneratedInstanceLastOperationEnum, GithubComOcdpClusterServiceInternalAdapterInputHttpDtoInstanceResponseStatus as GeneratedInstanceStatusEnum, @@ -91,9 +93,46 @@ export type * from './generated-orval/api.schemas'; // ---------- Friendly type aliases ---------- export type AuthResponse = GeneratedAuthResponse; export type RegisterBody = GeneratedRegisterRequest; +export type AdminCreateUserRequest = RegisterBody & { + role?: string; + workspaceId?: string; + namespace?: string; + defaultClusterId?: string; + quotaCpu?: string; + quotaMemory?: string; + quotaGpu?: string; + quotaGpuMemory?: string; + isActive?: boolean; + mustChangePassword?: boolean; +}; export type LoginBody = GeneratedLoginRequest; export type RefreshTokenBody = GeneratedRefreshTokenRequest; -export type UserResponse = GeneratedUserResponse; +export type UserResponse = GeneratedUserResponse & { + role?: string; + workspaceId?: string; + workspaceName?: string; + namespace?: string; + defaultClusterId?: string; + quotaCpu?: string; + quotaMemory?: string; + quotaGpu?: string; + quotaGpuMemory?: string; + isActive?: boolean; + mustChangePassword?: boolean; +}; +export type UpdateUserRequest = { + role?: string; + workspaceId?: string; + namespace?: string; + defaultClusterId?: string; + quotaCpu?: string; + quotaMemory?: string; + quotaGpu?: string; + quotaGpuMemory?: string; + isActive?: boolean; + mustChangePassword?: boolean; +}; +export type ValuesYamlResponse = { valuesYaml: string }; export type ClusterResponse = GeneratedClusterResponse; export type CreateClusterRequest = 
GeneratedCreateClusterRequest; @@ -104,10 +143,56 @@ export type CreateRegistryRequest = GeneratedCreateRegistryRequest; export type UpdateRegistryRequest = GeneratedUpdateRegistryRequest; export type RegistryHealthResponse = GeneratedRegistryHealthResponse; -export type InstanceResponse = GeneratedInstanceResponse; +export type InstanceResponse = GeneratedInstanceResponse & { + ownerId?: string; + ownerUsername?: string; +}; export type CreateInstanceRequest = GeneratedCreateInstanceRequest; export type UpdateInstanceRequest = GeneratedUpdateInstanceRequest; export type InstanceEntry = GeneratedInstanceEntry; +export type InstanceDiagnosticsResponse = { + instanceName?: string; + namespace?: string; + collectedAt?: string; + pods?: Array<{ + name?: string; + namespace?: string; + phase?: string; + nodeName?: string; + podIp?: string; + hostIp?: string; + restartCount?: number; + containers?: Array<{ + name?: string; + image?: string; + ready?: boolean; + restartCount?: number; + state?: string; + reason?: string; + message?: string; + }>; + conditions?: Array<{ type?: string; status?: string; reason?: string; message?: string }>; + creationTimestamp?: string; + }>; + services?: Array<{ + name?: string; + namespace?: string; + type?: string; + clusterIP?: string; + ports?: Array<{ name?: string; protocol?: string; port?: number; targetPort?: string; nodePort?: number }>; + }>; + events?: Array<{ + type?: string; + reason?: string; + message?: string; + involvedKind?: string; + involvedName?: string; + count?: number; + firstTimestamp?: string; + lastTimestamp?: string; + }>; + logs?: Array<{ pod?: string; container?: string; tailLines?: number; log?: string; error?: string }>; +}; export const INSTANCE_STATUS = GeneratedInstanceStatusEnum; export type InstanceStatus = NonNullable; export const INSTANCE_LAST_OPERATION = GeneratedInstanceLastOperationEnum; @@ -134,6 +219,17 @@ export type NodeMetricsResponse = GeneratedNodeMetricsResponse; export const login = 
postAuthLogin; export const register = postAuthRegister; export const refreshAuth = postAuthRefresh; +export const fetchAuthStatus = () => + customAxiosInstance<{ needsSetup: boolean; hasUsers: boolean }>({ url: "/auth/status", method: "GET" }); +export const setupInitialAdmin = (data: { username: string; password: string; email?: string }) => + customAxiosInstance<{ accessToken: string; refreshToken: string }>({ url: "/auth/setup", method: "POST", data }); +export const listUsers = () => customAxiosInstance({ url: "/users", method: "GET" }); +export const createUser = (data: AdminCreateUserRequest) => + customAxiosInstance({ url: "/users", method: "POST", data }); +export const updateUser = (userId: string, data: UpdateUserRequest) => + customAxiosInstance({ url: `/users/${encodeURIComponent(userId)}`, method: "PUT", data }); +export const deleteUser = (userId: string) => + customAxiosInstance({ url: `/users/${encodeURIComponent(userId)}`, method: "DELETE" }); export const listClusters = getClusters; export const createCluster = postClusters; @@ -148,6 +244,117 @@ export const getInstance = getClustersClusterIdInstancesInstanceId; export const updateInstance = putClustersClusterIdInstancesInstanceId; export const deleteInstance = deleteClustersClusterIdInstancesInstanceId; export const listInstanceEntries = getClustersClusterIdInstancesInstanceIdEntries; +export const scaleInstance = ( + clusterId: string, + instanceId: string, + body: { replicas: number; workload?: string }, +) => { + return customAxiosInstance<{ instance: InstanceResponse; replicas: number; message: string }>({ + url: `/clusters/${encodeURIComponent(clusterId)}/instances/${encodeURIComponent(instanceId)}/scale`, + method: "POST", + data: body, + }); +}; +export const getInstanceValuesDiff = ( + clusterId: string, + instanceId: string, +) => { + return customAxiosInstance<{ current: Record; defaults: Record }>({ + url: 
`/clusters/${encodeURIComponent(clusterId)}/instances/${encodeURIComponent(instanceId)}/values-diff`, + method: "GET", + }); +}; +export const getInstanceDiagnostics = ( + params: { clusterId: string; instanceId: string }, + options?: { tailLines?: number }, +) => + customAxiosInstance({ + url: `/clusters/${encodeURIComponent(params.clusterId)}/instances/${encodeURIComponent(params.instanceId)}/diagnostics`, + method: "GET", + params: options?.tailLines ? { tailLines: options.tailLines } : undefined, + }); + +/** + * Stream pod logs via SSE from the backend. + * Returns an AbortController to cancel the stream at any time. + */ +export function streamInstanceLogs( + clusterId: string, + instanceId: string, + pod: string, + container: string, + tailLines: number = 200, + onLine: (line: string) => void, + onDone: () => void, + onError: (err: Error) => void, +): AbortController { + const controller = new AbortController(); + const baseUrl = AXIOS_INSTANCE.defaults.baseURL ?? "/api/v1"; + const authHeader = AXIOS_INSTANCE.defaults.headers.common["Authorization"] as string | undefined; + + const params = new URLSearchParams({ pod, container, tailLines: String(tailLines) }); + const url = `${baseUrl}/clusters/${encodeURIComponent(clusterId)}/instances/${encodeURIComponent(instanceId)}/logs/stream?${params}`; + + const headers: Record = { Accept: "text/event-stream" }; + if (authHeader) { + headers["Authorization"] = authHeader; + } + + fetch(url, { headers, signal: controller.signal }) + .then(async (response) => { + if (!response.ok) { + const text = await response.text().catch(() => response.statusText); + onError(new Error(`HTTP ${response.status}: ${text}`)); + return; + } + const reader = response.body?.getReader(); + if (!reader) { + onError(new Error("ReadableStream not supported")); + return; + } + const decoder = new TextDecoder(); + let buffer = ""; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += 
decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + // Keep the last potentially-incomplete line in the buffer + buffer = lines.pop() ?? ""; + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || !trimmed.startsWith("data:")) continue; + const data = trimmed.slice(5).trim(); + if (data === "[DONE]") { + onDone(); + return; + } + if (data.startsWith("[ERROR]")) { + onError(new Error(data.slice(7).trim())); + continue; + } + onLine(data); + } + } + } catch (err: unknown) { + if (err instanceof DOMException && err.name === "AbortError") { + // Stream was intentionally cancelled - not an error + return; + } + onError(err instanceof Error ? err : new Error(String(err))); + } + onDone(); + }) + .catch((err: unknown) => { + if (err instanceof DOMException && err.name === "AbortError") { + return; + } + onError(err instanceof Error ? err : new Error(String(err))); + }); + + return controller; +} export const listRegistries = getRegistries; export const createRegistry = postRegistries; @@ -156,7 +363,13 @@ export const updateRegistry = putRegistriesRegistryId; export const deleteRegistry = deleteRegistriesRegistryId; export const checkRegistryHealth = getRegistriesRegistryIdHealth; -export const listRepositories = getRegistriesRegistryIdRepositories; +export const listRepositories = ( + params: GetRegistriesRegistryIdRepositoriesPathParameters, + options?: { artifactType?: 'chart' | 'all' }, +) => + getRegistriesRegistryIdRepositories(params, { + params: options?.artifactType ? 
{ artifact_type: options.artifactType } : undefined, + }); type ListArtifactsRequestOptions = AxiosOptions; export const listArtifacts = ( @@ -173,6 +386,11 @@ export const listArtifacts = ( export const getArtifact = getRegistriesRegistryIdRepositoriesRepositoryNameArtifactsReference; export const getValuesSchema = getRegistriesRegistryIdRepositoriesRepositoryNameArtifactsReferenceValuesSchema; +export const getValuesYaml = (params: GetValuesSchemaPathParameters) => + customAxiosInstance({ + url: `/registries/${encodeURIComponent(params.registryId)}/repositories/${encodeURIComponent(params.repositoryName)}/artifacts/${encodeURIComponent(params.reference)}/values-yaml`, + method: "GET", + }); export const listClusterMonitoring = getMonitoringClusters; export const getClusterMonitoring = getMonitoringClustersClusterId; diff --git a/frontend/src/app/App.tsx b/frontend/src/app/App.tsx index c24d891..bf68fea 100644 --- a/frontend/src/app/App.tsx +++ b/frontend/src/app/App.tsx @@ -16,18 +16,22 @@ import { getNavItems } from "./constants/navigation"; export default function App() { const location = useLocation(); const navigate = useNavigate(); - const { isAuthenticated, login, logout } = useAuth(); + const { isAuthenticated, login, logout, user } = useAuth(); // Generate navigation items based on current location const navItems = useMemo( - () => getNavItems(location.pathname, navigate), - [location.pathname, navigate] + () => getNavItems(location.pathname, navigate, user), + [location.pathname, navigate, user] ); + const displayName = user?.workspaceName + ? 
`${user.username || "User"} · ${user.workspaceName}` + : user?.username || "User"; return ( void; children?: NavItem[]; @@ -22,7 +25,7 @@ export interface NavItem { */ export interface PageInfo { title: string; - icon: React.ReactNode; + icon: ReactNode; } /** @@ -33,75 +36,95 @@ export interface PageInfo { */ export const getNavItems = ( currentPath: string, - navigate: (path: string) => void -): NavItem[] => [ - { - key: "home", - label: "Home", - icon: , - active: currentPath === "/home", - onClick: () => navigate("/home"), - }, - // Configuration - { - key: "configuration", - label: "Configuration", - icon: , - children: [ - { - key: "configuration-clusters", - label: "Clusters", - icon: , - active: currentPath === "/configuration/clusters", - onClick: () => navigate("/configuration/clusters"), - }, - { - key: "configuration-registries", - label: "Registries", - icon: , - active: currentPath === "/configuration/registries", - onClick: () => navigate("/configuration/registries"), - }, - ], - }, - // Monitoring - 监控资源状态 - { - key: "monitoring", - label: "Monitoring", - icon: , - children: [ - { - key: "monitoring-clusters", - label: "Clusters", - icon: , - active: currentPath === "/monitoring/clusters", - onClick: () => navigate("/monitoring/clusters"), - }, - ], - }, - // Artifact - 浏览和部署制品 - { - key: "artifact", - label: "Artifact", - icon: , - children: [ - { - key: "artifact-registries", - label: "Registries", - icon: , - active: currentPath === "/artifact/registries", - onClick: () => navigate("/artifact/registries"), - }, - { - key: "artifact-instances", - label: "Instances", - icon: , - active: currentPath === "/artifact/instances", - onClick: () => navigate("/artifact/instances"), - }, - ], - }, -]; + navigate: (path: string) => void, + user?: User | null +): NavItem[] => { + const canAccess = (path: string) => (user ? 
canAccessRoute(path, user) : true); + const items: NavItem[] = [ + { + key: "home", + label: "Home", + icon: , + active: currentPath === "/home", + onClick: () => navigate("/home"), + }, + { + key: "artifact-registries", + label: "Launch Instance", + icon: , + active: currentPath === "/artifact/registries", + onClick: () => navigate("/artifact/registries"), + }, + { + key: "artifact-instances", + label: "Instances", + icon: , + active: currentPath === "/artifact/instances", + onClick: () => navigate("/artifact/instances"), + }, + { + key: "monitoring-clusters", + label: "Cluster Monitoring", + icon: , + active: currentPath === "/monitoring/clusters", + onClick: () => navigate("/monitoring/clusters"), + }, + { + key: "setup", + label: "Setup", + icon: , + children: [ + { + key: "configuration-clusters", + label: "Clusters", + icon: , + active: currentPath === "/configuration/clusters", + onClick: () => navigate("/configuration/clusters"), + }, + { + key: "configuration-registries", + label: "Registries", + icon: , + active: currentPath === "/configuration/registries", + onClick: () => navigate("/configuration/registries"), + }, + { + key: "configuration-users", + label: "Users", + icon: , + active: currentPath === "/configuration/users", + onClick: () => navigate("/configuration/users"), + }, + ], + }, + ]; + + return items + .map((item) => ({ + ...item, + children: item.children?.filter((child) => { + const routePath = routePathByKey[child.key]; + return routePath ? 
canAccess(routePath) : true; + }), + })) + .filter((item) => { + const routePath = routePathByKey[item.key]; + if (routePath && !canAccess(routePath)) { + return false; + } + return !item.children || item.children.length > 0; + }); +}; + +const routePathByKey: Record = { + home: "/home", + "artifact-registries": "/artifact/registries", + "artifact-instances": "/artifact/instances", + "monitoring-clusters": "/monitoring/clusters", + "configuration-clusters": "/configuration/clusters", + "configuration-registries": "/configuration/registries", + "configuration-users": "/configuration/users", +}; /** * Get page header info based on current path @@ -110,21 +133,22 @@ export const getNavItems = ( */ export const getPageInfo = (pathname: string): PageInfo => { if (pathname === "/artifact/registries") { - return { title: "Artifact Browser", icon: }; + return { title: "Launch Instance", icon: }; } if (pathname === "/artifact/instances") { - return { title: "Artifact - Instances", icon: }; + return { title: "Instances", icon: }; } if (pathname === "/configuration/clusters") { - return { title: "Configuration - Clusters", icon: }; + return { title: "Setup - Clusters", icon: }; } if (pathname === "/configuration/registries") { - return { title: "Configuration - Registries", icon: }; + return { title: "Setup - Registries", icon: }; + } + if (pathname === "/configuration/users") { + return { title: "Setup - Users", icon: }; } if (pathname === "/monitoring/clusters") { return { title: "Monitoring - Clusters", icon: }; } return { title: "OCDP Platform", icon: }; }; - - diff --git a/frontend/src/app/providers/AuthContext.ts b/frontend/src/app/providers/AuthContext.ts index 627f005..1367928 100644 --- a/frontend/src/app/providers/AuthContext.ts +++ b/frontend/src/app/providers/AuthContext.ts @@ -6,18 +6,32 @@ import { createContext } from "react"; import type { AuthResponse } from "@/api"; +export type UserRole = "admin" | "user" | string; + export interface User { + userId?: 
string; username: string; - role?: string; + role: UserRole; + workspaceId?: string; + workspaceName?: string; + namespace?: string; + defaultClusterId?: string; + quotaCpu?: string; + quotaMemory?: string; + quotaGpu?: string; + quotaGpuMemory?: string; + permissions: string[]; + permissionVersion?: string | number; } export interface AuthContextType { token: string | null; user: User | null; isAuthenticated: boolean; + isHydratingUser: boolean; login: (response: AuthResponse) => void; logout: () => void; + refreshUser: () => Promise; } export const AuthContext = createContext(undefined); - diff --git a/frontend/src/app/providers/AuthProvider.tsx b/frontend/src/app/providers/AuthProvider.tsx index 6ad4c5f..d4f236d 100644 --- a/frontend/src/app/providers/AuthProvider.tsx +++ b/frontend/src/app/providers/AuthProvider.tsx @@ -6,8 +6,10 @@ import { useState, useEffect } from "react"; import type { ReactNode } from "react"; import type { AuthResponse } from "@/api"; -import { setAuthToken } from "@/api"; +import { AXIOS_INSTANCE, setAuthToken } from "@/api"; +import { globalCache } from "@/shared/services/artifact-cache"; import { AuthContext, type User } from "./AuthContext"; +import { normalizeUser } from "./auth-model"; interface AuthProviderProps { children: ReactNode; @@ -19,15 +21,66 @@ interface AuthProviderProps { * Manages authentication state and provides auth context */ export const AuthProvider = ({ children, devMode = false }: AuthProviderProps) => { - const [token, setToken] = useState(devMode ? 
"dev-token" : null); - const [user, setUser] = useState(null); + const [token, setToken] = useState(() => { + if (devMode) return "dev-token"; + return localStorage.getItem("access_token"); + }); + const [user, setUser] = useState(() => { + if (devMode) return null; + const storedUser = localStorage.getItem("user"); + if (!storedUser) return null; + try { + return normalizeUser(JSON.parse(storedUser)); + } catch { + localStorage.removeItem("user"); + return null; + } + }); + const [isHydratingUser, setIsHydratingUser] = useState(false); + + if (token) { + setAuthToken(token); + } + + const persistUser = (nextUser: User) => { + localStorage.setItem("user", JSON.stringify(nextUser)); + setUser(nextUser); + }; + + const refreshUser = async () => { + const activeToken = localStorage.getItem("access_token"); + if (!activeToken) { + return; + } + + setIsHydratingUser(true); + try { + setAuthToken(activeToken); + const response = await AXIOS_INSTANCE.get("/auth/me"); + const nextUser = normalizeUser(response.data as Record, user); + persistUser(nextUser); + } catch (error: any) { + if (error?.response?.status === 401) { + logout(); + } else if (error?.response?.status !== 404) { + console.info("Unable to hydrate user profile from /auth/me:", error); + } + } finally { + setIsHydratingUser(false); + } + }; // Initialize: read token and user from localStorage useEffect(() => { if (devMode) { const devUser: User = { + userId: "dev-user", username: "dev-user", role: "admin", + workspaceId: "dev-workspace", + workspaceName: "Development", + namespace: "ocdp-ws-development", + permissions: ["*"], }; localStorage.setItem("access_token", "dev-token"); localStorage.setItem("user", JSON.stringify(devUser)); @@ -37,18 +90,15 @@ export const AuthProvider = ({ children, devMode = false }: AuthProviderProps) = } const storedToken = localStorage.getItem("access_token"); - const storedUser = localStorage.getItem("user"); if (storedToken) { setToken(storedToken); + 
setAuthToken(storedToken); } - if (storedUser) { - try { - setUser(JSON.parse(storedUser)); - } catch (e) { - console.error("Failed to parse stored user:", e); - } + if (storedToken) { + void refreshUser(); } + // eslint-disable-next-line react-hooks/exhaustive-deps }, [devMode]); // Sync token changes to axios headers @@ -65,14 +115,12 @@ export const AuthProvider = ({ children, devMode = false }: AuthProviderProps) = localStorage.setItem("access_token", accessToken); localStorage.setItem("refresh_token", refreshToken); - const user: User = { - username: response.username || "", - role: "user", // 后端暂未返回 role,默认为 user - }; - localStorage.setItem("user", JSON.stringify(user)); + const nextUser = normalizeUser(response as Record); setToken(accessToken); - setUser(user); + setAuthToken(accessToken); + persistUser(nextUser); + void refreshUser(); }; // Handle logout @@ -83,6 +131,7 @@ export const AuthProvider = ({ children, devMode = false }: AuthProviderProps) = setToken(null); setUser(null); setAuthToken(null); + globalCache.clearAll(); }; return ( @@ -91,8 +140,10 @@ export const AuthProvider = ({ children, devMode = false }: AuthProviderProps) = token, user, isAuthenticated: !!token, + isHydratingUser, login, logout, + refreshUser, }} > {children} diff --git a/frontend/src/app/providers/auth-model.ts b/frontend/src/app/providers/auth-model.ts new file mode 100644 index 0000000..b40b6bb --- /dev/null +++ b/frontend/src/app/providers/auth-model.ts @@ -0,0 +1,149 @@ +import type { AuthResponse, UserResponse } from "@/api"; +import type { User } from "./AuthContext"; + +type AuthLike = Partial & Record; + +export const DEFAULT_USER_PERMISSIONS = [ + "home:view", + "configuration:clusters:manage_own", + "configuration:registries:manage_own", + "artifact:registries:view", + "artifact:instances:manage_own", +]; + +const ADMIN_ROLE = "admin"; + +const asString = (value: unknown): string | undefined => + typeof value === "string" && value.trim() ? 
value.trim() : undefined; + +const asStringArray = (value: unknown): string[] => + Array.isArray(value) + ? value.filter((item): item is string => typeof item === "string" && item.trim().length > 0) + : []; + +export const normalizeUser = (source: AuthLike, fallback?: User | null): User => { + const role = asString(source.role) ?? fallback?.role ?? "user"; + const permissions = asStringArray(source.permissions); + + return { + userId: asString(source.userId) ?? asString(source.id) ?? fallback?.userId, + username: asString(source.username) ?? fallback?.username ?? "", + role, + workspaceId: asString(source.workspaceId) ?? fallback?.workspaceId, + workspaceName: asString(source.workspaceName) ?? fallback?.workspaceName, + namespace: asString(source.namespace) ?? fallback?.namespace, + defaultClusterId: asString(source.defaultClusterId) ?? fallback?.defaultClusterId, + quotaCpu: asString(source.quotaCpu) ?? fallback?.quotaCpu, + quotaMemory: asString(source.quotaMemory) ?? fallback?.quotaMemory, + quotaGpu: asString(source.quotaGpu) ?? fallback?.quotaGpu, + quotaGpuMemory: asString(source.quotaGpuMemory) ?? fallback?.quotaGpuMemory, + permissions: permissions.length > 0 ? permissions : fallback?.permissions ?? DEFAULT_USER_PERMISSIONS, + permissionVersion: + typeof source.permissionVersion === "string" || typeof source.permissionVersion === "number" + ? 
source.permissionVersion + : fallback?.permissionVersion, + }; +}; + +export const isAdminUser = (user: User | null | undefined): boolean => + user?.role?.toLowerCase() === ADMIN_ROLE; + +export const hasPermission = ( + user: User | null | undefined, + permission: string, + fallbackAllowed = true +): boolean => { + if (!user) { + return false; + } + if (isAdminUser(user)) { + return true; + } + if (user.permissions.length === 0) { + return fallbackAllowed; + } + return user.permissions.includes(permission) || user.permissions.includes("*"); +}; + +export const canAccessRoute = (pathname: string, user: User | null | undefined): boolean => { + if (!user) { + return false; + } + + if (pathname === "/admin" || pathname.startsWith("/admin/")) return isAdminUser(user); + if (pathname === "/configuration/users" || pathname === "/configuration/workspaces") { + return isAdminUser(user); + } + + if (pathname === "/home") return hasPermission(user, "home:view"); + if (pathname === "/configuration/clusters") { + return ( + hasPermission(user, "configuration:clusters:manage", false) || + hasPermission(user, "configuration:clusters:manage_own") + ); + } + if (pathname === "/configuration/registries") { + return ( + hasPermission(user, "configuration:registries:manage", false) || + hasPermission(user, "configuration:registries:manage_own") + ); + } + if (pathname === "/artifact/registries") return hasPermission(user, "artifact:registries:view"); + if (pathname === "/artifact/instances") { + return ( + hasPermission(user, "artifact:instances:manage", false) || + hasPermission(user, "artifact:instances:manage_own") + ); + } + if (pathname === "/monitoring/clusters") { + return hasPermission(user, "monitoring:clusters:view", isAdminUser(user)); + } + + return true; +}; + +export type ResourceVisibility = "private" | "workspace_shared" | "global_shared" | string; + +export type ResourceWithAccess = { + visibility?: ResourceVisibility; + ownerId?: string; + allowedActions?: string[]; 
+}; + +export const canUseResourceAction = ( + resource: ResourceWithAccess, + action: "view" | "create" | "update" | "delete" | "test" | "launch", + user: User | null | undefined +): boolean => { + if (!user) { + return false; + } + if (isAdminUser(user)) { + return true; + } + if (Array.isArray(resource.allowedActions) && resource.allowedActions.length > 0) { + const allowedActions = resource.allowedActions.map((allowedAction) => allowedAction.toLowerCase()); + const aliases = actionAliases[action] ?? [action]; + return allowedActions.includes("*") || aliases.some((alias) => allowedActions.includes(alias)); + } + if (!resource.ownerId) { + return true; + } + return resource.ownerId === user.userId; +}; + +export const getVisibilityLabel = (visibility?: ResourceVisibility): string => { + if (visibility === "workspace_shared") return "Workspace"; + if (visibility === "global_shared") return "Global"; + if (visibility === "private") return "Private"; + return visibility ? visibility.replace(/_/g, " ") : "Private"; +}; + +const actionAliases: Record = { + view: ["view", "read"], + create: ["create", "add"], + update: ["update", "edit", "manage"], + delete: ["delete", "remove", "manage"], + test: ["test", "health", "manage"], + launch: ["launch", "deploy"], +}; diff --git a/frontend/src/app/routes/AccessDeniedPage.tsx b/frontend/src/app/routes/AccessDeniedPage.tsx new file mode 100644 index 0000000..57392e9 --- /dev/null +++ b/frontend/src/app/routes/AccessDeniedPage.tsx @@ -0,0 +1,24 @@ +import { ShieldAlert } from "lucide-react"; +import { Button } from "@/shared/components"; + +interface AccessDeniedPageProps { + onBackHome: () => void; +} + +export const AccessDeniedPage = ({ onBackHome }: AccessDeniedPageProps) => ( +
+
+ +
+

Access denied

+

+ Your current role or workspace permissions do not allow this page. If access was just granted, + sign out and sign back in to refresh the permission version. +

+
+ +
+
+); diff --git a/frontend/src/app/routes/AppRoutes.tsx b/frontend/src/app/routes/AppRoutes.tsx index f191b70..b4e11c0 100644 --- a/frontend/src/app/routes/AppRoutes.tsx +++ b/frontend/src/app/routes/AppRoutes.tsx @@ -4,17 +4,22 @@ */ import { Routes, Route, Navigate } from "react-router-dom"; +import type { ReactNode } from "react"; import { ProtectedRoute } from "./RouteGuard"; +import { AccessDeniedPage } from "./AccessDeniedPage"; import AppShell from "@/shared/components/layout/AppShell"; import { getPageInfo, type NavItem } from "../constants/navigation"; -import { useLocation } from "react-router-dom"; +import { useLocation, useNavigate } from "react-router-dom"; import type { AuthResponse } from "@/api"; +import type { User } from "../providers/AuthContext"; +import { canAccessRoute } from "../providers/auth-model"; // Feature pages import AuthPage from "@/features/auth/pages/AuthPage"; import HomePage from "@/features/home/pages/HomePage"; import ClusterConfigPage from "@/features/configuration/clusters/pages/ClusterConfigPage"; import RegistryConfigPage from "@/features/configuration/registries/pages/RegistryConfigPage"; +import UserManagementPage from "@/features/configuration/users/pages/UserManagementPage"; import ArtifactBrowserPage from "@/features/artifact/registries/pages/ArtifactBrowserPage"; import InstancesManagementPage from "@/features/artifact/instances/pages/InstancesManagementPage"; import MonitoringClustersPage from "@/features/monitoring/clusters/pages/MonitoringClustersPage"; @@ -23,6 +28,7 @@ import { ApiTest } from "@/components/ApiTest"; interface AppRoutesProps { isAuthenticated: boolean; userName?: string; + user: User | null; navItems: NavItem[]; onLogin: (tokens: AuthResponse) => void; onLogout: () => void; @@ -34,12 +40,31 @@ interface AppRoutesProps { export const AppRoutes = ({ isAuthenticated, userName = "User", + user, navItems, onLogin, onLogout, }: AppRoutesProps) => { const location = useLocation(); + const navigate = 
useNavigate(); const pageInfo = getPageInfo(location.pathname); + const shell = (children: ReactNode) => ( + + {children} + + ); + const protectedPage = (path: string, children: ReactNode) => ( + + {shell(children)} + + ); return ( @@ -58,102 +83,54 @@ export const AppRoutes = ({ {/* Protected routes - wrapped in AppShell */} - - - - - } + element={protectedPage("/home", )} /> - - - - - } + element={protectedPage("/configuration/clusters", )} /> - - - - - } + element={protectedPage("/configuration/registries", )} /> - - - - - } + element={protectedPage("/artifact/registries", )} /> - - - - - } + element={protectedPage("/artifact/instances", )} /> )} + /> + + navigate("/home")} />)} + /> + + )} + /> + + navigate("/home")} />)} + /> + + - - - + {shell( navigate("/home")} />)} } /> @@ -178,5 +155,3 @@ export const AppRoutes = ({ ); }; - - diff --git a/frontend/src/app/routes/RouteGuard.tsx b/frontend/src/app/routes/RouteGuard.tsx index 8aaf42a..abb71c6 100644 --- a/frontend/src/app/routes/RouteGuard.tsx +++ b/frontend/src/app/routes/RouteGuard.tsx @@ -5,9 +5,11 @@ import { Navigate } from "react-router-dom"; import type { ReactNode } from "react"; +import type { User } from "../providers/AuthContext"; interface RouteGuardProps { isAuthenticated: boolean; + isAllowed?: boolean; redirectTo?: string; children: ReactNode; } @@ -17,11 +19,16 @@ interface RouteGuardProps { * Redirects to auth page if not authenticated */ export const ProtectedRoute = ({ - isAuthenticated, + isAuthenticated, + isAllowed = true, redirectTo = "/", children }: RouteGuardProps) => { - return isAuthenticated ? <>{children} : ; + if (!isAuthenticated) { + return ; + } + + return isAllowed ? <>{children} : ; }; /** @@ -36,4 +43,6 @@ export const PublicRoute = ({ return !isAuthenticated ? <>{children} : ; }; +export const canUseRoute = (user: User | null, predicate?: (user: User) => boolean): boolean => + !predicate || (user ? 
predicate(user) : false); diff --git a/frontend/src/core/types/index.ts b/frontend/src/core/types/index.ts index 8d3788b..c98fa61 100644 --- a/frontend/src/core/types/index.ts +++ b/frontend/src/core/types/index.ts @@ -71,11 +71,66 @@ import type { ClusterMonitoring, ClusterMonitoringStatus, NodeMetricsResponse } export type NodeMetrics = NodeMetricsResponse; +export interface UserResourceUsage { + userId?: string; + userName?: string; + username?: string; + namespace?: string; + cpuUsed?: string; + usedCpu?: string; + cpuRequest?: string; + cpuLimit?: string; + memoryUsed?: string; + usedMemory?: string; + memoryRequest?: string; + memoryLimit?: string; + gpuUsed?: number; + usedGpu?: number; + gpuAllocated?: number; + gpuAllocation?: number; + gpuMemoryUsed?: string | number; + usedGpuMemory?: string | number; + gpuMemUsed?: string | number; + gpuMemoryAllocated?: string | number; + gpuMemAllocated?: string | number; + podCount?: number; + instanceCount?: number; + cpuRequests?: string; + cpuLimits?: string; + memoryRequests?: string; + memoryLimits?: string; + gpuRequests?: number; + gpuLimits?: number; + gpuMemoryRequestsMb?: number; + gpuMemoryLimitsMb?: number; +} + export interface ClusterMetrics extends ClusterMonitoring { /** Internal UI identifier (legacy) */ id?: string; nodes?: NodeMetrics[]; status?: ClusterMonitoringStatus | 'warning' | 'error'; + allocatedGpu?: number; + allocatedGpuMemoryMb?: number; + allocatedGpuMemoryMB?: number; + gpuMemoryRequestsMb?: number; + gpuMemoryLimitsMb?: number; + gpuAllocated?: number; + gpuAllocation?: number; + cpuRequests?: string; + cpuLimits?: string; + memoryRequests?: string; + memoryLimits?: string; + totalGpuMemory?: string | number; + usedGpuMemory?: string | number; + gpuMemoryUsed?: string | number; + totalGpuMem?: string | number; + usedGpuMem?: string | number; + userResources?: UserResourceUsage[]; + resourceUsageByUser?: UserResourceUsage[]; + userResourceUsage?: UserResourceUsage[]; + 
resourcesByUser?: UserResourceUsage[]; + userResourceRows?: UserResourceUsage[]; } // ==================== Common Types ==================== diff --git a/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx b/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx new file mode 100644 index 0000000..808b00b --- /dev/null +++ b/frontend/src/features/artifact/instances/components/DiagnosticsModal.tsx @@ -0,0 +1,358 @@ +import React, { useEffect, useMemo, useRef, useState } from "react"; +import { Activity, AlertTriangle, Box, Copy, FileText, RotateCw, Server, Terminal, X } from "lucide-react"; +import { getInstanceDiagnostics, streamInstanceLogs, type InstanceDiagnosticsResponse, type InstanceResponse } from "@/api"; +import { Button, Badge, LoadingState } from "@/shared/components"; +import { formatApiError } from "@/shared/utils"; +import { useToast } from "@/shared"; + +type TabKey = "summary" | "events" | "logs"; + +interface DiagnosticsModalProps { + instance: InstanceResponse; + onClose: () => void; +} + +export const DiagnosticsModal: React.FC = ({ instance, onClose }) => { + const { success, error: toastError } = useToast(); + const [data, setData] = useState(null); + const [loading, setLoading] = useState(true); + const [activeTab, setActiveTab] = useState("summary"); + const [streamingKey, setStreamingKey] = useState(null); + const [streamingLines, setStreamingLines] = useState([]); + const streamCtrlRef = useRef(null); + + const loadDiagnostics = async () => { + if (!instance.clusterId || !instance.id) return; + setLoading(true); + try { + setData(await getInstanceDiagnostics({ clusterId: instance.clusterId, instanceId: instance.id }, { tailLines: 300 })); + } catch (err) { + toastError(formatApiError(err) || "Failed to load diagnostics"); + } finally { + setLoading(false); + } + }; + + const startStream = (pod: string, container: string) => { + // Stop any existing stream first + stopStream(); + const key = 
`${pod}/${container}`; + setStreamingKey(key); + setStreamingLines([]); + const ctrl = streamInstanceLogs( + instance.clusterId!, + instance.id!, + pod, + container, + 200, + (line) => setStreamingLines((prev) => [...prev, line]), + () => { setStreamingKey(null); }, + (err) => { toastError(formatApiError(err) || "Stream error"); setStreamingKey(null); }, + ); + streamCtrlRef.current = ctrl; + }; + + const stopStream = () => { + if (streamCtrlRef.current) { + streamCtrlRef.current.abort(); + streamCtrlRef.current = null; + } + setStreamingKey(null); + setStreamingLines([]); + }; + + useEffect(() => { + void loadDiagnostics(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [instance.clusterId, instance.id]); + + // Cleanup stream on unmount + useEffect(() => { + return () => { + if (streamCtrlRef.current) { + streamCtrlRef.current.abort(); + } + }; + }, []); + + const combinedLogs = useMemo( + () => + (data?.logs ?? []) + .map((entry) => `# ${entry.pod || "pod"} / ${entry.container || "container"}\n${entry.error || entry.log || ""}`) + .join("\n\n"), + [data?.logs] + ); + + const copyLogs = async () => { + await navigator.clipboard.writeText(combinedLogs); + success("Logs copied"); + }; + + return ( +
+
+
+
+
+ + Runtime diagnostics +
+

{instance.name}

+

+ {instance.namespace} · {data?.collectedAt ? new Date(data.collectedAt).toLocaleString() : "live Kubernetes data"} +

+
+
+ {streamingKey ? ( + + ) : ( + + )} + +
+
+ +
+
+ setActiveTab("summary")} icon={Box} label="Describe" /> + setActiveTab("events")} icon={AlertTriangle} label="Events" /> + setActiveTab("logs")} icon={Terminal} label="Pod Logs" /> +
+
+ +
+ {loading ? ( + + ) : !data ? ( +
+ Diagnostics data is not available. +
+ ) : activeTab === "summary" ? ( + + ) : activeTab === "events" ? ( + + ) : ( + + )} +
+
+
+ ); +}; + +const TabButton: React.FC<{ + active: boolean; + icon: React.ComponentType<{ className?: string }>; + label: string; + onClick: () => void; +}> = ({ active, icon: Icon, label, onClick }) => ( + +); + +const SummaryTab = ({ data }: { data: InstanceDiagnosticsResponse }) => ( +
+
+ + + +
+
+

Pods

+ {(data.pods ?? []).length === 0 ? ( + + ) : ( + (data.pods ?? []).map((pod) => ( +
+
+
+

{pod.name}

+

+ {pod.nodeName || "unscheduled"} · podIP {pod.podIp || "-"} · restarts {pod.restartCount ?? 0} +

+
+ + {pod.phase || "Unknown"} + +
+
+ {(pod.containers ?? []).map((container) => ( +
+
+ {container.name} + + {container.state || "unknown"} + +
+

+ {container.image} +

+ {(container.reason || container.message) && ( +

{container.reason || container.message}

+ )} +
+ ))} +
+
+ )) + )} +
+
+

Services

+ {(data.services ?? []).length === 0 ? : null} + {(data.services ?? []).map((svc) => ( +
+
+ {svc.name} + {svc.type} +
+

ClusterIP {svc.clusterIP || "-"}

+
+ {(svc.ports ?? []).map((port) => ( + + {port.name || "port"} {port.port}:{port.targetPort} + + ))} +
+
+ ))} +
+
+); + +const EventsTab = ({ data }: { data: InstanceDiagnosticsResponse }) => ( +
+ {(data.events ?? []).length === 0 ? : null} + {(data.events ?? []).map((event, index) => ( +
+
+
+ {event.type || "Normal"} + {event.reason} +
+ {event.lastTimestamp ? new Date(event.lastTimestamp).toLocaleString() : ""} +
+

{event.message}

+

+ {event.involvedKind}/{event.involvedName} · count {event.count ?? 1} +

+
+ ))} +
+); + +const LogsTab = ({ + data, + combinedLogs, + onCopy, + streamingKey, + streamingLines, + onStartStream, + onStopStream, +}: { + data: InstanceDiagnosticsResponse; + combinedLogs: string; + onCopy: () => void; + streamingKey: string | null; + streamingLines: string[]; + onStartStream: (pod: string, container: string) => void; + onStopStream: () => void; +}) => { + const preRef = useRef(null); + + useEffect(() => { + if (streamingKey && preRef.current) { + preRef.current.scrollTop = preRef.current.scrollHeight; + } + }, [streamingLines, streamingKey]); + + return ( +
+
+ +
+ {(data.logs ?? []).length === 0 ? : null} + {(data.logs ?? []).map((entry) => { + const entryKey = `${entry.pod}/${entry.container}`; + const isStreaming = streamingKey === entryKey; + return ( +
+
+ + {entryKey} + {isStreaming && ( + + + Live + + )} + {isStreaming ? ( + + ) : ( + + )} +
+
+              {isStreaming
+                ? streamingLines.join("\n") || "Waiting for log data..."
+                : entry.error || entry.log || ""}
+            
+
+ ); + })} +
+ ); +}; + +const MetricCard = ({ icon: Icon, label, value }: { icon: React.ComponentType<{ className?: string }>; label: string; value: number }) => ( +
+
+ {label} + +
+

{value}

+
+); + +const EmptyLine = ({ text }: { text: string }) => ( +
{text}
+); diff --git a/frontend/src/features/artifact/instances/components/EntriesModal.tsx b/frontend/src/features/artifact/instances/components/EntriesModal.tsx index f567146..6e70d3b 100644 --- a/frontend/src/features/artifact/instances/components/EntriesModal.tsx +++ b/frontend/src/features/artifact/instances/components/EntriesModal.tsx @@ -321,7 +321,7 @@ export const EntriesModal: React.FC = ({ instance, onClose }) kubernetes: { color: "bg-green-600/20 text-green-400 border-green-500/30", label: "Live from Kubernetes" }, manifest: { color: "bg-blue-600/20 text-blue-400 border-blue-500/30", label: "From Helm Manifest" }, notes: { color: "bg-yellow-600/20 text-yellow-400 border-yellow-500/30", label: "From Helm Notes" }, - none: { color: "bg-gray-600/20 text-gray-400 border-gray-500/30", label: "No Data Available" }, + none: { color: "bg-slate-200/20 text-slate-500 border-gray-500/30", label: "No Data Available" }, }; const badge = badges[source as keyof typeof badges] || badges.none; @@ -335,11 +335,11 @@ export const EntriesModal: React.FC = ({ instance, onClose }) }; const renderService = (service: ServiceEntry, index: number) => ( -
+
-

{service.name || `Service ${index + 1}`}

-

Type: {service.type || 'Unknown'}

+

{service.name || `Service ${index + 1}`}

+

Type: {service.type || 'Unknown'}

{service.type || 'Unknown'} @@ -349,18 +349,18 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Cluster IP */} {service.cluster_ip && ( -
- Cluster IP: +
+ Cluster IP:
- {service.cluster_ip} + {service.cluster_ip}
@@ -369,10 +369,10 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {/* Ports */} {service.ports && service.ports.length > 0 && service.ports.map((port, idx) => ( -
- {port.name || `Port ${idx + 1}`}: +
+ {port.name || `Port ${idx + 1}`}:
- + {port.port} → {port.target_port} {port.protocol || 'TCP'} {port.node_port && ` (NodePort: ${port.node_port})`} @@ -386,7 +386,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })

LoadBalancer Entries:

{service.loadBalancer.ingress.map((ing, idx) => (
- + {ing.ip || ing.hostname}
@@ -396,19 +396,19 @@ export const EntriesModal: React.FC = ({ instance, onClose }) href={`http://${ing.ip}:${service.ports?.[0]?.port || 80}`} target="_blank" rel="noopener noreferrer" - className="p-1 hover:bg-gray-700 rounded transition" + className="p-1 hover:bg-slate-100 rounded transition" title="Open in browser" > @@ -423,12 +423,12 @@ export const EntriesModal: React.FC = ({ instance, onClose }) ); const renderIngress = (ingress: IngressEntry, index: number) => ( -
+
-

{ingress.name || `Ingress ${index + 1}`}

+

{ingress.name || `Ingress ${index + 1}`}

{ingress.class_name && ( -

Class: {ingress.class_name}

+

Class: {ingress.class_name}

)}
@@ -436,30 +436,30 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{ingress.rules?.map((rule, ruleIdx) => ( -
+
{(() => { const host = rule.host; if (!host) return null; return (
- {host} + {host}
@@ -470,7 +470,7 @@ export const EntriesModal: React.FC = ({ instance, onClose }) const serviceName = path.backend?.service?.name || "service"; const servicePort = path.backend?.service?.port ?? "-"; return ( -
+
• {path.path || '/'} → {serviceName}:{servicePort}
); @@ -489,20 +489,20 @@ export const EntriesModal: React.FC = ({ instance, onClose }) return (
-
+
{/* Header */} -
+
-

Instance Entries

-

+

Instance Entries

+

{instance.name} ({instance.namespace})

@@ -511,14 +511,14 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {loading ? (
- Loading entries... + Loading entries...
) : error ? (

{error}

@@ -527,7 +527,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Source Badge */}
-

Data Source:

+

Data Source:

{getSourceBadge(entries.source)}
@@ -536,7 +536,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
-

+

Services ({entries.services.length})

@@ -551,7 +551,7 @@ export const EntriesModal: React.FC = ({ instance, onClose })
-

+

Ingresses ({entries.ingresses.length})

@@ -564,9 +564,9 @@ export const EntriesModal: React.FC = ({ instance, onClose }) {/* Helm Notes (as fallback) */} {entries.notes && entries.source === "notes" && (
-

Helm Notes

-
-
+                  

Helm Notes

+
+
                       {entries.notes}
                     
@@ -579,8 +579,8 @@ export const EntriesModal: React.FC = ({ instance, onClose }) !entries.notes && (
-

No entries found for this instance

-

Data source: {entries.source || 'unknown'}

+

No entries found for this instance

+

Data source: {entries.source || 'unknown'}

)}
@@ -588,10 +588,10 @@ export const EntriesModal: React.FC = ({ instance, onClose })
{/* Footer */} -
+
diff --git a/frontend/src/features/artifact/instances/components/InstanceCard.tsx b/frontend/src/features/artifact/instances/components/InstanceCard.tsx index 2ab0565..18ef703 100644 --- a/frontend/src/features/artifact/instances/components/InstanceCard.tsx +++ b/frontend/src/features/artifact/instances/components/InstanceCard.tsx @@ -1,326 +1,284 @@ /** - * Instance Card Component - * Display instance information with action buttons + * Instance Card Component — horizontal row layout + * Compact, readable, with inline scale controls and action buttons */ import React from "react"; import { - Package, - Settings, - StopCircle, - RefreshCw, - CheckCircle, - XCircle, - Clock, - Network, - Box, - Calendar, - GitBranch, - Layers, - AlertTriangle, - History, - HelpCircle, + Box, Settings, StopCircle, CheckCircle, XCircle, Clock, + Network, Activity, GitBranch, Layers, User, + AlertTriangle, HelpCircle, Minus, Plus, Loader2, } from "lucide-react"; -import type { InstanceResponse, InstanceStatus } from "@/api"; -import { INSTANCE_LAST_OPERATION, INSTANCE_STATUS } from "@/api"; +import type { InstanceResponse } from "@/api"; +import { scaleInstance } from "@/api"; +import { useToast } from "@/shared"; +import { formatApiError } from "@/shared/utils"; interface InstanceCardProps { instance: InstanceResponse; onModify: (instance: InstanceResponse) => void; onTerminate: (instance: InstanceResponse) => void; - onRefresh: (instance: InstanceResponse) => void; onViewEntries: (instance: InstanceResponse) => void; + onViewDiagnostics: (instance: InstanceResponse) => void; + onScale?: (instance: InstanceResponse) => void; } type StatusVisual = { icon: React.ComponentType<{ className?: string }>; color: string; bg: string; - glow: string; + border: string; label: string; - defaultReason: string; }; -const STATUS_INFO_MAP: Record = { - [INSTANCE_STATUS.deployed]: { +const STATUS_INFO_MAP: Record = { + deployed: { icon: CheckCircle, - color: "text-emerald-400", - bg: "bg-gradient-to-r 
from-emerald-500/20 to-green-500/20 border-emerald-500/40", - glow: "shadow-emerald-500/20", + color: "text-emerald-500", + bg: "bg-emerald-50", + border: "border-emerald-400", label: "Deployed", - defaultReason: "Deployment completed successfully.", }, - [INSTANCE_STATUS.failed]: { + failed: { icon: XCircle, - color: "text-rose-400", - bg: "bg-gradient-to-r from-rose-500/20 to-red-500/20 border-rose-500/40", - glow: "shadow-rose-500/20", + color: "text-rose-500", + bg: "bg-rose-50", + border: "border-rose-400", label: "Failed", - defaultReason: "Last operation reported a failure.", }, - [INSTANCE_STATUS["pending-install"]]: { + "pending-install": { icon: Clock, - color: "text-amber-400", - bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40", - glow: "shadow-amber-500/20", + color: "text-amber-500", + bg: "bg-amber-50", + border: "border-amber-400", label: "Pending Install", - defaultReason: "Installation is in progress.", }, - [INSTANCE_STATUS["pending-upgrade"]]: { + "pending-upgrade": { icon: Clock, - color: "text-amber-400", - bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40", - glow: "shadow-amber-500/20", + color: "text-amber-500", + bg: "bg-amber-50", + border: "border-amber-400", label: "Pending Upgrade", - defaultReason: "Upgrade is in progress.", }, - [INSTANCE_STATUS["pending-rollback"]]: { + "pending-rollback": { icon: Clock, - color: "text-amber-400", - bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40", - glow: "shadow-amber-500/20", + color: "text-amber-500", + bg: "bg-amber-50", + border: "border-amber-400", label: "Pending Rollback", - defaultReason: "Rollback is in progress.", }, - [INSTANCE_STATUS["pending-delete"]]: { + "pending-delete": { icon: Clock, - color: "text-orange-400", - bg: "bg-gradient-to-r from-orange-500/20 to-red-500/20 border-orange-500/40", - glow: "shadow-orange-500/20", + color: "text-orange-500", + bg: "bg-orange-50", + border: 
"border-orange-400", label: "Pending Delete", - defaultReason: "Deletion is in progress.", }, - [INSTANCE_STATUS.superseded]: { - icon: History, - color: "text-indigo-300", - bg: "bg-gradient-to-r from-indigo-500/20 to-purple-500/20 border-indigo-500/40", - glow: "shadow-indigo-500/20", + superseded: { + icon: Layers, + color: "text-indigo-400", + bg: "bg-indigo-50", + border: "border-indigo-300", label: "Superseded", - defaultReason: "A newer revision has replaced this instance.", }, - [INSTANCE_STATUS.uninstalled]: { + uninstalled: { icon: StopCircle, - color: "text-slate-300", - bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-500/40", - glow: "shadow-slate-500/20", + color: "text-slate-500", + bg: "bg-slate-50", + border: "border-slate-300", label: "Uninstalled", - defaultReason: "Instance has been removed from the cluster.", }, - [INSTANCE_STATUS.unknown]: { + unknown: { icon: HelpCircle, - color: "text-slate-300", - bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-500/40", - glow: "shadow-slate-500/20", + color: "text-slate-400", + bg: "bg-slate-50", + border: "border-slate-300", label: "Unknown", - defaultReason: "Awaiting next state update.", }, }; -const LAST_OPERATION_LABELS: Record = { - [INSTANCE_LAST_OPERATION.install]: "Install", - [INSTANCE_LAST_OPERATION.upgrade]: "Upgrade", - [INSTANCE_LAST_OPERATION.rollback]: "Rollback", - [INSTANCE_LAST_OPERATION.delete]: "Delete", - [INSTANCE_LAST_OPERATION.sync]: "Sync", -}; - -function toTitleCase(value: string): string { - return value - .split(/[\s-]+/) - .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) - .join(" "); -} - export const InstanceCard: React.FC = ({ instance, onModify, onTerminate, - onRefresh, onViewEntries, + onViewDiagnostics, + onScale, }) => { - const normalizedStatus = (instance.status ?? INSTANCE_STATUS.unknown) as InstanceStatus; - const statusInfo = - STATUS_INFO_MAP[normalizedStatus] ?? 
STATUS_INFO_MAP[INSTANCE_STATUS.unknown]; + const [scaling, setScaling] = React.useState(false); + const { error: toastError } = useToast(); + + const statusKey = instance.status ?? "unknown"; + const statusInfo = STATUS_INFO_MAP[statusKey] ?? STATUS_INFO_MAP["unknown"]; const StatusIcon = statusInfo.icon; - const statusLabel = statusInfo.label.toUpperCase(); - const instanceName = instance.name || "Unnamed Instance"; - const repository = instance.repository || "unknown"; - const version = instance.version || "latest"; + + const instanceName = instance.name || "Unnamed"; + const chart = instance.chart || instance.repository || "—"; + const version = instance.version || "—"; const namespace = instance.namespace || "default"; - const revision = instance.revision ?? "-"; - const createdAtText = instance.createdAt - ? new Date(instance.createdAt).toLocaleDateString() - : "N/A"; + const revision = instance.revision ?? "—"; + const ownerLabel = ownerDisplayName(instance.ownerUsername, instance.ownerId); + + const currentReplicas: number = instance.replicas ?? 0; + const statusReason = - typeof instance.statusReason === "string" && instance.statusReason.trim().length > 0 + typeof instance.statusReason === "string" && instance.statusReason.trim() ? instance.statusReason.trim() - : statusInfo.defaultReason; - const rawOperation = - typeof instance.lastOperation === "string" ? instance.lastOperation.trim() : ""; - const lastOperationLabel = - rawOperation.length > 0 - ? LAST_OPERATION_LABELS[rawOperation] ?? toTitleCase(rawOperation) : null; + const lastError = - typeof instance.lastError === "string" ? instance.lastError.trim() : ""; + typeof instance.lastError === "string" && instance.lastError.trim() + ? 
instance.lastError.trim() + : ""; + + const canScale = instance.status === "deployed" || instance.status === "failed"; + + const handleScale = async (delta: number) => { + const newReplicas = Math.max(0, currentReplicas + delta); + if (newReplicas === currentReplicas) return; + if (!instance.clusterId || !instance.id) return; + + setScaling(true); + try { + const result = await scaleInstance(instance.clusterId, instance.id, { + replicas: newReplicas, + }); + onScale?.(result.instance ?? { ...instance, replicas: newReplicas }); + } catch (err) { + toastError(formatApiError(err) || "Scale failed"); + } finally { + setScaling(false); + } + }; return ( -
- {/* Decorative gradient overlay */} -
- - {/* Header with enhanced design */} -
-
-
- {/* Enhanced icon with glow effect */} -
- -
-
- -
-

- {instanceName} -

-
- -

- {repository} -

- - - {version} - -
-
-
+
+ {/* Left color bar (status) */} +
- {/* Enhanced Status Badge with glow */} -
- - - {statusLabel} - -
-
+ {/* Status icon + label */} +
+ + {statusInfo.label} +
-
- {statusReason} - {lastOperationLabel && ( - - Operation: {lastOperationLabel} + {/* Name + Chart info */} +
+
+

{instanceName}

+
+ + + {chart}:{version} - )} + + + {namespace} + + + + rev{revision} + + {ownerLabel && ( + + + {ownerLabel} + + )} +
- {/* Enhanced Content Grid */} -
-
- {/* Namespace */} -
-
- -

Namespace

-
-

- {namespace} + {/* Status message or error */} + {(statusReason || lastError) && ( +

+ {lastError ? ( +

+ + {lastError}

-
- - {/* Revision */} -
-
- -

Revision

-
-

- {revision} -

-
- - {/* Repository - Full Width */} -
-
- -

Repository

-
-

- {repository} -

-
- - {/* Launched Date - Full Width */} -
-
- -

Launched

-
-

- {createdAtText} -

-
+ ) : statusReason ? ( +

{statusReason}

+ ) : null}
+ )} - {lastError && ( -
-
- -
-
-

Last error

-

{lastError}

-
-
+ {/* Scale controls */} +
+ {canScale ? ( + <> + + + {currentReplicas} + + + + ) : ( + {currentReplicas} repl. )}
- {/* Enhanced Actions Bar */} -
-
-
- - - -
- -
- - - -
-
+ {/* Action buttons */} +
+ + + +
); }; + +const ownerDisplayName = (ownerUsername?: string, ownerId?: string): string => { + const username = ownerUsername?.trim(); + if (username) return username; + const id = ownerId?.trim(); + if (!id) return ""; + if (id.length <= 12) return id; + return `${id.slice(0, 8)}...${id.slice(-4)}`; +}; diff --git a/frontend/src/features/artifact/instances/components/ModifyModal.tsx b/frontend/src/features/artifact/instances/components/ModifyModal.tsx index d06df2f..780cf9f 100644 --- a/frontend/src/features/artifact/instances/components/ModifyModal.tsx +++ b/frontend/src/features/artifact/instances/components/ModifyModal.tsx @@ -5,21 +5,19 @@ */ import React, { useState, useEffect } from "react"; import { Settings } from "lucide-react"; +import { parse as parseYaml, stringify as stringifyYaml } from "yaml"; import type { InstanceResponse, UpdateInstanceRequest } from "@/api"; -import { getValuesSchema } from "@/api"; -import { - Modal, - Button, - FormField, - Input, +import { getInstance, getInstanceValuesDiff } from "@/api"; +import { + Modal, + Button, + FormField, + Input, Textarea, - Checkbox, ErrorState, LoadingState, Badge, - SchemaFormGenerator } from "@/shared/components"; -import type { JsonSchema } from "@/shared/components/form/SchemaFormGenerator"; interface ModifyModalProps { instance: InstanceResponse; @@ -35,77 +33,111 @@ export const ModifyModal: React.FC = ({ const [tag, setTag] = useState(""); const [description, setDescription] = useState(""); const [valuesYaml, setValuesYaml] = useState(""); - const [wait, setWait] = useState(true); - const [timeout, setTimeout_] = useState(300); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); - - // Values Schema support - const [loadingSchema, setLoadingSchema] = useState(false); - const [valuesSchema, setValuesSchema] = useState(null); - const [inputMethod, setInputMethod] = useState<'form' | 'yaml'>('yaml'); - const [formValues, setFormValues] = useState>({}); + const 
[modifiedKeys, setModifiedKeys] = useState([]); - // Initialize with current values + // Values Diff support + const [showDiff, setShowDiff] = useState(false); + const [loadingDiff] = useState(false); + const [diffData, setDiffData] = useState<{ + current: Record; + defaults: Record; + } | null>(null); + const [diffError] = useState(null); + + // Fetch full Helm values (via values-diff API) and instance detail useEffect(() => { setTag(instance.version || ""); - setDescription(""); // InstanceResponse doesn't have description field - - // Parse existing values - if (instance.values) { - try { - const parsedValues = typeof instance.values === 'string' - ? JSON.parse(instance.values) - : instance.values; - setFormValues(parsedValues); - setValuesYaml(typeof parsedValues === 'object' ? JSON.stringify(parsedValues, null, 2) : String(parsedValues)); - } catch (err) { - console.error('[ModifyModal] Failed to parse existing values:', err); - setValuesYaml(String(instance.values) || ""); - } - } + setDescription(""); - // Load values schema - loadValuesSchema(); + const loadData = async () => { + if (instance.clusterId && instance.id) { + // Load values diff first — gives us the full current Helm values + try { + const data = await getInstanceValuesDiff(instance.clusterId, instance.id); + if (data?.current && Object.keys(data.current).length > 0) { + const currentYaml = stringifyYaml(data.current, { lineWidth: 0 }); + setValuesYaml(currentYaml); + setDiffData({ current: data.current, defaults: data.defaults ?? 
{} }); + } + } catch (err) { + console.error('[ModifyModal] Failed to load values diff:', err); + // Fallback: try instance detail + try { + const detail = await getInstance({ clusterId: instance.clusterId, instanceId: instance.id }); + if (detail.values && Object.keys(detail.values).length > 0) { + const y = stringifyYaml(detail.values, { lineWidth: 0 }); + setValuesYaml(y); + } + } catch (err2) { + console.error('[ModifyModal] Failed to load instance detail:', err2); + } + } + } + }; + + loadData(); }, [instance]); - const loadValuesSchema = async () => { - if (!instance.registryId || !instance.repository || !instance.version) { - setValuesSchema(null); - setInputMethod('yaml'); - return; - } - - setLoadingSchema(true); + // Recompute modified keys when valuesYaml or diffData changes + useEffect(() => { + if (!diffData?.defaults || !valuesYaml) return; try { - const schemaResponse = await getValuesSchema({ - registryId: instance.registryId, - repositoryName: instance.repository, - reference: instance.version, - }); - const normalizedSchema = extractJsonSchema(schemaResponse); - setValuesSchema(normalizedSchema); - - if (normalizedSchema) { - setInputMethod('form'); - console.log(`[ModifyModal] Loaded values schema with ${Object.keys(normalizedSchema.properties ?? {}).length} properties`); - } else { - setInputMethod('yaml'); - console.log('[ModifyModal] No values schema available, using YAML input'); - } - } catch (err) { - console.error('[ModifyModal] Failed to load values schema:', err); - setValuesSchema(null); - setInputMethod('yaml'); - } finally { - setLoadingSchema(false); - } + const current = parseYaml(valuesYaml); + const defaults = diffData.defaults; + const changed: string[] = []; + const walkKeys = (curr: any, def: any, prefix: string) => { + if (curr === null || curr === undefined) return; + if (typeof curr !== 'object') return; + for (const key of Object.keys(curr)) { + const fullKey = prefix ? 
`${prefix}.${key}` : key; + if (JSON.stringify(curr[key]) !== JSON.stringify(def?.[key])) { + changed.push(fullKey); + } + if (typeof curr[key] === 'object' && curr[key] !== null && !Array.isArray(curr[key])) { + walkKeys(curr[key], def?.[key] ?? {}, fullKey); + } + } + }; + walkKeys(current, defaults, ''); + setModifiedKeys(changed); + } catch { /* ignore parse errors */ } + }, [valuesYaml, diffData]); + + + const applyDefaults = () => { + if (!diffData?.defaults) return; + setValuesYaml(stringifyYaml(diffData.defaults, { lineWidth: 0 })); }; - const handleFormValuesChange = (values: Record) => { - setFormValues(values); - // Also update YAML representation - setValuesYaml(JSON.stringify(values, null, 2)); + /** + * Render a values object as YAML lines, bolding keys that differ from defaults. + */ + const renderDiffValues = ( + values: Record, + compare: Record, + ): React.ReactNode => { + const yaml = stringifyYaml(values); + const lines = yaml.split("\n"); + return lines.map((line, i) => { + // Extract the key name from a YAML line + const keyMatch = line.match(/^(\s*)([a-zA-Z_][\w-]*)\s*:/); + if (keyMatch) { + const key = keyMatch[2]; + const keyChanged = + compare[key] !== undefined && + JSON.stringify(values[key]) !== JSON.stringify(compare[key]); + if (keyChanged) { + return ( + + {keyMatch[1]}{key}:{line.slice(keyMatch[0].length)} + + ); + } + } + return {line}; + }); }; const handleSubmit = async (e: React.FormEvent) => { @@ -114,9 +146,13 @@ export const ModifyModal: React.FC = ({ setError(null); try { + if (valuesYaml.trim()) { + parseValuesYaml(valuesYaml); + } const payload: UpdateInstanceRequest = { version: tag && tag !== instance.version ? tag : undefined, - values: valuesYaml.trim() ? 
JSON.parse(valuesYaml) : undefined, + description: description.trim() || undefined, + valuesYaml: valuesYaml.trim() || undefined, }; if (!instance.clusterId || !instance.id) { @@ -128,8 +164,8 @@ export const ModifyModal: React.FC = ({ await onConfirm(instance.clusterId, instance.id, payload); onClose(); } catch (err: unknown) { - if (err instanceof SyntaxError) { - setError("Invalid JSON/YAML values. Please fix the configuration."); + if (err instanceof Error && err.message.includes("YAML")) { + setError(err.message); } else { setError((err as Error).message || "Failed to modify instance"); } @@ -144,7 +180,7 @@ export const ModifyModal: React.FC = ({ onClose={onClose} title={`Modify Instance - ${instance.name || "Unnamed"}`} icon={Settings} - iconColor="text-blue-400" + iconColor="text-blue-600" size="lg" footer={ <> @@ -175,15 +211,15 @@ export const ModifyModal: React.FC = ({ )} {/* Current Info */} -
-

- Current Version: {instance.version || "N/A"} +

+

+ Current Version: {instance.version || "N/A"}

-

- Cluster: {instance.clusterId || "N/A"} +

+ Cluster: {instance.clusterId || "N/A"}

-

- Repository: {instance.repository || "N/A"} +

+ Repository: {instance.repository || "N/A"}

@@ -212,115 +248,119 @@ export const ModifyModal: React.FC = ({ /> - {/* Values Configuration */} -
-
- - {valuesSchema?.properties && ( -
- - + {/* Current Values — directly editable as YAML */} +
+ +

+ Editing current deployed values. The full YAML is submitted so nested chart values stay intact. +

+ {modifiedKeys.length > 0 && ( +
+ Modified: + {modifiedKeys.map((k) => ( + + {k} + + ))} +
+ )} +