Merge pull request 'dev' (#1 ) from dev into main

Reviewed-on: #1
fix: clear tsbuildinfo cache in frontend-build to prevent stale builds
2026-05-22 09:41:11 +00:00 · 2026-05-21 15:01:14 +08:00 · 2026-05-21 14:22:52 +08:00 · 2026-05-21 13:53:13 +08:00 · 2026-05-21 13:49:36 +08:00 · 2026-05-20 18:00:49 +08:00
83 changed files with 6421 additions and 2545 deletions
--- a/49
+++ b/49
@ -8,7 +8,7 @@ COMPOSE_BIN ?= docker compose
 ROOT_COMPOSE := docker-compose.yml
 COMPOSE := $(COMPOSE_BIN) -f $(ROOT_COMPOSE)
-.PHONY: help install run-2 clean-2 docker-dev docker-prod docker-up docker-down docker-logs docker-ps test
+.PHONY: help install up restart stop clean run-2 clean-2 docker-dev docker-prod docker-up docker-down docker-logs docker-ps test
 .DEFAULT_GOAL := help
@ -17,10 +17,14 @@ help:
 	@echo "OCDP commands"
 	@echo "────────────────────────────────────────"
 	@echo "  make install      Install local Go / frontend dependencies"
-	@echo "  make run-2        Build and start full Docker Compose stack in background"
+	@echo "  make up           Build and start the complete platform: DB + API + web gateway"
-	@echo "  make docker-dev   Alias of run-2, kept for old docs / muscle memory"
+	@echo "  make restart      Restart the complete platform without removing volumes"
-	@echo "  make docker-prod  Alias of run-2"
+	@echo "  make stop         Stop containers, keep volumes"
-	@echo "  make docker-up    Alias of run-2"
+	@echo "  make clean        Stop containers and remove project volumes"
 	@echo "  make run-2        Alias of up, kept for old docs / muscle memory"
 	@echo "  make docker-dev   Alias of up"
 	@echo "  make docker-prod  Alias of up"
 	@echo "  make docker-up    Alias of up"
 	@echo "  make docker-down  Stop containers, keep volumes"
 	@echo "  make clean-2      Stop containers and remove project volumes"
 	@echo "  make docker-logs  Follow Compose logs"
@ -37,32 +41,45 @@ install:
 	@echo "→ Installing frontend dependencies"
 	@cd frontend && npm ci
-run-2:
+up:
 	@echo "→ Building and starting OCDP stack"
-	@$(COMPOSE) up --build -d postgres backend nginx
+	@$(COMPOSE) up --build -d
 	@echo ""
-	@$(COMPOSE) ps
+	@$(COMPOSE) ps -a
 	@echo ""
 	@echo "Web:     http://localhost:$${WEB_HTTP_PORT:-18080}"
 	@echo "Backend: http://localhost:$${BACKEND_PORT:-18081}/health"
-docker-dev: run-2
+restart:
 	@echo "→ Restarting OCDP stack"
 	@$(COMPOSE) up --build -d --force-recreate
 	@$(COMPOSE) ps -a
-docker-prod: run-2
+stop:
 docker-up: run-2
 docker-down:
 	@$(COMPOSE) down --remove-orphans
-clean-2:
+clean:
 	@$(COMPOSE) down -v --remove-orphans
 # Backwards-compatible aliases: each legacy start target only depends on `up`.
 run-2: up
 docker-dev: up
 docker-prod: up
 docker-up: up
 # Legacy stop/clean names delegate to `stop` and `clean` through a recursive $(MAKE) call.
 docker-down:
 	@$(MAKE) stop
 clean-2:
 	@$(MAKE) clean
 docker-logs:
 	@$(COMPOSE) logs -f
 docker-ps:
-	@$(COMPOSE) ps
+	@$(COMPOSE) ps -a
 test:
 	@test/readme-deployment-refresh.sh
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# OCDP - Open Cloud Deployment Platform
+# OCDP - One Click Deployment Platform
 OCDP 是一个面向 Kubernetes 的大模型推理部署平台。当前核心场景是：用户在页面选择 Harbor 中的 `vllm-serve` Helm Chart，填写实例名称、命名空间和 values 后，后端从 Harbor 拉取封装好的 OCI Helm Chart，并通过 Helm SDK 部署到已配置好的 Kubernetes 集群。
@ -29,9 +29,9 @@ ocdp-go/
 │   └── internal/bootstrap/      # 首次启动数据注入
 ├── frontend/                    # React + Vite 前端
 ├── infra/nginx/                 # Nginx 网关配置和 TLS 证书
-├── docker-compose.yml           # 本地完整部署：PostgreSQL + Backend + 前端 build + Nginx
+├── docker-compose.yml           # 本地完整部署入口：PostgreSQL + Backend + 前端 build job + Nginx
 ├── backend/docker-compose.yml   # PostgreSQL + Backend + pgAdmin
-├── Makefile                     # 推荐入口：install / run-2 / docker-dev / docker-down
+├── Makefile                     # 推荐入口：up / restart / stop / docker-logs / docker-ps
 └── tasks/                       # Agent 工作记录
 ```
@ -75,7 +75,8 @@ BOOTSTRAP_REGISTRY_USER=admin-or-user
 BOOTSTRAP_REGISTRY_PASS=change-me
 BOOTSTRAP_REGISTRY_INSECURE=false
-# Kubernetes 集群 bootstrap，名称列表用逗号分隔
+# Kubernetes 集群 bootstrap，需要显式启用并设置名称列表
 BOOTSTRAP_ENABLE_CLUSTERS=true
 BOOTSTRAP_CLUSTERS=cluster1,cluster2
 BOOTSTRAP_CLUSTER_CLUSTER1_HOST=https://x.x.x.x:6443
 BOOTSTRAP_CLUSTER_CLUSTER1_DESC=GPU Cluster 1
@ -119,29 +120,23 @@ GOSUMDB=sum.golang.google.cn
 ## 推荐部署流程
-当前推荐使用根目录 Makefile。`docker-dev`、`docker-prod`、`docker-up` 都是兼容旧文档的别名，实际会启动同一套完整 Docker Compose 栈：PostgreSQL、Backend、前端静态构建和 Nginx。
+`.env` 文件为可选配置。不提供 `.env` 时，系统以空白状态启动，首次访问时展示管理员注册页面（Setup），第一个注册用户即为管理员。
 ```bash
-# 1. 在根目录检查 .env
+# 1. 克隆代码
-ls .env
+git clone https://gitea.bwgdi.com/OCDP/ocdp-go.git
 cd ocdp-go
-# 2. 可选：安装本地依赖。只部署 Docker 栈时不是必须，但这个命令可用。
+# 2. 构建并后台启动完整平台（无需 .env）
-make install
+make up
-# 3. 如果默认高位端口仍被其他项目占用，再临时换端口
+# 3. 打开浏览器访问 http://<host>:18080
-export WEB_HTTP_PORT=18080
+# 首次访问会看到 Initial Setup 页面，创建管理员账号和密码即可开始使用
-export WEB_HTTPS_PORT=18443
+```
 export BACKEND_PORT=18081
 export POSTGRES_PORT=15432
-# 4. 构建并后台启动完整栈
+有 `.env` 时，可以预注入初始管理员账号、Registry 和 Cluster（用于开发/测试）：
 make run-2
-# 兼容旧文档，也可以执行：
+# 4. 查看服务；postgres/backend/nginx 应为 Up，frontend-build Exited(0) 正常
 make docker-dev
 make docker-prod
 # 5. 查看服务
 make docker-ps
 ```
@ -152,14 +147,23 @@ make docker-ps
 - Swagger UI：http://localhost:${BACKEND_PORT:-18081}/api/docs
 - Nginx 健康检查：http://localhost:${WEB_HTTP_PORT:-18080}/healthz
-没有 Make 时，直接用根目录 Compose 文件即可。注意要加 `--build`，因为后端镜像和前端静态资源需要构建：
+兼容旧文档的命令仍可用，但只是 `make up` 的别名：
 ```bash
-docker compose up --build -d postgres backend nginx
+make run-2
-docker compose ps
+make docker-dev
 make docker-prod
 make docker-up
 ```
-如果直接执行 `docker compose up`，Compose 也会使用同一个完整栈；但在代码或 Dockerfile 改动后建议显式加 `--build`，避免复用旧镜像。
+没有 Make 时，直接用根目录 Compose 文件：
 ```bash
 docker compose up --build -d
 docker compose ps -a
 ```
 代码、Dockerfile、前端资源变更后都建议使用 `make up` 或 `docker compose up --build -d`，避免复用旧镜像或旧前端静态资源。
 ## 验证部署
@ -168,10 +172,18 @@ docker compose ps
 curl http://localhost:${BACKEND_PORT:-18081}/health
 curl http://localhost:${WEB_HTTP_PORT:-18080}/healthz
-# 登录，返回 token。把 password 替换成 .env 里的 BOOTSTRAP_ADMIN_PASS。
+# 检查是否需要初始化管理员（无 .env 部署时返回 needsSetup: true）
 curl http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/status
 # 初始化管理员账号（仅限尚无管理员时可用）
 curl -s -X POST http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/setup \
  -H "Content-Type: application/json" \
  -d '{"username":"admin","password":"your-password"}'
 # 登录
 curl -s -X POST http://localhost:${BACKEND_PORT:-18081}/api/v1/auth/login \
  -H "Content-Type: application/json" \
-  -d '{"username":"admin","password":"<BOOTSTRAP_ADMIN_PASS>"}'
+  -d '{"username":"admin","password":"your-password"}'
 # 查看 bootstrap 是否生效，需要带 Bearer token
 curl http://localhost:${BACKEND_PORT:-18081}/api/v1/registries \
@ -210,20 +222,30 @@ ADMIN_PASS="<BOOTSTRAP_ADMIN_PASS>" \
 ## 常用运维命令
 ```bash
 # 一条命令启动/更新完整平台
 make up
 # 强制重建并重启完整平台
 make restart
 # 查看当前状态；需要关注 postgres/backend/nginx 是否 Up
 make docker-ps
 docker compose ps -a
 # 查看日志
 make docker-logs
-# 重启后端
+# 只重启后端
 docker compose restart backend
-# 如果后端容器被重建过，Nginx 可能仍缓存旧 upstream IP；只需重启本项目 Nginx
+# 只重启 Web 网关
 docker compose restart nginx
-# 停止本项目服务，但保留数据卷
+# 停止本项目服务，保留数据库和前端构建卷
-make docker-down
+make stop
-# 清理本项目容器和数据卷，谨慎使用
+# 清理本项目容器和数据卷，谨慎使用，会删除 PostgreSQL 数据
-make clean-2
+make clean
 ```
 ## 本地开发与测试
@ -253,7 +275,8 @@ docker compose -f backend/docker-compose.yml --profile mock up -d backend-mock
 ## 注意事项
 - 不要为了端口冲突停止其他项目；优先通过 `WEB_HTTP_PORT`、`WEB_HTTPS_PORT`、`BACKEND_PORT`、`POSTGRES_PORT` 换端口。当前默认端口已经是 `18080/18443/18081/15432`。
- 如果旧文档提到 `make docker-dev`、`make docker-prod`，现在这些命令仍可用，都会启动同一套 Docker 栈。
+- `frontend-build` 是一次性构建任务，退出码 `0` 是正常状态；前端页面由 `nginx` 容器提供。若只看到 backend/postgres 在运行，请执行 `make up` 或 `docker compose up --build -d` 恢复完整栈。
 - 如果旧文档提到 `make docker-dev`、`make docker-prod`，现在这些命令仍可用，都会调用 `make up` 启动同一套 Docker 栈。
 - 如果之前用旧配置启动失败过，PostgreSQL 卷里可能残留旧的加密数据，表现为 `/api/v1/clusters` 或 `/api/v1/registries` 解密失败。开发/重装环境可执行 `make clean-2 && make docker-dev` 重新初始化；生产环境不要直接删卷，应先备份数据库。
 - `vllm-serve` 必须以 Helm Chart OCI artifact 的形式存在于 Harbor 中；后端会寻找 Helm Chart layer 并保存为 `.tgz`。
 - Harbor 浏览使用 `/api/v2.0/projects`、project repositories 和 artifacts API。若 robot 账号无法列项目或 artifacts，页面会显示明确错误；请检查 Harbor 项目成员/robot 权限，而不是给普通用户开放全局 catalog。
--- a/backend/cmd/api/main.go
+++ b/backend/cmd/api/main.go
@ -34,6 +34,7 @@ import (
 	"github.com/ocdp/cluster-service/internal/adapter/input/http/rest"
 	"github.com/ocdp/cluster-service/internal/adapter/output"
 	"github.com/ocdp/cluster-service/internal/adapter/output/k8s"
 	"github.com/ocdp/cluster-service/internal/bootstrap"
 	"github.com/ocdp/cluster-service/internal/domain/service"
 	"github.com/ocdp/cluster-service/internal/pkg/authz"
@ -78,6 +79,12 @@ func main() {
 		passwordHasher,
 		tokenGenerator,
 	)
 	authService.SetUserLifecycleCleanup(
 		repos.InstanceRepo,
 		repos.ClusterRepo,
 		repos.BindingRepo,
 		repos.TenantKubeClient,
 	)
 	clusterService := service.NewClusterService(
 		repos.ClusterRepo,
@ -104,10 +111,14 @@ func main() {
 	)
 	instanceService.SetDiagnosticsClient(repos.DiagnosticsClient)
 	instanceService.SetTenantProvisioning(repos.WorkspaceRepo, repos.TenantKubeClient)
 	instanceService.SetScaleClient(k8s.NewScaleClient())
 	instanceService.SetUserRepository(repos.UserRepo)
 	monitoringService := service.NewMonitoringService(
 		repos.ClusterRepo,
 		repos.MetricsClient,
 		repos.InstanceRepo,
 		repos.UserRepo,
 	)
 	workspaceService := service.NewWorkspaceService(
@ -241,8 +252,10 @@ func setupRouter(
 	api := router.PathPrefix("/api/v1").Subrouter()
 	// ===== 认证路由 =====
-	api.HandleFunc("/auth/login", authHandler.Login)
+	api.HandleFunc("/auth/login", authHandler.Login).Methods(http.MethodPost)
-	api.HandleFunc("/auth/refresh", authHandler.RefreshToken)
+	api.HandleFunc("/auth/refresh", authHandler.RefreshToken).Methods(http.MethodPost)
 	api.HandleFunc("/auth/status", authHandler.AuthStatus).Methods(http.MethodGet)
 	api.HandleFunc("/auth/setup", authHandler.Setup).Methods(http.MethodPost)
 	protected := api.PathPrefix("").Subrouter()
 	protected.Use(authMiddleware(authService))
@ -260,6 +273,8 @@ func setupRouter(
 	protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.UpdateCluster).Methods(http.MethodPut)
 	protected.HandleFunc("/clusters/{cluster_id}", clusterHandler.DeleteCluster).Methods(http.MethodDelete)
 	protected.HandleFunc("/clusters/{cluster_id}/health", clusterHandler.GetClusterHealth).Methods(http.MethodGet)
 	protected.HandleFunc("/clusters/{cluster_id}/stats", monitoringHandler.GetClusterStats).Methods(http.MethodGet)
 	protected.HandleFunc("/clusters/{cluster_id}/kubeconfig", workspaceHandler.IssueClusterKubeconfig).Methods(http.MethodGet)
 	// ===== Registry 路由 =====
 	protected.HandleFunc("/registries", registryHandler.CreateRegistry).Methods(http.MethodPost)
@ -271,7 +286,9 @@ func setupRouter(
 	// ===== Artifact 路由 =====
 	protected.HandleFunc("/registries/{registry_id}/repositories", artifactHandler.ListRepositories).Methods(http.MethodGet)
 	protected.HandleFunc("/repositories/{repository_name:.+}/tags", artifactHandler.ListRepositoryTags).Methods(http.MethodGet)
 	protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts", artifactHandler.ListArtifacts).Methods(http.MethodGet)
 	protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/tags", artifactHandler.ListRepositoryTags).Methods(http.MethodGet)
 	protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}", artifactHandler.GetArtifact).Methods(http.MethodGet)
 	protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-schema", artifactHandler.GetArtifactValuesSchema).Methods(http.MethodGet)
 	protected.HandleFunc("/registries/{registry_id}/repositories/{repository_name:.+}/artifacts/{reference}/values-yaml", artifactHandler.GetArtifactValuesYAML).Methods(http.MethodGet)
@ -285,10 +302,13 @@ func setupRouter(
 	protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/entries", instanceHandler.ListInstanceEntries).Methods(http.MethodGet)
 	protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/diagnostics", instanceHandler.GetInstanceDiagnostics).Methods(http.MethodGet)
 	protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/logs/stream", instanceHandler.StreamInstanceLogs).Methods(http.MethodGet)
 	protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/scale", instanceHandler.ScaleInstance).Methods(http.MethodPost)
 	protected.HandleFunc("/clusters/{cluster_id}/instances/{instance_id}/values-diff", instanceHandler.GetInstanceValuesDiff).Methods(http.MethodGet)
 	// ===== Monitoring 路由 =====
 	protected.HandleFunc("/monitoring/clusters", monitoringHandler.ListClusterMonitoring).Methods(http.MethodGet)
 	protected.HandleFunc("/monitoring/clusters/{cluster_id}", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet)
 	protected.HandleFunc("/monitoring/clusters/{cluster_id}/metrics", monitoringHandler.GetClusterMonitoring).Methods(http.MethodGet)
 	protected.HandleFunc("/monitoring/clusters/{cluster_id}/nodes", monitoringHandler.GetNodeMetrics).Methods(http.MethodGet)
 	protected.HandleFunc("/monitoring/summary", monitoringHandler.GetMonitoringSummary).Methods(http.MethodGet)
@ -354,15 +374,16 @@ func loggingMiddleware(next http.Handler) http.Handler {
 // corsMiddleware CORS 中间件
 func corsMiddleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		// 设置 CORS 头
 		origin := r.Header.Get("Origin")
-		if origin == "" {
+		if origin != "" {
-			origin = "*"
+			w.Header().Add("Vary", "Origin")
 			if corsOriginAllowed(origin) {
 				w.Header().Set("Access-Control-Allow-Origin", origin)
 				w.Header().Set("Access-Control-Allow-Credentials", "true")
 			}
 		}
 		w.Header().Set("Access-Control-Allow-Origin", origin)
 		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
 		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
 		w.Header().Set("Access-Control-Allow-Credentials", "true")
 		w.Header().Set("Access-Control-Max-Age", "86400")
 		// 处理 OPTIONS 预检请求
@ -374,3 +395,47 @@ func corsMiddleware(next http.Handler) http.Handler {
 		next.ServeHTTP(w, r)
 	})
 }
 // corsOriginAllowed reports whether origin, after trimming surrounding
 // whitespace, exactly matches one of the entries returned by
 // corsAllowedOrigins. A blank origin is never allowed.
 func corsOriginAllowed(origin string) bool {
 	candidate := strings.TrimSpace(origin)
 	if candidate != "" {
 		permitted := corsAllowedOrigins()
 		for i := range permitted {
 			if permitted[i] == candidate {
 				return true
 			}
 		}
 	}
 	return false
 }
 func corsAllowedOrigins() []string {
 	configured := strings.TrimSpace(os.Getenv("CORS_ALLOWED_ORIGINS"))
 	if configured == "" {
 		configured = strings.TrimSpace(os.Getenv("ALLOWED_ORIGINS"))
 	}
 	if configured == "" {
 		return []string{
 			"http://localhost:3000",
 			"http://localhost:5173",
 			"http://localhost:8080",
 			"http://localhost:18080",
 			"http://localhost:18081",
 			"http://127.0.0.1:3000",
 			"http://127.0.0.1:5173",
 			"http://127.0.0.1:8080",
 			"http://127.0.0.1:18080",
 			"http://127.0.0.1:18081",
 			"http://10.6.80.114:18080",
 		}
 	}
 	origins := make([]string, 0)
 	for _, origin := range strings.Split(configured, ",") {
 		origin = strings.TrimSpace(origin)
 		if origin == "" || origin == "*" {
 			continue
 		}
 		origins = append(origins, origin)
 	}
 	return origins
 }
--- a/backend/cmd/api/main_test.go
+++ b/backend/cmd/api/main_test.go
@ -0,0 +1,50 @@
 package main
 import (
 	"net/http"
 	"net/http/httptest"
 	"testing"
 )
 // TestCORSMiddlewareAllowsDefaultLocalhostOrigin checks that, with no origin
 // configuration in the environment, a request from http://localhost:5173 gets
 // its origin echoed back together with the credentials header.
 func TestCORSMiddlewareAllowsDefaultLocalhostOrigin(t *testing.T) {
 	// Blank out both variables so the built-in default origin list applies.
 	t.Setenv("CORS_ALLOWED_ORIGINS", "")
 	t.Setenv("ALLOWED_ORIGINS", "")
 	req := httptest.NewRequest(http.MethodGet, "/health", nil)
 	req.Header.Set("Origin", "http://localhost:5173")
 	rec := httptest.NewRecorder()
 	// The wrapped handler only writes 200; the assertions below look at headers.
 	corsMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)
 	})).ServeHTTP(rec, req)
 	if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "http://localhost:5173" {
 		t.Fatalf("expected localhost origin to be allowed, got %q", got)
 	}
 	if got := rec.Header().Get("Access-Control-Allow-Credentials"); got != "true" {
 		t.Fatalf("expected credentials header for allowed origin, got %q", got)
 	}
 }
 // TestCORSMiddlewareDoesNotReflectDisallowedOrigin checks that a preflight
 // request from an origin outside the configured allow-list is answered with
 // 204 without reaching the wrapped handler, and without the Allow-Origin or
 // Allow-Credentials headers.
 func TestCORSMiddlewareDoesNotReflectDisallowedOrigin(t *testing.T) {
 	// Only https://app.example.com is allowed for this test.
 	t.Setenv("CORS_ALLOWED_ORIGINS", "https://app.example.com")
 	t.Setenv("ALLOWED_ORIGINS", "")
 	req := httptest.NewRequest(http.MethodOptions, "/api/v1/auth/login", nil)
 	req.Header.Set("Origin", "https://evil.example.com")
 	rec := httptest.NewRecorder()
 	// The wrapped handler fails the test if the OPTIONS request is passed through.
 	corsMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		t.Fatal("preflight should not call next handler")
 	})).ServeHTTP(rec, req)
 	if got := rec.Code; got != http.StatusNoContent {
 		t.Fatalf("expected preflight status %d, got %d", http.StatusNoContent, got)
 	}
 	if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "" {
 		t.Fatalf("expected disallowed origin not to be reflected, got %q", got)
 	}
 	if got := rec.Header().Get("Access-Control-Allow-Credentials"); got != "" {
 		t.Fatalf("expected credentials header to be omitted for disallowed origin, got %q", got)
 	}
 }
--- a/backend/internal/adapter/input/http/dto/auth_dto.go
+++ b/backend/internal/adapter/input/http/dto/auth_dto.go
@ -1,19 +1,47 @@
 package dto
 import "strings"
 // RegisterRequest 用户注册请求
 type RegisterRequest struct {
-	Username           string `json:"username" binding:"required"`
+	Username                string `json:"username" binding:"required"`
-	Password           string `json:"password" binding:"required,min=6"`
+	Password                string `json:"password" binding:"required,min=6"`
-	Role               string `json:"role,omitempty"`
+	Role                    string `json:"role,omitempty"`
-	WorkspaceID        string `json:"workspaceId,omitempty"`
+	WorkspaceID             string `json:"workspaceId,omitempty"`
-	Namespace          string `json:"namespace,omitempty"`
+	WorkspaceIDSnake        string `json:"workspace_id,omitempty"`
-	DefaultClusterID   string `json:"defaultClusterId,omitempty"`
+	Namespace               string `json:"namespace,omitempty"`
-	QuotaCPU           string `json:"quotaCpu,omitempty"`
+	DefaultClusterID        string `json:"defaultClusterId,omitempty"`
-	QuotaMemory        string `json:"quotaMemory,omitempty"`
+	DefaultClusterIDSnake   string `json:"default_cluster_id,omitempty"`
-	QuotaGPU           string `json:"quotaGpu,omitempty"`
+	QuotaCPU                string `json:"quotaCpu,omitempty"`
-	QuotaGPUMem        string `json:"quotaGpuMemory,omitempty"`
+	QuotaCPUSnake           string `json:"quota_cpu,omitempty"`
-	IsActive           *bool  `json:"isActive,omitempty"`
+	QuotaMemory             string `json:"quotaMemory,omitempty"`
-	MustChangePassword *bool  `json:"mustChangePassword,omitempty"`
+	QuotaMemorySnake        string `json:"quota_memory,omitempty"`
 	QuotaGPU                string `json:"quotaGpu,omitempty"`
 	QuotaGPUSnake           string `json:"quota_gpu,omitempty"`
 	QuotaGPUMem             string `json:"quotaGpuMemory,omitempty"`
 	QuotaGPUMemSnake        string `json:"quota_gpu_memory,omitempty"`
 	IsActive                *bool  `json:"isActive,omitempty"`
 	IsActiveSnake           *bool  `json:"is_active,omitempty"`
 	MustChangePassword      *bool  `json:"mustChangePassword,omitempty"`
 	MustChangePasswordSnake *bool  `json:"must_change_password,omitempty"`
 }
 // Normalize merges the snake_case alternate fields into their camelCase
 // counterparts. A camelCase value that is non-blank (string) or non-nil
 // (*bool) always wins; otherwise the snake_case value is copied over.
 // Calling it on a nil receiver is a no-op.
 func (r *RegisterRequest) Normalize() {
 	if r == nil {
 		return
 	}
 	// Each entry pairs a camelCase destination with its snake_case fallback.
 	stringPairs := []struct {
 		dst      *string
 		fallback string
 	}{
 		{&r.WorkspaceID, r.WorkspaceIDSnake},
 		{&r.DefaultClusterID, r.DefaultClusterIDSnake},
 		{&r.QuotaCPU, r.QuotaCPUSnake},
 		{&r.QuotaMemory, r.QuotaMemorySnake},
 		{&r.QuotaGPU, r.QuotaGPUSnake},
 		{&r.QuotaGPUMem, r.QuotaGPUMemSnake},
 	}
 	for _, pair := range stringPairs {
 		*pair.dst = firstNonBlank(*pair.dst, pair.fallback)
 	}
 	// Pointer flags: nil means "not supplied", so only then use the alternate.
 	preferSet := func(primary, alternate *bool) *bool {
 		if primary != nil {
 			return primary
 		}
 		return alternate
 	}
 	r.IsActive = preferSet(r.IsActive, r.IsActiveSnake)
 	r.MustChangePassword = preferSet(r.MustChangePassword, r.MustChangePasswordSnake)
 }
 // LoginRequest 用户登录请求
@ -22,6 +50,13 @@ type LoginRequest struct {
 	Password string `json:"password" binding:"required"`
 }
 // SetupRequest is the request body for registering the initial administrator.
 // NOTE(review): Password carries only `required`, while RegisterRequest.Password
 // also carries `min=6` — confirm whether the initial admin password is meant
 // to have no minimum length, or whether the handler validates it separately.
 type SetupRequest struct {
 	Username string `json:"username" binding:"required"`
 	Password string `json:"password" binding:"required"`
 	Email    string `json:"email,omitempty"`
 }
 // RefreshTokenRequest 刷新 Token 请求
 type RefreshTokenRequest struct {
 	RefreshToken string `json:"refreshToken" binding:"required"`
@ -68,14 +103,47 @@ type UserResponse struct {
 // UpdateUserRequest 管理员更新用户状态/角色请求
 type UpdateUserRequest struct {
-	Role               string `json:"role,omitempty"`
+	Role                    string `json:"role,omitempty"`
-	WorkspaceID        string `json:"workspaceId,omitempty"`
+	WorkspaceID             string `json:"workspaceId,omitempty"`
-	Namespace          string `json:"namespace,omitempty"`
+	WorkspaceIDSnake        string `json:"workspace_id,omitempty"`
-	DefaultClusterID   string `json:"defaultClusterId,omitempty"`
+	Namespace               string `json:"namespace,omitempty"`
-	QuotaCPU           string `json:"quotaCpu,omitempty"`
+	DefaultClusterID        string `json:"defaultClusterId,omitempty"`
-	QuotaMemory        string `json:"quotaMemory,omitempty"`
+	DefaultClusterIDSnake   string `json:"default_cluster_id,omitempty"`
-	QuotaGPU           string `json:"quotaGpu,omitempty"`
+	QuotaCPU                string `json:"quotaCpu,omitempty"`
-	QuotaGPUMem        string `json:"quotaGpuMemory,omitempty"`
+	QuotaCPUSnake           string `json:"quota_cpu,omitempty"`
-	IsActive           *bool  `json:"isActive,omitempty"`
+	QuotaMemory             string `json:"quotaMemory,omitempty"`
-	MustChangePassword *bool  `json:"mustChangePassword,omitempty"`
+	QuotaMemorySnake        string `json:"quota_memory,omitempty"`
 	QuotaGPU                string `json:"quotaGpu,omitempty"`
 	QuotaGPUSnake           string `json:"quota_gpu,omitempty"`
 	QuotaGPUMem             string `json:"quotaGpuMemory,omitempty"`
 	QuotaGPUMemSnake        string `json:"quota_gpu_memory,omitempty"`
 	IsActive                *bool  `json:"isActive,omitempty"`
 	IsActiveSnake           *bool  `json:"is_active,omitempty"`
 	MustChangePassword      *bool  `json:"mustChangePassword,omitempty"`
 	MustChangePasswordSnake *bool  `json:"must_change_password,omitempty"`
 }
 // Normalize merges the snake_case alternate fields into their camelCase
 // counterparts. A camelCase value that is non-blank (string) or non-nil
 // (*bool) always wins; otherwise the snake_case value is copied over.
 // Calling it on a nil receiver is a no-op.
 func (r *UpdateUserRequest) Normalize() {
 	if r == nil {
 		return
 	}
 	// Each entry pairs a camelCase destination with its snake_case fallback.
 	stringPairs := []struct {
 		dst      *string
 		fallback string
 	}{
 		{&r.WorkspaceID, r.WorkspaceIDSnake},
 		{&r.DefaultClusterID, r.DefaultClusterIDSnake},
 		{&r.QuotaCPU, r.QuotaCPUSnake},
 		{&r.QuotaMemory, r.QuotaMemorySnake},
 		{&r.QuotaGPU, r.QuotaGPUSnake},
 		{&r.QuotaGPUMem, r.QuotaGPUMemSnake},
 	}
 	for _, pair := range stringPairs {
 		*pair.dst = firstNonBlank(*pair.dst, pair.fallback)
 	}
 	// Pointer flags: nil means "not supplied", so only then use the alternate.
 	preferSet := func(primary, alternate *bool) *bool {
 		if primary != nil {
 			return primary
 		}
 		return alternate
 	}
 	r.IsActive = preferSet(r.IsActive, r.IsActiveSnake)
 	r.MustChangePassword = preferSet(r.MustChangePassword, r.MustChangePasswordSnake)
 }
 // firstNonBlank returns primary unchanged when it contains at least one
 // non-whitespace character; otherwise it returns alternate as given.
 func firstNonBlank(primary, alternate string) string {
 	if strings.TrimSpace(primary) == "" {
 		return alternate
 	}
 	return primary
 }
--- a/backend/internal/adapter/input/http/dto/auth_dto_test.go
+++ b/backend/internal/adapter/input/http/dto/auth_dto_test.go
@ -0,0 +1,51 @@
 package dto
 import "testing"
 // TestRegisterRequestNormalizeUsesSnakeCaseAlternates checks that Normalize
 // copies every snake_case alternate into its camelCase field when the
 // camelCase field was left unset.
 func TestRegisterRequestNormalizeUsesSnakeCaseAlternates(t *testing.T) {
 	active := false
 	mustChange := true
 	// Only the snake_case fields are populated.
 	req := RegisterRequest{
 		WorkspaceIDSnake:        "workspace-1",
 		DefaultClusterIDSnake:   "cluster-1",
 		QuotaCPUSnake:           "2",
 		QuotaMemorySnake:        "4Gi",
 		QuotaGPUSnake:           "1",
 		QuotaGPUMemSnake:        "10000",
 		IsActiveSnake:           &active,
 		MustChangePasswordSnake: &mustChange,
 	}
 	req.Normalize()
 	if req.WorkspaceID != "workspace-1" || req.DefaultClusterID != "cluster-1" {
 		t.Fatalf("expected snake case workspace/cluster fields to normalize, got %#v", req)
 	}
 	if req.QuotaCPU != "2" || req.QuotaMemory != "4Gi" || req.QuotaGPU != "1" || req.QuotaGPUMem != "10000" {
 		t.Fatalf("expected snake case quota fields to normalize, got %#v", req)
 	}
 	// An explicit false must be carried over rather than treated as "unset".
 	if req.IsActive == nil || *req.IsActive {
 		t.Fatalf("expected is_active=false to normalize, got %#v", req.IsActive)
 	}
 	if req.MustChangePassword == nil || !*req.MustChangePassword {
 		t.Fatalf("expected must_change_password=true to normalize, got %#v", req.MustChangePassword)
 	}
 }
 // TestUpdateUserRequestNormalizeKeepsCamelCasePrimary checks that when both
 // spellings of a field are supplied, Normalize keeps the camelCase value.
 func TestUpdateUserRequestNormalizeKeepsCamelCasePrimary(t *testing.T) {
 	// Both spellings are set to different values so the winner is observable.
 	req := UpdateUserRequest{
 		DefaultClusterID:      "camel-cluster",
 		DefaultClusterIDSnake: "snake-cluster",
 		QuotaCPU:              "3",
 		QuotaCPUSnake:         "4",
 	}
 	req.Normalize()
 	if req.DefaultClusterID != "camel-cluster" {
 		t.Fatalf("expected camelCase defaultClusterId to win, got %q", req.DefaultClusterID)
 	}
 	if req.QuotaCPU != "3" {
 		t.Fatalf("expected camelCase quotaCpu to win, got %q", req.QuotaCPU)
 	}
 }
--- a/backend/internal/adapter/input/http/dto/instance_dto.go
+++ b/backend/internal/adapter/input/http/dto/instance_dto.go
@ -2,25 +2,25 @@ package dto
 // CreateInstanceRequest 创建实例请求
 type CreateInstanceRequest struct {
-	Name           string                 `json:"name" binding:"required"`
+	Name          string                 `json:"name" binding:"required"`
-	Namespace      string                 `json:"namespace" binding:"required"`
+	Namespace     string                 `json:"namespace" binding:"required"`
-	RegistryID     string                 `json:"registryId" binding:"required"`
+	RegistryID    string                 `json:"registryId" binding:"required"`
-	RegistryIDAlt  string                 `json:"registry_id"`
+	RegistryIDAlt string                 `json:"registry_id"`
-	Repository     string                 `json:"repository" binding:"required"`
+	Repository    string                 `json:"repository" binding:"required"`
-	Tag            string                 `json:"tag" binding:"required"`
+	Tag           string                 `json:"tag" binding:"required"`
-	Description    string                 `json:"description"`
+	Description   string                 `json:"description"`
-	Values         map[string]interface{} `json:"values"`
+	Values        map[string]interface{} `json:"values"`
-	ValuesYAML     string                 `json:"valuesYaml"`
+	ValuesYAML    string                 `json:"valuesYaml"`
-	ValuesYAMLAlt  string                 `json:"values_yaml"`
+	ValuesYAMLAlt string                 `json:"values_yaml"`
 }
 // UpdateInstanceRequest 更新实例请求
 type UpdateInstanceRequest struct {
-	Version      string                 `json:"version"`
+	Version       string                 `json:"version"`
-	Description  string                 `json:"description"`
+	Description   string                 `json:"description"`
-	Values       map[string]interface{} `json:"values"`
+	Values        map[string]interface{} `json:"values"`
-	ValuesYAML   string                 `json:"valuesYaml"`
+	ValuesYAML    string                 `json:"valuesYaml"`
-	ValuesYAMLAlt string                `json:"values_yaml"`
+	ValuesYAMLAlt string                 `json:"values_yaml"`
 }
 // Normalize 将多种命名风格的字段合并到统一字段
@ -67,12 +67,14 @@ type InstanceResponse struct {
 	Status         string                 `json:"status"`
 	WorkspaceID    string                 `json:"workspaceId"`
 	OwnerID        string                 `json:"ownerId"`
 	OwnerUsername  string                 `json:"ownerUsername,omitempty"`
 	AllowedActions []string               `json:"allowedActions,omitempty"`
 	StatusReason   string                 `json:"statusReason,omitempty"`
 	LastOperation  string                 `json:"lastOperation,omitempty"`
 	LastError      string                 `json:"lastError,omitempty"`
 	Revision       int                    `json:"revision"`
 	Values         map[string]interface{} `json:"values,omitempty"`
 	Replicas       int                    `json:"replicas"`
 	CreatedAt      string                 `json:"createdAt"`
 	UpdatedAt      string                 `json:"updatedAt"`
 }
@ -206,6 +208,25 @@ type InstanceEventDiagnostics struct {
 	LastTimestamp  string `json:"lastTimestamp,omitempty"`
 }
 // ScaleInstanceRequest is the request body for scaling an instance up or down.
 // NOTE(review): if the `binding` tags are enforced by a go-playground/validator
 // style binder, `required` on a plain int rejects the zero value, so a
 // scale-to-zero request ("replicas": 0) would fail validation — confirm how
 // this DTO is validated and whether scaling to zero must be supported.
 type ScaleInstanceRequest struct {
 	Replicas int    `json:"replicas" binding:"required"`
 	// Workload has no `required` tag; presumably it selects which workload to
 	// scale — TODO confirm against the handler.
 	Workload string `json:"workload"`
 }
 // ScaleInstanceResponse is the response body returned after scaling an instance.
 // It carries an instance payload, a replica count and a message.
 type ScaleInstanceResponse struct {
 	Instance *InstanceResponse `json:"instance"`
 	Replicas int               `json:"replicas"`
 	Message  string            `json:"message"`
 }
 // InstanceValuesDiffResponse is the response body for an instance's values diff.
 // It returns two value maps, serialized as "current" and "defaults".
 // NOTE(review): presumably these are the instance's applied values and the
 // chart's default values — confirm against the handler.
 type InstanceValuesDiffResponse struct {
 	Current  map[string]interface{} `json:"current"`
 	Defaults map[string]interface{} `json:"defaults"`
 }
 type InstancePodLogResponse struct {
 	Pod       string `json:"pod"`
 	Container string `json:"container"`
--- a/backend/internal/adapter/input/http/dto/monitoring_dto.go
+++ b/backend/internal/adapter/input/http/dto/monitoring_dto.go
@ -8,29 +8,56 @@ import (
 // ClusterMetricsResponse 集群监控响应
 type ClusterMetricsResponse struct {
-	ClusterID       string                `json:"clusterId"`
+	ClusterID            string                      `json:"clusterId"`
-	ClusterName     string                `json:"clusterName"`
+	ClusterName          string                      `json:"clusterName"`
-	Status          string                `json:"status"`
+	Status               string                      `json:"status"`
-	Uptime          string                `json:"uptime"`
+	Uptime               string                      `json:"uptime"`
-	NodeCount       int                   `json:"nodeCount"`
+	NodeCount            int                         `json:"nodeCount"`
-	PodCount        int                   `json:"podCount"`
+	PodCount             int                         `json:"podCount"`
-	LastCheck       time.Time             `json:"lastCheck"`
+	LastCheck            time.Time                   `json:"lastCheck"`
-	TotalCPU        string                `json:"totalCpu"`
+	TotalCPU             string                      `json:"totalCpu"`
-	TotalMemory     string                `json:"totalMemory"`
+	TotalMemory          string                      `json:"totalMemory"`
-	TotalGPU        int                   `json:"totalGpu"`
+	TotalGPU             int                         `json:"totalGpu"`
-	UsedCPU         string                `json:"usedCpu"`
+	UsedCPU              string                      `json:"usedCpu"`
-	UsedMemory      string                `json:"usedMemory"`
+	UsedMemory           string                      `json:"usedMemory"`
-	UsedGPU         int                   `json:"usedGpu"`
+	UsedGPU              int                         `json:"usedGpu"`
-	CPUUsage        float64               `json:"cpuUsage"`
+	CPUUsage             float64                     `json:"cpuUsage"`
-	MemoryUsage     float64               `json:"memoryUsage"`
+	MemoryUsage          float64                     `json:"memoryUsage"`
-	GPUUsage        float64               `json:"gpuUsage"`
+	GPUUsage             float64                     `json:"gpuUsage"`
-	MaxNodeCPU      string                `json:"maxNodeCpu"`
+	CPURequests          string                      `json:"cpuRequests,omitempty"`
-	MaxNodeMemory   string                `json:"maxNodeMemory"`
+	CPULimits            string                      `json:"cpuLimits,omitempty"`
-	MaxNodeGPU      int                   `json:"maxNodeGpu"`
+	MemoryRequests       string                      `json:"memoryRequests,omitempty"`
-	MaxNodeCPUUsage float64               `json:"maxNodeCpuUsage"`
+	MemoryLimits         string                      `json:"memoryLimits,omitempty"`
-	MaxNodeMemUsage float64               `json:"maxNodeMemUsage"`
+	GPURequests          int64                       `json:"gpuRequests,omitempty"`
-	MaxNodeGPUUsage float64               `json:"maxNodeGpuUsage"`
+	GPULimits            int64                       `json:"gpuLimits,omitempty"`
-	Nodes           []NodeMetricsResponse `json:"nodes,omitempty"`
+	GPUMemoryRequestsMB  int64                       `json:"gpuMemoryRequestsMb,omitempty"`
 	GPUMemoryLimitsMB    int64                       `json:"gpuMemoryLimitsMb,omitempty"`
 	AllocatedGPU         int64                       `json:"allocatedGpu,omitempty"`
 	AllocatedGPUMemoryMB int64                       `json:"allocatedGpuMemoryMb,omitempty"`
 	ResourceUsageByUser  []UserResourceUsageResponse `json:"resourceUsageByUser,omitempty"`
 	MaxNodeCPU           string                      `json:"maxNodeCpu"`
 	MaxNodeMemory        string                      `json:"maxNodeMemory"`
 	MaxNodeGPU           int                         `json:"maxNodeGpu"`
 	MaxNodeCPUUsage      float64                     `json:"maxNodeCpuUsage"`
 	MaxNodeMemUsage      float64                     `json:"maxNodeMemUsage"`
 	MaxNodeGPUUsage      float64                     `json:"maxNodeGpuUsage"`
 	Nodes                []NodeMetricsResponse       `json:"nodes,omitempty"`
 }
 // UserResourceUsageResponse is one entry of ClusterMetricsResponse.ResourceUsageByUser:
 // the resource figures reported for a single user.
 // CPU and memory figures are strings; GPU figures are integers.
 // NOTE(review): the "MB" suffix presumably means megabytes — confirm the unit
 // against the code that fills these fields.
 type UserResourceUsageResponse struct {
 	UserID              string `json:"userId"`
 	Username            string `json:"username"`
 	WorkspaceID         string `json:"workspaceId"`
 	InstanceCount       int    `json:"instanceCount"`
 	PodCount            int    `json:"podCount"`
 	CPURequests         string `json:"cpuRequests"`
 	CPULimits           string `json:"cpuLimits"`
 	MemoryRequests      string `json:"memoryRequests"`
 	MemoryLimits        string `json:"memoryLimits"`
 	GPURequests         int64  `json:"gpuRequests"`
 	GPULimits           int64  `json:"gpuLimits"`
 	GPUMemoryRequestsMB int64  `json:"gpuMemoryRequestsMb"`
 	GPUMemoryLimitsMB   int64  `json:"gpuMemoryLimitsMb"`
 }
 // NodeMetricsResponse 节点监控响应
@ -72,28 +99,59 @@ type MonitoringSummaryResponse struct {
 // ToClusterMetricsResponse 转换为响应
 func ToClusterMetricsResponse(m *entity.ClusterMetrics) *ClusterMetricsResponse {
 	resp := &ClusterMetricsResponse{
-		ClusterID:       m.ClusterID,
+		ClusterID:            m.ClusterID,
-		ClusterName:     m.ClusterName,
+		ClusterName:          m.ClusterName,
-		Status:          m.Status,
+		Status:               m.Status,
-		Uptime:          m.Uptime,
+		Uptime:               m.Uptime,
-		NodeCount:       m.NodeCount,
+		NodeCount:            m.NodeCount,
-		PodCount:        m.PodCount,
+		PodCount:             m.PodCount,
-		LastCheck:       m.LastCheck,
+		LastCheck:            m.LastCheck,
-		TotalCPU:        m.TotalCPU,
+		TotalCPU:             m.TotalCPU,
-		TotalMemory:     m.TotalMemory,
+		TotalMemory:          m.TotalMemory,
-		TotalGPU:        m.TotalGPU,
+		TotalGPU:             m.TotalGPU,
-		UsedCPU:         m.UsedCPU,
+		UsedCPU:              m.UsedCPU,
-		UsedMemory:      m.UsedMemory,
+		UsedMemory:           m.UsedMemory,
-		UsedGPU:         m.UsedGPU,
+		UsedGPU:              m.UsedGPU,
-		CPUUsage:        m.CPUUsage,
+		CPUUsage:             m.CPUUsage,
-		MemoryUsage:     m.MemoryUsage,
+		MemoryUsage:          m.MemoryUsage,
-		GPUUsage:        m.GPUUsage,
+		GPUUsage:             m.GPUUsage,
-		MaxNodeCPU:      m.MaxNodeCPU,
+		CPURequests:          m.CPURequests,
-		MaxNodeMemory:   m.MaxNodeMemory,
+		CPULimits:            m.CPULimits,
-		MaxNodeGPU:      m.MaxNodeGPU,
+		MemoryRequests:       m.MemoryRequests,
-		MaxNodeCPUUsage: m.MaxNodeCPUUsage,
+		MemoryLimits:         m.MemoryLimits,
-		MaxNodeMemUsage: m.MaxNodeMemUsage,
+		GPURequests:          m.GPURequests,
-		MaxNodeGPUUsage: m.MaxNodeGPUUsage,
+		GPULimits:            m.GPULimits,
 		GPUMemoryRequestsMB:  m.GPUMemoryRequestsMB,
 		GPUMemoryLimitsMB:    m.GPUMemoryLimitsMB,
 		AllocatedGPU:         m.AllocatedGPU,
 		AllocatedGPUMemoryMB: m.AllocatedGPUMemoryMB,
 		MaxNodeCPU:           m.MaxNodeCPU,
 		MaxNodeMemory:        m.MaxNodeMemory,
 		MaxNodeGPU:           m.MaxNodeGPU,
 		MaxNodeCPUUsage:      m.MaxNodeCPUUsage,
 		MaxNodeMemUsage:      m.MaxNodeMemUsage,
 		MaxNodeGPUUsage:      m.MaxNodeGPUUsage,
 	}
 	if len(m.ResourceUsageByUser) > 0 {
 		resp.ResourceUsageByUser = make([]UserResourceUsageResponse, len(m.ResourceUsageByUser))
 		for i, usage := range m.ResourceUsageByUser {
 			resp.ResourceUsageByUser[i] = UserResourceUsageResponse{
 				UserID:              usage.UserID,
 				Username:            usage.Username,
 				WorkspaceID:         usage.WorkspaceID,
 				InstanceCount:       usage.InstanceCount,
 				PodCount:            usage.PodCount,
 				CPURequests:         usage.CPURequests,
 				CPULimits:           usage.CPULimits,
 				MemoryRequests:      usage.MemoryRequests,
 				MemoryLimits:        usage.MemoryLimits,
 				GPURequests:         usage.GPURequests,
 				GPULimits:           usage.GPULimits,
 				GPUMemoryRequestsMB: usage.GPUMemoryRequestsMB,
 				GPUMemoryLimitsMB:   usage.GPUMemoryLimitsMB,
 			}
 		}
 	}
 	if len(m.Nodes) > 0 {
--- a/backend/internal/adapter/input/http/rest/artifact_handler.go
+++ b/backend/internal/adapter/input/http/rest/artifact_handler.go
@ -126,6 +126,25 @@ func (h *ArtifactHandler) ListArtifacts(w http.ResponseWriter, r *http.Request)
 	respondJSON(w, http.StatusOK, tagResponses)
 }
 // ListRepositoryTags is a compatibility alias for clients that request tags
 // directly instead of the canonical artifacts endpoint.
 //
 // If the route did not supply a registry_id path variable, the ID is read from
 // the "registry_id" query parameter (falling back to camelCase "registryId")
 // and injected as a route variable before delegating to ListArtifacts. When no
 // ID can be found the handler answers 400 and does not delegate.
 func (h *ArtifactHandler) ListRepositoryTags(w http.ResponseWriter, r *http.Request) {
 	routeVars := mux.Vars(r)
 	if routeVars["registry_id"] == "" {
 		// Parse the query string once; r.URL.Query() re-parses on every call.
 		query := r.URL.Query()
 		registryID := query.Get("registry_id")
 		if registryID == "" {
 			registryID = query.Get("registryId")
 		}
 		if registryID == "" {
 			respondError(w, http.StatusBadRequest, "Missing registry ID", "registry_id query parameter is required")
 			return
 		}
 		// mux.Vars returns nil when the request did not pass through a router
 		// (for example a direct handler call in a test), and otherwise returns
 		// the map stored in the request context. Writing into it would panic in
 		// the first case and mutate shared state in the second, so build a
 		// fresh map instead.
 		merged := make(map[string]string, len(routeVars)+1)
 		for name, value := range routeVars {
 			merged[name] = value
 		}
 		merged["registry_id"] = registryID
 		r = mux.SetURLVars(r, merged)
 	}
 	h.ListArtifacts(w, r)
 }
 // GetArtifact 获取 artifact 详情
 // @Summary 获取 Artifact 详情
 // @Description 获取指定 Artifact 的详细信息
--- a/backend/internal/adapter/input/http/rest/auth_handler.go
+++ b/backend/internal/adapter/input/http/rest/auth_handler.go
@ -3,8 +3,11 @@ package rest
 import (
 	"context"
 	"encoding/json"
 	"net"
 	"net/http"
 	"strings"
 	"sync"
 	"time"
 	"github.com/gorilla/mux"
 	"github.com/ocdp/cluster-service/internal/adapter/input/http/dto"
@ -18,6 +21,74 @@ type AuthHandler struct {
 	authService *service.AuthService
 }
 // Settings of the package-level login rate limiter.
 // loginRateLimitWindow is how long a key's failure count is kept, measured
 // from the first failure recorded for that key. loginRateLimitFailures is the
 // number of failures inside one window at which Allow starts returning false.
 const (
 	loginRateLimitWindow   = time.Minute
 	loginRateLimitFailures = 5
 )
 // defaultLoginRateLimiter is the shared limiter that the Login handler
 // consults before authenticating and updates after each attempt.
 var defaultLoginRateLimiter = newLoginRateLimiter(loginRateLimitWindow, loginRateLimitFailures)
 type loginRateLimiter struct {
 	mu       sync.Mutex
 	window   time.Duration
 	limit    int
 	failures map[string]loginFailureState
 	now      func() time.Time
 }
 type loginFailureState struct {
 	count      int
 	windowEnds time.Time
 }
 func newLoginRateLimiter(window time.Duration, limit int) *loginRateLimiter {
 	return &loginRateLimiter{
 		window:   window,
 		limit:    limit,
 		failures: make(map[string]loginFailureState),
 		now:      time.Now,
 	}
 }
 func (l *loginRateLimiter) Allow(key string) bool {
 	if l == nil || key == "" {
 		return true
 	}
 	l.mu.Lock()
 	defer l.mu.Unlock()
 	state, ok := l.failures[key]
 	now := l.now()
 	if !ok || now.After(state.windowEnds) {
 		return true
 	}
 	return state.count < l.limit
 }
 func (l *loginRateLimiter) RecordFailure(key string) {
 	if l == nil || key == "" {
 		return
 	}
 	l.mu.Lock()
 	defer l.mu.Unlock()
 	now := l.now()
 	state, ok := l.failures[key]
 	if !ok || now.After(state.windowEnds) {
 		l.failures[key] = loginFailureState{count: 1, windowEnds: now.Add(l.window)}
 		return
 	}
 	state.count++
 	l.failures[key] = state
 }
 func (l *loginRateLimiter) Reset(key string) {
 	if l == nil || key == "" {
 		return
 	}
 	l.mu.Lock()
 	defer l.mu.Unlock()
 	delete(l.failures, key)
 }
 // NewAuthHandler 创建认证 Handler
 func NewAuthHandler(authService *service.AuthService) *AuthHandler {
 	return &AuthHandler{
@ -41,6 +112,7 @@ func (h *AuthHandler) Register(w http.ResponseWriter, r *http.Request) {
 		respondError(w, http.StatusBadRequest, "Invalid request body", err.Error())
 		return
 	}
 	req.Normalize()
 	// 调用领域服务
 	user, err := h.authService.Register(r.Context(), req.Username, req.Password, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{
@ -79,6 +151,7 @@ func (h *AuthHandler) UpdateUser(w http.ResponseWriter, r *http.Request) {
 		respondError(w, http.StatusBadRequest, "Invalid request body", err.Error())
 		return
 	}
 	req.Normalize()
 	user, err := h.authService.UpdateUser(r.Context(), userID, req.Role, req.WorkspaceID, service.UserWorkspaceOptions{
 		Namespace:        req.Namespace,
 		DefaultClusterID: req.DefaultClusterID,
@ -120,12 +193,21 @@ func (h *AuthHandler) Login(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	rateLimitKey := loginRateLimitKey(r, req.Username)
 	if !defaultLoginRateLimiter.Allow(rateLimitKey) {
 		w.Header().Set("Retry-After", "60")
 		respondError(w, http.StatusTooManyRequests, "Too many login attempts", "too many login attempts; retry later")
 		return
 	}
 	// 调用领域服务
 	accessToken, refreshToken, user, err := h.authService.Login(r.Context(), req.Username, req.Password)
 	if err != nil {
-		respondError(w, http.StatusUnauthorized, "Login failed", err.Error())
+		defaultLoginRateLimiter.RecordFailure(rateLimitKey)
 		respondError(w, http.StatusUnauthorized, "Invalid username or password", "invalid username or password")
 		return
 	}
 	defaultLoginRateLimiter.Reset(rateLimitKey)
 	workspace, _ := h.authService.GetWorkspaceByID(r.Context(), user.WorkspaceID)
@ -151,6 +233,60 @@ func (h *AuthHandler) Login(w http.ResponseWriter, r *http.Request) {
 	respondJSON(w, http.StatusOK, response)
 }
 func loginRateLimitKey(r *http.Request, username string) string {
 	client := strings.TrimSpace(r.Header.Get("X-Forwarded-For"))
 	if idx := strings.Index(client, ","); idx >= 0 {
 		client = strings.TrimSpace(client[:idx])
 	}
 	if client == "" {
 		client = strings.TrimSpace(r.Header.Get("X-Real-IP"))
 	}
 	if client == "" {
 		client = r.RemoteAddr
 		if host, _, err := net.SplitHostPort(client); err == nil {
 			client = host
 		}
 	}
 	return strings.ToLower(strings.TrimSpace(username)) + "|" + client
 }
 // AuthStatus reports whether initial setup is still required, i.e. whether no
 // admin account exists yet. It answers 500 when the lookup itself fails.
 func (h *AuthHandler) AuthStatus(w http.ResponseWriter, r *http.Request) {
 	adminExists, err := h.authService.IsAdminExists(r.Context())
 	if err != nil {
 		respondError(w, http.StatusInternalServerError, "Failed to check status", err.Error())
 		return
 	}
 	// Both keys are derived from the same lookup: hasUsers mirrors the admin
 	// check and needsSetup is its negation.
 	payload := map[string]any{
 		"hasUsers":   adminExists,
 		"needsSetup": !adminExists,
 	}
 	respondJSON(w, http.StatusOK, payload)
 }
 // Setup creates the initial admin account and answers 201 with a fresh
 // access/refresh token pair. A malformed body or a blank username/password
 // yields 400; failures from the service are mapped by respondServiceError.
 // The "no admin exists yet" precondition is not checked here - presumably
 // SetupInitialAdmin enforces it (confirm).
 func (h *AuthHandler) Setup(w http.ResponseWriter, r *http.Request) {
 	var req dto.SetupRequest
 	decodeErr := json.NewDecoder(r.Body).Decode(&req)
 	if decodeErr != nil {
 		respondError(w, http.StatusBadRequest, "Invalid request body", decodeErr.Error())
 		return
 	}
 	usernameBlank := strings.TrimSpace(req.Username) == ""
 	passwordBlank := strings.TrimSpace(req.Password) == ""
 	if usernameBlank || passwordBlank {
 		respondError(w, http.StatusBadRequest, "Missing fields", "username and password are required")
 		return
 	}
 	_, accessToken, refreshToken, err := h.authService.SetupInitialAdmin(r.Context(), req.Username, req.Password, req.Email)
 	if err != nil {
 		respondServiceError(w, err, "Failed to create initial admin")
 		return
 	}
 	tokens := map[string]string{
 		"accessToken":  accessToken,
 		"refreshToken": refreshToken,
 	}
 	respondJSON(w, http.StatusCreated, tokens)
 }
 func (h *AuthHandler) convertUserResponse(ctx context.Context, user *entity.User) *dto.UserResponse {
 	workspace, _ := h.authService.GetWorkspaceByID(ctx, user.WorkspaceID)
 	return &dto.UserResponse{
--- a/backend/internal/adapter/input/http/rest/auth_handler_test.go
+++ b/backend/internal/adapter/input/http/rest/auth_handler_test.go
@ -0,0 +1,44 @@
 package rest
 import (
 	"testing"
 	"time"
 )
 // TestLoginRateLimiterBlocksAfterConfiguredFailures drives a limiter with a
 // limit of two failures through a fake clock: attempts stay allowed until the
 // second failure, are blocked afterwards, and are allowed again once the
 // window has passed.
 func TestLoginRateLimiterBlocksAfterConfiguredFailures(t *testing.T) {
 	const key = "user|127.0.0.1"
 	clock := time.Date(2026, 5, 14, 12, 0, 0, 0, time.UTC)
 	limiter := newLoginRateLimiter(time.Minute, 2)
 	limiter.now = func() time.Time { return clock }
 	if allowed := limiter.Allow(key); !allowed {
 		t.Fatal("expected first attempt to be allowed")
 	}
 	limiter.RecordFailure(key)
 	if allowed := limiter.Allow(key); !allowed {
 		t.Fatal("expected second attempt to be allowed")
 	}
 	limiter.RecordFailure(key)
 	if blocked := !limiter.Allow(key); !blocked {
 		t.Fatal("expected third attempt inside the window to be blocked")
 	}
 	// Step just past the end of the window opened by the first failure.
 	clock = clock.Add(time.Minute + time.Second)
 	if allowed := limiter.Allow(key); !allowed {
 		t.Fatal("expected attempts to be allowed after the window expires")
 	}
 }
 // TestLoginRateLimiterResetClearsFailures checks that a key blocked after a
 // single failure (limit 1) is allowed again right after Reset.
 func TestLoginRateLimiterResetClearsFailures(t *testing.T) {
 	const key = "user|127.0.0.1"
 	limiter := newLoginRateLimiter(time.Minute, 1)
 	limiter.RecordFailure(key)
 	if allowed := limiter.Allow(key); allowed {
 		t.Fatal("expected key to be blocked after one failure")
 	}
 	limiter.Reset(key)
 	if allowed := limiter.Allow(key); !allowed {
 		t.Fatal("expected reset key to be allowed")
 	}
 }
--- a/backend/internal/adapter/input/http/rest/instance_handler.go
+++ b/backend/internal/adapter/input/http/rest/instance_handler.go
@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
 	"reflect"
 	"strconv"
 	"strings"
 	"time"
@ -49,6 +50,11 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request)
 		return
 	}
 	req.Normalize()
 	parsedYAML, hasValuesYAML, err := parseAndCompareValues(req.Values, req.ValuesYAML)
 	if err != nil {
 		respondError(w, http.StatusBadRequest, "Invalid values", err.Error())
 		return
 	}
 	// Extract chart name from repository (e.g., "charts/nginx" -> "nginx")
 	chart := req.Repository
@ -71,21 +77,16 @@ func (h *InstanceHandler) CreateInstance(w http.ResponseWriter, r *http.Request)
 	if req.Values != nil {
 		instance.SetValues(req.Values)
 	}
-	if req.ValuesYAML != "" {
+	if hasValuesYAML {
 		instance.SetValuesYAML(req.ValuesYAML)
 		if req.Values == nil {
-			values, err := parseValuesYAML(req.ValuesYAML)
+			instance.SetValues(parsedYAML)
 			if err != nil {
 				respondError(w, http.StatusBadRequest, "Invalid values YAML", err.Error())
 				return
 			}
 			instance.SetValues(values)
 		}
 	}
 	// 调用领域服务
 	if err := h.instanceService.CreateInstance(r.Context(), instance); err != nil {
-		respondError(w, http.StatusBadRequest, "Failed to create instance", err.Error())
+		respondServiceError(w, err, "Failed to create instance")
 		return
 	}
@ -116,6 +117,7 @@ func (h *InstanceHandler) GetInstance(w http.ResponseWriter, r *http.Request) {
 		respondError(w, http.StatusNotFound, "Instance not found", "resource does not belong to cluster")
 		return
 	}
 	h.instanceService.EnrichReplicas(r.Context(), clusterID, []*entity.Instance{instance})
 	respondJSON(w, http.StatusOK, convertInstanceResponse(instance, true))
 }
@ -139,9 +141,12 @@ func (h *InstanceHandler) ListInstances(w http.ResponseWriter, r *http.Request)
 		return
 	}
 	// Enrich with running replicas from K8s
 	instances = h.instanceService.EnrichReplicas(r.Context(), clusterID, instances)
 	responses := make([]*dto.InstanceResponse, 0, len(instances))
 	for _, instance := range instances {
-		responses = append(responses, convertInstanceResponse(instance, false))
+		responses = append(responses, convertInstanceResponse(instance, true))
 	}
 	response := &dto.InstanceListResponse{
@ -174,6 +179,11 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request)
 		return
 	}
 	req.Normalize()
 	parsedYAML, hasValuesYAML, err := parseAndCompareValues(req.Values, req.ValuesYAML)
 	if err != nil {
 		respondError(w, http.StatusBadRequest, "Invalid values", err.Error())
 		return
 	}
 	// 获取现有实例
 	instance, err := h.instanceService.GetInstance(r.Context(), instanceID)
@ -191,21 +201,16 @@ func (h *InstanceHandler) UpdateInstance(w http.ResponseWriter, r *http.Request)
 	if req.Description != "" {
 		instance.Description = req.Description
 	}
-	if req.ValuesYAML != "" {
+	if hasValuesYAML {
 		instance.SetValuesYAML(req.ValuesYAML)
 		if req.Values == nil {
-			values, err := parseValuesYAML(req.ValuesYAML)
+			instance.SetValues(parsedYAML)
 			if err != nil {
 				respondError(w, http.StatusBadRequest, "Invalid values YAML", err.Error())
 				return
 			}
 			instance.SetValues(values)
 		}
 	}
 	// 调用领域服务
 	if err := h.instanceService.UpdateInstance(r.Context(), instance); err != nil {
-		respondError(w, http.StatusBadRequest, "Failed to update instance", err.Error())
+		respondServiceError(w, err, "Failed to update instance")
 		return
 	}
@ -342,7 +347,6 @@ func (h *InstanceHandler) StreamInstanceLogs(w http.ResponseWriter, r *http.Requ
 	w.Header().Set("Content-Type", "text/event-stream")
 	w.Header().Set("Cache-Control", "no-cache")
 	w.Header().Set("Connection", "keep-alive")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 	flusher, ok := w.(http.Flusher)
 	if !ok {
@ -371,6 +375,52 @@ func (h *InstanceHandler) StreamInstanceLogs(w http.ResponseWriter, r *http.Requ
 	}
 }
 // ScaleInstance scales an instance to the replica count given in the request
 // body. A malformed body or a negative count yields 400; service failures are
 // mapped by respondServiceError. On success the response echoes the requested
 // replica count, both at the top level and inside the instance payload.
 func (h *InstanceHandler) ScaleInstance(w http.ResponseWriter, r *http.Request) {
 	var req dto.ScaleInstanceRequest
 	if decodeErr := json.NewDecoder(r.Body).Decode(&req); decodeErr != nil {
 		respondError(w, http.StatusBadRequest, "Invalid request body", decodeErr.Error())
 		return
 	}
 	if req.Replicas < 0 {
 		respondError(w, http.StatusBadRequest, "Invalid replicas", "replicas must be >= 0")
 		return
 	}
 	routeVars := mux.Vars(r)
 	scaled, err := h.instanceService.ScaleInstance(r.Context(), routeVars["cluster_id"], routeVars["instance_id"], req.Replicas, req.Workload)
 	if err != nil {
 		respondServiceError(w, err, "Failed to scale instance")
 		return
 	}
 	// Report the requested count rather than whatever the returned entity holds.
 	instancePayload := convertInstanceResponse(scaled, true)
 	instancePayload.Replicas = req.Replicas
 	body := dto.ScaleInstanceResponse{
 		Instance: instancePayload,
 		Replicas: req.Replicas,
 		Message:  fmt.Sprintf("Scaled to %d replicas", req.Replicas),
 	}
 	respondJSON(w, http.StatusOK, body)
 }
 // GetInstanceValuesDiff returns the values diff that the instance service
 // computes for the instance addressed by the cluster_id and instance_id route
 // variables. Service failures are mapped by respondServiceError.
 func (h *InstanceHandler) GetInstanceValuesDiff(w http.ResponseWriter, r *http.Request) {
 	routeVars := mux.Vars(r)
 	diff, err := h.instanceService.GetInstanceValuesDiff(r.Context(), routeVars["cluster_id"], routeVars["instance_id"])
 	if err == nil {
 		respondJSON(w, http.StatusOK, diff)
 		return
 	}
 	respondServiceError(w, err, "Failed to get values diff")
 }
 func convertInstanceEntry(entry *entity.InstanceEntry) *dto.InstanceEntryResponse {
 	portResponses := make([]dto.InstanceEntryPortResponse, 0, len(entry.Ports))
 	for _, port := range entry.Ports {
@ -536,10 +586,12 @@ func convertInstanceResponse(instance *entity.Instance, includeValues bool) *dto
 		Status:         string(instance.Status),
 		WorkspaceID:    instance.WorkspaceID,
 		OwnerID:        instance.OwnerID,
 		OwnerUsername:  instance.OwnerUsername,
 		StatusReason:   instance.StatusReason,
 		LastOperation:  string(instance.LastOperation),
 		LastError:      instance.LastError,
 		Revision:       instance.Revision,
 		Replicas:       instance.Replicas,
 		AllowedActions: []string{"view", "update", "delete"},
 		CreatedAt:      instance.CreatedAt.Format("2006-01-02T15:04:05Z07:00"),
 		UpdatedAt:      instance.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"),
@ -572,6 +624,43 @@ func parseValuesYAML(valuesYAML string) (map[string]interface{}, error) {
 	return values, nil
 }
 // parseAndCompareValues validates the optional valuesYaml payload against the
 // optional structured values.
 //
 // Returns the map parsed from valuesYAML, whether valuesYAML was supplied
 // (non-blank), and an error. A blank valuesYAML returns (nil, false, nil).
 // When both inputs are present they must be equal after a JSON round-trip;
 // otherwise a conflict error is returned.
 func parseAndCompareValues(values map[string]interface{}, valuesYAML string) (map[string]interface{}, bool, error) {
 	if strings.TrimSpace(valuesYAML) == "" {
 		return nil, false, nil
 	}
 	fromYAML, parseErr := parseValuesYAML(valuesYAML)
 	if parseErr != nil {
 		return nil, true, fmt.Errorf("invalid values YAML: %w", parseErr)
 	}
 	// Only the YAML form was sent: nothing to compare against.
 	if values == nil {
 		return fromYAML, true, nil
 	}
 	// Bring both sides to the same JSON-decoded representation so that
 	// differing Go types for the same value do not register as a conflict.
 	comparableValues, valuesErr := normalizeJSONComparable(values)
 	if valuesErr != nil {
 		return nil, true, fmt.Errorf("invalid values: %w", valuesErr)
 	}
 	comparableYAML, yamlErr := normalizeJSONComparable(fromYAML)
 	if yamlErr != nil {
 		return nil, true, fmt.Errorf("invalid values YAML: %w", yamlErr)
 	}
 	if reflect.DeepEqual(comparableValues, comparableYAML) {
 		return fromYAML, true, nil
 	}
 	return nil, true, fmt.Errorf("values and valuesYaml conflict")
 }
 // normalizeJSONComparable round-trips value through encoding/json so that the
 // result consists only of the types json.Unmarshal produces for interface{}
 // targets (map[string]interface{}, []interface{}, float64, string, bool, nil).
 // It returns the marshal or unmarshal error unchanged.
 func normalizeJSONComparable(value interface{}) (normalized interface{}, err error) {
 	var encoded []byte
 	if encoded, err = json.Marshal(value); err != nil {
 		return nil, err
 	}
 	if err = json.Unmarshal(encoded, &normalized); err != nil {
 		return nil, err
 	}
 	return normalized, nil
 }
 func normalizeYAMLValue(value interface{}) (interface{}, error) {
 	switch typed := value.(type) {
 	case map[string]interface{}:
--- a/backend/internal/adapter/input/http/rest/monitoring_handler.go
+++ b/backend/internal/adapter/input/http/rest/monitoring_handler.go
@ -43,6 +43,12 @@ func (h *MonitoringHandler) GetClusterMonitoring(w http.ResponseWriter, r *http.
 	respondJSON(w, http.StatusOK, response)
 }
 // GetClusterStats is a compatibility alias for cluster detail dashboards that
 // historically read stats from /clusters/{id}/stats. It delegates to
 // GetClusterMonitoring without touching the request, so the response is the
 // same as that handler's.
 func (h *MonitoringHandler) GetClusterStats(w http.ResponseWriter, r *http.Request) {
 	h.GetClusterMonitoring(w, r)
 }
 // ListClusterMonitoring 获取所有集群的监控信息
 // @Summary 列出集群监控
 // @Tags Monitoring
--- a/backend/internal/adapter/input/http/rest/workspace_handler.go
+++ b/backend/internal/adapter/input/http/rest/workspace_handler.go
@ -2,6 +2,7 @@ package rest
 import (
 	"encoding/json"
 	"errors"
 	"net/http"
 	"time"
@ -113,6 +114,15 @@ func (h *WorkspaceHandler) IssueCurrentKubeconfig(w http.ResponseWriter, r *http
 	if clusterID == "" {
 		clusterID = r.URL.Query().Get("cluster_id")
 	}
 	h.issueCurrentKubeconfigForCluster(w, r, clusterID)
 }
 // IssueClusterKubeconfig issues a kubeconfig for the cluster named by the
 // cluster_id route variable. Unlike IssueCurrentKubeconfig it does not fall
 // back to a query parameter; the value is passed on as-is (possibly empty) to
 // the shared issueCurrentKubeconfigForCluster helper.
 func (h *WorkspaceHandler) IssueClusterKubeconfig(w http.ResponseWriter, r *http.Request) {
 	clusterID := mux.Vars(r)["cluster_id"]
 	h.issueCurrentKubeconfigForCluster(w, r, clusterID)
 }
 func (h *WorkspaceHandler) issueCurrentKubeconfigForCluster(w http.ResponseWriter, r *http.Request, clusterID string) {
 	kubeconfig, err := h.workspaceService.IssueCurrentKubeconfig(r.Context(), clusterID, 2*time.Hour)
 	if err != nil {
 		respondServiceError(w, err, "Failed to issue kubeconfig")
@ -152,11 +162,19 @@ func toWorkspaceResponse(workspace *entity.Workspace) workspaceResponse {
 }
 func respondServiceError(w http.ResponseWriter, err error, fallback string) {
 	if errors.Is(err, service.ErrQuotaExceeded) {
 		respondError(w, http.StatusUnprocessableEntity, "Quota exceeded", err.Error())
 		return
 	}
 	switch err {
 	case entity.ErrUnauthorized, authz.ErrUnauthenticated:
 		respondError(w, http.StatusUnauthorized, "Unauthorized", err.Error())
 	case entity.ErrForbidden, authz.ErrForbidden, entity.ErrUserInactive, entity.ErrWorkspaceSuspended:
 		respondError(w, http.StatusForbidden, "Forbidden", err.Error())
 	case entity.ErrWorkspaceNamespaceConflict, entity.ErrUserHasInstances, entity.ErrWorkspaceExists, entity.ErrInstanceExists:
 		respondError(w, http.StatusConflict, "Conflict", err.Error())
 	case entity.ErrProtectedNamespace:
 		respondError(w, http.StatusForbidden, "Forbidden", err.Error())
 	case entity.ErrClusterNotFound, entity.ErrRegistryNotFound, entity.ErrInstanceNotFound, entity.ErrWorkspaceNotFound:
 		respondError(w, http.StatusNotFound, fallback, err.Error())
 	default:
--- a/backend/internal/adapter/output/helm/mock/helm_client_mock.go
+++ b/backend/internal/adapter/output/helm/mock/helm_client_mock.go
@ -4,7 +4,7 @@ import (
 	"context"
 	"fmt"
 	"time"
-	
+
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 )
@ -12,38 +12,47 @@ import (
 // HelmClientMock Helm 客户端 Mock 实现
 type HelmClientMock struct {
 	// Mock 数据存储
-	releases map[string]map[string]*entity.Instance    // clusterID -> releaseName -> instance
+	releases  map[string]map[string]*entity.Instance             // clusterID -> releaseName -> instance
-	history  map[string]map[string][]*entity.ReleaseHistory // clusterID -> releaseName -> []history
+	history   map[string]map[string][]*entity.ReleaseHistory     // clusterID -> releaseName -> []history
 	estimates map[string]map[string]*repository.ResourceEstimate // clusterID -> releaseName -> estimate
 }
 // NewHelmClientMock 创建 Mock 实现
 func NewHelmClientMock() repository.HelmClient {
 	return &HelmClientMock{
-		releases: make(map[string]map[string]*entity.Instance),
+		releases:  make(map[string]map[string]*entity.Instance),
-		history:  make(map[string]map[string][]*entity.ReleaseHistory),
+		history:   make(map[string]map[string][]*entity.ReleaseHistory),
 		estimates: make(map[string]map[string]*repository.ResourceEstimate),
 	}
 }
 // SetResourceEstimate registers the estimate stored for the release
 // namespace/releaseName on clusterID, creating the per-cluster map on first
 // use. EstimateInstanceResources looks estimates up under the same key.
 func (c *HelmClientMock) SetResourceEstimate(clusterID, namespace, releaseName string, estimate *repository.ResourceEstimate) {
 	perCluster := c.estimates[clusterID]
 	if perCluster == nil {
 		perCluster = make(map[string]*repository.ResourceEstimate)
 		c.estimates[clusterID] = perCluster
 	}
 	releaseKey := namespace + "/" + releaseName
 	perCluster[releaseKey] = estimate
 }
 func (c *HelmClientMock) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
 	// 初始化集群数据
 	if c.releases[cluster.ID] == nil {
 		c.releases[cluster.ID] = make(map[string]*entity.Instance)
 		c.history[cluster.ID] = make(map[string][]*entity.ReleaseHistory)
 	}
-	
+
 	// 检查是否已存在
 	key := fmt.Sprintf("%s/%s", instance.Namespace, instance.Name)
 	if _, exists := c.releases[cluster.ID][key]; exists {
 		return entity.ErrInstanceExists
 	}
-	
+
 	// Mock 安装
 	instance.Status = entity.StatusDeployed
 	instance.Revision = 1
 	instance.UpdatedAt = time.Now()
-	
+
 	c.releases[cluster.ID][key] = instance
-	
+
 	// 添加历史记录
 	c.history[cluster.ID][key] = []*entity.ReleaseHistory{
 		{
@ -55,25 +64,25 @@ func (c *HelmClientMock) Install(ctx context.Context, cluster *entity.Cluster, i
 			Description: "Install complete",
 		},
 	}
-	
+
 	return nil
 }
 func (c *HelmClientMock) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
 	key := fmt.Sprintf("%s/%s", instance.Namespace, instance.Name)
-	
+
 	existing, exists := c.releases[cluster.ID][key]
 	if !exists {
 		return entity.ErrInstanceNotFound
 	}
-	
+
 	// Mock 升级
 	instance.Revision = existing.Revision + 1
 	instance.Status = entity.StatusDeployed
 	instance.UpdatedAt = time.Now()
-	
+
 	c.releases[cluster.ID][key] = instance
-	
+
 	// 添加历史记录
 	history := &entity.ReleaseHistory{
 		Revision:    instance.Revision,
@ -84,44 +93,44 @@ func (c *HelmClientMock) Upgrade(ctx context.Context, cluster *entity.Cluster, i
 		Description: "Upgrade complete",
 	}
 	c.history[cluster.ID][key] = append(c.history[cluster.ID][key], history)
-	
+
 	return nil
 }
 func (c *HelmClientMock) Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error {
 	key := fmt.Sprintf("%s/%s", namespace, releaseName)
-	
+
 	if _, exists := c.releases[cluster.ID][key]; !exists {
 		return entity.ErrInstanceNotFound
 	}
-	
+
 	// Mock 卸载
 	delete(c.releases[cluster.ID], key)
-	
+
 	return nil
 }
 func (c *HelmClientMock) Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error {
 	key := fmt.Sprintf("%s/%s", namespace, releaseName)
-	
+
 	instance, exists := c.releases[cluster.ID][key]
 	if !exists {
 		return entity.ErrInstanceNotFound
 	}
-	
+
 	// 检查历史记录是否存在
 	histories := c.history[cluster.ID][key]
 	if revision > len(histories) || revision < 1 {
 		return fmt.Errorf("revision %d not found", revision)
 	}
-	
+
 	// Mock 回滚
 	instance.Revision = len(histories) + 1
 	instance.Status = entity.StatusDeployed
 	instance.UpdatedAt = time.Now()
-	
+
 	c.releases[cluster.ID][key] = instance
-	
+
 	// 添加回滚历史记录
 	history := &entity.ReleaseHistory{
 		Revision:    instance.Revision,
@ -132,33 +141,33 @@ func (c *HelmClientMock) Rollback(ctx context.Context, cluster *entity.Cluster,
 		Description: fmt.Sprintf("Rollback to revision %d", revision),
 	}
 	c.history[cluster.ID][key] = append(c.history[cluster.ID][key], history)
-	
+
 	return nil
 }
 func (c *HelmClientMock) GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) {
 	key := fmt.Sprintf("%s/%s", namespace, releaseName)
-	
+
 	instance, exists := c.releases[cluster.ID][key]
 	if !exists {
 		return nil, entity.ErrInstanceNotFound
 	}
-	
+
 	return instance, nil
 }
 func (c *HelmClientMock) GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) {
 	key := fmt.Sprintf("%s/%s", namespace, releaseName)
-	
+
 	if _, exists := c.releases[cluster.ID][key]; !exists {
 		return nil, entity.ErrInstanceNotFound
 	}
-	
+
 	histories := c.history[cluster.ID][key]
 	if histories == nil {
 		return []*entity.ReleaseHistory{}, nil
 	}
-	
+
 	return histories, nil
 }
@ -167,7 +176,7 @@ func (c *HelmClientMock) List(ctx context.Context, cluster *entity.Cluster, name
 	if clusterReleases == nil {
 		return []*entity.Instance{}, nil
 	}
-	
+
 	instances := make([]*entity.Instance, 0)
 	for key, instance := range clusterReleases {
 		// 如果指定了 namespace，只返回该 namespace 的
@ -179,18 +188,41 @@ func (c *HelmClientMock) List(ctx context.Context, cluster *entity.Cluster, name
 		}
 		instances = append(instances, c.releases[cluster.ID][key])
 	}
-	
+
 	return instances, nil
 }
 func (c *HelmClientMock) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
 	key := fmt.Sprintf("%s/%s", namespace, releaseName)
-	
+
 	instance, exists := c.releases[cluster.ID][key]
 	if !exists {
 		return nil, entity.ErrInstanceNotFound
 	}
-	
+
 	return instance.Values, nil
 }
 // GetChartDefaultValues returns a fixed set of default values regardless of
 // chartPath: one replica and an nginx:latest image. It never fails.
 func (c *HelmClientMock) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) {
 	image := map[string]interface{}{
 		"repository": "nginx",
 		"tag":        "latest",
 	}
 	defaults := map[string]interface{}{
 		"replicaCount": 1,
 		"image":        image,
 	}
 	return defaults, nil
 }
 // EstimateInstanceResources returns the estimate previously stored with
 // SetResourceEstimate for the instance's namespace/name on the given cluster
 // (a nil cluster is looked up under the empty cluster ID). When nothing is
 // stored it returns an empty estimate. It never fails.
 func (c *HelmClientMock) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) {
 	var clusterID string
 	if cluster != nil {
 		clusterID = cluster.ID
 	}
 	releaseKey := instance.Namespace + "/" + instance.Name
 	// Indexing a nil inner map yields nil, so no separate existence check is needed.
 	if stored := c.estimates[clusterID][releaseKey]; stored != nil {
 		return stored, nil
 	}
 	return &repository.ResourceEstimate{}, nil
 }
--- a/backend/internal/adapter/output/helm/real/helm_client.go
+++ b/backend/internal/adapter/output/helm/real/helm_client.go
@ -10,6 +10,7 @@ import (
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	domainservice "github.com/ocdp/cluster-service/internal/domain/service"
 	"helm.sh/helm/v3/pkg/action"
 	"helm.sh/helm/v3/pkg/chart/loader"
 	"helm.sh/helm/v3/pkg/cli"
@ -159,6 +160,7 @@ func (h *HelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, insta
 	upgrade := action.NewUpgrade(actionConfig)
 	upgrade.Namespace = instance.Namespace
 	upgrade.ReuseValues = true
 	upgrade.Wait = true
 	upgrade.Timeout = helmOperationTimeout()
@ -321,6 +323,7 @@ func (h *HelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, rel
 	defer cleanup()
 	getValues := action.NewGetValues(actionConfig)
 	getValues.AllValues = true
 	values, err := getValues.Run(releaseName)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get values: %w", err)
@ -329,6 +332,56 @@ func (h *HelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, rel
 	return values, nil
 }
 // GetChartDefaultValues loads the chart at chartPath and returns a shallow copy
 // of its default values. The returned map is never nil; a load failure is
 // wrapped and returned.
 func (h *HelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) {
 	loaded, err := loader.Load(chartPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load chart: %w", err)
 	}
 	// Ranging over a nil Values map is a no-op, so no nil guard is required.
 	defaults := make(map[string]interface{}, len(loaded.Values))
 	for key, value := range loaded.Values {
 		defaults[key] = value
 	}
 	return defaults, nil
 }
 // EstimateInstanceResources renders the instance's chart with its values and
 // derives a resource estimate from the rendered manifest via
 // domainservice.EstimateRenderedManifestResources.
 //
 // The chart archive is read from /tmp/charts/<chart>-<version>.tgz. Load and
 // render failures are wrapped and returned. The cluster argument is not used
 // in this function body.
 // NOTE(review): the chart path is built directly from instance.Chart and
 // instance.Version - confirm both are validated upstream.
 func (h *HelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) {
 	chartPath := fmt.Sprintf("/tmp/charts/%s-%s.tgz", instance.Chart, instance.Version)
 	chart, err := loader.Load(chartPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load chart: %w", err)
 	}
 	// A bare action configuration with a no-op logger: no cluster connection is
 	// set up here (the install below is flagged ClientOnly).
 	actionConfig := new(action.Configuration)
 	actionConfig.Log = func(format string, v ...interface{}) {}
 	install := action.NewInstall(actionConfig)
 	// Fall back to placeholder release name and namespace when the instance
 	// does not carry them.
 	install.ReleaseName = instance.Name
 	if install.ReleaseName == "" {
 		install.ReleaseName = "quota-precheck"
 	}
 	install.Namespace = instance.Namespace
 	if install.Namespace == "" {
 		install.Namespace = "default"
 	}
 	// Client-side dry run; presumably this only renders templates and installs
 	// nothing - confirm against the Helm action.Install documentation.
 	install.DryRun = true
 	install.DryRunOption = "client"
 	install.ClientOnly = true
 	install.Replace = true
 	install.SkipSchemaValidation = true
 	// Pass an empty map instead of nil values to the render.
 	values := instance.Values
 	if values == nil {
 		values = map[string]interface{}{}
 	}
 	release, err := install.RunWithContext(ctx, chart, values)
 	if err != nil {
 		return nil, fmt.Errorf("failed to render chart for quota estimate: %w", err)
 	}
 	return domainservice.EstimateRenderedManifestResources(release.Manifest)
 }
 // convertReleaseToInstance 转换 Helm Release 为 Instance
 func (h *HelmClient) convertReleaseToInstance(rel *release.Release) *entity.Instance {
 	return &entity.Instance{
--- a/backend/internal/adapter/output/k8s/metrics_client.go
+++ b/backend/internal/adapter/output/k8s/metrics_client.go
@ -63,7 +63,7 @@ func (c *MetricsClient) GetClusterMetrics(ctx context.Context, clusterID string)
 	// 计算集群级别汇总
 	metrics := c.aggregateClusterMetrics(cluster, nodes.Items, pods.Items, nodeMetrics)
-	
+
 	return metrics, nil
 }
@ -87,6 +87,37 @@ func (c *MetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([
 	return c.getNodeMetricsData(ctx, clientset, metricsClient, nodes.Items)
 }
 // GetPodResourceAllocations returns one entry per pod listed with an empty
 // namespace selector on the given cluster, carrying the pod's Kubernetes
 // requests/limits without inventing utilization values. GPU memory is treated
 // as vendor integer MB. InstanceName is inferred from the pod labels and may
 // be empty.
 func (c *MetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) {
 	cluster, err := c.clusterRepo.GetByID(ctx, clusterID)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get cluster: %w", err)
 	}
 	// Only the core clientset is needed here; the metrics clientset is discarded.
 	clientset, _, err := c.createK8sClients(cluster)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create k8s client: %w", err)
 	}
 	podList, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{})
 	if err != nil {
 		return nil, fmt.Errorf("failed to list pods: %w", err)
 	}
 	allocations := make([]*entity.PodResourceAllocation, len(podList.Items))
 	for i := range podList.Items {
 		// Index into the slice so each pod is referenced in place rather than copied.
 		current := &podList.Items[i]
 		allocations[i] = &entity.PodResourceAllocation{
 			ClusterID:    clusterID,
 			Namespace:    current.Namespace,
 			PodName:      current.Name,
 			InstanceName: inferHelmReleaseName(current.Labels),
 			Allocation:   podResourceAllocation(current),
 		}
 	}
 	return allocations, nil
 }
 // createK8sClients 创建 Kubernetes 客户端
 func (c *MetricsClient) createK8sClients(cluster *entity.Cluster) (*kubernetes.Clientset, *metricsv.Clientset, error) {
 	config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.GetKubeConfig()))
@ -127,14 +158,14 @@ func (c *MetricsClient) getNodeMetricsData(
 	for _, node := range nodes {
 		nodeMetric := &entity.NodeMetrics{
-			NodeName:          node.Name,
+			NodeName:         node.Name,
-			Status:            getNodeStatus(&node),
+			Status:           getNodeStatus(&node),
-			Role:              getNodeRole(&node),
+			Role:             getNodeRole(&node),
-			Age:               getNodeAge(&node),
+			Age:              getNodeAge(&node),
-			OSImage:           node.Status.NodeInfo.OSImage,
+			OSImage:          node.Status.NodeInfo.OSImage,
-			KernelVersion:     node.Status.NodeInfo.KernelVersion,
+			KernelVersion:    node.Status.NodeInfo.KernelVersion,
-			ContainerRuntime:  node.Status.NodeInfo.ContainerRuntimeVersion,
+			ContainerRuntime: node.Status.NodeInfo.ContainerRuntimeVersion,
-			KubeletVersion:    node.Status.NodeInfo.KubeletVersion,
+			KubeletVersion:   node.Status.NodeInfo.KubeletVersion,
 		}
 		// CPU
@ -213,7 +244,7 @@ func (c *MetricsClient) aggregateClusterMetrics(
 	var totalCPU, totalMem, usedCPU, usedMem int64
 	var totalGPU, usedGPU int
 	healthyNodes := 0
-	
+
 	// 单机最大值
 	var maxNodeCPU, maxNodeMem int64
 	var maxNodeGPU int
@ -251,7 +282,7 @@ func (c *MetricsClient) aggregateClusterMetrics(
 		// 从 nodeMetrics 获取使用情况
 		if i < len(nodeMetrics) && nodeMetrics[i] != nil {
 			metrics.Nodes = append(metrics.Nodes, *nodeMetrics[i])
-			
+
 			// 更新单机最大使用率
 			if nodeMetrics[i].CPUPercent > maxNodeCPUUsage {
 				maxNodeCPUUsage = nodeMetrics[i].CPUPercent
@ -274,7 +305,7 @@ func (c *MetricsClient) aggregateClusterMetrics(
 	metrics.TotalCPU = fmt.Sprintf("%.2f cores", float64(totalCPU)/1000.0)
 	metrics.TotalMemory = formatBytes(totalMem)
 	metrics.TotalGPU = totalGPU
-	
+
 	// 格式化单机最大值
 	metrics.MaxNodeCPU = fmt.Sprintf("%.2f cores", float64(maxNodeCPU)/1000.0)
 	metrics.MaxNodeMemory = formatBytes(maxNodeMem)
@ -292,7 +323,7 @@ func (c *MetricsClient) aggregateClusterMetrics(
 			usedMem += int64(nm.MemoryPercent * float64(totalMem) / 100.0)
 			usedGPU += nm.GPUUsage
 		}
-		
+
 		if totalCPU > 0 {
 			metrics.CPUUsage = float64(usedCPU) / float64(totalCPU) * 100
 		}
@ -302,7 +333,7 @@ func (c *MetricsClient) aggregateClusterMetrics(
 		if totalGPU > 0 {
 			metrics.GPUUsage = float64(usedGPU) / float64(totalGPU) * 100
 		}
-		
+
 		metrics.UsedCPU = fmt.Sprintf("%.2f cores", float64(usedCPU)/1000.0)
 		metrics.UsedMemory = formatBytes(usedMem)
 		metrics.UsedGPU = usedGPU
@ -348,7 +379,7 @@ func getNodeAge(node *corev1.Node) string {
 	age := time.Since(node.CreationTimestamp.Time)
 	days := int(age.Hours() / 24)
 	hours := int(age.Hours()) % 24
-	
+
 	if days > 0 {
 		return fmt.Sprintf("%dd %dh", days, hours)
 	}
@ -368,3 +399,110 @@ func formatBytes(bytes int64) string {
 	return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
 }
 // inferHelmReleaseName returns the first non-empty value found among a fixed,
 // priority-ordered list of release-identifying label keys, or "" when none of
 // them is set (including when labels is nil).
 // NOTE(review): "meta.helm.sh/release-name" is normally written by Helm as an
 // annotation rather than a label - confirm whether callers should also pass
 // annotations.
 func inferHelmReleaseName(labels map[string]string) string {
 	// Reading from a nil map yields the zero value, so nil needs no special case.
 	candidates := [...]string{
 		"app.kubernetes.io/instance",
 		"release",
 		"helm.sh/release",
 		"meta.helm.sh/release-name",
 		"app",
 	}
 	for i := 0; i < len(candidates); i++ {
 		name := labels[candidates[i]]
 		if name == "" {
 			continue
 		}
 		return name
 	}
 	return ""
 }
 // podResourceAllocation derives a pod-level allocation from its containers:
 // regular containers are summed field by field, init containers contribute
 // their per-field maximum, and the result is the per-field maximum of those
 // two figures. A nil pod yields the zero allocation.
 func podResourceAllocation(pod *corev1.Pod) entity.ResourceAllocation {
 	var running, initPeak entity.ResourceAllocation
 	if pod == nil {
 		return running
 	}
 	for i := range pod.Spec.Containers {
 		running = addAllocation(running, containerAllocation(pod.Spec.Containers[i]))
 	}
 	for i := range pod.Spec.InitContainers {
 		initPeak = maxAllocation(initPeak, containerAllocation(pod.Spec.InitContainers[i]))
 	}
 	return maxAllocation(running, initPeak)
 }
 // addContainerAllocation returns base plus the allocation declared by the
 // given container.
 func addContainerAllocation(base entity.ResourceAllocation, container corev1.Container) entity.ResourceAllocation {
 	contribution := containerAllocation(container)
 	return addAllocation(base, contribution)
 }
 // containerAllocation extracts CPU (milli-units), memory, GPU count and GPU
 // memory from a container's resource requests and limits. Resources that are
 // not declared are reported as 0. GPU memory is read from "nvidia.com/gpumem"
 // first and from "requests.nvidia.com/gpumem" as a fallback key.
 func containerAllocation(container corev1.Container) entity.ResourceAllocation {
 	const (
 		gpuKey         = corev1.ResourceName("nvidia.com/gpu")
 		gpuMemKey      = corev1.ResourceName("nvidia.com/gpumem")
 		gpuMemQuotaKey = corev1.ResourceName("requests.nvidia.com/gpumem")
 	)
 	req, lim := container.Resources.Requests, container.Resources.Limits
 	var out entity.ResourceAllocation
 	out.CPURequestsMilli = quantityMilliValue(req, corev1.ResourceCPU)
 	out.CPULimitsMilli = quantityMilliValue(lim, corev1.ResourceCPU)
 	out.MemoryRequestsBytes = quantityValue(req, corev1.ResourceMemory)
 	out.MemoryLimitsBytes = quantityValue(lim, corev1.ResourceMemory)
 	out.GPURequests = quantityValue(req, gpuKey)
 	out.GPULimits = quantityValue(lim, gpuKey)
 	out.GPUMemoryRequestsMB = quantityValueAny(req, gpuMemKey, gpuMemQuotaKey)
 	out.GPUMemoryLimitsMB = quantityValueAny(lim, gpuMemKey, gpuMemQuotaKey)
 	return out
 }
 // addAllocation returns the field-by-field sum of two allocations.
 func addAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation {
 	// left is a by-value copy, so accumulating into it does not affect the caller.
 	left.CPURequestsMilli += right.CPURequestsMilli
 	left.CPULimitsMilli += right.CPULimitsMilli
 	left.MemoryRequestsBytes += right.MemoryRequestsBytes
 	left.MemoryLimitsBytes += right.MemoryLimitsBytes
 	left.GPURequests += right.GPURequests
 	left.GPULimits += right.GPULimits
 	left.GPUMemoryRequestsMB += right.GPUMemoryRequestsMB
 	left.GPUMemoryLimitsMB += right.GPUMemoryLimitsMB
 	return left
 }
 // maxAllocation returns an allocation whose every field is the larger of the
 // corresponding fields of left and right.
 func maxAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation {
 	// left is a by-value copy, so it can be reused as the result.
 	left.CPURequestsMilli = maxInt64(left.CPURequestsMilli, right.CPURequestsMilli)
 	left.CPULimitsMilli = maxInt64(left.CPULimitsMilli, right.CPULimitsMilli)
 	left.MemoryRequestsBytes = maxInt64(left.MemoryRequestsBytes, right.MemoryRequestsBytes)
 	left.MemoryLimitsBytes = maxInt64(left.MemoryLimitsBytes, right.MemoryLimitsBytes)
 	left.GPURequests = maxInt64(left.GPURequests, right.GPURequests)
 	left.GPULimits = maxInt64(left.GPULimits, right.GPULimits)
 	left.GPUMemoryRequestsMB = maxInt64(left.GPUMemoryRequestsMB, right.GPUMemoryRequestsMB)
 	left.GPUMemoryLimitsMB = maxInt64(left.GPUMemoryLimitsMB, right.GPUMemoryLimitsMB)
 	return left
 }
 // quantityMilliValue returns the named resource's quantity in milli-units,
 // or 0 when the resource is absent from the list.
 func quantityMilliValue(resources corev1.ResourceList, name corev1.ResourceName) int64 {
 	quantity, present := resources[name]
 	if !present {
 		return 0
 	}
 	return quantity.MilliValue()
 }
 // quantityValue returns the named resource's quantity as an integer value,
 // or 0 when the resource is absent from the list.
 func quantityValue(resources corev1.ResourceList, name corev1.ResourceName) int64 {
 	quantity, present := resources[name]
 	if !present {
 		return 0
 	}
 	return quantity.Value()
 }
 // quantityValueAny returns the integer value of the first name that is
 // present in the list, trying names in the order given. A present key wins
 // even if its value is 0. It returns 0 when none of the names is present.
 func quantityValueAny(resources corev1.ResourceList, names ...corev1.ResourceName) int64 {
 	for i := range names {
 		quantity, present := resources[names[i]]
 		if !present {
 			continue
 		}
 		return quantity.Value()
 	}
 	return 0
 }
 // maxInt64 returns the larger of two int64 values.
 func maxInt64(left, right int64) int64 {
 	if right < left {
 		return left
 	}
 	return right
 }
--- a/backend/internal/adapter/output/k8s/metrics_client_test.go
+++ b/backend/internal/adapter/output/k8s/metrics_client_test.go
@ -0,0 +1,29 @@
 package k8s
 import (
 	"testing"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 // TestContainerAllocationCountsVendorGPUMemoryKey verifies that GPU memory
 // declared under the "nvidia.com/gpumem" key is picked up for both requests
 // and limits.
 func TestContainerAllocationCountsVendorGPUMemoryKey(t *testing.T) {
 	const gpuMemKey = corev1.ResourceName("nvidia.com/gpumem")
 	requirements := corev1.ResourceRequirements{
 		Requests: corev1.ResourceList{gpuMemKey: resource.MustParse("10000")},
 		Limits:   corev1.ResourceList{gpuMemKey: resource.MustParse("12000")},
 	}
 	got := containerAllocation(corev1.Container{Resources: requirements})
 	if want := int64(10000); got.GPUMemoryRequestsMB != want {
 		t.Fatalf("expected GPU memory requests 10000 MB, got %d", got.GPUMemoryRequestsMB)
 	}
 	if want := int64(12000); got.GPUMemoryLimitsMB != want {
 		t.Fatalf("expected GPU memory limits 12000 MB, got %d", got.GPUMemoryLimitsMB)
 	}
 }
--- a/backend/internal/adapter/output/k8s/scale_client.go
+++ b/backend/internal/adapter/output/k8s/scale_client.go
@ -0,0 +1,134 @@
 package k8s
 import (
 	"context"
 	"fmt"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	appsv1 "k8s.io/api/apps/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 )
 // ScaleClient provides K8s-native workload scaling (bypasses Helm).
 // It carries no fields; each operation receives the target cluster as an
 // argument and builds a clientset from it.
 type ScaleClient struct{}
 // NewScaleClient returns a ready-to-use ScaleClient.
 func NewScaleClient() *ScaleClient {
 	return new(ScaleClient)
 }
 // findDeployment searches the namespace for a Deployment belonging to the
 // given release. It tries several label selectors in priority order
 // (app.kubernetes.io/instance, release, app, app.kubernetes.io/name) and
 // returns the first item of the first non-empty match; if none matches it
 // falls back to a Deployment named exactly like the release.
 //
 // Returns (nil, nil) when every lookup completed and nothing was found, so
 // callers can fall back to other workload kinds. If nothing was found and at
 // least one label query failed, the first such failure is returned instead:
 // an API, auth or network problem must not be reported as "not found".
 func (c *ScaleClient) findDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, releaseName string) (*appsv1.Deployment, error) {
 	labelQueries := []string{
 		fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName),
 		fmt.Sprintf("release=%s", releaseName),
 		fmt.Sprintf("app=%s", releaseName),
 		fmt.Sprintf("app.kubernetes.io/name=%s", releaseName),
 	}
 	var firstListErr error
 	for _, query := range labelQueries {
 		deployments, err := clientset.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{
 			LabelSelector: query,
 		})
 		if err != nil {
 			// Keep trying the remaining strategies, but remember the failure so
 			// it is not lost if nothing is found.
 			if firstListErr == nil {
 				firstListErr = err
 			}
 			continue
 		}
 		if len(deployments.Items) > 0 {
 			return &deployments.Items[0], nil
 		}
 	}
 	// Fallback: get by name directly
 	dep, err := clientset.AppsV1().Deployments(namespace).Get(ctx, releaseName, metav1.GetOptions{})
 	if err == nil && dep != nil {
 		return dep, nil
 	}
 	if firstListErr != nil {
 		return nil, fmt.Errorf("failed to list deployments for release %s in namespace %s: %w", releaseName, namespace, firstListErr)
 	}
 	return nil, nil
 }
 // GetDeploymentReplicas reports the desired replica count (spec.replicas) of
 // the workload backing a release. A matching Deployment with a replica count
 // set is preferred; otherwise the lookup falls back to StatefulSets.
 func (c *ScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) {
 	clientset, err := c.clientsetForCluster(cluster)
 	if err != nil {
 		return 0, fmt.Errorf("failed to create k8s client: %w", err)
 	}
 	deployment, err := c.findDeployment(ctx, clientset, namespace, releaseName)
 	switch {
 	case err != nil:
 		return 0, err
 	case deployment != nil && deployment.Spec.Replicas != nil:
 		return *deployment.Spec.Replicas, nil
 	}
 	// Fallback to statefulsets
 	return c.getStatefulSetReplicas(ctx, clientset, namespace, releaseName)
 }
 // getStatefulSetReplicas returns spec.replicas of the first StatefulSet in
 // the namespace labelled app.kubernetes.io/instance=<releaseName>. It returns
 // 0 with a nil error when no StatefulSet matches or the replica count is
 // unset, and the list error unchanged when the query fails.
 func (c *ScaleClient) getStatefulSetReplicas(ctx context.Context, clientset *kubernetes.Clientset, namespace, releaseName string) (int32, error) {
 	selector := fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName)
 	found, err := clientset.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
 	if err != nil {
 		return 0, err
 	}
 	if len(found.Items) > 0 {
 		if replicas := found.Items[0].Spec.Replicas; replicas != nil {
 			return *replicas, nil
 		}
 	}
 	// No replicable workload found, or the replica count is not set.
 	return 0, nil
 }
 // ScaleDeployment sets the desired replica count of the workload backing a
 // release directly through the Kubernetes API (bypasses Helm).
 //
 // A matching Deployment is scaled if one is found; otherwise the first
 // StatefulSet labelled app.kubernetes.io/instance=<releaseName> is scaled.
 // An error is returned when replicas is negative, when the cluster client
 // cannot be built, when a lookup or update call fails, or when neither kind
 // of workload exists for the release.
 func (c *ScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error {
 	// Reject an impossible target before touching the cluster.
 	if replicas < 0 {
 		return fmt.Errorf("invalid replica count %d for release %s: must not be negative", replicas, releaseName)
 	}
 	clientset, err := c.clientsetForCluster(cluster)
 	if err != nil {
 		return fmt.Errorf("failed to create k8s client: %w", err)
 	}
 	dep, err := c.findDeployment(ctx, clientset, namespace, releaseName)
 	if err != nil {
 		return err
 	}
 	if dep != nil {
 		dep.Spec.Replicas = &replicas
 		if _, err = clientset.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{}); err != nil {
 			return fmt.Errorf("failed to scale deployment %s: %w", dep.Name, err)
 		}
 		return nil
 	}
 	// Try StatefulSets
 	stsList, err := clientset.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: fmt.Sprintf("app.kubernetes.io/instance=%s", releaseName),
 	})
 	if err != nil {
 		// Surface the real failure instead of reporting "not found".
 		return fmt.Errorf("failed to list statefulsets for release %s in namespace %s: %w", releaseName, namespace, err)
 	}
 	if len(stsList.Items) == 0 {
 		return fmt.Errorf("no deployment or statefulset found for release %s in namespace %s", releaseName, namespace)
 	}
 	sts := &stsList.Items[0]
 	sts.Spec.Replicas = &replicas
 	if _, err = clientset.AppsV1().StatefulSets(namespace).Update(ctx, sts, metav1.UpdateOptions{}); err != nil {
 		return fmt.Errorf("failed to scale statefulset %s: %w", sts.Name, err)
 	}
 	return nil
 }
 // clientsetForCluster builds a Kubernetes clientset from the REST config
 // derived from the given cluster.
 func (c *ScaleClient) clientsetForCluster(cluster *entity.Cluster) (*kubernetes.Clientset, error) {
 	cfg, cfgErr := restConfigFromCluster(cluster)
 	if cfgErr != nil {
 		return nil, fmt.Errorf("failed to create rest config: %w", cfgErr)
 	}
 	built, buildErr := kubernetes.NewForConfig(cfg)
 	if buildErr != nil {
 		return nil, fmt.Errorf("failed to create clientset: %w", buildErr)
 	}
 	return built, nil
 }
--- a/backend/internal/adapter/output/k8s/tenant_client.go
+++ b/backend/internal/adapter/output/k8s/tenant_client.go
@ -106,6 +106,25 @@ func (c *TenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Clus
 	}, nil
 }
 // GetResourceQuotaUsage reads the tenant's ResourceQuota object and returns
 // its status.hard and status.used values converted to resource vectors. The
 // binding is completed with defaults and validated before any API call.
 func (c *TenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) {
 	resolved := binding.WithDefaults()
 	if validationErr := resolved.Validate(); validationErr != nil {
 		return nil, validationErr
 	}
 	clientset, _, err := c.clientsetForCluster(cluster)
 	if err != nil {
 		return nil, err
 	}
 	quotas := clientset.CoreV1().ResourceQuotas(resolved.Namespace)
 	quota, err := quotas.Get(ctx, resolved.ResourceQuotaName, metav1.GetOptions{})
 	if err != nil {
 		return nil, fmt.Errorf("failed to get tenant resource quota usage: %w", err)
 	}
 	usage := new(repository.ResourceQuotaUsage)
 	usage.Hard = resourceVectorFromList(quota.Status.Hard)
 	usage.Used = resourceVectorFromList(quota.Status.Used)
 	return usage, nil
 }
 // SuspendTenant revokes tenant API access by deleting only the RoleBinding.
 func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	binding = binding.WithDefaults()
@ -128,6 +147,82 @@ func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluste
 	return nil
 }
 // DeleteTenant removes a tenant's RoleBinding, ResourceQuota, ServiceAccount
 // and finally its namespace.
 //
 // Safety checks run before anything is deleted: the binding must validate,
 // the namespace must not be a protected one (entity.ErrProtectedNamespace),
 // and the namespace must carry the labels ocdp.io/managed-by=ocdp and
 // ocdp.io/tenant=<namespace>. A namespace without those labels is left
 // completely untouched and an error is returned. If the namespace no longer
 // exists the call succeeds, since there is nothing left to clean up.
 // Individual deletions treat "not found" as success, so the call can be
 // retried after a partial failure.
 func (c *TenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	binding = binding.WithDefaults()
 	if err := binding.Validate(); err != nil {
 		return err
 	}
 	if isProtectedTenantNamespace(binding.Namespace) {
 		return entity.ErrProtectedNamespace
 	}
 	clientset, _, err := c.clientsetForCluster(cluster)
 	if err != nil {
 		return err
 	}
 	// Verify ownership first so that a namespace OCDP does not manage is never
 	// partially torn down before the refusal.
 	namespace, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{})
 	if apierrors.IsNotFound(err) {
 		return nil
 	}
 	if err != nil {
 		return fmt.Errorf("failed to get tenant namespace before deletion: %w", err)
 	}
 	if namespace.Labels["ocdp.io/managed-by"] != "ocdp" || namespace.Labels["ocdp.io/tenant"] != binding.Namespace {
 		return fmt.Errorf("refusing to delete unmanaged namespace %q", binding.Namespace)
 	}
 	// Access is revoked first (RoleBinding); the namespace itself goes last.
 	steps := []struct {
 		what   string
 		remove func() error
 	}{
 		{"role binding", func() error {
 			return clientset.RbacV1().RoleBindings(binding.Namespace).Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{})
 		}},
 		{"resource quota", func() error {
 			return clientset.CoreV1().ResourceQuotas(binding.Namespace).Delete(ctx, binding.ResourceQuotaName, metav1.DeleteOptions{})
 		}},
 		{"service account", func() error {
 			return clientset.CoreV1().ServiceAccounts(binding.Namespace).Delete(ctx, binding.ServiceAccountName, metav1.DeleteOptions{})
 		}},
 		{"namespace", func() error {
 			return clientset.CoreV1().Namespaces().Delete(ctx, binding.Namespace, metav1.DeleteOptions{})
 		}},
 	}
 	for _, step := range steps {
 		if err := deleteIgnoringNotFound(ctx, step.remove); err != nil {
 			return fmt.Errorf("failed to delete tenant %s: %w", step.what, err)
 		}
 	}
 	return nil
 }
 // deleteIgnoringNotFound runs deleteFn unless ctx is already done, in which
 // case the context error is returned without calling it. A "not found" result
 // from deleteFn counts as success; any other error is returned unchanged.
 func deleteIgnoringNotFound(ctx context.Context, deleteFn func() error) error {
 	if ctxErr := ctx.Err(); ctxErr != nil {
 		return ctxErr
 	}
 	if err := deleteFn(); err != nil && !apierrors.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
 // isProtectedTenantNamespace reports whether the namespace, after trimming
 // surrounding whitespace, is empty or one of the namespaces that must never
 // be deleted as a tenant: default, kube-system, kube-public, kube-node-lease.
 func isProtectedTenantNamespace(namespace string) bool {
 	trimmed := strings.TrimSpace(namespace)
 	for _, reserved := range [...]string{"", "default", "kube-system", "kube-public", "kube-node-lease"} {
 		if trimmed == reserved {
 			return true
 		}
 	}
 	return false
 }
 // resourceVectorFromList converts a quota-style resource list into a
 // ResourceVector, reading the "requests.cpu", "requests.memory",
 // "requests.nvidia.com/gpu" and "requests.nvidia.com/gpumem" keys. Missing
 // keys produce zero quantities / zero integers.
 func resourceVectorFromList(values corev1.ResourceList) repository.ResourceVector {
 	cpu := values[corev1.ResourceName("requests.cpu")]
 	memory := values[corev1.ResourceName("requests.memory")]
 	gpuCount := values[corev1.ResourceName("requests.nvidia.com/gpu")]
 	gpuMemory := values[corev1.ResourceName("requests.nvidia.com/gpumem")]
 	var vector repository.ResourceVector
 	vector.CPU = cpu
 	vector.Memory = memory
 	vector.GPU = gpuCount.Value()
 	vector.GPUMemoryMB = gpuMemory.Value()
 	return vector
 }
 func (c *TenantClient) clientsetForCluster(cluster *entity.Cluster) (kubernetes.Interface, *rest.Config, error) {
 	if c.clientset != nil {
 		config := &rest.Config{Host: "https://kubernetes.default.svc"}
--- a/backend/internal/adapter/output/k8s/tenant_client_test.go
+++ b/backend/internal/adapter/output/k8s/tenant_client_test.go
@ -2,6 +2,7 @@ package k8s
 import (
 	"context"
 	"errors"
 	"strings"
 	"testing"
 	"time"
@ -58,7 +59,7 @@ func TestTenantClientEnsureTenantUpdatesExistingResources(t *testing.T) {
 	ctx := context.Background()
 	binding := tenantBinding()
 	clientset := fake.NewSimpleClientset(
-		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace}},
+		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}},
 		&corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}},
 		&rbacv1.RoleBinding{
 			ObjectMeta: metav1.ObjectMeta{Name: binding.RoleBindingName, Namespace: binding.Namespace},
@ -100,7 +101,7 @@ func TestTenantClientSuspendTenantDeletesOnlyRoleBinding(t *testing.T) {
 	ctx := context.Background()
 	binding := tenantBinding()
 	clientset := fake.NewSimpleClientset(
-		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace}},
+		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}},
 		&corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}},
 		desiredRoleBinding(binding),
 	)
@ -117,6 +118,47 @@ func TestTenantClientSuspendTenantDeletesOnlyRoleBinding(t *testing.T) {
 	}
 }
 // TestTenantClientDeleteTenantDeletesTenantResources seeds a fake clientset
 // with a labelled tenant namespace plus its ServiceAccount, RoleBinding and
 // ResourceQuota, then checks that DeleteTenant removes all four objects.
 func TestTenantClientDeleteTenantDeletesTenantResources(t *testing.T) {
 	ctx := context.Background()
 	binding := tenantBinding()
 	clientset := fake.NewSimpleClientset(
 		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: binding.Namespace, Labels: binding.Labels}},
 		&corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: binding.ServiceAccountName, Namespace: binding.Namespace}},
 		desiredRoleBinding(binding),
 		&corev1.ResourceQuota{ObjectMeta: metav1.ObjectMeta{Name: binding.ResourceQuotaName, Namespace: binding.Namespace}},
 	)
 	if err := NewTenantClientForClientset(clientset).DeleteTenant(ctx, nil, binding); err != nil {
 		t.Fatalf("DeleteTenant returned error: %v", err)
 	}
 	// Each lookup must now fail with NotFound, checked in the original order.
 	lookups := []struct {
 		what  string
 		fetch func() error
 	}{
 		{"role binding", func() error {
 			_, err := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{})
 			return err
 		}},
 		{"resource quota", func() error {
 			_, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{})
 			return err
 		}},
 		{"service account", func() error {
 			_, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{})
 			return err
 		}},
 		{"namespace", func() error {
 			_, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{})
 			return err
 		}},
 	}
 	for _, lookup := range lookups {
 		if err := lookup.fetch(); !apierrors.IsNotFound(err) {
 			t.Fatalf("expected %s deleted, got %v", lookup.what, err)
 		}
 	}
 }
 func TestTenantClientDeleteTenantRejectsProtectedNamespace(t *testing.T) {
 	ctx := context.Background()
 	client := NewTenantClientForClientset(fake.NewSimpleClientset(
 		&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "default"}},
 	))
 	binding := entity.NewTenantBinding("default")
 	err := client.DeleteTenant(ctx, nil, binding)
 	if !errors.Is(err, entity.ErrProtectedNamespace) {
 		t.Fatalf("expected protected namespace error, got %v", err)
 	}
 }
 func TestTenantClientIssueKubeconfigCapsTokenTTL(t *testing.T) {
 	ctx := context.Background()
 	binding := tenantBinding()
--- a/backend/internal/adapter/output/k8s/tenant_mock.go
+++ b/backend/internal/adapter/output/k8s/tenant_mock.go
@ -31,6 +31,28 @@ func (c *MockTenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.
 	}, nil
 }
 // GetResourceQuotaUsage is the mock counterpart of the real quota lookup: it
 // validates the binding and reports the binding's configured hard quota with
 // zero usage. No cluster is contacted.
 func (c *MockTenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) {
 	if validationErr := binding.Validate(); validationErr != nil {
 		return nil, validationErr
 	}
 	usage := new(repository.ResourceQuotaUsage)
 	usage.Hard = resourceVectorFromList(binding.ResourceQuotaHard)
 	// usage.Used keeps its zero value: the mock never reports consumption.
 	return usage, nil
 }
 // SuspendTenant only validates the binding in the mock; nothing is deleted.
 func (c *MockTenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	validationErr := binding.Validate()
 	return validationErr
 }
 // DeleteTenant mimics the guard rails of the real tenant deletion without
 // contacting a cluster: the binding must validate, and protected namespaces
 // are rejected with entity.ErrProtectedNamespace. The protection check is
 // delegated to isProtectedTenantNamespace so the mock and the real client
 // cannot drift apart (including whitespace trimming of the namespace name).
 func (c *MockTenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	if err := binding.Validate(); err != nil {
 		return err
 	}
 	if isProtectedTenantNamespace(binding.Namespace) {
 		return entity.ErrProtectedNamespace
 	}
 	return nil
 }
--- a/backend/internal/adapter/output/persistence/mock/user_repository_mock.go
+++ b/backend/internal/adapter/output/persistence/mock/user_repository_mock.go
@ -85,6 +85,17 @@ func (r *UserRepositoryMock) Delete(ctx context.Context, id string) error {
 	return nil
 }
 // AdminExists reports whether any stored user has the role "admin". The
 // lookup runs under the read lock and never returns an error.
 func (r *UserRepositoryMock) AdminExists(ctx context.Context) (bool, error) {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
 	found := false
 	for _, candidate := range r.users {
 		if candidate.Role == "admin" {
 			found = true
 			break
 		}
 	}
 	return found, nil
 }
 func (r *UserRepositoryMock) List(ctx context.Context) ([]*entity.User, error) {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
--- a/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go
+++ b/backend/internal/adapter/output/persistence/mock/workspace_repository_mock.go
@ -72,6 +72,16 @@ func (r *WorkspaceRepositoryMock) Update(ctx context.Context, workspace *entity.
 	return nil
 }
 // Delete removes the workspace with the given id under the write lock, or
 // returns entity.ErrWorkspaceNotFound when no such workspace is stored.
 func (r *WorkspaceRepositoryMock) Delete(ctx context.Context, id string) error {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	_, exists := r.workspaces[id]
 	if exists {
 		delete(r.workspaces, id)
 		return nil
 	}
 	return entity.ErrWorkspaceNotFound
 }
 func (r *WorkspaceRepositoryMock) List(ctx context.Context) ([]*entity.Workspace, error) {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
@ -118,6 +128,20 @@ func (r *WorkspaceClusterBindingRepositoryMock) Get(ctx context.Context, workspa
 	return &copy, nil
 }
 // ListByWorkspace returns copies of all stored bindings whose WorkspaceID
 // equals workspaceID. The result is an empty, non-nil slice when nothing
 // matches; callers receive copies, so mutating them does not affect the mock.
 func (r *WorkspaceClusterBindingRepositoryMock) ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error) {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
 	matches := make([]*entity.WorkspaceClusterBinding, 0)
 	for _, stored := range r.bindings {
 		if stored.WorkspaceID == workspaceID {
 			clone := *stored
 			matches = append(matches, &clone)
 		}
 	}
 	return matches, nil
 }
 func (r *WorkspaceClusterBindingRepositoryMock) Delete(ctx context.Context, workspaceID, clusterID string) error {
 	r.mu.Lock()
 	defer r.mu.Unlock()
--- a/backend/internal/adapter/output/persistence/postgres/user_repository.go
+++ b/backend/internal/adapter/output/persistence/postgres/user_repository.go
@ -120,6 +120,12 @@ func (r *UserRepository) GetByUsername(ctx context.Context, username string) (*e
 }
 // AdminExists reports whether at least one row in the users table has
 // role = 'admin'. The query/scan error is returned as-is, without wrapping.
 func (r *UserRepository) AdminExists(ctx context.Context) (bool, error) {
 	var exists bool
 	err := r.db.conn.QueryRowContext(ctx, `SELECT EXISTS(SELECT 1 FROM users WHERE role = 'admin')`).Scan(&exists)
 	return exists, err
 }
 // Update updates a user.
 func (r *UserRepository) Update(ctx context.Context, user *entity.User) error {
 	user.UpdatedAt = time.Now()
--- a/backend/internal/adapter/output/persistence/postgres/workspace_repository.go
+++ b/backend/internal/adapter/output/persistence/postgres/workspace_repository.go
@ -27,8 +27,9 @@ func (r *WorkspaceRepository) Create(ctx context.Context, workspace *entity.Work
 	query := `
 		INSERT INTO workspaces (id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at)
 		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
 		ON CONFLICT (name) DO NOTHING
 	`
-	_, err := r.db.conn.ExecContext(ctx, query,
+	result, err := r.db.conn.ExecContext(ctx, query,
 		workspace.ID,
 		workspace.Name,
 		workspace.Status,
@ -46,6 +47,13 @@ func (r *WorkspaceRepository) Create(ctx context.Context, workspace *entity.Work
 	if err != nil {
 		return fmt.Errorf("failed to create workspace: %w", err)
 	}
 	rows, err := result.RowsAffected()
 	if err != nil {
 		return fmt.Errorf("failed to get affected rows: %w", err)
 	}
 	if rows == 0 {
 		return entity.ErrWorkspaceExists
 	}
 	return nil
 }
@ -132,6 +140,21 @@ func (r *WorkspaceRepository) Update(ctx context.Context, workspace *entity.Work
 	return nil
 }
 // Delete removes the workspace row with the given id. It returns
 // entity.ErrWorkspaceNotFound when the statement affected no rows.
 func (r *WorkspaceRepository) Delete(ctx context.Context, id string) error {
 	const statement = `DELETE FROM workspaces WHERE id = $1`
 	outcome, err := r.db.conn.ExecContext(ctx, statement, id)
 	if err != nil {
 		return fmt.Errorf("failed to delete workspace: %w", err)
 	}
 	affected, err := outcome.RowsAffected()
 	switch {
 	case err != nil:
 		return fmt.Errorf("failed to get affected rows: %w", err)
 	case affected == 0:
 		return entity.ErrWorkspaceNotFound
 	}
 	return nil
 }
 func (r *WorkspaceRepository) List(ctx context.Context) ([]*entity.Workspace, error) {
 	query := `
 		SELECT id, name, status, k8s_namespace, k8s_sa_name, default_cluster_id, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, created_by, created_at, updated_at
@ -256,6 +279,42 @@ func (r *WorkspaceClusterBindingRepository) Get(ctx context.Context, workspaceID
 	return binding, nil
 }
 // ListByWorkspace returns every cluster binding of the given workspace,
 // ordered by creation time (oldest first). The slice is empty but non-nil
 // when the workspace has no bindings. An iteration error reported by the row
 // cursor is returned together with the bindings read so far.
 func (r *WorkspaceClusterBindingRepository) ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error) {
 	const listQuery = `
 		SELECT id, workspace_id, cluster_id, namespace, service_account, quota_cpu, quota_memory, quota_gpu, quota_gpu_memory, status, created_at, updated_at
 		FROM workspace_cluster_bindings
 		WHERE workspace_id = $1
 		ORDER BY created_at ASC
 	`
 	cursor, err := r.db.conn.QueryContext(ctx, listQuery, workspaceID)
 	if err != nil {
 		return nil, fmt.Errorf("failed to list workspace cluster bindings: %w", err)
 	}
 	defer cursor.Close()
 	found := make([]*entity.WorkspaceClusterBinding, 0)
 	for cursor.Next() {
 		// Declared inside the loop so every iteration appends a distinct record.
 		var record entity.WorkspaceClusterBinding
 		scanErr := cursor.Scan(
 			&record.ID,
 			&record.WorkspaceID,
 			&record.ClusterID,
 			&record.Namespace,
 			&record.ServiceAccount,
 			&record.QuotaCPU,
 			&record.QuotaMemory,
 			&record.QuotaGPU,
 			&record.QuotaGPUMem,
 			&record.Status,
 			&record.CreatedAt,
 			&record.UpdatedAt,
 		)
 		if scanErr != nil {
 			return nil, fmt.Errorf("failed to scan workspace cluster binding: %w", scanErr)
 		}
 		found = append(found, &record)
 	}
 	return found, cursor.Err()
 }
 func (r *WorkspaceClusterBindingRepository) Delete(ctx context.Context, workspaceID, clusterID string) error {
 	_, err := r.db.conn.ExecContext(ctx, `DELETE FROM workspace_cluster_bindings WHERE workspace_id = $1 AND cluster_id = $2`, workspaceID, clusterID)
 	return err
--- a/backend/internal/bootstrap/config.go
+++ b/backend/internal/bootstrap/config.go
@ -138,7 +138,9 @@ func loadBootstrapConfigFromEnv() (*BootstrapConfig, bool) {
 		})
 	}
-	if parseBoolEnv("BOOTSTRAP_ENABLE_CLUSTERS", false) {
+	enableClusters := parseBoolEnv("BOOTSTRAP_ENABLE_CLUSTERS", false) ||
 		os.Getenv("BOOTSTRAP_CLUSTERS") != ""
 	if enableClusters {
 		for _, clusterName := range discoverBootstrapClusters() {
 			prefix := "BOOTSTRAP_CLUSTER_" + normalizeEnvName(clusterName) + "_"
 			host := os.Getenv(prefix + "HOST")
--- a/backend/internal/domain/entity/errors.go
+++ b/backend/internal/domain/entity/errors.go
@ -43,6 +43,9 @@ var (
 	ErrValuesSchemaNotFound = errors.New("values schema not found")
 	// Workspace errors
-	ErrWorkspaceNotFound = errors.New("workspace not found")
+	ErrWorkspaceNotFound          = errors.New("workspace not found")
-	ErrWorkspaceExists   = errors.New("workspace already exists")
+	ErrWorkspaceExists            = errors.New("workspace already exists")
 	ErrWorkspaceNamespaceConflict = errors.New("workspace namespace conflict")
 	ErrUserHasInstances           = errors.New("user has active instances")
 	ErrProtectedNamespace         = errors.New("protected namespace")
 )
--- a/backend/internal/domain/entity/instance.go
+++ b/backend/internal/domain/entity/instance.go
@ -53,6 +53,8 @@ type Instance struct {
 	Revision      int // Helm Release Revision
 	CreatedAt     time.Time
 	UpdatedAt     time.Time
 	Replicas      int // Running K8s replicas (enriched, not persisted)
 	OwnerUsername string
 }
 // NewInstance 创建新实例
--- a/backend/internal/domain/entity/metrics.go
+++ b/backend/internal/domain/entity/metrics.go
@ -25,6 +25,18 @@ type ClusterMetrics struct {
 	MemoryUsage float64 `json:"memory_usage"` // 百分比
 	GPUUsage    float64 `json:"gpu_usage"`    // 百分比
 	CPURequests          string              `json:"cpu_requests,omitempty"`
 	CPULimits            string              `json:"cpu_limits,omitempty"`
 	MemoryRequests       string              `json:"memory_requests,omitempty"`
 	MemoryLimits         string              `json:"memory_limits,omitempty"`
 	GPURequests          int64               `json:"gpu_requests,omitempty"`
 	GPULimits            int64               `json:"gpu_limits,omitempty"`
 	GPUMemoryRequestsMB  int64               `json:"gpu_memory_requests_mb,omitempty"`
 	GPUMemoryLimitsMB    int64               `json:"gpu_memory_limits_mb,omitempty"`
 	AllocatedGPU         int64               `json:"allocated_gpu,omitempty"`
 	AllocatedGPUMemoryMB int64               `json:"allocated_gpu_memory_mb,omitempty"`
 	ResourceUsageByUser  []UserResourceUsage `json:"resource_usage_by_user,omitempty"`
 	// 单机资源最大值
 	MaxNodeCPU      string  `json:"max_node_cpu"`       // 单机最大CPU容量，如 "8 cores"
 	MaxNodeMemory   string  `json:"max_node_memory"`    // 单机最大内存容量，如 "32 GB"
@ -37,6 +49,42 @@ type ClusterMetrics struct {
 	Nodes []NodeMetrics `json:"nodes,omitempty"`
 }
 // ResourceAllocation is derived from Kubernetes Pod resources requests/limits.
 // Every resource is tracked twice: once for requests and once for limits.
 // Units are implied only by the field-name suffixes (Milli, Bytes, MB); the
 // exact scale (e.g. millicores, MB vs MiB) is an assumption — TODO confirm
 // against the code that populates this struct.
 type ResourceAllocation struct {
 	CPURequestsMilli    int64
 	CPULimitsMilli      int64
 	MemoryRequestsBytes int64
 	MemoryLimitsBytes   int64
 	GPURequests         int64
 	GPULimits           int64
 	GPUMemoryRequestsMB int64
 	GPUMemoryLimitsMB   int64
 }
 // PodResourceAllocation pairs the identifying fields of one Pod with the
 // ResourceAllocation recorded for it.
 type PodResourceAllocation struct {
 	ClusterID    string
 	Namespace    string
 	PodName      string
 	// NOTE(review): presumably the instance/release the Pod belongs to; how it
 	// is derived is not visible here — confirm against the producer.
 	InstanceName string
 	Allocation   ResourceAllocation
 }
 // UserResourceUsage is the JSON-serialisable per-user resource summary.
 // CPU and memory figures are carried as strings, GPU figures as integers.
 // NOTE(review): the string format of the CPU/memory fields is not defined
 // here — confirm against the code that fills them in.
 type UserResourceUsage struct {
 	UserID              string `json:"user_id"`
 	Username            string `json:"username"`
 	WorkspaceID         string `json:"workspace_id"`
 	InstanceCount       int    `json:"instance_count"`
 	PodCount            int    `json:"pod_count"`
 	CPURequests         string `json:"cpu_requests"`
 	CPULimits           string `json:"cpu_limits"`
 	MemoryRequests      string `json:"memory_requests"`
 	MemoryLimits        string `json:"memory_limits"`
 	GPURequests         int64  `json:"gpu_requests"`
 	GPULimits           int64  `json:"gpu_limits"`
 	GPUMemoryRequestsMB int64  `json:"gpu_memory_requests_mb"`
 	GPUMemoryLimitsMB   int64  `json:"gpu_memory_limits_mb"`
 }
 // NodeMetrics 节点监控指标
 type NodeMetrics struct {
 	NodeName string `json:"node_name"`
--- a/backend/internal/domain/repository/helm_client.go
+++ b/backend/internal/domain/repository/helm_client.go
@ -3,8 +3,21 @@ package repository
 import (
 	"context"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 // ResourceVector groups one value per resource dimension: CPU and memory as
 // Kubernetes resource.Quantity values, GPU count and GPU memory as integers.
 type ResourceVector struct {
 	CPU         resource.Quantity
 	Memory      resource.Quantity
 	GPU         int64
 	GPUMemoryMB int64 // unit taken from the field name — TODO confirm MB vs MiB
 }
 // ResourceEstimate holds a requests vector and a limits vector. It is the
 // result type of HelmClient.EstimateInstanceResources.
 type ResourceEstimate struct {
 	Requests ResourceVector
 	Limits   ResourceVector
 }
 // HelmClient Helm 客户端接口（Output Port）
 type HelmClient interface {
 	// Install 安装 Helm Chart
@ -30,4 +43,10 @@ type HelmClient interface {
 	// GetValues 获取 Release 的 values
 	GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error)
 	// GetChartDefaultValues 从 chart 包中读取默认 values
 	GetChartDefaultValues(chartPath string) (map[string]interface{}, error)
 	// EstimateInstanceResources renders an instance chart with final values and sums Pod template resources.
 	EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*ResourceEstimate, error)
 }
--- a/backend/internal/domain/repository/metrics_client.go
+++ b/backend/internal/domain/repository/metrics_client.go
@ -13,4 +13,7 @@ type MetricsClient interface {
 	// GetNodeMetrics 获取集群的节点指标
 	GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error)
 	// GetPodResourceAllocations returns Pod requests/limits grouped by Pod.
 	GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error)
 }
--- a/backend/internal/domain/repository/tenant_kube_client.go
+++ b/backend/internal/domain/repository/tenant_kube_client.go
@ -11,5 +11,12 @@ import (
 // TenantKubeClient is the port used to manage a tenant (described by an
 // entity.TenantBinding) on a given cluster. The notes below are inferred from
 // the method names and signatures only — confirm against the implementation.
 type TenantKubeClient interface {
 	// EnsureTenant presumably creates or reconciles the tenant's resources.
 	EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error
 	// IssueKubeconfig returns a tenant kubeconfig; ttl presumably bounds its validity.
 	IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error)
 	// GetResourceQuotaUsage returns the hard/used resource vectors for the tenant.
 	GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*ResourceQuotaUsage, error)
 	// SuspendTenant: semantics not visible here — TODO document once confirmed.
 	SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error
 	// DeleteTenant presumably removes the tenant's resources from the cluster.
 	DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error
 }
 // ResourceQuotaUsage is the result of TenantKubeClient.GetResourceQuotaUsage.
 // NOTE(review): Hard/Used presumably mirror the hard and used sections of a
 // Kubernetes ResourceQuota status — confirm against the client implementation.
 type ResourceQuotaUsage struct {
 	Hard ResourceVector
 	Used ResourceVector
 }
--- a/backend/internal/domain/repository/user_repository.go
+++ b/backend/internal/domain/repository/user_repository.go
@ -24,4 +24,7 @@ type UserRepository interface {
 	// List 列出所有用户
 	List(ctx context.Context) ([]*entity.User, error)
 	// AdminExists checks whether any admin user exists (lightweight EXISTS query)
 	AdminExists(ctx context.Context) (bool, error)
 }
--- a/backend/internal/domain/repository/workspace_repository.go
+++ b/backend/internal/domain/repository/workspace_repository.go
@ -11,12 +11,14 @@ type WorkspaceRepository interface {
 	GetByID(ctx context.Context, id string) (*entity.Workspace, error)
 	GetByName(ctx context.Context, name string) (*entity.Workspace, error)
 	Update(ctx context.Context, workspace *entity.Workspace) error
 	Delete(ctx context.Context, id string) error
 	List(ctx context.Context) ([]*entity.Workspace, error)
 }
 // WorkspaceClusterBindingRepository persists entity.WorkspaceClusterBinding
 // records, addressed by the (workspaceID, clusterID) pair.
 type WorkspaceClusterBindingRepository interface {
 	// Upsert stores the binding (insert-or-update, judging by the name).
 	Upsert(ctx context.Context, binding *entity.WorkspaceClusterBinding) error
 	// Get returns the binding for one workspace/cluster pair.
 	Get(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error)
 	// ListByWorkspace returns the bindings that belong to workspaceID.
 	ListByWorkspace(ctx context.Context, workspaceID string) ([]*entity.WorkspaceClusterBinding, error)
 	// Delete removes the binding for one workspace/cluster pair.
 	Delete(ctx context.Context, workspaceID, clusterID string) error
 }
--- a/backend/internal/domain/service/auth_service.go
+++ b/backend/internal/domain/service/auth_service.go
@ -2,6 +2,7 @@ package service
 import (
 	"context"
 	"errors"
 	"strings"
 	"time"
@ -18,6 +19,10 @@ import (
 // AuthService bundles the repositories and helpers used by the user
 // authentication and user-lifecycle operations.
 type AuthService struct {
 	userRepo       repository.UserRepository
 	workspaceRepo  repository.WorkspaceRepository
 	// instanceRepo, clusterRepo, bindingRepo and tenantClient are assigned by
 	// SetUserLifecycleCleanup. The methods that use them nil-check first, so
 	// they may legitimately be unset.
 	instanceRepo   repository.InstanceRepository
 	clusterRepo    repository.ClusterRepository
 	bindingRepo    repository.WorkspaceClusterBindingRepository
 	tenantClient   repository.TenantKubeClient
 	passwordHasher PasswordHasher
 	tokenGenerator TokenGenerator
 }
@ -53,6 +58,18 @@ func NewAuthService(
 	}
 }
 // SetUserLifecycleCleanup wires in the collaborators used when users are
 // updated or deleted: the instance, cluster and binding repositories plus the
 // tenant client. Any of them may be nil.
 func (s *AuthService) SetUserLifecycleCleanup(
 	instanceRepo repository.InstanceRepository,
 	clusterRepo repository.ClusterRepository,
 	bindingRepo repository.WorkspaceClusterBindingRepository,
 	tenantClient repository.TenantKubeClient,
 ) {
 	s.instanceRepo, s.clusterRepo = instanceRepo, clusterRepo
 	s.bindingRepo, s.tenantClient = bindingRepo, tenantClient
 }
 // Register 注册新用户。业务入口只允许 admin 调用；初始 admin 由 bootstrap seeder 创建。
 type UserWorkspaceOptions struct {
 	Namespace        string
@ -63,6 +80,55 @@ type UserWorkspaceOptions struct {
 	QuotaGPUMem      string
 }
 // defaultEmail builds the placeholder e-mail address for username by
 // appending the fixed "@local.ocdp" suffix.
 func defaultEmail(username string) string {
 	const placeholderSuffix = "@local.ocdp"
 	return username + placeholderSuffix
 }
 // IsAdminExists reports whether at least one admin user is already stored,
 // delegating to the user repository's AdminExists query.
 func (s *AuthService) IsAdminExists(ctx context.Context) (bool, error) {
 	exists, err := s.userRepo.AdminExists(ctx)
 	return exists, err
 }
 // SetupInitialAdmin bootstraps the very first admin account.
 //
 // It returns entity.ErrForbidden when an admin already exists. Otherwise it
 // hashes the password, falls back to defaultEmail when email is empty, creates
 // the user with the admin role in the default workspace, and returns the user
 // together with a freshly generated access token and refresh token.
 func (s *AuthService) SetupInitialAdmin(ctx context.Context, username, password, email string) (*entity.User, string, string, error) {
 	adminPresent, err := s.IsAdminExists(ctx)
 	if err != nil {
 		return nil, "", "", err
 	}
 	if adminPresent {
 		return nil, "", "", entity.ErrForbidden
 	}
 	hashed, err := s.passwordHasher.Hash(password)
 	if err != nil {
 		return nil, "", "", err
 	}
 	address := email
 	if address == "" {
 		address = defaultEmail(username)
 	}
 	admin := entity.NewUser(username, hashed, address)
 	admin.ID = uuid.New().String()
 	admin.Role = authz.RoleAdmin
 	admin.WorkspaceID = entity.DefaultWorkspaceID
 	if err = admin.Validate(); err != nil {
 		return nil, "", "", err
 	}
 	if err = s.userRepo.Create(ctx, admin); err != nil {
 		return nil, "", "", err
 	}
 	// Issue the tokens here so the caller does not need a separate login call.
 	access, refresh, err := s.tokenGenerator.Generate(admin.ID, admin.Username, admin.Role, admin.WorkspaceID)
 	if err != nil {
 		return nil, "", "", err
 	}
 	return admin, access, refresh, nil
 }
 func (s *AuthService) Register(ctx context.Context, username, password, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) {
 	principal, err := authz.RequirePrincipal(ctx)
 	if err != nil {
@ -87,16 +153,19 @@ func (s *AuthService) Register(ctx context.Context, username, password, role, wo
 	if err != nil {
 		return nil, err
 	}
 	if normalizeUserRole(role) == authz.RoleUser {
 		normalizedOpts = defaultUserQuotaOptions(normalizedOpts)
 	}
 	// 默认生成占位邮箱，避免数据库约束失败
-	email := username + "@local.ocdp"
+	email := defaultEmail(username)
 	// 创建用户
 	user := entity.NewUser(username, passwordHash, email)
 	user.ID = uuid.New().String()
 	user.Role = normalizeUserRole(role)
 	user.WorkspaceID = workspaceID
-	if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) {
+	if user.Role == authz.RoleUser {
 		workspace, err := s.createUserWorkspace(ctx, username, principal.UserID, normalizedOpts)
 		if err != nil {
 			return nil, err
@ -131,10 +200,7 @@ func (s *AuthService) createUserWorkspace(ctx context.Context, username, created
 	if s.workspaceRepo == nil {
 		return nil, entity.ErrWorkspaceNotFound
 	}
-	name := strings.TrimPrefix(entity.NamespaceForUser(username), "ocdp-u-")
+	name := userWorkspaceName(username)
 	workspace := entity.NewWorkspace(name, createdBy)
 	workspace.ID = uuid.New().String()
 	workspace.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID)
 	namespace := strings.TrimSpace(opts.Namespace)
 	if namespace == "" {
 		namespace = entity.NamespaceForUser(username)
@ -143,6 +209,32 @@ func (s *AuthService) createUserWorkspace(ctx context.Context, username, created
 		if len(validation.IsDNS1123Label(namespace)) > 0 {
 			return nil, entity.ErrInvalidNamespace
 		}
 	}
 	if existing, err := s.workspaceRepo.GetByName(ctx, name); err == nil && existing != nil {
 		if namespace != "" && existing.K8sNamespace != namespace {
 			if err := s.ensureNamespaceAvailable(ctx, namespace, existing.ID); err != nil {
 				return nil, err
 			}
 		}
 		applyWorkspaceOptions(existing, opts)
 		if namespace != "" {
 			existing.K8sNamespace = namespace
 			existing.K8sSAName = entity.ServiceAccountForNamespace(namespace)
 		}
 		if err := s.workspaceRepo.Update(ctx, existing); err != nil {
 			return nil, err
 		}
 		return existing, nil
 	} else if err != nil && !errors.Is(err, entity.ErrWorkspaceNotFound) {
 		return nil, err
 	}
 	if err := s.ensureNamespaceAvailable(ctx, namespace, ""); err != nil {
 		return nil, err
 	}
 	workspace := entity.NewWorkspace(name, createdBy)
 	workspace.ID = uuid.New().String()
 	workspace.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID)
 	if namespace != "" {
 		workspace.K8sNamespace = namespace
 		workspace.K8sSAName = entity.ServiceAccountForNamespace(namespace)
 	}
@ -151,11 +243,45 @@ func (s *AuthService) createUserWorkspace(ctx context.Context, username, created
 	workspace.QuotaGPU = strings.TrimSpace(opts.QuotaGPU)
 	workspace.QuotaGPUMem = strings.TrimSpace(opts.QuotaGPUMem)
 	if err := s.workspaceRepo.Create(ctx, workspace); err != nil {
 		if errors.Is(err, entity.ErrWorkspaceExists) {
 			existing, getErr := s.workspaceRepo.GetByName(ctx, name)
 			if getErr != nil {
 				return nil, err
 			}
 			if existing.K8sNamespace != namespace {
 				return nil, entity.ErrWorkspaceNamespaceConflict
 			}
 			return existing, nil
 		}
 		return nil, err
 	}
 	return workspace, nil
 }
 // userWorkspaceName derives the private workspace name for username: the
 // user's namespace (entity.NamespaceForUser) with a leading "ocdp-u-" removed.
 func userWorkspaceName(username string) string {
 	const userNamespacePrefix = "ocdp-u-"
 	namespace := entity.NamespaceForUser(username)
 	return strings.TrimPrefix(namespace, userNamespacePrefix)
 }
 // ensureNamespaceAvailable verifies that no workspace other than
 // allowedWorkspaceID already uses namespace as its Kubernetes namespace.
 //
 // It returns nil when no workspace repository is configured or when namespace
 // is blank, entity.ErrWorkspaceNamespaceConflict when another workspace owns
 // the namespace, and any error from listing workspaces otherwise.
 func (s *AuthService) ensureNamespaceAvailable(ctx context.Context, namespace, allowedWorkspaceID string) error {
 	// Normalise once so the emptiness guard and the comparison below agree;
 	// a padded value must not slip past the conflict check.
 	namespace = strings.TrimSpace(namespace)
 	if s.workspaceRepo == nil || namespace == "" {
 		return nil
 	}
 	workspaces, err := s.workspaceRepo.List(ctx)
 	if err != nil {
 		return err
 	}
 	for _, workspace := range workspaces {
 		if workspace == nil || workspace.K8sNamespace != namespace {
 			continue
 		}
 		// The workspace being updated is allowed to keep its own namespace.
 		if allowedWorkspaceID != "" && workspace.ID == allowedWorkspaceID {
 			continue
 		}
 		return entity.ErrWorkspaceNamespaceConflict
 	}
 	return nil
 }
 func normalizeQuotaOptions(opts UserWorkspaceOptions) (UserWorkspaceOptions, error) {
 	opts.Namespace = strings.TrimSpace(opts.Namespace)
 	opts.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID)
@ -181,6 +307,16 @@ func normalizeQuotaOptions(opts UserWorkspaceOptions) (UserWorkspaceOptions, err
 	return opts, nil
 }
 // defaultUserQuotaOptions returns a copy of opts in which a blank GPU quota
 // and a blank GPU-memory quota are each replaced by "0". Other fields are
 // left untouched.
 func defaultUserQuotaOptions(opts UserWorkspaceOptions) UserWorkspaceOptions {
 	// opts is already a by-value copy, so writing through these pointers does
 	// not affect the caller's struct.
 	for _, quota := range []*string{&opts.QuotaGPU, &opts.QuotaGPUMem} {
 		if strings.TrimSpace(*quota) == "" {
 			*quota = "0"
 		}
 	}
 	return opts
 }
 func (s *AuthService) ListUsers(ctx context.Context) ([]*entity.User, error) {
 	principal, err := authz.RequirePrincipal(ctx)
 	if err != nil {
@ -204,25 +340,35 @@ func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID
 	if err != nil {
 		return nil, entity.ErrUserNotFound
 	}
 	previousRole := user.Role
 	if role != "" {
 		user.Role = normalizeUserRole(role)
 	}
-	if workspaceID != "" {
+	if workspaceID != "" && user.Role != authz.RoleUser {
 		user.WorkspaceID = workspaceID
 	}
 	workspaceHandled := false
 	if user.Role == authz.RoleAdmin {
 		user.WorkspaceID = entity.DefaultWorkspaceID
 	}
-	if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) {
+	if user.Role == authz.RoleUser && (role != "" || workspaceID != "" || hasWorkspaceUpdates(opts)) {
 		normalizedOpts, err := normalizeQuotaOptions(opts)
 		if err != nil {
 			return nil, err
 		}
-		workspace, err := s.createUserWorkspace(ctx, user.Username, principal.UserID, normalizedOpts)
+		normalizedOpts = defaultUserQuotaOptions(normalizedOpts)
 		currentWorkspace, _ := s.currentUserWorkspace(ctx, user)
 		if currentWorkspace != nil && shouldCreatePrivateWorkspace(user, previousRole, currentWorkspace) {
 			if normalizedOpts.Namespace == "" || normalizedOpts.Namespace == currentWorkspace.K8sNamespace {
 				normalizedOpts.Namespace = ""
 			}
 		}
 		workspace, err := s.ensureUserWorkspaceForUpdate(ctx, user, previousRole, currentWorkspace, opts, normalizedOpts, principal.UserID)
 		if err != nil {
 			return nil, err
 		}
 		user.WorkspaceID = workspace.ID
 		workspaceHandled = true
 	}
 	if isActive != nil {
 		if user.ID == principal.UserID && !*isActive {
@ -233,7 +379,7 @@ func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID
 	if mustChangePassword != nil {
 		user.MustChangePassword = *mustChangePassword
 	}
-	if user.Role != authz.RoleAdmin && hasWorkspaceUpdates(opts) {
+	if user.Role != authz.RoleAdmin && !workspaceHandled && hasWorkspaceUpdates(opts) {
 		normalizedOpts, err := normalizeQuotaOptions(opts)
 		if err != nil {
 			return nil, err
@ -242,10 +388,13 @@ func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID
 		if err != nil {
 			return nil, err
 		}
-		applyWorkspaceOptions(workspace, normalizedOpts)
+		applyWorkspaceOptionsForUpdate(workspace, opts, normalizedOpts)
 		if err := s.workspaceRepo.Update(ctx, workspace); err != nil {
 			return nil, err
 		}
 		if err := s.syncWorkspaceBindings(ctx, workspace); err != nil {
 			return nil, err
 		}
 	}
 	user.RevokedAfter = time.Now()
 	user.UpdatedAt = time.Now()
@ -289,6 +438,115 @@ func applyWorkspaceOptions(workspace *entity.Workspace, opts UserWorkspaceOption
 	}
 }
 // currentUserWorkspace loads the workspace referenced by user.WorkspaceID.
 // It returns entity.ErrWorkspaceNotFound without querying when there is no
 // workspace repository, no user, or no workspace ID to look up.
 func (s *AuthService) currentUserWorkspace(ctx context.Context, user *entity.User) (*entity.Workspace, error) {
 	lookupPossible := s.workspaceRepo != nil && user != nil && user.WorkspaceID != ""
 	if !lookupPossible {
 		return nil, entity.ErrWorkspaceNotFound
 	}
 	return s.workspaceRepo.GetByID(ctx, user.WorkspaceID)
 }
 // shouldCreatePrivateWorkspace decides whether the user needs a private
 // workspace to be created (or looked up by name) rather than updating the
 // current one. It answers true when the user or current workspace is missing,
 // when the user was previously an admin, when the user has no workspace or is
 // in the default workspace, or when the current workspace's name is not the
 // one derived from the username.
 func shouldCreatePrivateWorkspace(user *entity.User, previousRole string, current *entity.Workspace) bool {
 	if user == nil || current == nil || previousRole == authz.RoleAdmin {
 		return true
 	}
 	inSharedWorkspace := user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID
 	if inSharedWorkspace {
 		return true
 	}
 	expectedName := userWorkspaceName(user.Username)
 	return current.Name != expectedName
 }
 // ensureUserWorkspaceForUpdate returns the workspace a regular user should be
 // attached to after an update.
 //
 // When shouldCreatePrivateWorkspace says so, it delegates to
 // createUserWorkspace with normalizedOpts. Otherwise it updates the current
 // workspace in place: a changed namespace is first checked for conflicts, the
 // options the caller actually supplied (rawOpts) are applied, the workspace is
 // persisted, and its cluster bindings are synchronised.
 //
 // It returns entity.ErrWorkspaceNotFound when no workspace repository is
 // configured and entity.ErrUserNotFound when user is nil.
 func (s *AuthService) ensureUserWorkspaceForUpdate(ctx context.Context, user *entity.User, previousRole string, current *entity.Workspace, rawOpts, normalizedOpts UserWorkspaceOptions, createdBy string) (*entity.Workspace, error) {
 	if s.workspaceRepo == nil {
 		return nil, entity.ErrWorkspaceNotFound
 	}
 	// shouldCreatePrivateWorkspace answers true for a nil user, which would
 	// lead straight to a nil dereference of user.Username below.
 	if user == nil {
 		return nil, entity.ErrUserNotFound
 	}
 	if shouldCreatePrivateWorkspace(user, previousRole, current) {
 		return s.createUserWorkspace(ctx, user.Username, createdBy, normalizedOpts)
 	}
 	// From here on current is non-nil: a nil current makes
 	// shouldCreatePrivateWorkspace return true above.
 	if rawNamespace := strings.TrimSpace(rawOpts.Namespace); rawNamespace != "" && rawNamespace != current.K8sNamespace {
 		if err := s.ensureNamespaceAvailable(ctx, rawNamespace, current.ID); err != nil {
 			return nil, err
 		}
 	}
 	applyWorkspaceOptionsForUpdate(current, rawOpts, normalizedOpts)
 	if err := s.workspaceRepo.Update(ctx, current); err != nil {
 		return nil, err
 	}
 	if err := s.syncWorkspaceBindings(ctx, current); err != nil {
 		return nil, err
 	}
 	return current, nil
 }
 // applyWorkspaceOptionsForUpdate copies onto workspace only the options the
 // caller actually supplied. A field counts as supplied when its value in
 // rawOpts is non-blank; the value written is the one from normalizedOpts,
 // except for the namespace, which is the trimmed raw value and also
 // determines the service-account name.
 func applyWorkspaceOptionsForUpdate(workspace *entity.Workspace, rawOpts, normalizedOpts UserWorkspaceOptions) {
 	supplied := func(raw string) bool { return strings.TrimSpace(raw) != "" }
 	if supplied(rawOpts.Namespace) {
 		trimmed := strings.TrimSpace(rawOpts.Namespace)
 		workspace.K8sNamespace = trimmed
 		workspace.K8sSAName = entity.ServiceAccountForNamespace(trimmed)
 	}
 	overrides := []struct {
 		raw        string
 		normalized string
 		target     *string
 	}{
 		{rawOpts.DefaultClusterID, normalizedOpts.DefaultClusterID, &workspace.DefaultClusterID},
 		{rawOpts.QuotaCPU, normalizedOpts.QuotaCPU, &workspace.QuotaCPU},
 		{rawOpts.QuotaMemory, normalizedOpts.QuotaMemory, &workspace.QuotaMemory},
 		{rawOpts.QuotaGPU, normalizedOpts.QuotaGPU, &workspace.QuotaGPU},
 		{rawOpts.QuotaGPUMem, normalizedOpts.QuotaGPUMem, &workspace.QuotaGPUMem},
 	}
 	for _, override := range overrides {
 		if supplied(override.raw) {
 			*override.target = override.normalized
 		}
 	}
 }
 // syncWorkspaceBindings pushes the workspace's quota settings onto each of
 // its cluster bindings.
 //
 // It is a no-op when workspace is nil or no binding repository is configured.
 // For each non-nil binding it copies the trimmed quotas from the workspace
 // (blank GPU and GPU-memory quotas become "0"), calls EnsureTenant on the
 // binding's cluster when both a tenant client and a cluster repository are
 // configured, and then upserts the binding.
 //
 // The first error aborts the loop; bindings processed before it have already
 // been applied and persisted.
 func (s *AuthService) syncWorkspaceBindings(ctx context.Context, workspace *entity.Workspace) error {
 	if workspace == nil || s.bindingRepo == nil {
 		return nil
 	}
 	bindings, err := s.bindingRepo.ListByWorkspace(ctx, workspace.ID)
 	if err != nil {
 		return err
 	}
 	for _, binding := range bindings {
 		if binding == nil {
 			continue
 		}
 		binding.QuotaCPU = strings.TrimSpace(workspace.QuotaCPU)
 		binding.QuotaMemory = strings.TrimSpace(workspace.QuotaMemory)
 		binding.QuotaGPU = strings.TrimSpace(workspace.QuotaGPU)
 		if binding.QuotaGPU == "" {
 			binding.QuotaGPU = "0"
 		}
 		binding.QuotaGPUMem = strings.TrimSpace(workspace.QuotaGPUMem)
 		if binding.QuotaGPUMem == "" {
 			binding.QuotaGPUMem = "0"
 		}
 		binding.UpdatedAt = time.Now()
 		if s.tenantClient != nil && s.clusterRepo != nil {
 			cluster, err := s.clusterRepo.GetByID(ctx, binding.ClusterID)
 			if err != nil {
 				if errors.Is(err, entity.ErrClusterNotFound) {
 					// NOTE(review): this skips the Upsert below as well, so the
 					// new quotas are not persisted for a binding whose cluster
 					// is missing — confirm that is intended.
 					continue
 				}
 				return err
 			}
 			tenantBinding := entity.NewTenantBinding(binding.Namespace)
 			tenantBinding.ServiceAccountName = binding.ServiceAccount
 			tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding)
 			// Apply on the cluster first; the binding is only persisted once
 			// EnsureTenant has succeeded.
 			if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
 				return err
 			}
 		}
 		if err := s.bindingRepo.Upsert(ctx, binding); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 func (s *AuthService) DeleteUser(ctx context.Context, userID string) error {
 	principal, err := authz.RequirePrincipal(ctx)
 	if err != nil {
@ -300,9 +558,117 @@ func (s *AuthService) DeleteUser(ctx context.Context, userID string) error {
 	if userID == principal.UserID {
 		return entity.ErrForbidden
 	}
 	user, err := s.userRepo.GetByID(ctx, userID)
 	if err != nil {
 		return entity.ErrUserNotFound
 	}
 	if err := s.ensureUserHasNoInstances(ctx, user); err != nil {
 		return err
 	}
 	if s.isExclusiveUserWorkspace(ctx, user) {
 		if err := s.cleanupUserWorkspace(ctx, user.WorkspaceID); err != nil {
 			return err
 		}
 	}
 	return s.userRepo.Delete(ctx, userID)
 }
 // ensureUserHasNoInstances returns entity.ErrUserHasInstances when any stored
 // instance is owned by the user, or belongs to the user's workspace (unless
 // that workspace is blank or the default one). It returns nil when no instance
 // repository is configured or user is nil, and the listing error if the
 // repository call fails.
 func (s *AuthService) ensureUserHasNoInstances(ctx context.Context, user *entity.User) error {
 	if user == nil || s.instanceRepo == nil {
 		return nil
 	}
 	allInstances, err := s.instanceRepo.List(ctx)
 	if err != nil {
 		return err
 	}
 	checkWorkspace := user.WorkspaceID != "" && user.WorkspaceID != entity.DefaultWorkspaceID
 	for _, candidate := range allInstances {
 		switch {
 		case candidate == nil:
 			// Nothing to inspect; move on to the next entry.
 		case candidate.OwnerID == user.ID:
 			return entity.ErrUserHasInstances
 		case checkWorkspace && candidate.WorkspaceID == user.WorkspaceID:
 			return entity.ErrUserHasInstances
 		}
 	}
 	return nil
 }
 // isExclusiveUserWorkspace reports whether the user is the only user attached
 // to their workspace. Admins, users without a workspace and users in the
 // default workspace always yield false, and so does a failure to list users.
 func (s *AuthService) isExclusiveUserWorkspace(ctx context.Context, user *entity.User) bool {
 	if user == nil || user.Role == authz.RoleAdmin {
 		return false
 	}
 	if user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID {
 		return false
 	}
 	everyone, err := s.userRepo.List(ctx)
 	if err != nil {
 		return false
 	}
 	for _, candidate := range everyone {
 		sharesWorkspace := candidate != nil && candidate.ID != user.ID && candidate.WorkspaceID == user.WorkspaceID
 		if sharesWorkspace {
 			return false
 		}
 	}
 	return true
 }
 // cleanupUserWorkspace removes a workspace together with its cluster bindings
 // and, where possible, the tenant on each bound cluster.
 //
 // It is a no-op when the workspace or binding repository is not configured.
 // It returns entity.ErrProtectedNamespace when the workspace or any binding
 // uses a protected namespace (see isProtectedWorkspaceNamespace). Bindings are
 // processed one by one, so an error part-way through leaves the earlier
 // bindings already deleted.
 func (s *AuthService) cleanupUserWorkspace(ctx context.Context, workspaceID string) error {
 	if s.workspaceRepo == nil || s.bindingRepo == nil {
 		return nil
 	}
 	// NOTE(review): a lookup failure is returned as-is, including a "not
 	// found" error, whereas the final Delete below tolerates
 	// ErrWorkspaceNotFound — confirm the asymmetry is intended.
 	workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID)
 	if err != nil {
 		return err
 	}
 	if isProtectedWorkspaceNamespace(workspace.K8sNamespace) {
 		return entity.ErrProtectedNamespace
 	}
 	bindings, err := s.bindingRepo.ListByWorkspace(ctx, workspace.ID)
 	if err != nil {
 		return err
 	}
 	for _, binding := range bindings {
 		if binding == nil {
 			continue
 		}
 		if isProtectedWorkspaceNamespace(binding.Namespace) {
 			return entity.ErrProtectedNamespace
 		}
 		if s.tenantClient != nil && s.clusterRepo != nil {
 			cluster, err := s.clusterRepo.GetByID(ctx, binding.ClusterID)
 			if err != nil && !errors.Is(err, entity.ErrClusterNotFound) {
 				return err
 			}
 			// A missing cluster is tolerated: there is nothing to delete
 			// remotely, but the binding row is still removed below.
 			if err == nil {
 				tenantBinding := entity.NewTenantBinding(binding.Namespace)
 				tenantBinding.ServiceAccountName = binding.ServiceAccount
 				tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace)
 				if err := s.tenantClient.DeleteTenant(ctx, cluster, tenantBinding); err != nil {
 					return err
 				}
 			}
 		}
 		if err := s.bindingRepo.Delete(ctx, binding.WorkspaceID, binding.ClusterID); err != nil {
 			return err
 		}
 	}
 	// An already-deleted workspace is not treated as an error.
 	if err := s.workspaceRepo.Delete(ctx, workspace.ID); err != nil && !errors.Is(err, entity.ErrWorkspaceNotFound) {
 		return err
 	}
 	return nil
 }
 // isProtectedWorkspaceNamespace reports whether namespace, after trimming
 // surrounding whitespace, is blank or one of "default", "kube-system",
 // "kube-public" or "kube-node-lease".
 func isProtectedWorkspaceNamespace(namespace string) bool {
 	trimmed := strings.TrimSpace(namespace)
 	for _, reserved := range [...]string{"", "default", "kube-system", "kube-public", "kube-node-lease"} {
 		if trimmed == reserved {
 			return true
 		}
 	}
 	return false
 }
 func normalizeUserRole(role string) string {
 	if role == authz.RoleAdmin {
 		return authz.RoleAdmin
--- a/backend/internal/domain/service/auth_service_test.go
+++ b/backend/internal/domain/service/auth_service_test.go
@ -0,0 +1,322 @@
 package service
 import (
 	"context"
 	"errors"
 	"testing"
 	"time"
 	"github.com/google/uuid"
 	"github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	"github.com/ocdp/cluster-service/internal/pkg/authz"
 	jwtpkg "github.com/ocdp/cluster-service/internal/pkg/jwt"
 )
 // TestAuthServiceUpdateUserDowngradeReusesUsernameWorkspace checks that
 // changing an admin to a regular user attaches them to an already existing
 // workspace named after the username, and that the supplied default cluster
 // is written onto that workspace.
 func TestAuthServiceUpdateUserDowngradeReusesUsernameWorkspace(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	// Seed: an admin in the default workspace ...
 	target := testUser("user-1", "alice", authz.RoleAdmin, entity.DefaultWorkspaceID)
 	if err := userRepo.Create(ctx, target); err != nil {
 		t.Fatalf("seed user: %v", err)
 	}
 	// ... and a pre-existing workspace carrying the name derived from "alice".
 	workspace := entity.NewWorkspace(userWorkspaceName("alice"), "admin")
 	workspace.ID = "workspace-alice"
 	workspace.K8sNamespace = entity.NamespaceForUser("alice")
 	workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace)
 	if err := workspaceRepo.Create(ctx, workspace); err != nil {
 		t.Fatalf("seed workspace: %v", err)
 	}
 	updated, err := svc.UpdateUser(ctx, target.ID, authz.RoleUser, "", UserWorkspaceOptions{DefaultClusterID: "cluster-1"}, nil, nil)
 	if err != nil {
 		t.Fatalf("UpdateUser returned error: %v", err)
 	}
 	if updated.Role != authz.RoleUser {
 		t.Fatalf("expected user role, got %q", updated.Role)
 	}
 	if updated.WorkspaceID != workspace.ID {
 		t.Fatalf("expected reused workspace %q, got %q", workspace.ID, updated.WorkspaceID)
 	}
 	reused, err := workspaceRepo.GetByID(ctx, workspace.ID)
 	if err != nil {
 		t.Fatalf("get reused workspace: %v", err)
 	}
 	if reused.DefaultClusterID != "cluster-1" {
 		t.Fatalf("expected updated default cluster, got %q", reused.DefaultClusterID)
 	}
 }
 // TestAuthServiceRegisterUserAlwaysCreatesPrivateWorkspaceWithZeroDefaultQuotas
 // checks that registering a regular user ignores the requested workspace ID,
 // creates a workspace in the user's own namespace, leaves omitted CPU/memory
 // quotas empty, and defaults the GPU and GPU-memory quotas to "0".
 func TestAuthServiceRegisterUserAlwaysCreatesPrivateWorkspaceWithZeroDefaultQuotas(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	// "shared-workspace" is passed on purpose; it must not be honoured.
 	user, err := svc.Register(ctx, "alice", "password", authz.RoleUser, "shared-workspace", UserWorkspaceOptions{}, nil, nil)
 	if err != nil {
 		t.Fatalf("Register returned error: %v", err)
 	}
 	if user.WorkspaceID == "shared-workspace" || user.WorkspaceID == entity.DefaultWorkspaceID {
 		t.Fatalf("expected private user workspace, got %q", user.WorkspaceID)
 	}
 	workspace, err := workspaceRepo.GetByID(ctx, user.WorkspaceID)
 	if err != nil {
 		t.Fatalf("get user workspace: %v", err)
 	}
 	if workspace.K8sNamespace != entity.NamespaceForUser("alice") {
 		t.Fatalf("expected user namespace %q, got %q", entity.NamespaceForUser("alice"), workspace.K8sNamespace)
 	}
 	if workspace.QuotaCPU != "" || workspace.QuotaMemory != "" || workspace.QuotaGPU != "0" || workspace.QuotaGPUMem != "0" {
 		t.Fatalf("expected omitted CPU/memory to stay unlimited and GPU/gpumem to default zero, got cpu=%q memory=%q gpu=%q gpumem=%q", workspace.QuotaCPU, workspace.QuotaMemory, workspace.QuotaGPU, workspace.QuotaGPUMem)
 	}
 }
 // TestAuthServiceUpdateUserDowngradeRejectsNamespaceConflict checks that
 // changing an admin to a regular user fails with
 // entity.ErrWorkspaceNamespaceConflict when a differently named workspace
 // already occupies the namespace the user would receive.
 func TestAuthServiceUpdateUserDowngradeRejectsNamespaceConflict(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	target := testUser("user-1", "alice", authz.RoleAdmin, entity.DefaultWorkspaceID)
 	if err := userRepo.Create(ctx, target); err != nil {
 		t.Fatalf("seed user: %v", err)
 	}
 	// A workspace with another name that already uses alice's namespace.
 	conflicting := entity.NewWorkspace("someone-else", "admin")
 	conflicting.ID = "workspace-other"
 	conflicting.K8sNamespace = entity.NamespaceForUser("alice")
 	conflicting.K8sSAName = entity.ServiceAccountForNamespace(conflicting.K8sNamespace)
 	if err := workspaceRepo.Create(ctx, conflicting); err != nil {
 		t.Fatalf("seed conflicting workspace: %v", err)
 	}
 	_, err := svc.UpdateUser(ctx, target.ID, authz.RoleUser, "", UserWorkspaceOptions{}, nil, nil)
 	if !errors.Is(err, entity.ErrWorkspaceNamespaceConflict) {
 		t.Fatalf("expected namespace conflict, got %v", err)
 	}
 }
 // TestAuthServiceDeleteUserRejectsUserWithInstances checks that DeleteUser
 // returns entity.ErrUserHasInstances, and leaves the user in place, when an
 // instance is owned by that user.
 func TestAuthServiceDeleteUserRejectsUserWithInstances(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	instanceRepo := mock.NewInstanceRepositoryMock()
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	// Only the instance repository is needed for this check.
 	svc.SetUserLifecycleCleanup(instanceRepo, nil, nil, nil)
 	user := testUser("user-1", "alice", authz.RoleUser, "workspace-alice")
 	if err := userRepo.Create(ctx, user); err != nil {
 		t.Fatalf("seed user: %v", err)
 	}
 	instance := entity.NewInstance("cluster-1", "app", "ocdp-u-alice", "registry-1", "repo", "chart", "1.0.0")
 	instance.ID = "instance-1"
 	instance.OwnerID = user.ID
 	instance.WorkspaceID = user.WorkspaceID
 	if err := instanceRepo.Create(ctx, instance); err != nil {
 		t.Fatalf("seed instance: %v", err)
 	}
 	err := svc.DeleteUser(ctx, user.ID)
 	if !errors.Is(err, entity.ErrUserHasInstances) {
 		t.Fatalf("expected user instance conflict, got %v", err)
 	}
 	if _, err := userRepo.GetByID(ctx, user.ID); err != nil {
 		t.Fatalf("user should not be deleted: %v", err)
 	}
 }
 // TestAuthServiceDeleteUserRejectsWorkspaceInstanceEvenWithDifferentOwner
 // checks that DeleteUser returns entity.ErrUserHasInstances, and leaves the
 // user in place, when an instance lives in the user's workspace even though
 // somebody else owns it.
 func TestAuthServiceDeleteUserRejectsWorkspaceInstanceEvenWithDifferentOwner(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	instanceRepo := mock.NewInstanceRepositoryMock()
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	svc.SetUserLifecycleCleanup(instanceRepo, nil, nil, nil)
 	user := testUser("user-1", "alice", authz.RoleUser, "workspace-alice")
 	if err := userRepo.Create(ctx, user); err != nil {
 		t.Fatalf("seed user: %v", err)
 	}
 	instance := entity.NewInstance("cluster-1", "shared-workspace-app", "ocdp-u-alice", "registry-1", "repo", "chart", "1.0.0")
 	instance.ID = "instance-1"
 	// Different owner, same workspace: the workspace match alone must block deletion.
 	instance.OwnerID = "other-user"
 	instance.WorkspaceID = user.WorkspaceID
 	if err := instanceRepo.Create(ctx, instance); err != nil {
 		t.Fatalf("seed workspace instance: %v", err)
 	}
 	err := svc.DeleteUser(ctx, user.ID)
 	if !errors.Is(err, entity.ErrUserHasInstances) {
 		t.Fatalf("expected workspace instance conflict, got %v", err)
 	}
 	if _, err := userRepo.GetByID(ctx, user.ID); err != nil {
 		t.Fatalf("user should not be deleted: %v", err)
 	}
 }
 // TestAuthServiceDeleteUserCleansExclusiveWorkspaceBindings checks that
 // deleting the only user of a workspace also removes the workspace's cluster
 // binding, calls DeleteTenant for the bound namespace exactly once, and
 // deletes the workspace itself.
 func TestAuthServiceDeleteUserCleansExclusiveWorkspaceBindings(t *testing.T) {
 	ctx := adminContext()
 	userRepo := mock.NewUserRepositoryMock()
 	workspaceRepo := mock.NewWorkspaceRepositoryMock()
 	instanceRepo := mock.NewInstanceRepositoryMock()
 	bindingRepo := mock.NewWorkspaceClusterBindingRepositoryMock()
 	clusterRepo := &testClusterRepo{clusters: map[string]*entity.Cluster{
 		"cluster-1": {ID: "cluster-1", Name: "cluster-1", Host: "https://cluster.invalid", Token: "token"},
 	}}
 	tenantClient := &recordingTenantClient{}
 	svc := NewAuthService(userRepo, workspaceRepo, testPasswordHasher{}, testTokenGenerator{})
 	svc.SetUserLifecycleCleanup(instanceRepo, clusterRepo, bindingRepo, tenantClient)
 	workspace := entity.NewWorkspace(userWorkspaceName("alice"), "admin")
 	workspace.ID = "workspace-alice"
 	workspace.K8sNamespace = entity.NamespaceForUser("alice")
 	workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace)
 	if err := workspaceRepo.Create(ctx, workspace); err != nil {
 		t.Fatalf("seed workspace: %v", err)
 	}
 	user := testUser("user-1", "alice", authz.RoleUser, workspace.ID)
 	if err := userRepo.Create(ctx, user); err != nil {
 		t.Fatalf("seed user: %v", err)
 	}
 	// One binding of the workspace to cluster-1; no instances are seeded.
 	if err := bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
 		ID:             "binding-1",
 		WorkspaceID:    workspace.ID,
 		ClusterID:      "cluster-1",
 		Namespace:      workspace.K8sNamespace,
 		ServiceAccount: workspace.K8sSAName,
 		Status:         "active",
 	}); err != nil {
 		t.Fatalf("seed binding: %v", err)
 	}
 	if err := svc.DeleteUser(ctx, user.ID); err != nil {
 		t.Fatalf("DeleteUser returned error: %v", err)
 	}
 	if _, err := userRepo.GetByID(ctx, user.ID); !errors.Is(err, entity.ErrUserNotFound) {
 		t.Fatalf("expected user deleted, got %v", err)
 	}
 	if bindings, err := bindingRepo.ListByWorkspace(ctx, workspace.ID); err != nil || len(bindings) != 0 {
 		t.Fatalf("expected bindings cleaned, got len=%d err=%v", len(bindings), err)
 	}
 	if len(tenantClient.deleted) != 1 || tenantClient.deleted[0] != workspace.K8sNamespace {
 		t.Fatalf("expected tenant namespace cleanup, got %#v", tenantClient.deleted)
 	}
 	if _, err := workspaceRepo.GetByID(ctx, workspace.ID); !errors.Is(err, entity.ErrWorkspaceNotFound) {
 		t.Fatalf("expected exclusive workspace deleted, got %v", err)
 	}
 }
 // adminContext returns a background context carrying an admin principal
 // ("admin-1" / "admin") that lives in the default workspace.
 func adminContext() context.Context {
 	principal := &authz.Principal{
 		UserID:      "admin-1",
 		Username:    "admin",
 		Role:        authz.RoleAdmin,
 		WorkspaceID: entity.DefaultWorkspaceID,
 	}
 	return authz.WithPrincipal(context.Background(), principal)
 }
 // testUser builds a user fixture with the fixed password hash "hash", the
 // e-mail "<username>@local.ocdp", and the given ID, role and workspace.
 func testUser(id, username, role, workspaceID string) *entity.User {
 	fixture := entity.NewUser(username, "hash", username+"@local.ocdp")
 	fixture.ID, fixture.Role, fixture.WorkspaceID = id, role, workspaceID
 	return fixture
 }
 type testPasswordHasher struct{}
 func (testPasswordHasher) Hash(password string) (string, error) { return "hash:" + password, nil }
 func (testPasswordHasher) Verify(password, hash string) error   { return nil }
 // testTokenGenerator is a stub token generator that returns fixed values and
 // never fails.
 type testTokenGenerator struct{}
 // Generate always yields the token pair "access" / "refresh".
 func (testTokenGenerator) Generate(userID, username, role, workspaceID string) (string, string, error) {
 	const accessToken, refreshToken = "access", "refresh"
 	return accessToken, refreshToken, nil
 }
 // Verify returns two empty strings and no error.
 func (testTokenGenerator) Verify(token string) (string, string, error) {
 	return "", "", nil
 }
 // VerifyWithIssuedAt returns empty strings, a zero integer and no error.
 func (testTokenGenerator) VerifyWithIssuedAt(token string) (string, string, int64, error) {
 	return "", "", 0, nil
 }
 // VerifyAccess returns nil claims and no error.
 func (testTokenGenerator) VerifyAccess(token string) (*jwtpkg.Claims, error) {
 	return nil, nil
 }
 // VerifyRefresh returns nil claims and no error.
 func (testTokenGenerator) VerifyRefresh(token string) (*jwtpkg.Claims, error) {
 	return nil, nil
 }
 // Refresh always yields the access token "access".
 func (testTokenGenerator) Refresh(refreshToken string) (string, error) {
 	return "access", nil
 }
 // testClusterRepo is an in-memory cluster repository for tests. It stores and
 // returns copies, so callers never share a pointer with the repository. A
 // zero-value testClusterRepo is usable: the map is allocated on first write.
 type testClusterRepo struct {
 	clusters map[string]*entity.Cluster
 }
 // Create stores a copy of cluster, assigning a fresh UUID when it has no ID.
 func (r *testClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error {
 	if cluster.ID == "" {
 		cluster.ID = uuid.New().String()
 	}
 	// Writing to a nil map panics, so allocate lazily.
 	if r.clusters == nil {
 		r.clusters = make(map[string]*entity.Cluster)
 	}
 	stored := *cluster
 	r.clusters[cluster.ID] = &stored
 	return nil
 }
 // GetByID returns a copy of the cluster with the given ID, or
 // entity.ErrClusterNotFound.
 func (r *testClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) {
 	cluster, ok := r.clusters[id]
 	if !ok {
 		return nil, entity.ErrClusterNotFound
 	}
 	found := *cluster
 	return &found, nil
 }
 // GetByName returns a copy of the first cluster whose Name matches, or
 // entity.ErrClusterNotFound.
 func (r *testClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) {
 	for _, cluster := range r.clusters {
 		if cluster.Name == name {
 			found := *cluster
 			return &found, nil
 		}
 	}
 	return nil, entity.ErrClusterNotFound
 }
 // Update stores a copy of cluster under its ID, replacing any previous entry.
 func (r *testClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error {
 	if r.clusters == nil {
 		r.clusters = make(map[string]*entity.Cluster)
 	}
 	stored := *cluster
 	r.clusters[cluster.ID] = &stored
 	return nil
 }
 // Delete removes the cluster with the given ID; a missing ID is not an error.
 func (r *testClusterRepo) Delete(ctx context.Context, id string) error {
 	delete(r.clusters, id)
 	return nil
 }
 // List returns copies of all stored clusters in map-iteration (unspecified) order.
 func (r *testClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) {
 	result := make([]*entity.Cluster, 0, len(r.clusters))
 	for _, cluster := range r.clusters {
 		item := *cluster
 		result = append(result, &item)
 	}
 	return result, nil
 }
 // recordingTenantClient is a tenant-client test double. It records the
 // namespace of every successful DeleteTenant call and can serve a canned
 // quota-usage value.
 type recordingTenantClient struct {
 	deleted []string
 	usage   *repository.ResourceQuotaUsage
 }
 // EnsureTenant does nothing and reports success.
 func (c *recordingTenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	return nil
 }
 // IssueKubeconfig returns no kubeconfig and no error.
 func (c *recordingTenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) {
 	return nil, nil
 }
 // GetResourceQuotaUsage returns the canned usage when set, otherwise an empty value.
 func (c *recordingTenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) {
 	if c.usage == nil {
 		return &repository.ResourceQuotaUsage{}, nil
 	}
 	return c.usage, nil
 }
 // SuspendTenant does nothing and reports success.
 func (c *recordingTenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	return nil
 }
 // DeleteTenant validates the binding and, when valid, records its namespace.
 func (c *recordingTenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
 	err := binding.Validate()
 	if err == nil {
 		c.deleted = append(c.deleted, binding.Namespace)
 	}
 	return err
 }
--- a/backend/internal/domain/service/instance_service.go
+++ b/backend/internal/domain/service/instance_service.go
@ -6,9 +6,11 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
 	"github.com/google/uuid"
 	"github.com/ocdp/cluster-service/internal/adapter/input/http/dto"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	"github.com/ocdp/cluster-service/internal/pkg/authz"
@ -16,6 +18,12 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 // ScaleClient defines the interface for K8s-native workload scaling.
 // InstanceService calls it with the instance's namespace and uses the instance
 // name as releaseName.
 type ScaleClient interface {
 	// GetDeploymentReplicas reports the replica count of the deployment that
 	// belongs to releaseName in namespace on the given cluster.
 	GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error)
 	// ScaleDeployment sets the replica count of the deployment that belongs to
 	// releaseName in namespace on the given cluster.
 	ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error
 }
 // InstanceService is the domain service that manages Helm instances.
 type InstanceService struct {
 	instanceRepo  repository.InstanceRepository
@ -27,7 +35,9 @@ type InstanceService struct {
 	entryClient   repository.InstanceEntryClient
 	diagClient    repository.InstanceDiagnosticsClient
 	workspaceRepo repository.WorkspaceRepository
 	userRepo      repository.UserRepository
 	tenantClient  repository.TenantKubeClient
 	scaleClient   ScaleClient
 }
 // NewInstanceService creates the instance service.
@ -59,11 +69,19 @@ func (s *InstanceService) SetDiagnosticsClient(client repository.InstanceDiagnos
 	s.diagClient = client
 }
 // SetScaleClient installs the client used for direct replica scaling and live
 // replica lookups. While it is nil, ScaleInstance falls back to a Helm upgrade
 // and EnrichReplicas / GetRunningReplicas do not query the cluster.
 func (s *InstanceService) SetScaleClient(client ScaleClient) {
 	s.scaleClient = client
 }
 // SetTenantProvisioning installs the workspace repository and tenant client
 // used for tenant provisioning. ensureTenantForInstance is a no-op while either
 // of them is nil.
 func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.WorkspaceRepository, tenantClient repository.TenantKubeClient) {
 	s.workspaceRepo = workspaceRepo
 	s.tenantClient = tenantClient
 }
 // SetUserRepository installs the repository used to resolve instance owner IDs
 // to usernames. While it is nil, owner usernames are left untouched.
 func (s *InstanceService) SetUserRepository(userRepo repository.UserRepository) {
 	s.userRepo = userRepo
 }
 const chartCacheDir = "/tmp/charts"
 func (s *InstanceService) chartArchivePath(instance *entity.Instance) string {
@ -119,15 +137,21 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
 		return err
 	}
 	enforceNamespaceValues(instance)
 	if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil {
 		return err
 	}
 	// 检查实例是否已存在
 	existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name)
 	if existingInstance != nil {
 		return entity.ErrInstanceExists
 	}
 	if err := s.downloadChart(ctx, registry, instance); err != nil {
 		return err
 	}
 	binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
 	if err != nil {
 		return err
 	}
 	if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, nil); err != nil {
 		return err
 	}
 	instance.BeginOperation(entity.OperationInstall, "Preparing installation")
@ -136,13 +160,6 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
 		return err
 	}
 	// 下载 chart artifact 供 Helm 使用
 	if err := s.downloadChart(ctx, registry, instance); err != nil {
 		instance.MarkFailure("Failed to download chart", err)
 		_ = s.instanceRepo.Update(ctx, instance)
 		return err
 	}
 	// 异步执行 Helm 安装并监控状态
 	go s.executeAndSyncInstall(context.Background(), instance.ID, cluster, registry, instance)
@ -163,6 +180,7 @@ func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.I
 	if !s.canReadInstance(principal, instance) {
 		return nil, entity.ErrInstanceNotFound
 	}
 	s.enrichOwnerUsernames(ctx, []*entity.Instance{instance})
 	return instance, nil
 }
@ -207,8 +225,22 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
 	if !s.canWriteInstance(principal, existingInstance) {
 		return entity.ErrForbidden
 	}
 	instance.ClusterID = existingInstance.ClusterID
 	instance.WorkspaceID = existingInstance.WorkspaceID
 	instance.OwnerID = existingInstance.OwnerID
 	instance.Name = existingInstance.Name
 	if instance.RegistryID == "" {
 		instance.RegistryID = existingInstance.RegistryID
 	}
 	if instance.Repository == "" {
 		instance.Repository = existingInstance.Repository
 	}
 	if instance.Chart == "" {
 		instance.Chart = existingInstance.Chart
 	}
 	if instance.Version == "" {
 		instance.Version = existingInstance.Version
 	}
 	// 获取集群信息
 	cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID)
@ -224,15 +256,21 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
 	instance.Namespace = existingInstance.Namespace
 	enforceNamespaceValues(instance)
 	instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
 	if err := s.instanceRepo.Update(ctx, instance); err != nil {
 		return err
 	}
 	// 下载所需 Chart
 	if err := s.downloadChart(ctx, registry, instance); err != nil {
-		instance.MarkFailure("Failed to download chart", err)
+		return err
-		_ = s.instanceRepo.Update(ctx, instance)
+	}
 	binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
 	if err != nil {
 		return err
 	}
 	if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, existingInstance); err != nil {
 		return err
 	}
 	instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
 	if err := s.instanceRepo.Update(ctx, instance); err != nil {
 		return err
 	}
@ -352,9 +390,32 @@ func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID
 			visible = append(visible, instance)
 		}
 	}
 	s.enrichOwnerUsernames(ctx, visible)
 	return visible, nil
 }
 // enrichOwnerUsernames fills OwnerUsername on every instance that has an
 // OwnerID, looking each distinct owner up at most once per successful lookup.
 // It does nothing without a user repository. Nil entries, instances without an
 // owner, and owners whose lookup fails or yields no user are skipped silently.
 func (s *InstanceService) enrichOwnerUsernames(ctx context.Context, instances []*entity.Instance) {
 	if len(instances) == 0 || s.userRepo == nil {
 		return
 	}
 	// resolved caches owner ID -> username for owners that were found.
 	resolved := map[string]string{}
 	for idx := range instances {
 		inst := instances[idx]
 		if inst == nil || inst.OwnerID == "" {
 			continue
 		}
 		name, cached := resolved[inst.OwnerID]
 		if !cached {
 			owner, lookupErr := s.userRepo.GetByID(ctx, inst.OwnerID)
 			if lookupErr != nil || owner == nil {
 				continue
 			}
 			name = owner.Username
 			resolved[inst.OwnerID] = name
 		}
 		inst.OwnerUsername = name
 	}
 }
 // ListInstanceEntries lists the entry points (Service / Ingress) associated with an instance.
 func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) {
 	instance, err := s.GetInstance(ctx, instanceID)
@ -417,6 +478,161 @@ func (s *InstanceService) StreamInstanceLogs(ctx context.Context, clusterID, ins
 	return streamer.StreamPodLogs(ctx, cluster, instance.Namespace, podName, containerName, tailLines)
 }
 // ScaleInstance changes the replica count of an instance.
 //
 // The caller must be authenticated and allowed to write the instance, and the
 // instance must belong to clusterID; otherwise ErrUnauthorized, ErrForbidden or
 // ErrInstanceNotFound is returned. replicas must lie in [0, 2^31-1] because it
 // is handed to the scale client as an int32. Before anything is changed, the
 // chart is downloaded, the tenant binding is ensured, and the workspace quota
 // is pre-checked against the target values (current values plus the new
 // replicaCount).
 //
 // When a ScaleClient is configured the workload is scaled directly through it
 // and the stored values and replica count are updated synchronously. Otherwise
 // the instance is marked as upgrading and a Helm upgrade runs in the
 // background.
 //
 // The workload parameter is currently not used by this method.
 func (s *InstanceService) ScaleInstance(ctx context.Context, clusterID, instanceID string, replicas int, workload string) (*entity.Instance, error) {
 	principal, err := authz.RequirePrincipal(ctx)
 	if err != nil {
 		return nil, entity.ErrUnauthorized
 	}
 	instance, err := s.instanceRepo.GetByID(ctx, instanceID)
 	if err != nil {
 		return nil, entity.ErrInstanceNotFound
 	}
 	if !s.canWriteInstance(principal, instance) {
 		return nil, entity.ErrForbidden
 	}
 	if instance.ClusterID != clusterID {
 		return nil, entity.ErrInstanceNotFound
 	}
 	// Reject counts that are negative or that would be silently truncated by the
 	// int32 conversion below.
 	const maxReplicas = 1<<31 - 1
 	if replicas < 0 || replicas > maxReplicas {
 		return nil, fmt.Errorf("invalid replica count %d: must be between 0 and %d", replicas, maxReplicas)
 	}
 	// Without either client there is no way to apply the change; fail early
 	// instead of dereferencing a nil client.
 	if s.scaleClient == nil && s.helmClient == nil {
 		return nil, fmt.Errorf("scaling is unavailable: neither a scale client nor a Helm client is configured")
 	}
 	cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
 	if err != nil {
 		return nil, entity.ErrClusterNotFound
 	}
 	current := cloneInstanceForQuota(instance)
 	if s.helmClient != nil {
 		// Best effort: prefer the values of the live release; on failure keep the
 		// values stored on the instance.
 		if liveValues, valuesErr := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace); valuesErr == nil && liveValues != nil {
 			current.SetValues(liveValues)
 		}
 	}
 	target := cloneInstanceForQuota(instance)
 	targetValues := copyValues(current.Values)
 	if targetValues == nil {
 		targetValues = copyValues(instance.Values)
 	}
 	if targetValues == nil {
 		targetValues = map[string]interface{}{}
 	}
 	targetValues["replicaCount"] = replicas
 	target.SetValues(targetValues)
 	registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
 	if err != nil {
 		return nil, entity.ErrRegistryNotFound
 	}
 	// The chart archive is needed to estimate the resources of the target state.
 	if err := s.downloadChart(ctx, registry, target); err != nil {
 		return nil, err
 	}
 	binding, err := s.ensureTenantForInstance(ctx, principal, cluster, target)
 	if err != nil {
 		return nil, err
 	}
 	if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, target, current); err != nil {
 		return nil, err
 	}
 	// Scale via K8s API directly (like kubectl scale deploy --replicas=N)
 	if s.scaleClient != nil {
 		if err := s.scaleClient.ScaleDeployment(ctx, cluster, instance.Namespace, instance.Name, int32(replicas)); err != nil {
 			return nil, fmt.Errorf("failed to scale deployment: %w", err)
 		}
 		instance.SetValues(targetValues)
 		instance.Replicas = replicas
 		if err := s.instanceRepo.Update(ctx, instance); err != nil {
 			return nil, err
 		}
 	} else {
 		// Fallback: Helm upgrade with replicaCount
 		instance.SetValues(targetValues)
 		instance.BeginOperation(entity.OperationUpgrade, fmt.Sprintf("Scaling to %d replicas", replicas))
 		if err := s.instanceRepo.Update(ctx, instance); err != nil {
 			return nil, err
 		}
 		// NOTE(review): the registry is passed as nil although it was loaded above;
 		// confirm executeAndSyncUpgrade does not need it for this path.
 		go s.executeAndSyncUpgrade(context.Background(), instance.ID, cluster, nil, instance)
 	}
 	return instance, nil
 }
 // EnrichReplicas overwrites Replicas on each instance with the replica count
 // reported by the scale client for the instance's namespace and name, and
 // returns the same slice.
 //
 // The slice is returned unchanged when no scale client is configured, the
 // slice is empty, or the cluster cannot be loaded. Nil entries are skipped, and
 // an instance whose lookup fails keeps its stored Replicas value.
 func (s *InstanceService) EnrichReplicas(ctx context.Context, clusterID string, instances []*entity.Instance) []*entity.Instance {
 	if s.scaleClient == nil || len(instances) == 0 {
 		return instances
 	}
 	cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
 	if err != nil || cluster == nil {
 		return instances
 	}
 	for _, inst := range instances {
 		// Tolerate nil entries, as enrichOwnerUsernames does for the same slices.
 		if inst == nil {
 			continue
 		}
 		r, err := s.scaleClient.GetDeploymentReplicas(ctx, cluster, inst.Namespace, inst.Name)
 		if err == nil {
 			inst.Replicas = int(r)
 		}
 	}
 	return instances
 }
 // GetRunningReplicas returns the replica count the scale client reports for
 // the instance's deployment. It returns 0 when no scale client is configured
 // or when the lookup fails.
 func (s *InstanceService) GetRunningReplicas(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) int {
 	if s.scaleClient != nil {
 		if live, err := s.scaleClient.GetDeploymentReplicas(ctx, cluster, instance.Namespace, instance.Name); err == nil {
 			return int(live)
 		}
 	}
 	return 0
 }
 // GetInstanceValuesDiff returns the values of the instance's current release
 // together with the default values of its chart.
 //
 // The caller must be authenticated (ErrUnauthorized otherwise). An instance
 // that is missing, not readable by the caller, or not part of clusterID is
 // reported as ErrInstanceNotFound. When the chart archive is not present on
 // disk it is downloaded from the instance's registry before the defaults are
 // read.
 func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID, instanceID string) (*dto.InstanceValuesDiffResponse, error) {
 	principal, authErr := authz.RequirePrincipal(ctx)
 	if authErr != nil {
 		return nil, entity.ErrUnauthorized
 	}
 	instance, lookupErr := s.instanceRepo.GetByID(ctx, instanceID)
 	if lookupErr != nil {
 		return nil, entity.ErrInstanceNotFound
 	}
 	// Unreadable instances and cluster mismatches are both reported as "not found".
 	if !s.canReadInstance(principal, instance) || instance.ClusterID != clusterID {
 		return nil, entity.ErrInstanceNotFound
 	}
 	cluster, clusterErr := s.clusterRepo.GetByID(ctx, clusterID)
 	if clusterErr != nil {
 		return nil, entity.ErrClusterNotFound
 	}
 	releaseValues, valuesErr := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace)
 	if valuesErr != nil {
 		return nil, valuesErr
 	}
 	// Make sure the chart archive exists locally before reading its defaults.
 	archivePath := s.chartArchivePath(instance)
 	_, statErr := os.Stat(archivePath)
 	switch {
 	case statErr == nil:
 		// Archive already present; nothing to fetch.
 	case errors.Is(statErr, os.ErrNotExist):
 		registry, registryErr := s.registryRepo.GetByID(ctx, instance.RegistryID)
 		if registryErr != nil {
 			return nil, entity.ErrRegistryNotFound
 		}
 		if downloadErr := s.downloadChart(ctx, registry, instance); downloadErr != nil {
 			return nil, downloadErr
 		}
 	default:
 		return nil, fmt.Errorf("failed to inspect chart defaults: %w", statErr)
 	}
 	chartDefaults, defaultsErr := s.helmClient.GetChartDefaultValues(archivePath)
 	if defaultsErr != nil {
 		return nil, fmt.Errorf("failed to read chart defaults: %w", defaultsErr)
 	}
 	response := &dto.InstanceValuesDiffResponse{
 		Current:  releaseValues,
 		Defaults: chartDefaults,
 	}
 	return response, nil
 }
 func (s *InstanceService) canReadInstance(principal *authz.Principal, instance *entity.Instance) bool {
 	if principal.IsAdmin() {
 		return true
@ -471,9 +687,6 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
 		}
 		return nil
 	}
 	if isReservedNamespace(instance.Namespace) {
 		return entity.ErrInvalidNamespace
 	}
 	if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID {
 		namespace := principal.Namespace
 		if namespace == "" {
@ -484,9 +697,15 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
 				namespace = binding.Namespace
 			}
 		}
 		if instance.Namespace != "" && instance.Namespace != namespace {
 			return entity.ErrForbidden
 		}
 		instance.Namespace = namespace
 		return nil
 	}
 	if isReservedNamespace(instance.Namespace) {
 		return entity.ErrInvalidNamespace
 	}
 	if instance.Namespace == "" {
 		if cluster.DefaultNamespace != "" {
 			instance.Namespace = cluster.DefaultNamespace
@ -499,8 +718,62 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
 	return nil
 }
-func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
+func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) (*entity.WorkspaceClusterBinding, error) {
 	// ensureTenantForInstance provisions the tenant for the principal's workspace
 	// on the cluster and returns the workspace/cluster binding that was used.
 	// It returns (nil, nil) for admins and when tenant provisioning is not
 	// configured, and ErrWorkspaceSuspended for a suspended workspace.
 	if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil {
 		return nil, nil
 	}
 	workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
 	if err != nil {
 		return nil, err
 	}
 	if workspace.Status == entity.WorkspaceSuspended {
 		return nil, entity.ErrWorkspaceSuspended
 	}
 	// Start from a fresh binding derived from the workspace. CPU and memory
 	// quotas are trimmed; empty GPU quotas become "0".
 	binding := &entity.WorkspaceClusterBinding{
 		ID:             uuid.New().String(),
 		WorkspaceID:    workspace.ID,
 		ClusterID:      cluster.ID,
 		Namespace:      instance.Namespace,
 		ServiceAccount: workspace.K8sSAName,
 		QuotaCPU:       strings.TrimSpace(workspace.QuotaCPU),
 		QuotaMemory:    strings.TrimSpace(workspace.QuotaMemory),
 		QuotaGPU:       zeroIfEmptyQuota(workspace.QuotaGPU),
 		QuotaGPUMem:    zeroIfEmptyQuota(workspace.QuotaGPUMem),
 		Status:         "active",
 		CreatedAt:      time.Now(),
 		UpdatedAt:      time.Now(),
 	}
 	if s.bindingRepo != nil {
 		// Reuse identity and placement of an already stored binding. A failed
 		// lookup is treated the same as "no existing binding".
 		if existing, err := s.bindingRepo.Get(ctx, workspace.ID, cluster.ID); err == nil && existing != nil {
 			binding.ID = existing.ID
 			binding.CreatedAt = existing.CreatedAt
 			if existing.Namespace != "" {
 				// The stored namespace wins: the instance is moved into it and its
 				// namespace-related values are re-enforced.
 				binding.Namespace = existing.Namespace
 				instance.Namespace = existing.Namespace
 				enforceNamespaceValues(instance)
 			}
 			if existing.ServiceAccount != "" {
 				binding.ServiceAccount = existing.ServiceAccount
 			}
 			if existing.Status != "" {
 				binding.Status = existing.Status
 			}
 		}
 	}
 	// Provision on the cluster first; the binding is only persisted afterwards.
 	tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
 	if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
 		return nil, err
 	}
 	if s.bindingRepo != nil {
 		if err := s.bindingRepo.Upsert(ctx, binding); err != nil {
 			return nil, err
 		}
 	}
 	return binding, nil
 }
 func (s *InstanceService) precheckInstanceQuota(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, target, current *entity.Instance) error {
 	if principal.IsAdmin() || s.workspaceRepo == nil || s.helmClient == nil {
 		return nil
 	}
 	workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
@ -510,29 +783,45 @@ func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal
 	if workspace.Status == entity.WorkspaceSuspended {
 		return entity.ErrWorkspaceSuspended
 	}
-	binding := entity.NewTenantBinding(instance.Namespace)
+	if binding == nil {
-	binding.ServiceAccountName = workspace.K8sSAName
+		binding = &entity.WorkspaceClusterBinding{
-	binding.ResourceQuotaHard = instanceResourceQuotaHard(workspace)
+			WorkspaceID: principal.WorkspaceID,
-	if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil {
+			ClusterID:   cluster.ID,
-		return err
+			Namespace:   target.Namespace,
 			QuotaCPU:    strings.TrimSpace(workspace.QuotaCPU),
 			QuotaMemory: strings.TrimSpace(workspace.QuotaMemory),
 			QuotaGPU:    zeroIfEmptyQuota(workspace.QuotaGPU),
 			QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem),
 		}
 	}
-	if s.bindingRepo != nil {
+	var usage *repository.ResourceQuotaUsage
-		_ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
+	if s.tenantClient != nil {
-			ID:             uuid.New().String(),
+		tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
-			WorkspaceID:    workspace.ID,
+		quotaUsage, err := s.tenantClient.GetResourceQuotaUsage(ctx, cluster, tenantBinding)
-			ClusterID:      cluster.ID,
+		if err != nil {
-			Namespace:      instance.Namespace,
+			return err
-			ServiceAccount: workspace.K8sSAName,
+		}
-			QuotaCPU:       workspace.QuotaCPU,
+		usage = quotaUsage
 			QuotaMemory:    workspace.QuotaMemory,
 			QuotaGPU:       workspace.QuotaGPU,
 			QuotaGPUMem:    workspace.QuotaGPUMem,
 			Status:         "active",
 			CreatedAt:      time.Now(),
 			UpdatedAt:      time.Now(),
 		})
 	}
-	return nil
+	result, err := NewQuotaPrecheckService(s.helmClient).EstimateAndCompareBinding(ctx, cluster, binding, usage, target, current)
 	if err == nil {
 		return nil
 	}
 	if errors.Is(err, ErrQuotaExceeded) && result != nil {
 		return fmt.Errorf("%w: %s", ErrQuotaExceeded, formatQuotaExceeded(result.Exceeded))
 	}
 	return err
 }
 // formatQuotaExceeded renders the exceeded resources as a single
 // "; "-separated message, one "<name> required=<r> quota=<q>" entry per
 // resource. With no entries it returns a generic quota-exceeded message.
 func formatQuotaExceeded(exceeded []QuotaExceededResource) string {
 	if len(exceeded) == 0 {
 		return "requested resources exceed workspace quota"
 	}
 	descriptions := make([]string, len(exceeded))
 	for i := range exceeded {
 		res := exceeded[i]
 		descriptions[i] = fmt.Sprintf("%s required=%s quota=%s", res.Name, res.Required, res.Hard)
 	}
 	return strings.Join(descriptions, "; ")
 }
 func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
@ -565,6 +854,46 @@ func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList
 	return hard
 }
 // tenantBindingFromWorkspaceClusterBinding converts a workspace/cluster binding
 // into the tenant binding passed to the tenant client: namespace, service
 // account name and hard resource quota are carried over. A nil binding yields
 // a tenant binding created for the empty namespace.
 func tenantBindingFromWorkspaceClusterBinding(binding *entity.WorkspaceClusterBinding) entity.TenantBinding {
 	if binding == nil {
 		return entity.NewTenantBinding("")
 	}
 	converted := entity.NewTenantBinding(binding.Namespace)
 	converted.ServiceAccountName = binding.ServiceAccount
 	converted.ResourceQuotaHard = bindingQuotaHard(binding)
 	return converted
 }
 // zeroIfEmptyQuota returns value with surrounding whitespace removed, or "0"
 // when nothing but whitespace remains.
 func zeroIfEmptyQuota(value string) string {
 	trimmed := strings.TrimSpace(value)
 	if trimmed == "" {
 		return "0"
 	}
 	return trimmed
 }
 // cloneInstanceForQuota returns a copy of instance whose values are set from a
 // separate top-level copy of the original's Values map, so changing the
 // clone's top-level values does not affect the original. A nil instance
 // yields nil.
 func cloneInstanceForQuota(instance *entity.Instance) *entity.Instance {
 	if instance == nil {
 		return nil
 	}
 	duplicate := new(entity.Instance)
 	*duplicate = *instance
 	duplicate.SetValues(copyValues(instance.Values))
 	return duplicate
 }
 // copyValues returns a shallow copy of values: the top-level map is new, while
 // nested values are shared with the original. A nil map yields nil.
 func copyValues(values map[string]interface{}) map[string]interface{} {
 	if values == nil {
 		return nil
 	}
 	duplicate := make(map[string]interface{}, len(values))
 	for k := range values {
 		duplicate[k] = values[k]
 	}
 	return duplicate
 }
 func isReservedNamespace(namespace string) bool {
 	switch namespace {
 	case "default", "kube-system", "kube-public", "kube-node-lease":
--- a/backend/internal/domain/service/instance_service_test.go
+++ b/backend/internal/domain/service/instance_service_test.go
@ -10,6 +10,7 @@ import (
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	"github.com/ocdp/cluster-service/internal/pkg/authz"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) {
@ -85,6 +86,210 @@ func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) {
 	}
 }
 // TestApplyNamespacePolicyRejectsMismatchedTenantNamespace checks that a
 // regular user on a shared cluster who asks for a namespace other than the
 // principal's own gets ErrForbidden and that the requested namespace is left
 // untouched.
 func TestApplyNamespacePolicyRejectsMismatchedTenantNamespace(t *testing.T) {
 	tenant := &authz.Principal{
 		UserID:        "user-1",
 		Username:      "alice",
 		Role:          authz.RoleUser,
 		WorkspaceID:   "workspace-1",
 		WorkspaceName: "alice",
 		Namespace:     "ocdp-u-alice",
 	}
 	sharedCluster := &entity.Cluster{
 		ID:         "cluster-1",
 		OwnerID:    "admin",
 		Visibility: authz.VisibilityWorkspaceShared,
 	}
 	requested := &entity.Instance{Namespace: "other-namespace"}
 	svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
 	err := svc.applyNamespacePolicy(context.Background(), tenant, sharedCluster, requested)
 	if !errors.Is(err, entity.ErrForbidden) {
 		t.Fatalf("expected ErrForbidden for mismatched tenant namespace, got %v", err)
 	}
 	if got := requested.Namespace; got != "other-namespace" {
 		t.Fatalf("expected namespace to remain unchanged on rejection, got %q", got)
 	}
 }
 // TestApplyNamespacePolicyAllowsTenantNamespace checks that a regular user on
 // a shared cluster may explicitly request the principal's own namespace and
 // that the namespace is kept as requested.
 func TestApplyNamespacePolicyAllowsTenantNamespace(t *testing.T) {
 	tenant := &authz.Principal{
 		UserID:        "user-1",
 		Username:      "alice",
 		Role:          authz.RoleUser,
 		WorkspaceID:   "workspace-1",
 		WorkspaceName: "alice",
 		Namespace:     "ocdp-u-alice",
 	}
 	sharedCluster := &entity.Cluster{
 		ID:         "cluster-1",
 		OwnerID:    "admin",
 		Visibility: authz.VisibilityWorkspaceShared,
 	}
 	requested := &entity.Instance{Namespace: "ocdp-u-alice"}
 	svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
 	err := svc.applyNamespacePolicy(context.Background(), tenant, sharedCluster, requested)
 	if err != nil {
 		t.Fatalf("expected matching tenant namespace to be allowed, got %v", err)
 	}
 	if got := requested.Namespace; got != "ocdp-u-alice" {
 		t.Fatalf("expected namespace to remain the allowed tenant namespace, got %q", got)
 	}
 }
 func TestEnrichReplicasSetsLiveReplicaCount(t *testing.T) {
 	ctx := context.Background()
 	cluster := &entity.Cluster{ID: "cluster-1", Name: "cluster"}
 	svc := NewInstanceService(nil, &stubClusterRepo{cluster: cluster}, nil, nil, nil, nil)
 	svc.SetScaleClient(&stubScaleClient{replicas: 3})
 	instances := []*entity.Instance{{
 		ID:        "inst-1",
 		ClusterID: "cluster-1",
 		Name:      "demo",
 		Namespace: "ocdp-u-alice",
 		Replicas:  1,
 	}}
 	enriched := svc.EnrichReplicas(ctx, "cluster-1", instances)
 	if enriched[0].Replicas != 3 {
 		t.Fatalf("expected live replicas to overwrite stored count, got %d", enriched[0].Replicas)
 	}
 }
 // TestListInstancesByClusterHydratesOwnerUsername checks that, once a user
 // repository is set, ListInstancesByCluster fills OwnerUsername from the
 // instance's OwnerID.
 func TestListInstancesByClusterHydratesOwnerUsername(t *testing.T) {
 	// The caller is an admin so that the seeded instance is visible to it.
 	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
 		UserID:      "admin-1",
 		Username:    "admin",
 		Role:        authz.RoleAdmin,
 		WorkspaceID: "workspace-admin",
 	})
 	instanceRepo := persistencemock.NewInstanceRepositoryMock()
 	userRepo := persistencemock.NewUserRepositoryMock()
 	// Seed the owner whose username is expected on the listed instance.
 	if err := userRepo.Create(ctx, &entity.User{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"}); err != nil {
 		t.Fatalf("failed to seed user: %v", err)
 	}
 	instance := &entity.Instance{
 		ID:          "inst-1",
 		WorkspaceID: "workspace-1",
 		OwnerID:     "user-1",
 		ClusterID:   "cluster-1",
 		Name:        "demo",
 		Namespace:   "ocdp-u-alice",
 	}
 	if err := instanceRepo.Create(ctx, instance); err != nil {
 		t.Fatalf("failed to seed instance: %v", err)
 	}
 	svc := NewInstanceService(
 		instanceRepo,
 		&stubClusterRepo{cluster: &entity.Cluster{ID: "cluster-1", Name: "cluster"}},
 		nil,
 		nil,
 		nil,
 		nil,
 	)
 	svc.SetUserRepository(userRepo)
 	instances, err := svc.ListInstancesByCluster(ctx, "cluster-1")
 	if err != nil {
 		t.Fatalf("ListInstancesByCluster returned error: %v", err)
 	}
 	if len(instances) != 1 {
 		t.Fatalf("expected 1 instance, got %d", len(instances))
 	}
 	if instances[0].OwnerUsername != "alice" {
 		t.Fatalf("expected owner username alice, got %q", instances[0].OwnerUsername)
 	}
 }
 // TestCreateInstanceRejectsGPUWhenWorkspaceQuotaEmptyBeforeCreate checks that
 // CreateInstance fails with ErrQuotaExceeded for a regular user whose workspace
 // has empty GPU quotas when the estimated chart requests include a GPU. It
 // also asserts that the rejection happens before anything is persisted or
 // installed: no instance is stored, Helm install is never called, and the
 // chart is pulled exactly once.
 func TestCreateInstanceRejectsGPUWhenWorkspaceQuotaEmptyBeforeCreate(t *testing.T) {
 	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
 		UserID:        "user-ivanwu",
 		Username:      "ivanwu",
 		Role:          authz.RoleUser,
 		WorkspaceID:   "workspace-ivanwu",
 		WorkspaceName: "ivanwu",
 		Namespace:     "ocdp-u-ivanwu",
 	})
 	instanceRepo := persistencemock.NewInstanceRepositoryMock()
 	workspaceRepo := persistencemock.NewWorkspaceRepositoryMock()
 	bindingRepo := persistencemock.NewWorkspaceClusterBindingRepositoryMock()
 	// Workspace with CPU and memory quota but deliberately empty GPU quotas.
 	workspace := entity.NewWorkspace("ivanwu", "admin")
 	workspace.ID = "workspace-ivanwu"
 	workspace.K8sNamespace = "ocdp-u-ivanwu"
 	workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace)
 	workspace.QuotaCPU = "8"
 	workspace.QuotaMemory = "32Gi"
 	workspace.QuotaGPU = ""
 	workspace.QuotaGPUMem = ""
 	if err := workspaceRepo.Create(ctx, workspace); err != nil {
 		t.Fatalf("seed workspace: %v", err)
 	}
 	cluster := &entity.Cluster{
 		ID:         "k3s",
 		Name:       "k3s",
 		Host:       "https://k3s.invalid",
 		Token:      "token",
 		OwnerID:    "admin",
 		Visibility: authz.VisibilityGlobalShared,
 	}
 	registry := &entity.Registry{
 		ID:         "registry-1",
 		Name:       "harbor",
 		URL:        "https://harbor.invalid",
 		OwnerID:    "admin",
 		Visibility: authz.VisibilityGlobalShared,
 	}
 	// The Helm stub reports an estimate that requests one GPU.
 	helm := &stubHelmClient{
 		estimate: &repository.ResourceEstimate{
 			Requests: repository.ResourceVector{
 				CPU:         resource.MustParse("2"),
 				Memory:      resource.MustParse("8Gi"),
 				GPU:         1,
 				GPUMemoryMB: 10000,
 			},
 		},
 	}
 	oci := &stubOCIClient{}
 	svc := NewInstanceService(
 		instanceRepo,
 		&stubClusterRepo{cluster: cluster},
 		&stubRegistryRepo{registry: registry},
 		helm,
 		oci,
 		nil,
 		bindingRepo,
 	)
 	// The tenant client reports an empty current usage.
 	svc.SetTenantProvisioning(workspaceRepo, &recordingTenantClient{usage: &repository.ResourceQuotaUsage{}})
 	instance := entity.NewInstance("k3s", "vllm-qwen", "ocdp-u-ivanwu", registry.ID, "library/vllm-serve", "vllm-serve", "0.1.0")
 	instance.SetValues(map[string]interface{}{
 		"image": map[string]interface{}{
 			"repository": "harbor.bwgdi.com/library/vllm-openai",
 			"tag":        "v0.17.1",
 		},
 		"model": "Qwen/Qwen2.5-0.5B",
 	})
 	err := svc.CreateInstance(ctx, instance)
 	if !errors.Is(err, ErrQuotaExceeded) {
 		t.Fatalf("expected GPU quota rejection, got %v", err)
 	}
 	// Nothing may have been persisted or installed before the rejection.
 	instances, listErr := instanceRepo.List(ctx)
 	if listErr != nil {
 		t.Fatalf("list instances: %v", listErr)
 	}
 	if len(instances) != 0 {
 		t.Fatalf("expected quota rejection before instance DB create, got %#v", instances)
 	}
 	if helm.installCalls != 0 {
 		t.Fatalf("expected Helm install not to be called, got %d calls", helm.installCalls)
 	}
 	if oci.pullCalls != 1 {
 		t.Fatalf("expected chart pull for quota rendering, got %d pulls", oci.pullCalls)
 	}
 }
 func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) {
 	t.Helper()
@ -133,13 +338,19 @@ func (*stubClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { r
 // stubHelmClient is a configurable Helm client test double that also counts
 // install and upgrade calls.
 type stubHelmClient struct {
 	// uninstallErr: presumably the error returned by the uninstall stub — its use is not visible here; verify.
 	uninstallErr error
 	// estimate is returned by EstimateInstanceResources when non-nil.
 	estimate     *repository.ResourceEstimate
 	// values is returned by GetValues.
 	values       map[string]interface{}
 	// installCalls is incremented on every Install call.
 	installCalls int
 	// upgradeCalls is incremented on every Upgrade call.
 	upgradeCalls int
 }
-func (*stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
+func (s *stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
 	s.installCalls++
 	return nil
 }
-func (*stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
+func (s *stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
 	s.upgradeCalls++
 	return nil
 }
@ -163,9 +374,116 @@ func (*stubHelmClient) List(ctx context.Context, cluster *entity.Cluster, namesp
 	return nil, nil
 }
-func (*stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
+func (s *stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
 	return s.values, nil
 }
 // GetChartDefaultValues ignores chartPath and returns no values and no error.
 func (*stubHelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) {
 	return nil, nil
 }
 // EstimateInstanceResources returns the configured estimate, or a fresh empty
 // estimate when none was configured. It never fails.
 func (s *stubHelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) {
 	if s.estimate == nil {
 		return &repository.ResourceEstimate{}, nil
 	}
 	return s.estimate, nil
 }
 // stubRegistryRepo is a registry repository test double that holds at most one
 // registry.
 type stubRegistryRepo struct {
 	// registry is the single stored registry; nil means the repository is empty.
 	registry *entity.Registry
 }
 // Create stores registry, replacing whatever was stored before.
 func (s *stubRegistryRepo) Create(ctx context.Context, registry *entity.Registry) error {
 	s.registry = registry
 	return nil
 }
 // GetByID returns the stored registry when its ID equals id, otherwise
 // entity.ErrRegistryNotFound.
 func (s *stubRegistryRepo) GetByID(ctx context.Context, id string) (*entity.Registry, error) {
 	if s.registry == nil || s.registry.ID != id {
 		return nil, entity.ErrRegistryNotFound
 	}
 	return s.registry, nil
 }
 // GetByName returns the stored registry when its Name equals name, otherwise
 // entity.ErrRegistryNotFound.
 func (s *stubRegistryRepo) GetByName(ctx context.Context, name string) (*entity.Registry, error) {
 	if s.registry == nil || s.registry.Name != name {
 		return nil, entity.ErrRegistryNotFound
 	}
 	return s.registry, nil
 }
 // Update stores registry, replacing whatever was stored before.
 func (s *stubRegistryRepo) Update(ctx context.Context, registry *entity.Registry) error {
 	s.registry = registry
 	return nil
 }
 // Delete empties the repository when the stored registry's ID equals id,
 // otherwise it returns entity.ErrRegistryNotFound.
 func (s *stubRegistryRepo) Delete(ctx context.Context, id string) error {
 	if s.registry == nil || s.registry.ID != id {
 		return entity.ErrRegistryNotFound
 	}
 	s.registry = nil
 	return nil
 }
 // List returns the stored registry as a one-element slice, or a nil slice
 // when the repository is empty.
 func (s *stubRegistryRepo) List(ctx context.Context) ([]*entity.Registry, error) {
 	if s.registry != nil {
 		return []*entity.Registry{s.registry}, nil
 	}
 	return nil, nil
 }
 // stubOCIClient is an OCI client test double. Every operation succeeds with an
 // empty result; only PullArtifact has an observable effect (a call counter).
 type stubOCIClient struct {
 	// pullCalls counts PullArtifact invocations.
 	pullCalls int
 }
 // ListRepositories returns no repositories and no error.
 func (*stubOCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) {
 	return nil, nil
 }
 // ListArtifacts returns no artifacts and no error.
 func (*stubOCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repositoryName, mediaTypeFilter string) ([]*entity.Artifact, error) {
 	return nil, nil
 }
 // GetArtifact returns no artifact and no error.
 func (*stubOCIClient) GetArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (*entity.Artifact, error) {
 	return nil, nil
 }
 // GetValuesSchema returns an empty schema and no error.
 func (*stubOCIClient) GetValuesSchema(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
 	return "", nil
 }
 // GetValuesYAML returns an empty document and no error.
 func (*stubOCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
 	return "", nil
 }
 // PullArtifact counts the call and reports success; nothing is written to destPath.
 func (s *stubOCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference, destPath string) error {
 	s.pullCalls++
 	return nil
 }
 // PushArtifact reports success without doing anything.
 func (*stubOCIClient) PushArtifact(ctx context.Context, registry *entity.Registry, repositoryName, tag, sourcePath string) error {
 	return nil
 }
 // CheckHealth always reports a healthy registry.
 func (*stubOCIClient) CheckHealth(ctx context.Context, registry *entity.Registry) error {
 	return nil
 }
 // stubScaleClient is a ScaleClient test double that keeps a single replica
 // count shared by all deployments.
 type stubScaleClient struct {
 	// replicas is the count reported by GetDeploymentReplicas and overwritten by ScaleDeployment.
 	replicas int32
 }
 // GetDeploymentReplicas returns the stored replica count, whatever the
 // cluster, namespace or release name.
 func (s *stubScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) {
 	return s.replicas, nil
 }
 // ScaleDeployment stores replicas as the new count and reports success.
 func (s *stubScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error {
 	s.replicas = replicas
 	return nil
 }
 var _ repository.ClusterRepository = (*stubClusterRepo)(nil)
 var _ repository.RegistryRepository = (*stubRegistryRepo)(nil)
 var _ repository.HelmClient = (*stubHelmClient)(nil)
 var _ repository.OCIClient = (*stubOCIClient)(nil)
 var _ ScaleClient = (*stubScaleClient)(nil)
--- a/backend/internal/domain/service/monitoring_service.go
+++ b/backend/internal/domain/service/monitoring_service.go
@ -3,6 +3,7 @@ package service
 import (
 	"context"
 	"fmt"
 	"sort"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
@ -13,16 +14,22 @@ import (
 // MonitoringService provides cluster monitoring data.
 type MonitoringService struct {
 	// clusterRepo gives access to the stored clusters.
 	clusterRepo   repository.ClusterRepository
 	// metricsClient supplies cluster metrics and pod resource allocations.
 	metricsClient repository.MetricsClient
 	// instanceRepo lists a cluster's instances for resource-usage enrichment; may be nil.
 	instanceRepo  repository.InstanceRepository
 	// userRepo: presumably resolves users for the per-user rows — its use is not visible here; verify.
 	userRepo      repository.UserRepository
 }
 // NewMonitoringService creates a monitoring service wired to the given
 // cluster repository, metrics client, instance repository and user repository.
 func NewMonitoringService(
 	clusterRepo repository.ClusterRepository,
 	metricsClient repository.MetricsClient,
 	instanceRepo repository.InstanceRepository,
 	userRepo repository.UserRepository,
 ) *MonitoringService {
 	svc := new(MonitoringService)
 	svc.clusterRepo = clusterRepo
 	svc.metricsClient = metricsClient
 	svc.instanceRepo = instanceRepo
 	svc.userRepo = userRepo
 	return svc
 }
@ -43,6 +50,8 @@ func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID
 	if err != nil {
 		return nil, fmt.Errorf("failed to get cluster metrics: %w", err)
 	}
 	s.enrichResourceUsage(ctx, principal, metrics)
 	s.scopeTenantMetrics(principal, metrics)
 	return metrics, nil
 }
@ -75,12 +84,310 @@ func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entit
 				Status:      "unknown",
 			}
 		}
 		s.enrichResourceUsage(ctx, principal, metrics)
 		s.scopeTenantMetrics(principal, metrics)
 		result = append(result, metrics)
 	}
 	return result, nil
 }
 // enrichResourceUsage fills metrics with the resource allocations of the
 // instances the principal may read, as cluster-level totals and per owner.
 //
 // It lists the cluster's instances and pod allocations, keeps only pods whose
 // namespace/instance name maps onto a readable instance, then writes the
 // CPU/memory/GPU request and limit totals and a ResourceUsageByUser slice
 // ordered by username, then user ID. Every exit path ends with
 // addVisibleUserRows. Failures to list instances or allocations are logged to
 // stdout and leave the aggregate fields untouched; no error is returned.
 func (s *MonitoringService) enrichResourceUsage(ctx context.Context, principal *authz.Principal, metrics *entity.ClusterMetrics) {
 	if metrics == nil || s.instanceRepo == nil || s.metricsClient == nil {
 		s.addVisibleUserRows(ctx, principal, metrics)
 		return
 	}
 	instances, err := s.instanceRepo.ListByCluster(ctx, metrics.ClusterID)
 	if err != nil {
 		fmt.Printf("Warning: failed to list instances for cluster %s resource usage: %v\n", metrics.ClusterID, err)
 		s.addVisibleUserRows(ctx, principal, metrics)
 		return
 	}
 	allocations, err := s.metricsClient.GetPodResourceAllocations(ctx, metrics.ClusterID)
 	if err != nil {
 		fmt.Printf("Warning: failed to list pod resource allocations for cluster %s: %v\n", metrics.ClusterID, err)
 		s.addVisibleUserRows(ctx, principal, metrics)
 		return
 	}
 	// Index the readable instances by "namespace/name" so pods can be matched to them.
 	visibleInstances := make(map[string]*entity.Instance)
 	for _, instance := range instances {
 		if instance == nil || !canReadMonitoringInstance(principal, instance) {
 			continue
 		}
 		key := monitoringInstanceKey(instance.Namespace, instance.Name)
 		visibleInstances[key] = instance
 	}
 	// usageAccumulator collects everything attributed to one owner.
 	type usageAccumulator struct {
 		userID      string
 		username    string
 		workspaceID string
 		allocation  entity.ResourceAllocation
 		podCount    int
 		instances   map[string]struct{}
 	}
 	byUser := make(map[string]*usageAccumulator)
 	total := entity.ResourceAllocation{}
 	for _, pod := range allocations {
 		if pod == nil {
 			continue
 		}
 		instance := visibleInstances[monitoringInstanceKey(pod.Namespace, pod.InstanceName)]
 		if instance == nil {
 			// Pod does not belong to an instance this principal may read.
 			continue
 		}
 		total = addResourceAllocation(total, pod.Allocation)
 		acc := byUser[instance.OwnerID]
 		if acc == nil {
 			acc = &usageAccumulator{
 				userID:      instance.OwnerID,
 				workspaceID: instance.WorkspaceID,
 				instances:   map[string]struct{}{},
 			}
 			byUser[instance.OwnerID] = acc
 		}
 		// Resolve the display name only while it is still unknown, so the user
 		// repository is not queried again for every further pod of an owner
 		// whose name has already been found.
 		if acc.username == "" {
 			acc.username = instance.OwnerUsername
 			if acc.username == "" {
 				acc.username = s.usernameForOwner(ctx, instance.OwnerID, principal)
 			}
 		}
 		acc.allocation = addResourceAllocation(acc.allocation, pod.Allocation)
 		acc.podCount++
 		// A set, so several pods of one instance count as a single instance.
 		acc.instances[instance.ID] = struct{}{}
 	}
 	metrics.CPURequests = formatCPUAllocation(total.CPURequestsMilli)
 	metrics.CPULimits = formatCPUAllocation(total.CPULimitsMilli)
 	metrics.MemoryRequests = formatMemoryAllocation(total.MemoryRequestsBytes)
 	metrics.MemoryLimits = formatMemoryAllocation(total.MemoryLimitsBytes)
 	metrics.GPURequests = total.GPURequests
 	metrics.GPULimits = total.GPULimits
 	metrics.GPUMemoryRequestsMB = total.GPUMemoryRequestsMB
 	metrics.GPUMemoryLimitsMB = total.GPUMemoryLimitsMB
 	metrics.AllocatedGPU = total.GPURequests
 	metrics.AllocatedGPUMemoryMB = total.GPUMemoryRequestsMB
 	// Map iteration order is random; sort the owners for a stable row order.
 	userIDs := make([]string, 0, len(byUser))
 	for userID := range byUser {
 		userIDs = append(userIDs, userID)
 	}
 	sort.Slice(userIDs, func(i, j int) bool {
 		left := byUser[userIDs[i]]
 		right := byUser[userIDs[j]]
 		if left.username == right.username {
 			return left.userID < right.userID
 		}
 		return left.username < right.username
 	})
 	usage := make([]entity.UserResourceUsage, 0, len(userIDs))
 	for _, userID := range userIDs {
 		acc := byUser[userID]
 		usage = append(usage, entity.UserResourceUsage{
 			UserID:              acc.userID,
 			Username:            acc.username,
 			WorkspaceID:         acc.workspaceID,
 			InstanceCount:       len(acc.instances),
 			PodCount:            acc.podCount,
 			CPURequests:         formatCPUAllocation(acc.allocation.CPURequestsMilli),
 			CPULimits:           formatCPUAllocation(acc.allocation.CPULimitsMilli),
 			MemoryRequests:      formatMemoryAllocation(acc.allocation.MemoryRequestsBytes),
 			MemoryLimits:        formatMemoryAllocation(acc.allocation.MemoryLimitsBytes),
 			GPURequests:         acc.allocation.GPURequests,
 			GPULimits:           acc.allocation.GPULimits,
 			GPUMemoryRequestsMB: acc.allocation.GPUMemoryRequestsMB,
 			GPUMemoryLimitsMB:   acc.allocation.GPUMemoryLimitsMB,
 		})
 	}
 	metrics.ResourceUsageByUser = usage
 	s.addVisibleUserRows(ctx, principal, metrics)
 }
 // addVisibleUserRows appends an all-zero usage row for every user the
 // principal should see but who has no row in metrics.ResourceUsageByUser yet.
 //
 // A non-admin principal only gets a row for themselves. An admin gets a row
 // for every active user with role authz.RoleUser returned by the user
 // repository, after which all rows are re-sorted by username, then user ID.
 // It does nothing when principal or metrics is nil, and for admins also when
 // the user repository is missing or fails to list users (the failure is logged).
 func (s *MonitoringService) addVisibleUserRows(ctx context.Context, principal *authz.Principal, metrics *entity.ClusterMetrics) {
 	if metrics == nil || principal == nil {
 		return
 	}
 	// User IDs that already own a row, so nobody is listed twice.
 	seen := make(map[string]struct{}, len(metrics.ResourceUsageByUser))
 	for i := range metrics.ResourceUsageByUser {
 		if id := metrics.ResourceUsageByUser[i].UserID; id != "" {
 			seen[id] = struct{}{}
 		}
 	}
 	addZeroRow := func(userID, username, workspaceID string) {
 		if _, dup := seen[userID]; dup || userID == "" {
 			return
 		}
 		seen[userID] = struct{}{}
 		// Counts and GPU figures are left at their zero values.
 		metrics.ResourceUsageByUser = append(metrics.ResourceUsageByUser, entity.UserResourceUsage{
 			UserID:         userID,
 			Username:       username,
 			WorkspaceID:    workspaceID,
 			CPURequests:    "0 cores",
 			CPULimits:      "0 cores",
 			MemoryRequests: "0 B",
 			MemoryLimits:   "0 B",
 		})
 	}
 	switch {
 	case !principal.IsAdmin():
 		addZeroRow(principal.UserID, principal.Username, principal.WorkspaceID)
 		return
 	case s.userRepo == nil:
 		return
 	}
 	users, err := s.userRepo.List(ctx)
 	if err != nil {
 		fmt.Printf("Warning: failed to list users for monitoring rows: %v\n", err)
 		return
 	}
 	for _, user := range users {
 		if user != nil && user.Role == authz.RoleUser && user.IsActive {
 			addZeroRow(user.ID, user.Username, user.WorkspaceID)
 		}
 	}
 	// Appended rows landed at the end; restore the username / user ID order.
 	rows := metrics.ResourceUsageByUser
 	sort.Slice(rows, func(a, b int) bool {
 		if rows[a].Username != rows[b].Username {
 			return rows[a].Username < rows[b].Username
 		}
 		return rows[a].UserID < rows[b].UserID
 	})
 }
 // scopeTenantMetrics strips cluster-wide figures from metrics for a non-admin
 // principal so that only the principal's own usage remains.
 //
 // Node data, totals, usage percentages and per-node maxima are zeroed; the pod
 // count and used GPU are taken from the principal's own usage rows; the
 // per-user list is cut down to the principal's rows. When the principal owns
 // no instances, the request/limit and allocation fields are cleared as well.
 // Nil principal, admin principal or nil metrics is a no-op.
 func (s *MonitoringService) scopeTenantMetrics(principal *authz.Principal, metrics *entity.ClusterMetrics) {
 	if metrics == nil || principal == nil || principal.IsAdmin() {
 		return
 	}
 	// Only the GPU request sum is consumed below, so nothing else is accumulated.
 	var own entity.ResourceAllocation
 	ownPods, ownInstances := 0, 0
 	for i := range metrics.ResourceUsageByUser {
 		row := &metrics.ResourceUsageByUser[i]
 		if row.UserID == principal.UserID {
 			ownPods += row.PodCount
 			ownInstances += row.InstanceCount
 			own.GPURequests += row.GPURequests
 		}
 	}
 	metrics.NodeCount, metrics.Nodes = 0, nil
 	metrics.PodCount = ownPods
 	metrics.TotalCPU, metrics.TotalMemory = "", ""
 	metrics.TotalGPU = 0
 	// NOTE(review): Used CPU/memory are copied before the "no instances" reset
 	// below, so they keep whatever the request fields held at this point —
 	// confirm that is intended.
 	metrics.UsedCPU = metrics.CPURequests
 	metrics.UsedMemory = metrics.MemoryRequests
 	metrics.UsedGPU = int(own.GPURequests)
 	metrics.CPUUsage, metrics.MemoryUsage, metrics.GPUUsage = 0, 0, 0
 	metrics.MaxNodeCPU, metrics.MaxNodeMemory = "", ""
 	metrics.MaxNodeGPU = 0
 	metrics.MaxNodeCPUUsage, metrics.MaxNodeMemUsage, metrics.MaxNodeGPUUsage = 0, 0, 0
 	metrics.ResourceUsageByUser = filterSelfUsage(principal.UserID, metrics.ResourceUsageByUser)
 	if ownInstances != 0 {
 		return
 	}
 	// The principal owns nothing in this cluster: blank the allocation figures.
 	metrics.CPURequests, metrics.CPULimits = "", ""
 	metrics.MemoryRequests, metrics.MemoryLimits = "", ""
 	metrics.GPURequests, metrics.GPULimits = 0, 0
 	metrics.GPUMemoryRequestsMB, metrics.GPUMemoryLimitsMB = 0, 0
 	metrics.AllocatedGPU, metrics.AllocatedGPUMemoryMB = 0, 0
 }
 // filterSelfUsage returns a new slice holding only the rows of usage whose
 // UserID equals userID, in their original order. The result is never nil.
 func filterSelfUsage(userID string, usage []entity.UserResourceUsage) []entity.UserResourceUsage {
 	own := make([]entity.UserResourceUsage, 0, len(usage))
 	for i := range usage {
 		if usage[i].UserID != userID {
 			continue
 		}
 		own = append(own, usage[i])
 	}
 	return own
 }
 // canReadMonitoringInstance reports whether principal may see instance in
 // monitoring data: admins see everything, other principals only instances
 // they own inside their own workspace. A nil principal or instance yields false.
 func canReadMonitoringInstance(principal *authz.Principal, instance *entity.Instance) bool {
 	switch {
 	case principal == nil, instance == nil:
 		return false
 	case principal.IsAdmin():
 		return true
 	default:
 		return instance.OwnerID == principal.UserID && instance.WorkspaceID == principal.WorkspaceID
 	}
 }
 // usernameForOwner resolves the display name for ownerID. The principal's own
 // name is used when the owner is the principal; otherwise the user repository
 // is consulted. An empty string is returned for an empty ownerID, a missing
 // repository, a lookup error or a nil user.
 func (s *MonitoringService) usernameForOwner(ctx context.Context, ownerID string, principal *authz.Principal) string {
 	switch {
 	case ownerID == "":
 		return ""
 	case principal != nil && ownerID == principal.UserID:
 		return principal.Username
 	case s.userRepo == nil:
 		return ""
 	}
 	if user, err := s.userRepo.GetByID(ctx, ownerID); err == nil && user != nil {
 		return user.Username
 	}
 	return ""
 }
 // monitoringInstanceKey builds the "namespace/name" key used to match pod
 // allocations to instances.
 func monitoringInstanceKey(namespace, name string) string {
 	return fmt.Sprintf("%s/%s", namespace, name)
 }
 // addResourceAllocation returns the field-wise sum of left and right over the
 // CPU, memory, GPU and GPU-memory request/limit fields. Neither input is modified.
 func addResourceAllocation(left, right entity.ResourceAllocation) entity.ResourceAllocation {
 	// Start from the zero value so only the fields summed here are populated.
 	var sum entity.ResourceAllocation
 	sum.CPURequestsMilli = left.CPURequestsMilli + right.CPURequestsMilli
 	sum.CPULimitsMilli = left.CPULimitsMilli + right.CPULimitsMilli
 	sum.MemoryRequestsBytes = left.MemoryRequestsBytes + right.MemoryRequestsBytes
 	sum.MemoryLimitsBytes = left.MemoryLimitsBytes + right.MemoryLimitsBytes
 	sum.GPURequests = left.GPURequests + right.GPURequests
 	sum.GPULimits = left.GPULimits + right.GPULimits
 	sum.GPUMemoryRequestsMB = left.GPUMemoryRequestsMB + right.GPUMemoryRequestsMB
 	sum.GPUMemoryLimitsMB = left.GPUMemoryLimitsMB + right.GPUMemoryLimitsMB
 	return sum
 }
 // formatCPUAllocation renders a millicore amount as cores with two decimals,
 // e.g. 1500 becomes "1.50 cores".
 func formatCPUAllocation(milli int64) string {
 	cores := float64(milli) / 1000.0
 	return fmt.Sprintf("%.2f cores", cores)
 }
 // formatMemoryAllocation renders a byte count with binary (1024-based) units.
 // Values below 1024 are printed as plain bytes ("512 B"); larger values get one
 // decimal and a KiB..EiB suffix ("1.5 KiB").
 func formatMemoryAllocation(bytes int64) string {
 	const unit = 1024
 	if bytes < unit {
 		return fmt.Sprintf("%d B", bytes)
 	}
 	const prefixes = "KMGTPE"
 	divisor := int64(unit)
 	idx := 0
 	// Step up one unit each time the remainder still holds at least 1024 of the current unit.
 	remaining := bytes / unit
 	for remaining >= unit {
 		remaining /= unit
 		divisor *= unit
 		idx++
 	}
 	return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(divisor), prefixes[idx])
 }
 // GetMonitoringSummary 获取监控汇总信息
 func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.MonitoringSummary, error) {
 	// 获取所有集群监控数据
@ -123,6 +430,9 @@ func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string
 	if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
 		return nil, entity.ErrClusterNotFound
 	}
 	if !principal.IsAdmin() {
 		return nil, entity.ErrForbidden
 	}
 	nodes, err := s.metricsClient.GetNodeMetrics(ctx, clusterID)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get node metrics: %w", err)
--- a/backend/internal/domain/service/monitoring_service_test.go
+++ b/backend/internal/domain/service/monitoring_service_test.go
@ -0,0 +1,228 @@
 package service
 import (
 	"context"
 	"testing"
 	"time"
 	persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/pkg/authz"
 )
 // TestListClusterMonitoringAggregatesResourceUsageForAdmin checks that an
 // admin sees the allocations of all owners: cluster totals of 3 GPUs and
 // 30000 MB GPU memory, and one usage row per user ordered alice, bob.
 func TestListClusterMonitoringAggregatesResourceUsageForAdmin(t *testing.T) {
 	admin := &authz.Principal{
 		UserID:      "admin-1",
 		Username:    "admin",
 		Role:        authz.RoleAdmin,
 		WorkspaceID: "workspace-admin",
 	}
 	ctx := authz.WithPrincipal(context.Background(), admin)
 	instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
 	clusters := &monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}}
 	client := &stubMetricsClient{allocations: monitoringAllocations()}
 	svc := NewMonitoringService(clusters, client, instanceRepo, userRepo)
 	metrics, err := svc.ListClusterMonitoring(ctx)
 	if err != nil {
 		t.Fatalf("ListClusterMonitoring returned error: %v", err)
 	}
 	if count := len(metrics); count != 1 {
 		t.Fatalf("expected 1 cluster metric, got %d", count)
 	}
 	got := metrics[0]
 	if got.AllocatedGPU != 3 || got.AllocatedGPUMemoryMB != 30000 {
 		t.Fatalf("expected total GPU/gpumem allocation 3/30000, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
 	}
 	rows := got.ResourceUsageByUser
 	if len(rows) != 2 {
 		t.Fatalf("expected 2 user usage rows, got %d: %#v", len(rows), rows)
 	}
 	if first := rows[0]; first.Username != "alice" || first.GPURequests != 1 {
 		t.Fatalf("expected alice GPU request row first, got %#v", first)
 	}
 	if second := rows[1]; second.Username != "bob" || second.GPURequests != 2 {
 		t.Fatalf("expected bob GPU request row second, got %#v", second)
 	}
 }
 // TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser checks that a
 // non-admin user only sees their own allocation (1 GPU / 10000 MB), a single
 // usage row for themselves, a self-scoped pod count, and no cluster-wide node
 // or capacity figures.
 func TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser(t *testing.T) {
 	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
 		UserID:      "user-1",
 		Username:    "alice",
 		Role:        authz.RoleUser,
 		WorkspaceID: "workspace-1",
 	})
 	instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
 	svc := NewMonitoringService(
 		&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
 		&stubMetricsClient{allocations: monitoringAllocations()},
 		instanceRepo,
 		userRepo,
 	)
 	metrics, err := svc.ListClusterMonitoring(ctx)
 	if err != nil {
 		t.Fatalf("ListClusterMonitoring returned error: %v", err)
 	}
 	// Guard the index below: an empty result must fail the test, not panic it.
 	if len(metrics) != 1 {
 		t.Fatalf("expected 1 cluster metric, got %d", len(metrics))
 	}
 	got := metrics[0]
 	if got.AllocatedGPU != 1 || got.AllocatedGPUMemoryMB != 10000 {
 		t.Fatalf("expected ordinary user allocation to be scoped to alice, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
 	}
 	if len(got.ResourceUsageByUser) != 1 {
 		t.Fatalf("expected only alice usage row, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser)
 	}
 	if got.ResourceUsageByUser[0].UserID != "user-1" || got.ResourceUsageByUser[0].Username != "alice" {
 		t.Fatalf("expected alice usage row, got %#v", got.ResourceUsageByUser[0])
 	}
 	// The stub reports 3 nodes / 48 cores / 256Gi; none of that may reach a tenant.
 	if got.NodeCount != 0 || len(got.Nodes) != 0 || got.TotalCPU != "" || got.TotalMemory != "" {
 		t.Fatalf("expected ordinary user cluster-wide metrics to be sanitized, got nodes=%d/%d totalCPU=%q totalMemory=%q", got.NodeCount, len(got.Nodes), got.TotalCPU, got.TotalMemory)
 	}
 	if got.PodCount != 1 {
 		t.Fatalf("expected ordinary user pod count to be self scoped, got %d", got.PodCount)
 	}
 }
 // TestGetNodeMetricsForbiddenForOrdinaryUser checks that a non-admin user
 // asking for node metrics of a globally shared cluster gets entity.ErrForbidden.
 func TestGetNodeMetricsForbiddenForOrdinaryUser(t *testing.T) {
 	alice := &authz.Principal{
 		UserID:      "user-1",
 		Username:    "alice",
 		Role:        authz.RoleUser,
 		WorkspaceID: "workspace-1",
 	}
 	ctx := authz.WithPrincipal(context.Background(), alice)
 	clusters := &monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}}
 	client := &stubMetricsClient{allocations: monitoringAllocations()}
 	// No instance or user repository is passed to the service in this test.
 	svc := NewMonitoringService(clusters, client, nil, nil)
 	if _, err := svc.GetNodeMetrics(ctx, "cluster-1"); err != entity.ErrForbidden {
 		t.Fatalf("expected ordinary user node metrics to be forbidden, got %v", err)
 	}
 }
 // seedMonitoringOwners creates mock instance and user repositories populated
 // with two users (alice, bob) and one instance per user on cluster-1. Any
 // seeding failure aborts the calling test.
 func seedMonitoringOwners(t *testing.T, ctx context.Context) (*persistencemock.InstanceRepositoryMock, *persistencemock.UserRepositoryMock) {
 	t.Helper()
 	users := []*entity.User{
 		{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"},
 		{ID: "user-2", Username: "bob", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-2"},
 	}
 	instances := []*entity.Instance{
 		{ID: "inst-1", ClusterID: "cluster-1", Name: "alice-app", Namespace: "ocdp-u-alice", WorkspaceID: "workspace-1", OwnerID: "user-1"},
 		{ID: "inst-2", ClusterID: "cluster-1", Name: "bob-app", Namespace: "ocdp-u-bob", WorkspaceID: "workspace-2", OwnerID: "user-2"},
 	}
 	instanceRepo := persistencemock.NewInstanceRepositoryMock().(*persistencemock.InstanceRepositoryMock)
 	userRepo := persistencemock.NewUserRepositoryMock().(*persistencemock.UserRepositoryMock)
 	for i := range users {
 		if err := userRepo.Create(ctx, users[i]); err != nil {
 			t.Fatalf("failed to seed user %s: %v", users[i].ID, err)
 		}
 	}
 	for i := range instances {
 		if err := instanceRepo.Create(ctx, instances[i]); err != nil {
 			t.Fatalf("failed to seed instance %s: %v", instances[i].ID, err)
 		}
 	}
 	return instanceRepo, userRepo
 }
 // monitoringAllocations returns the pod allocation fixture: one pod for
 // alice-app (0.5/1 CPU, 1/2 GiB, 1 GPU, 10000 MB GPU memory) and one pod for
 // bob-app (2/4 CPU, 4/8 GiB, 2 GPUs, 20000 MB GPU memory), both on cluster-1.
 func monitoringAllocations() []*entity.PodResourceAllocation {
 	const gib = 1024 * 1024 * 1024
 	alicePod := &entity.PodResourceAllocation{
 		ClusterID:    "cluster-1",
 		Namespace:    "ocdp-u-alice",
 		PodName:      "alice-app-0",
 		InstanceName: "alice-app",
 		Allocation: entity.ResourceAllocation{
 			CPURequestsMilli:    500,
 			CPULimitsMilli:      1000,
 			MemoryRequestsBytes: 1 * gib,
 			MemoryLimitsBytes:   2 * gib,
 			GPURequests:         1,
 			GPULimits:           1,
 			GPUMemoryRequestsMB: 10000,
 			GPUMemoryLimitsMB:   10000,
 		},
 	}
 	bobPod := &entity.PodResourceAllocation{
 		ClusterID:    "cluster-1",
 		Namespace:    "ocdp-u-bob",
 		PodName:      "bob-app-0",
 		InstanceName: "bob-app",
 		Allocation: entity.ResourceAllocation{
 			CPURequestsMilli:    2000,
 			CPULimitsMilli:      4000,
 			MemoryRequestsBytes: 4 * gib,
 			MemoryLimitsBytes:   8 * gib,
 			GPURequests:         2,
 			GPULimits:           2,
 			GPUMemoryRequestsMB: 20000,
 			GPUMemoryLimitsMB:   20000,
 		},
 	}
 	return []*entity.PodResourceAllocation{alicePod, bobPod}
 }
 // monitoringClusterRepo is a slice-backed cluster repository for the
 // monitoring tests. Update and Delete are no-ops.
 type monitoringClusterRepo struct {
 	clusters []*entity.Cluster
 }
 // Create appends cluster to the in-memory list and always succeeds.
 func (r *monitoringClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error {
 	r.clusters = append(r.clusters, cluster)
 	return nil
 }
 // GetByID returns the first stored cluster with the given ID, or
 // entity.ErrClusterNotFound when none matches.
 func (r *monitoringClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) {
 	for i := range r.clusters {
 		if candidate := r.clusters[i]; candidate.ID == id {
 			return candidate, nil
 		}
 	}
 	return nil, entity.ErrClusterNotFound
 }
 // GetByName returns the first stored cluster with the given name, or
 // entity.ErrClusterNotFound when none matches.
 func (r *monitoringClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) {
 	for i := range r.clusters {
 		if candidate := r.clusters[i]; candidate.Name == name {
 			return candidate, nil
 		}
 	}
 	return nil, entity.ErrClusterNotFound
 }
 // Update does nothing and reports success.
 func (r *monitoringClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error {
 	return nil
 }
 // Delete does nothing and reports success.
 func (r *monitoringClusterRepo) Delete(ctx context.Context, id string) error {
 	return nil
 }
 // List returns the backing slice itself (not a copy).
 func (r *monitoringClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) {
 	return r.clusters, nil
 }
 // stubMetricsClient is a canned metrics client for the monitoring tests.
 type stubMetricsClient struct {
 	// allocations is returned verbatim by GetPodResourceAllocations.
 	allocations []*entity.PodResourceAllocation
 }
 // GetClusterMetrics returns a fixed healthy snapshot (3 nodes, 99 pods,
 // "48 cores", "256Gi", one node entry) stamped with the current time.
 func (c *stubMetricsClient) GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
 	snapshot := new(entity.ClusterMetrics)
 	snapshot.ClusterID = clusterID
 	snapshot.ClusterName = "cluster"
 	snapshot.Status = "healthy"
 	snapshot.NodeCount = 3
 	snapshot.PodCount = 99
 	snapshot.TotalCPU = "48 cores"
 	snapshot.TotalMemory = "256Gi"
 	snapshot.Nodes = []entity.NodeMetrics{{NodeName: "node-a"}}
 	snapshot.LastCheck = time.Now()
 	return snapshot, nil
 }
 // GetNodeMetrics returns no nodes and no error.
 func (c *stubMetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
 	var none []*entity.NodeMetrics
 	return none, nil
 }
 // GetPodResourceAllocations returns the configured allocations regardless of clusterID.
 func (c *stubMetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) {
 	return c.allocations, nil
 }
--- a/backend/internal/domain/service/quota_precheck.go
+++ b/backend/internal/domain/service/quota_precheck.go
@ -0,0 +1,400 @@
 package service
 import (
 	"context"
 	"errors"
 	"fmt"
 	"io"
 	"sort"
 	"strconv"
 	"strings"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/util/yaml"
 )
 // ErrQuotaExceeded is returned by the quota comparison functions when at least
 // one required resource is above its hard limit; match it with errors.Is.
 var ErrQuotaExceeded = errors.New("quota exceeded")
 // QuotaExceededResource describes one resource whose required amount is above
 // its hard quota limit.
 type QuotaExceededResource struct {
 	// Name is the quota resource name, e.g. "requests.cpu".
 	Name     string
 	// Required is the required amount rendered as a string.
 	Required string
 	// Hard is the hard limit rendered as a string.
 	Hard     string
 }
 // QuotaPrecheckResult is the outcome of comparing a resource demand with a quota.
 type QuotaPrecheckResult struct {
 	// Allowed is false as soon as any resource exceeds its hard limit.
 	Allowed  bool
 	// Required is the demand that was compared against the quota.
 	Required repository.ResourceEstimate
 	// Hard holds the hard limits extracted from the quota; a resource without
 	// a configured limit is left at its zero value.
 	Hard     repository.ResourceVector
 	// Exceeded lists the offending resources sorted by name; empty when Allowed.
 	Exceeded []QuotaExceededResource
 }
 // QuotaPrecheckService estimates the resources an instance would request and
 // compares them with a quota.
 type QuotaPrecheckService struct {
 	// helmClient produces the resource estimate for an instance.
 	helmClient repository.HelmClient
 }
 // NewQuotaPrecheckService returns a precheck service backed by helmClient.
 // A nil helmClient is accepted here; the Estimate* methods then return an error.
 func NewQuotaPrecheckService(helmClient repository.HelmClient) *QuotaPrecheckService {
 	svc := new(QuotaPrecheckService)
 	svc.helmClient = helmClient
 	return svc
 }
 // EstimateAndCompare estimates the resources of instance on cluster through
 // the helm client and compares them with the workspace quota.
 //
 // It returns an error when the service or its helm client is nil, or when the
 // estimate fails (nil result in both cases). Otherwise it returns whatever
 // CompareWorkspaceQuota returns, i.e. a populated result together with
 // ErrQuotaExceeded when the quota is exceeded.
 func (s *QuotaPrecheckService) EstimateAndCompare(ctx context.Context, cluster *entity.Cluster, workspace *entity.Workspace, instance *entity.Instance) (*QuotaPrecheckResult, error) {
 	if s == nil || s.helmClient == nil {
 		return nil, errors.New("quota precheck requires helm client")
 	}
 	estimate, estimateErr := s.helmClient.EstimateInstanceResources(ctx, cluster, instance)
 	if estimateErr != nil {
 		return nil, estimateErr
 	}
 	return CompareWorkspaceQuota(workspace, estimate)
 }
 // EstimateAndCompareBinding estimates the resources of the target instance
 // and, when current is non-nil, of the currently deployed one, then compares
 // them with the binding quota and the reported usage via CompareBindingQuota.
 //
 // It returns an error (with a nil result) when the service or its helm client
 // is nil or when either estimate fails. Otherwise the result and error of
 // CompareBindingQuota are returned unchanged.
 func (s *QuotaPrecheckService) EstimateAndCompareBinding(ctx context.Context, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, target *entity.Instance, current *entity.Instance) (*QuotaPrecheckResult, error) {
 	if s == nil || s.helmClient == nil {
 		return nil, errors.New("quota precheck requires helm client")
 	}
 	targetEstimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, target)
 	if err != nil {
 		return nil, err
 	}
 	// Stays nil when there is no current release to account for.
 	var currentEstimate *repository.ResourceEstimate
 	if current != nil {
 		if currentEstimate, err = s.helmClient.EstimateInstanceResources(ctx, cluster, current); err != nil {
 			return nil, err
 		}
 	}
 	return CompareBindingQuota(binding, usage, targetEstimate, currentEstimate)
 }
 // CompareWorkspaceQuota compares estimate with the hard limits derived from
 // the workspace, with no existing usage and no current release taken into account.
 func CompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
 	hardLimits := resourceQuotaHard(workspace)
 	return compareQuotaList(hardLimits, nil, estimate, nil)
 }
 // CompareBindingQuota compares the projected demand (usage minus the current
 // release plus the target release) with the hard limits derived from binding.
 func CompareBindingQuota(binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
 	hardLimits := bindingQuotaHard(binding)
 	return compareQuotaList(hardLimits, usage, targetEstimate, currentEstimate)
 }
 // compareQuotaList checks a projected resource demand against hard quota limits.
 //
 // The demand is (used - current, floored at zero) + target, where current and
 // target are the effective requests of the respective estimates and used comes
 // from usage (zero when usage is nil). Only resources present in hardList are
 // checked. The returned result is always non-nil; the error is
 // ErrQuotaExceeded when any limit is exceeded and nil otherwise.
 func compareQuotaList(hardList corev1.ResourceList, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
 	if targetEstimate == nil {
 		targetEstimate = &repository.ResourceEstimate{}
 	}
 	var alreadyUsed repository.ResourceVector
 	if usage != nil {
 		alreadyUsed = usage.Used
 	}
 	releasable := effectiveQuotaRequests(currentEstimate)
 	incoming := effectiveQuotaRequests(targetEstimate)
 	required := addResourceVector(subtractResourceVectorFloorZero(alreadyUsed, releasable), incoming)
 	result := &QuotaPrecheckResult{Allowed: true}
 	result.Required.Requests = required
 	result.Hard = resourceVectorFromQuotaHard(hardList)
 	flag := func(name, need, limit string) {
 		result.Allowed = false
 		result.Exceeded = append(result.Exceeded, QuotaExceededResource{Name: name, Required: need, Hard: limit})
 	}
 	// CPU and memory are compared as quantities, GPU figures as plain integers.
 	checkQuantity := func(name string, need resource.Quantity) {
 		if limit, ok := hardList[corev1.ResourceName(name)]; ok && need.Cmp(limit) > 0 {
 			flag(name, need.String(), limit.String())
 		}
 	}
 	checkCount := func(name string, need int64) {
 		if limit, ok := hardList[corev1.ResourceName(name)]; ok && need > limit.Value() {
 			flag(name, strconv.FormatInt(need, 10), limit.String())
 		}
 	}
 	checkQuantity("requests.cpu", required.CPU)
 	checkQuantity("requests.memory", required.Memory)
 	checkCount("requests.nvidia.com/gpu", required.GPU)
 	checkCount("requests.nvidia.com/gpumem", required.GPUMemoryMB)
 	sort.Slice(result.Exceeded, func(a, b int) bool {
 		return result.Exceeded[a].Name < result.Exceeded[b].Name
 	})
 	if result.Allowed {
 		return result, nil
 	}
 	return result, ErrQuotaExceeded
 }
 // legacyCompareWorkspaceQuota compares the effective requests of estimate with
 // the workspace's hard limits. Unlike compareQuotaList it reports the raw
 // estimate in Result.Required and takes no existing usage into account.
 // NOTE(review): no caller is visible in this part of the file — confirm it is
 // still needed.
 func legacyCompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
 	if estimate == nil {
 		estimate = &repository.ResourceEstimate{}
 	}
 	hardList := resourceQuotaHard(workspace)
 	result := &QuotaPrecheckResult{Allowed: true, Required: *estimate}
 	result.Hard = resourceVectorFromQuotaHard(hardList)
 	effective := effectiveQuotaRequests(estimate)
 	flag := func(name, need, limit string) {
 		result.Allowed = false
 		result.Exceeded = append(result.Exceeded, QuotaExceededResource{Name: name, Required: need, Hard: limit})
 	}
 	// CPU and memory are compared as quantities, GPU figures as plain integers.
 	checkQuantity := func(name string, need resource.Quantity) {
 		if limit, ok := hardList[corev1.ResourceName(name)]; ok && need.Cmp(limit) > 0 {
 			flag(name, need.String(), limit.String())
 		}
 	}
 	checkCount := func(name string, need int64) {
 		if limit, ok := hardList[corev1.ResourceName(name)]; ok && need > limit.Value() {
 			flag(name, strconv.FormatInt(need, 10), limit.String())
 		}
 	}
 	checkQuantity("requests.cpu", effective.CPU)
 	checkQuantity("requests.memory", effective.Memory)
 	checkCount("requests.nvidia.com/gpu", effective.GPU)
 	checkCount("requests.nvidia.com/gpumem", effective.GPUMemoryMB)
 	sort.Slice(result.Exceeded, func(a, b int) bool {
 		return result.Exceeded[a].Name < result.Exceeded[b].Name
 	})
 	if result.Allowed {
 		return result, nil
 	}
 	return result, ErrQuotaExceeded
 }
 // effectiveQuotaRequests returns, per resource, the larger of the estimate's
 // request and limit, so a limits-only workload still counts against the
 // requests quota. A nil estimate yields the zero vector.
 func effectiveQuotaRequests(estimate *repository.ResourceEstimate) repository.ResourceVector {
 	var effective repository.ResourceVector
 	if estimate == nil {
 		return effective
 	}
 	requests, limits := estimate.Requests, estimate.Limits
 	effective.CPU = maxQuantity(requests.CPU, limits.CPU)
 	effective.Memory = maxQuantity(requests.Memory, limits.Memory)
 	effective.GPU = maxInt64(requests.GPU, limits.GPU)
 	effective.GPUMemoryMB = maxInt64(requests.GPUMemoryMB, limits.GPUMemoryMB)
 	return effective
 }
 // addResourceVector returns left + right, resource by resource, without
 // modifying either argument.
 func addResourceVector(left, right repository.ResourceVector) repository.ResourceVector {
 	out := left
 	// A plain struct copy of resource.Quantity still shares its internal
 	// *inf.Dec when the value is held in decimal form, and Add mutates that
 	// Dec in place. Deep-copy first so the caller's quantities are not altered.
 	out.CPU = left.CPU.DeepCopy()
 	out.Memory = left.Memory.DeepCopy()
 	out.CPU.Add(right.CPU)
 	out.Memory.Add(right.Memory)
 	out.GPU += right.GPU
 	out.GPUMemoryMB += right.GPUMemoryMB
 	return out
 }
 // subtractResourceVectorFloorZero returns left - right, resource by resource,
 // with every negative component clamped to zero. Neither argument is modified.
 func subtractResourceVectorFloorZero(left, right repository.ResourceVector) repository.ResourceVector {
 	out := left
 	// A plain struct copy of resource.Quantity still shares its internal
 	// *inf.Dec when the value is held in decimal form, and Sub mutates that
 	// Dec in place. Deep-copy first so the caller's quantities are not altered.
 	out.CPU = left.CPU.DeepCopy()
 	out.Memory = left.Memory.DeepCopy()
 	out.CPU.Sub(right.CPU)
 	if out.CPU.Sign() < 0 {
 		out.CPU = resource.Quantity{}
 	}
 	out.Memory.Sub(right.Memory)
 	if out.Memory.Sign() < 0 {
 		out.Memory = resource.Quantity{}
 	}
 	out.GPU -= right.GPU
 	if out.GPU < 0 {
 		out.GPU = 0
 	}
 	out.GPUMemoryMB -= right.GPUMemoryMB
 	if out.GPUMemoryMB < 0 {
 		out.GPUMemoryMB = 0
 	}
 	return out
 }
 // maxQuantity returns the larger of the two quantities; left wins on a tie.
 func maxQuantity(left, right resource.Quantity) resource.Quantity {
 	if left.Cmp(right) < 0 {
 		return right
 	}
 	return left
 }
 // maxInt64 returns the larger of left and right.
 func maxInt64(left, right int64) int64 {
 	if right > left {
 		return right
 	}
 	return left
 }
 // EstimateRenderedManifestResources walks every document of a rendered
 // (multi-document YAML or JSON) manifest and sums the container requests and
 // limits of all objects that carry a pod template, multiplied by their
 // replica count. Objects without a kind or without a pod template are
 // skipped. A document that cannot be decoded aborts with an error.
 func EstimateRenderedManifestResources(manifest string) (*repository.ResourceEstimate, error) {
 	estimate := &repository.ResourceEstimate{}
 	decoder := yaml.NewYAMLOrJSONDecoder(strings.NewReader(manifest), 4096)
 	for {
 		var obj unstructured.Unstructured
 		err := decoder.Decode(&obj)
 		if errors.Is(err, io.EOF) {
 			// End of the manifest stream.
 			return estimate, nil
 		}
 		if err != nil {
 			return nil, fmt.Errorf("failed to decode rendered manifest: %w", err)
 		}
 		if obj.GetKind() == "" {
 			continue
 		}
 		if podSpec, replicas, ok := podTemplateSpec(obj.Object); ok {
 			addPodSpecResources(estimate, podSpec, replicas)
 		}
 	}
 }
 // resourceVectorFromQuotaHard extracts the requests.* CPU, memory, GPU and GPU
 // memory limits from a quota resource list. Missing entries come out as zero.
 func resourceVectorFromQuotaHard(hard corev1.ResourceList) repository.ResourceVector {
 	var limits repository.ResourceVector
 	limits.CPU = hard[corev1.ResourceName("requests.cpu")]
 	limits.Memory = hard[corev1.ResourceName("requests.memory")]
 	// Copy into locals first: Value is called on an addressable quantity.
 	gpuLimit := hard[corev1.ResourceName("requests.nvidia.com/gpu")]
 	gpuMemLimit := hard[corev1.ResourceName("requests.nvidia.com/gpumem")]
 	limits.GPU = gpuLimit.Value()
 	limits.GPUMemoryMB = gpuMemLimit.Value()
 	return limits
 }
 // bindingQuotaHard converts the quota strings of a workspace/cluster binding
 // into a Kubernetes resource list keyed by requests.* names. Values that are
 // empty after normalization, fail normalization, or cannot be parsed as a
 // quantity are silently left out, so the corresponding resource is not limited.
 // A nil binding yields an empty list.
 func bindingQuotaHard(binding *entity.WorkspaceClusterBinding) corev1.ResourceList {
 	hard := corev1.ResourceList{}
 	if binding == nil {
 		return hard
 	}
 	// put stores an already-normalized value under name when it parses.
 	put := func(name corev1.ResourceName, normalized string) {
 		if normalized == "" {
 			return
 		}
 		quantity, err := resource.ParseQuantity(normalized)
 		if err != nil {
 			return
 		}
 		hard[name] = quantity
 	}
 	put("requests.cpu", normalizeStandardQuotaQuantity(binding.QuotaCPU))
 	put("requests.memory", normalizeStandardQuotaQuantity(binding.QuotaMemory))
 	put("requests.nvidia.com/gpu", normalizeStandardQuotaQuantity(binding.QuotaGPU))
 	// GPU memory has its own normalizer, which can reject the value.
 	if gpuMem, err := normalizeGPUMemoryQuota(binding.QuotaGPUMem); err == nil {
 		put("requests.nvidia.com/gpumem", gpuMem)
 	}
 	return hard
 }
 // podTemplateSpec locates the pod spec inside a decoded Kubernetes object and
 // returns it with the number of replicas to count.
 //
 // Pods, DaemonSets, Jobs and CronJobs count as one replica; Deployments,
 // ReplicaSets, StatefulSets and ReplicationControllers use spec.replicas (see
 // workloadTemplateSpec). Any other kind returns ok == false.
 func podTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
 	kind, _, _ := unstructured.NestedString(obj, "kind")
 	var specPath []string
 	switch kind {
 	case "Deployment", "ReplicaSet", "StatefulSet", "ReplicationController":
 		return workloadTemplateSpec(obj)
 	case "Pod":
 		specPath = []string{"spec"}
 	case "DaemonSet", "Job":
 		specPath = []string{"spec", "template", "spec"}
 	case "CronJob":
 		specPath = []string{"spec", "jobTemplate", "spec", "template", "spec"}
 	default:
 		return nil, 0, false
 	}
 	spec, ok := nestedMap(obj, specPath...)
 	return spec, 1, ok
 }
 // workloadTemplateSpec returns spec.template.spec of a replicated workload
 // together with its replica count. The count falls back to 1 when
 // spec.replicas is absent, unreadable as an int64, or smaller than 1.
 func workloadTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
 	spec, found := nestedMap(obj, "spec", "template", "spec")
 	if !found {
 		return nil, 0, false
 	}
 	count := int64(1)
 	if declared, _, err := unstructured.NestedInt64(obj, "spec", "replicas"); err == nil && declared >= 1 {
 		count = declared
 	}
 	return spec, count, true
 }
 // nestedMap wraps unstructured.NestedMap and folds its error into the boolean:
 // the second result is true only when the field exists and is a map.
 func nestedMap(obj map[string]interface{}, fields ...string) (map[string]interface{}, bool) {
 	value, found, err := unstructured.NestedMap(obj, fields...)
 	if err != nil {
 		return value, false
 	}
 	return value, found
 }
 // addPodSpecResources adds the requests and limits of every init container
 // and regular container in podSpec to estimate, each multiplied by replicas
 // (values below 1 are treated as 1). Entries that are not maps are ignored.
 // NOTE(review): init containers are summed together with regular containers;
 // confirm this is the intended quota accounting.
 func addPodSpecResources(estimate *repository.ResourceEstimate, podSpec map[string]interface{}, replicas int64) {
 	if replicas < 1 {
 		replicas = 1
 	}
 	accumulate := func(field string) {
 		containers, found, err := unstructured.NestedSlice(podSpec, field)
 		if err != nil || !found {
 			return
 		}
 		for i := range containers {
 			if container, isMap := containers[i].(map[string]interface{}); isMap {
 				addContainerResourceList(&estimate.Requests, replicas, container, "resources", "requests")
 				addContainerResourceList(&estimate.Limits, replicas, container, "resources", "limits")
 			}
 		}
 	}
 	accumulate("initContainers")
 	accumulate("containers")
 }
 // addContainerResourceList reads the resource map found at fields inside
 // container (e.g. resources.requests) and adds its cpu, memory, GPU and GPU
 // memory entries, multiplied by replicas, to target. Other resource names are
 // ignored, as is a missing map.
 func addContainerResourceList(target *repository.ResourceVector, replicas int64, container map[string]interface{}, fields ...string) {
 	resources, found := nestedMap(container, fields...)
 	if !found {
 		return
 	}
 	for name, raw := range resources {
 		switch {
 		case name == "cpu":
 			addQuantity(&target.CPU, raw, replicas)
 		case name == "memory":
 			addQuantity(&target.Memory, raw, replicas)
 		case name == "nvidia.com/gpu" || name == "requests.nvidia.com/gpu" || name == "limits.nvidia.com/gpu":
 			target.GPU += replicas * parseIntegerResource(raw)
 		case name == "nvidia.com/gpumem" || name == "requests.nvidia.com/gpumem" || name == "limits.nvidia.com/gpumem":
 			target.GPUMemoryMB += replicas * parseGPUMemoryResource(raw)
 		}
 	}
 }
 // addQuantity parses value (formatted with fmt.Sprint) as a Kubernetes
 // quantity, multiplies it by replicas and adds it to target. A value that
 // does not parse is ignored.
 func addQuantity(target *resource.Quantity, value interface{}, replicas int64) {
 	parsed, err := resource.ParseQuantity(fmt.Sprint(value))
 	if err == nil {
 		parsed.Mul(replicas)
 		target.Add(parsed)
 	}
 }
 // parseIntegerResource parses value (formatted with fmt.Sprint) as a
 // Kubernetes quantity and returns its integer Value, or 0 when it does not parse.
 func parseIntegerResource(value interface{}) int64 {
 	if quantity, err := resource.ParseQuantity(fmt.Sprint(value)); err == nil {
 		return quantity.Value()
 	}
 	return 0
 }
 // parseGPUMemoryResource normalizes value with normalizeGPUMemoryQuota and
 // parses the result as a base-10 integer. It returns 0 when normalization
 // fails, yields an empty string, or the result is not an integer.
 func parseGPUMemoryResource(value interface{}) int64 {
 	normalized, err := normalizeGPUMemoryQuota(fmt.Sprint(value))
 	if err != nil || normalized == "" {
 		return 0
 	}
 	if amount, parseErr := strconv.ParseInt(normalized, 10, 64); parseErr == nil {
 		return amount
 	}
 	return 0
 }
--- a/backend/internal/domain/service/quota_precheck_test.go
+++ b/backend/internal/domain/service/quota_precheck_test.go
@ -0,0 +1,241 @@
 package service
 import (
 	"errors"
 	"testing"
 	"github.com/ocdp/cluster-service/internal/domain/entity"
 	"github.com/ocdp/cluster-service/internal/domain/repository"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 func TestCompareWorkspaceQuotaReportsExceededRequests(t *testing.T) {
 	t.Parallel()
 	workspace := &entity.Workspace{
 		QuotaCPU:    "2",
 		QuotaMemory: "4Gi",
 		QuotaGPU:    "1",
 		QuotaGPUMem: "10000",
 	}
 	estimate := &repository.ResourceEstimate{
 		Requests: repository.ResourceVector{
 			CPU:         resource.MustParse("2500m"),
 			Memory:      resource.MustParse("3Gi"),
 			GPU:         1,
 			GPUMemoryMB: 12000,
 		},
 	}
 	result, err := CompareWorkspaceQuota(workspace, estimate)
 	if !errors.Is(err, ErrQuotaExceeded) {
 		t.Fatalf("expected ErrQuotaExceeded, got %v", err)
 	}
 	if result == nil || result.Allowed {
 		t.Fatalf("expected denied result, got %#v", result)
 	}
 	if len(result.Exceeded) != 2 {
 		t.Fatalf("expected 2 exceeded resources, got %#v", result.Exceeded)
 	}
 	if result.Exceeded[0].Name != "requests.cpu" {
 		t.Fatalf("expected requests.cpu exceeded first, got %#v", result.Exceeded)
 	}
 	if result.Exceeded[1].Name != "requests.nvidia.com/gpumem" {
 		t.Fatalf("expected requests.nvidia.com/gpumem exceeded second, got %#v", result.Exceeded)
 	}
 }
 func TestCompareWorkspaceQuotaUsesLimitsAsEffectiveRequests(t *testing.T) {
 	t.Parallel()
 	workspace := &entity.Workspace{
 		QuotaGPU:    "0",
 		QuotaGPUMem: "9999",
 	}
 	estimate := &repository.ResourceEstimate{
 		Limits: repository.ResourceVector{
 			GPU:         1,
 			GPUMemoryMB: 10000,
 		},
 	}
 	result, err := CompareWorkspaceQuota(workspace, estimate)
 	if !errors.Is(err, ErrQuotaExceeded) {
 		t.Fatalf("expected ErrQuotaExceeded from limits-only GPU resources, got %v", err)
 	}
 	if result == nil || len(result.Exceeded) != 2 {
 		t.Fatalf("expected gpu and gpumem to be exceeded, got %#v", result)
 	}
 }
 func TestCompareBindingQuotaSubtractsCurrentReleaseFromUsedQuota(t *testing.T) {
 	t.Parallel()
 	binding := &entity.WorkspaceClusterBinding{
 		QuotaCPU:    "1",
 		QuotaMemory: "2Gi",
 		QuotaGPU:    "1",
 		QuotaGPUMem: "10000",
 	}
 	usage := &repository.ResourceQuotaUsage{
 		Used: repository.ResourceVector{
 			CPU:         resource.MustParse("1"),
 			Memory:      resource.MustParse("2Gi"),
 			GPU:         1,
 			GPUMemoryMB: 10000,
 		},
 	}
 	current := &repository.ResourceEstimate{
 		Requests: repository.ResourceVector{
 			CPU:         resource.MustParse("1"),
 			Memory:      resource.MustParse("2Gi"),
 			GPU:         1,
 			GPUMemoryMB: 10000,
 		},
 	}
 	targetSameSize := &repository.ResourceEstimate{
 		Requests: repository.ResourceVector{
 			CPU:         resource.MustParse("1"),
 			Memory:      resource.MustParse("2Gi"),
 			GPU:         1,
 			GPUMemoryMB: 10000,
 		},
 	}
 	result, err := CompareBindingQuota(binding, usage, targetSameSize, current)
 	if err != nil {
 		t.Fatalf("expected update with same resource footprint to fit quota, got %v", err)
 	}
 	if result.Required.Requests.GPU != 1 || result.Required.Requests.GPUMemoryMB != 10000 {
 		t.Fatalf("expected required resources to subtract current release before target, got %#v", result.Required.Requests)
 	}
 	targetScaledUp := &repository.ResourceEstimate{
 		Requests: repository.ResourceVector{
 			CPU:         resource.MustParse("2"),
 			Memory:      resource.MustParse("4Gi"),
 			GPU:         2,
 			GPUMemoryMB: 20000,
 		},
 	}
 	result, err = CompareBindingQuota(binding, usage, targetScaledUp, current)
 	if !errors.Is(err, ErrQuotaExceeded) {
 		t.Fatalf("expected scale-up beyond quota to be rejected, got %v", err)
 	}
 	if result == nil || result.Allowed {
 		t.Fatalf("expected denied quota result, got %#v", result)
 	}
 }
 // TestCompareBindingQuotaTreatsExplicitZeroGPUAsNoGPUAllowed verifies that a
 // binding whose GPU and GPU-memory quotas are the explicit string "0" rejects
 // an estimate that requests one GPU and 10000 of GPU memory, even though the
 // CPU and memory requests fit. Both GPU resources must be listed as exceeded;
 // their order is not checked.
 func TestCompareBindingQuotaTreatsExplicitZeroGPUAsNoGPUAllowed(t *testing.T) {
 	t.Parallel()
 	binding := &entity.WorkspaceClusterBinding{
 		QuotaCPU:    "8",
 		QuotaMemory: "32Gi",
 		QuotaGPU:    "0",
 		QuotaGPUMem: "0",
 	}
 	vllmLikeEstimate := &repository.ResourceEstimate{
 		Requests: repository.ResourceVector{
 			CPU:         resource.MustParse("2"),
 			Memory:      resource.MustParse("8Gi"),
 			GPU:         1,
 			GPUMemoryMB: 10000,
 		},
 	}
 	// Empty usage and a nil current release: this is a fresh install.
 	result, err := CompareBindingQuota(binding, &repository.ResourceQuotaUsage{}, vllmLikeEstimate, nil)
 	if !errors.Is(err, ErrQuotaExceeded) {
 		t.Fatalf("expected GPU request to exceed explicit zero quota, got %v", err)
 	}
 	// Fail cleanly instead of panicking on a nil dereference below, matching
 	// the nil guard used by the other quota tests in this file.
 	if result == nil {
 		t.Fatalf("expected quota result listing exceeded resources, got nil")
 	}
 	exceeded := map[string]bool{}
 	for _, item := range result.Exceeded {
 		exceeded[item.Name] = true
 	}
 	for _, name := range []string{"requests.nvidia.com/gpu", "requests.nvidia.com/gpumem"} {
 		if !exceeded[name] {
 			t.Fatalf("expected %s to be exceeded, got %#v", name, result.Exceeded)
 		}
 	}
 }
 func TestBindingQuotaHardKeepsGPUMemoryAsIntegerMB(t *testing.T) {
 	t.Parallel()
 	hard := bindingQuotaHard(&entity.WorkspaceClusterBinding{QuotaGPU: "1", QuotaGPUMem: "10000"})
 	gpuMem := hard[corev1.ResourceName("requests.nvidia.com/gpumem")]
 	if gpuMem.Value() != 10000 {
 		t.Fatalf("expected gpumem quota to remain integer MB 10000, got %s value=%d", gpuMem.String(), gpuMem.Value())
 	}
 }
 // TestEstimateRenderedManifestResourcesSumsPodTemplates renders a two-document
 // manifest (a 3-replica Deployment with one init container and one app
 // container, followed by a Service that declares no container resources) and
 // checks that EstimateRenderedManifestResources returns per-resource totals
 // equal to the sum over both containers multiplied by the replica count.
 func TestEstimateRenderedManifestResourcesSumsPodTemplates(t *testing.T) {
 	t.Parallel()
 	manifest := `
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: gpu-worker
 spec:
  replicas: 3
  template:
    spec:
      initContainers:
      - name: init
        image: busybox
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
      containers:
      - name: app
        image: busybox
        resources:
          requests:
            cpu: 500m
            memory: 1Gi
            nvidia.com/gpu: "1"
            nvidia.com/gpumem: "10000"
          limits:
            cpu: "1"
            memory: 2Gi
            nvidia.com/gpu: "1"
            nvidia.com/gpumem: "12000"
 ---
 apiVersion: v1
 kind: Service
 metadata:
  name: ignored
 `
 	estimate, err := EstimateRenderedManifestResources(manifest)
 	if err != nil {
 		t.Fatalf("EstimateRenderedManifestResources returned error: %v", err)
 	}
 	// Requests: (100m + 500m) * 3 replicas = 1800m CPU.
 	if estimate.Requests.CPU.Cmp(resource.MustParse("1800m")) != 0 {
 		t.Fatalf("expected requests cpu 1800m, got %s", estimate.Requests.CPU.String())
 	}
 	// Requests: (128Mi + 1Gi) * 3 replicas = 3456Mi memory.
 	if estimate.Requests.Memory.Cmp(resource.MustParse("3456Mi")) != 0 {
 		t.Fatalf("expected requests memory 3456Mi, got %s", estimate.Requests.Memory.String())
 	}
 	// Only the app container requests GPU resources: 1 * 3 and 10000 * 3.
 	if estimate.Requests.GPU != 3 {
 		t.Fatalf("expected requests gpu 3, got %d", estimate.Requests.GPU)
 	}
 	if estimate.Requests.GPUMemoryMB != 30000 {
 		t.Fatalf("expected requests gpumem 30000, got %d", estimate.Requests.GPUMemoryMB)
 	}
 	// Limits are declared on the app container only, so each total is the app
 	// container's limit times 3 replicas.
 	if estimate.Limits.CPU.Cmp(resource.MustParse("3")) != 0 {
 		t.Fatalf("expected limits cpu 3, got %s", estimate.Limits.CPU.String())
 	}
 	if estimate.Limits.Memory.Cmp(resource.MustParse("6Gi")) != 0 {
 		t.Fatalf("expected limits memory 6Gi, got %s", estimate.Limits.Memory.String())
 	}
 	if estimate.Limits.GPU != 3 {
 		t.Fatalf("expected limits gpu 3, got %d", estimate.Limits.GPU)
 	}
 	if estimate.Limits.GPUMemoryMB != 36000 {
 		t.Fatalf("expected limits gpumem 36000, got %d", estimate.Limits.GPUMemoryMB)
 	}
 }
--- a/backend/internal/domain/service/quota_quantity.go
+++ b/backend/internal/domain/service/quota_quantity.go
@ -9,6 +9,10 @@ import (
 func normalizeStandardQuotaQuantity(value string) string {
 	value = strings.TrimSpace(value)
 	switch strings.ToLower(value) {
 	case "unlimited", "none", "no-limit", "nolimit":
 		return ""
 	}
 	upper := strings.ToUpper(value)
 	switch {
 	case strings.HasSuffix(upper, "MB"):
--- a/backend/internal/domain/service/workspace_service.go
+++ b/backend/internal/domain/service/workspace_service.go
@ -3,6 +3,7 @@ package service
 import (
 	"context"
 	"sort"
 	"strings"
 	"time"
 	"github.com/google/uuid"
@ -94,17 +95,17 @@ func (s *WorkspaceService) EnsureClusterBinding(ctx context.Context, workspaceID
 		ClusterID:      cluster.ID,
 		Namespace:      workspace.K8sNamespace,
 		ServiceAccount: workspace.K8sSAName,
-		QuotaCPU:       workspace.QuotaCPU,
+		QuotaCPU:       strings.TrimSpace(workspace.QuotaCPU),
-		QuotaMemory:    workspace.QuotaMemory,
+		QuotaMemory:    strings.TrimSpace(workspace.QuotaMemory),
-		QuotaGPU:       workspace.QuotaGPU,
+		QuotaGPU:       zeroIfEmptyQuota(workspace.QuotaGPU),
-		QuotaGPUMem:    workspace.QuotaGPUMem,
+		QuotaGPUMem:    zeroIfEmptyQuota(workspace.QuotaGPUMem),
 		Status:         "active",
 		CreatedAt:      time.Now(),
 		UpdatedAt:      time.Now(),
 	}
 	tenantBinding := entity.NewTenantBinding(binding.Namespace)
 	tenantBinding.ServiceAccountName = binding.ServiceAccount
-	tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace)
+	tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding)
 	if s.tenantClient != nil {
 		if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
 			return nil, err
@ -145,10 +146,22 @@ func (s *WorkspaceService) IssueKubeconfig(ctx context.Context, workspaceID, clu
 		if err != nil {
 			return nil, err
 		}
 	} else {
 		binding.QuotaCPU = strings.TrimSpace(workspace.QuotaCPU)
 		binding.QuotaMemory = strings.TrimSpace(workspace.QuotaMemory)
 		binding.QuotaGPU = zeroIfEmptyQuota(workspace.QuotaGPU)
 		binding.QuotaGPUMem = zeroIfEmptyQuota(workspace.QuotaGPUMem)
 		binding.UpdatedAt = time.Now()
 	}
 	tenantBinding := entity.NewTenantBinding(binding.Namespace)
 	tenantBinding.ServiceAccountName = binding.ServiceAccount
-	tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace)
+	tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding)
 	if s.tenantClient != nil {
 		if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
 			return nil, err
 		}
 	}
 	_ = s.bindingRepo.Upsert(ctx, binding)
 	kubeconfig, err := s.tenantClient.IssueKubeconfig(ctx, cluster, tenantBinding, ttl)
 	if err != nil {
 		return nil, err
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -28,6 +28,7 @@ services:
      - "${POSTGRES_PORT:-15432}:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./backend/scripts/init-db.sql:/docker-entrypoint-initdb.d/01-init.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-ocdp}"]
      interval: 10s
@ -97,6 +98,7 @@ services:
      sh -c "
        set -eux;
        npm ci;
        rm -rf node_modules/.tmp;
        npm run build;
        mkdir -p /build;
        rm -rf /build/*;
@ -117,6 +119,7 @@ services:
  nginx:
    image: nginx:1.27-alpine
    container_name: ocdp-nginx
    restart: unless-stopped
    depends_on:
      frontend-build:
        condition: service_completed_successfully
--- a/docs/UNRESOLVED-BUGS.md
+++ b/docs/UNRESOLVED-BUGS.md
@ -0,0 +1,74 @@
 # OCDP 未修复问题清单
 **最后更新:** 2026-05-14 (Round 3 回归测试)
 **测试覆盖:** 3 轮测试 (Round 1: v1 基线, Round 2: 配额+YAML, Round 3: 回归+新功能)
 ---
 ## 已知未修复 Bug (15 个)
 ### P1 — 高优先级 (1)
 | # | 标题 | 严重度 | 描述 | Round |
 |----|------|--------|------|-------|
 | 1 | Detail API 返回 replicas: 0 | **P1** | `GET /instances/{id}` 始终返回 `replicas: 0`，与 List API 不一致 | R3 NEW |
 ### P2 — 中优先级 (8)
 | # | 标题 | 严重度 | 描述 | Round |
 |----|------|--------|------|-------|
 | 2 | List API 移除 values 字段 | **P2** | List instances 不再返回 `values`，仅在详情API返回。可能是性能优化，但属于 API 行为变更 | R3 NEW |
 | 3 | API 层无配额预检查 | **P2** | 后端接受所有部署请求（返回200），不验证是否超配额。K8s ResourceQuota 在 pod 级阻止，但 Helm release 仍创建 | R2 |
 | 4 | Values 冲突时无警告 | **P2** | 同时提供 `values` JSON 和 `valuesYaml` 时，JSON 静默覆盖 YAML，无任何警告 | R2 |
 | 5 | Tags 端点缺失 | **P2** | `GET /registries/{id}/repositories/{repo}/tags` 返回 404 | R1 |
 | 6 | Metrics API 缺失 | **P2** | `GET /monitoring/clusters/{id}/metrics` 返回 404 | R1 |
 | 7 | Stats API 缺失 | **P2** | `GET /clusters/{id}/stats` 返回 404 | R1 |
 | 8 | Kubeconfig API 缺失 | **P2** | `GET /clusters/{id}/kubeconfig` 返回 404 | R1 |
 | 9 | Namespace 静默覆盖 + HTTP 200 | **P2** | 用户部署到他人的 namespace 时，API 返回 200 但 namespace 被静默改为自己的。应返回 403 | R1 |
 ### P3 — 低优先级 (6)
 | # | 标题 | 严重度 | 描述 | Round |
 |----|------|--------|------|-------|
 | 10 | 用户枚举漏洞 | **P3** | 不存在用户 "user not found" vs 存在用户 "invalid password"，错误消息不同 | R1 |
 | 11 | 无登录速率限制 | **P3** | 10 次快速失败全部返回 401，无 429 或锁定 | R1 |
 | 12 | Nginx 版本泄露 | **P3** | `Server: nginx/1.27.5` 响应头暴露精确版本 | R1 |
 | 13 | CORS: * | **P3** | `Access-Control-Allow-Origin: *` 允许任意跨域 | R1 |
 | 14 | 缺少安全响应头 | **P3** | 无 HSTS, X-Frame-Options, CSP, X-Content-Type-Options | R1 |
 | 15 | `/health` 端点返回 SPA HTML | **P3** | 健康检查返回 index.html 而非 `{"status":"ok"}` | R1 |
 ---
 ## 已修复 (Round 3 验证通过)
 | 原 Bug ID | 描述 | 修复后行为 |
 |-----------|------|-----------|
 | BUG-001 | Launch 按钮无反应 (P0) | ✅ 部署端到端正常 |
 | BUG-002 | SPA 旧路由空白页 (P0) | ✅ 所有旧路由返回 SPA |
 | BUG-003 | DELETE 返回 404 (P1) | ✅ 返回 HTTP 204 |
 | BUG-004 | DELETE 空响应体 (P1) | ✅ HTTP 204 No Content |
 | — | InstanceCard 无 scaling UI | ✅ +/- 按钮 + K8s API |
 | — | ModifyModal values 为空 | ✅ Full Helm values + diff |
 | — | Per-card Refresh button | ✅ 移除，改为 page-level |
 ---
 ## 修复优先级排序
 ```
 立即修复 (P1):
  1. Detail API replicas=0  →  从 K8s live state 同步
 短期修复 (P2):
  2. API 层配额预检查  →  POST instances 时验证
  3. Values 冲突警告  →  两者同时提供时返回 warning
  4. Namespace 拒绝而非覆盖  →  返回 403
  5. 缺失端点实现 (tags/stats/metrics/kubeconfig)
 安全加固 (P3):
  6. 登录错误消息统一 → "Invalid username or password"
  7. 速率限制 → max 5/min per IP
  8. Nginx: server_tokens off + 安全头
  9. CORS 收紧 → 具体域名
  10. /health → JSON 响应
 ```
--- a/docs/bug-report.md
+++ b/docs/bug-report.md
@ -1,164 +0,0 @@
 # OCDP 系统测试 Bug 报告
 **测试日期:** 2026-05-11
 **测试环境:** http://10.6.80.114:18080
 **集群:** k3s (dbf824f1-9962-4d8e-881e-870c75fdb6f5), k8s (23880994-dfe4-48d0-abc0-b49692cc630a)
 **Harbor:** harbor.bwgdi.com (83b823af-873b-457c-912c-9ccde3cb12e6)
 ---
 ## 测试团队
 | Agent | 角色 | 账号 |
 |-------|------|------|
 | user-a-agent | 前端 UI 测试 | test-user-a / TestUserA123! |
 | user-b-agent | API/部署测试 | test-user-b / TestUserB123! |
 | user-c-agent | 权限隔离测试 | test-user-c / TestUserC123! |
 | security-agent | 安全测试 | admin + 普通用户 |
 ---
 ## Bug 列表 (按严重度排序)
 ### P0 - Blocker (核心功能不可用)
 | ID | 标题 | 发现者 | 页面/端点 | 描述 |
 |----|------|--------|-----------|------|
 | BUG-001 | **Launch 按钮点击无任何反应** | user-a | `/artifact/registries` (TagCard) | Chart Browser 中 TagCard 的 "Launch" 按钮显示为可用状态 (`is_enabled() == True`)，但点击后无任何效果：不弹出 Launch Modal，无 URL 变化，无控制台错误。**核心"一键部署"流程完全阻塞** |
 | BUG-002 | **SPA 直接路由返回空白页面** | user-a | `/clusters`, `/registries`, `/monitoring`, `/launch` | 直接访问 SPA 旧路由时只渲染 `<div id="root">` 空壳，React SPA 无法挂载。代码中已定义 redirect 映射但未生效（如 `/clusters` → `/configuration/clusters`） |
 ### P1 - 高 (High)
 | ID | 标题 | 发现者 | 页面/端点 | 描述 |
 |----|------|--------|-----------|------|
 | BUG-003 | DELETE 实例返回 404 但实际成功删除 | user-b, user-c | `DELETE /clusters/{id}/instances/{id}` | 删除操作正确触发 `pending-delete` 状态转换，但 HTTP 返回 **404**（空 body），非预期 202/204。客户端误判为失败 |
 | BUG-004 | DELETE 实例返回空响应体 | user-b | `DELETE /clusters/{id}/instances/{id}` | 用正确的 token 和 ID 请求，返回空 body（无 JSON），前端解析会失败 |
 ### P2 - 中 (Medium)
 | ID | 标题 | 发现者 | 页面/端点 | 描述 |
 |----|------|--------|-----------|------|
 | BUG-005 | Tags 专用端点缺失 | user-b | `GET /registries/{id}/repositories/{repo}/tags` | 端点未实现，返回纯文本 "404 page not found"。虽可通过 `/artifacts` 获取 tag，但 API 不完整 |
 | BUG-006 | 跨用户 namespace 部署时静默覆盖 | user-c | `POST /clusters/{id}/instances` | 用户请求部署到其他用户的 namespace 时，服务端静默使用自己的 namespace，返回 200 且无任何警告或提示 |
 | BUG-007 | Clusters Metrics API 缺失 | user-b | `GET /monitoring/clusters/{id}/metrics` | 监控页面可能需要的数据端点未实现（404） |
 | BUG-008 | Cluster Stats API 缺失 | user-b | `GET /clusters/{id}/stats` | 统计端点未实现（404） |
 | BUG-009 | Kubeconfig API 缺失 | user-b | `GET /clusters/{id}/kubeconfig` | kubeconfig 签发端点未实现（404） |
 | BUG-010 | "Launch" 按钮缺乏可访问性标识 | user-a | TagCard "Launch" | Chart 上的 "Launch" 按钮无 `aria-label`，与侧边栏 "Launch Instance" 导航项标签冲突，屏幕阅读器用户无法区分 |
 ### P3 - 低 (Low)
 | ID | 标题 | 发现者 | 页面/端点 | 描述 |
 |----|------|--------|-----------|------|
 | BUG-011 | API 响应格式不一致 | user-b | 列表 API | Clusters/Registries 返回裸数组，Instances 返回 `{ "instances": [...], "total": N }` 包装对象 |
 | BUG-012 | `/auth/me` 返回空的 token 字段 | user-b | `GET /auth/me` | 响应中包含 `"accessToken": ""` 和 `"refreshToken": ""` 空字段，复用了 login 响应 DTO 未清理 |
 | BUG-013 | 登录接口存在用户枚举漏洞 | security | `POST /auth/login` | 不存在用户返回 "user not found"，存在用户返回 "invalid password"，攻击者可枚举有效用户名 |
 | BUG-014 | 登录接口无速率限制 | security | `POST /auth/login` | 10 次连续请求全部返回 401，无 429 限流或锁定 |
 | BUG-015 | Nginx 版本信息泄露 | security | HTTP Headers | `Server: nginx/1.27.5` 暴露精确版本号 |
 | BUG-016 | CORS 配置过于宽松 | security | All API | `Access-Control-Allow-Origin: *` 允许任意跨域请求 |
 | BUG-017 | 缺少安全响应头 | security | All pages | 缺少 HSTS、X-Frame-Options、Content-Security-Policy 等 |
 | BUG-018 | `/health` 端点返回 SPA HTML | security | `GET /health` | 健康检查返回完整 index.html，非 JSON 状态响应 |
 ---
 ## 分类汇总
 ### 前端 Bug
 | ID | 描述 | 严重度 |
 |----|------|--------|
 | BUG-001 | Launch 按钮无反应（核心功能阻塞） | P0 🔴 |
 | BUG-002 | SPA 路由空白页 | P0 🔴 |
 | BUG-010 | Launch 按钮缺少 aria-label | P2 🟡 |
 ### 后端 API Bug
 | ID | 描述 | 严重度 |
 |----|------|--------|
 | BUG-003 | DELETE 返回 404 | P1 🟠 |
 | BUG-004 | DELETE 空 body | P1 🟠 |
 | BUG-005 | Tags 端点缺失 | P2 🟡 |
 | BUG-007 | Metrics API 缺失 | P2 🟡 |
 | BUG-008 | Stats API 缺失 | P2 🟡 |
 | BUG-009 | Kubeconfig API 缺失 | P2 🟡 |
 | BUG-011 | 响应格式不一致 | P3 🔵 |
 | BUG-012 | auth/me 空 token 字段 | P3 🔵 |
 | BUG-018 | /health 返回 HTML | P3 🔵 |
 ### 安全/权限 Bug
 | ID | 描述 | 严重度 |
 |----|------|--------|
 | BUG-006 | Namespace 静默覆盖（安全但令人困惑） | P2 🟡 |
 | BUG-013 | 用户枚举（错误消息差异） | P3 🔵 |
 | BUG-014 | 无速率限制 | P3 🔵 |
 | BUG-015 | Nginx 版本泄露 | P3 🔵 |
 | BUG-016 | CORS Origin: * | P3 🔵 |
 | BUG-017 | 缺少安全响应头 | P3 🔵 |
 ### 严重度分布
 | 级别 | 数量 |
 |------|------|
 | P0 (Blocker) | 2 |
 | P1 (High) | 2 |
 | P2 (Medium) | 6 |
 | P3 (Low) | 8 |
 | **合计** | **18** |
 ---
 ## 测试通过项
 ### 认证
 - [x] 有效凭据登录 (admin + 所有 test-user)
 - [x] 无效凭据返回 401
 - [x] 无 token 访问被保护 API 返回 401
 - [x] 无效/篡改 JWT token 全部被拒绝
 - [x] /auth/me 返回正确的用户信息
 - [x] JWT payload 包含角色、权限、namespace
 ### Cluster / Registry API
 - [x] 集群列表正常返回
 - [x] 集群健康检查正常
 - [x] Registry 列表正常返回
 - [x] 通过 artifacts 端点浏览 repository 正常
 - [x] 无效 registry/repository 返回恰当错误
 ### 权限隔离
 - [x] GET /users 返回 403 (普通用户)
 - [x] POST /auth/register 返回 403 (普通用户)
 - [x] 用户无法访问其他用户的 workspace 资源
 - [x] 用户无法部署到其他用户的 Kubernetes namespace
 - [x] 安全架构：核心认证/授权/脱敏/隔离控制均正确实现
 ### 实例部署生命周期
 - [x] 实例创建操作成功（pending-install）
 - [x] 实例状态正确追踪（pending-install → deployed）
 - [x] 实例删除正确转换状态（pending-delete → 消失）
 - [x] 实例列表按 clusterId 正确过滤
 ### 安全测试通过项
 - [x] XSS/SQLi 注入安全处理
 - [x] 路径遍历攻击被阻止
 - [x] JWT alg=none/无效格式被拒绝
 - [x] 集群凭据和 Registry 密码脱敏显示 (••••••••)
 - [x] 自注册端点需认证 (401)
 ---
 ## 建议修复优先级
 ### 立即修复 (P0)
 1. **BUG-001**: 调查 Launch 按钮 onClick handler — TagCard 组件中 `onLaunch` prop 未正确传递给 LaunchModal，或 launch 状态 / artifactType 检查阻止了 modal 打开
 2. **BUG-002**: 检查 React Router `<Navigate redirect>` 组件和 SPA 的 index.html 配置，确保旧路由正确重定向
 ### 尽快修复 (P1)
 3. **BUG-003/004**: InstanceHandler.Delete 应返回 202 Accepted + `{"status":"deleting"}` 而非 404+空 body
 ### 短期修复 (P2)
 4. 实现 `/metrics`, `/stats` 等缺失 API
 5. Launch 按钮添加 `aria-label` 属性
 6. Namespace 覆盖时返回警告或 403
 ### 安全加固 (P3)
 7. 登录错误消息统一为 "Invalid username or password"
 8. 实现速率限制
 9. Nginx 安全加固：`server_tokens off` + 安全响应头
 10. CORS 收紧为具体域名
 11. 修复 `/health` 端点
 12. 统一 API 响应格式
--- a/docs/bugs-user-a.md
+++ b/docs/bugs-user-a.md
@ -1,92 +0,0 @@
 # OCDP Platform QA Report - test-user-a
 **Date:** 2026-05-11
 **Environment:** http://10.6.80.114:18080
 **User:** test-user-a (non-admin)
 ## Summary
 - **Total Bugs Found:** 3
 - **Screenshots Taken:** 12
 - **Test Status:** 7/8 areas covered, 1 blocked (Launch button non-functional)
 ---
 ## Bug List
 ### Bug #1: Direct SPA Routes Return Empty Pages (🔴 HIGH)
 - **Page:** Multiple — `/clusters`, `/registries`, `/monitoring`, `/launch`
 - **Action:** Navigate directly to these URLs
 - **Actual:** Returns only the React `<div id="root">` shell with no rendered content (~0 chars body text). The SPA fails to mount when hitting these routes directly.
 - **Expected:** Should either render content or redirect to correct working routes:
  - `/clusters` → `/configuration/clusters`
  - `/registries` → `/configuration/registries`
  - `/monitoring` → `/monitoring/clusters`
  - `/launch` → `/artifact/registries`
 - **Severity:** HIGH — Users who bookmark or type these URLs see blank pages
 - **Screenshot:** `01-login` (representative of empty state)
 **Working routes for reference:**
 - `/configuration/clusters` ✅
 - `/configuration/registries` ✅
 - `/monitoring/clusters` ✅
 - `/artifact/registries` ✅
 - `/artifact/instances` ✅
 ---
 ### Bug #2: Launch Button Does Nothing When Clicked (🔴 HIGH)
 - **Page:** Chart Browser (`/artifact/registries`)
 - **Action:**
  1. Navigate to `/artifact/registries`
  2. Registry `harbor-bwgdi` loads with 13 charts
  3. Expand `charts/chromadb` folder
  4. Tag `0.1.4` appears with "Launch" and "Copy" buttons
  5. Click the "Launch" button
 - **Actual:** No visible reaction — no modal opens, no URL change, no console error. The button is not disabled (no `disabled` attribute, no `aria-disabled`), is visibly styled as active (`bg-blue-50 text-blue-700 border-blue-200 shadow-sm`), and Playwright confirms `is_enabled() == True`. The React onClick handler produces no observable effect.
 - **Expected:** Clicking "Launch" on a chart tag should open a deployment form/dialog with cluster selector, instance name, namespace, and values configuration fields.
 - **Severity:** HIGH — Core platform feature (deploying Helm charts) is completely blocked
 - **Screenshot:** `04-chart-expanded`
 ---
 ### Bug #3: Ambiguous "Launch" Button Labels (🟡 MEDIUM)
 - **Page:** Chart Browser (`/artifact/registries`)
 - **Action:** Inspect button accessible names
 - **Actual:** Both the sidebar navigation item "Launch Instance" and the chart action button "Launch" appear on the same page. The chart action button has no distinguishing `aria-label` or accessible description. The "Copy" button next to it has a `title="Copy pull command"` attribute, but "Launch" does not.
 - **Expected:** The chart action should have a descriptive label like `aria-label="Launch chart chromadb version 0.1.4"` to differentiate from the nav item.
 - **Severity:** MEDIUM — Accessibility concern; minor confusion for sighted users with multiple "Launch" targets
 ---
 ## Test Results by Area
 | Area | Status | Notes |
 |------|--------|-------|
 | Login | ✅ PASS | test-user-a login successful, redirect to `/home` |
 | Home Page | ✅ PASS | All cards visible, nav clicks work, no Users section |
 | Sidebar Nav | ✅ PASS | All 6 items navigate correctly, Users hidden |
 | Chart Browser | ❌ BLOCKED | Registry loads, charts expand, but **Launch button dead** |
 | Instances | ✅ PASS | Empty state, filter, refresh all work |
 | Monitoring | ✅ PASS | 2 clusters, health data, CPU/Memory/GPU stats all load |
 | Config - Clusters | ✅ PASS | Both clusters listed, Add form opens |
 | Config - Registries | ✅ PASS | Harbor registry listed, Add form opens |
 | Direct Routes | ❌ FAIL | 4 routes return empty pages |
 ## Screenshots
 - `01-login` → `/tmp/ocdp-qa-screenshots/01-login.png`
 - `02-home` → `/tmp/ocdp-qa-screenshots/02-home.png`
 - `02-home-full` → `/tmp/ocdp-qa-screenshots/02-home-full.png`
 - `04-chart-browser` → `/tmp/ocdp-qa-screenshots/04-chart-browser.png`
 - `04-chart-expanded` → `/tmp/ocdp-qa-screenshots/04-chart-expanded.png`
 - `04-launch-modal` → `/tmp/ocdp-qa-screenshots/04-launch-modal.png`
 - `05-instances` → `/tmp/ocdp-qa-screenshots/05-instances.png`
 - `06-monitoring` → `/tmp/ocdp-qa-screenshots/06-monitoring.png`
 - `07-clusters` → `/tmp/ocdp-qa-screenshots/07-clusters.png`
 - `07-add-cluster-form` → `/tmp/ocdp-qa-screenshots/07-add-cluster-form.png`
 - `08-registries` → `/tmp/ocdp-qa-screenshots/08-registries.png`
 - `08-add-registry-form` → `/tmp/ocdp-qa-screenshots/08-add-registry-form.png`
--- a/docs/bugs-user-b.md
+++ b/docs/bugs-user-b.md
@ -1,149 +0,0 @@
 # Bug Report: test-user-b QA Test
 **Tester:** test-user-b (user role)
 **Date:** 2026-05-11
 **Environment:** http://10.6.80.114:18080
 ---
 ## Bug 1: Repository Tags Endpoint Returns 404
 **Endpoint:** `GET /api/v1/registries/{registryId}/repositories/{repository}/tags`
 **Status Code:** 404
 **Response Body:** `404 page not found` (plain text, not JSON)
 **Expected:** Should return a list of tags for the chart/artifact.
 **Actual:** The dedicated tags endpoint is not implemented or routes incorrectly. The artifacts endpoint (`/repositories/{repository}/artifacts`) does work and returns tag info.
 **Severity:** Medium — tags are still discoverable via artifacts endpoint but the dedicated tags API is broken.
 ---
 ## Bug 2: DELETE Instance Returns Empty Response Body
 **Endpoint:** `DELETE /api/v1/clusters/{clusterId}/instances/{instanceId}`
 **Status Code:** 200
 **Response Body:** (empty — no content at all)
 **Expected:** Should return a confirmation JSON body (e.g., `{"message": "Instance deletion initiated", "id": "..."}`) or at minimum a 202 Accepted with status details.
 **Actual:** Returns a completely empty body. The instance does transition to `pending-delete` state, but the API consumer receives no feedback.
 **Severity:** Medium — operation works but API consumer gets no confirmation.
 ---
 ## Bug 3: Cluster Stats Endpoint Returns 404
 **Endpoint:** `GET /api/v1/clusters/{clusterId}/stats`
 **Status Code:** 404
 **Response Body:** `404 page not found` (plain text)
 **Expected:** Should return cluster resource statistics (CPU, memory, pod counts, etc.) or a proper JSON error if not implemented.
 **Actual:** Endpoint is not implemented — returns a raw 404 with no JSON error structure.
 **Severity:** Low — but given the user has `monitoring:clusters:view` permission, this is a missing feature.
 ---
 ## Bug 4: Kubeconfig Endpoint Returns 404
 **Endpoint:** `GET /api/v1/clusters/{clusterId}/kubeconfig`
 **Status Code:** 404
 **Response Body:** `404 page not found` (plain text)
 **Expected:** Should return a kubeconfig file content or JSON error. User has `kubeconfig:issue:own` permission.
 **Actual:** Endpoint is not implemented.
 **Severity:** Low — the permission exists but the endpoint does nothing.
 ---
 ## Bug 5: Monitoring Metrics Endpoint Returns 404
 **Endpoint:** `GET /api/v1/monitoring/clusters/{clusterId}/metrics`
 **Status Code:** 404
 **Response Body:** `404 page not found` (plain text)
 **Expected:** Monitoring metrics data. User has `monitoring:clusters:view` permission.
 **Actual:** Endpoint not found.
 **Severity:** Low — monitoring permissions exist but backend endpoints missing.
 ---
 ## Bug 6: Inconsistent API Response Format (Array vs Object Wrapper)
 **Clusters and Registries** return bare arrays:
 ```json
 [
    { "id": "...", "name": "k3s", ... }
 ]
 ```
 **Instances** returns an object wrapper:
 ```json
 {
    "instances": [
        { "id": "...", "name": "test-nginx-b", ... }
    ],
    "total": 1
 }
 ```
 **Expected:** Consistent response format across all list endpoints. Either all return bare arrays or all use the `{ "items": [...], "total": N }` wrapper pattern.
 **Severity:** Low — API consistency issue. Makes client code harder to write generically.
 ---
 ## Bug 7: auth/me Returns Empty Token Fields
 **Endpoint:** `GET /api/v1/auth/me`
 **Response includes empty/unpopulated fields:**
 ```json
 {
    "accessToken": "",
    "refreshToken": "",
    ...
 }
 ```
 **Expected:** Either remove these fields from the `/auth/me` response (they are only meaningful in login/refresh responses) or populate them with valid values.
 **Actual:** Empty string values for both token fields create confusion about whether they should be present.
 **Severity:** Low — cosmetic issue, but suggests the DTO is reusing the login response struct without clearing token fields.
 ---
 ## Summary
 | # | Bug | Severity | Category |
 |---|-----|----------|----------|
 | 1 | Tags endpoint 404 | Medium | Missing Implementation |
 | 2 | DELETE returns empty body | Medium | API Response Quality |
 | 3 | Cluster stats endpoint 404 | Low | Missing Implementation |
 | 4 | Kubeconfig endpoint 404 | Low | Missing Implementation |
 | 5 | Monitoring metrics endpoint 404 | Low | Missing Implementation |
 | 6 | Inconsistent list response format | Low | API Consistency |
 | 7 | auth/me returns empty tokens | Low | API Response Quality |
 **Passed Tests:**
 - Login/authentication ✓
 - Auth/me user info ✓
 - Cluster listing ✓
 - Cluster health check ✓
 - Registry listing ✓
 - Repository browsing (artifacts) ✓
 - Instance deployment (nginx chart) ✓
 - Instance status tracking (pending-install → deployed) ✓
 - Instance deletion (async, transitions to pending-delete then removed) ✓
 - Error handling for invalid repository ✓
 - Error handling for missing required fields ✓
 - Auth rejects invalid tokens ✓
 - Auth rejects missing tokens ✓
 - Instance cleanup confirmed ✓
--- a/docs/bugs-user-c.md
+++ b/docs/bugs-user-c.md
@ -1,109 +0,0 @@
 # QA Report: Permission Isolation & Multi-Tenancy Testing — test-user-c
 **Tester:** test-user-c (role: `user`)
 **Date:** 2026-05-11
 **Environment:** http://10.6.80.114:18080
 ## Summary
 Test-user-c is a standard `user` role with namespace `ocdp-u-test-c`, workspace `71459030-7166-4c79-b53c-81c61da4c313`. Permissions follow the `manage_own` / `view` pattern — no admin-level permissions.
 ---
 ## Test Results
 ### 1. Login & Basic Access ✅
 | Test | Result | Notes |
 |------|--------|-------|
 | POST /auth/login | ✅ Pass | Token issued, role=`user`, workspace/namespace correctly assigned |
 | GET /auth/me | ✅ Pass | Returns correct user profile with permissions |
 | GET /clusters | ✅ Pass | Sees all `global_shared` clusters (k8s, k3s) |
 | GET /registries | ✅ Pass | Sees all `global_shared` registries (harbor) |
 ### 2. Admin Endpoint Protection
 | Test | Result | Notes |
 |------|--------|-------|
 | GET /api/v1/users | ✅ **403 Forbidden** | Properly blocked — `permission denied` |
 | POST /auth/register | ✅ **403 Forbidden** | Cannot register new users as non-admin |
 | GET /api/v1/admin/* | ✅ **404** | Admin route prefix doesn't exist (not a bypass risk) |
 ### 3. Frontend Access
 | Test | Result | Notes |
 |------|--------|-------|
 | GET /configuration/users | ⚠️ **200 (OK)** | SPA returns index.html — expected. Auth is enforced via API, not routes. |
 | GET /configuration/clusters | ⚠️ **200 (OK)** | Same — SPA behavior. |
 | GET /configuration/registries | ⚠️ **200 (OK)** | Same. |
 **Risk: Low.** This is standard SPA behavior. Authorization is enforced at the API level. However, if the frontend relies solely on hiding UI elements rather than checking permissions, users who manually navigate could see empty/error states.
 ### 4. Namespace Isolation Enforcement
 | Test | Result | Notes |
 |------|--------|-------|
 | Deploy with `namespace: ocdp-u-test-a` | ⚠️ **Silently overridden** | Server ignored requested namespace and used `ocdp-u-test-c` instead. **No warning or error returned.** |
 | PATCH to change namespace | ✅ **404** | PATCH endpoint doesn't exist — namespace cannot be changed after creation |
 🔴 **Bug: Silent namespace override (Low severity)**
 When a user specifies a namespace that doesn't belong to them in the instance creation request, the server silently overrides it with the user's own namespace. This is secure (prevents cross-namespace deployment) but:
 - The user receives HTTP 200 with the overridden value — no indication that their request was modified
 - The response does not differentiate between "user's own namespace" and "requested namespace"
 - This could lead to user confusion about where their resources were actually deployed
 - It's unclear whether the user's Helm values also get silently overridden (e.g., the `values.namespace` field)
 ### 5. Resource Isolation
 | Test | Result | Notes |
 |------|--------|-------|
 | GET instances with other workspaceId query param | ✅ **Isolated** | Returns only own instances (workspaceId filter is server-enforced) |
 | DELETE on own instance | ⚠️ **Async deletion** | Returns HTTP 404 on DELETE itself, but instance transitions to `pending-delete` then disappears |
 🔴 **Bug: DELETE returns 404 on success (Medium severity)**
 When deleting an instance via `DELETE /clusters/{clusterId}/instances/{instanceId}`:
 - The instance transitions to `pending-delete` status
 - But the HTTP response status code is **404** rather than 200/202/204
 - The first raw DELETE call returns an empty body (causing JSON parse errors)
 - This is an API inconsistency — async deletions should return HTTP 202 Accepted
 ### 6. Monitoring & Other Endpoints
 | Test | Result | Notes |
 |------|--------|-------|
 | GET /monitoring/clusters/.../pods | ✅ **404** | Monitoring endpoints not implemented for this cluster type |
 | POST /kubeconfig | ✅ **404** | Kubeconfig endpoint not implemented |
 These endpoints return 404 which is acceptable behavior for features not yet implemented.
 ---
 ## Security Assessment
 ### Works as Intended ✅
 - Admin endpoints (`/users`, `/auth/register`) properly return 403
 - User cannot access other users' instances via workspaceId manipulation
 - User cannot deploy into other users' Kubernetes namespaces
 - No PATCH/PUT verbs available to modify existing instance namespaces
 - No admin-specific route paths leak data
 ### Bugs Found
 1. **DELETE returns 404 on successful async deletion** (Medium)
   - Endpoint: `DELETE /clusters/{id}/instances/{id}`
   - After call, instance status becomes `pending-delete` and eventually disappears
   - But the HTTP response is `404` with empty body
   - Expected: `202 Accepted` with a `status: "deleted"` or similar response
   - Risk: Clients interpreting HTTP 404 as "not found" will retry or report errors incorrectly
 2. **Silent namespace override without user feedback** (Low)
   - Endpoint: `POST /clusters/{id}/instances`
   - When requesting deployment into another user's namespace, the server silently uses the caller's namespace
   - No warning, no error, no indication in the response
   - Expected: Either `403 Forbidden` with "cannot deploy into namespace owned by another user" or a response field indicating the override occurred
   - Risk: Low for security (the override correctly prevents cross-tenant deployment), but could cause user confusion
 ### No Critical Vulnerabilities Found
 - No privilege escalation vectors identified
 - No data leakage across workspaces
 - No ability to access or manipulate other users' resources
--- a/docs/regression-full-report.md
+++ b/docs/regression-full-report.md
@ -0,0 +1,209 @@
 # OCDP 回归测试完整报告 (Round 3)
 **测试日期:** 2026-05-14
 **环境:** http://10.6.80.114:18080
 **集群:** k3s v1.28.0 (dbf824f1-9962-4d8e-881e-870c75fdb6f5), k8s (23880994-dfe4-48d0-abc0-b49692cc630a)
 **Harbor:** harbor.bwgdi.com (83b823af-873b-457c-912c-9ccde3cb12e6)
 ---
 ## 代码变更概要 (Commit b88fe24 +)
 | Commit | 变更 |
 |--------|------|
 | b88fe24 | fix: real K8s replicas in list API, full Helm values in modify YAML editor |
 | 96d42ee | fix: scale replicas in response, YAML lineWidth, delta values, modified keys |
 | 4441f58 | fix: direct K8s scaling, replicas from K8s API, button labels, modify fetch |
 | 49b92e6 | fix: UI redesign — horizontal instance rows, proper scaling, readable tag cards |
 | 28ecb2e | feat: scale instances, --reuse-values, values diff, UI redesign, hover animations |
 | 87eaaa5 | fix: remove per-card Refresh button, consolidate to page-level refresh |
 ---
 ## 测试结果总览
 ### 总评分: 85/100
 | 测试领域 | 状态 | 问题数 |
 |----------|------|--------|
 | 前端 UI / Launch / Routes | ✅ ALL PASS | 0 |
 | API CRUD / 部署生命周期 | ⚠️ 2 issues | 2 |
 | 配额执行 | ⚠️ 1 issue (pre-existing) | 1 |
 | Values 优先级 / gpuMem | ⚠️ 1 issue | 1 |
 | 权限隔离 | ✅ ALL PASS | 0 |
 | 安全测试 | ⚠️ 3 issues (pre-existing) | 3 |
 | DELETE 行为 | ✅ FIXED | 0 |
 | InstanceCard / Scaling UI | ✅ ALL PASS | 0 |
 | ModifyModal / Values Diff | ✅ ALL PASS | 0 |
 ---
 ## 新发现 Bug
 ### 🆕 BUG-R3-001: Detail API replicas=0 与 List API 不一致 (High)
 | 属性 | 值 |
 |------|-----|
 | 严重度 | **P1 (High)** |
 | 端点 | `GET /clusters/{id}/instances/{instance_id}` |
 | 现象 | List API 返回正确 replicas（如 1, 5），但 Detail API 始终返回 `replicas: 0` |
 | 根因 | Detail endpoint 从数据库读取实例记录，replicas 字段未同步自 K8s 实时状态 |
 | 影响 | 前端依赖 Detail API 的页面（如刷新后详情页）显示错误的副本数 |
 | 修复建议 | Detail endpoint 也从 K8s live state 填充 replicas，或确保数据库同步 |
 ### 🆕 BUG-R3-002: List API 移除 values 字段 (Medium)
 | 属性 | 值 |
 |------|-----|
 | 严重度 | **P2 (Medium)** |
 | 端点 | `GET /clusters/{id}/instances` |
 | 现象 | List API 响应不再包含 `values` 字段（之前版本有）；values 仅在单实例 GET 中返回 |
 | 影响 | 依赖 list API values 的测试脚本和前端组件会 break |
 | 备注 | 可能是故意的性能优化，但属于 API 行为变更 |
 ---
 ## 已修复 Bug (验证通过)
 | Bug ID | 描述 | 之前状态 | 现在状态 | 验证 |
 |--------|------|----------|----------|------|
 | BUG-001 | Launch 按钮无反应 | P0 Blocker | ✅ FIXED | 部署端到端成功 |
 | BUG-002 | SPA 旧路由空白页 | P0 Blocker | ✅ FIXED | 所有旧路由返回 SPA |
 | BUG-003 | DELETE 返回 404 | P1 High | ✅ FIXED | 返回 HTTP 204 |
 | BUG-004 | DELETE 空 body | P1 High | ✅ FIXED | HTTP 204 No Content |
 | - | InstanceCard 无 scaling UI | New Feature | ✅ ADDED | +/- 按钮 + API 调用 |
 | - | ModifyModal values 为空 | Bug | ✅ FIXED | Full Helm values 返回 |
 ---
 ## 仍未修复的已知问题
 | Bug ID | 描述 | 严重度 | 状态 |
 |--------|------|--------|------|
 | BUG-013 | 用户枚举（login 错误消息差异） | Medium | 未修复 |
 | BUG-014 | 无速率限制 | Medium | 未修复 |
 | BUG-015 | Nginx 版本泄露 | Low | 未修复 |
 | BUG-016 | CORS: * | Low | 未修复 |
 | BUG-017 | 缺少安全响应头 | Low | 未修复 |
 | BUG-018 | /health 返回 HTML | Low | 未修复 |
 | BUG-005 | Tags 端点 404 | Medium | 未修复 |
 | BUG-006 | Namespace 静默覆盖无警告 | Medium | 未修复 |
 | BUG-007-009 | Metrics/Stats/Kubeconfig 端点缺失 | Medium | 未修复 |
 | BUG-011 | API 响应格式不一致 | Low | 未修复 |
 | BUG-012 | /auth/me 空 token 字段 | Low | 未修复 |
 | - | API 层配额预检查 | New Feature | 未实现 |
 | - | Values 冲突时无警告 | UX | 未修复 |
 ---
 ## 详细测试结果
 ### 1. 前端 UI (test-user-a)
 | 测试项 | 结果 |
 |--------|------|
 | 登录页加载 | ✅ HTTP 200, SPA 398 bytes |
 | JWT 认证 | ✅ role=user, 10 permissions, quota 字段 |
 | 所有页面路由 | ✅ 6 个路由全部返回 398 bytes (非空白) |
 | 旧路由重定向 | ✅ /clusters, /registries, /launch, /monitoring 全部返回 SPA |
 | Chart Browser | ✅ nginx:22.1.1 可发现，Launch 按钮可用 |
 | 部署 Pipeline | ✅ pending-install → deployed (~15s) |
 | InstanceCard Scale UI | ✅ +/- 按钮，副本数显示 |
 | InstanceCard Actions | ✅ Entries/Diag/Modify/Delete 全部可见 |
 | ModifyModal YAML Editor | ✅ full Helm values, lineWidth:0, diff 检测 |
 | TagCard UI | ✅ 色标 (chart=blue, image=green), Copy helm pull 命令, LATEST badge |
 | DELETE | ✅ HTTP 204 |
 ### 2. API 后端 (test-user-b)
 | 测试项 | 结果 |
 |--------|------|
 | Login + /auth/me | ✅ 完整 profile (quota, namespace, permissions) |
 | Cluster list | ✅ 2 clusters |
 | Registry list | ✅ 1 registry (harbor-bwgdi) |
 | Repository artifacts | ✅ Harbor API proxy 正常 |
 | Cluster health | ✅ k3s healthy, v1.28.0 |
 | 部署 nginx (default) | ✅ deployed, replicas=1 |
 | 部署 nginx (over-quota) | ⚠️ 接受部署 (API 无预检查) |
 | 实例状态轮询 | ✅ ~20s 到达 deployed |
 | List API replicas | ✅ 正确显示 1/5 |
 | Detail API replicas | ❌ 返回 0 (BUG-R3-001) |
 | 实例删除 | ✅ HTTP 204 (BUG-003 FIXED) |
 | 缺失端点 | ✅ 正常 404 |
 ### 3. Values 优先级 (test-user-c)
 | 方法 | 结果 |
 |------|------|
 | values JSON only | ✅ gpuMem=10000 正确接受 |
 | valuesYaml only | ✅ YAML -> JSON 解析正确 |
 | 同时提供 (冲突) | ✅ values JSON 覆盖 valuesYaml |
 | 冲突警告 | ❌ 无警告（建议添加） |
 | 默认值 (空 values) | ✅ 使用 chart 内置 defaults |
 | gpuMem=10000 | ✅ 整数 MB scalar 正确 |
 | 清理 | ✅ 所有 3 个实例已删除 |
 ### 4. 权限隔离
 | 测试项 | 结果 |
 |--------|------|
 | Admin lists users | ✅ 10 users |
 | test-admin-d lists users | ✅ 10 users (admin role works) |
 | test-user-c GET /users | ✅ 403 Forbidden |
 | test-user-c POST /auth/register | ✅ 403 Forbidden |
 | Cross-tenant deploy (c→b) | ✅ Silent override to ocdp-u-test-c (secure) |
 | Deploy into own namespace | ✅ 成功 |
 | Instance visibility | ✅ 仅看到自己的实例 |
 | Disable user | ✅ 用户被禁用 |
 | Disabled user login | ✅ 401 |
 | Re-enable user | ✅ 恢复 |
 | Self-registration | ✅ 401 (必须认证) |
 ### 5. 安全 (回归)
 | 测试项 | 结果 |
 |--------|------|
 | 未认证端点 | ✅ 全部 401 |
 | JWT 验证 | ✅ 篡改 token 被拒绝 |
 | XSS/SQLi | ✅ 安全处理 |
 | 敏感数据脱敏 | ✅ creds 显示为 •••••••• |
 | 普通用户权限提升 | ✅ 403 blocked |
 | 用户枚举 | ❌ 不同错误消息 (未修复) |
 | 速率限制 | ❌ 无 429 (未修复) |
 | CORS * + 缺失安全头 | ❌ 未修复 |
 | Nginx 版本泄露 | ❌ "nginx/1.27.5" (未修复) |
 | /auth/me 空 token | ✅ tokens 已正确清空 |
 ---
 ## 优先修复清单
 ### 立即修复 (P0/P1)
 1. **BUG-R3-001**: Detail API replicas=0 — 从 K8s live state 填充
 2. **配额预检查** — POST instances 时验证请求资源 ≤ 用户配额
 ### 尽快修复 (P2)
 3. Values 冲突时添加 API 警告
 4. Tags/Metrics/Stats/Kubeconfig 端点实现
 5. Namespace 静默覆盖返回警告
 6. List API values 字段回归或文档化
 ### 安全加固 (P3)
 7. 登录错误消息统一
 8. 速率限制
 9. Nginx 安全头 + 关闭 server_tokens
 10. CORS 收紧
 ---
 ## 对比: Round 1 vs Round 3
 | 指标 | Round 1 (2026-05-11) | Round 3 (2026-05-14) |
 |------|---------------------|---------------------|
 | Total Bugs | 18 | 15 (3 fixed, 12 remain, 2 new) |
 | P0 Blockers | 2 (Launch, Routes) | 0 |
 | P1 High | 2 (DELETE 404, empty body) | 1 (Detail replicas=0) |
 | 新功能 | - | Scaling UI, Values Diff, YAML editor |
 | 安全漏洞 | 6 | 5 (token fields fixed) |
 **结论:** 代码修改有效，3 个关键 Bug 已修复，新增了 scaling 和 values diff 功能。仍有 12 个已知问题和 2 个新问题待修复。
--- a/docs/security/bugs-security.md
+++ b/docs/security/bugs-security.md
@ -1,284 +0,0 @@
 # OCDP Security Audit Report
 **Date:** 2026-05-11
 **Target:** http://10.6.80.114:18080
 **API Base:** http://10.6.80.114:18080/api/v1
 ---
 ## Finding 1: User Enumeration via Login Error Messages
 | Field | Value |
 |-------|-------|
 | **Test** | Authentication Error Disclosure |
 | **Severity** | **Medium** |
 | **Endpoint** | `POST /api/v1/auth/login` |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 # Non-existent user
 curl -s -X POST http://10.6.80.114:18080/api/v1/auth/login \
  -H "Content-Type: application/json" \
  -d '{"username":"nonexistent_user_xyz","password":"test123"}'
 # Existing user with wrong password
 curl -s -X POST http://10.6.80.114:18080/api/v1/auth/login \
  -H "Content-Type: application/json" \
  -d '{"username":"admin","password":"wrongpassword"}'
 ```
 ### Expected
 Both requests should return the same generic error message (e.g., "Invalid credentials") to prevent username enumeration.
 ### Actual
 - Non-existent user: `{"error":"Login failed","message":"user not found","code":401}`
 - Existing user: `{"error":"Login failed","message":"invalid password","code":401}`
 The error messages are different, allowing an attacker to determine whether a username exists in the system.
 ### Impact
 An attacker can enumerate valid usernames by observing the error message difference. This is the first step in a targeted brute force or credential stuffing attack.
 ### Recommendation
 Return identical error messages for both cases, e.g., `"Invalid username or password"`.
 ---
 ## Finding 2: No Rate Limiting on Login Endpoint
 | Field | Value |
 |-------|-------|
 | **Test** | Brute Force Protection |
 | **Severity** | **Medium** |
 | **Endpoint** | `POST /api/v1/auth/login` |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 for i in $(seq 1 10); do
  curl -s -o /dev/null -w "%{http_code}" \
    -X POST http://10.6.80.114:18080/api/v1/auth/login \
    -H "Content-Type: application/json" \
    -d '{"username":"admin","password":"wrongpassword"}'
 done
 ```
 ### Expected
 After a threshold (e.g., 5 failed attempts), the server should return HTTP 429 Too Many Requests or temporarily lock the account.
 ### Actual
 All 10 rapid sequential attempts returned HTTP 401. No rate limiting, no account lockout, no progressive delay.
 ### Impact
 An attacker can brute force passwords without restriction. Combined with Finding 1 (user enumeration), the attack surface is increased.
 ### Recommendation
 - Implement rate limiting on the login endpoint (e.g., max 5 attempts per minute per IP).
 - Consider account lockout after N failed attempts.
 - Add progressive response delays after repeated failures.
 ---
 ## Finding 3: Server Version Disclosure
 | Field | Value |
 |-------|-------|
 | **Test** | Information Disclosure |
 | **Severity** | **Low** |
 | **Endpoint** | All (HTTP response headers) |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 curl -s -D - http://10.6.80.114:18080/ | head -10
 ```
 ### Expected
 Server header should be generic (e.g., `Server: nginx`) or removed entirely.
 ### Actual
 ```http
 Server: nginx/1.27.5
 ```
 ### Impact
 Knowing the exact nginx version helps attackers target known vulnerabilities for that specific version.
 ### Recommendation
 Disable or obfuscate the Server header in nginx configuration:
 ```nginx
 server_tokens off;
 ```
 ---
 ## Finding 4: Permissive CORS Policy
 | Field | Value |
 |-------|-------|
 | **Test** | CORS Misconfiguration |
 | **Severity** | **Low** |
 | **Endpoint** | All API endpoints |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 curl -s -D - http://10.6.80.114:18080/api/v1/auth/login \
  -X POST -H "Content-Type: application/json" \
  -d '{"username":"test","password":"test"}'
 ```
 ### Expected
 CORS `Access-Control-Allow-Origin` should be restricted to the application's origin (e.g., `http://10.6.80.114:18080`) rather than allowing all origins.
 ### Actual
 ```http
 Access-Control-Allow-Origin: *
 Access-Control-Allow-Credentials: true
 Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS
 Access-Control-Allow-Headers: Content-Type, Authorization, X-Requested-With
 Access-Control-Max-Age: 86400
 ```
 ### Impact
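 Any website can make cross-origin requests to the API. If a user is logged in, a malicious site could potentially make authenticated API calls on their behalf (CSRF-style attack, though mitigated by the Bearer token requirement). Note also that the combination of `Access-Control-Allow-Origin: *` with `Access-Control-Allow-Credentials: true` is invalid: browsers refuse to expose responses to credentialed requests when the allowed origin is a wildcard, so the configuration is both overly permissive and internally inconsistent.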
 ### Recommendation
 Restrict `Access-Control-Allow-Origin` to the specific frontend origin(s) instead of `*`.
 ---
 ## Finding 5: Missing Security Headers
 | Field | Value |
 |-------|-------|
 | **Test** | Security Headers Audit |
 | **Severity** | **Low** |
 | **Endpoint** | All |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 curl -s -D - http://10.6.80.114:18080/ | head -20
 ```
 ### Expected
 Security headers should include:
 - `Strict-Transport-Security`
 - `X-Content-Type-Options: nosniff`
 - `X-Frame-Options: DENY`
 - `Content-Security-Policy`
 ### Actual
 None of these security headers are present in responses.
 ### Impact
 Increases attack surface for clickjacking, MIME-type confusion, and XSS attacks.
 ### Recommendation
 Add the following headers to nginx configuration:
 ```nginx
 add_header X-Frame-Options "DENY" always;
 add_header X-Content-Type-Options "nosniff" always;
 add_header X-XSS-Protection "0" always;
 add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
 add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline';" always;
 ```
 ---
 ## Finding 6: `/health` Endpoint Returns HTML Instead of Health Status
 | Field | Value |
 |-------|-------|
 | **Test** | Health Endpoint Behavior |
 | **Severity** | **Low** |
 | **Endpoint** | `GET /health` |
 | **Status** | Confirmed |
 ### What I Did
 ```bash
 curl -s http://10.6.80.114:18080/health
 ```
 ### Expected
 A health check endpoint should return a structured JSON response (e.g., `{"status":"healthy"}`) with HTTP 200.
 ### Actual
 Returns the full `index.html` SPA page with HTTP 200:
 ```html
 <!doctype html>
 <html lang="en">
  <head>
    <title>OCDP Platform</title>
    ...
 ```
 ### Impact
 Not a direct vulnerability, but misconfigured health checks can cause false positives in monitoring/load balancer health checks. It also means the SPA is served at `/health`, which is unexpected.
 ### Recommendation
 Implement a dedicated health endpoint that returns `{"status":"ok"}` with appropriate content type, or remove the `/health` route if not needed.
 ---
 ## Tests Passed (No Issues Found)
 | Test | Result |
 |------|--------|
 | **1. Unauthenticated Access** | **PASS** - All business endpoints return 401 |
 | **2. JWT Token Manipulation** | **PASS** - Tampered tokens, alg=none, invalid formats all rejected (401) |
 | **3. XSS/SQLi Testing** | **PASS** - Script injection, SQLi patterns safely handled |
 | **4. IDOR - Instance Access** | **PASS** - No instances deployed to test; cluster/registry isolation confirmed working |
 | **5. Sensitive Data Masking** | **PASS** - Cluster certs/keys and registry passwords masked as `••••••••` |
 | **6. Self-Registration** | **PASS** - Registration endpoint requires authentication (401) |
 | **7. Path Traversal** | **PASS** - Path traversal attempts return index.html (not /etc/passwd) |
 | **8. Admin Permission Escalation** | **PASS** - Regular users blocked from admin endpoints (403) |
 ---
 ## Summary
 | Severity | Count | Findings |
 |----------|-------|----------|
 | Critical | 0 | — |
 | High | 0 | — |
 | **Medium** | **2** | User enumeration, No rate limiting |
 | **Low** | **4** | Server version disclosure, Permissive CORS, Missing security headers, `/health` returns HTML |
 | **Total** | **6** | |
 The platform's core security controls (authentication, JWT validation, authorization, sensitive data masking) are properly implemented. The main areas for improvement are authentication hardening (rate limiting, user enumeration) and HTTP security hardening (headers, CORS).
--- a/docs/test2-quota.md
+++ b/docs/test2-quota.md
@ -1,156 +0,0 @@
 # Resource Quota Enforcement Test Report
 **Date:** 2026-05-11
 **Tester:** test-user-b
 **Namespace:** ocdp-u-test-b
 **User Quota:** cpu=2, memory=4Gi, gpu=0, gpumem=0
 ---
 ## Test Summary
 | Test | Description | Expected | Actual | Result |
 |------|-------------|----------|--------|--------|
 | A | Deploy nginx (default, within quota) | Success | Deployed (status: `deployed`) | ✅ PASS |
 | B | Deploy nginx (cpu=4, mem=8Gi, replicas=5, exceeds quota) | Blocked by quota | Helm release created, Service created, all pods blocked by ResourceQuota (status: `pending-install`) | ⚠️ PARTIAL |
 | C | Deploy vllm-serve with gpu=1 (gpu quota = 0) | Blocked by quota | Helm release created, all pods blocked by ResourceQuota (status: `pending-install`) | ⚠️ PARTIAL |
 ---
 ## Detailed Results
 ### Test A: Deploy nginx within quota limits
 - **Instance:** `quota-test-nginx` (ed846c33-3631-4d54-adce-c7f00210176f)
 - **Chart:** charts/nginx:22.1.1
 - **Values:** defaults
 - **API Response:** HTTP 200, status: `pending-install`
 - **Final Status after 21s:** `deployed` ("Instance deployed successfully")
 - **K8s Resource Usage:** requests.cpu=100m/2, requests.memory=128Mi/4Gi
 ### Test B: Deploy nginx exceeding quota
 - **Instance:** `quota-test-nginx-2` (36c0350f-089c-41c2-a66e-e93539c00d52)
 - **Chart:** charts/nginx:22.1.1
 - **Values:** replicaCount=5, resources.limits.cpu=4/memory=8Gi, resources.requests.cpu=2/memory=4Gi
 - **API Response:** HTTP 200, status: `pending-install`
 - **Final Status (observed for 90s+):** `pending-install` (never transitioned to `deployed` or `failed`)
 - **K8s Behavior:**
  - Helm release created: `sh.helm.release.v1.quota-test-nginx-2.v1`
  - TLS secret created
  - Service created, IP assigned
  - Deployment created, ReplicaSet scaled up
  - **All pod creations FAILED** with: `Error creating: pods "..." is forbidden: exceeded quota: tenant-quota, requested: requests.cpu=2,requests.memory=4Gi, used: requests.cpu=100m,requests.memory=128Mi, limited: requests.cpu=2,requests.memory=4Gi`
 ### Test C: Deploy GPU instance (gpu quota = 0)
 - **Instance:** `quota-test-gpu` (a0d692c8-cdf8-4248-a6d4-1468ad4a7cc7)
 - **Chart:** charts/vllm-serve:0.6.0
 - **Values:** resources.gpuLimit=1, resources.gpuMem=5000
 - **API Response:** HTTP 200, status: `pending-install`
 - **Final Status (observed for 30s+):** `pending-install`
 - **K8s Behavior:**
  - vllm-serve chart defaults: requests.cpu=8, requests.memory=16Gi, requests.nvidia.com/gpu=1, requests.nvidia.com/gpumem=5k
  - All pods blocked: `exceeded quota: tenant-quota, requested: requests.cpu=8,requests.memory=16Gi,requests.nvidia.com/gpu=1,..., limited: requests.cpu=2,requests.memory=4Gi,requests.nvidia.com/gpu=0`
 ---
 ## Key Findings
 ### 1. No API-Level (Pre-flight) Quota Enforcement
 The backend API accepts **all** deployment requests regardless of whether they exceed the user's quota. There is no validation at the API layer that checks:
 - Whether the requested resources exceed the user's quota limits
 - Whether the user's quota is already fully consumed by existing deployments
 **Evidence:** All three deployments returned HTTP 200 with `status: pending-install`. The backend logs contain zero quota-related entries.
 ### 2. Kubernetes ResourceQuota Enforces at Pod Level
 The Kubernetes `ResourceQuota` object `tenant-quota` in namespace `ocdp-u-test-b` does enforce limits, but only at the **pod creation** level:
 ```yaml
 spec:
  hard:
    requests.cpu: "2"
    requests.memory: 4Gi
    requests.nvidia.com/gpu: "0"
    requests.nvidia.com/gpumem: "0"
 ```
 When pods exceed quota, Kubernetes explicitly refuses to create them with a clear error message.
 However, Helm releases, Services, Deployments, and ReplicaSets are **still created** even when pods are blocked.
 ### 3. Stuck at "pending-install"
 Instances that exceed quota remain stuck in `pending-install` status **indefinitely** — they never transition to `deployed`, `failed`, or any error status. The OCDP platform does not detect the ResourceQuota rejection and update the instance status accordingly. The only way to know about the failure is to check Kubernetes events directly:
 ```bash
 kubectl get events -n ocdp-u-test-b
 ```
 ### 4. GPU Quota Enforcement
 Users with `gpu=0` quota **can** submit deployments referencing GPU-enabled charts. The API does not reject them. Only the K8s ResourceQuota blocks pod creation at runtime. This could lead to:
 - Unnecessary Helm releases and resource overhead in the cluster
 - Confusion for users whose deployments appear to hang at `pending-install`
 ### 5. Quota Exposed in Login Response
 The login response includes quota information:
 ```json
 {
  "quotaCpu": "2",
  "quotaMemory": "4Gi",
  "quotaGpu": "0",
  "quotaGpuMemory": "0"
 }
 ```
 This could be used by the frontend to show usage limits, but no pre-flight check uses it server-side.
 ---
 ## Recommendations
 1. **Add pre-flight quota validation** in the backend API: before accepting a deployment, check whether the requested resources (from chart values) would exceed the user's quota. Return HTTP 4xx with a clear error message.
 2. **Handle "pending-install" timeout**: implement a watcher that detects when a Helm release has been created but pods remain stuck (e.g., due to ResourceQuota) and:
   - Update instance status to `failed` with a descriptive `statusReason`
   - Clean up the Helm release, Service, etc.
   - Optionally surface the K8s error message via the API
 3. **GPU quota pre-check**: if a chart requests GPU resources and the user's `gpu=0`, reject the deployment at the API level before creating any Kubernetes resources.
 4. **UI quota indicator**: show remaining quota (used vs. hard limit) on the deployment form so users know their limits before submitting.
 ---
 ## ResourceQuota YAML (for reference)
 ```yaml
 apiVersion: v1
 kind: ResourceQuota
 metadata:
  name: tenant-quota
  namespace: ocdp-u-test-b
  labels:
    ocdp.io/managed-by: ocdp
    ocdp.io/tenant: ocdp-u-test-b
 spec:
  hard:
    requests.cpu: "2"
    requests.memory: 4Gi
    requests.nvidia.com/gpu: "0"
    requests.nvidia.com/gpumem: "0"
 ```
 ---
 ## Cleanup Verification
 All test instances were removed after testing:
 - `quota-test-nginx` ✅ deleted (pods terminated, helm release removed, quota back to 0)
 - `quota-test-nginx-2` ✅ cleaned up (no pods created, resources released)
 - `quota-test-gpu` ✅ cleaned up (no pods created, resources released)
 - ResourceQuota used: all resources at 0
--- a/docs/test2-report.md
+++ b/docs/test2-report.md
@ -1,141 +0,0 @@
 # OCDP 第二次测试报告
 **测试日期:** 2026-05-11
 **测试环境:** http://10.6.80.114:18080
 ---
 ## 测试1: 资源配额限额
 ### 测试方法
 使用 test-user-b（quota: cpu=2, mem=4Gi, gpu=0, gpumem=0）在 k3s 集群部署 nginx chart
 ### 测试结果
 | 测试 | 操作 | 预期 | 实际 | 结论 |
 |------|------|------|------|------|
 | Test A | 部署 nginx（默认值，在配额内） | 成功 | 部署完成，状态 deployed | ✅ |
 | Test B | 部署 nginx（requests.cpu=2, mem=4Gi, replica=5，超配额） | 被配额阻止 | Helm release 创建成功，所有 Pod 被 ResourceQuota 阻塞，状态永远 stuck 在 pending-install | ⚠️ 部分通过 |
 | Test C | 部署 vllm-serve（gpuLimit=1，gpu配额=0） | 被配额阻止 | Helm release 创建成功，Pod 被 ResourceQuota 阻塞，状态 pending-install | ⚠️ 部分通过 |
 ### 关键发现
 **1. 没有 API 层的预检查配额验证**
 - 后端 API 无条件接受所有部署请求，不检查是否超配额
 - 所有超配额请求返回 HTTP 200 + status: pending-install
 - 后端日志中**没有任何配额相关的条目**
 **2. K8s ResourceQuota 在 Pod 级别执行**
 - `tenant-quota` ResourceQuota 对象确实存在并执行限制
 - 当 Pod 超配额时，K8s 明确拒绝创建并给出错误消息
 - 但 Helm release、Service、Deployment、ReplicaSet **仍然被创建**
 **3. 实例永远 stuck 在 "pending-install"**
 - 超配额的实例永远不会转换到 deployed/failed/error
 - OCDP 平台不检测 ResourceQuota 拒绝事件
 - 唯一知道失败的方式是直接查 K8s events
 **4. GPU 配额绕过**
 - gpu=0 的用户可以提交需要 GPU 的 chart 部署
 - K8s ResourceQuota 最终会阻止，但 Helm release 等资源已被创建
 **5. 有效的 ResourceQuota 配置**
 ```yaml
 apiVersion: v1
 kind: ResourceQuota
 metadata:
  name: tenant-quota
  namespace: ocdp-u-test-b
 spec:
  hard:
    requests.cpu: "2"
    requests.memory: 4Gi
    requests.nvidia.com/gpu: "0"
    requests.nvidia.com/gpumem: "0"
 ```
 ### 建议
 1. **添加 API 层预检查配额验证** — 在接受部署前检查请求资源是否超过用户配额
 2. **处理 pending-install 超时** — 监控 Helm release 创建后 Pod 是否 stuck，更新状态为 failed
 3. **GPU 配额预检查** — 如果 chart 需要 GPU 而用户 gpu=0，在 API 层拒绝
 4. **UI 配额指示器** — 在部署表单上显示剩余配额
 ---
 ## 测试2: values.yaml 覆盖优先级
 ### 测试方法
 使用 test-user-c（quota: cpu=4, mem=8Gi, gpu=1, gpumem=5000）部署 vllm-serve:0.6.0 chart
 ### 测试结果
 | 方法 | 提交方式 | 是否部署成功 | 存储的值 | 结论 |
 |------|----------|-------------|---------|------|
 | 方法1 | `values` JSON 字段 | ✅ | cpuRequest=2, gpuMem=10000 | JSON 值被准确接受和存储 |
 | 方法2 | `valuesYaml` 字符串 | ✅ | cpuRequest=4, gpuMem=10000 | YAML 被正确解析为结构化 values |
 | 方法3 | 同时提供 `values` + `valuesYaml`（冲突） | ✅ 无任何错误/警告 | **values JSON 全胜** | `values` JSON 静默覆盖 `valuesYaml` |
 | 方法4 | 不提供任何 values（使用 chart 默认） | ✅ | 仅 namespace | chart 默认值不存储在 API 响应中 |
 ### 优先级最终结论
 | 优先级 | 来源 | 说明 |
 |--------|------|------|
 | **最高** | `values` JSON 字段 | 请求体中的结构化 JSON |
 | **中** | `valuesYaml` 字符串 | 请求体中的 YAML 字符串 |
 | **最低** | Chart 内置 values.yaml | Helm chart 打包的默认值 |
 ### 冲突测试详细结果
 当同时提供 `values` 和 `valuesYaml` 且值冲突时：
 - `values` JSON 字段**完全覆盖** `valuesYaml`
 - **没有任何错误或警告**返回给用户
 - 两者被合并到统一的 DB `values` 字段（发生冲突的键以 `values` JSON 的值为准）
 ### gpuMem=10000 行为
 - 整数值 `10000` 在 `values` JSON 和 `valuesYaml` 中都被**正确接受**
 - 无单位转换（作为整数 MB 标量存储）
 - 符合项目规范
 ### 建议
 1. **记录优先级顺序** — 用户需知道同时提供两者时 values JSON 优先
 2. **添加冲突警告** — 当两个字段存在冲突值时应返回警告
 3. **考虑废弃一个字段** — values 和 valuesYaml 语义重复易混淆
 ---
 ## 测试3: 前端 UI 溢出/滚动/刷新
 ### 测试方法
 Playwright + 源码分析，测试 1920/768/375 三个视口
 ### 测试结果
 **总体结论: PASS** — 没有导致功能问题的关键溢出问题
 | 测试项 | 结果 | 详情 |
 |--------|------|------|
 | 水平溢出 | ✅ 无问题 | 所有视口均无水平溢出 |
 | 文本截断 | ⚠️ 1 个低风险 | InstanceCard h3 标题 truncate 无 title tooltip |
 | 响应式设计 | ✅ 正确 | sm/md/lg/xl 断点覆盖完整 |
 | 滚动行为 | ✅ 流畅 | Sidebar 和内容区独立滚动，overscroll-contain 防滚动穿透 |
 | 模态框布局 | ✅ 正确 | body scroll lock + 内容独立滚动 |
 | 页面刷新 | ✅ 正常 | 受保护路由正确重定向到登录页 |
 | 颜色对比度 | ⚠️ 1 个中风险 | 登录页错误文本 red-400 在白色背景上仅 2.5:1 (WCAG AA 要求 4.5:1) |
 ### 通过的细分项
 - Chart Browser 全高 + overflow-y-auto 布局 ✅
 - InstanceCard 操作按钮网格 grid-cols-2/3/5 响应正确 ✅
 - Tabs 支持 overflow-x-auto 水平滚动 ✅
 - 用户管理表格 overflow-x-auto ✅
 - iOS 触摸滚动 (`-webkit-overflow-scrolling: touch`) 已配置 ✅
 ### 建议
 1. 将登录页错误文本从 text-red-400 改为 text-red-600/700
 2. InstanceCard h3 标题添加 title 属性
 ---
 ## 综合建议
 1. 添加 API 层配额预检查
 2. 处理 pending-install 超时 + 状态更新
 3. 记录 values 覆盖优先级并添加冲突警告
 4. 统一 values JSON/YAML 的 API 设计
--- a/docs/test2-ui-overflow.md
+++ b/docs/test2-ui-overflow.md
@ -1,271 +0,0 @@
 # QA Report: UI Layout Overflow & Responsiveness Test
 **Date:** 2026-05-11
 **Environment:** http://10.6.80.114:18080
 **Browser:** Chromium (Playwright headless)
 **Test Credentials:** test-user-a / TestUserA123!
 ---
 ## Test Results Summary
 | #  | Test | Status | Issues Found |
 |----|------|--------|-------------|
 | 1 | Login Page Layout | ✅ Pass | 1 Low |
 | 2 | Home Page | ✅ Pass | 0 |
 | 3 | Chart Browser (Registries) | ✅ Pass | 0 |
 | 4 | Instances Page | ✅ Pass | 0 |
 | 5 | Monitoring Page | ✅ Pass | 0 |
 | 6 | Tablet Responsive (768px) | ✅ Pass | 0 |
 | 7 | Mobile Responsive (375px) | ✅ Pass | 0 |
 | 8 | Deep DOM Overflow Analysis | ✅ Pass | 0 |
 | 9 | Source Code CSS Pattern Audit | ✅ Pass | 2 Info |
 | 10 | Text Visibility & Contrast | ⚠️ 1 Issue | 1 Medium |
 ---
 ## 1. Login Page (AuthPage.tsx)
 **Location:** `frontend/src/features/auth/pages/AuthPage.tsx`
 **Layout:**
 - Form card is `max-w-md` (448px), horizontally centered via `flex items-center justify-center`
 - Desktop viewport (1920×1080): card is perfectly centered (checked via bounding rect)
 - Background: `bg-slate-50` with gradient overlay
 - Card: `bg-white/95 backdrop-blur-xl` with `shadow-2xl`
 **Responsive:**
 - Padding: `px-4 sm:px-6` — increases from 16px → 24px on `sm:` breakpoint
 - Card padding: `p-6 sm:p-7`
 - Icon: `w-11 h-11` — fixed size, not responsive
 ### ✅ Issue #1-LOW: Login error text color contrast
 - **File:** `AuthPage.tsx:96`
 - **Pattern:** `<p className="text-red-400 text-center text-sm">`
 - **Problem:** `text-red-400` (`#f87171`) on white background has a contrast ratio of ~2.5:1, which fails WCAG AA (minimum 4.5:1 for normal text). Error messages may be hard to read for users with visual impairments.
 - **Recommendation:** Use `text-red-600` or `text-red-700` for error text on white backgrounds.
 ---
 ## 2. Home Page
 **Location:** `frontend/src/features/home/pages/HomePage.tsx`
 **Layout:**
 - Main container: `min-h-full bg-slate-50 px-4 py-6 sm:px-6 lg:px-8`
 - Two-column layout on large screens: `lg:grid-cols-[1.4fr_0.8fr]`
 - Feature cards: `md:grid-cols-3`
 - Quick actions: `md:grid-cols-3`
 **Scroll:** ScrollHeight=1080, Viewport=1080 — content fits exactly without scrolling on 1080p.
 **Overflow:** No horizontal overflow detected. Proper use of responsive padding and grid columns.
 ### Passing — no issues found.
 ---
 ## 3. Chart Browser / Registries
 **Location:** `frontend/src/features/artifact/registries/pages/ArtifactBrowserPage.tsx`
 **Layout (Desktop):**
 - Main layout: `flex-1 flex overflow-hidden bg-slate-50` (sidebar + detail panes)
 - Sidebar tree: `flex-1 overflow-y-auto custom-scrollbar`
 - Detail pane: `flex-1 flex flex-col bg-white overflow-hidden`
 - Tag grid: `grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4`
 **Tablet (768px):** No overflow. Grid collapses to 2 columns.
 **Mobile (375px):** No overflow. Grid collapses to 1 column.
 ### Key Patterns Found:
 - `RepositoryItem.tsx:212` — `<span className="text-sm text-gray-200 font-mono truncate" title={repository}>` — proper truncation with `title` tooltip
 - `ArtifactBrowserPage.tsx:336` — `<p className="text-[11px] text-slate-500 truncate">` — uses 11px text with truncation
 - `TagCard.tsx` — uses truncation with `title` attribute for long names
 ### Passing — no overflow issues found.
 ---
 ## 4. Instances Page
 **Location:** `frontend/src/features/artifact/instances/pages/InstancesManagementPage.tsx`  
 **Component:** `InstanceCard.tsx`
 **Layout:**
 - Cluster cards: responsive grid `clusters.length > 1 ? 'md:grid-cols-3' : 'md:grid-cols-2'`
 - Instance cards are listed in a single column, switching to two columns on large screens (`lg:grid-cols-2 gap-6`)
 - Action buttons grid: `grid-cols-2 gap-2 md:grid-cols-3 xl:grid-cols-5`
 ### ✅ Issue #2-INFO: Action button text truncation on InstanceCards
 - **File:** `InstanceCard.tsx:285-327`
 - **Pattern:**
  ```
  <div className="grid grid-cols-2 gap-2 md:grid-cols-3 xl:grid-cols-5">
    <button>
      <span className="truncate">Refresh</span>
    </button>
  </div>
  ```
 - **Analysis:** At `grid-cols-2` (small screens), two buttons share each row. The buttons use `min-w-0` which allows them to shrink, and `truncate` on the text span. However, the button text is short ("Refresh", "Entries", "Diagnostics", "Modify", "Delete"), so truncation is unlikely to occur in practice.
 - **Mitigation:** Each label `<span>` sits inside a `<button>` that carries a `title` attribute, providing a tooltip fallback if the text is ever truncated.
 - **Verdict:** Acceptable — button labels are intentionally short and tooltips are present.
 ### ✅ Issue #3-INFO: Header text truncation without tooltip
 - **File:** `InstanceCard.tsx:185`
 - **Pattern:** `<h3 className="text-xl font-bold text-slate-950 truncate">{instanceName}</h3>`
 - **Analysis:** Instance names could be long, `truncate` will clip with ellipsis. No `title` attribute on this element — unlike repository text below it.
 - **Recommendation:** Add `title={instanceName}` to the `<h3>` element for tooltip on overflow.
 ### Passing — no critical overflow issues found.
 ---
 ## 5. Monitoring / Clusters
 **Location:** `frontend/src/features/monitoring/clusters/`
 **Layout:**
 - Cluster cards grid: `grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4`
 - Card header: `<h3 className="text-lg font-semibold text-slate-900 truncate">` with cluster name
 - Metrics: `grid-cols-2 sm:grid-cols-4 gap-4 mb-3`
 - Resource bars: `overflow-hidden` for proper progress bar clipping
 - Node details: `grid-cols-1 lg:grid-cols-2 gap-3`
 **Overflow Check:** ScrollWidth = clientWidth at all tested viewports — no horizontal overflow.
 **Responsive:**
 - 1920px: 4 columns of cluster cards
 - 768px: 2 columns
 - 375px: 1 column
 ### Passing — no issues found.
 ---
 ## 6. Sidebar Layout
 **Location:** `frontend/src/shared/components/layout/SidebarLayout/`
 **Layout:**
 - Parent: `min-h-screen flex bg-dark text-primary overflow-hidden`
 - Nav: `flex-1 p-3 space-y-1 overflow-y-auto` — independently scrollable
 - Footer: Fixed at bottom, `p-3 text-xs text-muted`
 **Scroll Analysis:**
 - Content area has `overflow-y-auto`, so sidebar nav items scroll independently when they exceed viewport height
 - The footer anchors to the bottom of the sidebar (not the scroll area)
 - At 1080px viewport, sidebar content fits without scrolling
 **Potential Concern:** If many nav items are added, the nav list will exceed the available height. Because `overflow-y-auto` is set on the `<nav>` element, only the nav list scrolls in that case and the footer stays anchored at the bottom of the sidebar, so this is handled correctly.
 ### Passing — no issues found.
 ---
 ## 7. Tabs Component
 **Location:** `frontend/src/shared/components/layout/Tabs.tsx`
 **Pattern:**
 ```
 <div className="flex gap-2 sm:gap-4 overflow-x-auto scrollbar-thin scrollbar-thumb-gray-600 scrollbar-track-transparent">
  <button className="whitespace-nowrap flex-shrink-0">
 ```
 **Analysis:**
 - Uses `overflow-x-auto` — horizontal scroll appears when tabs exceed container width
 - `whitespace-nowrap` prevents tab text from wrapping
 - `flex-shrink-0` prevents tab items from compressing
 - Custom thin scrollbar styling for webkit browsers
 **Edge Case:** On very small viewports with many tabs, users need to horizontally scroll. This is acceptable UX for a tabs pattern.
 ### Passing — no issues found.
 ---
 ## 8. Modal Component
 **Location:** `frontend/src/shared/components/layout/Modal.tsx`
 **Pattern:**
 - Body scroll lock: sets `document.body.style.overflow = 'hidden'` on open
 - Modal overlay: `fixed inset-0 z-[90] flex items-start sm:items-center justify-center overflow-y-auto p-4 sm:p-6`
 - Content: `max-h-[calc(100vh-12rem)] sm:max-h-[calc(100vh-10rem)] overflow-y-auto`
 **Analysis:**
 - Properly handles body scroll prevention
 - Modal content is independently scrollable when content exceeds viewport
 - `padding-right` compensation prevents layout shift when scrollbar disappears
 ### Passing — no issues found.
 ---
 ## 9. Deep DOM Overflow Analysis
 The Playwright script ran a comprehensive scan of ALL DOM elements, checking:
 - `overflow: hidden` elements where `scrollWidth > clientWidth` (clipped content)
 - `text-overflow: ellipsis` elements where content overflows
 - `white-space: nowrap` causing overflow
 - Tiny text (< 10px)
 **Result: No overflow:hidden clipping detected, no text truncation overflow detected on the monitoring page (last page tested).**
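 Note: The test scans only the current page's DOM after navigating through pages, so the result above covers the last page visited. Some truncation exists elsewhere (`InstanceCard.tsx`, `RepositoryItem.tsx`, etc.); these uses are intentional, and most include `title` tooltip fallbacks — the `InstanceCard` `<h3>` title is the exception.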
 ---
 ## 10. CSS Pattern Summary
 ### Overflow Patterns Used in Codebase
 | Pattern | Used For | Risk |
 |---------|----------|------|
 | `overflow-hidden` | Card containers, modal wrappers, progress bars | Low — decorative/structural |
 | `overflow-y-auto` | Scrollable content areas (sidebar nav, modals, detail panes) | None — intentional scroll |
 | `overflow-x-auto` | Tabs, data tables | Low — scroll indicator needed |
 | `truncate` | Instance names, repository names, tags, button labels | Low — tooltips provided on most |
 | `whitespace-nowrap` | Tab items, table headers | Low — paired with overflow-x-auto |
 | `line-clamp-1` | Registry descriptions | None — CSS line clamp |
 ### Responsive Breakpoints Used
 | Breakpoint | Usage |
 |-----------|-------|
 | `sm:` (640px) | Login form padding, cluster form layout, button layouts, tabs spacing |
 | `md:` (768px) | Grid columns (2-3 cols), card layouts, diagnostics modal |
 | `lg:` (1024px) | Two-column layouts, 4-col monitoring grids |
 | `xl:` (1280px) | 5 action button columns, 3-col tag grids |
 | `2xl:` (1536px) | Not used |
 ### Fixed Widths Checked
 No problematic fixed widths found. All layouts use `max-w-` constraints (`max-w-md`, `max-w-6xl`, `max-w-7xl`) rather than fixed pixel widths.
 ---
 ## 11. Page Refresh Behavior
 The SPA uses React Router. When navigating to authenticated routes:
 - `ProtectedRoute` component checks `isAuthenticated` and `isAllowed`
 - If not authenticated, users are redirected to login page (`/`)
 - After login, `navigate("/home", { replace: true })` navigates to home
 - Page refresh at any route should redirect to login if token is expired
 **Note:** The `/login` route path does not exist in the SPA router — login is handled by `AuthPage` rendered at the root `/` path when the user is unauthenticated.
 ---
 ## Final Verdict
 | Category | Score |
 |----------|-------|
 | Horizontal Overflow | ✅ No issues at any viewport |
 | Text Truncation | ⚠️ InstanceCard `<h3>` missing tooltip fallback |
 | Responsive Design | ✅ Proper breakpoints at sm/md/lg/xl |
 | Scroll Behavior | ✅ Sidebar and content areas properly scrollable |
 | Color Contrast | ⚠️ Login error text (red-400) fails WCAG AA |
 | Modal Layout | ✅ Body scroll lock + content scroll work correctly |
 | Page Refresh | ✅ Protected routes redirect to login |
 **Overall: PASS** — Two minor issues found (color contrast on login error text, missing tooltip on InstanceCard title), neither causing functional problems.
--- a/docs/test2-values-priority.md
+++ b/docs/test2-values-priority.md
@ -1,110 +0,0 @@
 # Test Report: values.yaml Override Priority
 **Date:** 2026-05-11
 **Tester:** test-user-c
 **Cluster:** dbf824f1-9962-4d8e-881e-870c75fdb6f5
 **Chart:** charts/vllm-serve:0.6.0
 **Namespace:** ocdp-u-test-c
 ---
 ## Test Results
 ### Method 1: `values` JSON field only (vllm-values-json)
 - **Deployment:** ✅ Success (status: pending-install, no errors)
 - **Submitted values:**
  ```json
  { "cpuRequest": 2, "gpuLimit": 1, "gpuMem": 10000, "memoryLimit": "4Gi" }
  ```
 - **Stored values (from API response):**
  ```json
  { "cpuRequest": 2, "gpuLimit": 1, "gpuMem": 10000, "memoryLimit": "4Gi" }
  ```
 - **Result:** Values were accepted and stored exactly as provided. No chart defaults were merged into the stored representation (e.g., `shmSize: "8Gi"` from chart defaults is absent).
 ### Method 2: `valuesYaml` string field only (vllm-values-yaml)
 - **Deployment:** ✅ Success (status: pending-install, no errors)
 - **Submitted valuesYaml:**
  ```yaml
  resources:
    cpuRequest: 4
    gpuLimit: 1
    gpuMem: 10000
    memoryLimit: "8Gi"
  model:
    huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
  ```
 - **Stored values (parsed and stored in DB):**
  ```json
  { "cpuRequest": 4, "gpuLimit": 1, "gpuMem": 10000, "memoryLimit": "8Gi" }
  ```
 - **Result:** The YAML string was correctly parsed into the structured `values` field in the database. YAML parsing works correctly.
 ### Method 3: Both `values` JSON AND `valuesYaml` with conflict (vllm-conflict-test)
 - **Deployment:** ✅ Success (status: pending-install, **no error or warning returned**)
 - **`values` JSON submitted:**
  ```json
  { "cpuRequest": 4, "memoryLimit": "8Gi", "huggingfaceName": "Qwen/Qwen2.5-0.5B-Instruct" }
  ```
 - **`valuesYaml` submitted:**
  ```yaml
  resources:
    cpuRequest: 2
    memoryLimit: "4Gi"
  model:
    huggingfaceName: "Qwen/Qwen2.5-7B-Instruct"
  ```
 - **Stored values:**
  ```json
  { "cpuRequest": 4, "gpuLimit": 1, "gpuMem": 10000, "memoryLimit": "8Gi", "huggingfaceName": "Qwen/Qwen2.5-0.5B-Instruct" }
  ```
 - **Result:** The `values` JSON field **won every conflict**. The `valuesYaml` values (`cpuRequest: 2`, `memoryLimit: "4Gi"`, `Qwen/Qwen2.5-7B-Instruct`) were completely overridden by the `values` JSON values (`cpuRequest: 4`, `memoryLimit: "8Gi"`, `Qwen/Qwen2.5-0.5B-Instruct`). No error or warning was presented to the user.
 ### Method 4: No values (chart defaults, vllm-defaults-test)
 - **Deployment:** ✅ Success (status: pending-install, no errors)
 - **Stored values:**
  ```json
  { "namespace": "ocdp-u-test-c" }
  ```
 - **Result:** Only the auto-injected `namespace` was stored. Chart defaults (`cpuRequest: 8`, `memoryLimit: "16Gi"`, etc.) are not part of the stored values returned by the API — they are resolved at Helm deploy time.
 ---
 ## Key Findings
 ### 1. Override Priority Order (when both fields provided)
 | Priority | Source | Description |
 |----------|--------|-------------|
 | **Highest** | `values` JSON field | Structured JSON object in the request body |
 | **Middle** | `valuesYaml` string field | Raw YAML string in the request body; overridden by `values`, but still overrides chart defaults |
 | **Baseline** | Chart built-in `values.yaml` | Default values packaged in the Helm chart |
 ### 2. Conflict Resolution
 When both `values` and `valuesYaml` are provided with conflicting values:
 - **`values` JSON wins** — the structured JSON field takes priority over the YAML string
 - **No error or warning** is returned to the user
 - The system silently prefers the `values` JSON field
 ### 3. `gpuMem=10000` Behavior
 - The integer value `10000` was **accepted without issues** in both `values` JSON and `valuesYaml` formats
 - No normalization or unit conversion was applied (stored as-is: `10000`)
 - Consistent with the project convention that `nvidia.com/gpumem` is a vendor-specific integer scalar expressed in MB
 ### 4. All values are stored in a unified `values` field in the DB
 Both `values` JSON and `valuesYaml` inputs are converted to a single structured `values` JSON object in the database. The API response always returns the structured `values` field regardless of how the input was provided.
 ---
 ## Recommendations
 1. **Document the priority order** — users should know that when providing both `values` and `valuesYaml`, the `values` JSON field takes precedence and no error is raised.
 2. **Consider returning a warning** when both fields are provided with conflicting values, as silent override could cause confusion.
 3. **The naming convention** (`values` vs `valuesYaml`) can be misleading since both ultimately serve the same purpose. Consider deprecating one in the API to avoid ambiguity.
--- a/frontend/src/api/generated-orval/api.schemas.ts
+++ b/frontend/src/api/generated-orval/api.schemas.ts
@ -271,6 +271,7 @@ export interface GithubComOcdpClusterServiceInternalAdapterInputHttpDtoInstanceR
  name?: string;
  namespace?: string;
  registryId?: string;
  replicas?: number;
  repository?: string;
  revision?: number;
  /** 实例当前状态 */
--- a/frontend/src/api/index.ts
+++ b/frontend/src/api/index.ts
@ -3,7 +3,7 @@
 * Export configured API client, generated functions, and friendly aliases.
 */
-type AxiosOptions<T extends (...args: any) => any> = Parameters<T>[2];
+type AxiosOptions<T extends (...args: never[]) => unknown> = Parameters<T>[2];
 import {
  deleteClustersClusterId,
@ -143,7 +143,10 @@ export type CreateRegistryRequest = GeneratedCreateRegistryRequest;
 export type UpdateRegistryRequest = GeneratedUpdateRegistryRequest;
 export type RegistryHealthResponse = GeneratedRegistryHealthResponse;
-export type InstanceResponse = GeneratedInstanceResponse;
+export type InstanceResponse = GeneratedInstanceResponse & {
  ownerId?: string;
  ownerUsername?: string;
 };
 export type CreateInstanceRequest = GeneratedCreateInstanceRequest;
 export type UpdateInstanceRequest = GeneratedUpdateInstanceRequest;
 export type InstanceEntry = GeneratedInstanceEntry;
@ -216,6 +219,10 @@ export type NodeMetricsResponse = GeneratedNodeMetricsResponse;
 // Auth endpoints re-exported under friendly names.
 export const login = postAuthLogin;
 export const register = postAuthRegister;
 export const refreshAuth = postAuthRefresh;

 /** GET /auth/status — resolves to the `needsSetup` / `hasUsers` flags. */
 export const fetchAuthStatus = () => {
   return customAxiosInstance<{ needsSetup: boolean; hasUsers: boolean }>({
     url: "/auth/status",
     method: "GET",
   });
 };

 /** POST /auth/setup with the initial admin credentials — resolves to an access/refresh token pair. */
 export const setupInitialAdmin = (data: { username: string; password: string; email?: string }) => {
   return customAxiosInstance<{ accessToken: string; refreshToken: string }>({
     url: "/auth/setup",
     method: "POST",
     data,
   });
 };

 /** GET /users — resolves to the list of users. */
 export const listUsers = () => {
   return customAxiosInstance<UserResponse[]>({
     url: "/users",
     method: "GET",
   });
 };

 /** POST /users with the given payload — resolves to the created user. */
 export const createUser = (data: AdminCreateUserRequest) => {
   return customAxiosInstance<UserResponse>({
     url: "/users",
     method: "POST",
     data,
   });
 };
@ -237,6 +244,26 @@ export const getInstance = getClustersClusterIdInstancesInstanceId;
 // Friendly aliases for the generated per-cluster instance endpoints
 // (update, delete, and list entries of a single instance).
 export const updateInstance = putClustersClusterIdInstancesInstanceId;
 export const deleteInstance = deleteClustersClusterIdInstancesInstanceId;
 export const listInstanceEntries = getClustersClusterIdInstancesInstanceIdEntries;
 /**
  * Ask the backend to change an instance's replica count.
  *
  * Sends `body` as a POST to the instance's `/scale` endpoint. Both ids are
  * URI-encoded before being placed in the path.
  *
  * @param clusterId  Cluster that owns the instance.
  * @param instanceId Instance to scale.
  * @param body       Desired `replicas` and an optional `workload` name.
  * @returns Promise of `{ instance, replicas, message }` as typed on the request.
  */
 export const scaleInstance = (
   clusterId: string,
   instanceId: string,
   body: { replicas: number; workload?: string },
 ) => {
   const scaleUrl = `/clusters/${encodeURIComponent(clusterId)}/instances/${encodeURIComponent(instanceId)}/scale`;
   return customAxiosInstance<{ instance: InstanceResponse; replicas: number; message: string }>({
     url: scaleUrl,
     method: "POST",
     data: body,
   });
 };
 /**
  * Fetch the values diff for an instance.
  *
  * Issues a GET to the instance's `/values-diff` endpoint. Both ids are
  * URI-encoded before being placed in the path.
  *
  * @param clusterId  Cluster that owns the instance.
  * @param instanceId Instance whose values are compared.
  * @returns Promise of `{ current, defaults }`, each a plain key/value record.
  */
 export const getInstanceValuesDiff = (
   clusterId: string,
   instanceId: string,
 ) => {
   const diffUrl = `/clusters/${encodeURIComponent(clusterId)}/instances/${encodeURIComponent(instanceId)}/values-diff`;
   return customAxiosInstance<{ current: Record<string, unknown>; defaults: Record<string, unknown> }>({
     url: diffUrl,
     method: "GET",
   });
 };
 export const getInstanceDiagnostics = (
  params: { clusterId: string; instanceId: string },
  options?: { tailLines?: number },
--- a/frontend/src/core/types/index.ts
+++ b/frontend/src/core/types/index.ts
@ -71,11 +71,66 @@ import type { ClusterMonitoring, ClusterMonitoringStatus, NodeMetricsResponse }
 export type NodeMetrics = NodeMetricsResponse;
 /**
  * Per-user resource usage row.
  *
  * Every field is optional, and several metrics appear under more than one
  * spelling (e.g. `userName`/`username`, `cpuUsed`/`usedCpu`).
  * NOTE(review): presumably the alternates tolerate differing backend payload
  * shapes — confirm which spellings the API actually emits.
  */
 export interface UserResourceUsage {
  // Identity of the user / namespace the row describes.
  userId?: string;
  userName?: string;
  username?: string;
  namespace?: string;
  // CPU figures, typed as strings.
  cpuUsed?: string;
  usedCpu?: string;
  cpuRequest?: string;
  cpuLimit?: string;
  // Memory figures, typed as strings.
  memoryUsed?: string;
  usedMemory?: string;
  memoryRequest?: string;
  memoryLimit?: string;
  // GPU counts.
  gpuUsed?: number;
  usedGpu?: number;
  gpuAllocated?: number;
  gpuAllocation?: number;
  // GPU memory figures; may arrive as either a string or a number.
  gpuMemoryUsed?: string | number;
  usedGpuMemory?: string | number;
  gpuMemUsed?: string | number;
  gpuMemoryAllocated?: string | number;
  gpuMemAllocated?: string | number;
  // Workload counts.
  podCount?: number;
  instanceCount?: number;
  // Plural request/limit variants of the figures above.
  cpuRequests?: string;
  cpuLimits?: string;
  memoryRequests?: string;
  memoryLimits?: string;
  gpuRequests?: number;
  gpuLimits?: number;
  // The `Mb` suffix suggests megabytes — TODO confirm the unit with the backend.
  gpuMemoryRequestsMb?: number;
  gpuMemoryLimitsMb?: number;
 }
 /**
  * Cluster-level metrics used by the UI: extends the generated
  * `ClusterMonitoring` type with optional extra fields.
  *
  * Several values are declared under more than one spelling
  * (e.g. `allocatedGpuMemoryMb`/`allocatedGpuMemoryMB`, and five differently
  * named per-user arrays).
  * NOTE(review): presumably these cover alternate payload shapes — confirm
  * which names the backend really sends.
  */
 export interface ClusterMetrics extends ClusterMonitoring {
  /** Internal UI identifier (legacy) */
  id?: string;
  nodes?: NodeMetrics[];
  // Widens the generated status type with two UI-only literals.
  status?: ClusterMonitoringStatus | 'warning' | 'error';
  // GPU allocation counts and GPU-memory request/limit figures.
  allocatedGpu?: number;
  allocatedGpuMemoryMb?: number;
  allocatedGpuMemoryMB?: number;
  gpuMemoryRequestsMb?: number;
  gpuMemoryLimitsMb?: number;
  gpuAllocated?: number;
  gpuAllocation?: number;
  // CPU / memory requests and limits, typed as strings.
  cpuRequests?: string;
  cpuLimits?: string;
  memoryRequests?: string;
  memoryLimits?: string;
  // Total / used GPU memory; may arrive as either a string or a number.
  totalGpuMemory?: string | number;
  usedGpuMemory?: string | number;
  gpuMemoryUsed?: string | number;
  totalGpuMem?: string | number;
  usedGpuMem?: string | number;
  // Per-user usage rows, accepted under any of these property names.
  userResources?: UserResourceUsage[];
  resourceUsageByUser?: UserResourceUsage[];
  userResourceUsage?: UserResourceUsage[];
  resourcesByUser?: UserResourceUsage[];
  userResourceRows?: UserResourceUsage[];
 }
 // ==================== Common Types ====================
--- a/frontend/src/features/artifact/instances/components/InstanceCard.tsx
+++ b/frontend/src/features/artifact/instances/components/InstanceCard.tsx
@ -1,27 +1,17 @@
 /**
- * Instance Card Component
+ * Instance Card Component — horizontal row layout
- * Display instance information with action buttons
+ * Compact, readable, with inline scale controls and action buttons
 */
 import React from "react";
 import {
-  Package,
+  Box, Settings, StopCircle, CheckCircle, XCircle, Clock,
-  Settings,
+  Network, Activity, GitBranch, Layers, User,
-  StopCircle,
+  AlertTriangle, HelpCircle, Minus, Plus, Loader2,
  CheckCircle,
  XCircle,
  Clock,
  Network,
  Activity,
  Box,
  Calendar,
  GitBranch,
  Layers,
  AlertTriangle,
  History,
  HelpCircle,
 } from "lucide-react";
-import type { InstanceResponse, InstanceStatus } from "@/api";
+import type { InstanceResponse } from "@/api";
-import { INSTANCE_LAST_OPERATION, INSTANCE_STATUS } from "@/api";
+import { scaleInstance } from "@/api";
 import { useToast } from "@/shared";
 import { formatApiError } from "@/shared/utils";
 interface InstanceCardProps {
  instance: InstanceResponse;
@ -29,293 +19,266 @@ interface InstanceCardProps {
  onTerminate: (instance: InstanceResponse) => void;
  onViewEntries: (instance: InstanceResponse) => void;
  onViewDiagnostics: (instance: InstanceResponse) => void;
  onScale?: (instance: InstanceResponse) => void;
 }
 type StatusVisual = {
  icon: React.ComponentType<{ className?: string }>;
  color: string;
  bg: string;
-  glow: string;
+  border: string;
  label: string;
  defaultReason: string;
 };
-const STATUS_INFO_MAP: Record<InstanceStatus, StatusVisual> = {
+const STATUS_INFO_MAP: Record<string, StatusVisual> = {
-  [INSTANCE_STATUS.deployed]: {
+  deployed: {
    icon: CheckCircle,
-    color: "text-emerald-400",
+    color: "text-emerald-500",
-    bg: "bg-gradient-to-r from-emerald-500/20 to-green-500/20 border-emerald-500/40",
+    bg: "bg-emerald-50",
-    glow: "shadow-emerald-500/20",
+    border: "border-emerald-400",
    label: "Deployed",
    defaultReason: "Deployment completed successfully.",
  },
-  [INSTANCE_STATUS.failed]: {
+  failed: {
    icon: XCircle,
-    color: "text-rose-400",
+    color: "text-rose-500",
-    bg: "bg-gradient-to-r from-rose-500/20 to-red-500/20 border-rose-500/40",
+    bg: "bg-rose-50",
-    glow: "shadow-rose-500/20",
+    border: "border-rose-400",
    label: "Failed",
    defaultReason: "Last operation reported a failure.",
  },
-  [INSTANCE_STATUS["pending-install"]]: {
+  "pending-install": {
    icon: Clock,
-    color: "text-amber-400",
+    color: "text-amber-500",
-    bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40",
+    bg: "bg-amber-50",
-    glow: "shadow-amber-500/20",
+    border: "border-amber-400",
    label: "Pending Install",
    defaultReason: "Installation is in progress.",
  },
-  [INSTANCE_STATUS["pending-upgrade"]]: {
+  "pending-upgrade": {
    icon: Clock,
-    color: "text-amber-400",
+    color: "text-amber-500",
-    bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40",
+    bg: "bg-amber-50",
-    glow: "shadow-amber-500/20",
+    border: "border-amber-400",
    label: "Pending Upgrade",
    defaultReason: "Upgrade is in progress.",
  },
-  [INSTANCE_STATUS["pending-rollback"]]: {
+  "pending-rollback": {
    icon: Clock,
-    color: "text-amber-400",
+    color: "text-amber-500",
-    bg: "bg-gradient-to-r from-amber-500/20 to-yellow-500/20 border-amber-500/40",
+    bg: "bg-amber-50",
-    glow: "shadow-amber-500/20",
+    border: "border-amber-400",
    label: "Pending Rollback",
    defaultReason: "Rollback is in progress.",
  },
-  [INSTANCE_STATUS["pending-delete"]]: {
+  "pending-delete": {
    icon: Clock,
-    color: "text-orange-400",
+    color: "text-orange-500",
-    bg: "bg-gradient-to-r from-orange-500/20 to-red-500/20 border-orange-500/40",
+    bg: "bg-orange-50",
-    glow: "shadow-orange-500/20",
+    border: "border-orange-400",
    label: "Pending Delete",
    defaultReason: "Deletion is in progress.",
  },
-  [INSTANCE_STATUS.superseded]: {
+  superseded: {
-    icon: History,
+    icon: Layers,
-    color: "text-indigo-300",
+    color: "text-indigo-400",
-    bg: "bg-gradient-to-r from-indigo-500/20 to-purple-500/20 border-indigo-500/40",
+    bg: "bg-indigo-50",
-    glow: "shadow-indigo-500/20",
+    border: "border-indigo-300",
    label: "Superseded",
    defaultReason: "A newer revision has replaced this instance.",
  },
-  [INSTANCE_STATUS.uninstalled]: {
+  uninstalled: {
    icon: StopCircle,
-    color: "text-slate-700",
+    color: "text-slate-500",
-    bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-300/40",
+    bg: "bg-slate-50",
-    glow: "shadow-slate-500/20",
+    border: "border-slate-300",
    label: "Uninstalled",
    defaultReason: "Instance has been removed from the cluster.",
  },
-  [INSTANCE_STATUS.unknown]: {
+  unknown: {
    icon: HelpCircle,
-    color: "text-slate-700",
+    color: "text-slate-400",
-    bg: "bg-gradient-to-r from-slate-500/20 to-gray-500/20 border-slate-300/40",
+    bg: "bg-slate-50",
-    glow: "shadow-slate-500/20",
+    border: "border-slate-300",
    label: "Unknown",
    defaultReason: "Awaiting next state update.",
  },
 };
 // Display labels for the known last-operation values, keyed by the
 // INSTANCE_LAST_OPERATION constants. Operations missing from this map fall
 // back to toTitleCase at the lookup site.
 const LAST_OPERATION_LABELS: Record<string, string> = {
  [INSTANCE_LAST_OPERATION.install]: "Install",
  [INSTANCE_LAST_OPERATION.upgrade]: "Upgrade",
  [INSTANCE_LAST_OPERATION.rollback]: "Rollback",
  [INSTANCE_LAST_OPERATION.delete]: "Delete",
  [INSTANCE_LAST_OPERATION.sync]: "Sync",
 };
 /**
  * Convert a whitespace- or hyphen-separated string to space-separated words
  * with each word's first character upper-cased (e.g. "pending-install" ->
  * "Pending Install"). The rest of each word is left untouched.
  */
 function toTitleCase(value: string): string {
   const words = value.split(/[\s-]+/);
   const capitalised = words.map((word) => {
     const head = word.charAt(0).toUpperCase();
     const tail = word.slice(1);
     return `${head}${tail}`;
   });
   return capitalised.join(" ");
 }
 export const InstanceCard: React.FC<InstanceCardProps> = ({
  instance,
  onModify,
  onTerminate,
  onViewEntries,
  onViewDiagnostics,
  onScale,
 }) => {
-  const normalizedStatus = (instance.status ?? INSTANCE_STATUS.unknown) as InstanceStatus;
+  const [scaling, setScaling] = React.useState(false);
-  const statusInfo =
+  const { error: toastError } = useToast();
-    STATUS_INFO_MAP[normalizedStatus] ?? STATUS_INFO_MAP[INSTANCE_STATUS.unknown];
+
  const statusKey = instance.status ?? "unknown";
  const statusInfo = STATUS_INFO_MAP[statusKey] ?? STATUS_INFO_MAP["unknown"];
  const StatusIcon = statusInfo.icon;
-  const statusLabel = statusInfo.label.toUpperCase();
+
-  const instanceName = instance.name || "Unnamed Instance";
+  const instanceName = instance.name || "Unnamed";
-  const repository = instance.repository || "unknown";
+  const chart = instance.chart || instance.repository || "—";
-  const version = instance.version || "latest";
+  const version = instance.version || "—";
  const namespace = instance.namespace || "default";
-  const revision = instance.revision ?? "-";
+  const revision = instance.revision ?? "—";
-  const createdAtText = instance.createdAt
+  const ownerLabel = ownerDisplayName(instance.ownerUsername, instance.ownerId);
-    ? new Date(instance.createdAt).toLocaleDateString()
+
-    : "N/A";
+  const currentReplicas: number = instance.replicas ?? 0;
  const statusReason =
-    typeof instance.statusReason === "string" && instance.statusReason.trim().length > 0
+    typeof instance.statusReason === "string" && instance.statusReason.trim()
      ? instance.statusReason.trim()
      : statusInfo.defaultReason;
  const rawOperation =
    typeof instance.lastOperation === "string" ? instance.lastOperation.trim() : "";
  const lastOperationLabel =
    rawOperation.length > 0
      ? LAST_OPERATION_LABELS[rawOperation] ?? toTitleCase(rawOperation)
      : null;
  const lastError =
-    typeof instance.lastError === "string" ? instance.lastError.trim() : "";
+    typeof instance.lastError === "string" && instance.lastError.trim()
      ? instance.lastError.trim()
      : "";
  const canScale = instance.status === "deployed" || instance.status === "failed";
  // Scale the instance by `delta` replicas, clamped at zero. Does nothing when
  // the clamped target equals the current count or when the instance lacks a
  // cluster id / id. On success the updated instance is passed to `onScale`;
  // on failure an error toast is shown. `scaling` is true while the request runs.
  const handleScale = async (delta: number) => {
    const targetReplicas = Math.max(0, currentReplicas + delta);
    const { clusterId, id: instanceId } = instance;
    if (targetReplicas === currentReplicas || !clusterId || !instanceId) {
      return;
    }

    setScaling(true);
    try {
      const { instance: updated } = await scaleInstance(clusterId, instanceId, {
        replicas: targetReplicas,
      });
      // Fall back to a locally patched copy if the response carries no instance.
      onScale?.(updated ?? { ...instance, replicas: targetReplicas });
    } catch (err) {
      toastError(formatApiError(err) || "Scale failed");
    } finally {
      setScaling(false);
    }
  };
  return (
-    <div className="group relative bg-gradient-to-br from-white via-white to-slate-50 border border-slate-200 rounded-xl hover:border-blue-500/50 hover:shadow-xl hover:shadow-blue-500/10 transition-all duration-300 overflow-hidden">
+    <div className="hover-lift group relative bg-white border border-slate-200 rounded-lg flex flex-col gap-3 px-4 py-3 transition-all lg:flex-row lg:items-center">
-      {/* Decorative gradient overlay */}
+      {/* Left color bar (status) */}
-      <div className="absolute top-0 right-0 w-64 h-64 bg-gradient-to-br from-blue-500/5 to-purple-500/5 rounded-full blur-3xl -z-0 opacity-0 group-hover:opacity-100 transition-opacity duration-500"></div>
+      <div className={`self-stretch w-1 rounded-full flex-shrink-0 ${statusInfo.bg} border ${statusInfo.border}`} />
      {/* Header with enhanced design */}
      <div className="relative px-6 py-5 border-b border-slate-200 bg-gradient-to-r from-slate-50 to-white">
        <div className="flex items-start justify-between">
          <div className="flex items-start gap-4 flex-1">
            {/* Enhanced icon with glow effect */}
            <div className="relative p-3 bg-gradient-to-br from-blue-500/20 to-cyan-500/20 rounded-xl border border-blue-500/30 shadow-lg shadow-blue-500/20 group-hover:shadow-blue-500/40 transition-shadow duration-300">
              <Box className="w-7 h-7 text-blue-400" />
              <div className="absolute inset-0 bg-blue-400/10 rounded-xl blur-sm"></div>
            </div>
            <div className="flex-1 min-w-0">
              <h3 className="text-xl font-bold text-slate-950 truncate">
                {instanceName}
              </h3>
              <div className="flex items-center gap-2 mt-2">
                <Package className="w-4 h-4 text-slate-500" />
                <p className="text-sm text-slate-500 font-mono">
                  {repository}
                </p>
                <span className="text-slate-600">•</span>
                <span className="px-2 py-0.5 text-xs font-semibold text-cyan-400 bg-cyan-500/10 border border-cyan-500/30 rounded">
                  {version}
                </span>
              </div>
            </div>
          </div>
-          {/* Enhanced Status Badge with glow */}
+      {/* Status icon + label */}
-          <div
+      <div className="flex items-center gap-1.5 flex-shrink-0 min-w-[90px]">
-            className={`flex items-center gap-2 px-4 py-2 rounded-full border shadow-lg ${statusInfo.bg} ${statusInfo.glow} backdrop-blur-sm`}
+        <StatusIcon className={`w-4 h-4 ${statusInfo.color}`} />
-          >
+        <span className={`text-xs font-semibold ${statusInfo.color}`}>{statusInfo.label}</span>
-            <StatusIcon className={`w-4 h-4 ${statusInfo.color}`} />
+      </div>
            <span className={`text-sm font-semibold ${statusInfo.color} uppercase tracking-wide`}>
              {statusLabel}
            </span>
          </div>
        </div>
-        <div className="mt-4 flex flex-col gap-1 text-sm text-slate-700">
+      {/* Name + Chart info */}
-          <span className="font-medium text-slate-700">{statusReason}</span>
+      <div className="w-full min-w-0 flex-1 flex items-center gap-4 lg:w-auto">
-          {lastOperationLabel && (
+        <div className="min-w-0">
-            <span className="text-xs uppercase tracking-wide text-slate-500">
+          <h4 className="text-sm font-semibold text-slate-900 truncate">{instanceName}</h4>
-              Operation: {lastOperationLabel}
+          <div className="flex flex-wrap items-center gap-x-3 gap-y-1 text-xs text-slate-500 mt-0.5">
            <span className="flex items-center gap-1">
              <Box className="w-3 h-3" />
              <span className="truncate max-w-[200px]">{chart}:{version}</span>
            </span>
-          )}
+            <span className="flex items-center gap-1">
              <Layers className="w-3 h-3" />
              {namespace}
            </span>
            <span className="flex items-center gap-1">
              <GitBranch className="w-3 h-3" />
              rev{revision}
            </span>
            {ownerLabel && (
              <span className="flex min-w-0 items-center gap-1">
                <User className="w-3 h-3" />
                <span className="truncate max-w-[120px]">{ownerLabel}</span>
              </span>
            )}
          </div>
        </div>
      </div>
-      {/* Enhanced Content Grid */}
+      {/* Status message or error */}
-      <div className="relative px-6 py-5 space-y-4 bg-gradient-to-b from-white to-slate-50">
+      {(statusReason || lastError) && (
-        <div className="grid grid-cols-2 gap-4">
+        <div className="hidden xl:block flex-1 min-w-0 max-w-[280px]">
-          {/* Namespace */}
+          {lastError ? (
-          <div className="p-3 bg-white border border-slate-200 rounded-lg hover:border-purple-500/30 transition-colors">
+            <p className="text-xs text-rose-600 truncate flex items-center gap-1">
-            <div className="flex items-center gap-2 mb-2">
+              <AlertTriangle className="w-3 h-3 flex-shrink-0" />
-              <Layers className="w-4 h-4 text-purple-400" />
+              {lastError}
              <p className="text-xs text-slate-500 uppercase font-semibold tracking-wider">Namespace</p>
            </div>
            <p className="text-sm font-bold text-slate-900">
              {namespace}
            </p>
-          </div>
+          ) : statusReason ? (
-          
+            <p className="text-xs text-slate-500 truncate">{statusReason}</p>
-          {/* Revision */}
+          ) : null}
          <div className="p-3 bg-white border border-slate-200 rounded-lg hover:border-green-500/30 transition-colors">
            <div className="flex items-center gap-2 mb-2">
              <GitBranch className="w-4 h-4 text-green-400" />
              <p className="text-xs text-slate-500 uppercase font-semibold tracking-wider">Revision</p>
            </div>
            <p className="text-sm font-bold text-slate-900">
              {revision}
            </p>
          </div>
          {/* Repository - Full Width */}
          <div className="col-span-2 p-3 bg-white border border-slate-200 rounded-lg hover:border-blue-500/30 transition-colors">
            <div className="flex items-center gap-2 mb-2">
              <Package className="w-4 h-4 text-blue-400" />
              <p className="text-xs text-slate-500 uppercase font-semibold tracking-wider">Repository</p>
            </div>
            <p className="text-sm font-mono text-slate-900 truncate" title={repository}>
              {repository}
            </p>
          </div>
          {/* Launched Date - Full Width */}
          <div className="col-span-2 p-3 bg-white border border-slate-200 rounded-lg hover:border-amber-500/30 transition-colors">
            <div className="flex items-center gap-2 mb-2">
              <Calendar className="w-4 h-4 text-amber-400" />
              <p className="text-xs text-slate-500 uppercase font-semibold tracking-wider">Launched</p>
            </div>
            <p className="text-sm font-bold text-slate-900">
              {createdAtText}
            </p>
          </div>
        </div>
      )}
-        {lastError && (
+      {/* Scale controls */}
-          <div className="flex items-start gap-3 p-4 border border-rose-500/30 bg-rose-500/10 rounded-lg">
+      <div className="flex w-full items-center justify-end gap-0.5 flex-shrink-0 lg:w-auto">
-            <div className="p-2 bg-rose-500/20 rounded-lg border border-rose-500/40">
+        {canScale ? (
-              <AlertTriangle className="w-5 h-5 text-rose-700" />
+          <>
-            </div>
+            <button
-            <div>
+              onClick={() => handleScale(-1)}
-              <p className="text-sm font-semibold text-rose-200">Last error</p>
+              disabled={scaling || currentReplicas <= 0}
-              <p className="text-sm text-rose-100/90">{lastError}</p>
+              className="p-1 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed transition-colors"
-            </div>
+              title="Scale down"
-          </div>
+            >
              {scaling ? (
                <Loader2 className="w-3.5 h-3.5 text-slate-400 animate-spin" />
              ) : (
                <Minus className="w-3.5 h-3.5 text-slate-500" />
              )}
            </button>
            <span className="w-8 text-center text-sm font-bold text-slate-700 tabular-nums">
              {currentReplicas}
            </span>
            <button
              onClick={() => handleScale(1)}
              disabled={scaling}
              className="p-1 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed transition-colors"
              title="Scale up"
            >
              <Plus className="w-3.5 h-3.5 text-slate-500" />
            </button>
          </>
        ) : (
          <span className="text-xs text-slate-400 w-16 text-center">{currentReplicas} repl.</span>
        )}
      </div>
-      {/* Enhanced Actions Bar */}
+      {/* Action buttons */}
-      <div className="relative px-6 py-4 bg-gradient-to-r from-slate-50 via-slate-50 to-white border-t border-slate-200 backdrop-blur-sm">
+      <div className="flex w-full flex-wrap items-center justify-end gap-1.5 flex-shrink-0 lg:w-auto">
-        <div className="grid grid-cols-2 gap-2 md:grid-cols-2 xl:grid-cols-4">
+        <button
-            <button
+          onClick={() => onViewEntries(instance)}
-              onClick={() => onViewEntries(instance)}
+          className="flex items-center gap-1 px-2 py-1.5 rounded-md text-slate-500 hover:text-blue-600 hover:bg-blue-50 transition-colors text-xs font-medium"
-              className="group/btn inline-flex min-w-0 items-center justify-center gap-2 rounded-lg border border-emerald-500/40 bg-emerald-50 px-3 py-2.5 text-sm font-semibold text-emerald-700 transition-all duration-200 hover:border-emerald-500/60 hover:bg-emerald-100 hover:shadow-lg"
+          title="Entries"
-              title="View service entries"
+        >
-            >
+          <Network className="w-3.5 h-3.5" />
-              <Network className="w-4 h-4 group-hover/btn:scale-110 transition-transform" />
+          <span className="hidden sm:inline">Entries</span>
-              <span className="truncate">Entries</span>
+        </button>
-            </button>
+        <button
-            <button
+          onClick={() => onViewDiagnostics(instance)}
-              onClick={() => onViewDiagnostics(instance)}
+          className="flex items-center gap-1 px-2 py-1.5 rounded-md text-slate-500 hover:text-amber-600 hover:bg-amber-50 transition-colors text-xs font-medium"
-              className="group/btn inline-flex min-w-0 items-center justify-center gap-2 rounded-lg border border-indigo-200 bg-indigo-50 px-3 py-2.5 text-sm font-semibold text-indigo-700 transition-all duration-200 hover:border-indigo-300 hover:bg-indigo-100 hover:shadow-lg"
+          title="Diagnostics"
-              title="View describe, events, and pod logs"
+        >
-            >
+          <Activity className="w-3.5 h-3.5" />
-              <Activity className="w-4 h-4 group-hover/btn:scale-110 transition-transform" />
+          <span className="hidden sm:inline">Diag</span>
-              <span className="truncate">Diagnostics</span>
+        </button>
-            </button>
+        <button
-
+          onClick={() => onModify(instance)}
-            <button
+          className="flex items-center gap-1 px-2 py-1.5 rounded-md text-slate-500 hover:text-indigo-600 hover:bg-indigo-50 transition-colors text-xs font-medium"
-              onClick={() => onModify(instance)}
+          title="Modify"
-              className="group/btn inline-flex min-w-0 items-center justify-center gap-2 rounded-lg border border-blue-500/40 bg-blue-50 px-3 py-2.5 text-sm font-semibold text-blue-700 transition-all duration-200 hover:border-blue-500/60 hover:bg-blue-100 hover:shadow-lg"
+        >
-              title="Modify instance configuration"
+          <Settings className="w-3.5 h-3.5" />
-            >
+          <span className="hidden sm:inline">Modify</span>
-              <Settings className="w-4 h-4 group-hover/btn:rotate-90 transition-transform duration-300" />
+        </button>
-              <span className="truncate">Modify</span>
+        <button
-            </button>
+          onClick={() => onTerminate(instance)}
-
+          className="flex items-center gap-1 px-2 py-1.5 rounded-md text-slate-500 hover:text-rose-600 hover:bg-rose-50 transition-colors text-xs font-medium"
-            <button
+          title="Delete"
-              onClick={() => onTerminate(instance)}
+        >
-              className="group/btn inline-flex min-w-0 items-center justify-center gap-2 rounded-lg border border-rose-500/40 bg-red-50 px-3 py-2.5 text-sm font-semibold text-rose-700 transition-all duration-200 hover:border-rose-500/60 hover:bg-rose-100 hover:shadow-lg"
+          <StopCircle className="w-3.5 h-3.5" />
-              title="Terminate instance"
+          <span className="hidden sm:inline">Delete</span>
-            >
+        </button>
              <StopCircle className="w-4 h-4 group-hover/btn:scale-110 transition-transform" />
              <span className="truncate">Delete</span>
            </button>
        </div>
      </div>
    </div>
  );
 };
 /**
  * Pick a display label for an instance owner.
  *
  * The trimmed username wins when it has visible content. Otherwise the trimmed
  * owner id is used; ids longer than 12 characters are shortened to their first
  * 8 and last 4 characters joined by "...". Returns "" when neither input has
  * visible content.
  */
 const ownerDisplayName = (ownerUsername?: string, ownerId?: string): string => {
  const trimmedName = ownerUsername?.trim();
  if (trimmedName) {
    return trimmedName;
  }
  // A missing id behaves like an empty one: length 0 falls through unshortened.
  const trimmedId = ownerId?.trim() ?? "";
  const needsShortening = trimmedId.length > 12;
  return needsShortening
    ? `${trimmedId.slice(0, 8)}...${trimmedId.slice(-4)}`
    : trimmedId;
 };
--- a/frontend/src/features/artifact/instances/components/ModifyModal.tsx
+++ b/frontend/src/features/artifact/instances/components/ModifyModal.tsx
@ -7,19 +7,17 @@ import React, { useState, useEffect } from "react";
 import { Settings } from "lucide-react";
 import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
 import type { InstanceResponse, UpdateInstanceRequest } from "@/api";
-import { getValuesSchema } from "@/api";
+import { getInstance, getInstanceValuesDiff } from "@/api";
-import { 
+import {
-  Modal, 
+  Modal,
-  Button, 
+  Button,
-  FormField, 
+  FormField,
-  Input, 
+  Input,
  Textarea,
  ErrorState,
  LoadingState,
  Badge,
  SchemaFormGenerator
 } from "@/shared/components";
 import type { JsonSchema } from "@/shared/components/form/SchemaFormGenerator";
 interface ModifyModalProps {
  instance: InstanceResponse;
@ -37,72 +35,109 @@ export const ModifyModal: React.FC<ModifyModalProps> = ({
  const [valuesYaml, setValuesYaml] = useState("");
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
-  
+  const [modifiedKeys, setModifiedKeys] = useState<string[]>([]);
  // Values Schema support
  const [loadingSchema, setLoadingSchema] = useState(false);
  const [valuesSchema, setValuesSchema] = useState<JsonSchema | null>(null);
  const [inputMethod, setInputMethod] = useState<'form' | 'yaml'>('yaml');
  const [formValues, setFormValues] = useState<Record<string, any>>({});
-  // Initialize with current values
+  // Values Diff support
  const [showDiff, setShowDiff] = useState(false);
  const [loadingDiff] = useState(false);
  const [diffData, setDiffData] = useState<{
    current: Record<string, any>;
    defaults: Record<string, any>;
  } | null>(null);
  const [diffError] = useState<string | null>(null);
  // Fetch full Helm values (via values-diff API) and instance detail
  useEffect(() => {
    setTag(instance.version || "");
-    setDescription(""); // InstanceResponse doesn't have description field
+    setDescription("");
    // Parse existing values
    if (instance.values) {
      try {
        const parsedValues = typeof instance.values === 'string' 
          ? JSON.parse(instance.values) 
          : instance.values;
        setFormValues(parsedValues);
        setValuesYaml(typeof parsedValues === 'object' ? stringifyYaml(parsedValues) : String(parsedValues));
      } catch (err) {
        console.error('[ModifyModal] Failed to parse existing values:', err);
        setValuesYaml(String(instance.values) || "");
      }
    }
-    // Load values schema
+    const loadData = async () => {
-    loadValuesSchema();
+      if (instance.clusterId && instance.id) {
        // Load values diff first — gives us the full current Helm values
        try {
          const data = await getInstanceValuesDiff(instance.clusterId, instance.id);
          if (data?.current && Object.keys(data.current).length > 0) {
            const currentYaml = stringifyYaml(data.current, { lineWidth: 0 });
            setValuesYaml(currentYaml);
            setDiffData({ current: data.current, defaults: data.defaults ?? {} });
          }
        } catch (err) {
          console.error('[ModifyModal] Failed to load values diff:', err);
          // Fallback: try instance detail
          try {
            const detail = await getInstance({ clusterId: instance.clusterId, instanceId: instance.id });
            if (detail.values && Object.keys(detail.values).length > 0) {
              const y = stringifyYaml(detail.values, { lineWidth: 0 });
              setValuesYaml(y);
            }
          } catch (err2) {
            console.error('[ModifyModal] Failed to load instance detail:', err2);
          }
        }
      }
    };
    loadData();
  }, [instance]);
-  const loadValuesSchema = async () => {
+  // Recompute modified keys when valuesYaml or diffData changes
-    if (!instance.registryId || !instance.repository || !instance.version) {
+  useEffect(() => {
-      setValuesSchema(null);
+    if (!diffData?.defaults || !valuesYaml) return;
      setInputMethod('yaml');
      return;
    }
    setLoadingSchema(true);
    try {
-      const schemaResponse = await getValuesSchema({ 
+      const current = parseYaml(valuesYaml);
-        registryId: instance.registryId, 
+      const defaults = diffData.defaults;
-        repositoryName: instance.repository, 
+      const changed: string[] = [];
-        reference: instance.version, 
+      const walkKeys = (curr: any, def: any, prefix: string) => {
-      });
+        if (curr === null || curr === undefined) return;
-      const normalizedSchema = extractJsonSchema(schemaResponse);
+        if (typeof curr !== 'object') return;
-      setValuesSchema(normalizedSchema);
+        for (const key of Object.keys(curr)) {
-      
+          const fullKey = prefix ? `${prefix}.${key}` : key;
-      if (normalizedSchema) {
+          if (JSON.stringify(curr[key]) !== JSON.stringify(def?.[key])) {
-        setInputMethod('form');
+            changed.push(fullKey);
-        console.log(`[ModifyModal] Loaded values schema with ${Object.keys(normalizedSchema.properties ?? {}).length} properties`);
+          }
-      } else {
+          if (typeof curr[key] === 'object' && curr[key] !== null && !Array.isArray(curr[key])) {
-        setInputMethod('yaml');
+            walkKeys(curr[key], def?.[key] ?? {}, fullKey);
-        console.log('[ModifyModal] No values schema available, using YAML input');
+          }
-      }
+        }
-    } catch (err) {
+      };
-      console.error('[ModifyModal] Failed to load values schema:', err);
+      walkKeys(current, defaults, '');
-      setValuesSchema(null);
+      setModifiedKeys(changed);
-      setInputMethod('yaml');
+    } catch { /* ignore parse errors */ }
-    } finally {
+  }, [valuesYaml, diffData]);
-      setLoadingSchema(false);
+
-    }
+
  // Overwrite the editable YAML with the loaded default values; does nothing
  // while no defaults are available in diffData.
  const applyDefaults = () => {
    const loadedDefaults = diffData?.defaults;
    if (loadedDefaults) {
      setValuesYaml(stringifyYaml(loadedDefaults, { lineWidth: 0 }));
    }
  };
-  const handleFormValuesChange = (values: Record<string, any>) => {
+  /**
-    setFormValues(values);
+   * Render a values object as YAML lines, bolding keys that differ from defaults.
-    setValuesYaml(stringifyYaml(values));
+   */
  const renderDiffValues = (
    values: Record<string, any>,
    compare: Record<string, any>,
  ): React.ReactNode => {
    // Serialise once and emit one block-level <span> per YAML line so the
    // surrounding <pre> keeps the original layout.
    const yaml = stringifyYaml(values);
    const lines = yaml.split("\n");
    return lines.map((line, i) => {
      // Extract the key name from a YAML line
      const keyMatch = line.match(/^(\s*)([a-zA-Z_][\w-]*)\s*:/);
      if (keyMatch) {
        const indent = keyMatch[1];
        const key = keyMatch[2];
        // The comparison below reads values[key] / compare[key], i.e. top-level
        // entries only. Restrict highlighting to unindented lines so a nested
        // key (or a block-scalar content line that looks like "key:") that
        // merely shares its name with a differing top-level key is not
        // highlighted by mistake.
        const isTopLevel = indent.length === 0;
        // Keys absent from `compare` are intentionally left unhighlighted.
        const keyChanged =
          isTopLevel &&
          compare[key] !== undefined &&
          JSON.stringify(values[key]) !== JSON.stringify(compare[key]);
        if (keyChanged) {
          return (
            <span key={i} className="block">
              <strong className="text-amber-600 dark:text-amber-400">{key}</strong>:{line.slice(keyMatch[0].length)}
            </span>
          );
        }
      }
      return <span key={i} className="block">{line}</span>;
    });
  };
  const handleSubmit = async (e: React.FormEvent) => {
@ -111,10 +146,12 @@ export const ModifyModal: React.FC<ModifyModalProps> = ({
    setError(null);
    try {
      if (valuesYaml.trim()) {
        parseValuesYaml(valuesYaml);
      }
      const payload: UpdateInstanceRequest = {
        version: tag && tag !== instance.version ? tag : undefined,
        description: description.trim() || undefined,
        values: valuesYaml.trim() ? parseValuesYaml(valuesYaml) : undefined,
        valuesYaml: valuesYaml.trim() || undefined,
      };
@ -211,60 +248,103 @@ export const ModifyModal: React.FC<ModifyModalProps> = ({
          />
        </FormField>
-        {/* Values Configuration */}
+        {/* Current Values — directly editable as YAML */}
-        <div className="space-y-3">
+        <div className="space-y-2">
-          <div className="flex items-center justify-between">
+          <label className="block text-sm font-medium text-slate-700">
-            <label className="block text-sm font-medium text-slate-700">
+            Configuration Values
-              Configuration Values
+          </label>
-            </label>
+          <p className="text-xs text-slate-500">
-            {valuesSchema?.properties && (
+            Editing current deployed values. The full YAML is submitted so nested chart values stay intact.
-              <div className="flex gap-2">
+          </p>
-                <button
+          {modifiedKeys.length > 0 && (
-                  type="button"
+            <div className="flex flex-wrap items-center gap-1.5 text-xs">
-                  onClick={() => setInputMethod('form')}
+              <span className="text-slate-500">Modified:</span>
-                  className="cursor-pointer"
+              {modifiedKeys.map((k) => (
-                >
+                <span key={k} className="px-1.5 py-0.5 rounded bg-amber-100 text-amber-700 font-mono font-medium">
-                  <Badge
+                  {k}
-                    variant={inputMethod === 'form' ? 'success' : 'default'}
+                </span>
-                    size="sm"
+              ))}
-                  >
+            </div>
-                    Form
+          )}
-                  </Badge>
+          <Textarea
-                </button>
+            value={valuesYaml}
-                <button
+            onChange={(e) => setValuesYaml(e.target.value)}
-                  type="button"
+            rows={14}
-                  onClick={() => setInputMethod('yaml')}
+            placeholder="key: value&#10;nested:&#10;  key: value"
-                  className="cursor-pointer"
+            className="font-mono text-sm"
-                >
+          />
-                  <Badge
+        </div>
-                    variant={inputMethod === 'yaml' ? 'success' : 'default'}
+
-                    size="sm"
+        {/* Values Diff Section */}
-                  >
+        {instance.clusterId && instance.id && (
-                    YAML
+          <div className="border-t border-slate-200 pt-3">
-                  </Badge>
+            <button
-                </button>
+              type="button"
              onClick={() => {
                setShowDiff(!showDiff);
              }}
              className="flex items-center gap-2 text-sm font-medium text-indigo-600 hover:text-indigo-700 transition-colors"
            >
              <span>{showDiff ? "Hide" : "Show"} Values Diff</span>
              <span className={`text-xs transition-transform ${showDiff ? "rotate-180" : ""}`}>▼</span>
            </button>
            {showDiff && (
              <div className="mt-3 space-y-3">
                {loadingDiff && (
                  <LoadingState message="Loading values diff..." />
                )}
                {diffError && (
                  <ErrorState title="Diff Error" message={diffError} />
                )}
                {diffData && (
                  <>
                    <div className="grid grid-cols-2 gap-3">
                      {/* Current Values */}
                      <div>
                        <div className="flex items-center gap-2 mb-1">
                          <span className="text-xs font-semibold uppercase tracking-wider text-slate-500">
                            Current
                          </span>
                          <Badge variant="default" size="sm">deployed</Badge>
                        </div>
                        <pre className="text-xs font-mono bg-slate-50 border border-slate-200 rounded-lg p-3 max-h-64 overflow-auto whitespace-pre text-slate-700">
                          {renderDiffValues(diffData.current, diffData.defaults)}
                        </pre>
                      </div>
                      {/* Default Values */}
                      <div>
                        <div className="flex items-center gap-2 mb-1">
                          <span className="text-xs font-semibold uppercase tracking-wider text-slate-500">
                            Defaults
                          </span>
                          <Badge variant="info" size="sm">chart</Badge>
                        </div>
                        <pre className="text-xs font-mono bg-slate-50 border border-slate-200 rounded-lg p-3 max-h-64 overflow-auto whitespace-pre text-slate-500">
                          {renderDiffValues(diffData.defaults, diffData.current)}
                        </pre>
                      </div>
                    </div>
                    {/* Legend */}
                    <p className="text-xs text-slate-500">
                      <strong className="text-amber-600 dark:text-amber-400">Bold amber keys</strong> differ between current and default values.
                    </p>
                    {/* Use Defaults Button */}
                    <button
                      type="button"
                      onClick={applyDefaults}
                      className="inline-flex items-center gap-1.5 text-xs font-medium text-indigo-600 hover:text-indigo-700 bg-indigo-50 hover:bg-indigo-100 border border-indigo-200 rounded-lg px-3 py-1.5 transition-all"
                      title="Replace current values with chart defaults"
                    >
                      <Settings className="w-3.5 h-3.5" />
                      Use Defaults
                    </button>
                  </>
                )}
              </div>
            )}
          </div>
-
+        )}
          {loadingSchema ? (
            <LoadingState message="Loading configuration schema..." />
          ) : inputMethod === 'form' && valuesSchema ? (
            <SchemaFormGenerator
              schema={valuesSchema}
              values={formValues}
              onChange={handleFormValuesChange}
            />
          ) : (
            <Textarea
              value={valuesYaml}
              onChange={(e) => setValuesYaml(e.target.value)}
              rows={12}
              placeholder="key: value&#10;nested:&#10;  key: value"
              className="font-mono text-sm"
            />
          )}
        </div>
        <p className="text-xs text-slate-500">
          Update applies the selected chart version and values override. Resource readiness is tracked from the instance list after submit.
@ -274,42 +354,6 @@ export const ModifyModal: React.FC<ModifyModalProps> = ({
  );
 };
 /** Type guard: accepts any non-null, non-array object and narrows it to JsonSchema. */
 const isJsonSchemaObject = (value: unknown): value is JsonSchema => {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
 };
 /**
  * Normalise a schema response into a JsonSchema object, or null.
  *
  * Accepts either an object or a JSON string. When the decoded value has a
  * "schema" property, that inner value is normalised recursively and preferred;
  * if the inner value is unusable, the decoded value itself is returned as long
  * as it is a plain (non-array) object. Everything else yields null.
  */
 const extractJsonSchema = (schemaResponse: unknown): JsonSchema | null => {
  if (schemaResponse === null || schemaResponse === undefined) {
    return null;
  }

  // Strings are decoded as JSON; text that fails to parse collapses to null.
  let decoded: unknown = schemaResponse;
  if (typeof schemaResponse === "string") {
    try {
      decoded = JSON.parse(schemaResponse);
    } catch {
      decoded = null;
    }
  }

  // Envelope form ({ schema: ... }): try the wrapped value first.
  const hasSchemaProperty =
    Boolean(decoded) &&
    typeof decoded === "object" &&
    "schema" in (decoded as Record<string, unknown>);
  if (hasSchemaProperty) {
    const unwrapped = extractJsonSchema((decoded as { schema?: unknown }).schema);
    if (unwrapped) {
      return unwrapped;
    }
  }

  return isJsonSchemaObject(decoded) ? (decoded as JsonSchema) : null;
 };
 const parseValuesYaml = (source: string): Record<string, any> => {
  const parsed = parseYaml(source);
  if (parsed == null) {
--- a/frontend/src/features/artifact/instances/pages/InstancesManagementPage.tsx
+++ b/frontend/src/features/artifact/instances/pages/InstancesManagementPage.tsx
@ -188,6 +188,22 @@ const InstancesManagementPage: React.FC = () => {
    };
  }, [autoRefresh]);
  // Swap the stored copy of an instance (matched by id) for the updated record.
  // Only the first cluster list containing that id is rewritten; the Map and the
  // affected array are copied rather than mutated in place.
  const handleScale = useCallback((updatedInstance: InstanceResponse) => {
    setInstancesByCluster((previous) => {
      const patched = new Map(previous);
      for (const [clusterId, clusterInstances] of patched) {
        const position = clusterInstances.findIndex(
          (candidate) => candidate.id === updatedInstance.id,
        );
        if (position === -1) {
          continue;
        }
        patched.set(clusterId, [
          ...clusterInstances.slice(0, position),
          updatedInstance,
          ...clusterInstances.slice(position + 1),
        ]);
        // Stop right after the write so the Map is not iterated further.
        break;
      }
      return patched;
    });
  }, []);
  // Store the chosen instance in modifyInstance state.
  // NOTE(review): presumably this is what opens the modify modal; confirm against the render code.
  const handleModify = useCallback((instance: Instance) => {
    setModifyInstance(instance);
  }, []);
@ -421,7 +437,7 @@ const InstancesManagementPage: React.FC = () => {
                        </p>
                      </div>
                    </div>
-                    <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
+                    <div className="flex flex-col gap-2">
                      {instances.map((instance) => (
                        <InstanceCard
                          key={instance.id}
@ -429,6 +445,7 @@ const InstancesManagementPage: React.FC = () => {
                          onModify={handleModify}
                          onTerminate={handleTerminate}
                          onScale={handleScale}
                          onViewEntries={handleViewEntries}
                          onViewDiagnostics={handleViewDiagnostics}
                        />
@ -438,7 +455,7 @@ const InstancesManagementPage: React.FC = () => {
                );
              })
            ) : (
-              <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
+              <div className="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4">
                {filteredInstances.map(({ instance }) => (
                <InstanceCard
                  key={instance.id}
--- a/frontend/src/features/artifact/registries/components/LaunchModal.tsx
+++ b/frontend/src/features/artifact/registries/components/LaunchModal.tsx
@ -222,7 +222,7 @@ export const LaunchModal: React.FC<LaunchModalProps> = ({
      valuesObj = pruneEmptyValues(valuesForm);
    } else if (inputMethod === "yaml" && valuesYaml.trim()) {
      try {
-        valuesObj = parseValuesYaml(valuesYaml);
+        parseValuesYaml(valuesYaml);
        normalizedValuesYaml = valuesYaml.trim();
      } catch (err) {
        console.error(err);
--- a/frontend/src/features/artifact/registries/components/TagCard.tsx
+++ b/frontend/src/features/artifact/registries/components/TagCard.tsx
@ -1,6 +1,6 @@
 /**
 * Tag Card Component
- * Simple card for displaying a single tag/artifact
+ * Card for displaying a single chart tag/artifact with Launch action
 */
 import React, { useState } from "react";
 import { Box, Copy, File, HardDrive, Package, Rocket } from "lucide-react";
@ -13,17 +13,16 @@ interface TagCardProps {
  registryId: string;
  registryUrl?: string;
  tag: ArtifactListItem;
  isLatest?: boolean;
 }
-export const TagCard: React.FC<TagCardProps> = ({ registryId, registryUrl, tag }) => {
+export const TagCard: React.FC<TagCardProps> = ({ registryId, registryUrl, tag, isLatest = false }) => {
  const { success } = useToast();
  const [launchModalOpen, setLaunchModalOpen] = useState(false);
  const category = inferArtifactCategory(tag);
  const handleLaunch = () => {
-    if (category !== "chart") {
+    if (category !== "chart") return;
      return;
    }
    setLaunchModalOpen(true);
  };
@ -42,103 +41,87 @@ export const TagCard: React.FC<TagCardProps> = ({ registryId, registryUrl, tag }
  const formatSize = (bytes: number) => {
    if (bytes === 0) return "N/A";
    const mb = bytes / (1024 * 1024);
-    if (mb < 1) {
+    if (mb < 1) return `${(bytes / 1024).toFixed(1)} KB`;
      return `${(bytes / 1024).toFixed(1)} KB`;
    }
    return `${mb.toFixed(1)} MB`;
  };
  const getTypeColor = (type: ArtifactCategory) => {
    switch (type) {
      case "chart":
-        return "text-blue-400 bg-blue-500/10 border-blue-500/30";
+        return "text-blue-500 bg-blue-500/10 border-blue-500/30";
      case "image":
-        return "text-green-400 bg-green-500/10 border-green-500/30";
+        return "text-green-500 bg-green-500/10 border-green-500/30";
      default:
        return "text-slate-500 bg-gray-500/10 border-gray-500/30";
    }
  };
  const getTypeIcon = (type: ArtifactCategory) => {
-    const className = "w-5 h-5";
+    const className = "w-4 h-4";
    switch (type) {
-      case "chart":
+      case "chart": return <Package className={className} />;
-        return <Package className={className} />;
+      case "image": return <Box className={className} />;
-      case "image":
+      default: return <File className={className} />;
        return <Box className={className} />;
      default:
        return <File className={className} />;
    }
  };
  return (
    <>
-      <div className="bg-white border border-slate-200 rounded-lg p-4 hover:border-brand-blue/50 transition-all group">
+      <div className="hover-lift relative bg-white border border-slate-200 rounded-lg p-4 min-w-[180px] hover:border-brand-blue/50 transition-all group">
-        <div className="flex items-start gap-3">
+        {/* LATEST badge */}
-          {/* Icon */}
+        {isLatest && (
-          <div className="flex-shrink-0">
+          <span className="absolute -top-2 right-2 px-2 py-0.5 rounded-full text-[11px] font-bold uppercase tracking-wider bg-emerald-500 text-white shadow-sm z-10">
-            <div className={`w-10 h-10 rounded-lg border ${getTypeColor(category)} 
+            LATEST
-              flex items-center justify-center text-lg`}>
+          </span>
-              {getTypeIcon(category)}
+        )}
-            </div>
+
        {/* Tag name + type */}
        <div className="flex items-center gap-2 mb-2">
          <div className={`w-7 h-7 rounded-md border ${getTypeColor(category)} flex items-center justify-center flex-shrink-0`}>
            {getTypeIcon(category)}
          </div>
          <h3 className="text-sm font-semibold text-slate-900 truncate flex-1" title={tag.tag}>
            {tag.tag || 'N/A'}
          </h3>
          <span className={`px-1.5 py-0.5 rounded text-[11px] border ${getTypeColor(category)} flex-shrink-0`}>
            {category}
          </span>
        </div>
-          {/* Content */}
+        {/* Repository path */}
-          <div className="flex-1 min-w-0">
+        <p className="text-xs text-slate-400 truncate mb-3" title={tag.repositoryName || ''}>
-            {/* Tag name */}
+          {tag.repositoryName || ' '}
-            <div className="flex items-center gap-2 mb-1">
+        </p>
              <Package className="w-4 h-4 text-blue-600 flex-shrink-0" />
              <h3 className="text-sm font-semibold text-slate-900 truncate">
                {tag.tag || 'N/A'}
              </h3>
              <span
                className={`px-2 py-0.5 rounded text-xs border ${getTypeColor(category)}`}
                title={tag.mediaType || tag.type || ''}
              >
                {category}
              </span>
            </div>
-            {/* Repository path */}
+        {/* Actions row */}
-            <p className="text-xs text-slate-500 truncate mb-2">
+        <div className="flex items-center justify-between gap-2">
-              {tag.repositoryName}
+          <div className="flex items-center gap-1.5 text-xs text-slate-400 flex-shrink-0">
-            </p>
+            <HardDrive className="w-3.5 h-3.5" />
-
+            <span>{formatSize(tag.size || 0)}</span>
            {/* Size */}
            <div className="flex items-center gap-2 text-xs text-slate-500">
              <HardDrive className="w-3.5 h-3.5" />
              <span>{formatSize(tag.size || 0)}</span>
            </div>
          </div>
-
+          <div className="flex items-center gap-2 flex-shrink-0">
-          {/* Actions */}
+            <button
-          <div className="flex-shrink-0 flex flex-col gap-2">
+              onClick={handleCopy}
              className="px-2.5 py-1.5 bg-white hover:bg-slate-50 text-slate-500 border border-slate-200 rounded text-xs transition-colors flex items-center gap-1"
              title="Copy pull command"
            >
              <Copy className="w-3.5 h-3.5" />
              <span>Copy</span>
            </button>
            {category === "chart" && (
              <button
                onClick={handleLaunch}
-                className="px-3 py-1.5 bg-blue-600 hover:bg-blue-700 text-white rounded
+                className="px-2.5 py-1.5 bg-blue-600 hover:bg-blue-700 text-white rounded text-xs font-medium transition-colors flex items-center gap-1"
                  text-xs font-medium transition-colors flex items-center gap-1.5"
                title="Launch this Helm chart"
              >
                <Rocket className="w-3.5 h-3.5" />
                <span>Launch</span>
              </button>
            )}
            <button
              onClick={handleCopy}
              className="px-3 py-1.5 bg-white hover:bg-slate-50 text-slate-700
                border border-slate-200 rounded text-xs transition-colors flex items-center gap-1.5"
              title="Copy pull command"
            >
              <Copy className="w-3.5 h-3.5" />
              <span>Copy</span>
            </button>
          </div>
        </div>
      </div>
      {/* Launch Modal */}
      {launchModalOpen && tag.repositoryName && tag.tag && (
        <LaunchModal
          isOpen={launchModalOpen}
@ -155,9 +138,6 @@ export const TagCard: React.FC<TagCardProps> = ({ registryId, registryUrl, tag }
 const normalizeRegistryHost = (url?: string) => {
  if (!url) return "";
-  try {
+  try { return new URL(url).host; }
-    return new URL(url).host;
+  catch { return url.replace(/^https?:\/\//, "").replace(/\/+$/, ""); }
  } catch {
    return url.replace(/^https?:\/\//, "").replace(/\/+$/, "");
  }
 };
--- a/frontend/src/features/artifact/registries/pages/ArtifactBrowserPage.tsx
+++ b/frontend/src/features/artifact/registries/pages/ArtifactBrowserPage.tsx
@ -11,6 +11,8 @@ import {
  Search,
  ChevronRight,
  ChevronDown,
  ChevronLeft,
  LayoutGrid,
 } from "lucide-react";
 import { useToast } from "@/shared";
 import {
@ -67,7 +69,9 @@ const ArtifactBrowserPage: React.FC = () => {
  const [artifactError, setArtifactError] = useState<string | null>(null);
  const [filter, setFilter] = useState<ListArtifactsFilter | undefined>("chart");
  const [sidebarCollapsed, setSidebarCollapsed] = useState(false);
  const [searchTerm, setSearchTerm] = useState("");
  const [tagSearchTerm, setTagSearchTerm] = useState("");
  const loadArtifacts = useCallback(
    async (
@ -249,6 +253,12 @@ const ArtifactBrowserPage: React.FC = () => {
      );
  }, [registryNodes, searchTerm]);
  // Artifacts whose tag contains the tag-search term (trimmed, case-insensitive).
  // A blank term returns the full artifacts array unchanged.
  const filteredArtifacts = useMemo(() => {
    const needle = tagSearchTerm.trim().toLowerCase();
    if (needle.length === 0) {
      return artifacts;
    }
    return artifacts.filter((artifact) => {
      const tagName = artifact.tag || "";
      return tagName.toLowerCase().includes(needle);
    });
  }, [artifacts, tagSearchTerm]);
  const selectedRegistryName = selectedRepository
    ? registryNodes.find((node) => node.registry.id === selectedRepository.registryId)?.registry
        .name
@ -283,120 +293,170 @@ const ArtifactBrowserPage: React.FC = () => {
        </div>
      </div>
      <div className="flex-1 flex overflow-hidden bg-slate-50">
-        <aside className="w-80 border-r border-slate-200 bg-white flex flex-col">
+        {/* Collapsible side panel */}
-          <div className="p-4 border-b border-slate-200 space-y-2">
+        <aside
-            <div className="relative">
+          className={`border-r border-slate-200 bg-white flex flex-col transition-all duration-200 ${
-              <Search className="absolute left-2 top-1/2 -translate-y-1/2 w-4 h-4 text-slate-500" />
+            sidebarCollapsed ? "w-12" : "w-80"
-              <input
+          }`}
-                type="text"
+        >
-                placeholder="Search registries / repositories..."
+          {sidebarCollapsed ? (
-                value={searchTerm}
+            /* Collapsed state: narrow strip with just a toggle */
-                onChange={(e) => setSearchTerm(e.target.value)}
+            <div className="flex flex-col items-center pt-3 gap-4">
-                className="w-full pl-8 pr-3 py-2 rounded-lg bg-white border border-slate-200 text-sm text-slate-900 placeholder-gray-500 focus:outline-none focus:ring-2 focus:ring-blue-500"
+              <button
-              />
+                onClick={() => setSidebarCollapsed(false)}
-            </div>
+                className="p-1.5 hover:bg-slate-100 rounded-md transition"
-            {repositoryError && (
+                title="Expand sidebar"
-              <p className="text-xs text-red-400">{repositoryError}</p>
+              >
-            )}
+                <ChevronRight className="w-4 h-4 text-slate-500" />
-            <div className="flex items-center justify-between text-xs text-slate-500">
+              </button>
-              <span>Registries</span>
+              {registryNodes.slice(0, 5).map((node) => (
-              <Badge variant="secondary">{registryNodes.length}</Badge>
+                <div
-            </div>
+                  key={node.registry.id || node.registry.name}
-          </div>
+                  className="w-7 h-7 rounded-md bg-blue-50 flex items-center justify-center"
-
+                  title={node.registry.name || "Registry"}
-          <div className="flex-1 overflow-y-auto custom-scrollbar">
+                >
-            {loadingRegistries ? (
+                  <Database className="w-3.5 h-3.5 text-blue-600" />
              <div className="p-4">
                <LoadingState message="Loading registries..." />
              </div>
            ) : filteredNodes.length === 0 ? (
              <div className="p-4">
                <EmptyState
                  icon={Database}
                  title="No registries"
                description="Add a Harbor registry to browse deployable charts."
                />
              </div>
            ) : (
              filteredNodes.map((node) => (
                <div key={node.registry.id || node.registry.name}>
                  <button
                    onClick={() => toggleRegistry(node.registry.id)}
                    className="w-full flex items-center justify-between px-4 py-3 hover:bg-slate-50 transition"
                  >
                    <div className="flex items-center gap-2">
                      {node.expanded ? (
                        <ChevronDown className="w-4 h-4 text-slate-500" />
                      ) : (
                        <ChevronRight className="w-4 h-4 text-slate-500" />
                      )}
                      <Database className="w-4 h-4 text-blue-600" />
                      <div className="text-left">
                        <p className="text-sm text-slate-900">{node.registry.name || "Unnamed"}</p>
                        <p className="text-[11px] text-slate-500 truncate">
                          {node.registry.url}
                        </p>
                      </div>
                    </div>
                    <Badge variant="secondary">{node.repositories.length}</Badge>
                  </button>
                  {node.expanded && (
                    <div className="bg-slate-50/60">
                      {node.repositories.length === 0 ? (
                        <p className="px-8 py-3 text-xs text-slate-500">
                          {loadingRepositories
                            ? "Loading repositories..."
                            : "No chart repositories found."}
                        </p>
                      ) : (
                        node.repositories.map((repo) => {
                          const isSelected =
                            selectedRepository?.registryId === repo.registryId &&
                            selectedRepository?.name === repo.name;
                          return (
                            <button
                              key={`${repo.registryId}-${repo.name}`}
                              onClick={() => handleRepositoryClick(repo)}
                              className={`w-full text-left px-8 py-2 flex items-center justify-between text-sm transition ${
                                isSelected
                                  ? "bg-blue-50 text-blue-700"
                                  : "hover:bg-white/80 text-slate-700"
                              }`}
                            >
                              <span className="truncate">{repo.name}</span>
                              {repo.artifactCount !== undefined && (
                                <span className="text-xs text-slate-500">
                                  {repo.artifactCount}
                                </span>
                              )}
                            </button>
                          );
                        })
                      )}
                    </div>
                  )}
                </div>
-              ))
+              ))}
-            )}
+              {registryNodes.length > 5 && (
-          </div>
+                <span className="text-[10px] text-slate-400">
                  +{registryNodes.length - 5}
                </span>
              )}
            </div>
          ) : (
            /* Expanded state: full sidebar */
            <>
              <div className="p-4 border-b border-slate-200 space-y-2">
                <div className="flex items-center gap-2">
                  <button
                    onClick={() => setSidebarCollapsed(true)}
                    className="p-1 hover:bg-slate-100 rounded-md transition flex-shrink-0"
                    title="Collapse sidebar"
                  >
                    <ChevronLeft className="w-4 h-4 text-slate-500" />
                  </button>
                  <div className="relative flex-1">
                    <Search className="absolute left-2 top-1/2 -translate-y-1/2 w-4 h-4 text-slate-500" />
                    <input
                      type="text"
                      placeholder="Search registries / repositories..."
                      value={searchTerm}
                      onChange={(e) => setSearchTerm(e.target.value)}
                      className="w-full pl-8 pr-3 py-2 rounded-lg bg-white border border-slate-200 text-sm text-slate-900 placeholder-gray-500 focus:outline-none focus:ring-2 focus:ring-blue-500"
                    />
                  </div>
                </div>
                {repositoryError && (
                  <p className="text-xs text-red-400">{repositoryError}</p>
                )}
                <div className="flex items-center justify-between text-xs text-slate-500">
                  <span>Registries</span>
                  <Badge variant="secondary">{registryNodes.length}</Badge>
                </div>
              </div>
              <div className="flex-1 overflow-y-auto custom-scrollbar">
                {loadingRegistries ? (
                  <div className="p-4">
                    <LoadingState message="Loading registries..." />
                  </div>
                ) : filteredNodes.length === 0 ? (
                  <div className="p-4">
                    <EmptyState
                      icon={Database}
                      title="No registries"
                      description="Add a Harbor registry to browse deployable charts."
                    />
                  </div>
                ) : (
                  filteredNodes.map((node) => (
                    <div key={node.registry.id || node.registry.name}>
                      <button
                        onClick={() => toggleRegistry(node.registry.id)}
                        className="w-full flex items-center justify-between px-4 py-3 hover:bg-slate-50 transition"
                      >
                        <div className="flex items-center gap-2 min-w-0">
                          {node.expanded ? (
                            <ChevronDown className="w-4 h-4 text-slate-500 flex-shrink-0" />
                          ) : (
                            <ChevronRight className="w-4 h-4 text-slate-500 flex-shrink-0" />
                          )}
                          <Database className="w-4 h-4 text-blue-600 flex-shrink-0" />
                          <div className="text-left min-w-0">
                            <p className="text-sm text-slate-900 truncate">
                              {node.registry.name || "Unnamed"}
                            </p>
                            <p className="text-[11px] text-slate-500 truncate">
                              {node.registry.url}
                            </p>
                          </div>
                        </div>
                        <Badge variant="secondary" className="flex-shrink-0">
                          {node.repositories.length}
                        </Badge>
                      </button>
                      {node.expanded && (
                        <div className="bg-slate-50/60">
                          {node.repositories.length === 0 ? (
                            <p className="px-8 py-3 text-xs text-slate-500">
                              {loadingRepositories
                                ? "Loading repositories..."
                                : "No chart repositories found."}
                            </p>
                          ) : (
                            node.repositories.map((repo) => {
                              const isSelected =
                                selectedRepository?.registryId === repo.registryId &&
                                selectedRepository?.name === repo.name;
                              return (
                                <button
                                  key={`${repo.registryId}-${repo.name}`}
                                  onClick={() => handleRepositoryClick(repo)}
                                  className={`w-full text-left px-8 py-2 flex items-center justify-between text-sm transition ${
                                    isSelected
                                      ? "bg-blue-50 text-blue-700"
                                      : "hover:bg-white/80 text-slate-700"
                                  }`}
                                >
                                  <span className="truncate">{repo.name}</span>
                                  {repo.artifactCount !== undefined && (
                                    <span className="text-xs text-slate-500 flex-shrink-0">
                                      {repo.artifactCount}
                                    </span>
                                  )}
                                </button>
                              );
                            })
                          )}
                        </div>
                      )}
                    </div>
                  ))
                )}
              </div>
            </>
          )}
        </aside>
        <main className="flex-1 flex flex-col bg-white overflow-hidden">
          {!selectedRepository ? (
            /* Placeholder when no repo is selected */
            <div className="flex-1 flex items-center justify-center">
              <EmptyState
-                icon={Package}
+                icon={LayoutGrid}
-                title="Select a repository"
+                title="Select a chart"
-                description="Choose a chart repository from the left panel."
+                description="Select a chart from the left panel to view versions"
              />
            </div>
          ) : (
            <>
              {/* Right panel header */}
              <div className="flex-shrink-0 border-b border-slate-200 p-5 bg-slate-50">
                <div className="flex items-center justify-between flex-wrap gap-4">
                  <div>
                    <p className="text-xs uppercase text-slate-500">Chart repository</p>
-                    <h2 className="text-2xl font-semibold text-slate-900">
+                    <h2 className="text-2xl font-semibold text-slate-900 truncate">
                      {selectedRepository.name}
                    </h2>
                    <p className="text-sm text-slate-500">
@ -422,7 +482,27 @@ const ArtifactBrowserPage: React.FC = () => {
                </div>
              </div>
-              <div className="flex-1 overflow-y-auto p-5">
+              {/* Tag search bar */}
              <div className="flex-shrink-0 px-5 pt-4 pb-2">
                <div className="relative max-w-xs">
                  <Search className="absolute left-2 top-1/2 -translate-y-1/2 w-4 h-4 text-slate-400" />
                  <input
                    type="text"
                    placeholder="Filter tags by version..."
                    value={tagSearchTerm}
                    onChange={(e) => setTagSearchTerm(e.target.value)}
                    className="w-full pl-8 pr-3 py-1.5 rounded-lg bg-white border border-slate-200 text-sm text-slate-900 placeholder-gray-400 focus:outline-none focus:ring-2 focus:ring-blue-500"
                  />
                </div>
                {tagSearchTerm && (
                  <p className="text-xs text-slate-500 mt-1.5">
                    Showing {filteredArtifacts.length} of {artifacts.length} tags
                  </p>
                )}
              </div>
              {/* Tag grid */}
              <div className="flex-1 overflow-y-auto p-5 pt-2">
                {artifactError && (
                  <p className="text-sm text-red-400 mb-3">{artifactError}</p>
                )}
@ -438,14 +518,21 @@ const ArtifactBrowserPage: React.FC = () => {
                        : "This repository doesn't contain any tagged artifacts yet."
                    }
                  />
                ) : filteredArtifacts.length === 0 ? (
                  <EmptyState
                    icon={Search}
                    title="No matching tags"
                    description={`No tags matching "${tagSearchTerm}" found.`}
                  />
                ) : (
-                  <div className="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4">
+                  <div className="grid grid-cols-1 sm:grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
-                    {artifacts.map((artifact, index) => (
+                    {filteredArtifacts.map((artifact, index) => (
                      <TagCard
                        key={`${artifact.repositoryName || "repo"}-${artifact.tag || index}`}
                        registryId={selectedRepository.registryId}
                        registryUrl={selectedRegistryUrl}
                        tag={artifact}
                        /* `index` is the position inside filteredArtifacts, so `index === 0`
                           would flag whichever tag matches the search first. Compare against
                           the head of the unfiltered list instead (assumes artifacts[0] is the
                           latest tag, as the original index check did — confirm ordering). */
                        isLatest={artifact === artifacts[0]}
                      />
                    ))}
                  </div>
--- a/frontend/src/features/auth/pages/AuthPage.tsx
+++ b/frontend/src/features/auth/pages/AuthPage.tsx
@ -1,9 +1,9 @@
-import React, { useState } from "react";
+import React, { useState, useEffect } from "react";
 import { useNavigate } from "react-router-dom";
-import { LogIn, Loader2, ShieldCheck } from "lucide-react";
+import { LogIn, Loader2, ShieldCheck, UserPlus } from "lucide-react";
 import { useToast } from "@/shared";
 import { getErrorMessage } from "@/shared/utils/handleApiError";
-import { login as apiLogin, type AuthResponse } from "@/api";
+import { login as apiLogin, fetchAuthStatus, setupInitialAdmin, type AuthResponse } from "@/api";
 type Props = {
  onLogin: (response: AuthResponse) => void;
@ -13,12 +13,76 @@ const AuthPage: React.FC<Props> = ({ onLogin }) => {
  const navigate = useNavigate();
  const { success: toastSuccess, error: toastError, info: toastInfo } = useToast();
  // Auth status
  const [needsSetup, setNeedsSetup] = useState<boolean | null>(null);
  const [checkingStatus, setCheckingStatus] = useState(true);
  // Login form
  const [loginUsername, setLoginUsername] = useState("");
  const [loginPassword, setLoginPassword] = useState("");
  const [loginLoading, setLoginLoading] = useState(false);
  const [loginError, setLoginError] = useState<string | null>(null);
  // Setup form
  const [setupUsername, setSetupUsername] = useState("");
  const [setupPassword, setSetupPassword] = useState("");
  const [setupEmail, setSetupEmail] = useState("");
  const [setupLoading, setSetupLoading] = useState(false);
  const [setupError, setSetupError] = useState<string | null>(null);
  // On mount, ask the backend whether the first-admin setup screen should be shown
  useEffect(() => {
    // Flipped by the cleanup function so a late response cannot update state afterwards.
    let active = true;

    // Record the outcome and end the "checking" phase, unless the effect was cleaned up.
    const finish = (setupRequired: boolean) => {
      if (!active) return;
      setNeedsSetup(setupRequired);
      setCheckingStatus(false);
    };

    fetchAuthStatus()
      .then((status) => finish(status.needsSetup))
      // Any failure falls back to the regular login view.
      .catch(() => finish(false));

    return () => {
      active = false;
    };
  }, []);
  // First-run setup: register the initial administrator, then sign in with the returned tokens
  const handleSetup = async (e: React.FormEvent) => {
    e.preventDefault();

    // Username and password are mandatory; email is optional.
    const missingCredentials = !setupUsername || !setupPassword;
    if (missingCredentials) return;

    setSetupLoading(true);
    setSetupError(null);
    toastInfo("Creating admin account...", { title: "Setup", durationMs: 1200 });

    try {
      const { accessToken, refreshToken } = await setupInitialAdmin({
        username: setupUsername,
        password: setupPassword,
        // An empty email field is sent as undefined rather than "".
        email: setupEmail || undefined,
      });

      // The setup call already returns tokens, so no separate login request is made.
      // NOTE(review): the `as any` cast bypasses the AuthResponse type — confirm the shapes match.
      onLogin({ accessToken, refreshToken, username: setupUsername } as any);
      toastSuccess("Admin account created. Welcome!");
      navigate("/home", { replace: true });
    } catch (err: unknown) {
      // Show the failure both inline under the form and as a toast.
      const failure = getErrorMessage(err, "Setup failed. Please try again later.");
      setSetupError(failure);
      toastError(failure);
    } finally {
      setSetupLoading(false);
    }
  };
  // Handle login
  const handleLogin = async (e: React.FormEvent) => {
    e.preventDefault();
@ -30,8 +94,6 @@ const AuthPage: React.FC<Props> = ({ onLogin }) => {
    try {
      const response = await apiLogin({ username: loginUsername, password: loginPassword });
      // JWT response shape: { access_token, refresh_token, username, ... }
      toastSuccess(`Welcome, ${response.username}!`);
      onLogin(response);
      navigate("/home", { replace: true });
@ -40,7 +102,6 @@ const AuthPage: React.FC<Props> = ({ onLogin }) => {
      const msg = raw?.message?.includes("Failed to fetch")
        ? "Network or CORS error: Please check backend CORS or use Vite proxy in development."
        : getErrorMessage(err, "Login failed. Please try again later.");
      setLoginError(msg);
      toastError(msg);
    } finally {
@ -48,25 +109,36 @@ const AuthPage: React.FC<Props> = ({ onLogin }) => {
    }
  };
-  return (
+  if (checkingStatus) {
-    <div className="relative min-h-screen bg-slate-50 text-slate-900 flex items-center justify-center px-4 sm:px-6">
+    return (
-      <div className="pointer-events-none absolute inset-0 bg-app-gradient opacity-90" aria-hidden="true" />
+      <div className="relative min-h-screen bg-slate-50 flex items-center justify-center">
-      <div className="relative w-full max-w-md p-6 sm:p-7 bg-white/95 border border-slate-200 rounded-lg shadow-2xl backdrop-blur-xl">
+        <Loader2 className="w-8 h-8 text-blue-500 animate-spin" />
-        <div className="animate-fadeIn">
+      </div>
    );
  }
  // Setup view — first admin registration
  if (needsSetup) {
    return (
      <div className="relative min-h-screen bg-slate-50 text-slate-900 flex items-center justify-center px-4 sm:px-6">
        <div className="pointer-events-none absolute inset-0 bg-app-gradient opacity-90" aria-hidden="true" />
        <div className="relative w-full max-w-md p-6 sm:p-7 bg-white/95 border border-slate-200 rounded-lg shadow-2xl backdrop-blur-xl">
          <div className="animate-fadeIn">
            <header className="mb-6 text-center">
-              <ShieldCheck className="w-11 h-11 text-blue-600 mx-auto mb-3" />
+              <ShieldCheck className="w-11 h-11 text-emerald-600 mx-auto mb-3" />
-              <h1 className="text-2xl font-semibold text-slate-900">OCDP Console</h1>
+              <h1 className="text-2xl font-semibold text-slate-900">OCDP Initial Setup</h1>
-              <p className="text-slate-600 text-sm mt-1">Sign in with an account created by an administrator</p>
+              <p className="text-slate-600 text-sm mt-1">Create the first administrator account to get started.</p>
            </header>
-            <form onSubmit={handleLogin} className="space-y-4">
+            <form onSubmit={handleSetup} className="space-y-4">
              <div>
-                <label className="block text-sm text-slate-600">Username</label>
+                <label className="block text-sm text-slate-600">Admin Username</label>
                <input
-                  value={loginUsername}
+                  value={setupUsername}
-                  onChange={(e) => setLoginUsername(e.target.value)}
+                  onChange={(e) => setSetupUsername(e.target.value)}
-                  className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-blue-500 focus:border-blue-600 focus:outline-none transition-shadow"
+                  className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-emerald-500 focus:border-emerald-600 focus:outline-none transition-shadow"
                  autoComplete="username"
                  autoFocus
                  required
                />
              </div>
@ -75,26 +147,92 @@ const AuthPage: React.FC<Props> = ({ onLogin }) => {
                <label className="block text-sm text-slate-600">Password</label>
                <input
                  type="password"
-                  value={loginPassword}
+                  value={setupPassword}
-                  onChange={(e) => setLoginPassword(e.target.value)}
+                  onChange={(e) => setSetupPassword(e.target.value)}
-                  className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-blue-500 focus:border-blue-600 focus:outline-none transition-shadow"
+                  className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-emerald-500 focus:border-emerald-600 focus:outline-none transition-shadow"
-                  autoComplete="current-password"
+                  autoComplete="new-password"
                  required
                />
              </div>
              <div>
                <label className="block text-sm text-slate-600">Email (optional)</label>
                <input
                  type="email"
                  value={setupEmail}
                  onChange={(e) => setSetupEmail(e.target.value)}
                  className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-emerald-500 focus:border-emerald-600 focus:outline-none transition-shadow"
                  autoComplete="email"
                  placeholder="admin@example.com"
                />
              </div>
              <button
                type="submit"
-                disabled={loginLoading}
+                disabled={setupLoading}
                className={`w-full disabled:opacity-60 font-semibold py-2.5 rounded-lg flex items-center justify-center gap-2 transition-colors duration-150
-                  ${loginLoading ? "bg-blue-500 cursor-wait text-white" : "bg-blue-600 text-white hover:bg-blue-700"}`}
+                  ${setupLoading ? "bg-emerald-500 cursor-wait text-white" : "bg-emerald-600 text-white hover:bg-emerald-700"}`}
              >
-                {loginLoading ? <Loader2 className="w-4 h-4 animate-spin" /> : <LogIn className="w-4 h-4" />}
+                {setupLoading ? <Loader2 className="w-4 h-4 animate-spin" /> : <UserPlus className="w-4 h-4" />}
-                {loginLoading ? "Logging in..." : "Login"}
+                {setupLoading ? "Creating..." : "Create Admin Account"}
              </button>
-              {loginError && <p className="text-red-400 text-center text-sm">{loginError}</p>}
+              {setupError && <p className="text-red-400 text-center text-sm">{setupError}</p>}
            </form>
          </div>
        </div>
      </div>
    );
  }
  // Regular login view
  return (
    <div className="relative min-h-screen bg-slate-50 text-slate-900 flex items-center justify-center px-4 sm:px-6">
      <div className="pointer-events-none absolute inset-0 bg-app-gradient opacity-90" aria-hidden="true" />
      <div className="relative w-full max-w-md p-6 sm:p-7 bg-white/95 border border-slate-200 rounded-lg shadow-2xl backdrop-blur-xl">
        <div className="animate-fadeIn">
          <header className="mb-6 text-center">
            <ShieldCheck className="w-11 h-11 text-blue-600 mx-auto mb-3" />
            <h1 className="text-2xl font-semibold text-slate-900">OCDP Console</h1>
            <p className="text-slate-600 text-sm mt-1">Sign in with an account created by an administrator</p>
          </header>
          <form onSubmit={handleLogin} className="space-y-4">
            <div>
              <label className="block text-sm text-slate-600">Username</label>
              <input
                value={loginUsername}
                onChange={(e) => setLoginUsername(e.target.value)}
                className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-blue-500 focus:border-blue-600 focus:outline-none transition-shadow"
                autoComplete="username"
                required
              />
            </div>
            <div>
              <label className="block text-sm text-slate-600">Password</label>
              <input
                type="password"
                value={loginPassword}
                onChange={(e) => setLoginPassword(e.target.value)}
                className="mt-1 w-full bg-white border border-slate-200 rounded-lg p-2 text-slate-900 focus:ring-2 focus:ring-blue-500 focus:border-blue-600 focus:outline-none transition-shadow"
                autoComplete="current-password"
                required
              />
            </div>
            <button
              type="submit"
              disabled={loginLoading}
              className={`w-full disabled:opacity-60 font-semibold py-2.5 rounded-lg flex items-center justify-center gap-2 transition-colors duration-150
                ${loginLoading ? "bg-blue-500 cursor-wait text-white" : "bg-blue-600 text-white hover:bg-blue-700"}`}
            >
              {loginLoading ? <Loader2 className="w-4 h-4 animate-spin" /> : <LogIn className="w-4 h-4" />}
              {loginLoading ? "Logging in..." : "Login"}
            </button>
            {loginError && <p className="text-red-400 text-center text-sm">{loginError}</p>}
          </form>
        </div>
      </div>
    </div>
--- a/frontend/src/features/configuration/users/pages/UserManagementPage.tsx
+++ b/frontend/src/features/configuration/users/pages/UserManagementPage.tsx
@ -1,11 +1,13 @@
-import React, { useEffect, useMemo, useState } from "react";
+import React, { useCallback, useEffect, useMemo, useState } from "react";
-import { Gauge, KeyRound, Pencil, RefreshCw, Shield, Trash2, UserPlus, Users, X } from "lucide-react";
+import { Gauge, KeyRound, Pencil, RefreshCw, Shield, Trash2, UserPlus, Users, X, type LucideIcon } from "lucide-react";
 import { createUser, listClusters, listUsers, updateUser, deleteUser, type ClusterResponse, type UserResponse } from "@/api";
 import { useToast } from "@/shared";
 import { Button, Input, Badge, LoadingState } from "@/shared/components";
 import { formatApiError } from "@/shared/utils";
 import { useAuth } from "@/app/providers";
 type LimitsEditorMode = "edit" | "downgrade";
 const UserManagementPage: React.FC = () => {
  const { user } = useAuth();
  const { success, error: toastError } = useToast();
@ -18,8 +20,8 @@ const UserManagementPage: React.FC = () => {
  const [role, setRole] = useState("user");
  const [namespace, setNamespace] = useState("");
  const [defaultClusterId, setDefaultClusterId] = useState("");
-  const [quotaCpu, setQuotaCpu] = useState("4");
+  const [quotaCpu, setQuotaCpu] = useState("");
-  const [quotaMemory, setQuotaMemory] = useState("16Gi");
+  const [quotaMemory, setQuotaMemory] = useState("");
  const [quotaGpu, setQuotaGpu] = useState("0");
  const [quotaGpuMemory, setQuotaGpuMemory] = useState("0");
  const [mustChangePassword, setMustChangePassword] = useState(true);
@ -31,13 +33,14 @@ const UserManagementPage: React.FC = () => {
  const [editQuotaGpu, setEditQuotaGpu] = useState("");
  const [editQuotaGpuMemory, setEditQuotaGpuMemory] = useState("");
  const [savingLimits, setSavingLimits] = useState(false);
  const [limitsEditorMode, setLimitsEditorMode] = useState<LimitsEditorMode>("edit");
  const sortedUsers = useMemo(
    () => [...users].sort((a, b) => (a.username ?? "").localeCompare(b.username ?? "")),
    [users]
  );
-  const loadUsers = async () => {
+  const loadUsers = useCallback(async () => {
    setLoading(true);
    try {
      setUsers(await listUsers());
@ -46,9 +49,9 @@ const UserManagementPage: React.FC = () => {
    } finally {
      setLoading(false);
    }
-  };
+  }, [toastError]);
-  const loadClusters = async () => {
+  const loadClusters = useCallback(async () => {
    try {
      const data = await listClusters();
      const available = data.filter((cluster) => typeof cluster.id === "string" && cluster.id.length > 0);
@ -57,12 +60,12 @@ const UserManagementPage: React.FC = () => {
    } catch (err) {
      toastError(formatApiError(err) || "Failed to load clusters");
    }
-  };
+  }, [toastError]);
  useEffect(() => {
    void loadUsers();
    void loadClusters();
-  }, []);
+  }, [loadClusters, loadUsers]);
  const handleCreate = async (event: React.FormEvent) => {
    event.preventDefault();
@ -96,8 +99,8 @@ const UserManagementPage: React.FC = () => {
      setRole("user");
      setNamespace("");
      setDefaultClusterId(clusters[0]?.id || "");
-      setQuotaCpu("4");
+      setQuotaCpu("");
-      setQuotaMemory("16Gi");
+      setQuotaMemory("");
      setQuotaGpu("0");
      setQuotaGpuMemory("0");
      setMustChangePassword(true);
@ -135,6 +138,10 @@ const UserManagementPage: React.FC = () => {
  const toggleRole = async (target: UserResponse) => {
    if (!target.id) return;
    const nextRole = target.role === "admin" ? "user" : "admin";
    if (nextRole === "user") {
      openLimitsEditor(target, "downgrade");
      return;
    }
    try {
      await updateUser(target.id, { role: nextRole });
      success("User role updated");
@ -156,12 +163,13 @@ const UserManagementPage: React.FC = () => {
    }
  };
-  const openLimitsEditor = (target: UserResponse) => {
+  const openLimitsEditor = (target: UserResponse, mode: LimitsEditorMode = "edit") => {
    setLimitsEditorMode(mode);
    setEditingLimits(target);
    setEditNamespace(target.namespace || namespaceForUsername(target.username || ""));
    setEditDefaultClusterId(target.defaultClusterId || clusters[0]?.id || "");
-    setEditQuotaCpu(target.quotaCpu || "4");
+    setEditQuotaCpu(target.quotaCpu || "");
-    setEditQuotaMemory(target.quotaMemory || "16Gi");
+    setEditQuotaMemory(target.quotaMemory || "");
    setEditQuotaGpu(target.quotaGpu || "0");
    setEditQuotaGpuMemory(target.quotaGpuMemory || "0");
  };
@ -172,14 +180,15 @@ const UserManagementPage: React.FC = () => {
    setSavingLimits(true);
    try {
      await updateUser(editingLimits.id, {
        ...(limitsEditorMode === "downgrade" ? { role: "user" } : {}),
        namespace: editNamespace.trim(),
        defaultClusterId: editDefaultClusterId.trim(),
-        quotaCpu: editQuotaCpu.trim(),
+        quotaCpu: quotaForApi(editQuotaCpu, true),
-        quotaMemory: editQuotaMemory.trim(),
+        quotaMemory: quotaForApi(editQuotaMemory, true),
        quotaGpu: editQuotaGpu.trim(),
        quotaGpuMemory: editQuotaGpuMemory.trim(),
      });
-      success("User limits updated");
+      success(limitsEditorMode === "downgrade" ? "User role and limits updated" : "User limits updated");
      setEditingLimits(null);
      await loadUsers();
    } catch (err) {
@ -190,7 +199,7 @@ const UserManagementPage: React.FC = () => {
  };
  return (
-    <div className="mx-auto max-w-7xl px-4 py-6 sm:px-6 lg:px-8">
+    <div className="mx-auto max-w-screen-2xl px-4 py-6 sm:px-6 lg:px-8">
      <div className="mb-6 flex flex-col gap-3 sm:flex-row sm:items-end sm:justify-between">
        <div>
          <div className="mb-2 flex items-center gap-2 text-sm font-medium text-blue-700">
@ -207,7 +216,7 @@ const UserManagementPage: React.FC = () => {
        </Button>
      </div>
-      <div className="grid gap-6 lg:grid-cols-[360px_1fr]">
+      <div className="grid gap-6 xl:grid-cols-[380px_minmax(0,1fr)]">
        <form onSubmit={handleCreate} className="rounded-lg border border-slate-200 bg-white p-5 shadow-soft">
          <div className="mb-4 flex items-center gap-2">
            <UserPlus className="h-5 w-5 text-blue-600" />
@ -281,11 +290,11 @@ const UserManagementPage: React.FC = () => {
                <div className="grid grid-cols-2 gap-3">
                  <label className="block text-sm font-medium text-slate-700">
                    CPU
-                    <Input value={quotaCpu} onChange={(e) => setQuotaCpu(e.target.value)} className="mt-1" placeholder="4" />
+                    <Input value={quotaCpu} onChange={(e) => setQuotaCpu(e.target.value)} className="mt-1" placeholder="Unlimited" />
                  </label>
                  <label className="block text-sm font-medium text-slate-700">
                    Memory
-                    <Input value={quotaMemory} onChange={(e) => setQuotaMemory(e.target.value)} className="mt-1" placeholder="16Gi" />
+                    <Input value={quotaMemory} onChange={(e) => setQuotaMemory(e.target.value)} className="mt-1" placeholder="Unlimited" />
                  </label>
                  <label className="block text-sm font-medium text-slate-700">
                    GPU
@ -297,7 +306,7 @@ const UserManagementPage: React.FC = () => {
                  </label>
                </div>
                <p className="mt-2 text-xs text-slate-500">
-                  CPU and memory use Kubernetes quantities. GPU memory is an integer MB value, for example 10000.
+                  Leave CPU or memory blank for no platform quota. GPU and GPU memory default to 0. GPU memory is integer MB, for example 10000.
                </p>
              </div>
            )}
@ -327,90 +336,87 @@ const UserManagementPage: React.FC = () => {
          {loading ? (
            <LoadingState message="Loading users..." />
          ) : (
-            <div className="overflow-x-auto">
+            <div className="divide-y divide-slate-100">
-              <table className="min-w-full divide-y divide-slate-200 text-sm">
+              {sortedUsers.map((target) => (
-                <thead className="bg-slate-50 text-left text-xs font-semibold uppercase tracking-wide text-slate-500">
+                <article key={target.id} className="min-w-0 overflow-hidden px-4 py-4 hover:bg-slate-50 sm:px-5">
-                  <tr>
+                  <div className="flex min-w-0 flex-col gap-4 xl:flex-row xl:items-start xl:justify-between">
-                    <th className="px-5 py-3">User</th>
+                    <div className="min-w-0">
-                    <th className="px-5 py-3">Role</th>
+                      <div className="flex flex-wrap items-center gap-2">
-                    <th className="px-5 py-3">Status</th>
+                        <h3 className="min-w-0 truncate text-sm font-semibold text-slate-950">{target.username}</h3>
                    <th className="px-5 py-3">Namespace</th>
                    <th className="px-5 py-3">Quota</th>
                    <th className="sticky right-0 z-10 bg-slate-50 px-5 py-3 text-right shadow-[-12px_0_18px_-18px_rgba(15,23,42,0.35)]">
                      Actions
                    </th>
                  </tr>
                </thead>
                <tbody className="divide-y divide-slate-100">
                  {sortedUsers.map((target) => (
                    <tr key={target.id} className="group hover:bg-slate-50">
                      <td className="px-5 py-3">
                        <div className="font-medium text-slate-900">{target.username}</div>
                        <div className="text-xs text-slate-500">{target.email}</div>
                      </td>
                      <td className="px-5 py-3">
                        <Badge variant={target.role === "admin" ? "info" : "secondary"} size="sm">
                          {target.role}
                        </Badge>
                      </td>
                      <td className="px-5 py-3">
                        <Badge variant={target.isActive ? "success" : "warning"} size="sm">
                          {target.isActive ? "Active" : "Disabled"}
                        </Badge>
-                      </td>
+                      </div>
-                      <td className="px-5 py-3">
+                      <div className="mt-1 truncate text-xs text-slate-500">{target.email || target.id}</div>
-                        <div className="font-mono text-xs text-slate-700">{target.namespace || "-"}</div>
+                    </div>
-                        <div className="text-xs text-slate-500">{target.workspaceName || target.workspaceId}</div>
+
-                        {target.defaultClusterId && (
+                    <div className="grid w-full min-w-0 grid-cols-2 gap-2 sm:grid-cols-4 xl:w-auto xl:min-w-[360px] xl:max-w-[480px]">
-                          <div className="mt-1 text-xs text-blue-700">{clusterName(clusters, target.defaultClusterId)}</div>
+                      <ActionButton onClick={() => toggleRole(target)} title={`Make ${target.role === "admin" ? "User" : "Admin"}`}>
-                        )}
+                        {target.role === "admin" ? "To User" : "To Admin"}
-                      </td>
+                      </ActionButton>
-                      <td className="px-5 py-3 text-xs text-slate-600">
+                      {target.role !== "admin" && (
-                        {target.role === "admin" ? (
+                        <ActionButton variant="secondary" icon={Pencil} onClick={() => openLimitsEditor(target)}>
-                          <span className="text-slate-400">default workspace</span>
+                          Limits
-                        ) : (
+                        </ActionButton>
-                          <div className="grid gap-1">
+                      )}
-                            <span>CPU {target.quotaCpu || "-"}</span>
+                      <ActionButton
-                            <span>Mem {target.quotaMemory || "-"}</span>
+                        variant="secondary"
-                            <span>GPU {target.quotaGpu || "0"} / Mem {target.quotaGpuMemory || "0"}</span>
+                        onClick={() => toggleActive(target)}
-                          </div>
+                        disabled={target.id === user?.userId}
-                        )}
+                      >
-                      </td>
+                        {target.isActive ? "Disable" : "Enable"}
-                      <td className="sticky right-0 bg-white px-5 py-3 shadow-[-12px_0_18px_-18px_rgba(15,23,42,0.35)] group-hover:bg-slate-50">
+                      </ActionButton>
-                        <div className="grid w-[260px] grid-cols-2 gap-2">
+                      <ActionButton
-                          <Button type="button" variant="ghost" size="sm" onClick={() => toggleRole(target)}>
+                        variant="danger"
-                            Make {target.role === "admin" ? "User" : "Admin"}
+                        icon={Trash2}
-                          </Button>
+                        onClick={() => handleDelete(target)}
-                          {target.role !== "admin" && (
+                        disabled={target.id === user?.userId}
-                            <Button type="button" variant="secondary" size="sm" icon={Pencil} onClick={() => openLimitsEditor(target)}>
+                      >
-                              Limits
+                        Delete
-                            </Button>
+                      </ActionButton>
-                          )}
+                    </div>
-                          <Button
+                  </div>
-                            type="button"
+
-                            variant="secondary"
+                  <div className="mt-4 grid min-w-0 gap-4 lg:grid-cols-[minmax(240px,0.85fr)_minmax(0,1.15fr)]">
-                            size="sm"
+                    <div className="min-w-0 rounded-md border border-slate-200 bg-slate-50 px-3 py-2">
-                            onClick={() => toggleActive(target)}
+                      <div className="grid gap-1 text-xs">
-                            disabled={target.id === user?.userId}
+                        <div className="flex min-w-0 items-center justify-between gap-3">
-                          >
+                          <span className="shrink-0 text-slate-500">Namespace</span>
-                            {target.isActive ? "Disable" : "Enable"}
+                          <span className="min-w-0 truncate font-mono text-slate-800">{target.namespace || "-"}</span>
                          </Button>
                          <Button
                            type="button"
                            variant="danger"
                            size="sm"
                            icon={Trash2}
                            onClick={() => handleDelete(target)}
                            disabled={target.id === user?.userId}
                          >
                            Delete
                          </Button>
                        </div>
-                      </td>
+                        <div className="flex min-w-0 items-center justify-between gap-3">
-                    </tr>
+                          <span className="shrink-0 text-slate-500">Workspace</span>
-                  ))}
+                          <span className="min-w-0 truncate text-slate-700">{target.workspaceName || target.workspaceId || "-"}</span>
-                </tbody>
+                        </div>
-              </table>
+                        <div className="flex min-w-0 items-center justify-between gap-3">
                          <span className="shrink-0 text-slate-500">Cluster</span>
                          <span className="min-w-0 truncate text-blue-700">
                            {target.defaultClusterId ? clusterName(clusters, target.defaultClusterId) : "-"}
                          </span>
                        </div>
                      </div>
                    </div>
                    <div className="min-w-0">
                      {target.role === "admin" ? (
                        <div className="inline-flex rounded-full border border-slate-200 bg-slate-50 px-3 py-1 text-xs text-slate-500">
                          default workspace
                        </div>
                      ) : (
                        <div className="grid min-w-0 grid-cols-2 gap-2 sm:grid-cols-4">
                          {quotaChip("CPU", target.quotaCpu || "Unlimited")}
                          {quotaChip("Memory", target.quotaMemory || "Unlimited")}
                          {quotaChip("GPU", target.quotaGpu || "0")}
                          {quotaChip("GPU Mem", target.quotaGpuMemory || "0")}
                        </div>
                      )}
                    </div>
                  </div>
                </article>
              ))}
            </div>
          )}
        </section>
@ -422,10 +428,14 @@ const UserManagementPage: React.FC = () => {
              <div>
                <div className="flex items-center gap-2 text-sm font-semibold text-blue-700">
                  <Gauge className="h-4 w-4" />
-                  Tenant limits
+                  {limitsEditorMode === "downgrade" ? "Convert admin to user" : "Tenant limits"}
                </div>
                <h2 className="mt-1 text-xl font-semibold text-slate-950">{editingLimits.username}</h2>
-                <p className="mt-1 text-sm text-slate-500">Changes are applied to workspace metadata and the next tenant binding/deploy refreshes Kubernetes ResourceQuota.</p>
+                <p className="mt-1 text-sm text-slate-500">
                  {limitsEditorMode === "downgrade"
                    ? "Set the tenant namespace, default cluster, and resource quota before applying the user role."
                    : "Changes are applied to workspace metadata and the next tenant binding/deploy refreshes Kubernetes ResourceQuota."}
                </p>
              </div>
              <button type="button" onClick={() => setEditingLimits(null)} className="rounded-lg p-2 text-slate-500 hover:bg-slate-100 hover:text-slate-900">
                <X className="h-5 w-5" />
@ -454,11 +464,11 @@ const UserManagementPage: React.FC = () => {
              <div className="grid gap-3 sm:grid-cols-2">
                <label className="block text-sm font-medium text-slate-700">
                  CPU
-                  <Input value={editQuotaCpu} onChange={(e) => setEditQuotaCpu(e.target.value)} className="mt-1" required />
+                  <Input value={editQuotaCpu} onChange={(e) => setEditQuotaCpu(e.target.value)} className="mt-1" placeholder="Unlimited" />
                </label>
                <label className="block text-sm font-medium text-slate-700">
                  Memory
-                  <Input value={editQuotaMemory} onChange={(e) => setEditQuotaMemory(e.target.value)} className="mt-1" required />
+                  <Input value={editQuotaMemory} onChange={(e) => setEditQuotaMemory(e.target.value)} className="mt-1" placeholder="Unlimited" />
                </label>
                <label className="block text-sm font-medium text-slate-700">
                  GPU
@ -475,7 +485,7 @@ const UserManagementPage: React.FC = () => {
                Cancel
              </Button>
              <Button type="submit" variant="primary" loading={savingLimits}>
-                Save Limits
+                {limitsEditorMode === "downgrade" ? "Save Role and Limits" : "Save Limits"}
              </Button>
            </div>
          </form>
@ -499,4 +509,41 @@ const clusterName = (clusters: ClusterResponse[], clusterId: string): string =>
  return cluster?.name || clusterId;
 };
 // Renders one quota entry as a small bordered chip: an uppercase caption above a
 // monospace value. Both rows keep the original Tailwind classes (the value row uses `truncate`).
 const quotaChip = (label: string, value: React.ReactNode): React.ReactElement => {
  const caption = <div className="text-[11px] font-medium uppercase tracking-wide text-slate-500">{label}</div>;
  const reading = <div className="mt-0.5 truncate font-mono text-sm text-slate-900">{value}</div>;
  return (
    <div className="min-w-0 rounded-md border border-slate-200 bg-white px-3 py-2">
      {caption}
      {reading}
    </div>
  );
 };
 /**
  * Compact wrapper around the shared `Button` with fixed size "sm" and a truncating label.
  *
  * `variant` defaults to "ghost". All remaining button attributes are spread after the
  * defaults, so a caller-supplied `type` or `className` replaces the values set here.
  */
 const ActionButton = ({
  children,
  icon,
  variant = "ghost",
  ...rest
 }: React.ButtonHTMLAttributes<HTMLButtonElement> & {
  children: React.ReactNode;
  icon?: LucideIcon;
  variant?: "secondary" | "danger" | "ghost";
 }): React.ReactElement => {
  // The label is wrapped so long text is cut off inside narrow grid cells.
  const label = <span className="min-w-0 truncate">{children}</span>;
  return (
    <Button
      type="button"
      variant={variant}
      size="sm"
      icon={icon}
      className="min-w-0 max-w-full px-2 text-xs leading-tight [&>svg]:shrink-0"
      {...rest}
    >
      {label}
    </Button>
  );
 };
 /**
  * Normalizes a quota text field before it is sent in an update request.
  *
  * @param value raw input text; surrounding whitespace is removed.
  * @param blankMeansUnlimited when true, an empty (after trimming) value becomes the
  *   literal "unlimited" instead of an empty string.
  * @returns the trimmed value, or "unlimited" for a blank value when requested.
  */
 const quotaForApi = (value: string, blankMeansUnlimited = false): string => {
  const cleaned = value.trim();
  const isBlank = cleaned.length === 0;
  return isBlank && blankMeansUnlimited ? "unlimited" : cleaned;
 };
 export default UserManagementPage;
--- a/frontend/src/features/monitoring/clusters/components/ClusterMonitorCard.tsx
+++ b/frontend/src/features/monitoring/clusters/components/ClusterMonitorCard.tsx
@ -3,7 +3,7 @@
 * 显示单个集群的监控信息
 */
 import React, { useState } from "react";
-import { Activity, CheckCircle, AlertTriangle, XCircle, HelpCircle, Clock, Cpu, Database, Server as ServerIcon, ChevronDown, ChevronUp, TrendingUp } from "lucide-react";
+import { Activity, CheckCircle, AlertTriangle, XCircle, HelpCircle, Clock, Cpu, Database, Server as ServerIcon, ChevronDown, ChevronUp, TrendingUp, Users } from "lucide-react";
 import { Card, Badge } from "@/shared/components";
 import type { ClusterMetrics } from "@/core/types";
 import { NodeMetricCard } from "./NodeMetricCard";
@ -20,6 +20,9 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
  const podCount = cluster.podCount ?? 0;
  const totalGpu = cluster.totalGpu ?? 0;
  const usedGpu = cluster.usedGpu ?? 0;
  const allocatedGpu = firstNumber(cluster.gpuAllocated, cluster.allocatedGpu, cluster.gpuAllocation, usedGpu);
  const usedGpuMemory = firstDisplayValue(cluster.allocatedGpuMemoryMb, cluster.allocatedGpuMemoryMB, cluster.gpuMemoryRequestsMb, cluster.usedGpuMemory, cluster.gpuMemoryUsed, cluster.usedGpuMem);
  const totalGpuMemory = firstDisplayValue(cluster.totalGpuMemory, cluster.totalGpuMem);
  const cpuUsage = cluster.cpuUsage ?? 0;
  const memoryUsage = cluster.memoryUsage ?? 0;
  const gpuUsage = cluster.gpuUsage ?? 0;
@ -27,7 +30,11 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
  const totalCpu = cluster.totalCpu ?? "N/A";
  const usedMemory = cluster.usedMemory ?? "N/A";
  const totalMemory = cluster.totalMemory ?? "N/A";
  const cpuRequestText = firstDisplayValue(cluster.cpuRequests, usedCpu);
  const memoryRequestText = firstDisplayValue(cluster.memoryRequests, usedMemory);
  const hasClusterTotals = Boolean(cluster.totalCpu || cluster.totalMemory || cluster.nodeCount);
  const lastCheckedText = cluster.lastCheck ? new Date(cluster.lastCheck).toLocaleString() : "N/A";
  const userResourceRows = getUserResourceRows(cluster);
  const getStatusBadge = () => {
    switch (status) {
@ -76,13 +83,13 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
            </div>
            {/* Metrics Grid */}
-            <div className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-3">
+            <div className="grid grid-cols-2 gap-4 mb-3 md:grid-cols-3 xl:grid-cols-5">
              <div>
                <p className="text-xs text-slate-500">Uptime</p>
                <p className="text-sm text-slate-700 font-mono mt-1">{uptime}</p>
              </div>
              <div>
-                <p className="text-xs text-slate-500">Nodes</p>
+                <p className="text-xs text-slate-500">{hasClusterTotals ? "Nodes" : "Visible Nodes"}</p>
                <div className="flex items-center gap-1 mt-1">
                  <ServerIcon className="w-3 h-3 text-blue-400" />
                  <p className="text-sm text-slate-700 font-mono">{nodeCount}</p>
@ -95,7 +102,13 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
              <div>
                <p className="text-xs text-slate-500">GPU</p>
                <p className="text-sm text-slate-700 font-mono mt-1">
-                  {usedGpu}/{totalGpu || "N/A"}
+                  {hasClusterTotals ? `${usedGpu}/${totalGpu || "N/A"}` : `${allocatedGpu} allocated`}
                </p>
              </div>
              <div>
                <p className="text-xs text-slate-500">GPU Mem</p>
                <p className="text-sm text-slate-700 font-mono mt-1">
                  {usedGpuMemory || "N/A"}{totalGpuMemory ? ` / ${totalGpuMemory}` : ""}
                </p>
              </div>
            </div>
@ -105,16 +118,18 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
              <div>
                <div className="flex items-center gap-2 mb-1">
                  <Cpu className="w-3 h-3 text-blue-400" />
-                  <p className="text-xs text-slate-500">CPU (Cluster Total)</p>
+                  <p className="text-xs text-slate-500">{hasClusterTotals ? "CPU (Cluster Total)" : "CPU Requests"}</p>
                </div>
-                <p className="text-sm text-slate-700 font-mono">{usedCpu} / {totalCpu}</p>
+                <p className="text-sm text-slate-700 font-mono">
                  {hasClusterTotals ? `${usedCpu} / ${totalCpu}` : cpuRequestText || "0 cores"}
                </p>
                <div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
                  <div 
                    className="h-full bg-blue-500 rounded-full transition-all"
                    style={{ width: `${Math.min(cpuUsage, 100)}%` }}
                  />
                </div>
-                <p className="text-xs text-slate-500 mt-1">{cpuUsage.toFixed(1)}%</p>
+                <p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${cpuUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
                {cluster.maxNodeCpu && (
                  <div className="mt-1.5 pt-1.5 border-t border-slate-200">
                    <div className="flex items-center gap-1">
@ -132,16 +147,18 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
              <div>
                <div className="flex items-center gap-2 mb-1">
                  <Database className="w-3 h-3 text-green-400" />
-                  <p className="text-xs text-slate-500">Memory (Cluster Total)</p>
+                  <p className="text-xs text-slate-500">{hasClusterTotals ? "Memory (Cluster Total)" : "Memory Requests"}</p>
                </div>
-                <p className="text-sm text-slate-700 font-mono">{usedMemory} / {totalMemory}</p>
+                <p className="text-sm text-slate-700 font-mono">
                  {hasClusterTotals ? `${usedMemory} / ${totalMemory}` : memoryRequestText || "0 B"}
                </p>
                <div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
                  <div 
                    className="h-full bg-green-500 rounded-full transition-all"
                    style={{ width: `${Math.min(memoryUsage, 100)}%` }}
                  />
                </div>
-                <p className="text-xs text-slate-500 mt-1">{memoryUsage.toFixed(1)}%</p>
+                <p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${memoryUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
                {cluster.maxNodeMemory && (
                  <div className="mt-1.5 pt-1.5 border-t border-slate-200">
                    <div className="flex items-center gap-1">
@ -156,20 +173,20 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
                )}
              </div>
-              {totalGpu > 0 && (
+              {(totalGpu > 0 || allocatedGpu > 0) && (
                <div>
                  <div className="flex items-center gap-2 mb-1">
                    <Activity className="w-3 h-3 text-purple-400" />
-                    <p className="text-xs text-slate-500">GPU (Cluster Total)</p>
+                    <p className="text-xs text-slate-500">GPU Allocation</p>
                  </div>
-                  <p className="text-sm text-slate-700 font-mono">{usedGpu} / {totalGpu}</p>
+                  <p className="text-sm text-slate-700 font-mono">{allocatedGpu} / {totalGpu || "N/A"}</p>
                  <div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
                    <div 
                      className="h-full bg-purple-500 rounded-full transition-all"
                      style={{ width: `${Math.min(gpuUsage, 100)}%` }}
                    />
                  </div>
-                  <p className="text-xs text-slate-500 mt-1">{gpuUsage.toFixed(1)}%</p>
+                  <p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${gpuUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
                  {cluster.maxNodeGpu && cluster.maxNodeGpu > 0 && (
                    <div className="mt-1.5 pt-1.5 border-t border-slate-200">
                      <div className="flex items-center gap-1">
@ -184,8 +201,62 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
                  )}
                </div>
              )}
              {(usedGpuMemory || totalGpuMemory) && (
                <div>
                  <div className="flex items-center gap-2 mb-1">
                    <Database className="w-3 h-3 text-fuchsia-500" />
                    <p className="text-xs text-slate-500">GPU Mem</p>
                  </div>
                  <p className="text-sm text-slate-700 font-mono">
                    {usedGpuMemory || "0"}{totalGpuMemory ? ` / ${totalGpuMemory}` : ""}
                  </p>
                  <p className="text-xs text-slate-500 mt-1">requests.nvidia.com/gpumem</p>
                </div>
              )}
            </div>
            {userResourceRows.length > 0 && (
              <div className="mt-3 overflow-hidden rounded-lg border border-slate-200">
                <div className="flex items-center gap-2 border-b border-slate-200 bg-slate-50 px-3 py-2">
                  <Users className="h-4 w-4 text-slate-500" />
                  <h4 className="text-sm font-semibold text-slate-900">User Resources</h4>
                </div>
                <div className="overflow-x-auto">
                  <table className="min-w-[720px] w-full text-left text-xs">
                    <thead className="bg-white text-slate-500">
                      <tr>
                        <th className="px-3 py-2 font-medium">User</th>
                        <th className="px-3 py-2 font-medium">Namespace</th>
                        <th className="px-3 py-2 font-medium">CPU</th>
                        <th className="px-3 py-2 font-medium">Memory</th>
                        <th className="px-3 py-2 font-medium">GPU</th>
                        <th className="px-3 py-2 font-medium">GPU Mem</th>
                        <th className="px-3 py-2 font-medium">Pods</th>
                      </tr>
                    </thead>
                    <tbody className="divide-y divide-slate-100">
                      {userResourceRows.map((row, index) => (
                        <tr key={`${row.userId || row.username || row.userName || "user"}-${index}`} className="bg-white">
                          <td className="max-w-[180px] truncate px-3 py-2 font-medium text-slate-800">
                            {row.username || row.userName || shortId(row.userId) || "-"}
                          </td>
                          <td className="max-w-[180px] truncate px-3 py-2 font-mono text-slate-600">{row.namespace || "-"}</td>
                          <td className="px-3 py-2 font-mono text-slate-700">{firstDisplayValue(row.cpuRequests, row.usedCpu, row.cpuUsed, row.cpuRequest, row.cpuLimits, row.cpuLimit) || "-"}</td>
                          <td className="px-3 py-2 font-mono text-slate-700">{firstDisplayValue(row.memoryRequests, row.usedMemory, row.memoryUsed, row.memoryRequest, row.memoryLimits, row.memoryLimit) || "-"}</td>
                          <td className="px-3 py-2 font-mono text-slate-700">{firstNumber(row.gpuRequests, row.gpuAllocated, row.gpuAllocation, row.usedGpu, row.gpuUsed) ?? 0}</td>
                          <td className="px-3 py-2 font-mono text-slate-700">
                            {firstDisplayValue(row.gpuMemoryRequestsMb, row.gpuMemoryAllocated, row.gpuMemAllocated, row.usedGpuMemory, row.gpuMemoryUsed, row.gpuMemUsed) || "0"}
                          </td>
                          <td className="px-3 py-2 font-mono text-slate-700">{row.podCount ?? "-"}</td>
                        </tr>
                      ))}
                    </tbody>
                  </table>
                </div>
              </div>
            )}
            <div className="mt-3 flex items-center gap-2 text-xs text-slate-500">
              <Clock className="w-3 h-3" />
              <span>Last checked: {lastCheckedText}</span>
@ -233,3 +304,34 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
    </Card>
  );
 };
 // Returns the first argument that is a finite number (NaN and ±Infinity are skipped),
 // or 0 when no argument qualifies.
 const firstNumber = (...values: Array<number | undefined | null>): number => {
  const match = values.find((candidate) => typeof candidate === "number" && Number.isFinite(candidate));
  return typeof match === "number" ? match : 0;
 };
 /**
  * Returns the first displayable argument as text.
  *
  * A finite number is converted with `String`; a string counts only when it is
  * non-empty after trimming and is returned trimmed. Returns "" when nothing qualifies.
  */
 const firstDisplayValue = (...values: Array<string | number | undefined | null>): string => {
  for (const candidate of values) {
    switch (typeof candidate) {
      case "number":
        if (Number.isFinite(candidate)) return String(candidate);
        break;
      case "string": {
        const text = candidate.trim();
        if (text) return text;
        break;
      }
      default:
        break;
    }
  }
  return "";
 };
 // Reads the per-user resource rows from the first truthy field among the alternative
 // names checked below, falling back to an empty array. Note that an empty array is
 // truthy, so an earlier empty field wins over a later populated one.
 const getUserResourceRows = (cluster: ClusterMetrics) => {
  const rows =
    cluster.resourceUsageByUser ||
    cluster.userResources ||
    cluster.userResourceUsage ||
    cluster.resourcesByUser ||
    cluster.userResourceRows;
  return rows || [];
 };
 // Abbreviates an identifier for display: the trimmed value is returned as-is when it
 // has at most 12 characters, otherwise as its first 8 characters, "...", and its last 4.
 // A missing or blank value yields "".
 const shortId = (value?: string): string => {
  const id = (value ?? "").trim();
  return id.length > 12 ? `${id.slice(0, 8)}...${id.slice(-4)}` : id;
 };
--- a/frontend/src/features/monitoring/clusters/pages/MonitoringClustersPage.tsx
+++ b/frontend/src/features/monitoring/clusters/pages/MonitoringClustersPage.tsx
@ -3,7 +3,7 @@
 * 监控集群状态和健康信息
 */
 import React, { useState, useEffect } from "react";
-import { Activity, Server, RefreshCw } from "lucide-react";
+import { Activity, Database, Server, RefreshCw } from "lucide-react";
 import { PageHeader, StatsCard, Button, LoadingState, ErrorState, EmptyState } from "@/shared";
 import { useToast } from "@/shared";
 import { ClusterErrors, SuccessMessages, formatApiError } from "@/shared/utils";
@ -107,6 +107,12 @@ const MonitoringClustersPage: React.FC = () => {
  const healthyCount = clusters.filter(c => c.status === "healthy").length;
  const warningCount = clusters.filter(c => c.status === "warning" || c.status === "unknown").length;
  const errorCount = clusters.filter(c => c.status === "error" || c.status === "unhealthy").length;
  const allocatedGpu = clusters.reduce(
    (sum, cluster) => sum + firstNumber(cluster.gpuAllocated, cluster.allocatedGpu, cluster.gpuAllocation, cluster.usedGpu),
    0
  );
  const totalGpu = clusters.reduce((sum, cluster) => sum + (cluster.totalGpu ?? 0), 0);
  const gpuMemoryText = summarizeGpuMemory(clusters);
  return (
    <div className="space-y-6">
@ -127,7 +133,7 @@ const MonitoringClustersPage: React.FC = () => {
      </PageHeader>
      {/* Summary Stats */}
-      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
+      <div className="grid grid-cols-1 sm:grid-cols-2 xl:grid-cols-6 gap-4">
        <StatsCard
          title="Total Clusters"
          value={clusters.length}
@ -152,6 +158,18 @@ const MonitoringClustersPage: React.FC = () => {
          icon={Activity}
          variant="red"
        />
        <StatsCard
          title="GPU Allocation"
          value={`${allocatedGpu}/${totalGpu || "N/A"}`}
          icon={Activity}
          variant="purple"
        />
        <StatsCard
          title="GPU Mem"
          value={gpuMemoryText}
          icon={Database}
          variant="orange"
        />
      </div>
      {/* Auto-refresh Info */}
@ -173,3 +191,40 @@ const MonitoringClustersPage: React.FC = () => {
 };
 export default MonitoringClustersPage;
 /**
  * Returns the first argument that is a finite number, or 0 when none is.
  * undefined, null, NaN and +/-Infinity are skipped; 0 itself counts as a hit.
  */
 const firstNumber = (...values: Array<number | undefined | null>): number => {
  const match = values.find(
    (candidate): candidate is number => typeof candidate === "number" && Number.isFinite(candidate)
  );
  return match ?? 0;
 };
 /**
  * Builds the text shown in the "GPU Mem" summary card.
  *
  * - No cluster reports any GPU memory figure: "N/A".
  * - Exactly one cluster reports figures: "<used> / <total>" when a total is
  *   known (used defaults to "0"), otherwise just the used figure.
  * - Several clusters report figures: "<count> clusters", because values from
  *   different clusters are plain display strings and are not summed here.
  */
 const summarizeGpuMemory = (clusters: ClusterMetrics[]): string => {
  // Keep used/total paired per cluster so a "used / total" string can never
  // combine the used figure of one cluster with the total of another.
  const reporting = clusters
    .map((cluster) => ({
      used: firstText(
        cluster.allocatedGpuMemoryMb,
        cluster.allocatedGpuMemoryMB,
        cluster.gpuMemoryRequestsMb,
        cluster.usedGpuMemory,
        cluster.gpuMemoryUsed,
        cluster.usedGpuMem
      ),
      total: firstText(cluster.totalGpuMemory, cluster.totalGpuMem),
    }))
    .filter(({ used, total }) => Boolean(used || total));

  if (reporting.length === 0) {
    return "N/A";
  }
  if (reporting.length === 1) {
    const [{ used, total }] = reporting;
    // A cluster that only reports its capacity is shown as "0 / <total>"
    // instead of falling through to a misleading "0 clusters".
    return total ? `${used || "0"} / ${total}` : used;
  }
  // Count every cluster that reports something, not only those with a used figure.
  return `${reporting.length} clusters`;
 };
 /**
  * Returns the first argument that renders as non-empty text, or "" if none
  * does. Finite numbers are converted with String(); strings are trimmed and
  * must be non-empty after trimming; all other values are ignored.
  */
 const firstText = (...values: Array<string | number | undefined | null>): string => {
  // Normalises one candidate to its display text ("" means "skip this one").
  const toText = (entry: string | number | undefined | null): string => {
    if (typeof entry === "number") {
      return Number.isFinite(entry) ? String(entry) : "";
    }
    if (typeof entry === "string") {
      return entry.trim();
    }
    return "";
  };

  for (const entry of values) {
    const text = toText(entry);
    if (text) {
      return text;
    }
  }
  return "";
 };
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@ -28,3 +28,19 @@
  to { opacity: 1; transform: translateY(0); }
 }
 .animate-fadeIn { animation: fadeIn 0.25s ease-out; }
 /* Hover animation utilities */
 /* Base state: animate transform and box-shadow changes over 0.2s. */
 .hover-lift {
  transition: transform 0.2s ease, box-shadow 0.2s ease;
 }
 /* Hover state: shift the element up by 2px and add a soft drop shadow. */
 .hover-lift:hover {
  transform: translateY(-2px);
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
 }
 /* Base state: animate box-shadow changes over 0.2s. */
 .hover-glow {
  transition: box-shadow 0.2s ease;
 }
 /* Hover state: draw a 2px semi-transparent blue ring using a spread-only shadow. */
 .hover-glow:hover {
  box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3);
 }
--- a/frontend/src/shared/components/layout/SidebarLayout/SidebarLayout.tsx
+++ b/frontend/src/shared/components/layout/SidebarLayout/SidebarLayout.tsx
@ -21,7 +21,7 @@ export default function SidebarLayout({ items, children }: SidebarLayoutProps) {
        isOpen={isSidebarOpen}
        onClose={() => setIsSidebarOpen(false)}
      />
-      <div className="relative z-10 flex flex-col flex-1">
+      <div className="relative z-10 flex min-w-0 flex-1 flex-col">
        {React.Children.map(children, (child) => {
          // 将 toggleSidebar 函数传递给子组件
          if (React.isValidElement(child)) {
--- a/frontend/src/shared/components/layout/SidebarLayout/SidebarNav.tsx
+++ b/frontend/src/shared/components/layout/SidebarLayout/SidebarNav.tsx
@ -53,10 +53,10 @@ export default function SidebarNav({ items = [] as NavItem[], isOpen = true, onC
      <div key={item.key}>
        <button
          onClick={() => handleItemClick(item, hasChildren)}
-          className={`w-full text-left flex items-center gap-2 px-3 py-2 rounded-xl text-sm font-medium transition-colors duration-200 ${
+          className={`w-full text-left flex items-center gap-2 px-3 py-2 rounded-xl text-sm font-medium transition-colors duration-150 hover:bg-blue-50 dark:hover:bg-blue-900/20 ${
            item.active
              ? "bg-blue-50 text-blue-700 border border-blue-200 shadow-sm"
-              : "text-slate-600 hover:text-slate-950 hover:bg-slate-100"
+              : "text-slate-600 hover:text-slate-950"
          }`}
          style={{ paddingLeft: `${12 + level * 16}px` }}
        >
--- a/frontend/src/shared/components/layout/TopNavLayout/TopNavLayout.tsx
+++ b/frontend/src/shared/components/layout/TopNavLayout/TopNavLayout.tsx
@ -30,7 +30,7 @@ export default function TopNavLayout({
        onSignOut={onSignOut}
        onToggleSidebar={onToggleSidebar}
      />
-      <main className="flex-1 w-full max-w-screen-2xl mx-auto px-4 sm:px-6 lg:px-10 py-6 space-y-6">
+      <main className="min-w-0 flex-1 w-full max-w-screen-2xl mx-auto px-4 sm:px-6 lg:px-10 py-6 space-y-6">
        {children}
      </main>
    </div>
--- a/frontend/src/shared/utils/error-messages.ts
+++ b/frontend/src/shared/utils/error-messages.ts
@ -143,6 +143,14 @@ export function formatApiError(error: any): string | null {
  // Layer 2: HTTP Status Codes (标准HTTP错误)
  const status = error?.response?.status;
  const apiMessage = error?.response?.data?.message;
  const apiError = error?.response?.data?.error;
  if ((status === 400 || status === 403 || status === 422) && apiMessage) {
    return apiMessage;
  }
  if ((status === 400 || status === 403 || status === 422) && apiError) {
    return apiError;
  }
  if (status) {
    switch (status) {
      case 401:
@ -162,12 +170,12 @@ export function formatApiError(error: any): string | null {
  // Layer 3: API Response Messages (后端返回的具体错误)
  // 提取后端返回的详细错误信息
-  if (error?.response?.data?.message) {
+  if (apiMessage) {
-    return error.response.data.message;
+    return apiMessage;
  }
-  if (error?.response?.data?.error) {
+  if (apiError) {
-    return error.response.data.error;
+    return apiError;
  }
  // Layer 4: Generic Error Messages
@ -243,4 +251,3 @@ export type ErrorMessages =
  | typeof InstanceErrors
  | typeof ValidationErrors
  | typeof BusinessErrors;
--- a/infra/nginx/default.conf
+++ b/infra/nginx/default.conf
@ -7,6 +7,7 @@ server {
    listen 80 default_server;
    listen 443 ssl http2 default_server;
    server_name _;
    server_tokens off;
    ssl_certificate     /etc/nginx/certs/tls.crt;
    ssl_certificate_key /etc/nginx/certs/tls.key;
@ -18,6 +19,13 @@ server {
    root /usr/share/nginx/html;
    index index.html;
    resolver 127.0.0.11 valid=10s ipv6=off;
    add_header X-Frame-Options "DENY" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header Referrer-Policy "no-referrer" always;
    add_header Content-Security-Policy "default-src 'self'; connect-src 'self' http: https: ws: wss:; img-src 'self' data:; style-src 'self' 'unsafe-inline'; script-src 'self'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'" always;
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
    # 前端 SPA 路由 fallback
    location / {
@ -26,7 +34,8 @@ server {
    # API 请求代理到 backend 服务
    location /api/ {
-        proxy_pass         http://backend:8080;
+        set                $backend_upstream http://backend:8080;
        proxy_pass         $backend_upstream;
        proxy_http_version 1.1;
        proxy_set_header   Host $host;
        proxy_set_header   X-Real-IP $remote_addr;
@ -34,10 +43,22 @@ server {
        proxy_set_header   X-Forwarded-Proto $scheme;
    }
    # 外部健康检查，避免 /health 落到 SPA
    location = /health {
        access_log off;
        default_type application/json;
        return 200 '{"status":"ok"}';
    }
    # Nginx health check: answers directly from nginx with a plain-text "ok".
    location = /healthz {
        access_log off;
        # Use default_type (not add_header) for the Content-Type of the
        # `return` body: add_header would append a second Content-Type header
        # next to the one nginx already emits. Keeping this location free of
        # add_header also lets it inherit the server-level security headers,
        # so they no longer need to be duplicated here.
        default_type text/plain;
        return 200 'ok';
    }
@ -45,6 +66,11 @@ server {
    # Static assets: long-lived caching for files matched by extension.
    location ~* \.(js|css|png|jpg|jpeg|gif|svg|ico)$ {
        # A single Cache-Control header carries the 7-day lifetime. The former
        # `expires 7d` emitted its own `Cache-Control: max-age=604800`, so every
        # response carried two Cache-Control headers.
        add_header Cache-Control "public, max-age=604800, immutable";
        # add_header in this location disables inheritance of the server-level
        # add_header directives, so the security headers are repeated here.
        add_header X-Frame-Options "DENY" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header Referrer-Policy "no-referrer" always;
        add_header Content-Security-Policy "default-src 'self'; connect-src 'self' http: https: ws: wss:; img-src 'self' data:; style-src 'self' 'unsafe-inline'; script-src 'self'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'" always;
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        # A missing asset must be a 404. Falling back to /index.html returned
        # the SPA HTML with status 200 for e.g. a stale .js chunk, which the
        # browser then fails to parse as a script/stylesheet/image.
        try_files $uri =404;
    }
 }
--- a/tasks/lessons.md
+++ b/tasks/lessons.md
@ -7,3 +7,9 @@
 - For real Helm smoke tests, wait for platform instance deletion to remove the DB record before deleting the Kubernetes namespace manually. Deleting the namespace too early can make the async Helm uninstall mark the instance failed.
 - When embedding Helm, setting `actionConfig.Init(..., namespace, ...)` and `Install.Namespace` is not enough. The custom `RESTClientGetter` must also override the raw kubeconfig loader namespace, or manifests without `metadata.namespace` can be created in the kubeconfig context namespace such as `default`.
 - **Axios keysToSnake recursively converts ALL object keys, including the user-provided `values` map.** This silently renames Helm chart values (gpuMem → gpu_mem), causing the chart to ignore user settings. Fix: skip recursion for known data fields (values, valuesYaml) while still converting field names. Backend DTOs must provide dual json tags (camelCase + snake_case) with a Normalize() fallback.
 - In the current two-role model, ordinary users must be forced into username-derived private workspaces/namespaces. Do not accept arbitrary `workspaceId` for role=`user`, or `(workspace_id, cluster_id)` quotas become shared across users. When editing an existing user, update the existing private workspace in place; only migrate users still attached to the default workspace.
 - CPU and memory quotas are allowed to be blank, which means no platform ResourceQuota limit for that resource. GPU and `requests.nvidia.com/gpumem` should still default to explicit `0` for ordinary users unless admin sets them.
 - Layout regression tests must not depend on deliberately invalid charts leaving DB instances behind. The safer behavior is to reject before DB persistence; use mocked API data for pure frontend overflow checks.
 - Monitoring Pod-to-instance attribution cannot rely only on Helm standard labels. Some local charts, including `vllm-serve`, use only `app=<release>`; include that fallback before concluding allocation is zero.
 - In this Compose stack the React frontend is not a long-running frontend container. `frontend-build` is a one-shot asset build and `nginx` is the frontend runtime plus API gateway; README and status commands must make that explicit or users will think the stack is partially down.
 - For admin tables/cards inside the sidebar shell, fixed multi-column grids can still overflow even when individual buttons use `min-w-0`. Prefer responsive card layouts with a wrapping action region, and test at 1440/1280/1024/900/768 widths.
--- a/tasks/session-notes.md
+++ b/tasks/session-notes.md
@ -1,30 +0,0 @@
 # OCDP 系统测试 - 完成报告
 ## 已交付文档
 | 文档 | 路径 | 大小 |
 |------|------|------|
 | 用户操作指南 | `docs/user-guide.md` | 752 lines |
 | 测试场景设计 | `docs/test-scenarios.md` | 67KB, 12 分类, 100+ 用例 |
 | 测试用户凭据 | `docs/test-users.json` | 4 个账号 |
 | 综合 Bug 报告 | `docs/bug-report.md` | 18 个 Bug, 含安全发现 |
 | user-a 测试报告 | `docs/bugs-user-a.md` | 前端 UI 发现 |
 | user-b 测试报告 | `docs/bugs-user-b.md` | API/部署发现 |
 | user-c 测试报告 | `docs/bugs-user-c.md` | 权限隔离发现 |
 | 安全测试报告 | `docs/security/bugs-security.md` | 6 个安全发现 |
 ## Bug 统计: 18 个
 | 严重度 | 数量 | 说明 |
 |--------|------|------|
 | **P0 (Blocker)** | 2 | Launch 按钮无反应、SPA 路由空白页 |
 | **P1 (High)** | 2 | DELETE 404 + 空 body |
 | **P2 (Medium)** | 6 | 缺失 API、静默 namespace 覆盖、无障碍问题 |
 | **P3 (Low)** | 8 | 响应格式、安全头缺失、CORS、用户枚举等 |
 | **Total** | **18** | |
 ## 测试团队
 - user-a-agent ✅ (前端测试)
 - user-b-agent ✅ (API/部署测试)
 - user-c-agent ✅ (权限隔离测试)
 - security-agent ✅ (安全测试)
--- a/tasks/todo.md
+++ b/tasks/todo.md
@ -1,36 +1,129 @@
-# OCDP 第二次测试 - 完成
+# OCDP 最终文档结构
-## 交付文档
+## Quota lifecycle monitoring implementation 2026-05-14
-| 文档 | 路径 | 内容 |
+
 - [x] Main: integrate per-user per-cluster quota semantics and final verification.
  - [x] Treat ordinary-user empty CPU/memory/GPU/GPU memory quotas as explicit zero.
  - [x] Make create/update/scale quota checks use the selected cluster binding and sync ResourceQuota first.
  - [x] Reject GPU=0 user vllm deployment on k3s before DB instance/release creation.
 - [x] Worker A: implement backend quota evaluator/resource quota sync without touching frontend.
 - [x] Worker B: implement user lifecycle cleanup, snake_case DTO normalization, and safe admin/user role transitions.
  - [x] Add auth DTO alternate snake_case fields plus `Normalize()` for register/update requests, and call it in auth handler before service mapping.
  - [x] Make admin-to-user role transitions create or safely reuse the username-derived workspace; detect namespace ownership conflicts and return an explicit domain conflict error.
  - [x] Extend workspace binding repository to list/delete all bindings for a workspace so user deletion can clean every cluster binding.
  - [x] Extend tenant kube client with idempotent tenant cleanup for namespace/service account/role binding/resource quota, refusing system namespaces such as `default` and `kube-system`.
  - [x] Extend `AuthService` dependencies for instance/cluster/binding/tenant cleanup, preserving existing callers and avoiding frontend changes.
  - [x] Update `DeleteUser` to reject deletion when the user owns instances; when safe, clean exclusive user workspace cluster bindings and OCDP tenant resources before deleting the user.
  - [x] Add focused Go tests for DTO normalization, role downgrade workspace reuse/conflict, delete-with-instances conflict, cleanup path, and protected namespace cleanup.
  - [x] Run targeted Go tests, review diff, and add Worker B Review summary here.
  - Review: changed auth DTO normalization, auth handler normalization calls, auth service workspace reuse/delete cleanup logic, workspace binding repository ports/adapters, tenant kube cleanup, domain errors, mock/test coverage, and API wiring. Namespace conflicts now return `ErrWorkspaceNamespaceConflict`/HTTP 409; deleting users with owned/workspace instances returns `ErrUserHasInstances`/HTTP 409; protected tenant namespaces return forbidden-style `ErrProtectedNamespace`. Validation passed with targeted Worker B tests and full backend `go test ./...`.
 - [x] Worker C: implement monitoring resource aggregation and instance owner username fields.
 - [x] Worker D: implement frontend user management, instance card, and monitoring UI changes.
  - [x] Inspect current API/generated/UI type contracts for owner and monitoring resource fields without changing backend.
  - [x] Rework User Management accounts area into a wider operations layout with quota chips/split columns and actions that do not squeeze quota content.
  - [x] Change admin-to-user downgrade flow to open/reuse the tenant resource limit editor and submit role plus namespace/cluster/quota fields together.
  - [x] Show instance owner as `ownerUsername` when present, otherwise a shortened `ownerId`.
  - [x] Extend monitoring frontend types/adapters as needed for GPU allocation, GPU memory, and per-user resource rows returned by the backend.
  - [x] Update Cluster Monitoring cards/page to render GPU allocation/GPU Mem and per-user resource tables while respecting backend-scoped data for normal users.
  - [x] Check responsive behavior for the touched UI and avoid obvious desktop/mobile overflow.
  - [x] Run targeted frontend type/build tests available in the repo and review diff.
  - [x] Add Worker D Review summary with changed files and verification results.
  - Review: changed `frontend/src/features/configuration/users/pages/UserManagementPage.tsx`, `frontend/src/features/artifact/instances/components/InstanceCard.tsx`, `frontend/src/features/monitoring/clusters/components/ClusterMonitorCard.tsx`, `frontend/src/features/monitoring/clusters/pages/MonitoringClustersPage.tsx`, `frontend/src/core/types/index.ts`, and `frontend/src/api/index.ts`. User Management now uses wider operation rows with quota chips and admin-to-user downgrade saves role plus tenant limits. Instance cards show owner username or short owner ID. Cluster monitoring renders GPU allocation, GPU memory, and backend-returned per-user resource rows. Validation: `npm run build` passed; targeted `npx eslint ...` on changed frontend source files passed; full `npm run lint` remains blocked by pre-existing generated/cache and legacy lint errors; Playwright viewport check passed for `/configuration/users` and `/monitoring/clusters` at 390x844 and 1440x1000 with mocked API data and no horizontal overflow detected.
 - [x] Worker E: add API/Playwright/k3s regression tests for this plan.
 - [x] Worker F: read-only review for quota bypass, namespace deletion safety, and monitoring privacy.
 - [x] Run `go test ./...` and `npm run build`.
 - [x] Run Docker Compose smoke plus API/Playwright regression scripts.
 - [x] Run real k3s negative vllm quota deployment test and clean up test users.
 - [ ] Run positive GPU=1 k3s vllm deployment when cluster resources are available.
 - [x] Add Review summary and lessons.
  - Review: ordinary users are now forced into username-derived private workspace/namespace so per-cluster bindings behave as per-user/per-cluster quota buckets. Empty user CPU/memory/GPU/GPU Mem default to explicit `0`; create/update/scale use rendered Helm resource estimates, live ResourceQuota usage minus current release delta, and synced tenant ResourceQuota before persistence/Helm mutation. User deletion blocks on owned/workspace instances, then cleans tenant bindings/namespaces and deletes the exclusive workspace record. Monitoring now returns per-user resource rows and strips cluster-wide node/total metrics for ordinary users. Frontend renders `resourceUsageByUser`, instance owners, less cramped user quotas, and wraps InstanceCard actions to keep Delete inside the viewport. Validation passed: backend `go test ./...`, frontend `npm run build`, Docker Compose health checks, `test/unresolved_bugs_security_gateway_contract.py`, `test/unresolved_bugs_api_contract.py`, `test/user_namespace_quota_api_contract.py`, `test/frontend-playwright-smoke.py`, and `test/instance_card_action_layout_playwright.py`. Positive GPU=1 vllm deployment was not run because the cluster resource constraint remains external; negative GPU=0 vllm quota rejection on k3s passed.
 ## Unresolved bugs implementation 2026-05-14
 - [x] Worker A: fix instance API contract: detail replicas, list values, values/valuesYaml conflict, namespace 403.
  - [x] Inspect current instance handler/service tests and avoid touching other workers' areas.
  - [x] Add request validation so `values` plus `valuesYaml`/`values_yaml` conflicts return HTTP 400, while YAML-only still populates values.
  - [x] Enrich `GetInstance` replica count using the same live K8s source as list.
  - [x] Include `values` in list responses for API compatibility.
  - [x] Change normal-user tenant namespace mismatch from silent override to `ErrForbidden`/HTTP 403.
  - [x] Add focused Go tests for namespace mismatch and replica enrichment/list values where practical.
  - [x] Run targeted Go tests and review diff.
  - Review: changed only scoped instance backend files plus this task tracker; validated with `go test ./internal/domain/service` and `go test ./internal/adapter/input/http/rest` from `backend/`.
 - [x] Worker B: add Helm-rendered quota pre-check helper before DB create/Helm install.
  - [x] Inspect Helm client/service quota contracts and preserve other workers' edits.
  - [x] Add domain quota precheck types and compare logic for CPU, memory, GPU, and integer-MB gpumem.
  - [x] Add Helm render estimator output port and real/mock Helm implementations that render final chart values and sum Pod template requests/limits.
  - [x] Add focused Go tests for quota comparison and rendered manifest estimation where feasible.
  - [x] Run targeted Go tests and review diff.
  - Review: exposed `QuotaPrecheckService.EstimateAndCompare` plus `CompareWorkspaceQuota`; real Helm now dry-renders `/tmp/charts/{chart}-{version}.tgz` with final values and estimates Pod template requests/limits. Added quota and manifest estimator tests. Validation passed with `go test ./internal/domain/service ./internal/adapter/output/helm/...` and full backend `go test ./...`.
 - [ ] Worker C: add compatibility/security backend endpoints and auth/CORS/rate-limit fixes.
  - [x] Inspect backend route/handler/service contracts and preserve other workers' edits.
  - [x] Add `/repositories/{repo}/tags` compatibility alias without changing existing artifact behavior.
  - [x] Add `/monitoring/clusters/{id}/metrics` alias and `/clusters/{id}/stats` compatibility response.
  - [x] Add `/clusters/{id}/kubeconfig` tenant kubeconfig endpoint scoped to the authenticated user's workspace and requested cluster.
  - [x] Make login failures uniform and add a lightweight per-client login rate limit.
  - [x] Replace permissive CORS reflection/wildcard defaults with an allowlist-driven default suitable for local dev.
  - [ ] Add focused Go tests where straightforward, then run relevant Go tests and review diff.
  - Review: changed scoped backend route/auth/CORS handlers, added CORS and login limiter tests, and removed the direct SSE CORS wildcard so global CORS applies. Validation attempted with `go test ./cmd/api ./internal/adapter/input/http/rest` from `backend/`, but it is currently blocked by concurrent Worker B compile errors in `internal/domain/service/quota_precheck.go` and Helm client implementations missing `EstimateInstanceResources`.
 - [x] Worker D: harden Nginx gateway `/health`, server tokens, and security headers.
 - [ ] Worker E: align frontend/API client and Playwright coverage for conflict/namespace/scale flows.
 - [ ] Worker F: add API/security/regression test scripts and review coverage.
 - [ ] Integrate worker changes, resolve conflicts, and run Go/frontend builds.
 - [ ] Run Docker Compose smoke, API contracts, Playwright, and real k3s deploy cleanup.
 - [ ] Update `tasks/lessons.md` and add Review summary here.
 ## docs/ 目录 (已清理)
 | 文件 | 用途 | 状态 |
 |------|------|------|
-| 综合报告 | `docs/test2-report.md` | 3 个测试的完整结果 |
+| `user-guide.md` | 用户操作指南 | ✅ 永久参考 |
-| 配额测试详情 | `docs/test2-quota.md` | 配额限额详细分析 |
+| `test-scenarios.md` | 100+ 测试用例设计 | ✅ 永久参考 |
-| Values 优先级测试 | `docs/test2-values-priority.md` | values 覆盖测试+冲突测试 |
+| `test-users.json` | 4 个测试账号凭证 | ✅ 永久参考 |
-| UI 溢出/滚动/刷新 | `docs/test2-ui-overflow.md` | Playwright + 源码分析 |
+| `regression-full-report.md` | 最新综合回归报告 | ✅ 可删除（下一个版本） |
 | `UNRESOLVED-BUGS.md` | 未修复问题清单 (15 个) | ✅ 当前版本 |
-## 核心发现
+## Worker C monitoring and instance owner backend 2026-05-14
-### 1. 资源配额
+- [x] Inspect existing instance/monitoring permission, repository, DTO, and K8s metrics contracts without reverting other workers' changes.
-| 发现 | 影响 |
+- [x] Add `ownerUsername` to instance entity/DTO responses and hydrate it for detail/list via user repository while preserving ordinary-user/admin visibility rules.
-|------|------|
+- [x] Add K8s Pod resource allocation collection from requests/limits, including GPU and `requests.nvidia.com/gpumem` as integer MB.
-| ✅ K8s ResourceQuota 对象正确创建并生效 | cpu/gpu/mem 限制在 pod 级别执行 |
+- [x] Aggregate `resourceUsageByUser` in monitoring service by matching Pods to visible instances/workspaces/owners, with ordinary users scoped to themselves and admins seeing all visible owners.
-| ❌ **无 API 层预检查** | 后端接受所有部署请求，配额耗尽时 pod stuck pending-install |
+- [x] Expose cluster-level GPU/GPU memory allocation fields and per-user resource usage in `/monitoring/clusters`, detail, and existing aliases.
-| ❌ **GPU 配额可绕过** | gpu=0 用户能提交需要 GPU 的 chart |
+- [x] Add focused Go tests for instance owner username and monitoring resource aggregation/privacy.
-| ❌ **实例不会自动 failed** | 超配额实例永远 stuck 在 pending-install |
+- [x] Run relevant Go tests, review diff, and add Review summary here.
  - Review: Instance list/detail now include `ownerUsername` hydrated from the user repository. Monitoring responses now include per-user resource usage plus CPU/memory/GPU/GPU-memory request/limit allocation fields derived from Kubernetes Pod resources and DB instance ownership mapping; ordinary users only see their own allocation rows/totals, admins see all visible instance owners. Validation passed with `go test ./internal/domain/service`, `go test ./cmd/api ./internal/adapter/input/http/rest ./internal/adapter/output/k8s`, and backend `go test ./...`.
-### 2. Values 覆盖优先级
+## Debug quota limits monitoring UI 2026-05-15
 | 优先级 | 来源 | 说明 |
 |--------|------|------|
 | 🥇 **最高** | `values` JSON 字段 | 结构化 JSON - 覆盖一切 |
 | 🥈 **中** | `valuesYaml` 字符串 | 被 values JSON 覆盖 |
 | 🥉 **最低** | Chart 内置 values.yaml | 默认基线 |
 | ⚠️ **冲突时静默覆盖，无警告** | 两者都提供时 values JSON 全胜 | |
-### 3. 前端 UI
+- [x] Inspect current runtime logs for workspace conflict/quota errors without killing other services.
-| 测试 | 结论 |
+- [x] Fix quota semantics: CPU/memory blank means unlimited; GPU/GPU Mem blank means explicit zero for ordinary users.
-|------|------|
+- [x] Fix admin user update so editing an existing user's quota does not recreate/reassign namespace and does not raise false `workspace namespace conflict`.
-| 水平溢出 | ✅ 无问题 |
+- [x] Rework User Management action controls so they wrap inside the viewport on desktop and mobile.
-| 响应式 | ✅ sm/md/lg/xl 正确 |
+- [x] Improve monitoring for ordinary users with self-scoped useful fields instead of all `N/A`; make admin monitoring show the new resource allocation rows clearly.
-| 滚动 | ✅ 流畅 |
+- [x] Rebuild Docker Compose stack and run backend/frontend tests plus Playwright overflow smoke.
-| 刷新 | ✅ 正常 |
+- [x] Use ivanwu on k3s with vllm-serve 0.6.0, CPU/memory unlimited and gpumem `10000`, then verify/clean up.
-| 颜色对比度 | ⚠️ 登录错误文本 red-400 WCAG AA 不合格 |
+- [x] Add Review summary and lessons.
 Review:
 - Runtime logs were checked before and after changes. The only 502s observed were during intentional backend rebuild; final backend/nginx logs had no error/fatal/5xx entries.
 - Admin can now update ivanwu without `workspace namespace conflict`; ivanwu was migrated to workspace `ivanwu`, namespace `ocdp-u-ivanwu`, default cluster k3s, CPU/memory unlimited, GPU `1`, GPU Mem `10000`.
 - k3s ResourceQuota for ivanwu contains only GPU and GPU Mem hard limits; CPU/memory are omitted as unlimited. A vllm-serve `0.6.0` deployment used `harbor.bwgdi.com/library/vllm-openai:v0.17.1`, reached `deployed`, Pod `1/1 Running`, then was deleted through the platform and quota usage returned to `0/1` GPU and `0/10k` gpumem.
 - Monitoring now shows ordinary users self-scoped allocation rows and admin per-user rows. The vLLM deployment was visible as CPU `1.00 cores`, memory `9.8 GiB`, GPU `1`, GPU Mem `10000`.
 - Verification passed: `go test ./...`, `npm run build`, `test/frontend-playwright-smoke.py`, `test/instance_card_action_layout_playwright.py`, `test/user_management_layout_playwright.py`, `test/user_namespace_quota_api_contract.py`, and `test/unresolved_bugs_api_contract.py`.
 ## Restart docs and user management overflow 2026-05-18
 - [x] Inspect current Docker Compose service lifecycle and identify why frontend/backend feel disconnected.
 - [x] Update Makefile/README so one clear command starts the whole platform, with explicit rebuild/restart/status/log commands.
 - [x] Restart the full stack through the documented command and verify health endpoints.
 - [x] Reproduce User Management overflow with Playwright at desktop/tablet/mobile widths.
 - [x] Fix User Management layout so action buttons and quota controls stay inside the viewport.
 - [x] Run backend/frontend builds plus Playwright layout smoke.
 - [x] Record Review summary and lessons.
 Review:
 - `make up` is now the single documented platform start command. It runs `docker compose up --build -d` for the whole stack, and old commands (`run-2`, `docker-dev`, `docker-prod`, `docker-up`) are compatibility aliases.
 - `docker-compose.yml` now keeps `nginx` under `restart: unless-stopped`; `make docker-ps` and `make up` show `docker compose ps -a`, so the expected `frontend-build Exited (0)` state is visible and less confusing.
 - README now explains that frontend-build is a one-shot build job and the actual frontend runtime is `nginx`, which also proxies `/api`.
 - User Management layout was changed from a fixed four-column row to a responsive card layout with a wrapping action area. The app shell content column also has `min-w-0` so wide children cannot force browser overflow.
 - Verification passed: `go test ./...`, `npm run build`, `make up`, health checks for backend/nginx/web, `test/user_management_layout_playwright.py` across 1440/1280/1024/900/768 widths, `test/frontend-playwright-smoke.py`, and `test/instance_card_action_layout_playwright.py`.
--- a/test/instance_card_action_layout_playwright.py
+++ b/test/instance_card_action_layout_playwright.py
@ -1,7 +1,8 @@
 #!/usr/bin/env python3
-# Covers InstanceCard action layout: creates a harmless failed metadata instance
+# Covers InstanceCard action layout. It prefers a harmless failed metadata
-# with an invalid chart before Helm runs, opens the Instances page, verifies the
+# instance when the API preserves one; if chart validation rejects before DB
-# Delete button remains inside the card and viewport, clicks it, and cleans up.
+# persistence, it falls back to mocking only the instance list/delete API so the
 # visual overflow assertion remains independent from deployment behavior.
 import json
 import os
@ -102,6 +103,7 @@ def main() -> int:
    release = f"ocdp-ui-overflow-{suffix}"
    namespace = f"ocdp-ui-overflow-{suffix}"
    instance_id = ""
    synthetic = False
    try:
        create = request(
            "POST",
@ -125,16 +127,44 @@ def main() -> int:
                break
            time.sleep(0.5)
        if not instance_id:
-            raise AssertionError("test instance was not visible after failed chart download")
+            synthetic = True
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            page = browser.new_page(viewport={"width": 920, "height": 760})
            page.on("dialog", lambda dialog: dialog.accept())
            login_ui(page)
            if synthetic:
                synthetic_instance = {
                    "id": f"synthetic-{suffix}",
                    "name": release,
                    "namespace": namespace,
                    "clusterId": cluster_id,
                    "registryId": registry_id,
                    "repository": "charts/nonexistent",
                    "chart": "nonexistent",
                    "version": "0.0.0",
                    "status": "failed",
                    "ownerUsername": ADMIN_USER,
                    "values": {},
                    "createdAt": "2026-05-15T00:00:00Z",
                    "updatedAt": "2026-05-15T00:00:00Z",
                }
                def fulfill_instances(route):
                    # Mock handler for the instance list/delete routes:
                    # GET returns the single synthetic instance, DELETE reports
                    # success with an empty 204, anything else is passed through.
                    method = route.request.method
                    if method == "GET":
                        listing = {"instances": [synthetic_instance], "total": 1}
                        route.fulfill(status=200, content_type="application/json", body=json.dumps(listing))
                    elif method == "DELETE":
                        route.fulfill(status=204, body="")
                    else:
                        route.continue_()
                page.route("**/api/v1/clusters/*/instances", fulfill_instances)
                page.route("**/api/v1/clusters/*/instances/*", fulfill_instances)
            page.get_by_role("button", name="Instances", exact=True).click()
            page.wait_for_load_state("networkidle")
-            heading = page.get_by_role("heading", name=release, exact=True)
+            heading = page.get_by_role("heading", name=release, exact=True).first
            expect(heading).to_be_visible(timeout=15000)
            card = heading.locator("xpath=ancestor::div[contains(@class, 'group')][1]")
            delete_button = card.get_by_role("button", name="Delete", exact=True)
--- a/test/unresolved_bugs_api_contract.py
+++ b/test/unresolved_bugs_api_contract.py
@ -0,0 +1,259 @@
 #!/usr/bin/env python3
 # Covers unresolved API regressions: compatibility tags/metrics/stats/kubeconfig
 # endpoints, values/valuesYaml conflict handling, ordinary-user namespace 403,
 # and quota precheck rejection before an instance is persisted.
 import json
 import os
 import sys
 import uuid
 from dataclasses import dataclass
 from typing import Any
 from urllib.error import HTTPError, URLError
 from urllib.parse import quote, urljoin
 from urllib.request import Request, urlopen
 # Runtime configuration, all overridable through environment variables.
 # API root: trailing slashes are stripped and exactly one is re-added so that
 # urljoin() in request() keeps the full prefix when joining relative paths.
 RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
 BASE_URL = RAW_BASE_URL + "/"
 # Admin credentials: ADMIN_* wins, BOOTSTRAP_ADMIN_* is the fallback.
 # ADMIN_PASS defaults to "" and main() refuses to run while it is empty.
 ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
 ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
 # Names (not ids) of the cluster and registry the checks run against;
 # main() resolves them to ids through the list endpoints.
 TARGET_CLUSTER_NAME = os.environ.get("TARGET_CLUSTER_NAME", "k3s")
 TARGET_REGISTRY_NAME = os.environ.get("TARGET_REGISTRY_NAME", "harbor-bwgdi")
 # Chart used for the tags, values-conflict and forbidden-namespace checks.
 NGINX_REPOSITORY = os.environ.get("NGINX_CHART_REPOSITORY", "charts/nginx")
 NGINX_TAG = os.environ.get("NGINX_CHART_TAG", "22.1.1")
 # Chart used for the quota precheck request.
 VLLM_REPOSITORY = os.environ.get("VLLM_CHART_REPOSITORY", "charts/vllm-serve")
 VLLM_TAG = os.environ.get("VLLM_CHART_TAG", "0.6.0")
 # Kept as a string: sent as the user's "quotaGpuMemory" and interpolated as
 # resources.gpuMem in the quota request (unit presumably MB, per the name).
 GPU_MEM_MB = os.environ.get("GPU_MEM_MB", "10000")
@dataclass
 class Response:
    # Plain-data record of one HTTP exchange; request() builds it for both
    # successful replies and HTTP error replies.
    # HTTP status code of the reply.
    status: int
    # Reply headers copied into a plain dict.
    headers: dict[str, str]
    # Reply body decoded as UTF-8 with undecodable bytes replaced.
    body: str
    # Parsed JSON of the body, or None when the body is empty or not JSON.
    json: Any
 def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON.

    Returns the decoded value, or ``None`` when ``body`` is empty or is not
    valid JSON.
    """
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        return None
    return decoded
 def request(method: str, path: str, token: str | None = None, payload: Any = None, timeout: int = 60) -> Response:
    """Send one API request and return the reply as a ``Response``.

    Args:
        method: HTTP method name.
        path: Absolute ``http://`` / ``https://`` URL, or a path that is
            joined onto ``BASE_URL``.
        token: Optional bearer token for the ``Authorization`` header.
        payload: Optional value sent as a JSON body.
        timeout: Socket timeout in seconds.

    Returns:
        A ``Response`` for any reply the server produced, including HTTP
        error statuses.

    Raises:
        AssertionError: when the server cannot be reached or the connection
            fails or times out while the reply is being read.
    """
    headers = {"Accept": "application/json"}
    data = None
    if payload is not None:
        data = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # Only a real scheme prefix marks an absolute URL; a relative path that
    # merely begins with "http" must still be joined onto BASE_URL.
    if path.startswith(("http://", "https://")):
        url = path
    else:
        url = urljoin(BASE_URL, path.lstrip("/"))
    try:
        with urlopen(Request(url, data=data, headers=headers, method=method), timeout=timeout) as res:
            body = res.read().decode("utf-8", errors="replace")
            return Response(res.status, dict(res.headers), body, parse_json(body))
    except HTTPError as exc:
        # HTTPError wraps the live response; close it once the body is read
        # so the connection is not left open.
        try:
            body = exc.read().decode("utf-8", errors="replace")
        finally:
            exc.close()
        return Response(exc.code, dict(exc.headers), body, parse_json(body))
    except OSError as exc:
        # URLError is an OSError subclass; catching OSError also covers read
        # timeouts and connection resets, which urlopen does not wrap.
        raise AssertionError(f"Cannot reach {url}: {exc}") from exc
 def assert_status(resp: Response, expected: set[int], context: str) -> None:
    """Raise ``AssertionError`` unless ``resp.status`` is in ``expected``.

    The message names ``context``, the sorted expected codes, the actual
    code, and the first 800 characters of the reply body.
    """
    if resp.status in expected:
        return
    raise AssertionError(f"{context}: expected HTTP {sorted(expected)}, got {resp.status}. Body: {resp.body[:800]}")
 def login(username: str, password: str) -> str:
    """Log in through ``/auth/login`` and return the access token as a string.

    Raises ``AssertionError`` when the reply is not HTTP 200 or when its JSON
    is not an object carrying a non-empty ``accessToken``.
    """
    credentials = {"username": username, "password": password}
    reply = request("POST", "/auth/login", payload=credentials)
    assert_status(reply, {200}, f"login {username}")
    parsed = reply.json
    access_token = parsed.get("accessToken") if isinstance(parsed, dict) else None
    if not access_token:
        raise AssertionError(f"login {username}: missing accessToken")
    return str(access_token)
 def list_items(path: str, token: str, context: str) -> list[dict[str, Any]]:
    """GET ``path`` and return the dict entries of the list it yields.

    The reply may be a bare JSON list, or an object that wraps the list under
    ``items``, ``clusters``, ``registries`` or ``instances`` (the first of
    those keys holding a list wins). Non-dict entries are dropped.

    Raises ``AssertionError`` when the status is not 200 or no list is found.
    """
    resp = request("GET", path, token)
    assert_status(resp, {200}, context)
    parsed = resp.json
    rows = None
    if isinstance(parsed, list):
        rows = parsed
    elif isinstance(parsed, dict):
        wrapper_keys = ("items", "clusters", "registries", "instances")
        rows = next((parsed.get(key) for key in wrapper_keys if isinstance(parsed.get(key), list)), None)
    if rows is None:
        raise AssertionError(f"{context}: expected list response, got {resp.body[:800]}")
    return [entry for entry in rows if isinstance(entry, dict)]
 def find_by_name(items: list[dict[str, Any]], name: str, context: str) -> dict[str, Any]:
    """Return the first entry of ``items`` whose ``"name"`` equals ``name``.

    Raises ``AssertionError`` (prefixed with ``context`` and listing every
    available name) when nothing matches.
    """
    found = next((candidate for candidate in items if candidate.get("name") == name), None)
    if found is None:
        raise AssertionError(f"{context}: could not find {name!r}. Available: {[item.get('name') for item in items]}")
    return found
 def encoded_repo(repo: str) -> str:
    """Percent-encode ``repo`` for use as a single URL path segment.

    No character is treated as safe, so ``/`` is escaped as well.
    """
    escaped = quote(repo, safe="")
    return escaped
 def create_test_user(admin_token: str, cluster_id: str, suffix: str, quota_gpu: str = "1") -> tuple[str, str, str]:
    """Create an active ordinary user for this test run.

    Args:
        admin_token: Bearer token used to call ``POST /users``.
        cluster_id: Sent as the user's ``defaultClusterId``.
        suffix: Appended to the generated username and namespace.
        quota_gpu: Value sent as ``quotaGpu``.

    Returns:
        ``(user_id, username, password)`` for the new user.

    Raises:
        AssertionError: when the reply is not HTTP 201 or carries no ``id``.
    """
    username = f"api-bugs-{suffix}"
    password = "ApiBugs123!"
    namespace = f"ocdp-u-api-bugs-{suffix}"
    created = request(
        "POST",
        "/users",
        admin_token,
        {
            "username": username,
            "password": password,
            "role": "user",
            "namespace": namespace,
            "defaultClusterId": cluster_id,
            "quotaCpu": "2",
            "quotaMemory": "8Gi",
            "quotaGpu": quota_gpu,
            "quotaGpuMemory": GPU_MEM_MB,
            "isActive": True,
            "mustChangePassword": False,
        },
    )
    assert_status(created, {201}, "create API contract test user")
    # A 201 without a usable id would otherwise surface as a bare
    # TypeError/KeyError, which the script's entry point does not report as FAIL.
    if not isinstance(created.json, dict) or created.json.get("id") in (None, ""):
        raise AssertionError(f"create API contract test user: response has no id. Body: {created.body[:800]}")
    return str(created.json["id"]), username, password
 def instance_names(cluster_id: str, token: str) -> set[str]:
    """Return the names of the instances listed for ``cluster_id``.

    Reads the ``instances`` list of the reply object; a reply that is not a
    JSON object yields an empty set, and non-dict entries are ignored.
    Raises ``AssertionError`` when the status is not 200.
    """
    listing = request("GET", f"/clusters/{cluster_id}/instances", token)
    assert_status(listing, {200}, "list instances")
    parsed = listing.json
    if not isinstance(parsed, dict):
        return set()
    names = set()
    for entry in parsed.get("instances", []):
        if isinstance(entry, dict):
            names.add(str(entry.get("name")))
    return names
 def main() -> int:
    """Run the API contract checks against a live deployment.

    Returns 0 after every check has passed; a failed check raises
    AssertionError. Users created along the way are deleted in the
    ``finally`` block whether or not the checks pass.
    """
    if not ADMIN_PASS:
        raise AssertionError("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required")
    suffix = uuid.uuid4().hex[:6]
    admin_token = login(ADMIN_USER, ADMIN_PASS)
    # Stay empty until the matching user exists, so cleanup only deletes
    # users this run actually created.
    user_id = ""
    quota_user_id = ""
    try:
        # Resolve the target cluster and registry names to their ids.
        clusters = list_items("/clusters", admin_token, "list clusters")
        cluster = find_by_name(clusters, TARGET_CLUSTER_NAME, "select target cluster")
        cluster_id = str(cluster["id"])
        registries = list_items("/registries", admin_token, "list registries")
        registry = find_by_name(registries, TARGET_REGISTRY_NAME, "select target registry")
        registry_id = str(registry["id"])
        # Compatibility endpoints: repository tags, monitoring metrics, cluster stats.
        tags = request("GET", f"/registries/{registry_id}/repositories/{encoded_repo(NGINX_REPOSITORY)}/tags?media_type=chart", admin_token)
        assert_status(tags, {200}, "registry repository tags alias")
        # Substring match on the raw body; the JSON structure is not inspected.
        if NGINX_TAG not in tags.body:
            raise AssertionError(f"tags alias did not include expected {NGINX_REPOSITORY}:{NGINX_TAG}")
        metrics = request("GET", f"/monitoring/clusters/{cluster_id}/metrics", admin_token)
        assert_status(metrics, {200}, "monitoring metrics alias")
        stats = request("GET", f"/clusters/{cluster_id}/stats", admin_token)
        assert_status(stats, {200}, "cluster stats alias")
        # Kubeconfig endpoint, called as a freshly created role "user" account.
        user_id, username, password = create_test_user(admin_token, cluster_id, suffix)
        user_token = login(username, password)
        kubeconfig = request("GET", f"/clusters/{cluster_id}/kubeconfig", user_token)
        assert_status(kubeconfig, {200}, "cluster kubeconfig compatibility endpoint")
        if "apiVersion: v1" not in kubeconfig.body or "kind: Config" not in kubeconfig.body or "token:" not in kubeconfig.body:
            raise AssertionError(f"kubeconfig endpoint did not return tenant token kubeconfig: {kubeconfig.body[:500]}")
        # The body must not carry client certificate or key material.
        forbidden_fields = ("client-key-data:", "client-certificate-data:")
        leaked = [field for field in forbidden_fields if field in kubeconfig.body]
        if leaked:
            raise AssertionError(f"kubeconfig endpoint leaked stored cert/key fields: {leaked}")
        # Sending both "values" and "valuesYaml" must be rejected with HTTP 400
        # and a body that mentions "conflict" (case-insensitive).
        conflict = request(
            "POST",
            f"/clusters/{cluster_id}/instances",
            user_token,
            {
                "name": f"values-conflict-{suffix}",
                "namespace": f"ocdp-u-api-bugs-{suffix}",
                "registryId": registry_id,
                "repository": NGINX_REPOSITORY,
                "tag": NGINX_TAG,
                "values": {"replicaCount": 1},
                "valuesYaml": "replicaCount: 2\n",
            },
        )
        assert_status(conflict, {400}, "values/valuesYaml conflict")
        if "conflict" not in conflict.body.lower():
            raise AssertionError(f"values conflict response should explain conflict, got {conflict.body[:500]}")
        # Snapshot the instance names so the 403 below can be shown to have
        # created nothing.
        before = instance_names(cluster_id, user_token)
        forbidden_name = f"namespace-forbidden-{suffix}"
        # The ordinary user targets the "default" namespace instead of its own.
        namespace_forbidden = request(
            "POST",
            f"/clusters/{cluster_id}/instances",
            user_token,
            {
                "name": forbidden_name,
                "namespace": "default",
                "registryId": registry_id,
                "repository": NGINX_REPOSITORY,
                "tag": NGINX_TAG,
                "valuesYaml": "replicaCount: 1\n",
            },
        )
        assert_status(namespace_forbidden, {403}, "ordinary user forbidden namespace")
        after = instance_names(cluster_id, user_token)
        if forbidden_name in after or before != after:
            raise AssertionError("forbidden namespace request must not create an instance")
        # Second user with quotaGpu "0": a deployment asking for one GPU must
        # be refused with 403 or 422 and must not appear in the instance list.
        quota_user_id, quota_username, quota_password = create_test_user(admin_token, cluster_id, f"quota-{suffix}", quota_gpu="0")
        quota_token = login(quota_username, quota_password)
        quota_name = f"quota-precheck-{suffix}"
        quota_resp = request(
            "POST",
            f"/clusters/{cluster_id}/instances",
            quota_token,
            {
                "name": quota_name,
                "namespace": f"ocdp-u-api-bugs-quota-{suffix}",
                "registryId": registry_id,
                "repository": VLLM_REPOSITORY,
                "tag": VLLM_TAG,
                "valuesYaml": f"""resources:
  gpuLimit: 1
  gpuMem: {GPU_MEM_MB}
  cpuRequest: 1
  memoryLimit: "4Gi"
 replicaCount: 1
 workerSize: 1
 initContainers:
  enabled: false
 """,
            },
            # This call alone waits up to 600 s instead of the default 60 s.
            timeout=600,
        )
        assert_status(quota_resp, {403, 422}, "quota precheck rejects over-quota deployment")
        if quota_name in instance_names(cluster_id, quota_token):
            raise AssertionError("quota precheck must reject before persisting an instance")
        print("PASS: unresolved API contract")
        return 0
    finally:
        # Best-effort cleanup: 204 and 404 count as success, anything else is
        # only warned about on stderr so it cannot mask the real test result.
        if user_id:
            cleanup = request("DELETE", f"/users/{user_id}", admin_token)
            if cleanup.status not in {204, 404}:
                print(f"WARN: cleanup user {user_id} returned HTTP {cleanup.status}: {cleanup.body[:300]}", file=sys.stderr)
        if quota_user_id:
            cleanup = request("DELETE", f"/users/{quota_user_id}", admin_token)
            if cleanup.status not in {204, 404}:
                print(f"WARN: cleanup quota user {quota_user_id} returned HTTP {cleanup.status}: {cleanup.body[:300]}", file=sys.stderr)
 if __name__ == "__main__":
    # A failed check is reported as "FAIL: ..." on stderr with exit status 1;
    # otherwise the process exits with main()'s return value.
    try:
        exit_code = main()
    except AssertionError as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        raise SystemExit(1)
    else:
        raise SystemExit(exit_code)
--- a/test/unresolved_bugs_security_gateway_contract.py
+++ b/test/unresolved_bugs_security_gateway_contract.py
@ -0,0 +1,150 @@
 #!/usr/bin/env python3
 # Covers unresolved security/gateway regressions: uniform login failures,
 # per-IP+username login rate limiting with Retry-After, backend CORS allowlist,
 # gateway /health JSON, Nginx version hiding, and security response headers.
 import json
 import os
 import re
 import sys
 import uuid
 from dataclasses import dataclass
 from typing import Any
 from urllib.error import HTTPError, URLError
 from urllib.parse import urljoin
 from urllib.request import Request, urlopen
 # Runtime configuration, all overridable through environment variables.
 # API root: trailing slashes are stripped and exactly one is re-added so that
 # urljoin() in request() keeps the full prefix when joining relative paths.
 RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
 BASE_URL = RAW_BASE_URL + "/"
 # Gateway root (no trailing slash) used for the /health and /healthz checks.
 GATEWAY_URL = os.environ.get("GATEWAY_URL", "http://localhost:18080").rstrip("/")
 # Existing account name used for the wrong-password login attempt;
 # ADMIN_USER wins, BOOTSTRAP_ADMIN_USER is the fallback.
 ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
@dataclass
 class Response:
    # Plain-data record of one HTTP exchange; request() builds it for both
    # successful replies and HTTP error replies.
    # HTTP status code of the reply.
    status: int
    # Reply headers copied into a plain dict; header() looks names up
    # case-insensitively.
    headers: dict[str, str]
    # Reply body decoded as UTF-8 with undecodable bytes replaced.
    body: str
    # Parsed JSON of the body, or None when the body is empty or not JSON.
    json: Any
 def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON.

    Returns the decoded value, or ``None`` when ``body`` is empty or is not
    valid JSON.
    """
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        return None
    return decoded
 def request(method: str, url: str, payload: Any = None, headers: dict[str, str] | None = None) -> Response:
    """Send one HTTP request and return the reply as a ``Response``.

    Args:
        method: HTTP method name.
        url: Absolute ``http://`` / ``https://`` URL, or a path that is
            joined onto ``BASE_URL``.
        payload: Optional value sent as a JSON body.
        headers: Optional extra request headers; the mapping is copied, not
            mutated. ``Accept: application/json`` is added unless supplied.

    Returns:
        A ``Response`` for any reply the server produced, including HTTP
        error statuses.

    Raises:
        AssertionError: when the server cannot be reached or the connection
            fails or times out while the reply is being read.
    """
    req_headers = dict(headers or {})
    req_headers.setdefault("Accept", "application/json")
    data = None
    if payload is not None:
        data = json.dumps(payload).encode("utf-8")
        req_headers["Content-Type"] = "application/json"
    # Only a real scheme prefix marks an absolute URL; a relative path that
    # merely begins with "http" must still be joined onto BASE_URL.
    if url.startswith(("http://", "https://")):
        target = url
    else:
        target = urljoin(BASE_URL, url.lstrip("/"))
    try:
        with urlopen(Request(target, data=data, headers=req_headers, method=method), timeout=20) as res:
            body = res.read().decode("utf-8", errors="replace")
            return Response(res.status, dict(res.headers), body, parse_json(body))
    except HTTPError as exc:
        # HTTPError wraps the live response; close it once the body is read
        # so the connection is not left open.
        try:
            body = exc.read().decode("utf-8", errors="replace")
        finally:
            exc.close()
        return Response(exc.code, dict(exc.headers), body, parse_json(body))
    except OSError as exc:
        # URLError is an OSError subclass; catching OSError also covers read
        # timeouts and connection resets, which urlopen does not wrap.
        raise AssertionError(f"Cannot reach {target}: {exc}") from exc
 def header(resp: Response, name: str) -> str:
    """Return the value of header ``name`` from ``resp``, matched case-insensitively.

    The first matching entry in ``resp.headers`` wins; ``""`` is returned
    when the header is absent.
    """
    wanted = name.lower()
    return next((value for key, value in resp.headers.items() if key.lower() == wanted), "")
 def assert_status(resp: Response, expected: set[int], context: str) -> None:
    """Raise ``AssertionError`` unless ``resp.status`` is in ``expected``.

    The message names ``context``, the sorted expected codes, the actual
    code, and the first 500 characters of the reply body.
    """
    if resp.status in expected:
        return
    raise AssertionError(f"{context}: expected HTTP {sorted(expected)}, got {resp.status}. Body: {resp.body[:500]}")
 def main() -> int:
    """Run the security/gateway contract checks against a live deployment.

    Returns 0 after every check has passed; a failed check raises
    AssertionError. Every login attempt sends its own X-Forwarded-For
    address (203.0.113.10 through .14), presumably so the per-address rate
    limiter keeps the steps independent - confirm against the backend.
    """
    # Uniform failures: a wrong password for an existing user and a login
    # for a non-existent user must both return 401 with equal parsed JSON.
    fake_user = f"no-such-user-{uuid.uuid4().hex[:8]}"
    existing_failure = request(
        "POST",
        "/auth/login",
        {"username": ADMIN_USER, "password": f"wrong-{uuid.uuid4().hex}"},
        {"X-Forwarded-For": "203.0.113.10"},
    )
    missing_failure = request(
        "POST",
        "/auth/login",
        {"username": fake_user, "password": "wrong-password"},
        {"X-Forwarded-For": "203.0.113.11"},
    )
    assert_status(existing_failure, {401}, "existing-user login failure")
    assert_status(missing_failure, {401}, "missing-user login failure")
    if existing_failure.json != missing_failure.json:
        raise AssertionError(f"login failures must be uniform, got {existing_failure.body!r} vs {missing_failure.body!r}")
    # Rate limiting: six bad logins for one fresh username from one forwarded
    # address. Only the last reply is inspected; it must be 429 and carry a
    # Retry-After header.
    limited_user = f"rate-limit-{uuid.uuid4().hex[:8]}"
    rate_resp = None
    for _ in range(6):
        rate_resp = request(
            "POST",
            "/auth/login",
            {"username": limited_user, "password": "bad-password"},
            {"X-Forwarded-For": "203.0.113.12"},
        )
    # The loop always runs, so this only narrows the Optional for readers
    # and type checkers.
    assert rate_resp is not None
    assert_status(rate_resp, {429}, "login rate limit")
    if not header(rate_resp, "Retry-After"):
        raise AssertionError("login rate limit response must include Retry-After")
    # CORS allowlist: the allowed origin must be echoed back in
    # Access-Control-Allow-Origin; an unknown origin must get no such header.
    # NOTE(review): the allowed origin is a literal, not derived from
    # GATEWAY_URL - confirm it matches the deployment under test.
    allowed_origin = "http://localhost:18080"
    unknown_origin = "https://evil.example"
    allowed = request(
        "POST",
        "/auth/login",
        {"username": f"cors-allowed-{uuid.uuid4().hex[:8]}", "password": "bad-password"},
        {"Origin": allowed_origin, "X-Forwarded-For": "203.0.113.13"},
    )
    assert_status(allowed, {401}, "allowed CORS login response")
    if header(allowed, "Access-Control-Allow-Origin") != allowed_origin:
        raise AssertionError(f"allowed origin was not echoed: {allowed.headers}")
    unknown = request(
        "POST",
        "/auth/login",
        {"username": f"cors-unknown-{uuid.uuid4().hex[:8]}", "password": "bad-password"},
        {"Origin": unknown_origin, "X-Forwarded-For": "203.0.113.14"},
    )
    assert_status(unknown, {401}, "unknown CORS login response")
    if header(unknown, "Access-Control-Allow-Origin"):
        raise AssertionError(f"unknown origin must not be allowed: {unknown.headers}")
    # Gateway /health: 200 with a JSON body equal to {"status": "ok"}.
    health = request("GET", f"{GATEWAY_URL}/health")
    assert_status(health, {200}, "gateway /health")
    if health.json != {"status": "ok"}:
        raise AssertionError(f"/health must return JSON status ok, got {health.body[:300]!r}")
    # The Server header may name nginx but must not be followed by a version digit.
    server = header(health, "Server")
    if re.search(r"nginx/\d", server, re.IGNORECASE):
        raise AssertionError(f"Nginx precise version leaked in Server header: {server}")
    # Each security header must be present with a non-empty value.
    for name in ("X-Frame-Options", "X-Content-Type-Options", "Referrer-Policy", "Content-Security-Policy"):
        if not header(health, name):
            raise AssertionError(f"missing security header {name} on /health")
    # Gateway /healthz: 200 and the same four security headers.
    healthz = request("GET", f"{GATEWAY_URL}/healthz")
    assert_status(healthz, {200}, "gateway /healthz")
    for name in ("X-Frame-Options", "X-Content-Type-Options", "Referrer-Policy", "Content-Security-Policy"):
        if not header(healthz, name):
            raise AssertionError(f"missing security header {name} on /healthz")
    print("PASS: unresolved security/gateway contract")
    return 0
 if __name__ == "__main__":
    # A failed check is reported as "FAIL: ..." on stderr with exit status 1;
    # otherwise the process exits with main()'s return value.
    try:
        exit_code = main()
    except AssertionError as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        raise SystemExit(1)
    else:
        raise SystemExit(exit_code)
--- a/test/user_management_layout_playwright.py
+++ b/test/user_management_layout_playwright.py
@ -0,0 +1,63 @@
 #!/usr/bin/env python3
 # Covers admin User Management layout: quota fields and action buttons remain
 # visible inside the viewport on desktop and tablet widths.
 import os
 from playwright.sync_api import expect, sync_playwright
 # Runtime configuration read from the environment.
 # URL the browser opens to reach the UI under test.
 FRONTEND_URL = os.environ.get("FRONTEND_URL", "http://localhost:18080")
 ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
 # Required: there is no default, so an unset ADMIN_PASS raises KeyError
 # as soon as the module is loaded.
 ADMIN_PASS = os.environ["ADMIN_PASS"]
 def login(page):
    """Open ``FRONTEND_URL`` in ``page`` and sign in as the admin user.

    Returns right after navigation when the page shows no password input
    (presumably an already-authenticated session). Otherwise fills the first
    non-password input and the first password input, clicks the last button
    containing "Login", and waits for the ``**/home`` URL and network idle.
    """
    page.goto(FRONTEND_URL, wait_until="networkidle")
    password_inputs = page.locator("input[type='password']")
    if password_inputs.count() > 0:
        username_input = page.locator("input:not([type='password'])").first
        username_input.fill(ADMIN_USER)
        password_inputs.first.fill(ADMIN_PASS)
        submit_button = page.get_by_role("button").filter(has_text="Login").last
        submit_button.click()
        page.wait_for_url("**/home", timeout=15000)
        page.wait_for_load_state("networkidle")
 def assert_no_action_overflow(page):
    """Assert the User Management page fits the viewport horizontally.

    Waits for the page heading, the first "GPU Mem" text and the first
    "Limits" button, requires the document to be no more than 2 px wider
    than its client width, then checks up to eight buttons per action label
    so that each one lies within the viewport width (1 px tolerance per
    side). Buttons without a bounding box are skipped.
    """
    page_heading = page.get_by_role("heading", name="User Management")
    expect(page_heading).to_be_visible(timeout=15000)
    quota_label = page.get_by_text("GPU Mem").first
    expect(quota_label).to_be_visible(timeout=15000)
    has_overflow = page.evaluate("document.documentElement.scrollWidth > document.documentElement.clientWidth + 2")
    assert not has_overflow, "User Management page has horizontal document overflow"
    limits_buttons = page.locator("button").filter(has_text="Limits")
    expect(limits_buttons.first).to_be_visible(timeout=15000)
    viewport = page.viewport_size or {"width": 0, "height": 0}
    for label in ("To User", "To Admin", "Limits", "Disable", "Enable", "Delete"):
        matches = page.get_by_role("button", name=label, exact=True)
        inspected = min(matches.count(), 8)
        for index in range(inspected):
            box = matches.nth(index).bounding_box()
            if box:
                assert box["x"] >= -1, f"{label} button overflows left viewport edge"
                assert box["x"] + box["width"] <= viewport["width"] + 1, f"{label} button overflows right viewport edge"
 # Script body: runs on load. One headless Chromium instance is shared; each
 # viewport gets its own page, which logs in, opens the Users view and runs
 # the overflow checks.
 with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    # Widths checked, widest first: 1440, 1280, 1024, 900 and 768 px.
    for viewport in (
        {"width": 1440, "height": 900},
        {"width": 1280, "height": 900},
        {"width": 1024, "height": 800},
        {"width": 900, "height": 760},
        {"width": 768, "height": 900},
    ):
        page = browser.new_page(viewport=viewport)
        login(page)
        page.get_by_role("button", name="Users", exact=True).click()
        page.wait_for_load_state("networkidle")
        # Fixed extra 500 ms wait before measuring (presumably to let the
        # layout settle).
        page.wait_for_timeout(500)
        assert_no_action_overflow(page)
        page.close()
    browser.close()
 # Reached only when every viewport passed; a failed assertion raises first.
 print("PASS: user management layout")
Author	SHA1	Message	Date
ivanwu	66c08e8bc6	Merge pull request 'dev' (#1 ) from dev into main Reviewed-on: #1	2026-05-22 09:41:11 +00:00
Ivan087	17acb7d018	fix: clear tsbuildinfo cache in frontend-build to prevent stale builds - Add rm -rf node_modules/.tmp before npm run build - Without this, tsc -b reads stale .tsbuildinfo from Docker volume and skips recompiling changed files, causing old JS to be served	2026-05-21 15:01:14 +08:00
Ivan087	e73b3147ed	refactor: simplify setup flow — eliminate redundant DB calls and login round-trips - Add AdminExists() to UserRepository (EXISTS query, not full table scan) - SetupInitialAdmin returns tokens directly (skip separate Login call) - Add SetupRequest DTO to auth_dto.go (replace inline struct) - Extract defaultEmail() helper (removes duplicated email logic) - AuthPage uses setup tokens directly (skip redundant apiLogin call) - Use customAxiosInstance for auth API calls (consistent with codebase)	2026-05-21 14:22:52 +08:00
Ivan087	7d297a2b1a	docs: update README for .env-optional deployment and first-time setup flow - .env is now optional — deployment works with docker compose up --build directly - First user registers as admin through initial setup page - Add verification steps for auth/status and auth/setup endpoints	2026-05-21 13:53:13 +08:00
Ivan087	0094519f52	feat: first-time setup flow — no .env required for deployment - Add GET /auth/status endpoint (returns needsSetup when no admin exists) - Add POST /auth/setup endpoint (public first-admin registration) - Add IsAdminExists + SetupInitialAdmin methods to AuthService - Frontend: detect needsSetup on load, show setup page with admin registration - Frontend: fall back to login page when setup is already complete - Docker compose: env_file already optional (required: false), no changes needed - Bootstrap: auto-detect BOOTSTRAP_CLUSTERS without separate enable flag	2026-05-21 13:49:36 +08:00
Ivan087	0144e9cab7	fix: auto-enable cluster bootstrap, add init-db.sql to postgres volumes - Auto-enable cluster seeding when BOOTSTRAP_CLUSTERS is set (no longer requires separate BOOTSTRAP_ENABLE_CLUSTERS=true) - Add BOOTSTRAP_ENABLE_CLUSTERS to README .env template - Mount init-db.sql in postgres service volumes	2026-05-20 18:00:49 +08:00
Ivan087	33ddaf97db	fix: scale replicas in response, K8s metrics client, quota precheck, auth tests - Add GetMetrics method to MetricsClient interface and implement cluster metrics API - Add QuotaPrecheck service for validating resource quotas before deployment - Add auth DTO with role/permission models and auth handler tests - Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics - Update workspace handler with GetWorkspace endpoint and shared-user list - Fix monitoring handler to use correct service method name - Add tail_lines fallback in instance handler for snake_case query params - Update nginx config for SSE log streaming support (no buffering) - Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test - Update error messages for quota validation and instance operations - ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit - InstanceCard: correctly disable scale-minus when replicas <= 0 - SidebarLayout: add hover transition for sidebar items - Update todo.md and lessons.md with latest fixes	2026-05-20 16:56:29 +08:00
Ivan087	8f90cf0f0d	chore: cleanup old docs, add regression report and unresolved bugs - Remove stale test docs (test2-, bugs-user-, bug-report, session-notes) - Add UNRESOLVED-BUGS.md and regression-full-report.md - Update todo.md	2026-05-14 10:08:57 +08:00
Ivan087	b88fe24aab	fix: real K8s replicas in list API, full Helm values in modify YAML editor - Add Replicas field to entity.Instance - Add EnrichReplicas to InstanceService (batch K8s deployment query) - convertInstanceResponse uses instance.Replicas instead of hardcoded 0 - ModifyModal: load full Helm values from values-diff API (Current deployed) - Remove stale loadValuesDiff, use single useEffect for all data loading - Fix YAML lineWidth:0 for no line wrapping	2026-05-13 16:15:11 +08:00
Ivan087	96d42ee3e1	fix: scale replicas in response, YAML lineWidth, delta values, modified keys - Scale API now returns actual replicas in instance response - ModifyModal: fix YAML stringify line breaking (lineWidth: 0) - ModifyModal: show modified keys summary above YAML editor - ModifyModal: only send delta (user-modified) values to server - Add diffObjects helper for deep object comparison	2026-05-13 15:51:55 +08:00
Ivan087	4441f58299	fix: direct K8s scaling, replicas from K8s API, button labels, modify fetch - Add ScaleClient using K8s API (like kubectl scale deploy --replicas=N) - ScaleDeployment: patch Deployment.Spec.Replicas directly - GetDeploymentReplicas: query actual K8s deployment replicas - Search by labels then fallback to deployment name match - Wire ScaleClient to InstanceService via SetScaleClient in main.go - ModifyModal: fetch full instance detail on open (list excludes values) - InstanceCard: add text labels to action buttons (Entries/Diag/Modify/Delete) - Text visible on sm+ screens, icon-only on xs	2026-05-13 14:54:24 +08:00
Ivan087	49b92e66c3	fix: UI redesign — horizontal instance rows, proper scaling, readable tag cards - Backend: add replicas field to InstanceResponse (extracted from values.replicaCount) - InstanceCard: complete redesign as horizontal row layout - Status bar \| Name+Chart \| Replicas +/- \| Action buttons - Scale controls show for deployed AND failed statuses (scale to 0) - Fix replicas display using new instance.replicas backend field - InstancesManagementPage: vertical row list + onScale callback to update state - TagCard: restore proper padding (p-4), min-width, readable button sizes - ArtifactBrowserPage: reduce grid density (sm:1 md:2 lg:3) - ModifyModal: simplify to YAML-only editing with current values pre-populated - Remove schema-based form generator - Keep values-diff as collapsible reference panel	2026-05-13 12:30:52 +08:00
Ivan087	28ecb2e636	feat: scale instances, --reuse-values, values diff, UI redesign, hover animations Backend (Phase 1): - Add ScaleInstance endpoint (POST /clusters/{id}/instances/{id}/scale) - Add GetInstanceValuesDiff endpoint (GET .../values-diff) - Enable ReuseValues=true in Helm Upgrade for --reuse-values behavior - Add GetValues/GetChartDefaultValues to HelmClient interface - Add ScaleInstanceRequest/Response and InstanceValuesDiffResponse DTOs Frontend (Phase 2): - InstanceCard: +/- scale buttons with loading spinner - ModifyModal: values diff view (current vs defaults), Use Defaults button - ArtifactBrowserPage: collapsible sidebar, compact tag grid, search filter - TagCard: "LATEST" badge, compact layout, responsive design - InstanceCard: compact 3-column layout, fewer scrolls needed - InstancesManagementPage: 3-column grid, compact view - Global hover-lift and hover-glow CSS utilities - SidebarNav: subtle hover transition on links	2026-05-13 11:51:24 +08:00