fix: scale replicas in response, K8s metrics client, quota precheck, auth tests

- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
Ivan087
2026-05-20 16:56:29 +08:00
parent 8f90cf0f0d
commit 33ddaf97db
59 changed files with 4805 additions and 457 deletions

View File

@ -25,6 +25,18 @@ type ClusterMetrics struct {
MemoryUsage float64 `json:"memory_usage"` // 百分比
GPUUsage float64 `json:"gpu_usage"` // 百分比
CPURequests string `json:"cpu_requests,omitempty"`
CPULimits string `json:"cpu_limits,omitempty"`
MemoryRequests string `json:"memory_requests,omitempty"`
MemoryLimits string `json:"memory_limits,omitempty"`
GPURequests int64 `json:"gpu_requests,omitempty"`
GPULimits int64 `json:"gpu_limits,omitempty"`
GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb,omitempty"`
GPUMemoryLimitsMB int64 `json:"gpu_memory_limits_mb,omitempty"`
AllocatedGPU int64 `json:"allocated_gpu,omitempty"`
AllocatedGPUMemoryMB int64 `json:"allocated_gpu_memory_mb,omitempty"`
ResourceUsageByUser []UserResourceUsage `json:"resource_usage_by_user,omitempty"`
// 单机资源最大值
MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量如 "8 cores"
MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB"
@ -37,6 +49,42 @@ type ClusterMetrics struct {
Nodes []NodeMetrics `json:"nodes,omitempty"`
}
// ResourceAllocation holds resource request and limit totals derived from
// Kubernetes Pod resource requests/limits. Each request/limit pair shares a
// line; units are the ones named by the field suffixes (Milli, Bytes, MB).
type ResourceAllocation struct {
	// CPU totals (presumably millicores, per the Milli suffix; confirm at the producer).
	CPURequestsMilli, CPULimitsMilli int64
	// Memory totals, in bytes per the field names.
	MemoryRequestsBytes, MemoryLimitsBytes int64
	// GPU totals (presumably device counts; verify against the code that fills them).
	GPURequests, GPULimits int64
	// GPU memory totals, in MB per the field names.
	GPUMemoryRequestsMB, GPUMemoryLimitsMB int64
}
// PodResourceAllocation pairs the identifying fields of one pod with the
// ResourceAllocation recorded for it.
type PodResourceAllocation struct {
	// Identifiers locating the pod. InstanceName is presumably the owning
	// instance; confirm against the code that populates it.
	ClusterID, Namespace, PodName, InstanceName string
	// Allocation is the request/limit data attached to this pod.
	Allocation ResourceAllocation
}
// UserResourceUsage is the element type of ClusterMetrics.ResourceUsageByUser.
// Every field is always serialized to JSON (no omitempty on any tag).
type UserResourceUsage struct {
	// Identity of the user and workspace this entry belongs to.
	UserID      string `json:"user_id"`
	Username    string `json:"username"`
	WorkspaceID string `json:"workspace_id"`

	// Counts of instances and pods attributed to the entry.
	InstanceCount int `json:"instance_count"`
	PodCount      int `json:"pod_count"`

	// CPU and memory requests/limits as strings; the exact string format is
	// decided by the producer and is not visible here.
	CPURequests    string `json:"cpu_requests"`
	CPULimits      string `json:"cpu_limits"`
	MemoryRequests string `json:"memory_requests"`
	MemoryLimits   string `json:"memory_limits"`

	// GPU requests/limits and GPU memory (MB per the field names) as integers.
	GPURequests         int64 `json:"gpu_requests"`
	GPULimits           int64 `json:"gpu_limits"`
	GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb"`
	GPUMemoryLimitsMB   int64 `json:"gpu_memory_limits_mb"`
}
// NodeMetrics 节点监控指标
type NodeMetrics struct {
NodeName string `json:"node_name"`