package entity import "time" // ClusterMetrics 集群监控指标 type ClusterMetrics struct { ClusterID string `json:"cluster_id"` ClusterName string `json:"cluster_name"` Status string `json:"status"` // healthy, warning, error, unknown Uptime string `json:"uptime"` NodeCount int `json:"node_count"` PodCount int `json:"pod_count"` LastCheck time.Time `json:"last_check"` // 集群级别资源汇总 TotalCPU string `json:"total_cpu"` // 如 "8 cores" TotalMemory string `json:"total_memory"` // 如 "32 GB" TotalGPU int `json:"total_gpu"` // GPU 总数 UsedCPU string `json:"used_cpu"` // 如 "4.5 cores" UsedMemory string `json:"used_memory"` // 如 "16 GB" UsedGPU int `json:"used_gpu"` // 使用的 GPU 数 CPUUsage float64 `json:"cpu_usage"` // 百分比 MemoryUsage float64 `json:"memory_usage"` // 百分比 GPUUsage float64 `json:"gpu_usage"` // 百分比 CPURequests string `json:"cpu_requests,omitempty"` CPULimits string `json:"cpu_limits,omitempty"` MemoryRequests string `json:"memory_requests,omitempty"` MemoryLimits string `json:"memory_limits,omitempty"` GPURequests int64 `json:"gpu_requests,omitempty"` GPULimits int64 `json:"gpu_limits,omitempty"` GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb,omitempty"` GPUMemoryLimitsMB int64 `json:"gpu_memory_limits_mb,omitempty"` AllocatedGPU int64 `json:"allocated_gpu,omitempty"` AllocatedGPUMemoryMB int64 `json:"allocated_gpu_memory_mb,omitempty"` ResourceUsageByUser []UserResourceUsage `json:"resource_usage_by_user,omitempty"` // 单机资源最大值 MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores" MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB" MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量 MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率 MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率 MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 单机最高GPU使用率 // 节点列表(简化信息) Nodes []NodeMetrics `json:"nodes,omitempty"` } // ResourceAllocation is derived from Kubernetes Pod resources requests/limits. 
type ResourceAllocation struct {
	// CPU requests/limits; the Milli suffix suggests millicores (TODO confirm).
	CPURequestsMilli int64
	CPULimitsMilli   int64

	// Memory requests/limits in bytes, per the field names.
	MemoryRequestsBytes int64
	MemoryLimitsBytes   int64

	// GPU requests/limits; presumably device counts (TODO confirm).
	GPURequests int64
	GPULimits   int64

	// GPU memory requests/limits in MB, per the field names.
	GPUMemoryRequestsMB int64
	GPUMemoryLimitsMB   int64
}

// PodResourceAllocation labels a ResourceAllocation with a cluster ID,
// namespace, pod name and instance name. It declares no JSON tags, so
// encoding/json falls back to the Go field names.
type PodResourceAllocation struct {
	ClusterID    string
	Namespace    string
	PodName      string
	InstanceName string
	Allocation   ResourceAllocation
}

// UserResourceUsage is the element type of ClusterMetrics.ResourceUsageByUser.
// None of its fields use omitempty, so zero values are always present in the
// JSON encoding.
type UserResourceUsage struct {
	// Identity and workload counts.
	UserID        string `json:"user_id"`
	Username      string `json:"username"`
	WorkspaceID   string `json:"workspace_id"`
	InstanceCount int    `json:"instance_count"`
	PodCount      int    `json:"pod_count"`

	// Requests and limits. The CPU and memory values are strings whose format
	// is not defined here; presumably human-readable quantities like those on
	// ClusterMetrics (TODO confirm).
	CPURequests         string `json:"cpu_requests"`
	CPULimits           string `json:"cpu_limits"`
	MemoryRequests      string `json:"memory_requests"`
	MemoryLimits        string `json:"memory_limits"`
	GPURequests         int64  `json:"gpu_requests"`
	GPULimits           int64  `json:"gpu_limits"`
	GPUMemoryRequestsMB int64  `json:"gpu_memory_requests_mb"`
	GPUMemoryLimitsMB   int64  `json:"gpu_memory_limits_mb"`
}

// NodeMetrics holds the monitoring metrics reported for one node.
type NodeMetrics struct {
	NodeName string `json:"node_name"`
	Status   string `json:"status"` // Ready, NotReady
	Role     string `json:"role"`   // control-plane, worker
	Age      string `json:"age"`
	PodCount int    `json:"pod_count"`

	// CPU resources.
	CPUCapacity    string  `json:"cpu_capacity"` // e.g. "4 cores"
	CPUAllocatable string  `json:"cpu_allocatable"`
	CPUUsage       string  `json:"cpu_usage"`
	CPUPercent     float64 `json:"cpu_percent"`

	// Memory resources.
	MemoryCapacity    string  `json:"memory_capacity"` // e.g. "16 GB"
	MemoryAllocatable string  `json:"memory_allocatable"`
	MemoryUsage       string  `json:"memory_usage"`
	MemoryPercent     float64 `json:"memory_percent"`

	// GPU resources (if any).
	GPUCapacity int     `json:"gpu_capacity"` // total number of GPUs
	GPUUsage    int     `json:"gpu_usage"`    // GPUs in use
	GPUPercent  float64 `json:"gpu_percent"`
	GPUType     string  `json:"gpu_type,omitempty"` // GPU model, e.g. "NVIDIA-Tesla-T4"

	// Other information; each field is omitted from JSON when empty.
	OSImage          string `json:"os_image,omitempty"`
	KernelVersion    string `json:"kernel_version,omitempty"`
	ContainerRuntime string `json:"container_runtime,omitempty"`
	KubeletVersion   string `json:"kubelet_version,omitempty"`
}

// MonitoringSummary is the monitoring summary: cluster counts by health
// status, node and pod totals, and the time of the last update.
type MonitoringSummary struct {
	TotalClusters   int       `json:"total_clusters"`
	HealthyClusters int       `json:"healthy_clusters"`
	WarningClusters int       `json:"warning_clusters"`
	ErrorClusters   int       `json:"error_clusters"`
	TotalNodes      int       `json:"total_nodes"`
	TotalPods       int       `json:"total_pods"`
	LastUpdate      time.Time `json:"last_update"`
}