- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
131 lines
5.2 KiB
Go
131 lines
5.2 KiB
Go
package entity
|
||
|
||
import "time"
|
||
|
||
// ClusterMetrics 集群监控指标
|
||
type ClusterMetrics struct {
|
||
ClusterID string `json:"cluster_id"`
|
||
ClusterName string `json:"cluster_name"`
|
||
Status string `json:"status"` // healthy, warning, error, unknown
|
||
Uptime string `json:"uptime"`
|
||
NodeCount int `json:"node_count"`
|
||
PodCount int `json:"pod_count"`
|
||
LastCheck time.Time `json:"last_check"`
|
||
|
||
// 集群级别资源汇总
|
||
TotalCPU string `json:"total_cpu"` // 如 "8 cores"
|
||
TotalMemory string `json:"total_memory"` // 如 "32 GB"
|
||
TotalGPU int `json:"total_gpu"` // GPU 总数
|
||
|
||
UsedCPU string `json:"used_cpu"` // 如 "4.5 cores"
|
||
UsedMemory string `json:"used_memory"` // 如 "16 GB"
|
||
UsedGPU int `json:"used_gpu"` // 使用的 GPU 数
|
||
|
||
CPUUsage float64 `json:"cpu_usage"` // 百分比
|
||
MemoryUsage float64 `json:"memory_usage"` // 百分比
|
||
GPUUsage float64 `json:"gpu_usage"` // 百分比
|
||
|
||
CPURequests string `json:"cpu_requests,omitempty"`
|
||
CPULimits string `json:"cpu_limits,omitempty"`
|
||
MemoryRequests string `json:"memory_requests,omitempty"`
|
||
MemoryLimits string `json:"memory_limits,omitempty"`
|
||
GPURequests int64 `json:"gpu_requests,omitempty"`
|
||
GPULimits int64 `json:"gpu_limits,omitempty"`
|
||
GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb,omitempty"`
|
||
GPUMemoryLimitsMB int64 `json:"gpu_memory_limits_mb,omitempty"`
|
||
AllocatedGPU int64 `json:"allocated_gpu,omitempty"`
|
||
AllocatedGPUMemoryMB int64 `json:"allocated_gpu_memory_mb,omitempty"`
|
||
ResourceUsageByUser []UserResourceUsage `json:"resource_usage_by_user,omitempty"`
|
||
|
||
// 单机资源最大值
|
||
MaxNodeCPU string `json:"max_node_cpu"` // 单机最大CPU容量,如 "8 cores"
|
||
MaxNodeMemory string `json:"max_node_memory"` // 单机最大内存容量,如 "32 GB"
|
||
MaxNodeGPU int `json:"max_node_gpu"` // 单机最大GPU数量
|
||
MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"` // 单机最高CPU使用率
|
||
MaxNodeMemUsage float64 `json:"max_node_mem_usage"` // 单机最高内存使用率
|
||
MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"` // 单机最高GPU使用率
|
||
|
||
// 节点列表(简化信息)
|
||
Nodes []NodeMetrics `json:"nodes,omitempty"`
|
||
}
|
||
|
||
// ResourceAllocation is derived from Kubernetes Pod resources requests/limits.
//
// Each resource has a requests field and a limits field. The struct declares
// no tags, so encoding/json serializes it under the Go field names.
//
// NOTE(review): units are only implied by the field-name suffixes (Milli,
// Bytes, MB); Milli presumably means CPU millicores — confirm with the code
// that populates this type.
type ResourceAllocation struct {
	CPURequestsMilli    int64
	CPULimitsMilli      int64
	MemoryRequestsBytes int64
	MemoryLimitsBytes   int64
	GPURequests         int64
	GPULimits           int64
	GPUMemoryRequestsMB int64
	GPUMemoryLimitsMB   int64
}
|
||
|
||
type PodResourceAllocation struct {
|
||
ClusterID string
|
||
Namespace string
|
||
PodName string
|
||
InstanceName string
|
||
Allocation ResourceAllocation
|
||
}
|
||
|
||
// UserResourceUsage is one entry of a per-user resource usage list (it is the
// element type of ClusterMetrics.ResourceUsageByUser).
//
// It carries the user and workspace identifiers, instance and pod counts, and
// requests/limits figures for CPU, memory, GPU and GPU memory. No field uses
// omitempty, so every key is always present in the JSON output.
type UserResourceUsage struct {
	UserID              string `json:"user_id"`
	Username            string `json:"username"`
	WorkspaceID         string `json:"workspace_id"`
	InstanceCount       int    `json:"instance_count"`
	PodCount            int    `json:"pod_count"`
	CPURequests         string `json:"cpu_requests"`
	CPULimits           string `json:"cpu_limits"`
	MemoryRequests      string `json:"memory_requests"`
	MemoryLimits        string `json:"memory_limits"`
	GPURequests         int64  `json:"gpu_requests"`
	GPULimits           int64  `json:"gpu_limits"`
	GPUMemoryRequestsMB int64  `json:"gpu_memory_requests_mb"`
	GPUMemoryLimitsMB   int64  `json:"gpu_memory_limits_mb"`
}
|
||
|
||
// NodeMetrics holds the monitoring metrics reported for a single node.
//
// Capacity, allocatable and usage amounts for CPU and memory are display
// strings (for example "4 cores" or "16 GB"); the *Percent fields are
// float64 values. GPUType and the fields under "Other information" are tagged
// omitempty and are dropped from the JSON output when they are empty strings.
type NodeMetrics struct {
	NodeName string `json:"node_name"`
	Status   string `json:"status"` // Ready, NotReady
	Role     string `json:"role"`   // control-plane, worker
	Age      string `json:"age"`
	PodCount int    `json:"pod_count"`

	// CPU resources.
	CPUCapacity    string  `json:"cpu_capacity"` // e.g. "4 cores"
	CPUAllocatable string  `json:"cpu_allocatable"`
	CPUUsage       string  `json:"cpu_usage"`
	CPUPercent     float64 `json:"cpu_percent"`

	// Memory resources.
	MemoryCapacity    string  `json:"memory_capacity"` // e.g. "16 GB"
	MemoryAllocatable string  `json:"memory_allocatable"`
	MemoryUsage       string  `json:"memory_usage"`
	MemoryPercent     float64 `json:"memory_percent"`

	// GPU resources (if any).
	GPUCapacity int     `json:"gpu_capacity"` // total number of GPUs
	GPUUsage    int     `json:"gpu_usage"`    // GPUs in use
	GPUPercent  float64 `json:"gpu_percent"`
	GPUType     string  `json:"gpu_type,omitempty"` // GPU model, e.g. "NVIDIA-Tesla-T4"

	// Other information.
	OSImage          string `json:"os_image,omitempty"`
	KernelVersion    string `json:"kernel_version,omitempty"`
	ContainerRuntime string `json:"container_runtime,omitempty"`
	KubeletVersion   string `json:"kubelet_version,omitempty"`
}
|
||
|
||
// MonitoringSummary is the monitoring summary: cluster counts broken down by
// healthy, warning and error status, node and pod totals, and the time of the
// last update. No field uses omitempty, so every key is always present in the
// JSON output.
type MonitoringSummary struct {
	TotalClusters   int       `json:"total_clusters"`
	HealthyClusters int       `json:"healthy_clusters"`
	WarningClusters int       `json:"warning_clusters"`
	ErrorClusters   int       `json:"error_clusters"`
	TotalNodes      int       `json:"total_nodes"`
	TotalPods       int       `json:"total_pods"`
	LastUpdate      time.Time `json:"last_update"`
}
|