Files
ocdp-go/backend/internal/domain/entity/metrics.go
Ivan087 33ddaf97db fix: scale replicas in response, K8s metrics client, quota precheck, auth tests
- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
2026-05-20 16:56:29 +08:00

131 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package entity
import "time"
// ClusterMetrics is the monitoring snapshot reported for a single cluster.
//
// It groups the cluster's identity and health status, cluster-wide resource
// totals and usage, optional request/limit and allocation figures, the
// largest single-node values, and an optional per-node list.
type ClusterMetrics struct {
	ClusterID   string    `json:"cluster_id"`
	ClusterName string    `json:"cluster_name"`
	Status      string    `json:"status"` // healthy, warning, error, unknown
	Uptime      string    `json:"uptime"`
	NodeCount   int       `json:"node_count"`
	PodCount    int       `json:"pod_count"`
	LastCheck   time.Time `json:"last_check"`

	// Cluster-level resource summary.

	// TotalCPU is display text, e.g. "8 cores".
	TotalCPU string `json:"total_cpu"`
	// TotalMemory is display text, e.g. "32 GB".
	TotalMemory string `json:"total_memory"`
	// TotalGPU is the total number of GPUs.
	TotalGPU int `json:"total_gpu"`
	// UsedCPU is display text, e.g. "4.5 cores".
	UsedCPU string `json:"used_cpu"`
	// UsedMemory is display text, e.g. "16 GB".
	UsedMemory string `json:"used_memory"`
	// UsedGPU is the number of GPUs in use.
	UsedGPU int `json:"used_gpu"`
	// CPUUsage is a percentage.
	CPUUsage float64 `json:"cpu_usage"`
	// MemoryUsage is a percentage.
	MemoryUsage float64 `json:"memory_usage"`
	// GPUUsage is a percentage.
	GPUUsage float64 `json:"gpu_usage"`

	// Request, limit and allocation figures. Every field in this group is
	// tagged omitempty, so a zero value is left out of the JSON output.

	CPURequests          string `json:"cpu_requests,omitempty"`
	CPULimits            string `json:"cpu_limits,omitempty"`
	MemoryRequests       string `json:"memory_requests,omitempty"`
	MemoryLimits         string `json:"memory_limits,omitempty"`
	GPURequests          int64  `json:"gpu_requests,omitempty"`
	GPULimits            int64  `json:"gpu_limits,omitempty"`
	GPUMemoryRequestsMB  int64  `json:"gpu_memory_requests_mb,omitempty"`
	GPUMemoryLimitsMB    int64  `json:"gpu_memory_limits_mb,omitempty"`
	AllocatedGPU         int64  `json:"allocated_gpu,omitempty"`
	AllocatedGPUMemoryMB int64  `json:"allocated_gpu_memory_mb,omitempty"`

	// ResourceUsageByUser lists per-user usage entries; omitted from JSON
	// when empty.
	ResourceUsageByUser []UserResourceUsage `json:"resource_usage_by_user,omitempty"`

	// Largest single-node values.

	// MaxNodeCPU is the largest single-node CPU capacity, e.g. "8 cores".
	MaxNodeCPU string `json:"max_node_cpu"`
	// MaxNodeMemory is the largest single-node memory capacity, e.g. "32 GB".
	MaxNodeMemory string `json:"max_node_memory"`
	// MaxNodeGPU is the largest single-node GPU count.
	MaxNodeGPU int `json:"max_node_gpu"`
	// MaxNodeCPUUsage is the highest single-node CPU usage rate.
	MaxNodeCPUUsage float64 `json:"max_node_cpu_usage"`
	// MaxNodeMemUsage is the highest single-node memory usage rate.
	MaxNodeMemUsage float64 `json:"max_node_mem_usage"`
	// MaxNodeGPUUsage is the highest single-node GPU usage rate.
	MaxNodeGPUUsage float64 `json:"max_node_gpu_usage"`

	// Nodes is the node list (simplified information); omitted from JSON
	// when empty.
	Nodes []NodeMetrics `json:"nodes,omitempty"`
}
// ResourceAllocation is derived from Kubernetes Pod resources requests/limits.
//
// Each resource is stored as a request/limit pair of int64 values. Units are
// conveyed only by the field-name suffixes (Milli, Bytes, MB); this type does
// not enforce or convert them.
type ResourceAllocation struct {
	// CPU request and limit (Milli presumably means millicores; confirm
	// against the code that fills these in).
	CPURequestsMilli, CPULimitsMilli int64
	// Memory request and limit, named as byte counts.
	MemoryRequestsBytes, MemoryLimitsBytes int64
	// GPU request and limit.
	GPURequests, GPULimits int64
	// GPU memory request and limit, named as megabyte counts.
	GPUMemoryRequestsMB, GPUMemoryLimitsMB int64
}
// PodResourceAllocation attaches a ResourceAllocation to the cluster,
// namespace, pod and instance names that identify where it belongs.
type PodResourceAllocation struct {
	// Cluster, namespace and name of the pod.
	ClusterID, Namespace, PodName string
	// InstanceName is carried alongside the pod name; how an instance maps to
	// its pods is not defined by this type.
	InstanceName string
	// Allocation holds the request/limit figures recorded for this pod.
	Allocation ResourceAllocation
}
// UserResourceUsage is one per-user entry of resource usage: who the entry is
// for, how many instances and pods it covers, and the request/limit figures.
//
// No field is tagged omitempty, so every key is always present in the JSON
// output.
type UserResourceUsage struct {
	// Who the entry belongs to.
	UserID      string `json:"user_id"`
	Username    string `json:"username"`
	WorkspaceID string `json:"workspace_id"`

	// How many instances and pods the entry covers.
	InstanceCount int `json:"instance_count"`
	PodCount      int `json:"pod_count"`

	// CPU and memory requests/limits, carried as strings; the string format
	// is decided by whoever populates the entry.
	CPURequests    string `json:"cpu_requests"`
	CPULimits      string `json:"cpu_limits"`
	MemoryRequests string `json:"memory_requests"`
	MemoryLimits   string `json:"memory_limits"`

	// GPU and GPU-memory requests/limits, carried as integers.
	GPURequests         int64 `json:"gpu_requests"`
	GPULimits           int64 `json:"gpu_limits"`
	GPUMemoryRequestsMB int64 `json:"gpu_memory_requests_mb"`
	GPUMemoryLimitsMB   int64 `json:"gpu_memory_limits_mb"`
}
// NodeMetrics is the monitoring snapshot reported for a single node: its
// identity and status, CPU, memory and GPU figures, and optional system
// information.
type NodeMetrics struct {
	// NodeName is the name of the node.
	NodeName string `json:"node_name"`
	// Status is the node status, e.g. "Ready" or "NotReady".
	Status string `json:"status"`
	// Role is the node role, e.g. "control-plane" or "worker".
	Role string `json:"role"`
	// Age is carried as a string; its format is not fixed by this type.
	Age string `json:"age"`
	// PodCount is the pod count reported for the node.
	PodCount int `json:"pod_count"`

	// CPU resources.

	// CPUCapacity is display text, e.g. "4 cores".
	CPUCapacity    string  `json:"cpu_capacity"`
	CPUAllocatable string  `json:"cpu_allocatable"`
	CPUUsage       string  `json:"cpu_usage"`
	CPUPercent     float64 `json:"cpu_percent"`

	// Memory resources.

	// MemoryCapacity is display text, e.g. "16 GB".
	MemoryCapacity    string  `json:"memory_capacity"`
	MemoryAllocatable string  `json:"memory_allocatable"`
	MemoryUsage       string  `json:"memory_usage"`
	MemoryPercent     float64 `json:"memory_percent"`

	// GPU resources (if any).

	// GPUCapacity is the total number of GPUs.
	GPUCapacity int `json:"gpu_capacity"`
	// GPUUsage is the number of GPUs in use.
	GPUUsage int `json:"gpu_usage"`
	// GPUPercent is presumably GPU usage as a percentage, by analogy with the
	// name; confirm against the code that fills it in.
	GPUPercent float64 `json:"gpu_percent"`
	// GPUType is the GPU model, e.g. "NVIDIA-Tesla-T4"; omitted from JSON when
	// empty.
	GPUType string `json:"gpu_type,omitempty"`

	// Other information; each field is omitted from JSON when empty.

	OSImage          string `json:"os_image,omitempty"`
	KernelVersion    string `json:"kernel_version,omitempty"`
	ContainerRuntime string `json:"container_runtime,omitempty"`
	KubeletVersion   string `json:"kubelet_version,omitempty"`
}
// MonitoringSummary is the monitoring roll-up: cluster counts, node and pod
// totals, and the time of the last update.
type MonitoringSummary struct {
	// Cluster counts: the total plus one counter each for healthy, warning
	// and error.
	TotalClusters   int `json:"total_clusters"`
	HealthyClusters int `json:"healthy_clusters"`
	WarningClusters int `json:"warning_clusters"`
	ErrorClusters   int `json:"error_clusters"`

	// Node and pod totals.
	TotalNodes int `json:"total_nodes"`
	TotalPods  int `json:"total_pods"`

	// LastUpdate is the timestamp serialized under "last_update".
	LastUpdate time.Time `json:"last_update"`
}