package service

import (
	"context"
	"fmt"

	"github.com/ocdp/cluster-service/internal/domain/entity"
	"github.com/ocdp/cluster-service/internal/domain/repository"
)

// MonitoringService exposes cluster monitoring data. It combines the cluster
// repository (to enumerate clusters) with a metrics client (to fetch metrics
// for a given cluster).
type MonitoringService struct {
	clusterRepo   repository.ClusterRepository
	metricsClient repository.MetricsClient
}

// NewMonitoringService returns a MonitoringService backed by the given
// cluster repository and metrics client.
func NewMonitoringService(
	clusterRepo repository.ClusterRepository,
	metricsClient repository.MetricsClient,
) *MonitoringService {
	return &MonitoringService{
		clusterRepo:   clusterRepo,
		metricsClient: metricsClient,
	}
}

// GetClusterMonitoring returns the metrics of the cluster identified by
// clusterID. Any error from the metrics client is wrapped and returned.
func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
	metrics, err := s.metricsClient.GetClusterMetrics(ctx, clusterID)
	if err != nil {
		return nil, fmt.Errorf("failed to get cluster metrics: %w", err)
	}
	return metrics, nil
}

// ListClusterMonitoring returns one metrics entry per cluster known to the
// cluster repository, in the order the repository lists them.
//
// Fetching is best-effort per cluster: when metrics for a cluster cannot be
// obtained, a warning is printed and a placeholder entry carrying only the
// cluster ID, name and the status "unknown" is used instead. Every entry in
// the returned slice is non-nil.
//
// An error is returned when the clusters cannot be listed, or when a metrics
// fetch fails and ctx has already been cancelled or has expired.
func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entity.ClusterMetrics, error) {
	clusters, err := s.clusterRepo.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list clusters: %w", err)
	}

	result := make([]*entity.ClusterMetrics, 0, len(clusters))
	for _, cluster := range clusters {
		metrics, err := s.metricsClient.GetClusterMetrics(ctx, cluster.ID)
		if err != nil {
			// A done context is not a per-cluster failure: every remaining
			// fetch would fail the same way, so stop instead of reporting
			// all clusters as "unknown".
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, fmt.Errorf("failed to list monitoring: %w", ctxErr)
			}
			// A single failing cluster must not fail the whole listing.
			// NOTE(review): this warning goes to stdout via fmt.Printf;
			// consider routing it through the project's logger.
			fmt.Printf("Warning: failed to get metrics for cluster %s: %v\n", cluster.ID, err)
			metrics = nil
		}
		if metrics == nil {
			// Covers both a failed fetch and a client that returns
			// (nil, nil); callers dereference every entry.
			metrics = &entity.ClusterMetrics{
				ClusterID:   cluster.ID,
				ClusterName: cluster.Name,
				Status:      "unknown",
			}
		}
		result = append(result, metrics)
	}
	return result, nil
}

// GetMonitoringSummary aggregates the per-cluster metrics returned by
// ListClusterMonitoring into a single summary.
//
// TotalClusters counts every listed cluster. Clusters are tallied as healthy,
// warning or error according to their Status; any other status (including the
// "unknown" placeholder) is counted only in TotalClusters. Node and pod
// counts are summed across all clusters.
func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.MonitoringSummary, error) {
	monitoringList, err := s.ListClusterMonitoring(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list monitoring: %w", err)
	}

	summary := &entity.MonitoringSummary{
		TotalClusters: len(monitoringList),
	}
	for _, m := range monitoringList {
		switch m.Status {
		case "healthy":
			summary.HealthyClusters++
		case "warning":
			summary.WarningClusters++
		case "error":
			summary.ErrorClusters++
		}
		summary.TotalNodes += m.NodeCount
		summary.TotalPods += m.PodCount
	}
	return summary, nil
}

// GetNodeMetrics returns the node-level metrics of the cluster identified by
// clusterID. Any error from the metrics client is wrapped and returned.
func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
	nodes, err := s.metricsClient.GetNodeMetrics(ctx, clusterID)
	if err != nil {
		return nil, fmt.Errorf("failed to get node metrics: %w", err)
	}
	return nodes, nil
}