- Add Workspace domain (entity, repository, service, handler, DTO) - Add multi-tenant K8s client with tenant binding and quota management - Add K8s diagnostics client (instance diagnostics) - Add authorization middleware (authz package) - Restructure frontend to feature-based architecture (features/) - Add User Management page in configuration - Add AccessDenied page and route guards - Refactor shared components (form inputs, layout, UI) - Update Tailwind config for new design system - Add comprehensive documentation (docs/, tasks/, plans) - Improve cluster service with better kubeconfig handling - Add tests for crypto, config, helm client, tenant binding
132 lines
3.8 KiB
Go
132 lines
3.8 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/ocdp/cluster-service/internal/domain/entity"
|
|
"github.com/ocdp/cluster-service/internal/domain/repository"
|
|
"github.com/ocdp/cluster-service/internal/pkg/authz"
|
|
)
|
|
|
|
// MonitoringService 监控服务
|
|
type MonitoringService struct {
|
|
clusterRepo repository.ClusterRepository
|
|
metricsClient repository.MetricsClient
|
|
}
|
|
|
|
// NewMonitoringService 创建监控服务
|
|
func NewMonitoringService(
|
|
clusterRepo repository.ClusterRepository,
|
|
metricsClient repository.MetricsClient,
|
|
) *MonitoringService {
|
|
return &MonitoringService{
|
|
clusterRepo: clusterRepo,
|
|
metricsClient: metricsClient,
|
|
}
|
|
}
|
|
|
|
// GetClusterMonitoring 获取单个集群的监控信息
|
|
func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
|
|
principal, err := authz.RequirePrincipal(ctx)
|
|
if err != nil {
|
|
return nil, entity.ErrUnauthorized
|
|
}
|
|
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
|
if err != nil {
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
metrics, err := s.metricsClient.GetClusterMetrics(ctx, clusterID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get cluster metrics: %w", err)
|
|
}
|
|
return metrics, nil
|
|
}
|
|
|
|
// ListClusterMonitoring 获取所有集群的监控信息
|
|
func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entity.ClusterMetrics, error) {
|
|
principal, err := authz.RequirePrincipal(ctx)
|
|
if err != nil {
|
|
return nil, entity.ErrUnauthorized
|
|
}
|
|
// 获取所有集群
|
|
clusters, err := s.clusterRepo.List(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to list clusters: %w", err)
|
|
}
|
|
|
|
// 获取每个集群的监控数据
|
|
result := make([]*entity.ClusterMetrics, 0, len(clusters))
|
|
for _, cluster := range clusters {
|
|
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
|
|
continue
|
|
}
|
|
metrics, err := s.metricsClient.GetClusterMetrics(ctx, cluster.ID)
|
|
if err != nil {
|
|
// 如果某个集群获取失败,记录错误但继续
|
|
fmt.Printf("Warning: failed to get metrics for cluster %s: %v\n", cluster.ID, err)
|
|
// 返回基本信息
|
|
metrics = &entity.ClusterMetrics{
|
|
ClusterID: cluster.ID,
|
|
ClusterName: cluster.Name,
|
|
Status: "unknown",
|
|
}
|
|
}
|
|
result = append(result, metrics)
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// GetMonitoringSummary 获取监控汇总信息
|
|
func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.MonitoringSummary, error) {
|
|
// 获取所有集群监控数据
|
|
monitoringList, err := s.ListClusterMonitoring(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to list monitoring: %w", err)
|
|
}
|
|
|
|
// 统计汇总
|
|
summary := &entity.MonitoringSummary{
|
|
TotalClusters: len(monitoringList),
|
|
}
|
|
|
|
for _, m := range monitoringList {
|
|
switch m.Status {
|
|
case "healthy":
|
|
summary.HealthyClusters++
|
|
case "warning":
|
|
summary.WarningClusters++
|
|
case "error":
|
|
summary.ErrorClusters++
|
|
}
|
|
summary.TotalNodes += m.NodeCount
|
|
summary.TotalPods += m.PodCount
|
|
}
|
|
|
|
return summary, nil
|
|
}
|
|
|
|
// GetNodeMetrics 获取集群的节点指标
|
|
func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
|
|
principal, err := authz.RequirePrincipal(ctx)
|
|
if err != nil {
|
|
return nil, entity.ErrUnauthorized
|
|
}
|
|
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
|
if err != nil {
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
nodes, err := s.metricsClient.GetNodeMetrics(ctx, clusterID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get node metrics: %w", err)
|
|
}
|
|
return nodes, nil
|
|
}
|