package service

import (
	"context"
	"errors"
	"testing"
	"time"

	persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
	"github.com/ocdp/cluster-service/internal/domain/entity"
	"github.com/ocdp/cluster-service/internal/pkg/authz"
)

// TestListClusterMonitoringAggregatesResourceUsageForAdmin checks that an
// admin principal receives the combined allocation of both seeded owners
// (3 GPUs / 30000 MB GPU memory) and one usage row per owner, with alice's
// row before bob's.
func TestListClusterMonitoringAggregatesResourceUsageForAdmin(t *testing.T) {
	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
		UserID:      "admin-1",
		Username:    "admin",
		Role:        authz.RoleAdmin,
		WorkspaceID: "workspace-admin",
	})
	instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
	svc := NewMonitoringService(
		&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
		&stubMetricsClient{allocations: monitoringAllocations()},
		instanceRepo,
		userRepo,
	)

	metrics, err := svc.ListClusterMonitoring(ctx)
	if err != nil {
		t.Fatalf("ListClusterMonitoring returned error: %v", err)
	}
	if len(metrics) != 1 {
		t.Fatalf("expected 1 cluster metric, got %d", len(metrics))
	}

	got := metrics[0]
	if got.AllocatedGPU != 3 || got.AllocatedGPUMemoryMB != 30000 {
		t.Fatalf("expected total GPU/gpumem allocation 3/30000, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
	}
	if len(got.ResourceUsageByUser) != 2 {
		t.Fatalf("expected 2 user usage rows, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser)
	}
	if got.ResourceUsageByUser[0].Username != "alice" || got.ResourceUsageByUser[0].GPURequests != 1 {
		t.Fatalf("expected alice GPU request row first, got %#v", got.ResourceUsageByUser[0])
	}
	if got.ResourceUsageByUser[1].Username != "bob" || got.ResourceUsageByUser[1].GPURequests != 2 {
		t.Fatalf("expected bob GPU request row second, got %#v", got.ResourceUsageByUser[1])
	}
}

// TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser checks that a
// principal with RoleUser only sees its own allocation (alice: 1 GPU /
// 10000 MB), a single usage row, a pod count of 1, and that the
// cluster-wide fields supplied by the metrics stub (node count, nodes,
// total CPU, total memory) come back zeroed.
func TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser(t *testing.T) {
	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
		UserID:      "user-1",
		Username:    "alice",
		Role:        authz.RoleUser,
		WorkspaceID: "workspace-1",
	})
	instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
	svc := NewMonitoringService(
		&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
		&stubMetricsClient{allocations: monitoringAllocations()},
		instanceRepo,
		userRepo,
	)

	metrics, err := svc.ListClusterMonitoring(ctx)
	if err != nil {
		t.Fatalf("ListClusterMonitoring returned error: %v", err)
	}
	// Guard the index below so an empty result fails with a readable message
	// instead of an index-out-of-range panic.
	if len(metrics) != 1 {
		t.Fatalf("expected 1 cluster metric, got %d", len(metrics))
	}

	got := metrics[0]
	if got.AllocatedGPU != 1 || got.AllocatedGPUMemoryMB != 10000 {
		t.Fatalf("expected ordinary user allocation to be scoped to alice, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
	}
	if len(got.ResourceUsageByUser) != 1 {
		t.Fatalf("expected only alice usage row, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser)
	}
	if got.ResourceUsageByUser[0].UserID != "user-1" || got.ResourceUsageByUser[0].Username != "alice" {
		t.Fatalf("expected alice usage row, got %#v", got.ResourceUsageByUser[0])
	}
	if got.NodeCount != 0 || len(got.Nodes) != 0 || got.TotalCPU != "" || got.TotalMemory != "" {
		t.Fatalf("expected ordinary user cluster-wide metrics to be sanitized, got nodes=%d/%d totalCPU=%q totalMemory=%q", got.NodeCount, len(got.Nodes), got.TotalCPU, got.TotalMemory)
	}
	// The stub reports 99 pods cluster-wide; only alice's single pod should remain.
	if got.PodCount != 1 {
		t.Fatalf("expected ordinary user pod count to be self scoped, got %d", got.PodCount)
	}
}

// TestGetNodeMetricsForbiddenForOrdinaryUser checks that GetNodeMetrics
// rejects a principal with RoleUser with entity.ErrForbidden.
func TestGetNodeMetricsForbiddenForOrdinaryUser(t *testing.T) {
	ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
		UserID:      "user-1",
		Username:    "alice",
		Role:        authz.RoleUser,
		WorkspaceID: "workspace-1",
	})
	svc := NewMonitoringService(
		&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
		&stubMetricsClient{allocations: monitoringAllocations()},
		nil,
		nil,
	)

	_, err := svc.GetNodeMetrics(ctx, "cluster-1")
	// errors.Is keeps the assertion valid even if the service wraps the sentinel.
	if !errors.Is(err, entity.ErrForbidden) {
		t.Fatalf("expected ordinary user node metrics to be forbidden, got %v", err)
	}
}

// seedMonitoringOwners builds mock instance and user repositories and
// populates them with two users (alice, bob) and one instance per user on
// "cluster-1". Any seeding error fails the test immediately.
func seedMonitoringOwners(t *testing.T, ctx context.Context) (*persistencemock.InstanceRepositoryMock, *persistencemock.UserRepositoryMock) {
	t.Helper()

	instanceRepo := persistencemock.NewInstanceRepositoryMock().(*persistencemock.InstanceRepositoryMock)
	userRepo := persistencemock.NewUserRepositoryMock().(*persistencemock.UserRepositoryMock)

	users := []*entity.User{
		{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"},
		{ID: "user-2", Username: "bob", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-2"},
	}
	for _, user := range users {
		if err := userRepo.Create(ctx, user); err != nil {
			t.Fatalf("failed to seed user %s: %v", user.ID, err)
		}
	}

	// Instance names and namespaces line up with the pods returned by
	// monitoringAllocations.
	instances := []*entity.Instance{
		{ID: "inst-1", ClusterID: "cluster-1", Name: "alice-app", Namespace: "ocdp-u-alice", WorkspaceID: "workspace-1", OwnerID: "user-1"},
		{ID: "inst-2", ClusterID: "cluster-1", Name: "bob-app", Namespace: "ocdp-u-bob", WorkspaceID: "workspace-2", OwnerID: "user-2"},
	}
	for _, instance := range instances {
		if err := instanceRepo.Create(ctx, instance); err != nil {
			t.Fatalf("failed to seed instance %s: %v", instance.ID, err)
		}
	}

	return instanceRepo, userRepo
}

// monitoringAllocations returns the fixed pod allocations served by
// stubMetricsClient: one pod for alice (1 GPU, 10000 MB GPU memory) and one
// for bob (2 GPUs, 20000 MB GPU memory), both on "cluster-1".
func monitoringAllocations() []*entity.PodResourceAllocation {
	return []*entity.PodResourceAllocation{
		{
			ClusterID:    "cluster-1",
			Namespace:    "ocdp-u-alice",
			PodName:      "alice-app-0",
			InstanceName: "alice-app",
			Allocation: entity.ResourceAllocation{
				CPURequestsMilli:    500,
				CPULimitsMilli:      1000,
				MemoryRequestsBytes: 1024 * 1024 * 1024,
				MemoryLimitsBytes:   2 * 1024 * 1024 * 1024,
				GPURequests:         1,
				GPULimits:           1,
				GPUMemoryRequestsMB: 10000,
				GPUMemoryLimitsMB:   10000,
			},
		},
		{
			ClusterID:    "cluster-1",
			Namespace:    "ocdp-u-bob",
			PodName:      "bob-app-0",
			InstanceName: "bob-app",
			Allocation: entity.ResourceAllocation{
				CPURequestsMilli:    2000,
				CPULimitsMilli:      4000,
				MemoryRequestsBytes: 4 * 1024 * 1024 * 1024,
				MemoryLimitsBytes:   8 * 1024 * 1024 * 1024,
				GPURequests:         2,
				GPULimits:           2,
				GPUMemoryRequestsMB: 20000,
				GPUMemoryLimitsMB:   20000,
			},
		},
	}
}

// monitoringClusterRepo is an in-memory cluster repository test double backed
// by a plain slice.
type monitoringClusterRepo struct {
	clusters []*entity.Cluster
}

// Create appends cluster to the in-memory list and always succeeds.
func (r *monitoringClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error {
	r.clusters = append(r.clusters, cluster)
	return nil
}

// GetByID returns the first stored cluster whose ID equals id, or
// entity.ErrClusterNotFound when none matches.
func (r *monitoringClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) {
	for _, cluster := range r.clusters {
		if cluster.ID == id {
			return cluster, nil
		}
	}
	return nil, entity.ErrClusterNotFound
}

// GetByName returns the first stored cluster whose Name equals name, or
// entity.ErrClusterNotFound when none matches.
func (r *monitoringClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) {
	for _, cluster := range r.clusters {
		if cluster.Name == name {
			return cluster, nil
		}
	}
	return nil, entity.ErrClusterNotFound
}

// Update is a no-op that always succeeds.
func (r *monitoringClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error {
	return nil
}

// Delete is a no-op that always succeeds; the stored clusters are left untouched.
func (r *monitoringClusterRepo) Delete(ctx context.Context, id string) error {
	return nil
}

// List returns the backing slice of stored clusters as-is (not a copy).
func (r *monitoringClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) {
	return r.clusters, nil
}

// stubMetricsClient is a metrics client test double that serves canned
// cluster metrics and a configurable list of pod allocations.
type stubMetricsClient struct {
	allocations []*entity.PodResourceAllocation
}

// GetClusterMetrics returns fixed cluster-wide metrics (3 nodes, 99 pods,
// "48 cores", "256Gi", one node entry) stamped with the requested clusterID
// and the current time.
func (c *stubMetricsClient) GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
	return &entity.ClusterMetrics{
		ClusterID:   clusterID,
		ClusterName: "cluster",
		Status:      "healthy",
		NodeCount:   3,
		PodCount:    99,
		TotalCPU:    "48 cores",
		TotalMemory: "256Gi",
		Nodes:       []entity.NodeMetrics{{NodeName: "node-a"}},
		LastCheck:   time.Now(),
	}, nil
}

// GetNodeMetrics returns no node metrics and no error.
func (c *stubMetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
	return nil, nil
}

// GetPodResourceAllocations returns the configured allocations regardless of
// clusterID.
func (c *stubMetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) {
	return c.allocations, nil
}