- Add GetMetrics method to MetricsClient interface and implement cluster metrics API - Add QuotaPrecheck service for validating resource quotas before deployment - Add auth DTO with role/permission models and auth handler tests - Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics - Update workspace handler with GetWorkspace endpoint and shared-user list - Fix monitoring handler to use correct service method name - Add tail_lines fallback in instance handler for snake_case query params - Update nginx config for SSE log streaming support (no buffering) - Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test - Update error messages for quota validation and instance operations - ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit - InstanceCard: correctly disable scale-minus when replicas <= 0 - SidebarLayout: add hover transition for sidebar items - Update todo.md and lessons.md with latest fixes
229 lines
7.9 KiB
Go
229 lines
7.9 KiB
Go
package service
|
|
|
|
import (
	"context"
	"errors"
	"testing"
	"time"

	persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
	"github.com/ocdp/cluster-service/internal/domain/entity"
	"github.com/ocdp/cluster-service/internal/pkg/authz"
)
|
|
|
|
func TestListClusterMonitoringAggregatesResourceUsageForAdmin(t *testing.T) {
|
|
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
|
UserID: "admin-1",
|
|
Username: "admin",
|
|
Role: authz.RoleAdmin,
|
|
WorkspaceID: "workspace-admin",
|
|
})
|
|
instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
|
|
svc := NewMonitoringService(
|
|
&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
|
|
&stubMetricsClient{allocations: monitoringAllocations()},
|
|
instanceRepo,
|
|
userRepo,
|
|
)
|
|
|
|
metrics, err := svc.ListClusterMonitoring(ctx)
|
|
if err != nil {
|
|
t.Fatalf("ListClusterMonitoring returned error: %v", err)
|
|
}
|
|
if len(metrics) != 1 {
|
|
t.Fatalf("expected 1 cluster metric, got %d", len(metrics))
|
|
}
|
|
got := metrics[0]
|
|
if got.AllocatedGPU != 3 || got.AllocatedGPUMemoryMB != 30000 {
|
|
t.Fatalf("expected total GPU/gpumem allocation 3/30000, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
|
|
}
|
|
if len(got.ResourceUsageByUser) != 2 {
|
|
t.Fatalf("expected 2 user usage rows, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser)
|
|
}
|
|
if got.ResourceUsageByUser[0].Username != "alice" || got.ResourceUsageByUser[0].GPURequests != 1 {
|
|
t.Fatalf("expected alice GPU request row first, got %#v", got.ResourceUsageByUser[0])
|
|
}
|
|
if got.ResourceUsageByUser[1].Username != "bob" || got.ResourceUsageByUser[1].GPURequests != 2 {
|
|
t.Fatalf("expected bob GPU request row second, got %#v", got.ResourceUsageByUser[1])
|
|
}
|
|
}
|
|
|
|
func TestListClusterMonitoringFiltersResourceUsageForOrdinaryUser(t *testing.T) {
|
|
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
|
UserID: "user-1",
|
|
Username: "alice",
|
|
Role: authz.RoleUser,
|
|
WorkspaceID: "workspace-1",
|
|
})
|
|
instanceRepo, userRepo := seedMonitoringOwners(t, ctx)
|
|
svc := NewMonitoringService(
|
|
&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
|
|
&stubMetricsClient{allocations: monitoringAllocations()},
|
|
instanceRepo,
|
|
userRepo,
|
|
)
|
|
|
|
metrics, err := svc.ListClusterMonitoring(ctx)
|
|
if err != nil {
|
|
t.Fatalf("ListClusterMonitoring returned error: %v", err)
|
|
}
|
|
got := metrics[0]
|
|
if got.AllocatedGPU != 1 || got.AllocatedGPUMemoryMB != 10000 {
|
|
t.Fatalf("expected ordinary user allocation to be scoped to alice, got %d/%d", got.AllocatedGPU, got.AllocatedGPUMemoryMB)
|
|
}
|
|
if len(got.ResourceUsageByUser) != 1 {
|
|
t.Fatalf("expected only alice usage row, got %d: %#v", len(got.ResourceUsageByUser), got.ResourceUsageByUser)
|
|
}
|
|
if got.ResourceUsageByUser[0].UserID != "user-1" || got.ResourceUsageByUser[0].Username != "alice" {
|
|
t.Fatalf("expected alice usage row, got %#v", got.ResourceUsageByUser[0])
|
|
}
|
|
if got.NodeCount != 0 || len(got.Nodes) != 0 || got.TotalCPU != "" || got.TotalMemory != "" {
|
|
t.Fatalf("expected ordinary user cluster-wide metrics to be sanitized, got nodes=%d/%d totalCPU=%q totalMemory=%q", got.NodeCount, len(got.Nodes), got.TotalCPU, got.TotalMemory)
|
|
}
|
|
if got.PodCount != 1 {
|
|
t.Fatalf("expected ordinary user pod count to be self scoped, got %d", got.PodCount)
|
|
}
|
|
}
|
|
|
|
func TestGetNodeMetricsForbiddenForOrdinaryUser(t *testing.T) {
|
|
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
|
UserID: "user-1",
|
|
Username: "alice",
|
|
Role: authz.RoleUser,
|
|
WorkspaceID: "workspace-1",
|
|
})
|
|
svc := NewMonitoringService(
|
|
&monitoringClusterRepo{clusters: []*entity.Cluster{{ID: "cluster-1", Name: "cluster", Visibility: authz.VisibilityGlobalShared}}},
|
|
&stubMetricsClient{allocations: monitoringAllocations()},
|
|
nil,
|
|
nil,
|
|
)
|
|
|
|
_, err := svc.GetNodeMetrics(ctx, "cluster-1")
|
|
if err != entity.ErrForbidden {
|
|
t.Fatalf("expected ordinary user node metrics to be forbidden, got %v", err)
|
|
}
|
|
}
|
|
|
|
func seedMonitoringOwners(t *testing.T, ctx context.Context) (*persistencemock.InstanceRepositoryMock, *persistencemock.UserRepositoryMock) {
|
|
t.Helper()
|
|
instanceRepo := persistencemock.NewInstanceRepositoryMock().(*persistencemock.InstanceRepositoryMock)
|
|
userRepo := persistencemock.NewUserRepositoryMock().(*persistencemock.UserRepositoryMock)
|
|
for _, user := range []*entity.User{
|
|
{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"},
|
|
{ID: "user-2", Username: "bob", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-2"},
|
|
} {
|
|
if err := userRepo.Create(ctx, user); err != nil {
|
|
t.Fatalf("failed to seed user %s: %v", user.ID, err)
|
|
}
|
|
}
|
|
for _, instance := range []*entity.Instance{
|
|
{ID: "inst-1", ClusterID: "cluster-1", Name: "alice-app", Namespace: "ocdp-u-alice", WorkspaceID: "workspace-1", OwnerID: "user-1"},
|
|
{ID: "inst-2", ClusterID: "cluster-1", Name: "bob-app", Namespace: "ocdp-u-bob", WorkspaceID: "workspace-2", OwnerID: "user-2"},
|
|
} {
|
|
if err := instanceRepo.Create(ctx, instance); err != nil {
|
|
t.Fatalf("failed to seed instance %s: %v", instance.ID, err)
|
|
}
|
|
}
|
|
return instanceRepo, userRepo
|
|
}
|
|
|
|
func monitoringAllocations() []*entity.PodResourceAllocation {
|
|
return []*entity.PodResourceAllocation{
|
|
{
|
|
ClusterID: "cluster-1",
|
|
Namespace: "ocdp-u-alice",
|
|
PodName: "alice-app-0",
|
|
InstanceName: "alice-app",
|
|
Allocation: entity.ResourceAllocation{
|
|
CPURequestsMilli: 500,
|
|
CPULimitsMilli: 1000,
|
|
MemoryRequestsBytes: 1024 * 1024 * 1024,
|
|
MemoryLimitsBytes: 2 * 1024 * 1024 * 1024,
|
|
GPURequests: 1,
|
|
GPULimits: 1,
|
|
GPUMemoryRequestsMB: 10000,
|
|
GPUMemoryLimitsMB: 10000,
|
|
},
|
|
},
|
|
{
|
|
ClusterID: "cluster-1",
|
|
Namespace: "ocdp-u-bob",
|
|
PodName: "bob-app-0",
|
|
InstanceName: "bob-app",
|
|
Allocation: entity.ResourceAllocation{
|
|
CPURequestsMilli: 2000,
|
|
CPULimitsMilli: 4000,
|
|
MemoryRequestsBytes: 4 * 1024 * 1024 * 1024,
|
|
MemoryLimitsBytes: 8 * 1024 * 1024 * 1024,
|
|
GPURequests: 2,
|
|
GPULimits: 2,
|
|
GPUMemoryRequestsMB: 20000,
|
|
GPUMemoryLimitsMB: 20000,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
type monitoringClusterRepo struct {
|
|
clusters []*entity.Cluster
|
|
}
|
|
|
|
func (r *monitoringClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error {
|
|
r.clusters = append(r.clusters, cluster)
|
|
return nil
|
|
}
|
|
|
|
func (r *monitoringClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) {
|
|
for _, cluster := range r.clusters {
|
|
if cluster.ID == id {
|
|
return cluster, nil
|
|
}
|
|
}
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
|
|
func (r *monitoringClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) {
|
|
for _, cluster := range r.clusters {
|
|
if cluster.Name == name {
|
|
return cluster, nil
|
|
}
|
|
}
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
|
|
func (r *monitoringClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error {
|
|
return nil
|
|
}
|
|
|
|
// Delete is a no-op in this test double: nothing is removed and nil is
// always returned.
func (r *monitoringClusterRepo) Delete(_ context.Context, _ string) error {
	return nil
}
|
|
|
|
func (r *monitoringClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) {
|
|
return r.clusters, nil
|
|
}
|
|
|
|
type stubMetricsClient struct {
|
|
allocations []*entity.PodResourceAllocation
|
|
}
|
|
|
|
func (c *stubMetricsClient) GetClusterMetrics(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
|
|
return &entity.ClusterMetrics{
|
|
ClusterID: clusterID,
|
|
ClusterName: "cluster",
|
|
Status: "healthy",
|
|
NodeCount: 3,
|
|
PodCount: 99,
|
|
TotalCPU: "48 cores",
|
|
TotalMemory: "256Gi",
|
|
Nodes: []entity.NodeMetrics{{NodeName: "node-a"}},
|
|
LastCheck: time.Now(),
|
|
}, nil
|
|
}
|
|
|
|
func (c *stubMetricsClient) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (c *stubMetricsClient) GetPodResourceAllocations(ctx context.Context, clusterID string) ([]*entity.PodResourceAllocation, error) {
|
|
return c.allocations, nil
|
|
}
|