fix: scale replicas in response, K8s metrics client, quota precheck, auth tests
- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
@ -10,6 +10,7 @@ import (
|
||||
"github.com/ocdp/cluster-service/internal/domain/entity"
|
||||
"github.com/ocdp/cluster-service/internal/domain/repository"
|
||||
"github.com/ocdp/cluster-service/internal/pkg/authz"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
)
|
||||
|
||||
func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) {
|
||||
@ -85,6 +86,210 @@ func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyNamespacePolicyRejectsMismatchedTenantNamespace(t *testing.T) {
|
||||
principal := &authz.Principal{
|
||||
UserID: "user-1",
|
||||
Username: "alice",
|
||||
Role: authz.RoleUser,
|
||||
WorkspaceID: "workspace-1",
|
||||
WorkspaceName: "alice",
|
||||
Namespace: "ocdp-u-alice",
|
||||
}
|
||||
cluster := &entity.Cluster{
|
||||
ID: "cluster-1",
|
||||
OwnerID: "admin",
|
||||
Visibility: authz.VisibilityWorkspaceShared,
|
||||
}
|
||||
instance := &entity.Instance{Namespace: "other-namespace"}
|
||||
svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
|
||||
|
||||
if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); !errors.Is(err, entity.ErrForbidden) {
|
||||
t.Fatalf("expected ErrForbidden for mismatched tenant namespace, got %v", err)
|
||||
}
|
||||
if instance.Namespace != "other-namespace" {
|
||||
t.Fatalf("expected namespace to remain unchanged on rejection, got %q", instance.Namespace)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyNamespacePolicyAllowsTenantNamespace(t *testing.T) {
|
||||
principal := &authz.Principal{
|
||||
UserID: "user-1",
|
||||
Username: "alice",
|
||||
Role: authz.RoleUser,
|
||||
WorkspaceID: "workspace-1",
|
||||
WorkspaceName: "alice",
|
||||
Namespace: "ocdp-u-alice",
|
||||
}
|
||||
cluster := &entity.Cluster{
|
||||
ID: "cluster-1",
|
||||
OwnerID: "admin",
|
||||
Visibility: authz.VisibilityWorkspaceShared,
|
||||
}
|
||||
instance := &entity.Instance{Namespace: "ocdp-u-alice"}
|
||||
svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
|
||||
|
||||
if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); err != nil {
|
||||
t.Fatalf("expected matching tenant namespace to be allowed, got %v", err)
|
||||
}
|
||||
if instance.Namespace != "ocdp-u-alice" {
|
||||
t.Fatalf("expected namespace to remain the allowed tenant namespace, got %q", instance.Namespace)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichReplicasSetsLiveReplicaCount(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cluster := &entity.Cluster{ID: "cluster-1", Name: "cluster"}
|
||||
svc := NewInstanceService(nil, &stubClusterRepo{cluster: cluster}, nil, nil, nil, nil)
|
||||
svc.SetScaleClient(&stubScaleClient{replicas: 3})
|
||||
|
||||
instances := []*entity.Instance{{
|
||||
ID: "inst-1",
|
||||
ClusterID: "cluster-1",
|
||||
Name: "demo",
|
||||
Namespace: "ocdp-u-alice",
|
||||
Replicas: 1,
|
||||
}}
|
||||
|
||||
enriched := svc.EnrichReplicas(ctx, "cluster-1", instances)
|
||||
if enriched[0].Replicas != 3 {
|
||||
t.Fatalf("expected live replicas to overwrite stored count, got %d", enriched[0].Replicas)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListInstancesByClusterHydratesOwnerUsername(t *testing.T) {
|
||||
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
||||
UserID: "admin-1",
|
||||
Username: "admin",
|
||||
Role: authz.RoleAdmin,
|
||||
WorkspaceID: "workspace-admin",
|
||||
})
|
||||
instanceRepo := persistencemock.NewInstanceRepositoryMock()
|
||||
userRepo := persistencemock.NewUserRepositoryMock()
|
||||
if err := userRepo.Create(ctx, &entity.User{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"}); err != nil {
|
||||
t.Fatalf("failed to seed user: %v", err)
|
||||
}
|
||||
instance := &entity.Instance{
|
||||
ID: "inst-1",
|
||||
WorkspaceID: "workspace-1",
|
||||
OwnerID: "user-1",
|
||||
ClusterID: "cluster-1",
|
||||
Name: "demo",
|
||||
Namespace: "ocdp-u-alice",
|
||||
}
|
||||
if err := instanceRepo.Create(ctx, instance); err != nil {
|
||||
t.Fatalf("failed to seed instance: %v", err)
|
||||
}
|
||||
svc := NewInstanceService(
|
||||
instanceRepo,
|
||||
&stubClusterRepo{cluster: &entity.Cluster{ID: "cluster-1", Name: "cluster"}},
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
)
|
||||
svc.SetUserRepository(userRepo)
|
||||
|
||||
instances, err := svc.ListInstancesByCluster(ctx, "cluster-1")
|
||||
if err != nil {
|
||||
t.Fatalf("ListInstancesByCluster returned error: %v", err)
|
||||
}
|
||||
if len(instances) != 1 {
|
||||
t.Fatalf("expected 1 instance, got %d", len(instances))
|
||||
}
|
||||
if instances[0].OwnerUsername != "alice" {
|
||||
t.Fatalf("expected owner username alice, got %q", instances[0].OwnerUsername)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateInstanceRejectsGPUWhenWorkspaceQuotaEmptyBeforeCreate(t *testing.T) {
|
||||
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
||||
UserID: "user-ivanwu",
|
||||
Username: "ivanwu",
|
||||
Role: authz.RoleUser,
|
||||
WorkspaceID: "workspace-ivanwu",
|
||||
WorkspaceName: "ivanwu",
|
||||
Namespace: "ocdp-u-ivanwu",
|
||||
})
|
||||
instanceRepo := persistencemock.NewInstanceRepositoryMock()
|
||||
workspaceRepo := persistencemock.NewWorkspaceRepositoryMock()
|
||||
bindingRepo := persistencemock.NewWorkspaceClusterBindingRepositoryMock()
|
||||
workspace := entity.NewWorkspace("ivanwu", "admin")
|
||||
workspace.ID = "workspace-ivanwu"
|
||||
workspace.K8sNamespace = "ocdp-u-ivanwu"
|
||||
workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace)
|
||||
workspace.QuotaCPU = "8"
|
||||
workspace.QuotaMemory = "32Gi"
|
||||
workspace.QuotaGPU = ""
|
||||
workspace.QuotaGPUMem = ""
|
||||
if err := workspaceRepo.Create(ctx, workspace); err != nil {
|
||||
t.Fatalf("seed workspace: %v", err)
|
||||
}
|
||||
|
||||
cluster := &entity.Cluster{
|
||||
ID: "k3s",
|
||||
Name: "k3s",
|
||||
Host: "https://k3s.invalid",
|
||||
Token: "token",
|
||||
OwnerID: "admin",
|
||||
Visibility: authz.VisibilityGlobalShared,
|
||||
}
|
||||
registry := &entity.Registry{
|
||||
ID: "registry-1",
|
||||
Name: "harbor",
|
||||
URL: "https://harbor.invalid",
|
||||
OwnerID: "admin",
|
||||
Visibility: authz.VisibilityGlobalShared,
|
||||
}
|
||||
helm := &stubHelmClient{
|
||||
estimate: &repository.ResourceEstimate{
|
||||
Requests: repository.ResourceVector{
|
||||
CPU: resource.MustParse("2"),
|
||||
Memory: resource.MustParse("8Gi"),
|
||||
GPU: 1,
|
||||
GPUMemoryMB: 10000,
|
||||
},
|
||||
},
|
||||
}
|
||||
oci := &stubOCIClient{}
|
||||
svc := NewInstanceService(
|
||||
instanceRepo,
|
||||
&stubClusterRepo{cluster: cluster},
|
||||
&stubRegistryRepo{registry: registry},
|
||||
helm,
|
||||
oci,
|
||||
nil,
|
||||
bindingRepo,
|
||||
)
|
||||
svc.SetTenantProvisioning(workspaceRepo, &recordingTenantClient{usage: &repository.ResourceQuotaUsage{}})
|
||||
|
||||
instance := entity.NewInstance("k3s", "vllm-qwen", "ocdp-u-ivanwu", registry.ID, "library/vllm-serve", "vllm-serve", "0.1.0")
|
||||
instance.SetValues(map[string]interface{}{
|
||||
"image": map[string]interface{}{
|
||||
"repository": "harbor.bwgdi.com/library/vllm-openai",
|
||||
"tag": "v0.17.1",
|
||||
},
|
||||
"model": "Qwen/Qwen2.5-0.5B",
|
||||
})
|
||||
|
||||
err := svc.CreateInstance(ctx, instance)
|
||||
if !errors.Is(err, ErrQuotaExceeded) {
|
||||
t.Fatalf("expected GPU quota rejection, got %v", err)
|
||||
}
|
||||
instances, listErr := instanceRepo.List(ctx)
|
||||
if listErr != nil {
|
||||
t.Fatalf("list instances: %v", listErr)
|
||||
}
|
||||
if len(instances) != 0 {
|
||||
t.Fatalf("expected quota rejection before instance DB create, got %#v", instances)
|
||||
}
|
||||
if helm.installCalls != 0 {
|
||||
t.Fatalf("expected Helm install not to be called, got %d calls", helm.installCalls)
|
||||
}
|
||||
if oci.pullCalls != 1 {
|
||||
t.Fatalf("expected chart pull for quota rendering, got %d pulls", oci.pullCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) {
|
||||
t.Helper()
|
||||
|
||||
@ -133,13 +338,19 @@ func (*stubClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { r
|
||||
|
||||
type stubHelmClient struct {
|
||||
uninstallErr error
|
||||
estimate *repository.ResourceEstimate
|
||||
values map[string]interface{}
|
||||
installCalls int
|
||||
upgradeCalls int
|
||||
}
|
||||
|
||||
func (*stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
func (s *stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
s.installCalls++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
func (s *stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
s.upgradeCalls++
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -163,13 +374,116 @@ func (*stubHelmClient) List(ctx context.Context, cluster *entity.Cluster, namesp
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (*stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
func (s *stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
|
||||
return s.values, nil
|
||||
}
|
||||
|
||||
func (*stubHelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (s *stubHelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) {
|
||||
if s.estimate != nil {
|
||||
return s.estimate, nil
|
||||
}
|
||||
return &repository.ResourceEstimate{}, nil
|
||||
}
|
||||
|
||||
type stubRegistryRepo struct {
|
||||
registry *entity.Registry
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) Create(ctx context.Context, registry *entity.Registry) error {
|
||||
s.registry = registry
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) GetByID(ctx context.Context, id string) (*entity.Registry, error) {
|
||||
if s.registry != nil && s.registry.ID == id {
|
||||
return s.registry, nil
|
||||
}
|
||||
return nil, entity.ErrRegistryNotFound
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) GetByName(ctx context.Context, name string) (*entity.Registry, error) {
|
||||
if s.registry != nil && s.registry.Name == name {
|
||||
return s.registry, nil
|
||||
}
|
||||
return nil, entity.ErrRegistryNotFound
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) Update(ctx context.Context, registry *entity.Registry) error {
|
||||
s.registry = registry
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) Delete(ctx context.Context, id string) error {
|
||||
if s.registry != nil && s.registry.ID == id {
|
||||
s.registry = nil
|
||||
return nil
|
||||
}
|
||||
return entity.ErrRegistryNotFound
|
||||
}
|
||||
|
||||
func (s *stubRegistryRepo) List(ctx context.Context) ([]*entity.Registry, error) {
|
||||
if s.registry == nil {
|
||||
return nil, nil
|
||||
}
|
||||
return []*entity.Registry{s.registry}, nil
|
||||
}
|
||||
|
||||
// stubOCIClient is an OCI client test double whose only state is a counter of
// PullArtifact invocations.
type stubOCIClient struct {
	// pullCalls is incremented once per PullArtifact call.
	pullCalls int
}
|
||||
|
||||
func (*stubOCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repositoryName, mediaTypeFilter string) ([]*entity.Artifact, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) GetArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (*entity.Artifact, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) GetValuesSchema(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (s *stubOCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference, destPath string) error {
|
||||
s.pullCalls++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) PushArtifact(ctx context.Context, registry *entity.Registry, repositoryName, tag, sourcePath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*stubOCIClient) CheckHealth(ctx context.Context, registry *entity.Registry) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// stubScaleClient is a scale client test double that remembers a single
// replica count shared by every deployment.
type stubScaleClient struct {
	// replicas is returned by GetDeploymentReplicas and overwritten by
	// ScaleDeployment.
	replicas int32
}
|
||||
|
||||
func (s *stubScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) {
|
||||
return s.replicas, nil
|
||||
}
|
||||
|
||||
func (s *stubScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error {
|
||||
s.replicas = replicas
|
||||
return nil
|
||||
}
|
||||
|
||||
var _ repository.ClusterRepository = (*stubClusterRepo)(nil)
|
||||
var _ repository.RegistryRepository = (*stubRegistryRepo)(nil)
|
||||
var _ repository.HelmClient = (*stubHelmClient)(nil)
|
||||
var _ repository.OCIClient = (*stubOCIClient)(nil)
|
||||
var _ ScaleClient = (*stubScaleClient)(nil)
|
||||
|
||||
Reference in New Issue
Block a user