- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
490 lines
15 KiB
Go
490 lines
15 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"testing"
|
|
"time"
|
|
|
|
persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
|
|
"github.com/ocdp/cluster-service/internal/domain/entity"
|
|
"github.com/ocdp/cluster-service/internal/domain/repository"
|
|
"github.com/ocdp/cluster-service/internal/pkg/authz"
|
|
"k8s.io/apimachinery/pkg/api/resource"
|
|
)
|
|
|
|
func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) {
|
|
principal := &authz.Principal{UserID: "user-1", Username: "tester", Role: authz.RoleUser, WorkspaceID: entity.DefaultWorkspaceID}
|
|
ctx := authz.WithPrincipal(context.Background(), principal)
|
|
instanceRepo := persistencemock.NewInstanceRepositoryMock()
|
|
|
|
instance := &entity.Instance{
|
|
ID: "inst-1",
|
|
WorkspaceID: entity.DefaultWorkspaceID,
|
|
OwnerID: "user-1",
|
|
ClusterID: "cluster-1",
|
|
Name: "demo",
|
|
Namespace: "default",
|
|
}
|
|
if err := instanceRepo.Create(ctx, instance); err != nil {
|
|
t.Fatalf("failed to seed instance: %v", err)
|
|
}
|
|
|
|
cluster := &entity.Cluster{ID: "cluster-1", Name: "cluster", Host: "https://example.com"}
|
|
clusterRepo := &stubClusterRepo{cluster: cluster}
|
|
|
|
svc := NewInstanceService(
|
|
instanceRepo,
|
|
clusterRepo,
|
|
nil,
|
|
&stubHelmClient{uninstallErr: entity.ErrInstanceNotFound},
|
|
nil,
|
|
nil,
|
|
)
|
|
|
|
if err := svc.DeleteInstance(ctx, instance.ID); err != nil {
|
|
t.Fatalf("DeleteInstance returned error: %v", err)
|
|
}
|
|
|
|
waitForInstanceDeleted(t, ctx, instanceRepo, instance.ID)
|
|
}
|
|
|
|
func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) {
|
|
instance := &entity.Instance{
|
|
Namespace: "ocdp-u-alice",
|
|
Values: map[string]interface{}{
|
|
"namespace": "default",
|
|
"namespaceOverride": "default",
|
|
"targetNamespace": "default",
|
|
"global": map[string]interface{}{
|
|
"namespace": "default",
|
|
"namespaceOverride": "default",
|
|
},
|
|
"image": map[string]interface{}{
|
|
"repository": "nginx",
|
|
},
|
|
},
|
|
}
|
|
|
|
enforceNamespaceValues(instance)
|
|
|
|
if instance.Values["namespace"] != "ocdp-u-alice" {
|
|
t.Fatalf("expected top-level namespace to be enforced, got %#v", instance.Values["namespace"])
|
|
}
|
|
if instance.Values["namespaceOverride"] != "ocdp-u-alice" {
|
|
t.Fatalf("expected namespaceOverride to be enforced, got %#v", instance.Values["namespaceOverride"])
|
|
}
|
|
if instance.Values["targetNamespace"] != "ocdp-u-alice" {
|
|
t.Fatalf("expected targetNamespace to be enforced, got %#v", instance.Values["targetNamespace"])
|
|
}
|
|
global, ok := instance.Values["global"].(map[string]interface{})
|
|
if !ok {
|
|
t.Fatalf("expected global map, got %#v", instance.Values["global"])
|
|
}
|
|
if global["namespace"] != "ocdp-u-alice" || global["namespaceOverride"] != "ocdp-u-alice" {
|
|
t.Fatalf("expected global namespace keys to be enforced, got %#v", global)
|
|
}
|
|
}
|
|
|
|
func TestApplyNamespacePolicyRejectsMismatchedTenantNamespace(t *testing.T) {
|
|
principal := &authz.Principal{
|
|
UserID: "user-1",
|
|
Username: "alice",
|
|
Role: authz.RoleUser,
|
|
WorkspaceID: "workspace-1",
|
|
WorkspaceName: "alice",
|
|
Namespace: "ocdp-u-alice",
|
|
}
|
|
cluster := &entity.Cluster{
|
|
ID: "cluster-1",
|
|
OwnerID: "admin",
|
|
Visibility: authz.VisibilityWorkspaceShared,
|
|
}
|
|
instance := &entity.Instance{Namespace: "other-namespace"}
|
|
svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
|
|
|
|
if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); !errors.Is(err, entity.ErrForbidden) {
|
|
t.Fatalf("expected ErrForbidden for mismatched tenant namespace, got %v", err)
|
|
}
|
|
if instance.Namespace != "other-namespace" {
|
|
t.Fatalf("expected namespace to remain unchanged on rejection, got %q", instance.Namespace)
|
|
}
|
|
}
|
|
|
|
func TestApplyNamespacePolicyAllowsTenantNamespace(t *testing.T) {
|
|
principal := &authz.Principal{
|
|
UserID: "user-1",
|
|
Username: "alice",
|
|
Role: authz.RoleUser,
|
|
WorkspaceID: "workspace-1",
|
|
WorkspaceName: "alice",
|
|
Namespace: "ocdp-u-alice",
|
|
}
|
|
cluster := &entity.Cluster{
|
|
ID: "cluster-1",
|
|
OwnerID: "admin",
|
|
Visibility: authz.VisibilityWorkspaceShared,
|
|
}
|
|
instance := &entity.Instance{Namespace: "ocdp-u-alice"}
|
|
svc := NewInstanceService(nil, nil, nil, nil, nil, nil)
|
|
|
|
if err := svc.applyNamespacePolicy(context.Background(), principal, cluster, instance); err != nil {
|
|
t.Fatalf("expected matching tenant namespace to be allowed, got %v", err)
|
|
}
|
|
if instance.Namespace != "ocdp-u-alice" {
|
|
t.Fatalf("expected namespace to remain the allowed tenant namespace, got %q", instance.Namespace)
|
|
}
|
|
}
|
|
|
|
func TestEnrichReplicasSetsLiveReplicaCount(t *testing.T) {
|
|
ctx := context.Background()
|
|
cluster := &entity.Cluster{ID: "cluster-1", Name: "cluster"}
|
|
svc := NewInstanceService(nil, &stubClusterRepo{cluster: cluster}, nil, nil, nil, nil)
|
|
svc.SetScaleClient(&stubScaleClient{replicas: 3})
|
|
|
|
instances := []*entity.Instance{{
|
|
ID: "inst-1",
|
|
ClusterID: "cluster-1",
|
|
Name: "demo",
|
|
Namespace: "ocdp-u-alice",
|
|
Replicas: 1,
|
|
}}
|
|
|
|
enriched := svc.EnrichReplicas(ctx, "cluster-1", instances)
|
|
if enriched[0].Replicas != 3 {
|
|
t.Fatalf("expected live replicas to overwrite stored count, got %d", enriched[0].Replicas)
|
|
}
|
|
}
|
|
|
|
func TestListInstancesByClusterHydratesOwnerUsername(t *testing.T) {
|
|
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
|
UserID: "admin-1",
|
|
Username: "admin",
|
|
Role: authz.RoleAdmin,
|
|
WorkspaceID: "workspace-admin",
|
|
})
|
|
instanceRepo := persistencemock.NewInstanceRepositoryMock()
|
|
userRepo := persistencemock.NewUserRepositoryMock()
|
|
if err := userRepo.Create(ctx, &entity.User{ID: "user-1", Username: "alice", PasswordHash: "hash", Role: "user", WorkspaceID: "workspace-1"}); err != nil {
|
|
t.Fatalf("failed to seed user: %v", err)
|
|
}
|
|
instance := &entity.Instance{
|
|
ID: "inst-1",
|
|
WorkspaceID: "workspace-1",
|
|
OwnerID: "user-1",
|
|
ClusterID: "cluster-1",
|
|
Name: "demo",
|
|
Namespace: "ocdp-u-alice",
|
|
}
|
|
if err := instanceRepo.Create(ctx, instance); err != nil {
|
|
t.Fatalf("failed to seed instance: %v", err)
|
|
}
|
|
svc := NewInstanceService(
|
|
instanceRepo,
|
|
&stubClusterRepo{cluster: &entity.Cluster{ID: "cluster-1", Name: "cluster"}},
|
|
nil,
|
|
nil,
|
|
nil,
|
|
nil,
|
|
)
|
|
svc.SetUserRepository(userRepo)
|
|
|
|
instances, err := svc.ListInstancesByCluster(ctx, "cluster-1")
|
|
if err != nil {
|
|
t.Fatalf("ListInstancesByCluster returned error: %v", err)
|
|
}
|
|
if len(instances) != 1 {
|
|
t.Fatalf("expected 1 instance, got %d", len(instances))
|
|
}
|
|
if instances[0].OwnerUsername != "alice" {
|
|
t.Fatalf("expected owner username alice, got %q", instances[0].OwnerUsername)
|
|
}
|
|
}
|
|
|
|
func TestCreateInstanceRejectsGPUWhenWorkspaceQuotaEmptyBeforeCreate(t *testing.T) {
|
|
ctx := authz.WithPrincipal(context.Background(), &authz.Principal{
|
|
UserID: "user-ivanwu",
|
|
Username: "ivanwu",
|
|
Role: authz.RoleUser,
|
|
WorkspaceID: "workspace-ivanwu",
|
|
WorkspaceName: "ivanwu",
|
|
Namespace: "ocdp-u-ivanwu",
|
|
})
|
|
instanceRepo := persistencemock.NewInstanceRepositoryMock()
|
|
workspaceRepo := persistencemock.NewWorkspaceRepositoryMock()
|
|
bindingRepo := persistencemock.NewWorkspaceClusterBindingRepositoryMock()
|
|
workspace := entity.NewWorkspace("ivanwu", "admin")
|
|
workspace.ID = "workspace-ivanwu"
|
|
workspace.K8sNamespace = "ocdp-u-ivanwu"
|
|
workspace.K8sSAName = entity.ServiceAccountForNamespace(workspace.K8sNamespace)
|
|
workspace.QuotaCPU = "8"
|
|
workspace.QuotaMemory = "32Gi"
|
|
workspace.QuotaGPU = ""
|
|
workspace.QuotaGPUMem = ""
|
|
if err := workspaceRepo.Create(ctx, workspace); err != nil {
|
|
t.Fatalf("seed workspace: %v", err)
|
|
}
|
|
|
|
cluster := &entity.Cluster{
|
|
ID: "k3s",
|
|
Name: "k3s",
|
|
Host: "https://k3s.invalid",
|
|
Token: "token",
|
|
OwnerID: "admin",
|
|
Visibility: authz.VisibilityGlobalShared,
|
|
}
|
|
registry := &entity.Registry{
|
|
ID: "registry-1",
|
|
Name: "harbor",
|
|
URL: "https://harbor.invalid",
|
|
OwnerID: "admin",
|
|
Visibility: authz.VisibilityGlobalShared,
|
|
}
|
|
helm := &stubHelmClient{
|
|
estimate: &repository.ResourceEstimate{
|
|
Requests: repository.ResourceVector{
|
|
CPU: resource.MustParse("2"),
|
|
Memory: resource.MustParse("8Gi"),
|
|
GPU: 1,
|
|
GPUMemoryMB: 10000,
|
|
},
|
|
},
|
|
}
|
|
oci := &stubOCIClient{}
|
|
svc := NewInstanceService(
|
|
instanceRepo,
|
|
&stubClusterRepo{cluster: cluster},
|
|
&stubRegistryRepo{registry: registry},
|
|
helm,
|
|
oci,
|
|
nil,
|
|
bindingRepo,
|
|
)
|
|
svc.SetTenantProvisioning(workspaceRepo, &recordingTenantClient{usage: &repository.ResourceQuotaUsage{}})
|
|
|
|
instance := entity.NewInstance("k3s", "vllm-qwen", "ocdp-u-ivanwu", registry.ID, "library/vllm-serve", "vllm-serve", "0.1.0")
|
|
instance.SetValues(map[string]interface{}{
|
|
"image": map[string]interface{}{
|
|
"repository": "harbor.bwgdi.com/library/vllm-openai",
|
|
"tag": "v0.17.1",
|
|
},
|
|
"model": "Qwen/Qwen2.5-0.5B",
|
|
})
|
|
|
|
err := svc.CreateInstance(ctx, instance)
|
|
if !errors.Is(err, ErrQuotaExceeded) {
|
|
t.Fatalf("expected GPU quota rejection, got %v", err)
|
|
}
|
|
instances, listErr := instanceRepo.List(ctx)
|
|
if listErr != nil {
|
|
t.Fatalf("list instances: %v", listErr)
|
|
}
|
|
if len(instances) != 0 {
|
|
t.Fatalf("expected quota rejection before instance DB create, got %#v", instances)
|
|
}
|
|
if helm.installCalls != 0 {
|
|
t.Fatalf("expected Helm install not to be called, got %d calls", helm.installCalls)
|
|
}
|
|
if oci.pullCalls != 1 {
|
|
t.Fatalf("expected chart pull for quota rendering, got %d pulls", oci.pullCalls)
|
|
}
|
|
}
|
|
|
|
func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) {
|
|
t.Helper()
|
|
|
|
deadline := time.After(2 * time.Second)
|
|
ticker := time.NewTicker(10 * time.Millisecond)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-deadline:
|
|
_, err := repo.GetByID(ctx, id)
|
|
t.Fatalf("expected instance removed, got err=%v", err)
|
|
case <-ticker.C:
|
|
if _, err := repo.GetByID(ctx, id); errors.Is(err, entity.ErrInstanceNotFound) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
type stubClusterRepo struct {
|
|
cluster *entity.Cluster
|
|
}
|
|
|
|
func (s *stubClusterRepo) Create(ctx context.Context, cluster *entity.Cluster) error {
|
|
s.cluster = cluster
|
|
return nil
|
|
}
|
|
|
|
func (s *stubClusterRepo) GetByID(ctx context.Context, id string) (*entity.Cluster, error) {
|
|
if s.cluster != nil && s.cluster.ID == id {
|
|
return s.cluster, nil
|
|
}
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
|
|
func (*stubClusterRepo) GetByName(ctx context.Context, name string) (*entity.Cluster, error) {
|
|
return nil, entity.ErrClusterNotFound
|
|
}
|
|
|
|
func (*stubClusterRepo) Update(ctx context.Context, cluster *entity.Cluster) error { return nil }
|
|
|
|
func (*stubClusterRepo) Delete(ctx context.Context, id string) error { return nil }
|
|
|
|
func (*stubClusterRepo) List(ctx context.Context) ([]*entity.Cluster, error) { return nil, nil }
|
|
|
|
type stubHelmClient struct {
|
|
uninstallErr error
|
|
estimate *repository.ResourceEstimate
|
|
values map[string]interface{}
|
|
installCalls int
|
|
upgradeCalls int
|
|
}
|
|
|
|
func (s *stubHelmClient) Install(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
|
s.installCalls++
|
|
return nil
|
|
}
|
|
|
|
func (s *stubHelmClient) Upgrade(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) error {
|
|
s.upgradeCalls++
|
|
return nil
|
|
}
|
|
|
|
func (s *stubHelmClient) Uninstall(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) error {
|
|
return s.uninstallErr
|
|
}
|
|
|
|
func (*stubHelmClient) Rollback(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string, revision int) error {
|
|
return nil
|
|
}
|
|
|
|
func (*stubHelmClient) GetStatus(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (*entity.Instance, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (*stubHelmClient) GetHistory(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) ([]*entity.ReleaseHistory, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (*stubHelmClient) List(ctx context.Context, cluster *entity.Cluster, namespace string) ([]*entity.Instance, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (s *stubHelmClient) GetValues(ctx context.Context, cluster *entity.Cluster, releaseName, namespace string) (map[string]interface{}, error) {
|
|
return s.values, nil
|
|
}
|
|
|
|
func (*stubHelmClient) GetChartDefaultValues(chartPath string) (map[string]interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (s *stubHelmClient) EstimateInstanceResources(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance) (*repository.ResourceEstimate, error) {
|
|
if s.estimate != nil {
|
|
return s.estimate, nil
|
|
}
|
|
return &repository.ResourceEstimate{}, nil
|
|
}
|
|
|
|
type stubRegistryRepo struct {
|
|
registry *entity.Registry
|
|
}
|
|
|
|
func (s *stubRegistryRepo) Create(ctx context.Context, registry *entity.Registry) error {
|
|
s.registry = registry
|
|
return nil
|
|
}
|
|
|
|
func (s *stubRegistryRepo) GetByID(ctx context.Context, id string) (*entity.Registry, error) {
|
|
if s.registry != nil && s.registry.ID == id {
|
|
return s.registry, nil
|
|
}
|
|
return nil, entity.ErrRegistryNotFound
|
|
}
|
|
|
|
func (s *stubRegistryRepo) GetByName(ctx context.Context, name string) (*entity.Registry, error) {
|
|
if s.registry != nil && s.registry.Name == name {
|
|
return s.registry, nil
|
|
}
|
|
return nil, entity.ErrRegistryNotFound
|
|
}
|
|
|
|
func (s *stubRegistryRepo) Update(ctx context.Context, registry *entity.Registry) error {
|
|
s.registry = registry
|
|
return nil
|
|
}
|
|
|
|
func (s *stubRegistryRepo) Delete(ctx context.Context, id string) error {
|
|
if s.registry != nil && s.registry.ID == id {
|
|
s.registry = nil
|
|
return nil
|
|
}
|
|
return entity.ErrRegistryNotFound
|
|
}
|
|
|
|
func (s *stubRegistryRepo) List(ctx context.Context) ([]*entity.Registry, error) {
|
|
if s.registry == nil {
|
|
return nil, nil
|
|
}
|
|
return []*entity.Registry{s.registry}, nil
|
|
}
|
|
|
|
type stubOCIClient struct {
|
|
pullCalls int
|
|
}
|
|
|
|
func (*stubOCIClient) ListRepositories(ctx context.Context, registry *entity.Registry, artifactType string) ([]string, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (*stubOCIClient) ListArtifacts(ctx context.Context, registry *entity.Registry, repositoryName, mediaTypeFilter string) ([]*entity.Artifact, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (*stubOCIClient) GetArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (*entity.Artifact, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (*stubOCIClient) GetValuesSchema(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
|
|
return "", nil
|
|
}
|
|
|
|
func (*stubOCIClient) GetValuesYAML(ctx context.Context, registry *entity.Registry, repositoryName, reference string) (string, error) {
|
|
return "", nil
|
|
}
|
|
|
|
func (s *stubOCIClient) PullArtifact(ctx context.Context, registry *entity.Registry, repositoryName, reference, destPath string) error {
|
|
s.pullCalls++
|
|
return nil
|
|
}
|
|
|
|
func (*stubOCIClient) PushArtifact(ctx context.Context, registry *entity.Registry, repositoryName, tag, sourcePath string) error {
|
|
return nil
|
|
}
|
|
|
|
func (*stubOCIClient) CheckHealth(ctx context.Context, registry *entity.Registry) error {
|
|
return nil
|
|
}
|
|
|
|
type stubScaleClient struct {
|
|
replicas int32
|
|
}
|
|
|
|
func (s *stubScaleClient) GetDeploymentReplicas(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string) (int32, error) {
|
|
return s.replicas, nil
|
|
}
|
|
|
|
func (s *stubScaleClient) ScaleDeployment(ctx context.Context, cluster *entity.Cluster, namespace, releaseName string, replicas int32) error {
|
|
s.replicas = replicas
|
|
return nil
|
|
}
|
|
|
|
var _ repository.ClusterRepository = (*stubClusterRepo)(nil)
|
|
var _ repository.RegistryRepository = (*stubRegistryRepo)(nil)
|
|
var _ repository.HelmClient = (*stubHelmClient)(nil)
|
|
var _ repository.OCIClient = (*stubOCIClient)(nil)
|
|
var _ ScaleClient = (*stubScaleClient)(nil)
|