fix: scale replicas in response, K8s metrics client, quota precheck, auth tests
- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@ -34,6 +35,7 @@ type InstanceService struct {
|
||||
entryClient repository.InstanceEntryClient
|
||||
diagClient repository.InstanceDiagnosticsClient
|
||||
workspaceRepo repository.WorkspaceRepository
|
||||
userRepo repository.UserRepository
|
||||
tenantClient repository.TenantKubeClient
|
||||
scaleClient ScaleClient
|
||||
}
|
||||
@ -76,6 +78,10 @@ func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.Workspa
|
||||
s.tenantClient = tenantClient
|
||||
}
|
||||
|
||||
func (s *InstanceService) SetUserRepository(userRepo repository.UserRepository) {
|
||||
s.userRepo = userRepo
|
||||
}
|
||||
|
||||
const chartCacheDir = "/tmp/charts"
|
||||
|
||||
func (s *InstanceService) chartArchivePath(instance *entity.Instance) string {
|
||||
@ -131,15 +137,21 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
|
||||
return err
|
||||
}
|
||||
enforceNamespaceValues(instance)
|
||||
if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 检查实例是否已存在
|
||||
existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name)
|
||||
if existingInstance != nil {
|
||||
return entity.ErrInstanceExists
|
||||
}
|
||||
if err := s.downloadChart(ctx, registry, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
instance.BeginOperation(entity.OperationInstall, "Preparing installation")
|
||||
|
||||
@ -148,13 +160,6 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
|
||||
return err
|
||||
}
|
||||
|
||||
// 下载 chart artifact 供 Helm 使用
|
||||
if err := s.downloadChart(ctx, registry, instance); err != nil {
|
||||
instance.MarkFailure("Failed to download chart", err)
|
||||
_ = s.instanceRepo.Update(ctx, instance)
|
||||
return err
|
||||
}
|
||||
|
||||
// 异步执行 Helm 安装并监控状态
|
||||
go s.executeAndSyncInstall(context.Background(), instance.ID, cluster, registry, instance)
|
||||
|
||||
@ -175,6 +180,7 @@ func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.I
|
||||
if !s.canReadInstance(principal, instance) {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
s.enrichOwnerUsernames(ctx, []*entity.Instance{instance})
|
||||
return instance, nil
|
||||
}
|
||||
|
||||
@ -219,8 +225,22 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
|
||||
if !s.canWriteInstance(principal, existingInstance) {
|
||||
return entity.ErrForbidden
|
||||
}
|
||||
instance.ClusterID = existingInstance.ClusterID
|
||||
instance.WorkspaceID = existingInstance.WorkspaceID
|
||||
instance.OwnerID = existingInstance.OwnerID
|
||||
instance.Name = existingInstance.Name
|
||||
if instance.RegistryID == "" {
|
||||
instance.RegistryID = existingInstance.RegistryID
|
||||
}
|
||||
if instance.Repository == "" {
|
||||
instance.Repository = existingInstance.Repository
|
||||
}
|
||||
if instance.Chart == "" {
|
||||
instance.Chart = existingInstance.Chart
|
||||
}
|
||||
if instance.Version == "" {
|
||||
instance.Version = existingInstance.Version
|
||||
}
|
||||
|
||||
// 获取集群信息
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID)
|
||||
@ -236,15 +256,21 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
|
||||
|
||||
instance.Namespace = existingInstance.Namespace
|
||||
enforceNamespaceValues(instance)
|
||||
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
|
||||
if err := s.instanceRepo.Update(ctx, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 下载所需 Chart
|
||||
if err := s.downloadChart(ctx, registry, instance); err != nil {
|
||||
instance.MarkFailure("Failed to download chart", err)
|
||||
_ = s.instanceRepo.Update(ctx, instance)
|
||||
return err
|
||||
}
|
||||
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, existingInstance); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
|
||||
if err := s.instanceRepo.Update(ctx, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@ -364,9 +390,32 @@ func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID
|
||||
visible = append(visible, instance)
|
||||
}
|
||||
}
|
||||
s.enrichOwnerUsernames(ctx, visible)
|
||||
return visible, nil
|
||||
}
|
||||
|
||||
func (s *InstanceService) enrichOwnerUsernames(ctx context.Context, instances []*entity.Instance) {
|
||||
if s.userRepo == nil || len(instances) == 0 {
|
||||
return
|
||||
}
|
||||
usernames := make(map[string]string)
|
||||
for _, instance := range instances {
|
||||
if instance == nil || instance.OwnerID == "" {
|
||||
continue
|
||||
}
|
||||
if username, ok := usernames[instance.OwnerID]; ok {
|
||||
instance.OwnerUsername = username
|
||||
continue
|
||||
}
|
||||
user, err := s.userRepo.GetByID(ctx, instance.OwnerID)
|
||||
if err != nil || user == nil {
|
||||
continue
|
||||
}
|
||||
usernames[instance.OwnerID] = user.Username
|
||||
instance.OwnerUsername = user.Username
|
||||
}
|
||||
}
|
||||
|
||||
// ListInstanceEntries 列出实例关联的入口信息(Service / Ingress)
|
||||
func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) {
|
||||
instance, err := s.GetInstance(ctx, instanceID)
|
||||
@ -442,27 +491,57 @@ func (s *InstanceService) ScaleInstance(ctx context.Context, clusterID, instance
|
||||
if !s.canWriteInstance(principal, instance) {
|
||||
return nil, entity.ErrForbidden
|
||||
}
|
||||
if instance.ClusterID != clusterID {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrClusterNotFound
|
||||
}
|
||||
|
||||
current := cloneInstanceForQuota(instance)
|
||||
currentValues, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace)
|
||||
if err == nil && currentValues != nil {
|
||||
current.SetValues(currentValues)
|
||||
}
|
||||
target := cloneInstanceForQuota(instance)
|
||||
targetValues := copyValues(current.Values)
|
||||
if targetValues == nil {
|
||||
targetValues = copyValues(instance.Values)
|
||||
}
|
||||
if targetValues == nil {
|
||||
targetValues = map[string]interface{}{}
|
||||
}
|
||||
targetValues["replicaCount"] = replicas
|
||||
target.SetValues(targetValues)
|
||||
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrRegistryNotFound
|
||||
}
|
||||
if err := s.downloadChart(ctx, registry, target); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, target, current); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Scale via K8s API directly (like kubectl scale deploy --replicas=N)
|
||||
if s.scaleClient != nil {
|
||||
if err := s.scaleClient.ScaleDeployment(ctx, cluster, instance.Namespace, instance.Name, int32(replicas)); err != nil {
|
||||
return nil, fmt.Errorf("failed to scale deployment: %w", err)
|
||||
}
|
||||
instance.SetValues(targetValues)
|
||||
instance.Replicas = replicas
|
||||
if err := s.instanceRepo.Update(ctx, instance); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
// Fallback: Helm upgrade with replicaCount
|
||||
vals, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get current values: %w", err)
|
||||
}
|
||||
if vals == nil {
|
||||
vals = make(map[string]interface{})
|
||||
}
|
||||
vals["replicaCount"] = replicas
|
||||
instance.SetValues(vals)
|
||||
instance.SetValues(targetValues)
|
||||
instance.BeginOperation(entity.OperationUpgrade, fmt.Sprintf("Scaling to %d replicas", replicas))
|
||||
if err := s.instanceRepo.Update(ctx, instance); err != nil {
|
||||
return nil, err
|
||||
@ -516,6 +595,9 @@ func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID,
|
||||
if !s.canReadInstance(principal, instance) {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
if instance.ClusterID != clusterID {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrClusterNotFound
|
||||
@ -528,6 +610,18 @@ func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID,
|
||||
|
||||
// Get default values from the chart archive
|
||||
chartPath := s.chartArchivePath(instance)
|
||||
if _, statErr := os.Stat(chartPath); statErr != nil {
|
||||
if !errors.Is(statErr, os.ErrNotExist) {
|
||||
return nil, fmt.Errorf("failed to inspect chart defaults: %w", statErr)
|
||||
}
|
||||
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrRegistryNotFound
|
||||
}
|
||||
if err := s.downloadChart(ctx, registry, instance); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
defaults, err := s.helmClient.GetChartDefaultValues(chartPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read chart defaults: %w", err)
|
||||
@ -593,9 +687,6 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if isReservedNamespace(instance.Namespace) {
|
||||
return entity.ErrInvalidNamespace
|
||||
}
|
||||
if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID {
|
||||
namespace := principal.Namespace
|
||||
if namespace == "" {
|
||||
@ -606,9 +697,15 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
|
||||
namespace = binding.Namespace
|
||||
}
|
||||
}
|
||||
if instance.Namespace != "" && instance.Namespace != namespace {
|
||||
return entity.ErrForbidden
|
||||
}
|
||||
instance.Namespace = namespace
|
||||
return nil
|
||||
}
|
||||
if isReservedNamespace(instance.Namespace) {
|
||||
return entity.ErrInvalidNamespace
|
||||
}
|
||||
if instance.Namespace == "" {
|
||||
if cluster.DefaultNamespace != "" {
|
||||
instance.Namespace = cluster.DefaultNamespace
|
||||
@ -621,8 +718,62 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) (*entity.WorkspaceClusterBinding, error) {
|
||||
if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil {
|
||||
return nil, nil
|
||||
}
|
||||
workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if workspace.Status == entity.WorkspaceSuspended {
|
||||
return nil, entity.ErrWorkspaceSuspended
|
||||
}
|
||||
binding := &entity.WorkspaceClusterBinding{
|
||||
ID: uuid.New().String(),
|
||||
WorkspaceID: workspace.ID,
|
||||
ClusterID: cluster.ID,
|
||||
Namespace: instance.Namespace,
|
||||
ServiceAccount: workspace.K8sSAName,
|
||||
QuotaCPU: strings.TrimSpace(workspace.QuotaCPU),
|
||||
QuotaMemory: strings.TrimSpace(workspace.QuotaMemory),
|
||||
QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU),
|
||||
QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem),
|
||||
Status: "active",
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
}
|
||||
if s.bindingRepo != nil {
|
||||
if existing, err := s.bindingRepo.Get(ctx, workspace.ID, cluster.ID); err == nil && existing != nil {
|
||||
binding.ID = existing.ID
|
||||
binding.CreatedAt = existing.CreatedAt
|
||||
if existing.Namespace != "" {
|
||||
binding.Namespace = existing.Namespace
|
||||
instance.Namespace = existing.Namespace
|
||||
enforceNamespaceValues(instance)
|
||||
}
|
||||
if existing.ServiceAccount != "" {
|
||||
binding.ServiceAccount = existing.ServiceAccount
|
||||
}
|
||||
if existing.Status != "" {
|
||||
binding.Status = existing.Status
|
||||
}
|
||||
}
|
||||
}
|
||||
tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
|
||||
if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.bindingRepo != nil {
|
||||
if err := s.bindingRepo.Upsert(ctx, binding); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return binding, nil
|
||||
}
|
||||
|
||||
func (s *InstanceService) precheckInstanceQuota(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, target, current *entity.Instance) error {
|
||||
if principal.IsAdmin() || s.workspaceRepo == nil || s.helmClient == nil {
|
||||
return nil
|
||||
}
|
||||
workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
|
||||
@ -632,29 +783,45 @@ func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal
|
||||
if workspace.Status == entity.WorkspaceSuspended {
|
||||
return entity.ErrWorkspaceSuspended
|
||||
}
|
||||
binding := entity.NewTenantBinding(instance.Namespace)
|
||||
binding.ServiceAccountName = workspace.K8sSAName
|
||||
binding.ResourceQuotaHard = instanceResourceQuotaHard(workspace)
|
||||
if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil {
|
||||
return err
|
||||
if binding == nil {
|
||||
binding = &entity.WorkspaceClusterBinding{
|
||||
WorkspaceID: principal.WorkspaceID,
|
||||
ClusterID: cluster.ID,
|
||||
Namespace: target.Namespace,
|
||||
QuotaCPU: strings.TrimSpace(workspace.QuotaCPU),
|
||||
QuotaMemory: strings.TrimSpace(workspace.QuotaMemory),
|
||||
QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU),
|
||||
QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem),
|
||||
}
|
||||
}
|
||||
if s.bindingRepo != nil {
|
||||
_ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
|
||||
ID: uuid.New().String(),
|
||||
WorkspaceID: workspace.ID,
|
||||
ClusterID: cluster.ID,
|
||||
Namespace: instance.Namespace,
|
||||
ServiceAccount: workspace.K8sSAName,
|
||||
QuotaCPU: workspace.QuotaCPU,
|
||||
QuotaMemory: workspace.QuotaMemory,
|
||||
QuotaGPU: workspace.QuotaGPU,
|
||||
QuotaGPUMem: workspace.QuotaGPUMem,
|
||||
Status: "active",
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
})
|
||||
var usage *repository.ResourceQuotaUsage
|
||||
if s.tenantClient != nil {
|
||||
tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
|
||||
quotaUsage, err := s.tenantClient.GetResourceQuotaUsage(ctx, cluster, tenantBinding)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
usage = quotaUsage
|
||||
}
|
||||
return nil
|
||||
result, err := NewQuotaPrecheckService(s.helmClient).EstimateAndCompareBinding(ctx, cluster, binding, usage, target, current)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if errors.Is(err, ErrQuotaExceeded) && result != nil {
|
||||
return fmt.Errorf("%w: %s", ErrQuotaExceeded, formatQuotaExceeded(result.Exceeded))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func formatQuotaExceeded(exceeded []QuotaExceededResource) string {
|
||||
if len(exceeded) == 0 {
|
||||
return "requested resources exceed workspace quota"
|
||||
}
|
||||
parts := make([]string, 0, len(exceeded))
|
||||
for _, item := range exceeded {
|
||||
parts = append(parts, fmt.Sprintf("%s required=%s quota=%s", item.Name, item.Required, item.Hard))
|
||||
}
|
||||
return strings.Join(parts, "; ")
|
||||
}
|
||||
|
||||
func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
|
||||
@ -687,6 +854,46 @@ func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList
|
||||
return hard
|
||||
}
|
||||
|
||||
func tenantBindingFromWorkspaceClusterBinding(binding *entity.WorkspaceClusterBinding) entity.TenantBinding {
|
||||
namespace := ""
|
||||
if binding != nil {
|
||||
namespace = binding.Namespace
|
||||
}
|
||||
tenantBinding := entity.NewTenantBinding(namespace)
|
||||
if binding != nil {
|
||||
tenantBinding.ServiceAccountName = binding.ServiceAccount
|
||||
tenantBinding.ResourceQuotaHard = bindingQuotaHard(binding)
|
||||
}
|
||||
return tenantBinding
|
||||
}
|
||||
|
||||
// zeroIfEmptyQuota normalizes a quota string: surrounding whitespace is
// removed, and a value that is empty after trimming becomes "0".
func zeroIfEmptyQuota(value string) string {
	// Trim once and reuse the result for both the emptiness check and the
	// return value.
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return "0"
	}
	return trimmed
}
|
||||
|
||||
func cloneInstanceForQuota(instance *entity.Instance) *entity.Instance {
|
||||
if instance == nil {
|
||||
return nil
|
||||
}
|
||||
cloned := *instance
|
||||
cloned.SetValues(copyValues(instance.Values))
|
||||
return &cloned
|
||||
}
|
||||
|
||||
// copyValues returns an independent copy of a values map. A nil map yields
// nil; an empty map yields a new empty map.
//
// Nested map[string]interface{} and []interface{} values are copied
// recursively, so changing a nested key or list element in the copy does not
// alter the original (and vice versa). Values of any other type are carried
// over as-is.
func copyValues(values map[string]interface{}) map[string]interface{} {
	if values == nil {
		return nil
	}
	copied := make(map[string]interface{}, len(values))
	for key, value := range values {
		copied[key] = copyNestedValue(value)
	}
	return copied
}

// copyNestedValue copies a single value for copyValues, descending into
// generic maps and lists. It assumes the value tree is acyclic.
func copyNestedValue(value interface{}) interface{} {
	switch typed := value.(type) {
	case map[string]interface{}:
		return copyValues(typed)
	case []interface{}:
		if typed == nil {
			// Keep a nil list nil rather than turning it into an empty one.
			return typed
		}
		items := make([]interface{}, len(typed))
		for i, item := range typed {
			items[i] = copyNestedValue(item)
		}
		return items
	default:
		return value
	}
}
|
||||
|
||||
func isReservedNamespace(namespace string) bool {
|
||||
switch namespace {
|
||||
case "default", "kube-system", "kube-public", "kube-node-lease":
|
||||
|
||||
Reference in New Issue
Block a user