fix: scale replicas in response, K8s metrics client, quota precheck, auth tests

- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
Ivan087
2026-05-20 16:56:29 +08:00
parent 8f90cf0f0d
commit 33ddaf97db
59 changed files with 4805 additions and 457 deletions

View File

@ -6,6 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
@ -34,6 +35,7 @@ type InstanceService struct {
entryClient repository.InstanceEntryClient
diagClient repository.InstanceDiagnosticsClient
workspaceRepo repository.WorkspaceRepository
userRepo repository.UserRepository
tenantClient repository.TenantKubeClient
scaleClient ScaleClient
}
@ -76,6 +78,10 @@ func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.Workspa
s.tenantClient = tenantClient
}
// SetUserRepository stores userRepo on the service.
// enrichOwnerUsernames returns immediately while s.userRepo is nil, so owner
// usernames are only filled in after a non-nil repository has been set here.
func (s *InstanceService) SetUserRepository(userRepo repository.UserRepository) {
s.userRepo = userRepo
}
// chartCacheDir is a fixed directory under /tmp.
// NOTE(review): presumably the root under which downloaded chart archives are
// cached; its use is not visible here — confirm against chartArchivePath.
const chartCacheDir = "/tmp/charts"
func (s *InstanceService) chartArchivePath(instance *entity.Instance) string {
@ -131,15 +137,21 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
return err
}
enforceNamespaceValues(instance)
if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil {
return err
}
// 检查实例是否已存在
existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name)
if existingInstance != nil {
return entity.ErrInstanceExists
}
if err := s.downloadChart(ctx, registry, instance); err != nil {
return err
}
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
if err != nil {
return err
}
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, nil); err != nil {
return err
}
instance.BeginOperation(entity.OperationInstall, "Preparing installation")
@ -148,13 +160,6 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
return err
}
// 下载 chart artifact 供 Helm 使用
if err := s.downloadChart(ctx, registry, instance); err != nil {
instance.MarkFailure("Failed to download chart", err)
_ = s.instanceRepo.Update(ctx, instance)
return err
}
// 异步执行 Helm 安装并监控状态
go s.executeAndSyncInstall(context.Background(), instance.ID, cluster, registry, instance)
@ -175,6 +180,7 @@ func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.I
if !s.canReadInstance(principal, instance) {
return nil, entity.ErrInstanceNotFound
}
s.enrichOwnerUsernames(ctx, []*entity.Instance{instance})
return instance, nil
}
@ -219,8 +225,22 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
if !s.canWriteInstance(principal, existingInstance) {
return entity.ErrForbidden
}
instance.ClusterID = existingInstance.ClusterID
instance.WorkspaceID = existingInstance.WorkspaceID
instance.OwnerID = existingInstance.OwnerID
instance.Name = existingInstance.Name
if instance.RegistryID == "" {
instance.RegistryID = existingInstance.RegistryID
}
if instance.Repository == "" {
instance.Repository = existingInstance.Repository
}
if instance.Chart == "" {
instance.Chart = existingInstance.Chart
}
if instance.Version == "" {
instance.Version = existingInstance.Version
}
// 获取集群信息
cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID)
@ -236,15 +256,21 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
instance.Namespace = existingInstance.Namespace
enforceNamespaceValues(instance)
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
if err := s.instanceRepo.Update(ctx, instance); err != nil {
return err
}
// 下载所需 Chart
if err := s.downloadChart(ctx, registry, instance); err != nil {
instance.MarkFailure("Failed to download chart", err)
_ = s.instanceRepo.Update(ctx, instance)
return err
}
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, instance)
if err != nil {
return err
}
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, instance, existingInstance); err != nil {
return err
}
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
if err := s.instanceRepo.Update(ctx, instance); err != nil {
return err
}
@ -364,9 +390,32 @@ func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID
visible = append(visible, instance)
}
}
s.enrichOwnerUsernames(ctx, visible)
return visible, nil
}
// enrichOwnerUsernames fills OwnerUsername on each instance from the user
// repository, in place.
//
// It does nothing when no user repository is configured or the slice is empty.
// Nil entries and instances without an OwnerID are skipped. Successful lookups
// are remembered per owner ID for the duration of the call, so each owner is
// fetched at most once after its first success; a failed or empty lookup
// leaves that instance untouched and is retried for the next instance with
// the same owner.
func (s *InstanceService) enrichOwnerUsernames(ctx context.Context, instances []*entity.Instance) {
	if len(instances) == 0 || s.userRepo == nil {
		return
	}

	resolved := map[string]string{}
	for i := range instances {
		inst := instances[i]
		if inst == nil {
			continue
		}
		ownerID := inst.OwnerID
		if ownerID == "" {
			continue
		}

		name, cached := resolved[ownerID]
		if !cached {
			owner, lookupErr := s.userRepo.GetByID(ctx, ownerID)
			if lookupErr != nil || owner == nil {
				// Best effort: leave OwnerUsername as it was.
				continue
			}
			name = owner.Username
			resolved[ownerID] = name
		}
		inst.OwnerUsername = name
	}
}
// ListInstanceEntries lists the entry points (Service / Ingress) associated with an instance.
func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) {
instance, err := s.GetInstance(ctx, instanceID)
@ -442,27 +491,57 @@ func (s *InstanceService) ScaleInstance(ctx context.Context, clusterID, instance
if !s.canWriteInstance(principal, instance) {
return nil, entity.ErrForbidden
}
if instance.ClusterID != clusterID {
return nil, entity.ErrInstanceNotFound
}
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
if err != nil {
return nil, entity.ErrClusterNotFound
}
current := cloneInstanceForQuota(instance)
currentValues, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace)
if err == nil && currentValues != nil {
current.SetValues(currentValues)
}
target := cloneInstanceForQuota(instance)
targetValues := copyValues(current.Values)
if targetValues == nil {
targetValues = copyValues(instance.Values)
}
if targetValues == nil {
targetValues = map[string]interface{}{}
}
targetValues["replicaCount"] = replicas
target.SetValues(targetValues)
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
if err != nil {
return nil, entity.ErrRegistryNotFound
}
if err := s.downloadChart(ctx, registry, target); err != nil {
return nil, err
}
binding, err := s.ensureTenantForInstance(ctx, principal, cluster, target)
if err != nil {
return nil, err
}
if err := s.precheckInstanceQuota(ctx, principal, cluster, binding, target, current); err != nil {
return nil, err
}
// Scale via K8s API directly (like kubectl scale deploy --replicas=N)
if s.scaleClient != nil {
if err := s.scaleClient.ScaleDeployment(ctx, cluster, instance.Namespace, instance.Name, int32(replicas)); err != nil {
return nil, fmt.Errorf("failed to scale deployment: %w", err)
}
instance.SetValues(targetValues)
instance.Replicas = replicas
if err := s.instanceRepo.Update(ctx, instance); err != nil {
return nil, err
}
} else {
// Fallback: Helm upgrade with replicaCount
vals, err := s.helmClient.GetValues(ctx, cluster, instance.Name, instance.Namespace)
if err != nil {
return nil, fmt.Errorf("failed to get current values: %w", err)
}
if vals == nil {
vals = make(map[string]interface{})
}
vals["replicaCount"] = replicas
instance.SetValues(vals)
instance.SetValues(targetValues)
instance.BeginOperation(entity.OperationUpgrade, fmt.Sprintf("Scaling to %d replicas", replicas))
if err := s.instanceRepo.Update(ctx, instance); err != nil {
return nil, err
@ -516,6 +595,9 @@ func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID,
if !s.canReadInstance(principal, instance) {
return nil, entity.ErrInstanceNotFound
}
if instance.ClusterID != clusterID {
return nil, entity.ErrInstanceNotFound
}
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
if err != nil {
return nil, entity.ErrClusterNotFound
@ -528,6 +610,18 @@ func (s *InstanceService) GetInstanceValuesDiff(ctx context.Context, clusterID,
// Get default values from the chart archive
chartPath := s.chartArchivePath(instance)
if _, statErr := os.Stat(chartPath); statErr != nil {
if !errors.Is(statErr, os.ErrNotExist) {
return nil, fmt.Errorf("failed to inspect chart defaults: %w", statErr)
}
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
if err != nil {
return nil, entity.ErrRegistryNotFound
}
if err := s.downloadChart(ctx, registry, instance); err != nil {
return nil, err
}
}
defaults, err := s.helmClient.GetChartDefaultValues(chartPath)
if err != nil {
return nil, fmt.Errorf("failed to read chart defaults: %w", err)
@ -593,9 +687,6 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
}
return nil
}
if isReservedNamespace(instance.Namespace) {
return entity.ErrInvalidNamespace
}
if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID {
namespace := principal.Namespace
if namespace == "" {
@ -606,9 +697,15 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
namespace = binding.Namespace
}
}
if instance.Namespace != "" && instance.Namespace != namespace {
return entity.ErrForbidden
}
instance.Namespace = namespace
return nil
}
if isReservedNamespace(instance.Namespace) {
return entity.ErrInvalidNamespace
}
if instance.Namespace == "" {
if cluster.DefaultNamespace != "" {
instance.Namespace = cluster.DefaultNamespace
@ -621,8 +718,62 @@ func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *a
return nil
}
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
// ensureTenantForInstance prepares the workspace/cluster binding for an
// instance operation, asks the tenant client to ensure the tenant on the
// cluster, and returns the binding that was used.
//
// It returns (nil, nil) without doing anything when the principal is an admin
// or when the workspace repository or tenant client is not configured, so
// callers must be prepared for a nil binding.
//
// Otherwise it loads the principal's workspace, returns
// entity.ErrWorkspaceSuspended for a suspended workspace, builds a binding from
// the workspace's quota fields, calls tenantClient.EnsureTenant, and, when a
// binding repository is configured, upserts the binding. Errors from the
// workspace lookup, EnsureTenant and Upsert are returned unchanged.
//
// Side effect: when a stored binding for this workspace/cluster already has a
// namespace, instance.Namespace is overwritten with it and
// enforceNamespaceValues is applied to the instance again.
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) (*entity.WorkspaceClusterBinding, error) {
if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil {
return nil, nil
}
workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
if err != nil {
return nil, err
}
if workspace.Status == entity.WorkspaceSuspended {
return nil, entity.ErrWorkspaceSuspended
}
// Start from a fresh binding derived from the workspace: CPU/memory quotas are
// whitespace-trimmed, blank GPU quotas become "0" (see zeroIfEmptyQuota).
binding := &entity.WorkspaceClusterBinding{
ID: uuid.New().String(),
WorkspaceID: workspace.ID,
ClusterID: cluster.ID,
Namespace: instance.Namespace,
ServiceAccount: workspace.K8sSAName,
QuotaCPU: strings.TrimSpace(workspace.QuotaCPU),
QuotaMemory: strings.TrimSpace(workspace.QuotaMemory),
QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU),
QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem),
Status: "active",
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
}
if s.bindingRepo != nil {
// A lookup error is handled the same way as "no stored binding": the fresh
// values above are kept as they are.
if existing, err := s.bindingRepo.Get(ctx, workspace.ID, cluster.ID); err == nil && existing != nil {
// Keep the stored identity and creation time; UpdatedAt stays at the time
// of this call. Quota fields are not copied from the stored binding, they
// always come from the workspace.
binding.ID = existing.ID
binding.CreatedAt = existing.CreatedAt
if existing.Namespace != "" {
// The stored namespace wins over whatever the instance requested.
binding.Namespace = existing.Namespace
instance.Namespace = existing.Namespace
enforceNamespaceValues(instance)
}
if existing.ServiceAccount != "" {
binding.ServiceAccount = existing.ServiceAccount
}
if existing.Status != "" {
binding.Status = existing.Status
}
}
}
tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
if err := s.tenantClient.EnsureTenant(ctx, cluster, tenantBinding); err != nil {
return nil, err
}
// Persist only after EnsureTenant succeeded, so a failed call does not write
// the binding.
if s.bindingRepo != nil {
if err := s.bindingRepo.Upsert(ctx, binding); err != nil {
return nil, err
}
}
return binding, nil
}
func (s *InstanceService) precheckInstanceQuota(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, target, current *entity.Instance) error {
if principal.IsAdmin() || s.workspaceRepo == nil || s.helmClient == nil {
return nil
}
workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
@ -632,29 +783,45 @@ func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal
if workspace.Status == entity.WorkspaceSuspended {
return entity.ErrWorkspaceSuspended
}
binding := entity.NewTenantBinding(instance.Namespace)
binding.ServiceAccountName = workspace.K8sSAName
binding.ResourceQuotaHard = instanceResourceQuotaHard(workspace)
if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil {
return err
if binding == nil {
binding = &entity.WorkspaceClusterBinding{
WorkspaceID: principal.WorkspaceID,
ClusterID: cluster.ID,
Namespace: target.Namespace,
QuotaCPU: strings.TrimSpace(workspace.QuotaCPU),
QuotaMemory: strings.TrimSpace(workspace.QuotaMemory),
QuotaGPU: zeroIfEmptyQuota(workspace.QuotaGPU),
QuotaGPUMem: zeroIfEmptyQuota(workspace.QuotaGPUMem),
}
}
if s.bindingRepo != nil {
_ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
ID: uuid.New().String(),
WorkspaceID: workspace.ID,
ClusterID: cluster.ID,
Namespace: instance.Namespace,
ServiceAccount: workspace.K8sSAName,
QuotaCPU: workspace.QuotaCPU,
QuotaMemory: workspace.QuotaMemory,
QuotaGPU: workspace.QuotaGPU,
QuotaGPUMem: workspace.QuotaGPUMem,
Status: "active",
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
})
var usage *repository.ResourceQuotaUsage
if s.tenantClient != nil {
tenantBinding := tenantBindingFromWorkspaceClusterBinding(binding)
quotaUsage, err := s.tenantClient.GetResourceQuotaUsage(ctx, cluster, tenantBinding)
if err != nil {
return err
}
usage = quotaUsage
}
return nil
result, err := NewQuotaPrecheckService(s.helmClient).EstimateAndCompareBinding(ctx, cluster, binding, usage, target, current)
if err == nil {
return nil
}
if errors.Is(err, ErrQuotaExceeded) && result != nil {
return fmt.Errorf("%w: %s", ErrQuotaExceeded, formatQuotaExceeded(result.Exceeded))
}
return err
}
// formatQuotaExceeded renders the exceeded resources as one line of text:
// a "<name> required=<required> quota=<hard>" segment per resource, joined by
// "; ". An empty list yields a generic "exceeds quota" message instead.
func formatQuotaExceeded(exceeded []QuotaExceededResource) string {
	if len(exceeded) == 0 {
		return "requested resources exceed workspace quota"
	}

	var sb strings.Builder
	for i := range exceeded {
		if i > 0 {
			sb.WriteString("; ")
		}
		res := exceeded[i]
		fmt.Fprintf(&sb, "%s required=%s quota=%s", res.Name, res.Required, res.Hard)
	}
	return sb.String()
}
func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
@ -687,6 +854,46 @@ func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList
return hard
}
// tenantBindingFromWorkspaceClusterBinding converts a workspace/cluster binding
// into the entity.TenantBinding handed to the tenant client.
//
// A nil binding yields entity.NewTenantBinding("") with nothing else set.
// Otherwise the tenant binding is created for binding.Namespace and receives
// the binding's service account and the quota list from bindingQuotaHard.
func tenantBindingFromWorkspaceClusterBinding(binding *entity.WorkspaceClusterBinding) entity.TenantBinding {
	if binding == nil {
		return entity.NewTenantBinding("")
	}

	result := entity.NewTenantBinding(binding.Namespace)
	result.ServiceAccountName = binding.ServiceAccount
	result.ResourceQuotaHard = bindingQuotaHard(binding)
	return result
}
// zeroIfEmptyQuota normalizes a quota string: surrounding whitespace is
// stripped, and a value that is blank after stripping is replaced by "0".
func zeroIfEmptyQuota(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed != "" {
		return trimmed
	}
	return "0"
}
// cloneInstanceForQuota returns a copy of instance whose values map can be
// changed without touching the original's top-level values.
//
// The struct is copied by value and the copy then receives copyValues of the
// original's Values through SetValues. Both copies are shallow: nested maps
// and any other reference-typed fields are still shared with the original.
// A nil instance yields nil.
func cloneInstanceForQuota(instance *entity.Instance) *entity.Instance {
	if instance == nil {
		return nil
	}

	snapshot := new(entity.Instance)
	*snapshot = *instance
	snapshot.SetValues(copyValues(instance.Values))
	return snapshot
}
// copyValues returns a shallow copy of values: a new map with the same keys,
// each pointing at the same value as in the input, so nested maps and slices
// are shared. A nil map is returned as nil; an empty non-nil map yields an
// empty non-nil map.
func copyValues(values map[string]interface{}) map[string]interface{} {
	if values == nil {
		return nil
	}

	dup := make(map[string]interface{}, len(values))
	for key := range values {
		dup[key] = values[key]
	}
	return dup
}
func isReservedNamespace(namespace string) bool {
switch namespace {
case "default", "kube-system", "kube-public", "kube-node-lease":