fix: scale replicas in response, K8s metrics client, quota precheck, auth tests
- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
400
backend/internal/domain/service/quota_precheck.go
Normal file
400
backend/internal/domain/service/quota_precheck.go
Normal file
@ -0,0 +1,400 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ocdp/cluster-service/internal/domain/entity"
|
||||
"github.com/ocdp/cluster-service/internal/domain/repository"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
|
||||
"k8s.io/apimachinery/pkg/util/yaml"
|
||||
)
|
||||
|
||||
// ErrQuotaExceeded is the sentinel error returned by the quota comparison
// functions in this file when at least one resource is above its hard limit.
// Match it with errors.Is.
var ErrQuotaExceeded = errors.New("quota exceeded")
|
||||
|
||||
// QuotaExceededResource describes a single quota entry whose required amount
// is larger than its hard limit. Both amounts are carried as already
// formatted strings.
type QuotaExceededResource struct {
	// Name is the quota key that was exceeded, e.g. "requests.cpu".
	Name string
	// Required is the amount that would be needed.
	Required string
	// Hard is the configured limit for Name.
	Hard string
}
|
||||
|
||||
type QuotaPrecheckResult struct {
|
||||
Allowed bool
|
||||
Required repository.ResourceEstimate
|
||||
Hard repository.ResourceVector
|
||||
Exceeded []QuotaExceededResource
|
||||
}
|
||||
|
||||
type QuotaPrecheckService struct {
|
||||
helmClient repository.HelmClient
|
||||
}
|
||||
|
||||
func NewQuotaPrecheckService(helmClient repository.HelmClient) *QuotaPrecheckService {
|
||||
return &QuotaPrecheckService{helmClient: helmClient}
|
||||
}
|
||||
|
||||
func (s *QuotaPrecheckService) EstimateAndCompare(ctx context.Context, cluster *entity.Cluster, workspace *entity.Workspace, instance *entity.Instance) (*QuotaPrecheckResult, error) {
|
||||
if s == nil || s.helmClient == nil {
|
||||
return nil, errors.New("quota precheck requires helm client")
|
||||
}
|
||||
estimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, instance)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result, err := CompareWorkspaceQuota(workspace, estimate)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *QuotaPrecheckService) EstimateAndCompareBinding(ctx context.Context, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, target *entity.Instance, current *entity.Instance) (*QuotaPrecheckResult, error) {
|
||||
if s == nil || s.helmClient == nil {
|
||||
return nil, errors.New("quota precheck requires helm client")
|
||||
}
|
||||
targetEstimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var currentEstimate *repository.ResourceEstimate
|
||||
if current != nil {
|
||||
currentEstimate, err = s.helmClient.EstimateInstanceResources(ctx, cluster, current)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
result, err := CompareBindingQuota(binding, usage, targetEstimate, currentEstimate)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func CompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
|
||||
return compareQuotaList(resourceQuotaHard(workspace), nil, estimate, nil)
|
||||
}
|
||||
|
||||
func CompareBindingQuota(binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
|
||||
return compareQuotaList(bindingQuotaHard(binding), usage, targetEstimate, currentEstimate)
|
||||
}
|
||||
|
||||
func compareQuotaList(hardList corev1.ResourceList, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
|
||||
if targetEstimate == nil {
|
||||
targetEstimate = &repository.ResourceEstimate{}
|
||||
}
|
||||
current := effectiveQuotaRequests(currentEstimate)
|
||||
target := effectiveQuotaRequests(targetEstimate)
|
||||
used := repository.ResourceVector{}
|
||||
if usage != nil {
|
||||
used = usage.Used
|
||||
}
|
||||
required := addResourceVector(subtractResourceVectorFloorZero(used, current), target)
|
||||
hard := resourceVectorFromQuotaHard(hardList)
|
||||
result := &QuotaPrecheckResult{
|
||||
Allowed: true,
|
||||
Required: repository.ResourceEstimate{
|
||||
Requests: required,
|
||||
},
|
||||
Hard: hard,
|
||||
}
|
||||
addExceeded := func(name, required, limit string) {
|
||||
result.Allowed = false
|
||||
result.Exceeded = append(result.Exceeded, QuotaExceededResource{
|
||||
Name: name,
|
||||
Required: required,
|
||||
Hard: limit,
|
||||
})
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && required.CPU.Cmp(quantity) > 0 {
|
||||
addExceeded("requests.cpu", required.CPU.String(), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && required.Memory.Cmp(quantity) > 0 {
|
||||
addExceeded("requests.memory", required.Memory.String(), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && required.GPU > quantity.Value() {
|
||||
addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(required.GPU, 10), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && required.GPUMemoryMB > quantity.Value() {
|
||||
addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(required.GPUMemoryMB, 10), quantity.String())
|
||||
}
|
||||
sort.Slice(result.Exceeded, func(i, j int) bool {
|
||||
return result.Exceeded[i].Name < result.Exceeded[j].Name
|
||||
})
|
||||
if !result.Allowed {
|
||||
return result, ErrQuotaExceeded
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func legacyCompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
|
||||
if estimate == nil {
|
||||
estimate = &repository.ResourceEstimate{}
|
||||
}
|
||||
hardList := resourceQuotaHard(workspace)
|
||||
hard := resourceVectorFromQuotaHard(hardList)
|
||||
result := &QuotaPrecheckResult{
|
||||
Allowed: true,
|
||||
Required: *estimate,
|
||||
Hard: hard,
|
||||
}
|
||||
effectiveRequests := effectiveQuotaRequests(estimate)
|
||||
addExceeded := func(name, required, limit string) {
|
||||
result.Allowed = false
|
||||
result.Exceeded = append(result.Exceeded, QuotaExceededResource{
|
||||
Name: name,
|
||||
Required: required,
|
||||
Hard: limit,
|
||||
})
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && effectiveRequests.CPU.Cmp(quantity) > 0 {
|
||||
addExceeded("requests.cpu", effectiveRequests.CPU.String(), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && effectiveRequests.Memory.Cmp(quantity) > 0 {
|
||||
addExceeded("requests.memory", effectiveRequests.Memory.String(), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && effectiveRequests.GPU > quantity.Value() {
|
||||
addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(effectiveRequests.GPU, 10), quantity.String())
|
||||
}
|
||||
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && effectiveRequests.GPUMemoryMB > quantity.Value() {
|
||||
addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(effectiveRequests.GPUMemoryMB, 10), quantity.String())
|
||||
}
|
||||
sort.Slice(result.Exceeded, func(i, j int) bool {
|
||||
return result.Exceeded[i].Name < result.Exceeded[j].Name
|
||||
})
|
||||
if !result.Allowed {
|
||||
return result, ErrQuotaExceeded
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func effectiveQuotaRequests(estimate *repository.ResourceEstimate) repository.ResourceVector {
|
||||
if estimate == nil {
|
||||
return repository.ResourceVector{}
|
||||
}
|
||||
return repository.ResourceVector{
|
||||
CPU: maxQuantity(estimate.Requests.CPU, estimate.Limits.CPU),
|
||||
Memory: maxQuantity(estimate.Requests.Memory, estimate.Limits.Memory),
|
||||
GPU: maxInt64(estimate.Requests.GPU, estimate.Limits.GPU),
|
||||
GPUMemoryMB: maxInt64(estimate.Requests.GPUMemoryMB, estimate.Limits.GPUMemoryMB),
|
||||
}
|
||||
}
|
||||
|
||||
func addResourceVector(left, right repository.ResourceVector) repository.ResourceVector {
|
||||
out := left
|
||||
out.CPU.Add(right.CPU)
|
||||
out.Memory.Add(right.Memory)
|
||||
out.GPU += right.GPU
|
||||
out.GPUMemoryMB += right.GPUMemoryMB
|
||||
return out
|
||||
}
|
||||
|
||||
func subtractResourceVectorFloorZero(left, right repository.ResourceVector) repository.ResourceVector {
|
||||
out := left
|
||||
out.CPU.Sub(right.CPU)
|
||||
if out.CPU.Sign() < 0 {
|
||||
out.CPU = resource.Quantity{}
|
||||
}
|
||||
out.Memory.Sub(right.Memory)
|
||||
if out.Memory.Sign() < 0 {
|
||||
out.Memory = resource.Quantity{}
|
||||
}
|
||||
out.GPU -= right.GPU
|
||||
if out.GPU < 0 {
|
||||
out.GPU = 0
|
||||
}
|
||||
out.GPUMemoryMB -= right.GPUMemoryMB
|
||||
if out.GPUMemoryMB < 0 {
|
||||
out.GPUMemoryMB = 0
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func maxQuantity(left, right resource.Quantity) resource.Quantity {
|
||||
if left.Cmp(right) >= 0 {
|
||||
return left
|
||||
}
|
||||
return right
|
||||
}
|
||||
|
||||
// maxInt64 returns the larger of left and right.
func maxInt64(left, right int64) int64 {
	if right > left {
		return right
	}
	return left
}
|
||||
|
||||
func EstimateRenderedManifestResources(manifest string) (*repository.ResourceEstimate, error) {
|
||||
decoder := yaml.NewYAMLOrJSONDecoder(strings.NewReader(manifest), 4096)
|
||||
estimate := &repository.ResourceEstimate{}
|
||||
for {
|
||||
var obj unstructured.Unstructured
|
||||
if err := decoder.Decode(&obj); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("failed to decode rendered manifest: %w", err)
|
||||
}
|
||||
if obj.GetKind() == "" {
|
||||
continue
|
||||
}
|
||||
podSpec, replicas, ok := podTemplateSpec(obj.Object)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
addPodSpecResources(estimate, podSpec, replicas)
|
||||
}
|
||||
return estimate, nil
|
||||
}
|
||||
|
||||
func resourceVectorFromQuotaHard(hard corev1.ResourceList) repository.ResourceVector {
|
||||
gpu := hard[corev1.ResourceName("requests.nvidia.com/gpu")]
|
||||
gpuMemory := hard[corev1.ResourceName("requests.nvidia.com/gpumem")]
|
||||
return repository.ResourceVector{
|
||||
CPU: hard[corev1.ResourceName("requests.cpu")],
|
||||
Memory: hard[corev1.ResourceName("requests.memory")],
|
||||
GPU: gpu.Value(),
|
||||
GPUMemoryMB: gpuMemory.Value(),
|
||||
}
|
||||
}
|
||||
|
||||
func bindingQuotaHard(binding *entity.WorkspaceClusterBinding) corev1.ResourceList {
|
||||
hard := corev1.ResourceList{}
|
||||
if binding == nil {
|
||||
return hard
|
||||
}
|
||||
addQuantity := func(name corev1.ResourceName, value string) {
|
||||
value = normalizeStandardQuotaQuantity(value)
|
||||
if value == "" {
|
||||
return
|
||||
}
|
||||
if quantity, err := resource.ParseQuantity(value); err == nil {
|
||||
hard[name] = quantity
|
||||
}
|
||||
}
|
||||
addGPUMemoryQuantity := func(value string) {
|
||||
value, err := normalizeGPUMemoryQuota(value)
|
||||
if err != nil || value == "" {
|
||||
return
|
||||
}
|
||||
if quantity, err := resource.ParseQuantity(value); err == nil {
|
||||
hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity
|
||||
}
|
||||
}
|
||||
addQuantity(corev1.ResourceName("requests.cpu"), binding.QuotaCPU)
|
||||
addQuantity(corev1.ResourceName("requests.memory"), binding.QuotaMemory)
|
||||
addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), binding.QuotaGPU)
|
||||
addGPUMemoryQuantity(binding.QuotaGPUMem)
|
||||
return hard
|
||||
}
|
||||
|
||||
func podTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
|
||||
kind, _, _ := unstructured.NestedString(obj, "kind")
|
||||
switch kind {
|
||||
case "Pod":
|
||||
spec, ok := nestedMap(obj, "spec")
|
||||
return spec, 1, ok
|
||||
case "Deployment", "ReplicaSet", "StatefulSet", "ReplicationController":
|
||||
spec, replicas, ok := workloadTemplateSpec(obj)
|
||||
return spec, replicas, ok
|
||||
case "DaemonSet", "Job":
|
||||
spec, ok := nestedMap(obj, "spec", "template", "spec")
|
||||
return spec, 1, ok
|
||||
case "CronJob":
|
||||
spec, ok := nestedMap(obj, "spec", "jobTemplate", "spec", "template", "spec")
|
||||
return spec, 1, ok
|
||||
default:
|
||||
return nil, 0, false
|
||||
}
|
||||
}
|
||||
|
||||
func workloadTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
|
||||
spec, ok := nestedMap(obj, "spec", "template", "spec")
|
||||
if !ok {
|
||||
return nil, 0, false
|
||||
}
|
||||
replicas, _, err := unstructured.NestedInt64(obj, "spec", "replicas")
|
||||
if err != nil || replicas < 1 {
|
||||
replicas = 1
|
||||
}
|
||||
return spec, replicas, true
|
||||
}
|
||||
|
||||
func nestedMap(obj map[string]interface{}, fields ...string) (map[string]interface{}, bool) {
|
||||
value, ok, err := unstructured.NestedMap(obj, fields...)
|
||||
return value, ok && err == nil
|
||||
}
|
||||
|
||||
func addPodSpecResources(estimate *repository.ResourceEstimate, podSpec map[string]interface{}, replicas int64) {
|
||||
if replicas < 1 {
|
||||
replicas = 1
|
||||
}
|
||||
for _, field := range []string{"initContainers", "containers"} {
|
||||
containers, ok, err := unstructured.NestedSlice(podSpec, field)
|
||||
if err != nil || !ok {
|
||||
continue
|
||||
}
|
||||
for _, item := range containers {
|
||||
container, ok := item.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
addContainerResourceList(&estimate.Requests, replicas, container, "resources", "requests")
|
||||
addContainerResourceList(&estimate.Limits, replicas, container, "resources", "limits")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func addContainerResourceList(target *repository.ResourceVector, replicas int64, container map[string]interface{}, fields ...string) {
|
||||
resources, ok := nestedMap(container, fields...)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
for name, value := range resources {
|
||||
switch name {
|
||||
case "cpu":
|
||||
addQuantity(&target.CPU, value, replicas)
|
||||
case "memory":
|
||||
addQuantity(&target.Memory, value, replicas)
|
||||
case "nvidia.com/gpu", "requests.nvidia.com/gpu", "limits.nvidia.com/gpu":
|
||||
target.GPU += parseIntegerResource(value) * replicas
|
||||
case "nvidia.com/gpumem", "requests.nvidia.com/gpumem", "limits.nvidia.com/gpumem":
|
||||
target.GPUMemoryMB += parseGPUMemoryResource(value) * replicas
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func addQuantity(target *resource.Quantity, value interface{}, replicas int64) {
|
||||
quantity, err := resource.ParseQuantity(fmt.Sprint(value))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
quantity.Mul(replicas)
|
||||
target.Add(quantity)
|
||||
}
|
||||
|
||||
func parseIntegerResource(value interface{}) int64 {
|
||||
quantity, err := resource.ParseQuantity(fmt.Sprint(value))
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return quantity.Value()
|
||||
}
|
||||
|
||||
func parseGPUMemoryResource(value interface{}) int64 {
|
||||
normalized, err := normalizeGPUMemoryQuota(fmt.Sprint(value))
|
||||
if err != nil || normalized == "" {
|
||||
return 0
|
||||
}
|
||||
parsed, err := strconv.ParseInt(normalized, 10, 64)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return parsed
|
||||
}
|
||||
Reference in New Issue
Block a user