fix: scale replicas in response, K8s metrics client, quota precheck, auth tests

- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
Ivan087
2026-05-20 16:56:29 +08:00
parent 8f90cf0f0d
commit 33ddaf97db
59 changed files with 4805 additions and 457 deletions

View File

@ -0,0 +1,400 @@
package service
import (
"context"
"errors"
"fmt"
"io"
"sort"
"strconv"
"strings"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/yaml"
)
var ErrQuotaExceeded = errors.New("quota exceeded")
type QuotaExceededResource struct {
Name string
Required string
Hard string
}
type QuotaPrecheckResult struct {
Allowed bool
Required repository.ResourceEstimate
Hard repository.ResourceVector
Exceeded []QuotaExceededResource
}
type QuotaPrecheckService struct {
helmClient repository.HelmClient
}
func NewQuotaPrecheckService(helmClient repository.HelmClient) *QuotaPrecheckService {
return &QuotaPrecheckService{helmClient: helmClient}
}
func (s *QuotaPrecheckService) EstimateAndCompare(ctx context.Context, cluster *entity.Cluster, workspace *entity.Workspace, instance *entity.Instance) (*QuotaPrecheckResult, error) {
if s == nil || s.helmClient == nil {
return nil, errors.New("quota precheck requires helm client")
}
estimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, instance)
if err != nil {
return nil, err
}
result, err := CompareWorkspaceQuota(workspace, estimate)
if err != nil {
return result, err
}
return result, nil
}
func (s *QuotaPrecheckService) EstimateAndCompareBinding(ctx context.Context, cluster *entity.Cluster, binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, target *entity.Instance, current *entity.Instance) (*QuotaPrecheckResult, error) {
if s == nil || s.helmClient == nil {
return nil, errors.New("quota precheck requires helm client")
}
targetEstimate, err := s.helmClient.EstimateInstanceResources(ctx, cluster, target)
if err != nil {
return nil, err
}
var currentEstimate *repository.ResourceEstimate
if current != nil {
currentEstimate, err = s.helmClient.EstimateInstanceResources(ctx, cluster, current)
if err != nil {
return nil, err
}
}
result, err := CompareBindingQuota(binding, usage, targetEstimate, currentEstimate)
if err != nil {
return result, err
}
return result, nil
}
func CompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
return compareQuotaList(resourceQuotaHard(workspace), nil, estimate, nil)
}
func CompareBindingQuota(binding *entity.WorkspaceClusterBinding, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
return compareQuotaList(bindingQuotaHard(binding), usage, targetEstimate, currentEstimate)
}
func compareQuotaList(hardList corev1.ResourceList, usage *repository.ResourceQuotaUsage, targetEstimate, currentEstimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
if targetEstimate == nil {
targetEstimate = &repository.ResourceEstimate{}
}
current := effectiveQuotaRequests(currentEstimate)
target := effectiveQuotaRequests(targetEstimate)
used := repository.ResourceVector{}
if usage != nil {
used = usage.Used
}
required := addResourceVector(subtractResourceVectorFloorZero(used, current), target)
hard := resourceVectorFromQuotaHard(hardList)
result := &QuotaPrecheckResult{
Allowed: true,
Required: repository.ResourceEstimate{
Requests: required,
},
Hard: hard,
}
addExceeded := func(name, required, limit string) {
result.Allowed = false
result.Exceeded = append(result.Exceeded, QuotaExceededResource{
Name: name,
Required: required,
Hard: limit,
})
}
if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && required.CPU.Cmp(quantity) > 0 {
addExceeded("requests.cpu", required.CPU.String(), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && required.Memory.Cmp(quantity) > 0 {
addExceeded("requests.memory", required.Memory.String(), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && required.GPU > quantity.Value() {
addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(required.GPU, 10), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && required.GPUMemoryMB > quantity.Value() {
addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(required.GPUMemoryMB, 10), quantity.String())
}
sort.Slice(result.Exceeded, func(i, j int) bool {
return result.Exceeded[i].Name < result.Exceeded[j].Name
})
if !result.Allowed {
return result, ErrQuotaExceeded
}
return result, nil
}
func legacyCompareWorkspaceQuota(workspace *entity.Workspace, estimate *repository.ResourceEstimate) (*QuotaPrecheckResult, error) {
if estimate == nil {
estimate = &repository.ResourceEstimate{}
}
hardList := resourceQuotaHard(workspace)
hard := resourceVectorFromQuotaHard(hardList)
result := &QuotaPrecheckResult{
Allowed: true,
Required: *estimate,
Hard: hard,
}
effectiveRequests := effectiveQuotaRequests(estimate)
addExceeded := func(name, required, limit string) {
result.Allowed = false
result.Exceeded = append(result.Exceeded, QuotaExceededResource{
Name: name,
Required: required,
Hard: limit,
})
}
if quantity, ok := hardList[corev1.ResourceName("requests.cpu")]; ok && effectiveRequests.CPU.Cmp(quantity) > 0 {
addExceeded("requests.cpu", effectiveRequests.CPU.String(), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.memory")]; ok && effectiveRequests.Memory.Cmp(quantity) > 0 {
addExceeded("requests.memory", effectiveRequests.Memory.String(), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpu")]; ok && effectiveRequests.GPU > quantity.Value() {
addExceeded("requests.nvidia.com/gpu", strconv.FormatInt(effectiveRequests.GPU, 10), quantity.String())
}
if quantity, ok := hardList[corev1.ResourceName("requests.nvidia.com/gpumem")]; ok && effectiveRequests.GPUMemoryMB > quantity.Value() {
addExceeded("requests.nvidia.com/gpumem", strconv.FormatInt(effectiveRequests.GPUMemoryMB, 10), quantity.String())
}
sort.Slice(result.Exceeded, func(i, j int) bool {
return result.Exceeded[i].Name < result.Exceeded[j].Name
})
if !result.Allowed {
return result, ErrQuotaExceeded
}
return result, nil
}
func effectiveQuotaRequests(estimate *repository.ResourceEstimate) repository.ResourceVector {
if estimate == nil {
return repository.ResourceVector{}
}
return repository.ResourceVector{
CPU: maxQuantity(estimate.Requests.CPU, estimate.Limits.CPU),
Memory: maxQuantity(estimate.Requests.Memory, estimate.Limits.Memory),
GPU: maxInt64(estimate.Requests.GPU, estimate.Limits.GPU),
GPUMemoryMB: maxInt64(estimate.Requests.GPUMemoryMB, estimate.Limits.GPUMemoryMB),
}
}
func addResourceVector(left, right repository.ResourceVector) repository.ResourceVector {
out := left
out.CPU.Add(right.CPU)
out.Memory.Add(right.Memory)
out.GPU += right.GPU
out.GPUMemoryMB += right.GPUMemoryMB
return out
}
func subtractResourceVectorFloorZero(left, right repository.ResourceVector) repository.ResourceVector {
out := left
out.CPU.Sub(right.CPU)
if out.CPU.Sign() < 0 {
out.CPU = resource.Quantity{}
}
out.Memory.Sub(right.Memory)
if out.Memory.Sign() < 0 {
out.Memory = resource.Quantity{}
}
out.GPU -= right.GPU
if out.GPU < 0 {
out.GPU = 0
}
out.GPUMemoryMB -= right.GPUMemoryMB
if out.GPUMemoryMB < 0 {
out.GPUMemoryMB = 0
}
return out
}
func maxQuantity(left, right resource.Quantity) resource.Quantity {
if left.Cmp(right) >= 0 {
return left
}
return right
}
func maxInt64(left, right int64) int64 {
if left >= right {
return left
}
return right
}
func EstimateRenderedManifestResources(manifest string) (*repository.ResourceEstimate, error) {
decoder := yaml.NewYAMLOrJSONDecoder(strings.NewReader(manifest), 4096)
estimate := &repository.ResourceEstimate{}
for {
var obj unstructured.Unstructured
if err := decoder.Decode(&obj); err != nil {
if errors.Is(err, io.EOF) {
break
}
return nil, fmt.Errorf("failed to decode rendered manifest: %w", err)
}
if obj.GetKind() == "" {
continue
}
podSpec, replicas, ok := podTemplateSpec(obj.Object)
if !ok {
continue
}
addPodSpecResources(estimate, podSpec, replicas)
}
return estimate, nil
}
func resourceVectorFromQuotaHard(hard corev1.ResourceList) repository.ResourceVector {
gpu := hard[corev1.ResourceName("requests.nvidia.com/gpu")]
gpuMemory := hard[corev1.ResourceName("requests.nvidia.com/gpumem")]
return repository.ResourceVector{
CPU: hard[corev1.ResourceName("requests.cpu")],
Memory: hard[corev1.ResourceName("requests.memory")],
GPU: gpu.Value(),
GPUMemoryMB: gpuMemory.Value(),
}
}
func bindingQuotaHard(binding *entity.WorkspaceClusterBinding) corev1.ResourceList {
hard := corev1.ResourceList{}
if binding == nil {
return hard
}
addQuantity := func(name corev1.ResourceName, value string) {
value = normalizeStandardQuotaQuantity(value)
if value == "" {
return
}
if quantity, err := resource.ParseQuantity(value); err == nil {
hard[name] = quantity
}
}
addGPUMemoryQuantity := func(value string) {
value, err := normalizeGPUMemoryQuota(value)
if err != nil || value == "" {
return
}
if quantity, err := resource.ParseQuantity(value); err == nil {
hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity
}
}
addQuantity(corev1.ResourceName("requests.cpu"), binding.QuotaCPU)
addQuantity(corev1.ResourceName("requests.memory"), binding.QuotaMemory)
addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), binding.QuotaGPU)
addGPUMemoryQuantity(binding.QuotaGPUMem)
return hard
}
func podTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
kind, _, _ := unstructured.NestedString(obj, "kind")
switch kind {
case "Pod":
spec, ok := nestedMap(obj, "spec")
return spec, 1, ok
case "Deployment", "ReplicaSet", "StatefulSet", "ReplicationController":
spec, replicas, ok := workloadTemplateSpec(obj)
return spec, replicas, ok
case "DaemonSet", "Job":
spec, ok := nestedMap(obj, "spec", "template", "spec")
return spec, 1, ok
case "CronJob":
spec, ok := nestedMap(obj, "spec", "jobTemplate", "spec", "template", "spec")
return spec, 1, ok
default:
return nil, 0, false
}
}
func workloadTemplateSpec(obj map[string]interface{}) (map[string]interface{}, int64, bool) {
spec, ok := nestedMap(obj, "spec", "template", "spec")
if !ok {
return nil, 0, false
}
replicas, _, err := unstructured.NestedInt64(obj, "spec", "replicas")
if err != nil || replicas < 1 {
replicas = 1
}
return spec, replicas, true
}
func nestedMap(obj map[string]interface{}, fields ...string) (map[string]interface{}, bool) {
value, ok, err := unstructured.NestedMap(obj, fields...)
return value, ok && err == nil
}
func addPodSpecResources(estimate *repository.ResourceEstimate, podSpec map[string]interface{}, replicas int64) {
if replicas < 1 {
replicas = 1
}
for _, field := range []string{"initContainers", "containers"} {
containers, ok, err := unstructured.NestedSlice(podSpec, field)
if err != nil || !ok {
continue
}
for _, item := range containers {
container, ok := item.(map[string]interface{})
if !ok {
continue
}
addContainerResourceList(&estimate.Requests, replicas, container, "resources", "requests")
addContainerResourceList(&estimate.Limits, replicas, container, "resources", "limits")
}
}
}
func addContainerResourceList(target *repository.ResourceVector, replicas int64, container map[string]interface{}, fields ...string) {
resources, ok := nestedMap(container, fields...)
if !ok {
return
}
for name, value := range resources {
switch name {
case "cpu":
addQuantity(&target.CPU, value, replicas)
case "memory":
addQuantity(&target.Memory, value, replicas)
case "nvidia.com/gpu", "requests.nvidia.com/gpu", "limits.nvidia.com/gpu":
target.GPU += parseIntegerResource(value) * replicas
case "nvidia.com/gpumem", "requests.nvidia.com/gpumem", "limits.nvidia.com/gpumem":
target.GPUMemoryMB += parseGPUMemoryResource(value) * replicas
}
}
}
func addQuantity(target *resource.Quantity, value interface{}, replicas int64) {
quantity, err := resource.ParseQuantity(fmt.Sprint(value))
if err != nil {
return
}
quantity.Mul(replicas)
target.Add(quantity)
}
func parseIntegerResource(value interface{}) int64 {
quantity, err := resource.ParseQuantity(fmt.Sprint(value))
if err != nil {
return 0
}
return quantity.Value()
}
func parseGPUMemoryResource(value interface{}) int64 {
normalized, err := normalizeGPUMemoryQuota(fmt.Sprint(value))
if err != nil || normalized == "" {
return 0
}
parsed, err := strconv.ParseInt(normalized, 10, 64)
if err != nil {
return 0
}
return parsed
}