- Add GetMetrics method to MetricsClient interface and implement cluster metrics API - Add QuotaPrecheck service for validating resource quotas before deployment - Add auth DTO with role/permission models and auth handler tests - Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics - Update workspace handler with GetWorkspace endpoint and shared-user list - Fix monitoring handler to use correct service method name - Add tail_lines fallback in instance handler for snake_case query params - Update nginx config for SSE log streaming support (no buffering) - Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test - Update error messages for quota validation and instance operations - ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit - InstanceCard: correctly disable scale-minus when replicas <= 0 - SidebarLayout: add hover transition for sidebar items - Update todo.md and lessons.md with latest fixes
484 lines
16 KiB
Go
484 lines
16 KiB
Go
package k8s
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
authenticationv1 "k8s.io/api/authentication/v1"
|
|
corev1 "k8s.io/api/core/v1"
|
|
rbacv1 "k8s.io/api/rbac/v1"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/client-go/kubernetes"
|
|
"k8s.io/client-go/rest"
|
|
"k8s.io/client-go/tools/clientcmd"
|
|
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
|
|
|
|
"github.com/ocdp/cluster-service/internal/domain/entity"
|
|
"github.com/ocdp/cluster-service/internal/domain/repository"
|
|
)
|
|
|
|
// TenantClient provisions namespace-scoped tenant Kubernetes resources.
|
|
type TenantClient struct {
|
|
clientset kubernetes.Interface
|
|
}
|
|
|
|
// NewTenantClient creates a tenant provisioning client that builds Kubernetes
|
|
// clients from the supplied cluster entity for each call.
|
|
func NewTenantClient() repository.TenantKubeClient {
|
|
return &TenantClient{}
|
|
}
|
|
|
|
// NewTenantClientForClientset creates a tenant provisioning client for tests or
|
|
// callers that already own a Kubernetes client.
|
|
func NewTenantClientForClientset(clientset kubernetes.Interface) repository.TenantKubeClient {
|
|
return &TenantClient{clientset: clientset}
|
|
}
|
|
|
|
// EnsureTenant idempotently ensures Namespace, ServiceAccount, RoleBinding, and
|
|
// ResourceQuota resources for the tenant binding.
|
|
func (c *TenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
|
|
binding = binding.WithDefaults()
|
|
if err := binding.Validate(); err != nil {
|
|
return err
|
|
}
|
|
clientset, _, err := c.clientsetForCluster(cluster)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := c.ensureNamespace(ctx, clientset, binding); err != nil {
|
|
return err
|
|
}
|
|
if err := c.ensureServiceAccount(ctx, clientset, binding); err != nil {
|
|
return err
|
|
}
|
|
if err := c.ensureRoleBinding(ctx, clientset, binding); err != nil {
|
|
return err
|
|
}
|
|
if err := c.ensureResourceQuota(ctx, clientset, binding); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// IssueKubeconfig returns a short-lived kubeconfig backed by a Kubernetes
|
|
// TokenRequest. The token exists only in the returned value and is never stored.
|
|
func (c *TenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) {
|
|
binding = binding.WithDefaults()
|
|
if err := binding.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
clientset, restConfig, err := c.clientsetForCluster(cluster)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cappedTTL := entity.TenantTokenTTL(ttl)
|
|
expirationSeconds := int64(cappedTTL.Seconds())
|
|
tokenRequest, err := clientset.CoreV1().
|
|
ServiceAccounts(binding.Namespace).
|
|
CreateToken(ctx, binding.ServiceAccountName, &authenticationv1.TokenRequest{
|
|
Spec: authenticationv1.TokenRequestSpec{
|
|
ExpirationSeconds: &expirationSeconds,
|
|
},
|
|
}, metav1.CreateOptions{})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to request tenant service account token: %w", err)
|
|
}
|
|
if tokenRequest.Status.Token == "" {
|
|
return nil, entity.ErrInvalidTenantKubeconfigToken
|
|
}
|
|
|
|
expiresAt := tokenRequest.Status.ExpirationTimestamp.Time
|
|
if expiresAt.IsZero() {
|
|
expiresAt = time.Now().Add(cappedTTL)
|
|
}
|
|
kubeconfig, err := buildTenantKubeconfig(cluster, restConfig, binding, tokenRequest.Status.Token)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &entity.TenantKubeconfig{
|
|
Kubeconfig: kubeconfig,
|
|
ExpiresAt: expiresAt,
|
|
}, nil
|
|
}
|
|
|
|
func (c *TenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) {
|
|
binding = binding.WithDefaults()
|
|
if err := binding.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
clientset, _, err := c.clientsetForCluster(cluster)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get tenant resource quota usage: %w", err)
|
|
}
|
|
return &repository.ResourceQuotaUsage{
|
|
Hard: resourceVectorFromList(quota.Status.Hard),
|
|
Used: resourceVectorFromList(quota.Status.Used),
|
|
}, nil
|
|
}
|
|
|
|
// SuspendTenant revokes tenant API access by deleting only the RoleBinding.
|
|
func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
|
|
binding = binding.WithDefaults()
|
|
if err := binding.Validate(); err != nil {
|
|
return err
|
|
}
|
|
clientset, _, err := c.clientsetForCluster(cluster)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = clientset.RbacV1().
|
|
RoleBindings(binding.Namespace).
|
|
Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{})
|
|
if apierrors.IsNotFound(err) {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to delete tenant role binding: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *TenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error {
|
|
binding = binding.WithDefaults()
|
|
if err := binding.Validate(); err != nil {
|
|
return err
|
|
}
|
|
if isProtectedTenantNamespace(binding.Namespace) {
|
|
return entity.ErrProtectedNamespace
|
|
}
|
|
clientset, _, err := c.clientsetForCluster(cluster)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := deleteIgnoringNotFound(ctx, func() error {
|
|
return clientset.RbacV1().RoleBindings(binding.Namespace).Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{})
|
|
}); err != nil {
|
|
return fmt.Errorf("failed to delete tenant role binding: %w", err)
|
|
}
|
|
if err := deleteIgnoringNotFound(ctx, func() error {
|
|
return clientset.CoreV1().ResourceQuotas(binding.Namespace).Delete(ctx, binding.ResourceQuotaName, metav1.DeleteOptions{})
|
|
}); err != nil {
|
|
return fmt.Errorf("failed to delete tenant resource quota: %w", err)
|
|
}
|
|
if err := deleteIgnoringNotFound(ctx, func() error {
|
|
return clientset.CoreV1().ServiceAccounts(binding.Namespace).Delete(ctx, binding.ServiceAccountName, metav1.DeleteOptions{})
|
|
}); err != nil {
|
|
return fmt.Errorf("failed to delete tenant service account: %w", err)
|
|
}
|
|
namespace, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{})
|
|
if apierrors.IsNotFound(err) {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get tenant namespace before deletion: %w", err)
|
|
}
|
|
if namespace.Labels["ocdp.io/managed-by"] != "ocdp" || namespace.Labels["ocdp.io/tenant"] != binding.Namespace {
|
|
return fmt.Errorf("refusing to delete unmanaged namespace %q", binding.Namespace)
|
|
}
|
|
if err := deleteIgnoringNotFound(ctx, func() error {
|
|
return clientset.CoreV1().Namespaces().Delete(ctx, binding.Namespace, metav1.DeleteOptions{})
|
|
}); err != nil {
|
|
return fmt.Errorf("failed to delete tenant namespace: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func deleteIgnoringNotFound(ctx context.Context, deleteFn func() error) error {
|
|
if err := ctx.Err(); err != nil {
|
|
return err
|
|
}
|
|
err := deleteFn()
|
|
if apierrors.IsNotFound(err) {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
// isProtectedTenantNamespace reports whether namespace, after trimming
// surrounding whitespace, is empty or one of the built-in namespaces
// default, kube-system, kube-public, or kube-node-lease. The comparison is
// case-sensitive.
func isProtectedTenantNamespace(namespace string) bool {
	name := strings.TrimSpace(namespace)
	if name == "" {
		return true
	}
	for _, reserved := range [...]string{"default", "kube-system", "kube-public", "kube-node-lease"} {
		if name == reserved {
			return true
		}
	}
	return false
}
|
|
|
|
func resourceVectorFromList(values corev1.ResourceList) repository.ResourceVector {
|
|
gpu := values[corev1.ResourceName("requests.nvidia.com/gpu")]
|
|
gpuMem := values[corev1.ResourceName("requests.nvidia.com/gpumem")]
|
|
return repository.ResourceVector{
|
|
CPU: values[corev1.ResourceName("requests.cpu")],
|
|
Memory: values[corev1.ResourceName("requests.memory")],
|
|
GPU: gpu.Value(),
|
|
GPUMemoryMB: gpuMem.Value(),
|
|
}
|
|
}
|
|
|
|
func (c *TenantClient) clientsetForCluster(cluster *entity.Cluster) (kubernetes.Interface, *rest.Config, error) {
|
|
if c.clientset != nil {
|
|
config := &rest.Config{Host: "https://kubernetes.default.svc"}
|
|
if cluster != nil {
|
|
clusterConfig, err := restConfigFromCluster(cluster)
|
|
if err == nil {
|
|
config = clusterConfig
|
|
}
|
|
}
|
|
return c.clientset, config, nil
|
|
}
|
|
|
|
config, err := restConfigFromCluster(cluster)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
clientset, err := kubernetes.NewForConfig(config)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("failed to create tenant kubernetes client: %w", err)
|
|
}
|
|
return clientset, config, nil
|
|
}
|
|
|
|
func restConfigFromCluster(cluster *entity.Cluster) (*rest.Config, error) {
|
|
if cluster == nil {
|
|
return nil, entity.ErrInvalidClusterHost
|
|
}
|
|
if looksLikeKubeconfig(cluster.CAData) {
|
|
config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.CAData))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse tenant kubeconfig: %w", err)
|
|
}
|
|
return config, nil
|
|
}
|
|
if strings.TrimSpace(cluster.Host) == "" {
|
|
return nil, entity.ErrInvalidClusterHost
|
|
}
|
|
return &rest.Config{
|
|
Host: cluster.Host,
|
|
TLSClientConfig: rest.TLSClientConfig{
|
|
CAData: decodePossiblyBase64(cluster.CAData),
|
|
CertData: decodePossiblyBase64(cluster.CertData),
|
|
KeyData: decodePossiblyBase64(cluster.KeyData),
|
|
},
|
|
BearerToken: cluster.Token,
|
|
}, nil
|
|
}
|
|
|
|
func (c *TenantClient) ensureNamespace(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error {
|
|
namespace := &corev1.Namespace{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: binding.Namespace,
|
|
Labels: copyStringMap(binding.Labels),
|
|
Annotations: copyStringMap(binding.Annotations),
|
|
},
|
|
}
|
|
_, err := clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{})
|
|
if apierrors.IsAlreadyExists(err) {
|
|
current, getErr := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{})
|
|
if getErr != nil {
|
|
return fmt.Errorf("failed to get tenant namespace: %w", getErr)
|
|
}
|
|
mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations)
|
|
if _, updateErr := clientset.CoreV1().Namespaces().Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil {
|
|
return fmt.Errorf("failed to update tenant namespace: %w", updateErr)
|
|
}
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create tenant namespace: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *TenantClient) ensureServiceAccount(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error {
|
|
serviceAccount := &corev1.ServiceAccount{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: binding.ServiceAccountName,
|
|
Namespace: binding.Namespace,
|
|
Labels: copyStringMap(binding.Labels),
|
|
Annotations: copyStringMap(binding.Annotations),
|
|
},
|
|
}
|
|
_, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Create(ctx, serviceAccount, metav1.CreateOptions{})
|
|
if apierrors.IsAlreadyExists(err) {
|
|
current, getErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{})
|
|
if getErr != nil {
|
|
return fmt.Errorf("failed to get tenant service account: %w", getErr)
|
|
}
|
|
mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations)
|
|
if _, updateErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil {
|
|
return fmt.Errorf("failed to update tenant service account: %w", updateErr)
|
|
}
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create tenant service account: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *TenantClient) ensureRoleBinding(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error {
|
|
roleBinding := desiredRoleBinding(binding)
|
|
_, err := clientset.RbacV1().RoleBindings(binding.Namespace).Create(ctx, roleBinding, metav1.CreateOptions{})
|
|
if apierrors.IsAlreadyExists(err) {
|
|
current, getErr := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{})
|
|
if getErr != nil {
|
|
return fmt.Errorf("failed to get tenant role binding: %w", getErr)
|
|
}
|
|
mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations)
|
|
current.Subjects = roleBinding.Subjects
|
|
current.RoleRef = roleBinding.RoleRef
|
|
if _, updateErr := clientset.RbacV1().RoleBindings(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil {
|
|
return fmt.Errorf("failed to update tenant role binding: %w", updateErr)
|
|
}
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create tenant role binding: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *TenantClient) ensureResourceQuota(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error {
|
|
resourceQuota := &corev1.ResourceQuota{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: binding.ResourceQuotaName,
|
|
Namespace: binding.Namespace,
|
|
Labels: copyStringMap(binding.Labels),
|
|
Annotations: copyStringMap(binding.Annotations),
|
|
},
|
|
Spec: corev1.ResourceQuotaSpec{
|
|
Hard: binding.ResourceQuotaHard.DeepCopy(),
|
|
},
|
|
}
|
|
_, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Create(ctx, resourceQuota, metav1.CreateOptions{})
|
|
if apierrors.IsAlreadyExists(err) {
|
|
current, getErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{})
|
|
if getErr != nil {
|
|
return fmt.Errorf("failed to get tenant resource quota: %w", getErr)
|
|
}
|
|
mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations)
|
|
current.Spec.Hard = binding.ResourceQuotaHard.DeepCopy()
|
|
if _, updateErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil {
|
|
return fmt.Errorf("failed to update tenant resource quota: %w", updateErr)
|
|
}
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create tenant resource quota: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func desiredRoleBinding(binding entity.TenantBinding) *rbacv1.RoleBinding {
|
|
return &rbacv1.RoleBinding{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: binding.RoleBindingName,
|
|
Namespace: binding.Namespace,
|
|
Labels: copyStringMap(binding.Labels),
|
|
Annotations: copyStringMap(binding.Annotations),
|
|
},
|
|
Subjects: []rbacv1.Subject{{
|
|
Kind: rbacv1.ServiceAccountKind,
|
|
Name: binding.ServiceAccountName,
|
|
Namespace: binding.Namespace,
|
|
}},
|
|
RoleRef: rbacv1.RoleRef{
|
|
APIGroup: rbacv1.GroupName,
|
|
Kind: "ClusterRole",
|
|
Name: binding.ClusterRoleName,
|
|
},
|
|
}
|
|
}
|
|
|
|
func buildTenantKubeconfig(cluster *entity.Cluster, restConfig *rest.Config, binding entity.TenantBinding, token string) (string, error) {
|
|
host := ""
|
|
var caData []byte
|
|
if restConfig != nil {
|
|
host = restConfig.Host
|
|
caData = append([]byte{}, restConfig.CAData...)
|
|
}
|
|
if host == "" && cluster != nil {
|
|
host = cluster.Host
|
|
}
|
|
if len(caData) == 0 && cluster != nil {
|
|
caData = decodePossiblyBase64(cluster.CAData)
|
|
}
|
|
if host == "" {
|
|
return "", entity.ErrInvalidClusterHost
|
|
}
|
|
|
|
clusterName := "tenant-cluster"
|
|
if cluster != nil && cluster.Name != "" {
|
|
clusterName = cluster.Name
|
|
}
|
|
userName := binding.ServiceAccountName
|
|
contextName := fmt.Sprintf("%s/%s", clusterName, binding.Namespace)
|
|
config := clientcmdapi.NewConfig()
|
|
config.Clusters[clusterName] = &clientcmdapi.Cluster{
|
|
Server: host,
|
|
CertificateAuthorityData: caData,
|
|
}
|
|
config.AuthInfos[userName] = &clientcmdapi.AuthInfo{
|
|
Token: token,
|
|
}
|
|
config.Contexts[contextName] = &clientcmdapi.Context{
|
|
Cluster: clusterName,
|
|
AuthInfo: userName,
|
|
Namespace: binding.Namespace,
|
|
}
|
|
config.CurrentContext = contextName
|
|
|
|
bytes, err := clientcmd.Write(*config)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to build tenant kubeconfig: %w", err)
|
|
}
|
|
return string(bytes), nil
|
|
}
|
|
|
|
func mergeObjectMetadata(meta *metav1.ObjectMeta, labels, annotations map[string]string) {
|
|
if len(labels) > 0 && meta.Labels == nil {
|
|
meta.Labels = map[string]string{}
|
|
}
|
|
for key, value := range labels {
|
|
meta.Labels[key] = value
|
|
}
|
|
if len(annotations) > 0 && meta.Annotations == nil {
|
|
meta.Annotations = map[string]string{}
|
|
}
|
|
for key, value := range annotations {
|
|
meta.Annotations[key] = value
|
|
}
|
|
}
|
|
|
|
// copyStringMap returns an independent copy of values. A nil or empty input
// yields nil.
func copyStringMap(values map[string]string) map[string]string {
	size := len(values)
	if size == 0 {
		return nil
	}
	duplicate := make(map[string]string, size)
	for name := range values {
		duplicate[name] = values[name]
	}
	return duplicate
}
|
|
|
|
// decodePossiblyBase64 returns the standard-base64 decoding of value when
// value is valid base64, and the raw bytes of value otherwise.
func decodePossiblyBase64(value string) []byte {
	if decoded, err := base64.StdEncoding.DecodeString(value); err == nil {
		return decoded
	}
	return []byte(value)
}
|
|
|
|
// looksLikeKubeconfig reports whether value, ignoring surrounding whitespace,
// begins with "apiVersion:" or "kind: Config". It is a prefix heuristic only
// and does not parse the document.
func looksLikeKubeconfig(value string) bool {
	head := strings.TrimSpace(value)
	for _, marker := range [...]string{"apiVersion:", "kind: Config"} {
		if strings.HasPrefix(head, marker) {
			return true
		}
	}
	return false
}
|