package k8s import ( "context" "encoding/base64" "fmt" "strings" "time" authenticationv1 "k8s.io/api/authentication/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" "github.com/ocdp/cluster-service/internal/domain/entity" "github.com/ocdp/cluster-service/internal/domain/repository" ) // TenantClient provisions namespace-scoped tenant Kubernetes resources. type TenantClient struct { clientset kubernetes.Interface } // NewTenantClient creates a tenant provisioning client that builds Kubernetes // clients from the supplied cluster entity for each call. func NewTenantClient() repository.TenantKubeClient { return &TenantClient{} } // NewTenantClientForClientset creates a tenant provisioning client for tests or // callers that already own a Kubernetes client. func NewTenantClientForClientset(clientset kubernetes.Interface) repository.TenantKubeClient { return &TenantClient{clientset: clientset} } // EnsureTenant idempotently ensures Namespace, ServiceAccount, RoleBinding, and // ResourceQuota resources for the tenant binding. func (c *TenantClient) EnsureTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { binding = binding.WithDefaults() if err := binding.Validate(); err != nil { return err } clientset, _, err := c.clientsetForCluster(cluster) if err != nil { return err } if err := c.ensureNamespace(ctx, clientset, binding); err != nil { return err } if err := c.ensureServiceAccount(ctx, clientset, binding); err != nil { return err } if err := c.ensureRoleBinding(ctx, clientset, binding); err != nil { return err } if err := c.ensureResourceQuota(ctx, clientset, binding); err != nil { return err } return nil } // IssueKubeconfig returns a short-lived kubeconfig backed by a Kubernetes // TokenRequest. The token exists only in the returned value and is never stored. func (c *TenantClient) IssueKubeconfig(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding, ttl time.Duration) (*entity.TenantKubeconfig, error) { binding = binding.WithDefaults() if err := binding.Validate(); err != nil { return nil, err } clientset, restConfig, err := c.clientsetForCluster(cluster) if err != nil { return nil, err } cappedTTL := entity.TenantTokenTTL(ttl) expirationSeconds := int64(cappedTTL.Seconds()) tokenRequest, err := clientset.CoreV1(). ServiceAccounts(binding.Namespace). CreateToken(ctx, binding.ServiceAccountName, &authenticationv1.TokenRequest{ Spec: authenticationv1.TokenRequestSpec{ ExpirationSeconds: &expirationSeconds, }, }, metav1.CreateOptions{}) if err != nil { return nil, fmt.Errorf("failed to request tenant service account token: %w", err) } if tokenRequest.Status.Token == "" { return nil, entity.ErrInvalidTenantKubeconfigToken } expiresAt := tokenRequest.Status.ExpirationTimestamp.Time if expiresAt.IsZero() { expiresAt = time.Now().Add(cappedTTL) } kubeconfig, err := buildTenantKubeconfig(cluster, restConfig, binding, tokenRequest.Status.Token) if err != nil { return nil, err } return &entity.TenantKubeconfig{ Kubeconfig: kubeconfig, ExpiresAt: expiresAt, }, nil } func (c *TenantClient) GetResourceQuotaUsage(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) (*repository.ResourceQuotaUsage, error) { binding = binding.WithDefaults() if err := binding.Validate(); err != nil { return nil, err } clientset, _, err := c.clientsetForCluster(cluster) if err != nil { return nil, err } quota, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("failed to get tenant resource quota usage: %w", err) } return &repository.ResourceQuotaUsage{ Hard: resourceVectorFromList(quota.Status.Hard), Used: resourceVectorFromList(quota.Status.Used), }, nil } // SuspendTenant revokes tenant API access by deleting only the RoleBinding. func (c *TenantClient) SuspendTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { binding = binding.WithDefaults() if err := binding.Validate(); err != nil { return err } clientset, _, err := c.clientsetForCluster(cluster) if err != nil { return err } err = clientset.RbacV1(). RoleBindings(binding.Namespace). Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{}) if apierrors.IsNotFound(err) { return nil } if err != nil { return fmt.Errorf("failed to delete tenant role binding: %w", err) } return nil } func (c *TenantClient) DeleteTenant(ctx context.Context, cluster *entity.Cluster, binding entity.TenantBinding) error { binding = binding.WithDefaults() if err := binding.Validate(); err != nil { return err } if isProtectedTenantNamespace(binding.Namespace) { return entity.ErrProtectedNamespace } clientset, _, err := c.clientsetForCluster(cluster) if err != nil { return err } if err := deleteIgnoringNotFound(ctx, func() error { return clientset.RbacV1().RoleBindings(binding.Namespace).Delete(ctx, binding.RoleBindingName, metav1.DeleteOptions{}) }); err != nil { return fmt.Errorf("failed to delete tenant role binding: %w", err) } if err := deleteIgnoringNotFound(ctx, func() error { return clientset.CoreV1().ResourceQuotas(binding.Namespace).Delete(ctx, binding.ResourceQuotaName, metav1.DeleteOptions{}) }); err != nil { return fmt.Errorf("failed to delete tenant resource quota: %w", err) } if err := deleteIgnoringNotFound(ctx, func() error { return clientset.CoreV1().ServiceAccounts(binding.Namespace).Delete(ctx, binding.ServiceAccountName, metav1.DeleteOptions{}) }); err != nil { return fmt.Errorf("failed to delete tenant service account: %w", err) } namespace, err := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}) if apierrors.IsNotFound(err) { return nil } if err != nil { return fmt.Errorf("failed to get tenant namespace before deletion: %w", err) } if namespace.Labels["ocdp.io/managed-by"] != "ocdp" || namespace.Labels["ocdp.io/tenant"] != binding.Namespace { return fmt.Errorf("refusing to delete unmanaged namespace %q", binding.Namespace) } if err := deleteIgnoringNotFound(ctx, func() error { return clientset.CoreV1().Namespaces().Delete(ctx, binding.Namespace, metav1.DeleteOptions{}) }); err != nil { return fmt.Errorf("failed to delete tenant namespace: %w", err) } return nil } func deleteIgnoringNotFound(ctx context.Context, deleteFn func() error) error { if err := ctx.Err(); err != nil { return err } err := deleteFn() if apierrors.IsNotFound(err) { return nil } return err } func isProtectedTenantNamespace(namespace string) bool { switch strings.TrimSpace(namespace) { case "", "default", "kube-system", "kube-public", "kube-node-lease": return true default: return false } } func resourceVectorFromList(values corev1.ResourceList) repository.ResourceVector { gpu := values[corev1.ResourceName("requests.nvidia.com/gpu")] gpuMem := values[corev1.ResourceName("requests.nvidia.com/gpumem")] return repository.ResourceVector{ CPU: values[corev1.ResourceName("requests.cpu")], Memory: values[corev1.ResourceName("requests.memory")], GPU: gpu.Value(), GPUMemoryMB: gpuMem.Value(), } } func (c *TenantClient) clientsetForCluster(cluster *entity.Cluster) (kubernetes.Interface, *rest.Config, error) { if c.clientset != nil { config := &rest.Config{Host: "https://kubernetes.default.svc"} if cluster != nil { clusterConfig, err := restConfigFromCluster(cluster) if err == nil { config = clusterConfig } } return c.clientset, config, nil } config, err := restConfigFromCluster(cluster) if err != nil { return nil, nil, err } clientset, err := kubernetes.NewForConfig(config) if err != nil { return nil, nil, fmt.Errorf("failed to create tenant kubernetes client: %w", err) } return clientset, config, nil } func restConfigFromCluster(cluster *entity.Cluster) (*rest.Config, error) { if cluster == nil { return nil, entity.ErrInvalidClusterHost } if looksLikeKubeconfig(cluster.CAData) { config, err := clientcmd.RESTConfigFromKubeConfig([]byte(cluster.CAData)) if err != nil { return nil, fmt.Errorf("failed to parse tenant kubeconfig: %w", err) } return config, nil } if strings.TrimSpace(cluster.Host) == "" { return nil, entity.ErrInvalidClusterHost } return &rest.Config{ Host: cluster.Host, TLSClientConfig: rest.TLSClientConfig{ CAData: decodePossiblyBase64(cluster.CAData), CertData: decodePossiblyBase64(cluster.CertData), KeyData: decodePossiblyBase64(cluster.KeyData), }, BearerToken: cluster.Token, }, nil } func (c *TenantClient) ensureNamespace(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { namespace := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: binding.Namespace, Labels: copyStringMap(binding.Labels), Annotations: copyStringMap(binding.Annotations), }, } _, err := clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}) if apierrors.IsAlreadyExists(err) { current, getErr := clientset.CoreV1().Namespaces().Get(ctx, binding.Namespace, metav1.GetOptions{}) if getErr != nil { return fmt.Errorf("failed to get tenant namespace: %w", getErr) } mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) if _, updateErr := clientset.CoreV1().Namespaces().Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { return fmt.Errorf("failed to update tenant namespace: %w", updateErr) } return nil } if err != nil { return fmt.Errorf("failed to create tenant namespace: %w", err) } return nil } func (c *TenantClient) ensureServiceAccount(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { serviceAccount := &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ Name: binding.ServiceAccountName, Namespace: binding.Namespace, Labels: copyStringMap(binding.Labels), Annotations: copyStringMap(binding.Annotations), }, } _, err := clientset.CoreV1().ServiceAccounts(binding.Namespace).Create(ctx, serviceAccount, metav1.CreateOptions{}) if apierrors.IsAlreadyExists(err) { current, getErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Get(ctx, binding.ServiceAccountName, metav1.GetOptions{}) if getErr != nil { return fmt.Errorf("failed to get tenant service account: %w", getErr) } mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) if _, updateErr := clientset.CoreV1().ServiceAccounts(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { return fmt.Errorf("failed to update tenant service account: %w", updateErr) } return nil } if err != nil { return fmt.Errorf("failed to create tenant service account: %w", err) } return nil } func (c *TenantClient) ensureRoleBinding(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { roleBinding := desiredRoleBinding(binding) _, err := clientset.RbacV1().RoleBindings(binding.Namespace).Create(ctx, roleBinding, metav1.CreateOptions{}) if apierrors.IsAlreadyExists(err) { current, getErr := clientset.RbacV1().RoleBindings(binding.Namespace).Get(ctx, binding.RoleBindingName, metav1.GetOptions{}) if getErr != nil { return fmt.Errorf("failed to get tenant role binding: %w", getErr) } mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) current.Subjects = roleBinding.Subjects current.RoleRef = roleBinding.RoleRef if _, updateErr := clientset.RbacV1().RoleBindings(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { return fmt.Errorf("failed to update tenant role binding: %w", updateErr) } return nil } if err != nil { return fmt.Errorf("failed to create tenant role binding: %w", err) } return nil } func (c *TenantClient) ensureResourceQuota(ctx context.Context, clientset kubernetes.Interface, binding entity.TenantBinding) error { resourceQuota := &corev1.ResourceQuota{ ObjectMeta: metav1.ObjectMeta{ Name: binding.ResourceQuotaName, Namespace: binding.Namespace, Labels: copyStringMap(binding.Labels), Annotations: copyStringMap(binding.Annotations), }, Spec: corev1.ResourceQuotaSpec{ Hard: binding.ResourceQuotaHard.DeepCopy(), }, } _, err := clientset.CoreV1().ResourceQuotas(binding.Namespace).Create(ctx, resourceQuota, metav1.CreateOptions{}) if apierrors.IsAlreadyExists(err) { current, getErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Get(ctx, binding.ResourceQuotaName, metav1.GetOptions{}) if getErr != nil { return fmt.Errorf("failed to get tenant resource quota: %w", getErr) } mergeObjectMetadata(¤t.ObjectMeta, binding.Labels, binding.Annotations) current.Spec.Hard = binding.ResourceQuotaHard.DeepCopy() if _, updateErr := clientset.CoreV1().ResourceQuotas(binding.Namespace).Update(ctx, current, metav1.UpdateOptions{}); updateErr != nil { return fmt.Errorf("failed to update tenant resource quota: %w", updateErr) } return nil } if err != nil { return fmt.Errorf("failed to create tenant resource quota: %w", err) } return nil } func desiredRoleBinding(binding entity.TenantBinding) *rbacv1.RoleBinding { return &rbacv1.RoleBinding{ ObjectMeta: metav1.ObjectMeta{ Name: binding.RoleBindingName, Namespace: binding.Namespace, Labels: copyStringMap(binding.Labels), Annotations: copyStringMap(binding.Annotations), }, Subjects: []rbacv1.Subject{{ Kind: rbacv1.ServiceAccountKind, Name: binding.ServiceAccountName, Namespace: binding.Namespace, }}, RoleRef: rbacv1.RoleRef{ APIGroup: rbacv1.GroupName, Kind: "ClusterRole", Name: binding.ClusterRoleName, }, } } func buildTenantKubeconfig(cluster *entity.Cluster, restConfig *rest.Config, binding entity.TenantBinding, token string) (string, error) { host := "" var caData []byte if restConfig != nil { host = restConfig.Host caData = append([]byte{}, restConfig.CAData...) } if host == "" && cluster != nil { host = cluster.Host } if len(caData) == 0 && cluster != nil { caData = decodePossiblyBase64(cluster.CAData) } if host == "" { return "", entity.ErrInvalidClusterHost } clusterName := "tenant-cluster" if cluster != nil && cluster.Name != "" { clusterName = cluster.Name } userName := binding.ServiceAccountName contextName := fmt.Sprintf("%s/%s", clusterName, binding.Namespace) config := clientcmdapi.NewConfig() config.Clusters[clusterName] = &clientcmdapi.Cluster{ Server: host, CertificateAuthorityData: caData, } config.AuthInfos[userName] = &clientcmdapi.AuthInfo{ Token: token, } config.Contexts[contextName] = &clientcmdapi.Context{ Cluster: clusterName, AuthInfo: userName, Namespace: binding.Namespace, } config.CurrentContext = contextName bytes, err := clientcmd.Write(*config) if err != nil { return "", fmt.Errorf("failed to build tenant kubeconfig: %w", err) } return string(bytes), nil } func mergeObjectMetadata(meta *metav1.ObjectMeta, labels, annotations map[string]string) { if len(labels) > 0 && meta.Labels == nil { meta.Labels = map[string]string{} } for key, value := range labels { meta.Labels[key] = value } if len(annotations) > 0 && meta.Annotations == nil { meta.Annotations = map[string]string{} } for key, value := range annotations { meta.Annotations[key] = value } } func copyStringMap(values map[string]string) map[string]string { if len(values) == 0 { return nil } copied := make(map[string]string, len(values)) for key, value := range values { copied[key] = value } return copied } func decodePossiblyBase64(value string) []byte { decoded, err := base64.StdEncoding.DecodeString(value) if err == nil { return decoded } return []byte(value) } func looksLikeKubeconfig(value string) bool { trimmed := strings.TrimSpace(value) return strings.HasPrefix(trimmed, "apiVersion:") || strings.HasPrefix(trimmed, "kind: Config") }