refactor: full-stack restructure with multi-tenancy, workspace management, and K8s diagnostics
- Add Workspace domain (entity, repository, service, handler, DTO) - Add multi-tenant K8s client with tenant binding and quota management - Add K8s diagnostics client (instance diagnostics) - Add authorization middleware (authz package) - Restructure frontend to feature-based architecture (features/) - Add User Management page in configuration - Add AccessDenied page and route guards - Refactor shared components (form inputs, layout, UI) - Update Tailwind config for new design system - Add comprehensive documentation (docs/, tasks/, plans) - Improve cluster service with better kubeconfig handling - Add tests for crypto, config, helm client, tenant binding
This commit is contained in:
@ -11,16 +11,23 @@ import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/ocdp/cluster-service/internal/domain/entity"
|
||||
"github.com/ocdp/cluster-service/internal/domain/repository"
|
||||
"github.com/ocdp/cluster-service/internal/pkg/authz"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
)
|
||||
|
||||
// InstanceService Helm 实例管理领域服务
|
||||
type InstanceService struct {
|
||||
instanceRepo repository.InstanceRepository
|
||||
clusterRepo repository.ClusterRepository
|
||||
registryRepo repository.RegistryRepository
|
||||
helmClient repository.HelmClient
|
||||
ociClient repository.OCIClient
|
||||
entryClient repository.InstanceEntryClient
|
||||
instanceRepo repository.InstanceRepository
|
||||
clusterRepo repository.ClusterRepository
|
||||
registryRepo repository.RegistryRepository
|
||||
bindingRepo repository.WorkspaceClusterBindingRepository
|
||||
helmClient repository.HelmClient
|
||||
ociClient repository.OCIClient
|
||||
entryClient repository.InstanceEntryClient
|
||||
diagClient repository.InstanceDiagnosticsClient
|
||||
workspaceRepo repository.WorkspaceRepository
|
||||
tenantClient repository.TenantKubeClient
|
||||
}
|
||||
|
||||
// NewInstanceService 创建实例服务
|
||||
@ -31,17 +38,32 @@ func NewInstanceService(
|
||||
helmClient repository.HelmClient,
|
||||
ociClient repository.OCIClient,
|
||||
entryClient repository.InstanceEntryClient,
|
||||
bindingRepo ...repository.WorkspaceClusterBindingRepository,
|
||||
) *InstanceService {
|
||||
var workspaceBindingRepo repository.WorkspaceClusterBindingRepository
|
||||
if len(bindingRepo) > 0 {
|
||||
workspaceBindingRepo = bindingRepo[0]
|
||||
}
|
||||
return &InstanceService{
|
||||
instanceRepo: instanceRepo,
|
||||
clusterRepo: clusterRepo,
|
||||
registryRepo: registryRepo,
|
||||
bindingRepo: workspaceBindingRepo,
|
||||
helmClient: helmClient,
|
||||
ociClient: ociClient,
|
||||
entryClient: entryClient,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *InstanceService) SetDiagnosticsClient(client repository.InstanceDiagnosticsClient) {
|
||||
s.diagClient = client
|
||||
}
|
||||
|
||||
func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.WorkspaceRepository, tenantClient repository.TenantKubeClient) {
|
||||
s.workspaceRepo = workspaceRepo
|
||||
s.tenantClient = tenantClient
|
||||
}
|
||||
|
||||
const chartCacheDir = "/tmp/charts"
|
||||
|
||||
func (s *InstanceService) chartArchivePath(instance *entity.Instance) string {
|
||||
@ -62,8 +84,14 @@ func (s *InstanceService) downloadChart(ctx context.Context, registry *entity.Re
|
||||
|
||||
// CreateInstance 创建(安装)新实例
|
||||
func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.Instance) error {
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return entity.ErrUnauthorized
|
||||
}
|
||||
// 生成 ID
|
||||
instance.ID = uuid.New().String()
|
||||
instance.WorkspaceID = principal.WorkspaceID
|
||||
instance.OwnerID = principal.UserID
|
||||
|
||||
// 验证
|
||||
if err := instance.Validate(); err != nil {
|
||||
@ -75,12 +103,25 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
|
||||
if err != nil {
|
||||
return entity.ErrClusterNotFound
|
||||
}
|
||||
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
|
||||
return entity.ErrClusterNotFound
|
||||
}
|
||||
|
||||
// 检查 Registry 是否存在
|
||||
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
|
||||
if err != nil {
|
||||
return entity.ErrRegistryNotFound
|
||||
}
|
||||
if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) {
|
||||
return entity.ErrRegistryNotFound
|
||||
}
|
||||
if err := s.applyNamespacePolicy(ctx, principal, cluster, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
enforceNamespaceValues(instance)
|
||||
if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 检查实例是否已存在
|
||||
existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name)
|
||||
@ -111,13 +152,24 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
|
||||
|
||||
// GetInstance 获取实例
|
||||
func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.Instance, error) {
|
||||
return s.instanceRepo.GetByID(ctx, id)
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return nil, entity.ErrUnauthorized
|
||||
}
|
||||
instance, err := s.instanceRepo.GetByID(ctx, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !s.canReadInstance(principal, instance) {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
return instance, nil
|
||||
}
|
||||
|
||||
// GetInstanceStatus 获取实例实时状态
|
||||
func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*entity.Instance, error) {
|
||||
// 从数据库获取基本信息
|
||||
instance, err := s.instanceRepo.GetByID(ctx, id)
|
||||
instance, err := s.GetInstance(ctx, id)
|
||||
if err != nil {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
@ -143,11 +195,20 @@ func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*en
|
||||
|
||||
// UpdateInstance 更新(升级)实例
|
||||
func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.Instance) error {
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return entity.ErrUnauthorized
|
||||
}
|
||||
// 检查实例是否存在
|
||||
existingInstance, err := s.instanceRepo.GetByID(ctx, instance.ID)
|
||||
if err != nil {
|
||||
return entity.ErrInstanceNotFound
|
||||
}
|
||||
if !s.canWriteInstance(principal, existingInstance) {
|
||||
return entity.ErrForbidden
|
||||
}
|
||||
instance.WorkspaceID = existingInstance.WorkspaceID
|
||||
instance.OwnerID = existingInstance.OwnerID
|
||||
|
||||
// 获取集群信息
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID)
|
||||
@ -161,6 +222,8 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
|
||||
return entity.ErrRegistryNotFound
|
||||
}
|
||||
|
||||
instance.Namespace = existingInstance.Namespace
|
||||
enforceNamespaceValues(instance)
|
||||
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
|
||||
if err := s.instanceRepo.Update(ctx, instance); err != nil {
|
||||
return err
|
||||
@ -182,11 +245,18 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
|
||||
|
||||
// DeleteInstance 删除(卸载)实例
|
||||
func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error {
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return entity.ErrUnauthorized
|
||||
}
|
||||
// 检查实例是否存在
|
||||
instance, err := s.instanceRepo.GetByID(ctx, id)
|
||||
if err != nil {
|
||||
return entity.ErrInstanceNotFound
|
||||
}
|
||||
if !s.canWriteInstance(principal, instance) {
|
||||
return entity.ErrForbidden
|
||||
}
|
||||
|
||||
// 获取集群信息
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID)
|
||||
@ -208,11 +278,18 @@ func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error {
|
||||
|
||||
// RollbackInstance 回滚实例
|
||||
func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revision int) error {
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return entity.ErrUnauthorized
|
||||
}
|
||||
// 检查实例是否存在
|
||||
instance, err := s.instanceRepo.GetByID(ctx, id)
|
||||
if err != nil {
|
||||
return entity.ErrInstanceNotFound
|
||||
}
|
||||
if !s.canWriteInstance(principal, instance) {
|
||||
return entity.ErrForbidden
|
||||
}
|
||||
|
||||
// 获取集群信息
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID)
|
||||
@ -235,7 +312,7 @@ func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revis
|
||||
// GetInstanceHistory 获取实例历史
|
||||
func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]*entity.ReleaseHistory, error) {
|
||||
// 检查实例是否存在
|
||||
instance, err := s.instanceRepo.GetByID(ctx, id)
|
||||
instance, err := s.GetInstance(ctx, id)
|
||||
if err != nil {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
@ -252,18 +329,35 @@ func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]
|
||||
|
||||
// ListInstancesByCluster 列出集群的所有实例
|
||||
func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) {
|
||||
principal, err := authz.RequirePrincipal(ctx)
|
||||
if err != nil {
|
||||
return nil, entity.ErrUnauthorized
|
||||
}
|
||||
// 检查集群是否存在
|
||||
_, err := s.clusterRepo.GetByID(ctx, clusterID)
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrClusterNotFound
|
||||
}
|
||||
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
|
||||
return nil, entity.ErrClusterNotFound
|
||||
}
|
||||
|
||||
return s.instanceRepo.ListByCluster(ctx, clusterID)
|
||||
instances, err := s.instanceRepo.ListByCluster(ctx, clusterID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
visible := make([]*entity.Instance, 0, len(instances))
|
||||
for _, instance := range instances {
|
||||
if s.canReadInstance(principal, instance) {
|
||||
visible = append(visible, instance)
|
||||
}
|
||||
}
|
||||
return visible, nil
|
||||
}
|
||||
|
||||
// ListInstanceEntries 列出实例关联的入口信息(Service / Ingress)
|
||||
func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) {
|
||||
instance, err := s.instanceRepo.GetByID(ctx, instanceID)
|
||||
instance, err := s.GetInstance(ctx, instanceID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
@ -283,6 +377,187 @@ func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, in
|
||||
return s.entryClient.ListEntries(ctx, cluster, instance)
|
||||
}
|
||||
|
||||
func (s *InstanceService) GetInstanceDiagnostics(ctx context.Context, clusterID, instanceID string, tailLines int64) (*entity.InstanceDiagnostics, error) {
|
||||
instance, err := s.GetInstance(ctx, instanceID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
if instance.ClusterID != clusterID {
|
||||
return nil, entity.ErrInstanceNotFound
|
||||
}
|
||||
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
|
||||
if err != nil {
|
||||
return nil, entity.ErrClusterNotFound
|
||||
}
|
||||
if s.diagClient == nil {
|
||||
return nil, fmt.Errorf("instance diagnostics client is not configured")
|
||||
}
|
||||
return s.diagClient.GetDiagnostics(ctx, cluster, instance, tailLines)
|
||||
}
|
||||
|
||||
func (s *InstanceService) canReadInstance(principal *authz.Principal, instance *entity.Instance) bool {
|
||||
if principal.IsAdmin() {
|
||||
return true
|
||||
}
|
||||
return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID
|
||||
}
|
||||
|
||||
func (s *InstanceService) canWriteInstance(principal *authz.Principal, instance *entity.Instance) bool {
|
||||
if principal.IsAdmin() {
|
||||
return true
|
||||
}
|
||||
return instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID
|
||||
}
|
||||
|
||||
func enforceNamespaceValues(instance *entity.Instance) {
|
||||
if instance == nil || instance.Namespace == "" {
|
||||
return
|
||||
}
|
||||
if instance.Values == nil {
|
||||
instance.Values = map[string]interface{}{}
|
||||
}
|
||||
instance.Values["namespace"] = instance.Namespace
|
||||
setExistingStringValue(instance.Values, "namespaceOverride", instance.Namespace)
|
||||
setExistingStringValue(instance.Values, "targetNamespace", instance.Namespace)
|
||||
setExistingNestedStringValue(instance.Values, "global", "namespace", instance.Namespace)
|
||||
setExistingNestedStringValue(instance.Values, "global", "namespaceOverride", instance.Namespace)
|
||||
}
|
||||
|
||||
func setExistingStringValue(values map[string]interface{}, key, namespace string) {
|
||||
if _, ok := values[key]; ok {
|
||||
values[key] = namespace
|
||||
}
|
||||
}
|
||||
|
||||
func setExistingNestedStringValue(values map[string]interface{}, parent, key, namespace string) {
|
||||
child, ok := values[parent].(map[string]interface{})
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if _, ok := child[key]; ok {
|
||||
child[key] = namespace
|
||||
}
|
||||
}
|
||||
|
||||
func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
if principal.IsAdmin() {
|
||||
if isProtectedSystemNamespace(instance.Namespace) {
|
||||
return entity.ErrInvalidNamespace
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if isReservedNamespace(instance.Namespace) {
|
||||
return entity.ErrInvalidNamespace
|
||||
}
|
||||
if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID {
|
||||
namespace := principal.Namespace
|
||||
if namespace == "" {
|
||||
namespace = entity.NamespaceForWorkspace(principal.WorkspaceName)
|
||||
}
|
||||
if s.bindingRepo != nil {
|
||||
if binding, err := s.bindingRepo.Get(ctx, principal.WorkspaceID, cluster.ID); err == nil && binding != nil && binding.Namespace != "" {
|
||||
namespace = binding.Namespace
|
||||
}
|
||||
}
|
||||
instance.Namespace = namespace
|
||||
return nil
|
||||
}
|
||||
if instance.Namespace == "" {
|
||||
if cluster.DefaultNamespace != "" {
|
||||
instance.Namespace = cluster.DefaultNamespace
|
||||
} else if principal.Namespace != "" {
|
||||
instance.Namespace = principal.Namespace
|
||||
} else {
|
||||
instance.Namespace = entity.NamespaceForWorkspace(principal.Username)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
|
||||
if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil {
|
||||
return nil
|
||||
}
|
||||
workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if workspace.Status == entity.WorkspaceSuspended {
|
||||
return entity.ErrWorkspaceSuspended
|
||||
}
|
||||
binding := entity.NewTenantBinding(instance.Namespace)
|
||||
binding.ServiceAccountName = workspace.K8sSAName
|
||||
binding.ResourceQuotaHard = instanceResourceQuotaHard(workspace)
|
||||
if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil {
|
||||
return err
|
||||
}
|
||||
if s.bindingRepo != nil {
|
||||
_ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
|
||||
ID: uuid.New().String(),
|
||||
WorkspaceID: workspace.ID,
|
||||
ClusterID: cluster.ID,
|
||||
Namespace: instance.Namespace,
|
||||
ServiceAccount: workspace.K8sSAName,
|
||||
QuotaCPU: workspace.QuotaCPU,
|
||||
QuotaMemory: workspace.QuotaMemory,
|
||||
QuotaGPU: workspace.QuotaGPU,
|
||||
QuotaGPUMem: workspace.QuotaGPUMem,
|
||||
Status: "active",
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
|
||||
hard := corev1.ResourceList{}
|
||||
addQuantity := func(name corev1.ResourceName, value string) {
|
||||
value = normalizeStandardQuotaQuantity(value)
|
||||
if value == "" {
|
||||
return
|
||||
}
|
||||
if quantity, err := resource.ParseQuantity(value); err == nil {
|
||||
hard[name] = quantity
|
||||
}
|
||||
}
|
||||
addGPUMemoryQuantity := func(value string) {
|
||||
value, err := normalizeGPUMemoryQuota(value)
|
||||
if err != nil || value == "" {
|
||||
return
|
||||
}
|
||||
if quantity, err := resource.ParseQuantity(value); err == nil {
|
||||
hard[corev1.ResourceName("requests.nvidia.com/gpumem")] = quantity
|
||||
}
|
||||
}
|
||||
if workspace == nil {
|
||||
return hard
|
||||
}
|
||||
addQuantity(corev1.ResourceName("requests.cpu"), workspace.QuotaCPU)
|
||||
addQuantity(corev1.ResourceName("requests.memory"), workspace.QuotaMemory)
|
||||
addQuantity(corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU)
|
||||
addGPUMemoryQuantity(workspace.QuotaGPUMem)
|
||||
return hard
|
||||
}
|
||||
|
||||
func isReservedNamespace(namespace string) bool {
|
||||
switch namespace {
|
||||
case "default", "kube-system", "kube-public", "kube-node-lease":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isProtectedSystemNamespace(namespace string) bool {
|
||||
switch namespace {
|
||||
case "kube-system", "kube-public", "kube-node-lease":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// executeAndSyncInstall 异步执行安装并监控状态
|
||||
func (s *InstanceService) executeAndSyncInstall(ctx context.Context, instanceID string, cluster *entity.Cluster, registry *entity.Registry, instance *entity.Instance) {
|
||||
// 执行 Helm 安装
|
||||
@ -338,7 +613,7 @@ func (s *InstanceService) executeAndSyncRollback(ctx context.Context, instanceID
|
||||
func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string) {
|
||||
// 执行 Helm 卸载
|
||||
err := s.helmClient.Uninstall(ctx, cluster, releaseName, namespace)
|
||||
|
||||
|
||||
// 获取实例
|
||||
instance, getErr := s.instanceRepo.GetByID(ctx, instanceID)
|
||||
if getErr != nil {
|
||||
@ -360,7 +635,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI
|
||||
// 卸载成功,标记为已卸载
|
||||
instance.MarkSuccess(entity.StatusUninstalled, instance.Revision, "Instance uninstalled successfully")
|
||||
_ = s.instanceRepo.Update(ctx, instance)
|
||||
|
||||
|
||||
// 验证卸载是否完成:尝试获取状态,如果获取不到说明已卸载
|
||||
time.Sleep(3 * time.Second)
|
||||
_, statusErr := s.helmClient.GetStatus(ctx, cluster, releaseName, namespace)
|
||||
@ -377,7 +652,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI
|
||||
|
||||
// syncInstanceStatus 同步实例状态(定期检查 Helm 状态并更新数据库)
|
||||
func (s *InstanceService) syncInstanceStatus(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string, operation entity.InstanceOperation) {
|
||||
maxAttempts := 30 // 最多尝试30次(约5分钟)
|
||||
maxAttempts := 30 // 最多尝试30次(约5分钟)
|
||||
interval := 10 * time.Second // 每10秒检查一次
|
||||
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
|
||||
Reference in New Issue
Block a user