refactor: full-stack restructure with multi-tenancy, workspace management, and K8s diagnostics

- Add Workspace domain (entity, repository, service, handler, DTO)
- Add multi-tenant K8s client with tenant binding and quota management
- Add K8s diagnostics client (instance diagnostics)
- Add authorization middleware (authz package)
- Restructure frontend to feature-based architecture (features/)
- Add User Management page in configuration
- Add AccessDenied page and route guards
- Refactor shared components (form inputs, layout, UI)
- Update Tailwind config for new design system
- Add comprehensive documentation (docs/, tasks/, plans)
- Improve cluster service with better kubeconfig handling
- Add tests for crypto, config, helm client, tenant binding
This commit is contained in:
Ivan087
2026-05-12 16:15:14 +08:00
parent c5e51ed069
commit 7f238a3168
172 changed files with 15703 additions and 3162 deletions

View File

@ -4,6 +4,7 @@ import (
"context"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
)
// ArtifactService Artifact 浏览领域服务
@ -25,22 +26,22 @@ func NewArtifactService(
// GetRegistry 获取 Registry 信息
func (s *ArtifactService) GetRegistry(ctx context.Context, registryID string) (*entity.Registry, error) {
return s.registryRepo.GetByID(ctx, registryID)
return s.visibleRegistry(ctx, registryID)
}
// ListRepositories 列出 Registry 中的所有 repositories
func (s *ArtifactService) ListRepositories(ctx context.Context, registryID string) ([]string, error) {
registry, err := s.registryRepo.GetByID(ctx, registryID)
// ListRepositories 列出 Registry 中的 repositories
func (s *ArtifactService) ListRepositories(ctx context.Context, registryID, artifactType string) ([]string, error) {
registry, err := s.visibleRegistry(ctx, registryID)
if err != nil {
return nil, entity.ErrRegistryNotFound
}
return s.ociClient.ListRepositories(ctx, registry)
return s.ociClient.ListRepositories(ctx, registry, artifactType)
}
// ListArtifacts 列出 repository 中的所有 artifacts
func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, repository, mediaTypeFilter string) ([]*entity.Artifact, error) {
registry, err := s.registryRepo.GetByID(ctx, registryID)
registry, err := s.visibleRegistry(ctx, registryID)
if err != nil {
return nil, entity.ErrRegistryNotFound
}
@ -50,7 +51,7 @@ func (s *ArtifactService) ListArtifacts(ctx context.Context, registryID, reposit
// GetArtifact 获取 artifact 详情
func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repository, reference string) (*entity.Artifact, error) {
registry, err := s.registryRepo.GetByID(ctx, registryID)
registry, err := s.visibleRegistry(ctx, registryID)
if err != nil {
return nil, entity.ErrRegistryNotFound
}
@ -60,7 +61,7 @@ func (s *ArtifactService) GetArtifact(ctx context.Context, registryID, repositor
// GetValuesSchema 获取 Helm Chart 的 values schema
func (s *ArtifactService) GetValuesSchema(ctx context.Context, registryID, repository, reference string) (string, error) {
registry, err := s.registryRepo.GetByID(ctx, registryID)
registry, err := s.visibleRegistry(ctx, registryID)
if err != nil {
return "", entity.ErrRegistryNotFound
}
@ -68,9 +69,19 @@ func (s *ArtifactService) GetValuesSchema(ctx context.Context, registryID, repos
return s.ociClient.GetValuesSchema(ctx, registry, repository, reference)
}
// GetValuesYAML returns the raw values.yaml of a Helm chart.
//
// The registry is resolved through visibleRegistry; any failure of that
// lookup is reported as entity.ErrRegistryNotFound. Otherwise the result of
// ociClient.GetValuesYAML is returned unchanged.
func (s *ArtifactService) GetValuesYAML(ctx context.Context, registryID, repository, reference string) (string, error) {
	reg, lookupErr := s.visibleRegistry(ctx, registryID)
	if lookupErr == nil {
		return s.ociClient.GetValuesYAML(ctx, reg, repository, reference)
	}
	return "", entity.ErrRegistryNotFound
}
// PullArtifact 下载 artifact
func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, repository, reference, destPath string) error {
registry, err := s.registryRepo.GetByID(ctx, registryID)
registry, err := s.visibleRegistry(ctx, registryID)
if err != nil {
return entity.ErrRegistryNotFound
}
@ -78,3 +89,17 @@ func (s *ArtifactService) PullArtifact(ctx context.Context, registryID, reposito
return s.ociClient.PullArtifact(ctx, registry, repository, reference, destPath)
}
// visibleRegistry loads a registry by ID and returns it only when the
// principal carried by ctx is allowed to read it.
//
// It returns entity.ErrUnauthorized when no principal can be obtained from
// ctx, and entity.ErrRegistryNotFound both when the repository lookup fails
// and when authz.CanReadResource denies access.
func (s *ArtifactService) visibleRegistry(ctx context.Context, registryID string) (*entity.Registry, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}

	found, repoErr := s.registryRepo.GetByID(ctx, registryID)
	// Short-circuit evaluation keeps found from being dereferenced when the lookup failed.
	if repoErr != nil || !authz.CanReadResource(caller, found.WorkspaceID, found.OwnerID, found.Visibility) {
		return nil, entity.ErrRegistryNotFound
	}
	return found, nil
}

View File

@ -2,14 +2,22 @@ package service
import (
"context"
"strings"
"time"
"github.com/google/uuid"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
jwtpkg "github.com/ocdp/cluster-service/internal/pkg/jwt"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/validation"
)
// AuthService is the authentication domain service. It holds the
// collaborators used by the user, workspace and token operations in this file.
type AuthService struct {
// userRepo is the user store.
userRepo repository.UserRepository
// workspaceRepo is the workspace store; several methods in this file
// check it for nil before use.
workspaceRepo repository.WorkspaceRepository
// passwordHasher verifies passwords during Login.
passwordHasher PasswordHasher
// tokenGenerator issues and verifies access/refresh tokens.
tokenGenerator TokenGenerator
}
@ -22,27 +30,48 @@ type PasswordHasher interface {
// TokenGenerator Token 生成器接口
type TokenGenerator interface {
Generate(userID, username string) (accessToken, refreshToken string, err error)
Generate(userID, username, role, workspaceID string) (accessToken, refreshToken string, err error)
Verify(token string) (userID, username string, err error)
VerifyWithIssuedAt(token string) (userID, username string, issuedAt int64, err error)
VerifyAccess(token string) (*jwtpkg.Claims, error)
VerifyRefresh(token string) (*jwtpkg.Claims, error)
Refresh(refreshToken string) (newAccessToken string, err error)
}
// NewAuthService builds an AuthService from its collaborators. The arguments
// are stored as given; no validation is performed here.
func NewAuthService(
	userRepo repository.UserRepository,
	workspaceRepo repository.WorkspaceRepository,
	passwordHasher PasswordHasher,
	tokenGenerator TokenGenerator,
) *AuthService {
	svc := new(AuthService)
	svc.userRepo = userRepo
	svc.workspaceRepo = workspaceRepo
	svc.passwordHasher = passwordHasher
	svc.tokenGenerator = tokenGenerator
	return svc
}
// Register 注册新用户(仅需用户名和密码,邮箱将自动补全)
func (s *AuthService) Register(ctx context.Context, username, password string) (*entity.User, error) {
// UserWorkspaceOptions carries the optional workspace settings that can be
// supplied when a user is registered or updated. All fields are strings and
// are whitespace-trimmed before use; an empty field means "not provided".
type UserWorkspaceOptions struct {
// Namespace is the Kubernetes namespace for the workspace. When empty,
// createUserWorkspace derives one from the username.
Namespace string
// DefaultClusterID is copied to the workspace's DefaultClusterID.
DefaultClusterID string
// QuotaCPU, QuotaMemory and QuotaGPU are checked with
// resource.ParseQuantity in normalizeQuotaOptions.
QuotaCPU string
QuotaMemory string
QuotaGPU string
// QuotaGPUMem is normalized through normalizeGPUMemoryQuota.
QuotaGPUMem string
}
// Register creates a new user. The business entry point may only be called
// by an admin; the initial admin is created by the bootstrap seeder.
func (s *AuthService) Register(ctx context.Context, username, password, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
if !principal.IsAdmin() {
return nil, entity.ErrForbidden
}
// 检查用户是否已存在
existingUser, _ := s.userRepo.GetByUsername(ctx, username)
if existingUser != nil {
@ -54,6 +83,10 @@ func (s *AuthService) Register(ctx context.Context, username, password string) (
if err != nil {
return nil, err
}
normalizedOpts, err := normalizeQuotaOptions(opts)
if err != nil {
return nil, err
}
// 默认生成占位邮箱,避免数据库约束失败
email := username + "@local.ocdp"
@ -61,6 +94,27 @@ func (s *AuthService) Register(ctx context.Context, username, password string) (
// 创建用户
user := entity.NewUser(username, passwordHash, email)
user.ID = uuid.New().String()
user.Role = normalizeUserRole(role)
user.WorkspaceID = workspaceID
if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) {
workspace, err := s.createUserWorkspace(ctx, username, principal.UserID, normalizedOpts)
if err != nil {
return nil, err
}
user.WorkspaceID = workspace.ID
}
if user.WorkspaceID == "" {
user.WorkspaceID = entity.DefaultWorkspaceID
}
if user.Role == authz.RoleAdmin {
user.WorkspaceID = entity.DefaultWorkspaceID
}
if isActive != nil {
user.IsActive = *isActive
}
if mustChangePassword != nil {
user.MustChangePassword = *mustChangePassword
}
if err := user.Validate(); err != nil {
return nil, err
@ -73,31 +127,241 @@ func (s *AuthService) Register(ctx context.Context, username, password string) (
return user, nil
}
// Login 用户登录
func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, err error) {
// 查找用户
user, err := s.userRepo.GetByUsername(ctx, username)
// createUserWorkspace creates and persists a workspace for the given user.
//
// The workspace name is the user's namespace (entity.NamespaceForUser) with
// the "ocdp-u-" prefix removed. The Kubernetes namespace comes from
// opts.Namespace, falling back to entity.NamespaceForUser(username); a
// non-empty namespace must be a valid DNS-1123 label, otherwise
// entity.ErrInvalidNamespace is returned. Quota fields are copied from opts
// after trimming whitespace.
//
// It returns entity.ErrWorkspaceNotFound when no workspace repository is
// configured, and propagates any error from workspaceRepo.Create.
func (s *AuthService) createUserWorkspace(ctx context.Context, username, createdBy string, opts UserWorkspaceOptions) (*entity.Workspace, error) {
	if s.workspaceRepo == nil {
		return nil, entity.ErrWorkspaceNotFound
	}

	ws := entity.NewWorkspace(strings.TrimPrefix(entity.NamespaceForUser(username), "ocdp-u-"), createdBy)
	ws.ID = uuid.New().String()
	ws.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID)

	ns := strings.TrimSpace(opts.Namespace)
	if ns == "" {
		ns = entity.NamespaceForUser(username)
	}
	if ns != "" {
		if problems := validation.IsDNS1123Label(ns); len(problems) > 0 {
			return nil, entity.ErrInvalidNamespace
		}
		ws.K8sNamespace = ns
		ws.K8sSAName = entity.ServiceAccountForNamespace(ns)
	}

	ws.QuotaCPU = strings.TrimSpace(opts.QuotaCPU)
	ws.QuotaMemory = strings.TrimSpace(opts.QuotaMemory)
	ws.QuotaGPU = strings.TrimSpace(opts.QuotaGPU)
	ws.QuotaGPUMem = strings.TrimSpace(opts.QuotaGPUMem)

	if createErr := s.workspaceRepo.Create(ctx, ws); createErr != nil {
		return nil, createErr
	}
	return ws, nil
}
func normalizeQuotaOptions(opts UserWorkspaceOptions) (UserWorkspaceOptions, error) {
opts.Namespace = strings.TrimSpace(opts.Namespace)
opts.DefaultClusterID = strings.TrimSpace(opts.DefaultClusterID)
opts.QuotaCPU = normalizeStandardQuotaQuantity(opts.QuotaCPU)
opts.QuotaMemory = normalizeStandardQuotaQuantity(opts.QuotaMemory)
opts.QuotaGPU = normalizeStandardQuotaQuantity(opts.QuotaGPU)
gpuMem, err := normalizeGPUMemoryQuota(opts.QuotaGPUMem)
if err != nil {
return "", "", entity.ErrUserNotFound
return opts, err
}
opts.QuotaGPUMem = gpuMem
for _, value := range []string{opts.QuotaCPU, opts.QuotaMemory, opts.QuotaGPU} {
if value == "" {
continue
}
if _, err := resource.ParseQuantity(value); err != nil {
return opts, entity.ErrInvalidTenantResourceQuota
}
}
if opts.Namespace != "" && len(validation.IsDNS1123Label(opts.Namespace)) > 0 {
return opts, entity.ErrInvalidNamespace
}
return opts, nil
}
// ListUsers returns every user known to the user repository.
//
// It returns entity.ErrUnauthorized when ctx carries no principal and
// entity.ErrForbidden when the principal is not an admin.
func (s *AuthService) ListUsers(ctx context.Context) ([]*entity.User, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	switch {
	case authErr != nil:
		return nil, entity.ErrUnauthorized
	case !caller.IsAdmin():
		return nil, entity.ErrForbidden
	}
	return s.userRepo.List(ctx)
}
// UpdateUser applies an admin-initiated change to an existing user and, where
// applicable, to the user's workspace.
//
// Parameters:
//   - role, workspaceID: applied only when non-empty; role is passed through
//     normalizeUserRole.
//   - opts: workspace settings; used when a workspace has to be created for
//     the user, and applied to the user's workspace when any field is set and
//     the user is not an admin.
//   - isActive, mustChangePassword: applied only when non-nil.
//
// Rules enforced here:
//   - the caller must be an authenticated admin (entity.ErrUnauthorized /
//     entity.ErrForbidden otherwise);
//   - admin users are always pinned to entity.DefaultWorkspaceID;
//   - a regular user with no workspace, or with the default workspace, gets a
//     newly created workspace;
//   - an admin cannot deactivate their own account (entity.ErrForbidden).
//
// Every successful update sets RevokedAfter to the current time; the token
// checks in this file reject tokens issued before that instant.
//
// It returns entity.ErrUserNotFound when the user lookup fails and
// entity.ErrWorkspaceNotFound when workspace settings must be updated but no
// workspace repository is configured or the workspace lookup yields nothing.
func (s *AuthService) UpdateUser(ctx context.Context, userID, role, workspaceID string, opts UserWorkspaceOptions, isActive, mustChangePassword *bool) (*entity.User, error) {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return nil, entity.ErrUnauthorized
	}
	if !principal.IsAdmin() {
		return nil, entity.ErrForbidden
	}

	user, err := s.userRepo.GetByID(ctx, userID)
	if err != nil {
		return nil, entity.ErrUserNotFound
	}

	if role != "" {
		user.Role = normalizeUserRole(role)
	}
	if workspaceID != "" {
		user.WorkspaceID = workspaceID
	}
	if user.Role == authz.RoleAdmin {
		user.WorkspaceID = entity.DefaultWorkspaceID
	}
	if user.Role == authz.RoleUser && (user.WorkspaceID == "" || user.WorkspaceID == entity.DefaultWorkspaceID) {
		normalizedOpts, err := normalizeQuotaOptions(opts)
		if err != nil {
			return nil, err
		}
		workspace, err := s.createUserWorkspace(ctx, user.Username, principal.UserID, normalizedOpts)
		if err != nil {
			return nil, err
		}
		user.WorkspaceID = workspace.ID
	}

	if isActive != nil {
		// Prevent an admin from locking themselves out.
		if user.ID == principal.UserID && !*isActive {
			return nil, entity.ErrForbidden
		}
		user.IsActive = *isActive
	}
	if mustChangePassword != nil {
		user.MustChangePassword = *mustChangePassword
	}

	if user.Role != authz.RoleAdmin && hasWorkspaceUpdates(opts) {
		normalizedOpts, err := normalizeQuotaOptions(opts)
		if err != nil {
			return nil, err
		}
		// Other methods in this file treat a nil workspace repository as a
		// valid configuration; guard here instead of panicking.
		if s.workspaceRepo == nil {
			return nil, entity.ErrWorkspaceNotFound
		}
		workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID)
		if err != nil {
			return nil, err
		}
		if workspace == nil {
			return nil, entity.ErrWorkspaceNotFound
		}
		applyWorkspaceOptions(workspace, normalizedOpts)
		if err := s.workspaceRepo.Update(ctx, workspace); err != nil {
			return nil, err
		}
	}

	// Use one instant so RevokedAfter and UpdatedAt agree.
	now := time.Now()
	user.RevokedAfter = now
	user.UpdatedAt = now

	if err := user.Validate(); err != nil {
		return nil, err
	}
	if err := s.userRepo.Update(ctx, user); err != nil {
		return nil, err
	}
	return user, nil
}
// hasWorkspaceUpdates reports whether at least one field of opts is non-empty
// after trimming surrounding whitespace.
func hasWorkspaceUpdates(opts UserWorkspaceOptions) bool {
	candidates := []string{
		opts.Namespace,
		opts.DefaultClusterID,
		opts.QuotaCPU,
		opts.QuotaMemory,
		opts.QuotaGPU,
		opts.QuotaGPUMem,
	}
	for _, raw := range candidates {
		if strings.TrimSpace(raw) != "" {
			return true
		}
	}
	return false
}
// applyWorkspaceOptions copies every non-empty (after trimming) field of opts
// onto workspace, leaving fields untouched when the option is blank. Setting
// the namespace also recomputes the service-account name via
// entity.ServiceAccountForNamespace.
func applyWorkspaceOptions(workspace *entity.Workspace, opts UserWorkspaceOptions) {
	if ns := strings.TrimSpace(opts.Namespace); ns != "" {
		workspace.K8sNamespace = ns
		workspace.K8sSAName = entity.ServiceAccountForNamespace(ns)
	}

	// overwrite stores the trimmed value in dst unless it is blank.
	overwrite := func(dst *string, raw string) {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			*dst = trimmed
		}
	}
	overwrite(&workspace.DefaultClusterID, opts.DefaultClusterID)
	overwrite(&workspace.QuotaCPU, opts.QuotaCPU)
	overwrite(&workspace.QuotaMemory, opts.QuotaMemory)
	overwrite(&workspace.QuotaGPU, opts.QuotaGPU)
	overwrite(&workspace.QuotaGPUMem, opts.QuotaGPUMem)
}
// DeleteUser removes the user with the given ID.
//
// It returns entity.ErrUnauthorized when ctx carries no principal, and
// entity.ErrForbidden when the principal is not an admin or is trying to
// delete their own account.
func (s *AuthService) DeleteUser(ctx context.Context, userID string) error {
	caller, authErr := authz.RequirePrincipal(ctx)
	switch {
	case authErr != nil:
		return entity.ErrUnauthorized
	case !caller.IsAdmin():
		return entity.ErrForbidden
	case userID == caller.UserID:
		// Self-deletion is refused.
		return entity.ErrForbidden
	}
	return s.userRepo.Delete(ctx, userID)
}
// normalizeUserRole maps role to one of the two known roles: authz.RoleAdmin
// when role equals it exactly, authz.RoleUser for every other input.
func normalizeUserRole(role string) string {
	switch role {
	case authz.RoleAdmin:
		return authz.RoleAdmin
	default:
		return authz.RoleUser
	}
}
// Login 用户登录
func (s *AuthService) Login(ctx context.Context, username, password string) (accessToken, refreshToken string, user *entity.User, err error) {
// 查找用户
user, err = s.userRepo.GetByUsername(ctx, username)
if err != nil {
return "", "", nil, entity.ErrUserNotFound
}
if !user.IsActive {
return "", "", nil, entity.ErrUserInactive
}
if err := s.ensureWorkspaceActive(ctx, user); err != nil {
return "", "", nil, err
}
// 验证密码
if err := s.passwordHasher.Verify(password, user.PasswordHash); err != nil {
return "", "", entity.ErrInvalidPassword
return "", "", nil, entity.ErrInvalidPassword
}
// 生成 Token
accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username)
accessToken, refreshToken, err = s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID)
if err != nil {
return "", "", err
return "", "", nil, err
}
return accessToken, refreshToken, nil
return accessToken, refreshToken, user, nil
}
// RefreshToken 刷新 Token
func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, error) {
return s.tokenGenerator.Refresh(refreshToken)
func (s *AuthService) RefreshToken(ctx context.Context, refreshToken string) (string, *entity.User, error) {
claims, err := s.tokenGenerator.VerifyRefresh(refreshToken)
if err != nil {
return "", nil, err
}
user, err := s.userRepo.GetByID(ctx, claims.UserID)
if err != nil {
return "", nil, entity.ErrUserNotFound
}
if !user.IsActive {
return "", nil, entity.ErrUserInactive
}
if claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() {
return "", nil, entity.ErrTokenRevoked
}
if err := s.ensureWorkspaceActive(ctx, user); err != nil {
return "", nil, err
}
accessToken, _, err := s.tokenGenerator.Generate(user.ID, user.Username, user.Role, user.WorkspaceID)
if err != nil {
return "", nil, err
}
return accessToken, user, nil
}
// GetUserByID 根据 ID 获取用户
@ -106,25 +370,84 @@ func (s *AuthService) GetUserByID(ctx context.Context, id string) (*entity.User,
}
// VerifyAccessToken verifies an access token (including the revoked_after check).
func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (userID, username string, err error) {
func (s *AuthService) VerifyAccessToken(ctx context.Context, token string) (*authz.Principal, error) {
// 1. JWT 自验证
userID, username, issuedAt, err := s.tokenGenerator.VerifyWithIssuedAt(token)
claims, err := s.tokenGenerator.VerifyAccess(token)
if err != nil {
return "", "", err
return nil, err
}
// 2. 检查用户级别的撤销时间
user, err := s.userRepo.GetByID(ctx, userID)
user, err := s.userRepo.GetByID(ctx, claims.UserID)
if err != nil {
return "", "", entity.ErrUserNotFound
return nil, entity.ErrUserNotFound
}
if !user.IsActive {
return nil, entity.ErrUserInactive
}
// 3. If the token was issued before revoked_after, it is no longer valid.
if issuedAt < user.RevokedAfter.Unix() {
return "", "", entity.ErrTokenRevoked
if claims.IssuedAt == nil || claims.IssuedAt.Unix() < user.RevokedAfter.Unix() {
return nil, entity.ErrTokenRevoked
}
if err := s.ensureWorkspaceActive(ctx, user); err != nil {
return nil, err
}
workspaceName := ""
namespace := ""
defaultClusterID := ""
quotaCPU := ""
quotaMemory := ""
quotaGPU := ""
quotaGPUMem := ""
if s.workspaceRepo != nil && user.WorkspaceID != "" {
if workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID); err == nil && workspace != nil {
workspaceName = workspace.Name
namespace = workspace.K8sNamespace
defaultClusterID = workspace.DefaultClusterID
quotaCPU = workspace.QuotaCPU
quotaMemory = workspace.QuotaMemory
quotaGPU = workspace.QuotaGPU
quotaGPUMem = workspace.QuotaGPUMem
}
}
return userID, username, nil
return &authz.Principal{
UserID: user.ID,
Username: user.Username,
Role: user.Role,
WorkspaceID: user.WorkspaceID,
WorkspaceName: workspaceName,
Namespace: namespace,
DefaultClusterID: defaultClusterID,
QuotaCPU: quotaCPU,
QuotaMemory: quotaMemory,
QuotaGPU: quotaGPU,
QuotaGPUMem: quotaGPUMem,
Permissions: authz.PermissionsForRole(user.Role),
PermissionVersion: 1,
}, nil
}
// GetWorkspaceByID looks up a workspace by ID. It returns
// entity.ErrWorkspaceNotFound when id is empty or no workspace repository is
// configured; otherwise the repository result is returned as-is.
func (s *AuthService) GetWorkspaceByID(ctx context.Context, id string) (*entity.Workspace, error) {
	repo := s.workspaceRepo
	if repo != nil && id != "" {
		return repo.GetByID(ctx, id)
	}
	return nil, entity.ErrWorkspaceNotFound
}
// ensureWorkspaceActive verifies that the user's workspace exists and is not
// suspended.
//
// The check is skipped (nil is returned) for admin users, for users without a
// workspace ID, and when no workspace repository is configured. It returns
// entity.ErrWorkspaceNotFound when the workspace cannot be loaded and
// entity.ErrWorkspaceSuspended when its status is entity.WorkspaceSuspended.
func (s *AuthService) ensureWorkspaceActive(ctx context.Context, user *entity.User) error {
	if user.Role == authz.RoleAdmin || user.WorkspaceID == "" || s.workspaceRepo == nil {
		return nil
	}
	workspace, err := s.workspaceRepo.GetByID(ctx, user.WorkspaceID)
	// VerifyAccessToken guards against a nil workspace from the same
	// repository call, so a (nil, nil) result is handled here too instead of
	// dereferencing a nil pointer.
	if err != nil || workspace == nil {
		return entity.ErrWorkspaceNotFound
	}
	if workspace.Status == entity.WorkspaceSuspended {
		return entity.ErrWorkspaceSuspended
	}
	return nil
}
// ChangePassword 修改密码(会触发全局登出)

View File

@ -5,6 +5,7 @@ import (
"github.com/google/uuid"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
)
// ClusterService 集群管理领域服务
@ -21,8 +22,21 @@ func NewClusterService(clusterRepo repository.ClusterRepository) *ClusterService
// CreateCluster 创建新集群
func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Cluster) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 生成 ID
cluster.ID = uuid.New().String()
cluster.OwnerID = principal.UserID
cluster.WorkspaceID = principal.WorkspaceID
if principal.IsAdmin() && cluster.WorkspaceID == "" {
cluster.WorkspaceID = entity.DefaultWorkspaceID
}
if !principal.IsAdmin() && cluster.Visibility == authz.VisibilityGlobalShared {
return entity.ErrForbidden
}
cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility)
// 验证
if err := cluster.Validate(); err != nil {
@ -30,9 +44,11 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus
}
// 检查是否已存在
existingCluster, _ := s.clusterRepo.GetByName(ctx, cluster.Name)
if existingCluster != nil {
return entity.ErrClusterExists
clusters, _ := s.clusterRepo.List(ctx)
for _, existingCluster := range clusters {
if existingCluster.Name == cluster.Name && existingCluster.WorkspaceID == cluster.WorkspaceID && existingCluster.OwnerID == cluster.OwnerID {
return entity.ErrClusterExists
}
}
return s.clusterRepo.Create(ctx, cluster)
@ -40,16 +56,41 @@ func (s *ClusterService) CreateCluster(ctx context.Context, cluster *entity.Clus
// GetCluster 获取集群
func (s *ClusterService) GetCluster(ctx context.Context, id string) (*entity.Cluster, error) {
return s.clusterRepo.GetByID(ctx, id)
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
cluster, err := s.clusterRepo.GetByID(ctx, id)
if err != nil {
return nil, err
}
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
return nil, entity.ErrClusterNotFound
}
return cluster, nil
}
// UpdateCluster 更新集群
func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Cluster) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查是否存在
_, err := s.clusterRepo.GetByID(ctx, cluster.ID)
existing, err := s.clusterRepo.GetByID(ctx, cluster.ID)
if err != nil {
return entity.ErrClusterNotFound
}
if !authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) {
return entity.ErrForbidden
}
cluster.WorkspaceID = existing.WorkspaceID
cluster.OwnerID = existing.OwnerID
if principal.IsAdmin() {
cluster.Visibility = authz.NormalizeVisibility(principal.Role, cluster.Visibility)
} else {
cluster.Visibility = existing.Visibility
}
// 验证
if err := cluster.Validate(); err != nil {
@ -61,17 +102,37 @@ func (s *ClusterService) UpdateCluster(ctx context.Context, cluster *entity.Clus
// DeleteCluster 删除集群
func (s *ClusterService) DeleteCluster(ctx context.Context, id string) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查是否存在
_, err := s.clusterRepo.GetByID(ctx, id)
cluster, err := s.clusterRepo.GetByID(ctx, id)
if err != nil {
return entity.ErrClusterNotFound
}
if !authz.CanWriteResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
return entity.ErrForbidden
}
return s.clusterRepo.Delete(ctx, id)
}
// ListClusters 列出所有集群
func (s *ClusterService) ListClusters(ctx context.Context) ([]*entity.Cluster, error) {
return s.clusterRepo.List(ctx)
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
clusters, err := s.clusterRepo.List(ctx)
if err != nil {
return nil, err
}
visible := make([]*entity.Cluster, 0, len(clusters))
for _, cluster := range clusters {
if authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
visible = append(visible, cluster)
}
}
return visible, nil
}

View File

@ -11,16 +11,23 @@ import (
"github.com/google/uuid"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
// InstanceService Helm 实例管理领域服务
type InstanceService struct {
instanceRepo repository.InstanceRepository
clusterRepo repository.ClusterRepository
registryRepo repository.RegistryRepository
helmClient repository.HelmClient
ociClient repository.OCIClient
entryClient repository.InstanceEntryClient
instanceRepo repository.InstanceRepository
clusterRepo repository.ClusterRepository
registryRepo repository.RegistryRepository
bindingRepo repository.WorkspaceClusterBindingRepository
helmClient repository.HelmClient
ociClient repository.OCIClient
entryClient repository.InstanceEntryClient
diagClient repository.InstanceDiagnosticsClient
workspaceRepo repository.WorkspaceRepository
tenantClient repository.TenantKubeClient
}
// NewInstanceService 创建实例服务
@ -31,17 +38,32 @@ func NewInstanceService(
helmClient repository.HelmClient,
ociClient repository.OCIClient,
entryClient repository.InstanceEntryClient,
bindingRepo ...repository.WorkspaceClusterBindingRepository,
) *InstanceService {
var workspaceBindingRepo repository.WorkspaceClusterBindingRepository
if len(bindingRepo) > 0 {
workspaceBindingRepo = bindingRepo[0]
}
return &InstanceService{
instanceRepo: instanceRepo,
clusterRepo: clusterRepo,
registryRepo: registryRepo,
bindingRepo: workspaceBindingRepo,
helmClient: helmClient,
ociClient: ociClient,
entryClient: entryClient,
}
}
// SetDiagnosticsClient stores the client used by GetInstanceDiagnostics.
// Until it is set, GetInstanceDiagnostics returns a "not configured" error.
func (s *InstanceService) SetDiagnosticsClient(client repository.InstanceDiagnosticsClient) {
s.diagClient = client
}
// SetTenantProvisioning stores the workspace repository and tenant
// Kubernetes client on the service.
func (s *InstanceService) SetTenantProvisioning(workspaceRepo repository.WorkspaceRepository, tenantClient repository.TenantKubeClient) {
	s.workspaceRepo, s.tenantClient = workspaceRepo, tenantClient
}
const chartCacheDir = "/tmp/charts"
func (s *InstanceService) chartArchivePath(instance *entity.Instance) string {
@ -62,8 +84,14 @@ func (s *InstanceService) downloadChart(ctx context.Context, registry *entity.Re
// CreateInstance 创建(安装)新实例
func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.Instance) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 生成 ID
instance.ID = uuid.New().String()
instance.WorkspaceID = principal.WorkspaceID
instance.OwnerID = principal.UserID
// 验证
if err := instance.Validate(); err != nil {
@ -75,12 +103,25 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
if err != nil {
return entity.ErrClusterNotFound
}
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
return entity.ErrClusterNotFound
}
// 检查 Registry 是否存在
registry, err := s.registryRepo.GetByID(ctx, instance.RegistryID)
if err != nil {
return entity.ErrRegistryNotFound
}
if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) {
return entity.ErrRegistryNotFound
}
if err := s.applyNamespacePolicy(ctx, principal, cluster, instance); err != nil {
return err
}
enforceNamespaceValues(instance)
if err := s.ensureTenantForInstance(ctx, principal, cluster, instance); err != nil {
return err
}
// 检查实例是否已存在
existingInstance, _ := s.instanceRepo.GetByClusterAndName(ctx, instance.ClusterID, instance.Name)
@ -111,13 +152,24 @@ func (s *InstanceService) CreateInstance(ctx context.Context, instance *entity.I
// GetInstance 获取实例
func (s *InstanceService) GetInstance(ctx context.Context, id string) (*entity.Instance, error) {
return s.instanceRepo.GetByID(ctx, id)
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
instance, err := s.instanceRepo.GetByID(ctx, id)
if err != nil {
return nil, err
}
if !s.canReadInstance(principal, instance) {
return nil, entity.ErrInstanceNotFound
}
return instance, nil
}
// GetInstanceStatus 获取实例实时状态
func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*entity.Instance, error) {
// 从数据库获取基本信息
instance, err := s.instanceRepo.GetByID(ctx, id)
instance, err := s.GetInstance(ctx, id)
if err != nil {
return nil, entity.ErrInstanceNotFound
}
@ -143,11 +195,20 @@ func (s *InstanceService) GetInstanceStatus(ctx context.Context, id string) (*en
// UpdateInstance 更新(升级)实例
func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.Instance) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查实例是否存在
existingInstance, err := s.instanceRepo.GetByID(ctx, instance.ID)
if err != nil {
return entity.ErrInstanceNotFound
}
if !s.canWriteInstance(principal, existingInstance) {
return entity.ErrForbidden
}
instance.WorkspaceID = existingInstance.WorkspaceID
instance.OwnerID = existingInstance.OwnerID
// 获取集群信息
cluster, err := s.clusterRepo.GetByID(ctx, existingInstance.ClusterID)
@ -161,6 +222,8 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
return entity.ErrRegistryNotFound
}
instance.Namespace = existingInstance.Namespace
enforceNamespaceValues(instance)
instance.BeginOperation(entity.OperationUpgrade, "Pending upgrade")
if err := s.instanceRepo.Update(ctx, instance); err != nil {
return err
@ -182,11 +245,18 @@ func (s *InstanceService) UpdateInstance(ctx context.Context, instance *entity.I
// DeleteInstance 删除(卸载)实例
func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查实例是否存在
instance, err := s.instanceRepo.GetByID(ctx, id)
if err != nil {
return entity.ErrInstanceNotFound
}
if !s.canWriteInstance(principal, instance) {
return entity.ErrForbidden
}
// 获取集群信息
cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID)
@ -208,11 +278,18 @@ func (s *InstanceService) DeleteInstance(ctx context.Context, id string) error {
// RollbackInstance 回滚实例
func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revision int) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查实例是否存在
instance, err := s.instanceRepo.GetByID(ctx, id)
if err != nil {
return entity.ErrInstanceNotFound
}
if !s.canWriteInstance(principal, instance) {
return entity.ErrForbidden
}
// 获取集群信息
cluster, err := s.clusterRepo.GetByID(ctx, instance.ClusterID)
@ -235,7 +312,7 @@ func (s *InstanceService) RollbackInstance(ctx context.Context, id string, revis
// GetInstanceHistory 获取实例历史
func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]*entity.ReleaseHistory, error) {
// 检查实例是否存在
instance, err := s.instanceRepo.GetByID(ctx, id)
instance, err := s.GetInstance(ctx, id)
if err != nil {
return nil, entity.ErrInstanceNotFound
}
@ -252,18 +329,35 @@ func (s *InstanceService) GetInstanceHistory(ctx context.Context, id string) ([]
// ListInstancesByCluster 列出集群的所有实例
func (s *InstanceService) ListInstancesByCluster(ctx context.Context, clusterID string) ([]*entity.Instance, error) {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
// 检查集群是否存在
_, err := s.clusterRepo.GetByID(ctx, clusterID)
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
if err != nil {
return nil, entity.ErrClusterNotFound
}
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
return nil, entity.ErrClusterNotFound
}
return s.instanceRepo.ListByCluster(ctx, clusterID)
instances, err := s.instanceRepo.ListByCluster(ctx, clusterID)
if err != nil {
return nil, err
}
visible := make([]*entity.Instance, 0, len(instances))
for _, instance := range instances {
if s.canReadInstance(principal, instance) {
visible = append(visible, instance)
}
}
return visible, nil
}
// ListInstanceEntries lists the entry points (Service / Ingress) associated with an instance.
func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, instanceID string) ([]*entity.InstanceEntry, error) {
instance, err := s.instanceRepo.GetByID(ctx, instanceID)
instance, err := s.GetInstance(ctx, instanceID)
if err != nil {
return nil, entity.ErrInstanceNotFound
}
@ -283,6 +377,187 @@ func (s *InstanceService) ListInstanceEntries(ctx context.Context, clusterID, in
return s.entryClient.ListEntries(ctx, cluster, instance)
}
// GetInstanceDiagnostics returns diagnostics for an instance on a cluster.
//
// The instance is loaded through GetInstance and must belong to clusterID;
// any failure of that step yields entity.ErrInstanceNotFound. A failed
// cluster lookup yields entity.ErrClusterNotFound. When no diagnostics client
// has been configured an error is returned. tailLines is passed through to
// the diagnostics client unchanged.
func (s *InstanceService) GetInstanceDiagnostics(ctx context.Context, clusterID, instanceID string, tailLines int64) (*entity.InstanceDiagnostics, error) {
	inst, instErr := s.GetInstance(ctx, instanceID)
	// A lookup failure and a cluster mismatch are reported identically.
	if instErr != nil || inst.ClusterID != clusterID {
		return nil, entity.ErrInstanceNotFound
	}

	target, clusterErr := s.clusterRepo.GetByID(ctx, clusterID)
	if clusterErr != nil {
		return nil, entity.ErrClusterNotFound
	}

	if s.diagClient != nil {
		return s.diagClient.GetDiagnostics(ctx, target, inst, tailLines)
	}
	return nil, fmt.Errorf("instance diagnostics client is not configured")
}
// canReadInstance reports whether principal may read instance. Admins always
// may; anyone else only when the instance is in their workspace and they own it.
func (s *InstanceService) canReadInstance(principal *authz.Principal, instance *entity.Instance) bool {
	return principal.IsAdmin() ||
		(instance.WorkspaceID == principal.WorkspaceID && instance.OwnerID == principal.UserID)
}
// canWriteInstance reports whether principal may modify instance. The rule is
// currently the same as for reads: admins always may, other principals only
// for instances they own inside their own workspace.
func (s *InstanceService) canWriteInstance(principal *authz.Principal, instance *entity.Instance) bool {
	if !principal.IsAdmin() {
		inWorkspace := instance.WorkspaceID == principal.WorkspaceID
		return inWorkspace && instance.OwnerID == principal.UserID
	}
	return true
}
// enforceNamespaceValues forces the namespace-related entries of the instance's
// values to instance.Namespace. The top-level "namespace" key is always written;
// the other known keys (top-level and under "global") are only overwritten when
// they already exist. Nothing happens for a nil instance or an empty namespace.
func enforceNamespaceValues(instance *entity.Instance) {
	if instance == nil {
		return
	}
	ns := instance.Namespace
	if ns == "" {
		return
	}
	if instance.Values == nil {
		instance.Values = map[string]interface{}{}
	}

	instance.Values["namespace"] = ns
	for _, key := range []string{"namespaceOverride", "targetNamespace"} {
		setExistingStringValue(instance.Values, key, ns)
	}
	for _, key := range []string{"namespace", "namespaceOverride"} {
		setExistingNestedStringValue(instance.Values, "global", key, ns)
	}
}
// setExistingStringValue overwrites values[key] with namespace, but only when
// the key is already present; absent keys are never created.
func setExistingStringValue(values map[string]interface{}, key, namespace string) {
	_, present := values[key]
	if !present {
		return
	}
	values[key] = namespace
}
// setExistingNestedStringValue overwrites values[parent][key] with namespace
// when values[parent] is a map[string]interface{} that already contains key.
// In every other case values is left untouched.
func setExistingNestedStringValue(values map[string]interface{}, parent, key, namespace string) {
	nested, isMap := values[parent].(map[string]interface{})
	if isMap {
		if _, present := nested[key]; present {
			nested[key] = namespace
		}
	}
}
// applyNamespacePolicy validates and, where required, rewrites
// instance.Namespace before an instance is deployed to cluster.
//
//   - Admins keep the namespace they asked for unless it is a protected system
//     namespace (entity.ErrInvalidNamespace).
//   - Other principals may not request a reserved namespace
//     (entity.ErrInvalidNamespace).
//   - When the cluster is not a private cluster owned by the principal, the
//     requested namespace is replaced by the tenant namespace.
//   - On the principal's own private cluster an empty namespace is defaulted.
func (s *InstanceService) applyNamespacePolicy(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
	if principal.IsAdmin() {
		if isProtectedSystemNamespace(instance.Namespace) {
			return entity.ErrInvalidNamespace
		}
		return nil
	}
	// The reserved check runs on the requested value, before any override below.
	if isReservedNamespace(instance.Namespace) {
		return entity.ErrInvalidNamespace
	}
	if cluster.Visibility != authz.VisibilityPrivate || cluster.OwnerID != principal.UserID {
		// Not the caller's own private cluster: the namespace is forced.
		// Precedence: stored binding namespace, then principal.Namespace, then
		// a name derived from the workspace name.
		namespace := principal.Namespace
		if namespace == "" {
			namespace = entity.NamespaceForWorkspace(principal.WorkspaceName)
		}
		if s.bindingRepo != nil {
			// A lookup error or an empty binding simply keeps the fallback above.
			if binding, err := s.bindingRepo.Get(ctx, principal.WorkspaceID, cluster.ID); err == nil && binding != nil && binding.Namespace != "" {
				namespace = binding.Namespace
			}
		}
		instance.Namespace = namespace
		return nil
	}
	// Caller's own private cluster: only fill in a namespace when none was given.
	if instance.Namespace == "" {
		if cluster.DefaultNamespace != "" {
			instance.Namespace = cluster.DefaultNamespace
		} else if principal.Namespace != "" {
			instance.Namespace = principal.Namespace
		} else {
			// NOTE(review): this derives the name from Username while the shared
			// branch above uses WorkspaceName — confirm this is intentional.
			instance.Namespace = entity.NamespaceForWorkspace(principal.Username)
		}
	}
	return nil
}
// ensureTenantForInstance makes sure the tenant resources for the principal's
// workspace exist in cluster for instance.Namespace, and records the
// workspace/cluster binding.
//
// It is a no-op for admins and when the workspace repository or tenant client
// is not configured. It returns the workspace lookup error,
// entity.ErrWorkspaceSuspended for a suspended workspace, or the error from
// EnsureTenant.
func (s *InstanceService) ensureTenantForInstance(ctx context.Context, principal *authz.Principal, cluster *entity.Cluster, instance *entity.Instance) error {
	if principal.IsAdmin() || s.workspaceRepo == nil || s.tenantClient == nil {
		return nil
	}

	workspace, err := s.workspaceRepo.GetByID(ctx, principal.WorkspaceID)
	if err != nil {
		return err
	}
	if workspace.Status == entity.WorkspaceSuspended {
		return entity.ErrWorkspaceSuspended
	}

	binding := entity.NewTenantBinding(instance.Namespace)
	binding.ServiceAccountName = workspace.K8sSAName
	binding.ResourceQuotaHard = instanceResourceQuotaHard(workspace)
	if err := s.tenantClient.EnsureTenant(ctx, cluster, binding); err != nil {
		return err
	}

	if s.bindingRepo == nil {
		return nil
	}

	// Use one timestamp so CreatedAt and UpdatedAt of the record are equal.
	now := time.Now()
	// The Upsert error is deliberately discarded, as before.
	// NOTE(review): confirm that a failed persist should not fail the caller.
	_ = s.bindingRepo.Upsert(ctx, &entity.WorkspaceClusterBinding{
		ID:             uuid.New().String(),
		WorkspaceID:    workspace.ID,
		ClusterID:      cluster.ID,
		Namespace:      instance.Namespace,
		ServiceAccount: workspace.K8sSAName,
		QuotaCPU:       workspace.QuotaCPU,
		QuotaMemory:    workspace.QuotaMemory,
		QuotaGPU:       workspace.QuotaGPU,
		QuotaGPUMem:    workspace.QuotaGPUMem,
		Status:         "active",
		CreatedAt:      now,
		UpdatedAt:      now,
	})
	return nil
}
// instanceResourceQuotaHard converts the quota strings of workspace into a
// ResourceList keyed by requests.cpu, requests.memory, requests.nvidia.com/gpu
// and requests.nvidia.com/gpumem. Empty values and values that fail to
// normalize or parse are skipped silently. A nil workspace yields an empty list.
func instanceResourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
	limits := corev1.ResourceList{}
	if workspace == nil {
		return limits
	}

	standard := []struct {
		name corev1.ResourceName
		raw  string
	}{
		{corev1.ResourceName("requests.cpu"), workspace.QuotaCPU},
		{corev1.ResourceName("requests.memory"), workspace.QuotaMemory},
		{corev1.ResourceName("requests.nvidia.com/gpu"), workspace.QuotaGPU},
	}
	for _, item := range standard {
		normalized := normalizeStandardQuotaQuantity(item.raw)
		if normalized == "" {
			continue
		}
		parsed, err := resource.ParseQuantity(normalized)
		if err != nil {
			continue
		}
		limits[item.name] = parsed
	}

	// GPU memory goes through its own normalizer before being parsed.
	gpuMem, err := normalizeGPUMemoryQuota(workspace.QuotaGPUMem)
	if err != nil || gpuMem == "" {
		return limits
	}
	if parsed, err := resource.ParseQuantity(gpuMem); err == nil {
		limits[corev1.ResourceName("requests.nvidia.com/gpumem")] = parsed
	}
	return limits
}
// isReservedNamespace reports whether namespace is one of "default",
// "kube-system", "kube-public" or "kube-node-lease".
func isReservedNamespace(namespace string) bool {
	return namespace == "default" ||
		namespace == "kube-system" ||
		namespace == "kube-public" ||
		namespace == "kube-node-lease"
}
// isProtectedSystemNamespace reports whether namespace is one of
// "kube-system", "kube-public" or "kube-node-lease". Unlike
// isReservedNamespace it does not include "default".
func isProtectedSystemNamespace(namespace string) bool {
	for _, protected := range [...]string{"kube-system", "kube-public", "kube-node-lease"} {
		if namespace == protected {
			return true
		}
	}
	return false
}
// executeAndSyncInstall 异步执行安装并监控状态
func (s *InstanceService) executeAndSyncInstall(ctx context.Context, instanceID string, cluster *entity.Cluster, registry *entity.Registry, instance *entity.Instance) {
// 执行 Helm 安装
@ -338,7 +613,7 @@ func (s *InstanceService) executeAndSyncRollback(ctx context.Context, instanceID
func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string) {
// 执行 Helm 卸载
err := s.helmClient.Uninstall(ctx, cluster, releaseName, namespace)
// 获取实例
instance, getErr := s.instanceRepo.GetByID(ctx, instanceID)
if getErr != nil {
@ -360,7 +635,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI
// 卸载成功,标记为已卸载
instance.MarkSuccess(entity.StatusUninstalled, instance.Revision, "Instance uninstalled successfully")
_ = s.instanceRepo.Update(ctx, instance)
// 验证卸载是否完成:尝试获取状态,如果获取不到说明已卸载
time.Sleep(3 * time.Second)
_, statusErr := s.helmClient.GetStatus(ctx, cluster, releaseName, namespace)
@ -377,7 +652,7 @@ func (s *InstanceService) executeAndSyncUninstall(ctx context.Context, instanceI
// syncInstanceStatus 同步实例状态(定期检查 Helm 状态并更新数据库)
func (s *InstanceService) syncInstanceStatus(ctx context.Context, instanceID string, cluster *entity.Cluster, releaseName, namespace string, operation entity.InstanceOperation) {
maxAttempts := 30 // 最多尝试30次约5分钟
maxAttempts := 30 // 最多尝试30次约5分钟
interval := 10 * time.Second // 每10秒检查一次
for i := 0; i < maxAttempts; i++ {

View File

@ -4,21 +4,26 @@ import (
"context"
"errors"
"testing"
"time"
persistencemock "github.com/ocdp/cluster-service/internal/adapter/output/persistence/mock"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
)
func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) {
ctx := context.Background()
principal := &authz.Principal{UserID: "user-1", Username: "tester", Role: authz.RoleUser, WorkspaceID: entity.DefaultWorkspaceID}
ctx := authz.WithPrincipal(context.Background(), principal)
instanceRepo := persistencemock.NewInstanceRepositoryMock()
instance := &entity.Instance{
ID: "inst-1",
ClusterID: "cluster-1",
Name: "demo",
Namespace: "default",
ID: "inst-1",
WorkspaceID: entity.DefaultWorkspaceID,
OwnerID: "user-1",
ClusterID: "cluster-1",
Name: "demo",
Namespace: "default",
}
if err := instanceRepo.Create(ctx, instance); err != nil {
t.Fatalf("failed to seed instance: %v", err)
@ -40,8 +45,63 @@ func TestDeleteInstanceIgnoresMissingRelease(t *testing.T) {
t.Fatalf("DeleteInstance returned error: %v", err)
}
if _, err := instanceRepo.GetByID(ctx, instance.ID); !errors.Is(err, entity.ErrInstanceNotFound) {
t.Fatalf("expected instance removed, got err=%v", err)
waitForInstanceDeleted(t, ctx, instanceRepo, instance.ID)
}
// TestEnforceNamespaceValuesOverridesChartNamespaceKnobs verifies that every
// namespace-related value (top-level and under "global") that a chart might
// read is replaced by the instance namespace.
func TestEnforceNamespaceValuesOverridesChartNamespaceKnobs(t *testing.T) {
	const tenantNamespace = "ocdp-u-alice"

	instance := &entity.Instance{
		Namespace: tenantNamespace,
		Values: map[string]interface{}{
			"namespace":         "default",
			"namespaceOverride": "default",
			"targetNamespace":   "default",
			"global": map[string]interface{}{
				"namespace":         "default",
				"namespaceOverride": "default",
			},
			"image": map[string]interface{}{
				"repository": "nginx",
			},
		},
	}

	enforceNamespaceValues(instance)

	if got := instance.Values["namespace"]; got != tenantNamespace {
		t.Fatalf("expected top-level namespace to be enforced, got %#v", got)
	}
	if got := instance.Values["namespaceOverride"]; got != tenantNamespace {
		t.Fatalf("expected namespaceOverride to be enforced, got %#v", got)
	}
	if got := instance.Values["targetNamespace"]; got != tenantNamespace {
		t.Fatalf("expected targetNamespace to be enforced, got %#v", got)
	}

	globalValues, isMap := instance.Values["global"].(map[string]interface{})
	if !isMap {
		t.Fatalf("expected global map, got %#v", instance.Values["global"])
	}
	if globalValues["namespace"] != tenantNamespace || globalValues["namespaceOverride"] != tenantNamespace {
		t.Fatalf("expected global namespace keys to be enforced, got %#v", globalValues)
	}
}
// waitForInstanceDeleted polls repo every 10ms until GetByID reports
// entity.ErrInstanceNotFound for id, and fails the test if that has not
// happened within 2 seconds.
func waitForInstanceDeleted(t *testing.T, ctx context.Context, repo repository.InstanceRepository, id string) {
	t.Helper()

	timeout := time.After(2 * time.Second)
	poll := time.NewTicker(10 * time.Millisecond)
	defer poll.Stop()

	for {
		select {
		case <-poll.C:
			_, err := repo.GetByID(ctx, id)
			if errors.Is(err, entity.ErrInstanceNotFound) {
				return
			}
		case <-timeout:
			// Look once more so the failure message carries the latest error.
			_, err := repo.GetByID(ctx, id)
			t.Fatalf("expected instance removed, got err=%v", err)
		}
	}
}

View File

@ -6,11 +6,12 @@ import (
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
)
// MonitoringService serves cluster and node monitoring data.
type MonitoringService struct {
	// clusterRepo is used to look up and list clusters.
	clusterRepo repository.ClusterRepository
	// metricsClient fetches cluster and node metrics.
	metricsClient repository.MetricsClient
}
@ -20,13 +21,24 @@ func NewMonitoringService(
metricsClient repository.MetricsClient,
) *MonitoringService {
return &MonitoringService{
clusterRepo: clusterRepo,
clusterRepo: clusterRepo,
metricsClient: metricsClient,
}
}
// GetClusterMonitoring 获取单个集群的监控信息
func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID string) (*entity.ClusterMetrics, error) {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
if err != nil {
return nil, entity.ErrClusterNotFound
}
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
return nil, entity.ErrClusterNotFound
}
metrics, err := s.metricsClient.GetClusterMetrics(ctx, clusterID)
if err != nil {
return nil, fmt.Errorf("failed to get cluster metrics: %w", err)
@ -36,6 +48,10 @@ func (s *MonitoringService) GetClusterMonitoring(ctx context.Context, clusterID
// ListClusterMonitoring 获取所有集群的监控信息
func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entity.ClusterMetrics, error) {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return nil, entity.ErrUnauthorized
}
// 获取所有集群
clusters, err := s.clusterRepo.List(ctx)
if err != nil {
@ -45,6 +61,9 @@ func (s *MonitoringService) ListClusterMonitoring(ctx context.Context) ([]*entit
// 获取每个集群的监控数据
result := make([]*entity.ClusterMetrics, 0, len(clusters))
for _, cluster := range clusters {
if !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
continue
}
metrics, err := s.metricsClient.GetClusterMetrics(ctx, cluster.ID)
if err != nil {
// 如果某个集群获取失败,记录错误但继续
@ -93,10 +112,20 @@ func (s *MonitoringService) GetMonitoringSummary(ctx context.Context) (*entity.M
// GetNodeMetrics returns the node metrics of a cluster the caller may read.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails,
// entity.ErrClusterNotFound when the cluster lookup fails or the cluster is
// not readable by the principal, and a wrapped error when the metrics client
// fails.
func (s *MonitoringService) GetNodeMetrics(ctx context.Context, clusterID string) ([]*entity.NodeMetrics, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}

	// A missing cluster and an unreadable one are reported identically.
	target, lookupErr := s.clusterRepo.GetByID(ctx, clusterID)
	if lookupErr != nil || !authz.CanReadResource(caller, target.WorkspaceID, target.OwnerID, target.Visibility) {
		return nil, entity.ErrClusterNotFound
	}

	nodeMetrics, metricsErr := s.metricsClient.GetNodeMetrics(ctx, clusterID)
	if metricsErr != nil {
		return nil, fmt.Errorf("failed to get node metrics: %w", metricsErr)
	}
	return nodeMetrics, nil
}

View File

@ -0,0 +1,54 @@
package service
import (
"strconv"
"strings"
"github.com/ocdp/cluster-service/internal/domain/entity"
)
// normalizeStandardQuotaQuantity trims value and rewrites a trailing "MB" or
// "GB" (matched case-insensitively) to "M" or "G". Any other input is returned
// trimmed but otherwise unchanged.
func normalizeStandardQuotaQuantity(value string) string {
	trimmed := strings.TrimSpace(value)
	folded := strings.ToUpper(trimmed)

	rewrites := [...]struct{ suffix, unit string }{
		{"MB", "M"},
		{"GB", "G"},
	}
	for _, rule := range rewrites {
		if strings.HasSuffix(folded, rule.suffix) {
			digits := trimmed[:len(trimmed)-len(rule.suffix)]
			return strings.TrimSpace(digits) + rule.unit
		}
	}
	return trimmed
}
// normalizeGPUMemoryQuota converts a GPU memory quota string into a plain
// decimal integer string.
//
// The input is a non-negative integer with an optional, case-insensitive unit
// suffix; whitespace around the number is ignored. M, MB, Mi and MiB leave the
// number unchanged, G and GB multiply it by 1000, Gi and GiB by 1024. An empty
// input yields ("", nil).
//
// It returns entity.ErrInvalidTenantResourceQuota when the numeric part is not
// an integer, is negative, or the scaled value does not fit in an int64.
func normalizeGPUMemoryQuota(value string) (string, error) {
	value = strings.TrimSpace(value)
	if value == "" {
		return "", nil
	}

	const maxInt64 = 1<<63 - 1

	// Longer suffixes come first so that e.g. "MiB" is not mistaken for "B"-less
	// variants; the first match wins.
	units := [...]struct {
		suffix     string
		multiplier int64
	}{
		{"GIB", 1024},
		{"MIB", 1},
		{"GI", 1024},
		{"MI", 1},
		{"GB", 1000},
		{"MB", 1},
		{"G", 1000},
		{"M", 1},
	}

	upper := strings.ToUpper(value)
	number, multiplier := value, int64(1)
	for _, unit := range units {
		if strings.HasSuffix(upper, unit.suffix) {
			number = strings.TrimSpace(value[:len(value)-len(unit.suffix)])
			multiplier = unit.multiplier
			break
		}
	}

	parsed, err := strconv.ParseInt(number, 10, 64)
	if err != nil || parsed < 0 {
		return "", entity.ErrInvalidTenantResourceQuota
	}
	// Reject values whose scaled result would overflow int64 instead of wrapping.
	if parsed > maxInt64/multiplier {
		return "", entity.ErrInvalidTenantResourceQuota
	}
	return strconv.FormatInt(parsed*multiplier, 10), nil
}

View File

@ -5,6 +5,7 @@ import (
"github.com/google/uuid"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
)
// RegistryService Registry 管理领域服务
@ -26,8 +27,21 @@ func NewRegistryService(
// CreateRegistry 创建新 Registry
func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.Registry) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 生成 ID
registry.ID = uuid.New().String()
registry.OwnerID = principal.UserID
registry.WorkspaceID = principal.WorkspaceID
if principal.IsAdmin() && registry.WorkspaceID == "" {
registry.WorkspaceID = entity.DefaultWorkspaceID
}
if !principal.IsAdmin() && registry.Visibility == authz.VisibilityGlobalShared {
return entity.ErrForbidden
}
registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility)
// 验证
if err := registry.Validate(); err != nil {
@ -35,9 +49,11 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R
}
// 检查是否已存在
existingRegistry, _ := s.registryRepo.GetByName(ctx, registry.Name)
if existingRegistry != nil {
return entity.ErrRegistryExists
registries, _ := s.registryRepo.List(ctx)
for _, existingRegistry := range registries {
if existingRegistry.Name == registry.Name && existingRegistry.WorkspaceID == registry.WorkspaceID && existingRegistry.OwnerID == registry.OwnerID {
return entity.ErrRegistryExists
}
}
return s.registryRepo.Create(ctx, registry)
@ -45,16 +61,41 @@ func (s *RegistryService) CreateRegistry(ctx context.Context, registry *entity.R
// GetRegistry returns the registry with the given id if the caller may read it.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails, the
// repository error when the lookup fails, and entity.ErrRegistryNotFound when
// the registry exists but is not readable by the principal.
func (s *RegistryService) GetRegistry(ctx context.Context, id string) (*entity.Registry, error) {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return nil, entity.ErrUnauthorized
	}

	registry, err := s.registryRepo.GetByID(ctx, id)
	if err != nil {
		return nil, err
	}
	// An unreadable registry is reported as not found.
	if !authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) {
		return nil, entity.ErrRegistryNotFound
	}
	return registry, nil
}
// UpdateRegistry 更新 Registry
func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.Registry) error {
principal, err := authz.RequirePrincipal(ctx)
if err != nil {
return entity.ErrUnauthorized
}
// 检查是否存在
_, err := s.registryRepo.GetByID(ctx, registry.ID)
existing, err := s.registryRepo.GetByID(ctx, registry.ID)
if err != nil {
return entity.ErrRegistryNotFound
}
if !authz.CanWriteResource(principal, existing.WorkspaceID, existing.OwnerID, existing.Visibility) {
return entity.ErrForbidden
}
registry.WorkspaceID = existing.WorkspaceID
registry.OwnerID = existing.OwnerID
if principal.IsAdmin() {
registry.Visibility = authz.NormalizeVisibility(principal.Role, registry.Visibility)
} else {
registry.Visibility = existing.Visibility
}
// 验证
if err := registry.Validate(); err != nil {
@ -66,27 +107,47 @@ func (s *RegistryService) UpdateRegistry(ctx context.Context, registry *entity.R
// DeleteRegistry removes the registry with the given id.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails,
// entity.ErrRegistryNotFound when the lookup fails, entity.ErrForbidden when
// the principal may not write the registry, and otherwise the result of the
// repository delete.
func (s *RegistryService) DeleteRegistry(ctx context.Context, id string) error {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return entity.ErrUnauthorized
	}

	// Make sure the registry exists before checking write access.
	registry, err := s.registryRepo.GetByID(ctx, id)
	if err != nil {
		return entity.ErrRegistryNotFound
	}
	if !authz.CanWriteResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) {
		return entity.ErrForbidden
	}

	return s.registryRepo.Delete(ctx, id)
}
// ListRegistries returns the registries the calling principal may read.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails and the
// repository error when listing fails.
func (s *RegistryService) ListRegistries(ctx context.Context) ([]*entity.Registry, error) {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return nil, entity.ErrUnauthorized
	}

	registries, err := s.registryRepo.List(ctx)
	if err != nil {
		return nil, err
	}

	// Filter the full list down to what this principal may read.
	visible := make([]*entity.Registry, 0, len(registries))
	for _, registry := range registries {
		if authz.CanReadResource(principal, registry.WorkspaceID, registry.OwnerID, registry.Visibility) {
			visible = append(visible, registry)
		}
	}
	return visible, nil
}
// CheckHealth checks the health of the registry with the given id through the
// OCI client.
//
// The registry is resolved with GetRegistry, so the caller's read access is
// enforced. Any failure of that lookup is reported as
// entity.ErrRegistryNotFound.
func (s *RegistryService) CheckHealth(ctx context.Context, id string) error {
	registry, err := s.GetRegistry(ctx, id)
	if err != nil {
		return entity.ErrRegistryNotFound
	}

	return s.ociClient.CheckHealth(ctx, registry)
}

View File

@ -0,0 +1,308 @@
package service
import (
"context"
"sort"
"time"
"github.com/google/uuid"
"github.com/ocdp/cluster-service/internal/domain/entity"
"github.com/ocdp/cluster-service/internal/domain/repository"
"github.com/ocdp/cluster-service/internal/pkg/authz"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
// WorkspaceService implements workspace management: listing and creating
// workspaces, binding workspaces to clusters, issuing tenant kubeconfigs and
// suspending workspaces.
type WorkspaceService struct {
	// workspaceRepo stores workspaces.
	workspaceRepo repository.WorkspaceRepository
	// bindingRepo stores workspace/cluster bindings.
	bindingRepo repository.WorkspaceClusterBindingRepository
	// clusterRepo is used to look up and list clusters.
	clusterRepo repository.ClusterRepository
	// tenantClient performs the tenant operations against a cluster
	// (EnsureTenant, IssueKubeconfig, SuspendTenant).
	tenantClient repository.TenantKubeClient
	// auditRepo receives audit log entries; audit logging is skipped when nil.
	auditRepo repository.AuditLogRepository
}
// NewWorkspaceService builds a WorkspaceService from its collaborators. The
// arguments are stored as given; no validation is performed.
func NewWorkspaceService(
	workspaceRepo repository.WorkspaceRepository,
	bindingRepo repository.WorkspaceClusterBindingRepository,
	clusterRepo repository.ClusterRepository,
	tenantClient repository.TenantKubeClient,
	auditRepo repository.AuditLogRepository,
) *WorkspaceService {
	svc := new(WorkspaceService)
	svc.workspaceRepo = workspaceRepo
	svc.bindingRepo = bindingRepo
	svc.clusterRepo = clusterRepo
	svc.tenantClient = tenantClient
	svc.auditRepo = auditRepo
	return svc
}
// ListWorkspaces returns every workspace for admins and only the caller's own
// workspace for everyone else.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails;
// repository errors are returned unchanged.
func (s *WorkspaceService) ListWorkspaces(ctx context.Context) ([]*entity.Workspace, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}

	if !caller.IsAdmin() {
		own, getErr := s.workspaceRepo.GetByID(ctx, caller.WorkspaceID)
		if getErr != nil {
			return nil, getErr
		}
		return []*entity.Workspace{own}, nil
	}
	return s.workspaceRepo.List(ctx)
}
// CreateWorkspace creates a workspace named name, owned by the calling admin,
// and writes an audit entry for it.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails,
// entity.ErrForbidden when the caller is not an admin, and the repository
// error when the create fails.
func (s *WorkspaceService) CreateWorkspace(ctx context.Context, name string) (*entity.Workspace, error) {
	creator, authErr := authz.RequirePrincipal(ctx)
	switch {
	case authErr != nil:
		return nil, entity.ErrUnauthorized
	case !creator.IsAdmin():
		return nil, entity.ErrForbidden
	}

	created := entity.NewWorkspace(name, creator.UserID)
	created.ID = uuid.New().String()
	if createErr := s.workspaceRepo.Create(ctx, created); createErr != nil {
		return nil, createErr
	}

	s.audit(ctx, creator, "create", "workspace", created.ID, created.Name, nil)
	return created, nil
}
// EnsureClusterBinding provisions the tenant of a workspace on a cluster (when
// a tenant client is configured), upserts the workspace/cluster binding and
// writes an audit entry.
//
// Non-admin callers may only act on their own workspace (entity.ErrForbidden
// otherwise) and only on clusters they can read (entity.ErrClusterNotFound
// otherwise). It returns entity.ErrUnauthorized when authz.RequirePrincipal
// fails; workspace lookup, EnsureTenant and Upsert errors are returned
// unchanged.
func (s *WorkspaceService) EnsureClusterBinding(ctx context.Context, workspaceID, clusterID string) (*entity.WorkspaceClusterBinding, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}
	restricted := !caller.IsAdmin()
	if restricted && workspaceID != caller.WorkspaceID {
		return nil, entity.ErrForbidden
	}

	ws, wsErr := s.workspaceRepo.GetByID(ctx, workspaceID)
	if wsErr != nil {
		return nil, wsErr
	}

	target, clusterErr := s.clusterRepo.GetByID(ctx, clusterID)
	if clusterErr != nil {
		return nil, entity.ErrClusterNotFound
	}
	if restricted && !authz.CanReadResource(caller, target.WorkspaceID, target.OwnerID, target.Visibility) {
		return nil, entity.ErrClusterNotFound
	}

	record := &entity.WorkspaceClusterBinding{
		ID:             uuid.New().String(),
		WorkspaceID:    ws.ID,
		ClusterID:      target.ID,
		Namespace:      ws.K8sNamespace,
		ServiceAccount: ws.K8sSAName,
		QuotaCPU:       ws.QuotaCPU,
		QuotaMemory:    ws.QuotaMemory,
		QuotaGPU:       ws.QuotaGPU,
		QuotaGPUMem:    ws.QuotaGPUMem,
		Status:         "active",
		CreatedAt:      time.Now(),
		UpdatedAt:      time.Now(),
	}

	tenant := entity.NewTenantBinding(record.Namespace)
	tenant.ServiceAccountName = record.ServiceAccount
	tenant.ResourceQuotaHard = resourceQuotaHard(ws)

	// Cluster-side provisioning is skipped when no tenant client is configured.
	if s.tenantClient != nil {
		if ensureErr := s.tenantClient.EnsureTenant(ctx, target, tenant); ensureErr != nil {
			return nil, ensureErr
		}
	}

	if upsertErr := s.bindingRepo.Upsert(ctx, record); upsertErr != nil {
		return nil, upsertErr
	}

	details := map[string]interface{}{"cluster_id": clusterID}
	s.audit(ctx, caller, "init", "workspace_cluster_binding", record.ID, record.Namespace, details)
	return record, nil
}
// IssueKubeconfig issues a tenant kubeconfig for a workspace on a cluster,
// creating the workspace/cluster binding first when none is stored, and writes
// an audit entry.
//
// Non-admin callers may only act on their own workspace (entity.ErrForbidden
// otherwise) and only on clusters they can read (entity.ErrClusterNotFound
// otherwise). It returns entity.ErrUnauthorized when authz.RequirePrincipal
// fails and entity.ErrWorkspaceSuspended for a suspended workspace. Workspace
// lookup, binding creation and tenant client errors are returned unchanged.
func (s *WorkspaceService) IssueKubeconfig(ctx context.Context, workspaceID, clusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return nil, entity.ErrUnauthorized
	}
	if !principal.IsAdmin() && workspaceID != principal.WorkspaceID {
		return nil, entity.ErrForbidden
	}

	workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID)
	if err != nil {
		return nil, err
	}
	if workspace.Status == entity.WorkspaceSuspended {
		return nil, entity.ErrWorkspaceSuspended
	}

	cluster, err := s.clusterRepo.GetByID(ctx, clusterID)
	if err != nil {
		return nil, entity.ErrClusterNotFound
	}
	if !principal.IsAdmin() && !authz.CanReadResource(principal, cluster.WorkspaceID, cluster.OwnerID, cluster.Visibility) {
		return nil, entity.ErrClusterNotFound
	}

	// Create the binding on demand. A nil binding without an error is treated
	// like a missing one so the dereferences below cannot panic.
	binding, err := s.bindingRepo.Get(ctx, workspaceID, clusterID)
	if err != nil || binding == nil {
		binding, err = s.EnsureClusterBinding(ctx, workspaceID, clusterID)
		if err != nil {
			return nil, err
		}
	}

	tenantBinding := entity.NewTenantBinding(binding.Namespace)
	tenantBinding.ServiceAccountName = binding.ServiceAccount
	tenantBinding.ResourceQuotaHard = resourceQuotaHard(workspace)

	// NOTE(review): unlike EnsureClusterBinding, tenantClient is not nil-checked
	// here — confirm it is always configured on this path.
	kubeconfig, err := s.tenantClient.IssueKubeconfig(ctx, cluster, tenantBinding, ttl)
	if err != nil {
		return nil, err
	}

	s.audit(ctx, principal, "issue_kubeconfig", "workspace_cluster_binding", binding.ID, binding.Namespace, map[string]interface{}{"cluster_id": clusterID, "ttl_seconds": int64(entity.TenantTokenTTL(ttl).Seconds())})
	return kubeconfig, nil
}
// resourceQuotaHard converts the quota strings of workspace into a
// ResourceList keyed by requests.cpu, requests.memory, requests.nvidia.com/gpu
// and requests.nvidia.com/gpumem. Empty values and values that fail to
// normalize or parse are skipped silently. A nil workspace yields an empty list.
func resourceQuotaHard(workspace *entity.Workspace) corev1.ResourceList {
	result := corev1.ResourceList{}
	if workspace == nil {
		return result
	}

	// put stores an already-normalized quantity string, ignoring empty or
	// unparsable values.
	put := func(name corev1.ResourceName, normalized string) {
		if normalized == "" {
			return
		}
		quantity, err := resource.ParseQuantity(normalized)
		if err != nil {
			return
		}
		result[name] = quantity
	}

	put(corev1.ResourceName("requests.cpu"), normalizeStandardQuotaQuantity(workspace.QuotaCPU))
	put(corev1.ResourceName("requests.memory"), normalizeStandardQuotaQuantity(workspace.QuotaMemory))
	put(corev1.ResourceName("requests.nvidia.com/gpu"), normalizeStandardQuotaQuantity(workspace.QuotaGPU))

	// GPU memory uses its own normalizer; a normalization error drops the entry.
	if gpuMem, err := normalizeGPUMemoryQuota(workspace.QuotaGPUMem); err == nil {
		put(corev1.ResourceName("requests.nvidia.com/gpumem"), gpuMem)
	}
	return result
}
// IssueCurrentKubeconfig issues a kubeconfig for the caller's own workspace.
// The cluster is requestedClusterID when given, otherwise the workspace's
// default cluster, otherwise whatever IssueDefaultKubeconfig selects.
//
// It returns entity.ErrUnauthorized when authz.RequirePrincipal fails; the
// workspace lookup error is returned unchanged.
func (s *WorkspaceService) IssueCurrentKubeconfig(ctx context.Context, requestedClusterID string, ttl time.Duration) (*entity.TenantKubeconfig, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}

	clusterID := requestedClusterID
	if clusterID == "" {
		// No explicit request: fall back to the workspace's default cluster.
		ws, wsErr := s.workspaceRepo.GetByID(ctx, caller.WorkspaceID)
		if wsErr != nil {
			return nil, wsErr
		}
		clusterID = ws.DefaultClusterID
	}

	if clusterID == "" {
		return s.IssueDefaultKubeconfig(ctx, ttl)
	}
	return s.IssueKubeconfig(ctx, caller.WorkspaceID, clusterID, ttl)
}
// IssueDefaultKubeconfig issues a kubeconfig for the caller's workspace on the
// first suitable shared cluster.
//
// Candidates are readable clusters that are globally shared, or workspace
// shared within the caller's workspace. They are tried in order of rank
// (global before workspace shared), then name; the first success is returned.
// If every attempt fails the first error is returned, and with no candidates
// at all the result is entity.ErrClusterNotFound. It returns
// entity.ErrUnauthorized when authz.RequirePrincipal fails.
func (s *WorkspaceService) IssueDefaultKubeconfig(ctx context.Context, ttl time.Duration) (*entity.TenantKubeconfig, error) {
	caller, authErr := authz.RequirePrincipal(ctx)
	if authErr != nil {
		return nil, entity.ErrUnauthorized
	}

	all, listErr := s.clusterRepo.List(ctx)
	if listErr != nil {
		return nil, listErr
	}

	eligible := make([]*entity.Cluster, 0, len(all))
	for _, c := range all {
		if !authz.CanReadResource(caller, c.WorkspaceID, c.OwnerID, c.Visibility) {
			continue
		}
		globalShared := c.Visibility == authz.VisibilityGlobalShared
		workspaceShared := c.Visibility == authz.VisibilityWorkspaceShared && c.WorkspaceID == caller.WorkspaceID
		if globalShared || workspaceShared {
			eligible = append(eligible, c)
		}
	}

	sort.SliceStable(eligible, func(a, b int) bool {
		rankA, rankB := defaultKubeconfigClusterRank(eligible[a]), defaultKubeconfigClusterRank(eligible[b])
		if rankA == rankB {
			return eligible[a].Name < eligible[b].Name
		}
		return rankA < rankB
	})

	// Try candidates in order, remembering only the first failure.
	var firstErr error
	for _, c := range eligible {
		kubeconfig, issueErr := s.IssueKubeconfig(ctx, caller.WorkspaceID, c.ID, ttl)
		if issueErr == nil {
			return kubeconfig, nil
		}
		if firstErr == nil {
			firstErr = issueErr
		}
	}
	if firstErr == nil {
		return nil, entity.ErrClusterNotFound
	}
	return nil, firstErr
}
// defaultKubeconfigClusterRank orders clusters for default kubeconfig
// selection: 0 for globally shared, 1 for workspace shared, 2 for anything
// else. Lower ranks are tried first.
func defaultKubeconfigClusterRank(cluster *entity.Cluster) int {
	visibility := cluster.Visibility
	if visibility == authz.VisibilityGlobalShared {
		return 0
	}
	if visibility == authz.VisibilityWorkspaceShared {
		return 1
	}
	return 2
}
// SuspendWorkspace marks a workspace as suspended, then suspends its tenant on
// every cluster that has a stored binding for it, and writes an audit entry.
//
// Only admins may call it (entity.ErrForbidden otherwise). It returns
// entity.ErrUnauthorized when authz.RequirePrincipal fails; workspace lookup
// and update errors are returned unchanged. The per-cluster tenant suspension
// is best effort: failures there do not fail the call.
func (s *WorkspaceService) SuspendWorkspace(ctx context.Context, workspaceID string) error {
	principal, err := authz.RequirePrincipal(ctx)
	if err != nil {
		return entity.ErrUnauthorized
	}
	if !principal.IsAdmin() {
		return entity.ErrForbidden
	}

	workspace, err := s.workspaceRepo.GetByID(ctx, workspaceID)
	if err != nil {
		return err
	}
	workspace.Status = entity.WorkspaceSuspended
	if err := s.workspaceRepo.Update(ctx, workspace); err != nil {
		return err
	}

	// Without a tenant client there is nothing to suspend on the cluster side;
	// skipping avoids a nil dereference and matches EnsureClusterBinding, which
	// also treats the tenant client as optional.
	if s.tenantClient != nil {
		// The list error is ignored: the workspace is already suspended in the
		// repository and the cluster-side suspension is best effort.
		clusters, _ := s.clusterRepo.List(ctx)
		for _, cluster := range clusters {
			binding, err := s.bindingRepo.Get(ctx, workspaceID, cluster.ID)
			if err != nil || binding == nil {
				// No usable binding for this cluster.
				continue
			}
			tenantBinding := entity.NewTenantBinding(binding.Namespace)
			tenantBinding.ServiceAccountName = binding.ServiceAccount
			_ = s.tenantClient.SuspendTenant(ctx, cluster, tenantBinding)
		}
	}

	s.audit(ctx, principal, "suspend", "workspace", workspace.ID, workspace.Name, nil)
	return nil
}
// audit records an audit log entry attributed to principal. It does nothing
// when no audit repository is configured or principal is nil, and the result
// of the repository call is discarded.
func (s *WorkspaceService) audit(ctx context.Context, principal *authz.Principal, action, resourceType, resourceID, resourceName string, details map[string]interface{}) {
	if principal == nil || s.auditRepo == nil {
		return
	}

	entry := &entity.AuditLog{
		WorkspaceID:  principal.WorkspaceID,
		UserID:       principal.UserID,
		Action:       action,
		ResourceType: resourceType,
		ResourceID:   resourceID,
		ResourceName: resourceName,
		Details:      details,
		CreatedAt:    time.Now(),
	}
	_ = s.auditRepo.Create(ctx, entry)
}