# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
#
# NOTE(review): the nesting below was reconstructed from key names after the
# file had been collapsed onto two lines (which made the whole document parse
# as a comment). Confirm the structure against the chart templates.

# Secrets for pulling an image from a private repository. More information:
# https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# NOTE(review): with IfNotPresent, an image already cached on a node is not
# pulled again, so mutable tags such as `latest` (used below) can go stale.
# Consider pinning image versions - confirm how the templates use this value.
imagePullPolicy: IfNotPresent

# Overrides for the chart name.
nameOverride: ""
fullnameOverride: ""

# Service account settings. More information:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created.
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account.
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname
  # template.
  name: ""

# Model configuration.
model:
  # The only value a user normally needs to supply.
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
  # Fixed mount path of the PVC.
  localMountPath: "/Model"
  # NOTE(review): presumably a Hugging Face access token; do not commit a real
  # token here - supply it at install time instead.
  huggingfaceToken: ""

# Feature selection.
resources:
  gpuLimit: 1
  gpuMem: 0
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

# RDMA configuration.
rdma:
  # Switch: off by default to avoid errors on nodes without RDMA.
  enabled: false
  # NIC name used for NCCL/GLOO communication (with RDMA this may be ib0 or
  # bond0).
  interface: eth0
  # RDMA resource name (depends on your k8s device plugin).
  resourceName: "rdma/rdma_shared_device_a"
  # Number of RDMA devices required per Pod.
  resourceCount: 5
  # HCA device to use (or use ^mlx5 for prefix matching).
  hca: "mlx5_0:1"
  # RoCEv2 usually requires a GID to be specified.
  gidIndex: "0"

svc:
  type: LoadBalancer
  port: 80
  targetPort: 8000
  # nodePort: 30080

# vLLM application configuration.
vllm:
  image: "docker.io/vllm/vllm-openai:latest"
  command: ""

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration.
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"

# NFS PV/PVC configuration.
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  # storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration.
replicaCount: 1
workerSize: 1

nodeSelector: {}
tolerations: []
affinity: {}