---
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Secrets used to pull images from a private registry. More information:
# https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# Image pull policy value supplied to the chart's templates.
imagePullPolicy: IfNotPresent
# Overrides for the chart name and the fully qualified release name.
# Both are empty strings by default.
nameOverride: ""
fullnameOverride: ""

# Service account settings. More information:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created.
  create: true
  # Automatically mount the ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account.
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname
  # template.
  name: ""

# Model configuration
model:
  # Hugging Face model identifier; the only value users need to set.
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
  # Fixed mount path of the PVC.
  localMountPath: "/Model"
  # Placeholder value. Do not commit a real token to version control;
  # supply it at install time instead
  # (for example: --set model.huggingfaceToken=...).
  huggingfaceToken: "<your-hf-token>"

# Feature selection
# NOTE(review): no keys follow this heading in this file - confirm whether a
# setting was removed or the heading is stale.

# Resource sizing values. Units and exact meaning depend on the chart
# templates, which are not shown here.
resources:
  gpuLimit: 1
  # NOTE(review): meaning of 0 is not visible here (presumably "unset") -
  # confirm against the templates.
  gpuMem: 0
  cpuRequest: 12
  memoryLimit: "16Gi"
  # NOTE(review): shmSize (20Gi) is larger than memoryLimit (16Gi). If the
  # templates back /dev/shm with a memory-medium emptyDir, its usage counts
  # toward the container memory limit - confirm this combination is intended.
  shmSize: "20Gi"

# RDMA configuration
rdma:
  # Switch: off by default to avoid errors on nodes without RDMA.
  enabled: false
  # NIC name used for NCCL/GLOO communication
  # (with RDMA this may be ib0 or bond0).
  interface: eth0
  # RDMA resource name (depends on your Kubernetes device plugin).
  resourceName: "rdma/rdma_shared_device_a"
  # Number of RDMA devices each Pod requests.
  resourceCount: 5
  # HCA device to use. The original note suggested "^mlx5" for prefix
  # matching.
  # NOTE(review): if this value is passed to NCCL_IB_HCA, a leading "^"
  # EXCLUDES the listed devices; a plain "mlx5" is already a prefix match -
  # confirm against the templates before using "^".
  hca: "mlx5_0:1"
  # RoCEv2 usually requires an explicit GID index.
  gidIndex: "0"

# Service exposure settings.
svc:
  type: LoadBalancer
  # Port exposed by the Service.
  port: 80
  # Port the Service forwards to.
  targetPort: 8000
  # Uncomment to pin a node port.
  # nodePort: 30080
# vLLM application configuration
vllm:
  # NOTE(review): ":latest" is a mutable tag and imagePullPolicy in this file
  # is IfNotPresent, so nodes with a cached image will not pick up newer
  # builds - consider pinning a specific version tag.
  image: "docker.io/vllm/vllm-openai:latest"

  # NOTE(review): this key is placed under "vllm" based on its position in
  # the file; confirm the templates read .Values.vllm.command rather than a
  # top-level .Values.command.
  command: ""

# llama application configuration
llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  # NOTE(review): "latest-cu12" is a mutable tag and imagePullPolicy in this
  # file is IfNotPresent, so nodes with a cached image will not pick up newer
  # builds - consider pinning a specific version tag.
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"

# NFS PV/PVC configuration
nfs:
  # Address of the NFS server.
  server: "10.6.80.11"
  # Exported directory on the NFS server.
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  # storageClass: "local-path"
  # Sizes for the PersistentVolume and the PersistentVolumeClaim.
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
# NOTE(review): presumably replicaCount is the number of leader/worker groups
# and workerSize the number of pods per group - confirm against the templates.
replicaCount: 1
workerSize: 1

# Pod scheduling constraints; all empty by default.
nodeSelector: {}

tolerations: []

affinity: {}