{{/*
  Multi-node vLLM serving via a LeaderWorkerSet (LWS).
  Rendered only when workerSize > 1 (single-node deployments are handled by a
  separate template). The leader pod downloads model weights to a shared PVC in
  an initContainer, starts a Ray head, and runs the OpenAI-compatible API
  server; worker pods join the Ray cluster. $(LWS_GROUP_SIZE) and
  $(LWS_LEADER_ADDRESS) are env vars injected by LWS and expanded by the
  kubelet in the command array.
*/}}
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    # Restart the whole group together: a Ray cluster cannot survive losing
    # individual members, so any pod restart recreates the full group.
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer. A flag file on the
          # shared PVC makes the download idempotent across pod restarts.
          - name: download-model
            image: alpine:latest
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                # Quoted so an empty or special-character token cannot render
                # as null or break YAML parsing.
                value: {{ .Values.model.huggingfaceToken | quote }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
                SUCCESS_FLAG="${DEST_DIR}/.success_flag"
                if [ -f "$SUCCESS_FLAG" ]; then
                  echo "✅ Success flag found. Skipping download."
                  exit 0
                fi
                echo "⬇️ Starting download..."
                apk add --no-cache bash aria2 wget ca-certificates curl
                wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
                ./hfd.sh {{ .Values.model.huggingfaceName }} --tool aria2c -x 8 --local-dir "$DEST_DIR"
                touch "$SUCCESS_FLAG"
                echo "🎉 Done."
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            securityContext:
              capabilities:
                # IPC_LOCK is required to pin (mlock) memory for RDMA transfers.
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: {{ .Values.rdma.interface | default "eth0" | quote }}
              - name: NCCL_SOCKET_IFNAME
                value: {{ .Values.rdma.interface | default "eth0" | quote }}
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              {{- if .Values.rdma.enabled }}
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_IB_HCA
                value: {{ .Values.rdma.hca | default "^mlx5" | quote }}
              - name: NCCL_IB_GID_INDEX
                # Or "7", depending on your network fabric configuration.
                value: {{ .Values.rdma.gidIndex | default "0" | quote }}
              {{- else }}
              # RDMA disabled: explicitly turn off IB so NCCL does not probe
              # for devices and fail at startup.
              - name: NCCL_IB_DISABLE
                value: "1"
              {{- end }}
            command:
              - sh
              - -c
              - |
                {{- if .Values.command }}
                bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); {{ .Values.command }} --distributed-executor-backend ray
                {{- else }}
                bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8000 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --distributed-executor-backend ray --trust_remote_code
                {{- end }}
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
                nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
                {{- end }}
                {{- if .Values.rdma.enabled }}
                {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
                {{- end }}
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
                nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
                {{- end }}
                {{- if .Values.rdma.enabled }}
                {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
                {{- end }}
            ports:
              - containerPort: 8000
                name: http
            readinessProbe:
              httpGet:
                path: /health
                port: 8000
              # Generous initial delay: model load onto GPUs can take minutes.
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          # Memory-backed /dev/shm for NCCL/Ray shared-memory transport.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - |
                bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
                nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
                {{- end }}
                {{- if .Values.rdma.enabled }}
                {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
                {{- end }}
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
                nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
                {{- end }}
                {{- if .Values.rdma.enabled }}
                {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
                {{- end }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: {{ .Values.rdma.interface | default "eth0" | quote }}
              - name: NCCL_SOCKET_IFNAME
                value: {{ .Values.rdma.interface | default "eth0" | quote }}
              - name: NCCL_DEBUG
                value: INFO
              - name: RAY_DEDUP_LOGS
                value: "0"
              {{- if .Values.rdma.enabled }}
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_IB_HCA
                value: {{ .Values.rdma.hca | default "^mlx5" | quote }}
              - name: NCCL_IB_GID_INDEX
                value: {{ .Values.rdma.gidIndex | default "0" | quote }}
              {{- else }}
              - name: NCCL_IB_DISABLE
                value: "1"
              {{- end }}
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}