Files
Ivan087 89bc94a6a9
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 7s
feat: support Hami on k3s and k8s
2025-12-05 17:09:01 +08:00

272 lines
12 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # present on the shared PVC before the vLLM leader starts.
          - name: download-model
            image: alpine:latest
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: "https://hf-mirror.com"
              - name: HUGGING_FACE_HUB_TOKEN
                # Quoted so an empty or special-character token still renders
                # as a valid YAML string instead of null.
                value: {{ .Values.model.huggingfaceToken | quote }}
            command:
              - sh
              - -c
              - |
                # Abort on any failure so a partial download never gets flagged
                # as complete (previously the flag was written unconditionally).
                set -e
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
                SUCCESS_FLAG="${DEST_DIR}/.success_flag"
                # Skip the download when a previous run completed successfully.
                if [ -f "$SUCCESS_FLAG" ]; then
                  echo "✅ Success flag found. Skipping download."
                  exit 0
                fi
                echo "⬇️ Starting download..."
                mkdir -p "$DEST_DIR"
                apk add --no-cache bash aria2 wget ca-certificates curl
                wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh
                chmod +x hfd.sh
                ./hfd.sh {{ .Values.model.huggingfaceName }} --tool aria2c -x 8 --local-dir "$DEST_DIR"
                # Reached only if every step above succeeded (set -e).
                touch "$SUCCESS_FLAG"
                echo "🎉 Done."
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: {{ .Values.imagePullPolicy }}
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: {{ .Values.rdma.interface | default "eth0" | quote }}
- name: NCCL_SOCKET_IFNAME
value: {{ .Values.rdma.interface | default "eth0" | quote }}
- name: RAY_DEDUP_LOGS
value: "0"
- name: NCCL_DEBUG
value: INFO
# RDMA 条件配置
{{- if .Values.rdma.enabled }}
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_IB_HCA
value: {{ .Values.rdma.hca | default "^mlx5" | quote }}
- name: NCCL_IB_GID_INDEX
value: {{ .Values.rdma.gidIndex | default "0" | quote }} # 或 "7",根据你的网络配置而定
{{- else }}
# 如果未开启 RDMA显式禁用 IB防止 NCCL 尝试探测报错
- name: NCCL_IB_DISABLE
value: "1"
{{- end }}
command:
- sh
- -c
- |
# 1. 自动 RDMA 探测逻辑 (无论下方跑什么命令,先执行这段)
# =======================================================
# echo "🔍 [Init] Detecting RDMA devices..."
# if [ -d "/sys/class/infiniband" ] && [ "$(ls -A /sys/class/infiniband)" ]; then
# echo "✅ [Init] RDMA devices found. Enabling NCCL IB."
# export NCCL_IB_DISABLE=0
# # 如果环境变量没指定 HCA默认使用 ^mlx5 匹配
# export NCCL_IB_HCA=${NCCL_IB_HCA:-"^mlx5"}
# else
# echo "⚠️ [Init] No RDMA devices found. Falling back to TCP."
# export NCCL_IB_DISABLE=1
# export NCCL_NET_GDR_LEVEL=0
# fi
# echo "🚀 [Init] RDMA setup complete. NCCL_IB_DISABLE=$NCCL_IB_DISABLE"
{{- if .Values.command }}
bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); {{ .Values.command }} --distributed-executor-backend ray
{{- else }}
bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8000 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --distributed-executor-backend ray --trust_remote_code
{{- end }}
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
ports:
- containerPort: 8000
name: http
readinessProbe:
# tcpSocket:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- |
# 1. 自动 RDMA 探测逻辑
# =======================================================
# echo "🔍 [Init] Detecting RDMA devices..."
# if [ -d "/sys/class/infiniband" ] && [ "$(ls -A /sys/class/infiniband)" ]; then
# echo "✅ [Init] RDMA devices found. Enabling NCCL IB."
# export NCCL_IB_DISABLE=0
# export NCCL_IB_HCA=${NCCL_IB_HCA:-"^mlx5"}
# else
# echo "⚠️ [Init] No RDMA devices found. Falling back to TCP."
# export NCCL_IB_DISABLE=1
# export NCCL_NET_GDR_LEVEL=0
# fi
bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
env:
- name: GLOO_SOCKET_IFNAME
value: {{ .Values.rdma.interface | default "eth0" | quote }}
- name: NCCL_SOCKET_IFNAME
value: {{ .Values.rdma.interface | default "eth0" | quote }}
- name: NCCL_DEBUG
value: INFO
- name: RAY_DEDUP_LOGS
value: "0"
{{- if .Values.rdma.enabled }}
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_IB_HCA
value: {{ .Values.rdma.hca | default "^mlx5" | quote }}
- name: NCCL_IB_GID_INDEX
value: {{ .Values.rdma.gidIndex | default "0" | quote }}
{{- else }}
- name: NCCL_IB_DISABLE
value: "1"
{{- end }}
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- end }}