{{- if gt (int .Values.workerSize) 1 }}
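{{- /*
Renders a LeaderWorkerSet that downloads model weights once (initContainer on the leader)
and serves them with vLLM across workerSize pods via Ray. For reference, a sketch of the
values this template reads; the keys come from the references below, the example values are
illustrative only, and a PersistentVolumeClaim named <release>-pvc-model is assumed to be
defined elsewhere in the chart:

  replicaCount: 1                  # number of leader/worker groups
  workerSize: 2                    # pods per group; this file only renders when > 1
  imagePullPolicy: IfNotPresent
  command: ""                      # optional serving command run after the Ray bootstrap
  model:
    huggingfaceName: org/model-name
    huggingfaceToken: ""
    localMountPath: /models
    download:
      image: alpine:3.20           # the download script uses apk, so an Alpine-based image is assumed
  vllm:
    image: vllm/vllm-openai:latest # expected to provide /vllm-workspace/examples/online_serving/multi-node-serving.sh
  resources:
    gpuLimit: 8                    # GPUs per pod; also used as the tensor-parallel size
    cpuRequest: "16"
    memoryLimit: 256Gi
    shmSize: 20Gi
  nodeSelector: {}
  affinity: {}
  tolerations: []
*/}}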
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # The model download runs as the first initContainer.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken | quote }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model exists; download it if not.
                # echo "DEST_DIR= $DEST_DIR"
                # if [ ! -f "$DEST_DIR/config.json" ]; then
                # ls -l {{ .Values.model.localMountPath }}
                # echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                # wget https://hf-mirror.com/hfd/hfd.sh
                # chmod a+x hfd.sh
                # apt update && apt upgrade
                # apt install aria2 -y
                # ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # else
                # echo "Model already exists at $DEST_DIR"
                # fi
                # Skip the download entirely if a previous run finished successfully.
                SUCCESS_FLAG="${DEST_DIR}/.success_flag"
                if [ -f "$SUCCESS_FLAG" ]; then
                  echo "✅ Success flag found. Skipping download."
                  exit 0
                fi
                echo "⬇️ Starting download..."
                apk add --no-cache bash aria2 wget ca-certificates curl
                wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
                ./hfd.sh {{ .Values.model.huggingfaceName }} --tool aria2c -x 8 --local-dir "$DEST_DIR"

                touch "$SUCCESS_FLAG"
                echo "🎉 Done."
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
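          # NOTE: the leader and all workers mount the same PersistentVolumeClaim defined under
          # volumes below; for multi-pod groups this assumes storage that supports shared access
          # (e.g. ReadWriteMany), and the .success_flag marker lets restarted pods skip re-downloading.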
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              {{- if .Values.command }}
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); {{ .Values.command }}"
              {{- else }}
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                 MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
                 python3 -m vllm.entrypoints.openai.api_server --port 8000 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline-parallel-size {{ .Values.workerSize }} --trust-remote-code"
              {{- end }}
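            # The leader first bootstraps a Ray cluster (LWS_GROUP_SIZE and LWS_LEADER_ADDRESS are
            # injected by the LeaderWorkerSet controller), then either runs .Values.command or falls
            # back to serving the downloaded weights with tensor parallelism across the GPUs of each
            # pod and pipeline parallelism across the pods of the group.
            # Illustrative values.yaml override, assuming localMountPath=/models and a model directory
            # named MyModel; keep --port 8000 so the readiness probe below still matches:
            #   command: "python3 -m vllm.entrypoints.openai.api_server --port 8000 --model /models/Weight/MyModel --tensor-parallel-size 8 --pipeline-parallel-size 2"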
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8000
                name: http
            readinessProbe:
              # tcpSocket:
              httpGet:
                path: /health
                port: 8000
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
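            # The API server only starts answering /health after the engine has finished loading the
            # model, which can take several minutes for large checkpoints; hence the generous
            # initialDelaySeconds. A startupProbe could be used instead if readiness should be
            # reported sooner once the server is up.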
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}
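{{- /*
Quick check after install (illustrative; assumes the default port 8000 and the
role=leader label set on the leader pod above):

  kubectl get pods -l role=leader
  kubectl port-forward pod/<leader-pod-name> 8000:8000
  curl http://localhost:8000/v1/models

The vLLM OpenAI-compatible server also exposes /health (used by the readiness probe)
and /v1/completions and /v1/chat/completions for inference requests.
*/}}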