Files
helm-charts/vllm-serve/templates/single.yaml
Ivan087 89bc94a6a9
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 7s
feat: support Hami on k3s and k8s
2025-12-05 17:09:01 +08:00

148 lines
5.5 KiB
YAML

{{- if eq (int .Values.workerSize) 1 }}
# Single-node path: rendered only when workerSize == 1 (multi-node uses a
# separate template). One Deployment runs the vLLM OpenAI-compatible server.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
        # Model download runs as the first initContainer so the vLLM
        # container always finds the weights on the shared PVC.
        - name: download-model
          image: alpine:latest
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: "https://hf-mirror.com"
            # NOTE(review): token is embedded in the pod spec in plain text;
            # prefer a Secret + valueFrom.secretKeyRef.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
          command:
            - /bin/sh
            - -c
            - |
              set -e
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
              SUCCESS_FLAG="${DEST_DIR}/.success_flag"
              # Skip the download if a previous run already completed.
              if [ -f "$SUCCESS_FLAG" ]; then
                echo "✅ Success flag found. Skipping download."
                exit 0
              fi
              echo "⬇️ Starting download..."
              apk add --no-cache bash aria2 wget ca-certificates curl
              wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
              ./hfd.sh "{{ .Values.model.huggingfaceName }}" --tool aria2c -x 8 --local-dir "$DEST_DIR"
              # 'set -e' guarantees we only reach this line after hfd.sh exited 0.
              touch "$SUCCESS_FLAG"
              echo "🎉 Done."
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-pod
          image: {{ .Values.vllm.image }}
          imagePullPolicy: {{ .Values.imagePullPolicy }}
          env:
            # NOTE(review): the initContainer reads .Values.model.huggingfaceToken
            # while this container reads .Values.vllm.huggingfaceToken — confirm
            # the two values are intentionally separate.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.vllm.huggingfaceToken }}"
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            {{- if .Values.command }}
            # Caller-supplied override of the serve command.
            - {{ .Values.command | quote }}
            {{- else }}
            - |
              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
              MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
              python3 -m vllm.entrypoints.openai.api_server \
                --port 8000 \
                --model $MODEL_PATH \
                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
                --pipeline_parallel_size {{ .Values.workerSize }} \
                --trust_remote_code
            {{- end }}
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
              # HAMi per-container GPU-memory limit; rendered only when gpuMem > 0.
              nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}"
              {{- end }}
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
              {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
              nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}"
              {{- end }}
          ports:
            - containerPort: 8000
              name: http
          readinessProbe:
            # HTTP health check against vLLM's /health endpoint; generous
            # initial delay to cover model load time.
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            # Memory-backed /dev/shm (required by NCCL/torch shared memory).
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        # Model weights live on a release-scoped PVC shared with the
        # download initContainer.
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc-model
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}