142 lines
5.2 KiB
YAML
142 lines
5.2 KiB
YAML
{{- /*
Single-pod vLLM Deployment.

Rendered only when .Values.workerSize equals 1. The pod consists of:
  - initContainer "download-model": fetches the model named by
    .Values.model.huggingfaceName into
    <.Values.model.localMountPath>/Weight/<basename of the model name>
    via hfd.sh, and skips the download when a ".success_flag" marker exists.
  - container "vllm-pod": runs .Values.command through "sh -c" when it is set,
    otherwise starts the vLLM OpenAI-compatible API server on port 8000
    against the downloaded weights.

Both containers mount the PVC "<release name>-pvc-model" at
.Values.model.localMountPath.

Keep explanatory text that mentions template expressions inside template
comments like this one: Helm still evaluates actions that appear in "#"
comments.
*/ -}}
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  # Quoted so that an all-digit release name is still rendered as a string.
  name: {{ .Release.Name | quote }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name | quote }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name | quote }}
    spec:
      initContainers:
        # The model download runs as the first initContainer.
        - name: download-model
          # NOTE(review): floating tag combined with IfNotPresent means nodes
          # may run different cached Alpine versions - consider pinning.
          image: alpine:latest
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: "https://hf-mirror.com"
            # NOTE(review): the token is passed as a plain env value; consider
            # a Secret with valueFrom.secretKeyRef.
            # "default" + "quote" keep the value a string even when the token
            # is unset or looks like a number/boolean.
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken | default "" | quote }}
          command:
            - /bin/sh
            - -c
            - |
              set -e
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
              SUCCESS_FLAG="${DEST_DIR}/.success_flag"

              # Check whether the model is already present; download it only
              # when the marker left by a previous successful run is missing.
              if [ -f "$SUCCESS_FLAG" ]; then
                echo "✅ Success flag found. Skipping download."
                exit 0
              fi

              echo "⬇️ Starting download..."
              apk add --no-cache bash aria2 wget ca-certificates
              wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
              ./hfd.sh "{{ .Values.model.huggingfaceName }}" \
                --tool aria2c -x 8 \
                --local-dir "$DEST_DIR"

              # Reached only when every command above succeeded (set -e).
              touch "$SUCCESS_FLAG"
              echo "🎉 Done."
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-pod
          image: {{ .Values.vllm.image | quote }}
          imagePullPolicy: {{ .Values.imagePullPolicy }}
          env:
            # NOTE(review): this reads vllm.huggingfaceToken while the
            # initContainer reads model.huggingfaceToken - confirm that both
            # keys exist in the chart values and are meant to differ.
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken | default "" | quote }}
            - name: RAY_DEDUP_LOGS
              value: "0"
          # A user-supplied command replaces the default launcher entirely.
          # The containerPort and readinessProbe below still target port 8000,
          # so a custom command presumably has to listen there as well.
          command:
            - sh
            - -c
            {{- if .Values.command }}
            - {{ .Values.command | quote }}
            {{- else }}
            - |
              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
              MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
              python3 -m vllm.entrypoints.openai.api_server \
                --port 8000 \
                --model "$MODEL_PATH" \
                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
                --pipeline_parallel_size {{ .Values.workerSize }} \
                --trust_remote_code
            {{- end }}
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
              # NOTE(review): the CPU limit is taken from resources.cpuRequest,
              # i.e. it always equals the CPU request - confirm this is
              # intended rather than a missing cpuLimit value.
              cpu: {{ .Values.resources.cpuRequest }}
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8000
              name: http
          readinessProbe:
            # An HTTP check on /health is used; the tcpSocket variant was
            # left disabled by the original author.
            # tcpSocket:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        # Memory-backed emptyDir mounted at /dev/shm in the vLLM container.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        # Model weights, shared between the initContainer and the vLLM
        # container.
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc-model
        # Disabled alternative: mount the weights directly from NFS.
        # - name: weight-volume
        #   nfs:
        #     path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
        #     server: "10.6.80.11"
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|