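{{/*
Single-worker vLLM Deployment: an initContainer downloads the model weights
onto a shared PVC, then the vLLM container serves them through the
OpenAI-compatible API. This template renders only when .Values.workerSize is 1;
larger worker counts are presumably handled by a separate multi-node template
in this chart.
*/}}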
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
      # Download the model in an initContainer so the weights are on the shared volume before vLLM starts
      - name: download-model
        image: {{ .Values.model.download.image }}
        imagePullPolicy: IfNotPresent
        env:
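        # HF_ENDPOINT points Hugging Face downloads at the hf-mirror.com mirror
        # (honored by huggingface-cli; the hfd.sh helper below targets the same mirror).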
        - name: HF_ENDPOINT
          value: https://hf-mirror.com
        - name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.model.huggingfaceToken | quote }}
        command:
        - sh
        - -c
        - |
          MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
          DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
          # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
          # Check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
|
||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||
ls -l {{ .Values.model.localMountPath }}
|
||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||
wget https://hf-mirror.com/hfd/hfd.sh
|
||
chmod a+x hfd.sh
|
||
apt install aria2 -y
|
||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||
else
|
||
echo "Model already exists at $DEST_DIR"
|
||
fi
|
||
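        # Note: the script above assumes a Debian-based download image (wget and
        # apt-get available) and fetches hfd.sh from hf-mirror.com at pod start;
        # the config.json check makes the download idempotent across restarts.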
        volumeMounts:
        - name: weight-volume
          mountPath: {{ .Values.model.localMountPath }}
      containers:
      - name: vllm-pod
        image: {{ .Values.vllm.image }}
        imagePullPolicy: IfNotPresent
        env:
        - name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.vllm.huggingfaceToken | quote }}
        - name: RAY_DEDUP_LOGS
          value: "0"
        command:
        - /bin/sh
        - -c
        args:
        - |
          {{- if .Values.command }}
          {{- .Values.command | nindent 10 }}
          {{- else }}
          MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
          MODEL_PATH="{{ .Values.model.localMountPath }}/$MODEL_NAME"
          python3 -m vllm.entrypoints.openai.api_server \
            --port 8080 \
            --model "$MODEL_PATH" \
            --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
            --pipeline-parallel-size {{ .Values.workerSize }} \
            --trust-remote-code
          {{- end }}
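        # Setting .Values.command replaces the default launch script above; a
        # hypothetical values.yaml override could look like:
        #   command: |
        #     python3 -m vllm.entrypoints.openai.api_server --port 8080 --model /models/my-model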
        resources:
          limits:
            nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
            memory: {{ .Values.resources.memoryLimit }}
            ephemeral-storage: 10Gi
          requests:
            ephemeral-storage: 10Gi
            cpu: {{ .Values.resources.cpuRequest }}
        ports:
        - containerPort: 8080
          name: http
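        # Readiness is gated on vLLM's /health endpoint; the generous
        # initialDelaySeconds leaves time for the model weights to load before
        # the pod is added to the Service endpoints.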
        readinessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 120
          periodSeconds: 20
          timeoutSeconds: 5
        volumeMounts:
        - mountPath: /dev/shm
          name: dshm
        - name: weight-volume
          mountPath: {{ .Values.model.localMountPath }}
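      # dshm backs /dev/shm with a memory-medium emptyDir; the container default
      # of 64Mi is too small for the shared-memory traffic that NCCL and PyTorch
      # worker processes generate under tensor parallelism.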
      volumes:
      - name: dshm
        emptyDir:
          medium: Memory
          sizeLimit: {{ .Values.resources.shmSize }}
      - name: weight-volume
        persistentVolumeClaim:
          claimName: {{ .Release.Name }}-pvc-model
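      # weight-volume is backed by the "<release>-pvc-model" PersistentVolumeClaim,
      # presumably declared elsewhere in this chart, so downloaded weights survive
      # pod restarts and (access mode permitting) can be shared across replicas.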
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
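{{/*
A minimal values.yaml sketch for this template. Key names are taken from the
references above; the image tags, model name, paths, and sizes below are
placeholders only:

workerSize: 1
replicaCount: 1
command: ""                          # empty -> use the default vLLM launch script
model:
  huggingfaceName: org/model-name    # placeholder
  huggingfaceToken: ""
  localMountPath: /models            # placeholder
  download:
    image: some-downloader:latest    # placeholder
vllm:
  image: vllm/vllm-openai:latest     # placeholder tag
  huggingfaceToken: ""
resources:
  gpuLimit: 1
  memoryLimit: 64Gi
  cpuRequest: "8"
  shmSize: 16Gi
nodeSelector: {}
affinity: {}
tolerations: []
*/}}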