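{{/*
Illustrative values.yaml sketch for this template. The keys mirror what the
template references (workerSize, replicaCount, model.*, vllm.*, resources.*);
the concrete values below are assumed examples only, not chart defaults.

workerSize: 1
replicaCount: 1
model:
  download:
    image: your-registry/model-downloader:latest   # assumed: image with sh, wget and apt available
  huggingfaceName: Qwen/Qwen2-7B-Instruct          # assumed example model id
  huggingfaceToken: ""
  localMountPath: /models
vllm:
  image: vllm/vllm-openai:latest                   # assumed example vLLM image
  huggingfaceToken: ""
resources:
  gpuLimit: 1
  memoryLimit: 64Gi
  cpuRequest: 8
  shmSize: 16Gi
nodeSelector: {}
affinity: {}
tolerations: []
*/}}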
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
        # The model download runs as the first initContainer
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken | quote }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model already exists; download it if it does not
echo "DEST_DIR= $DEST_DIR"
|
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
|
ls -l {{ .Values.model.localMountPath }}
|
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
|
wget https://hf-mirror.com/hfd/hfd.sh
|
|
chmod a+x hfd.sh
|
|
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-pod
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken | quote }}
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
|
echo 'Using single node ------------------------------------------';
|
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
|
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            # tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc-model
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
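{{/*
Quick check after install (assumed release/deployment name and default namespace;
the /health and /v1/models routes are served by vLLM's OpenAI-compatible API server):

  kubectl port-forward deploy/<release-name> 8080:8080
  curl http://localhost:8080/health
  curl http://localhost:8080/v1/models
*/}}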