{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
        # The model download runs as the first initContainer, so the weights
        # are in place before the vLLM container starts.
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken | quote }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
              echo "DEST_DIR=$DEST_DIR"
              # Check whether the model already exists; download it only if it does not.
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # Alternative without hfd.sh:
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-pod
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken | quote }}
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            {{- if .Values.command }}
            - {{ .Values.command | quote }}
            {{- else }}
            - |
              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}')
              MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME
              python3 -m vllm.entrypoints.openai.api_server \
                --port 8000 \
                --model "$MODEL_PATH" \
                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
                --pipeline-parallel-size {{ .Values.workerSize }} \
                --trust-remote-code
            {{- end }}
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8000
              name: http
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        # Shared memory for vLLM/Ray inter-process communication.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc-model
        # Alternative: mount the weights from an NFS share instead of a PVC, e.g.:
        # - name: weight-volume
        #   nfs:
        #     path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
        #     server: "10.6.80.11"
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
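
# For reference, a minimal values.yaml sketch covering the keys this template
# reads. Every concrete value below is an illustrative assumption, not a chart
# default; adjust the images, model name, token, and resource sizes for your
# cluster before installing.
#
#   workerSize: 1
#   replicaCount: 1
#   command: ""                # optional full override of the vLLM start command
#   model:
#     download:
#       image: python:3.10
#     huggingfaceName: Qwen/Qwen2.5-7B-Instruct
#     huggingfaceToken: "<your-hf-token>"
#     localMountPath: /models
#   vllm:
#     image: vllm/vllm-openai:latest
#     huggingfaceToken: "<your-hf-token>"
#   resources:
#     gpuLimit: 1
#     cpuRequest: 8
#     memoryLimit: 64Gi
#     shmSize: 16Gi
#   nodeSelector: {}
#   affinity: {}
#   tolerations: []
#
# A quick render check against a hypothetical chart directory:
#   helm template my-release ./chart -f values.yaml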