Make the Helm template compatible with two clusters
This commit is contained in:
@ -28,7 +28,7 @@ spec:
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# Check whether the model already exists; download it if it does not
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
@ -37,6 +37,7 @@ spec:
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt update && apt upgrade
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
@ -58,9 +59,25 @@ spec:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
#args:
|
||||
# {{- if .Values.command }}
|
||||
# - {{ .Values.command | quote }}
|
||||
# {{- else }}
|
||||
# - |
|
||||
# MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
|
||||
# MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
# # Note: when referencing variables here, the whole command block is no longer wrapped in quotes
|
||||
# python3 -m vllm.entrypoints.openai.api_server \
|
||||
# --port 8080 \
|
||||
# --model $MODEL_PATH \
|
||||
# --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
|
||||
# --pipeline_parallel_size {{ .Values.workerSize }} \
|
||||
# --trust_remote_code
|
||||
# {{- end }}
|
||||
- "
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
|
||||
MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
@ -92,7 +109,11 @@ spec:
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
# - name: weight-volume
|
||||
# nfs:
|
||||
# path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
# server: "10.6.80.11"
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
|
||||
Reference in New Issue
Block a user