Make model weight paths compatible with two clusters
This commit is contained in:
@ -28,7 +28,7 @@ spec:
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# Check whether the model already exists; download it if it does not
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
@ -37,6 +37,7 @@ spec:
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt update && apt upgrade
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
@ -74,7 +75,7 @@ spec:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
|
||||
Reference in New Issue
Block a user