Compare commits

...

3 Commits

Author SHA1 Message Date
210c967135 Merge pull request 'fix: bugs for interuption of downloading models' (#2) from vllm into main
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 25s
Reviewed-on: #2
2025-12-02 07:49:00 +00:00
9e08afdcb2 build oci for vllm 0.3.1 2025-12-02 15:46:55 +08:00
fccbb10208 fix: bugs for interuption of downloading model 2025-12-02 15:43:05 +08:00
4 changed files with 61 additions and 35 deletions

View File

@ -1,4 +1,4 @@
# List each Helm chart directory to package and push (one per line)
# vllm-serve
vllm-serve
# code-server-chart
open-webui
# open-webui

View File

@ -5,4 +5,4 @@ appVersion: 1.16.0
description: A Helm chart for deploying vLLM with NFS storage
name: vllm-serve
type: application
version: 0.2.0
version: 0.2.1

View File

@ -31,19 +31,31 @@ spec:
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# 检查模型是否存在,不存在则下载
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt update && apt upgrade
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
# echo "DEST_DIR= $DEST_DIR"
# if [ ! -f "$DEST_DIR/config.json" ]; then
# ls -l {{ .Values.model.localMountPath }}
# echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
# wget https://hf-mirror.com/hfd/hfd.sh
# chmod a+x hfd.sh
# apt update && apt upgrade
# apt install aria2 -y
# ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# else
# echo "Model already exists at $DEST_DIR"
# fi
SUCCESS_FLAG="${DEST_DIR}/.success_flag"
if [ -f "$SUCCESS_FLAG" ]; then
echo "✅ Success flag found. Skipping download."
exit 0
fi
echo "⬇️ Starting download..."
apk add --no-cache bash aria2 wget ca-certificates
wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
./hfd.sh {{ .Values.model.huggingfaceName }} --tool aria2c -x 8 --local-dir "$DEST_DIR"
touch "$SUCCESS_FLAG"
echo "🎉 Done."
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
@ -94,9 +106,9 @@ spec:
- containerPort: 8000
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
# tcpSocket:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 120
periodSeconds: 20

View File

@ -16,7 +16,7 @@ spec:
initContainers:
# 模型下载作为第一个 initContainer
- name: download-model
image: {{ .Values.model.download.image }}
image: alpine:latest
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
@ -24,33 +24,46 @@ spec:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- /bin/sh
- -c
- |
set -e
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
SUCCESS_FLAG="${DEST_DIR}/.success_flag"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# 检查模型是否存在,不存在则下载
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt update && apt upgrade
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
# echo "DEST_DIR= $DEST_DIR"
# if [ ! -f "$DEST_DIR/config.json" ]; then
# ls -l {{ .Values.model.localMountPath }}
# echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
# wget https://hf-mirror.com/hfd/hfd.sh
# chmod a+x hfd.sh
# apt update && apt upgrade
# apt install aria2 -y
# ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# else
# echo "Model already exists at $DEST_DIR"
# fi
if [ -f "$SUCCESS_FLAG" ]; then
echo "✅ Success flag found. Skipping download."
exit 0
fi
echo "⬇️ Starting download..."
apk add --no-cache bash aria2 wget ca-certificates
wget https://hf-mirror.com/hfd/hfd.sh -O hfd.sh && chmod +x hfd.sh
./hfd.sh {{ .Values.model.huggingfaceName }} --tool aria2c -x 8 --local-dir "$DEST_DIR"
touch "$SUCCESS_FLAG"
echo "🎉 Done."
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-pod
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
imagePullPolicy: {{ .Values.imagePullPolicy }}
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.vllm.huggingfaceToken }}
@ -81,6 +94,7 @@ spec:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}