first backup of charts

This commit is contained in:
Ivan087
2025-09-23 10:01:17 +08:00
commit cbfc0104a6
170 changed files with 17788 additions and 0 deletions

View File

@ -0,0 +1,114 @@
{{- /*
Single-node vLLM Deployment.

Rendered only when .Values.workerSize is 1. The pod first runs an
initContainer that downloads the model into the shared "weight-volume" PVC
(skipped when config.json is already present under the destination
directory), then starts vllm.entrypoints.openai.api_server on port 8080.

Values read by this template:
  replicaCount, workerSize, app,
  model.download.image, model.huggingfaceName, model.huggingfaceToken,
  model.localMountPath, vllm.image, vllm.huggingfaceToken,
  resources.gpuLimit, resources.memoryLimit, resources.cpuRequest,
  resources.shmSize
*/ -}}
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: vllm-app
  template:
    metadata:
      labels:
        app: vllm-app
    spec:
      initContainers:
        # Model download runs as the first initContainer.
        - name: download-model
          image: {{ .Values.model.download.image | quote }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: "https://hf-mirror.com"
            - name: HUGGING_FACE_HUB_TOKEN
              # Quoted so an empty or number-like token still renders as a string.
              value: {{ .Values.model.huggingfaceToken | quote }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Diagnostic listings only; a failure here is deliberately not fatal.
              echo "DEST_DIR= $DEST_DIR"
              ls "$DEST_DIR"
              ls -l "{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model exists; download it if it does not.
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l "{{ .Values.model.localMountPath }}"
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                # From here on, stop at the first failing step so the
                # initContainer fails with the real cause.
                set -e
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                # Install aria2 only when it is missing, and refresh the package
                # index first: images usually ship without apt lists.
                command -v aria2c >/dev/null 2>&1 || { apt-get update && apt-get install -y aria2; }
                ./hfd.sh "{{ .Values.model.huggingfaceName }}" --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath | quote }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image | quote }}
          imagePullPolicy: IfNotPresent
          # securityContext:
          #   capabilities:
          #     add: [ "IPC_LOCK" ]
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              # NOTE(review): the initContainer reads .Values.model.huggingfaceToken
              # while this reads .Values.vllm.huggingfaceToken -- confirm both keys
              # exist in values.yaml or unify them.
              value: {{ .Values.vllm.huggingfaceToken | quote }}
            # - name: GLOO_SOCKET_IFNAME
            #   value: eth0
            # - name: NCCL_SOCKET_IFNAME
            #   value: eth0
            # - name: NCCL_IB_DISABLE
            #   value: "0"
            # - name: NCCL_DEBUG
            #   value: INFO
            # - name: NCCL_IB_HCA
            #   value: mlx5_0:1
            # - name: NCCL_IB_GID_INDEX
            #   value: "0"  # or "7", depending on your network configuration
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}')
              MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME
              echo 'Using single node ------------------------------------------'
              # exec replaces the shell so the server receives termination
              # signals directly instead of via an intermediate sh process.
              exec python3 -m vllm.entrypoints.openai.api_server \
                --port 8080 \
                --model "$MODEL_PATH" \
                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
                --pipeline_parallel_size {{ .Values.workerSize }} \
                --trust_remote_code
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit | quote }}
              ephemeral-storage: 10Gi
              # rdma/rdma_shared_device_a: 10
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest | quote }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            # tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath | quote }}
      volumes:
        # Memory-backed emptyDir mounted at /dev/shm in the vLLM container.
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize | quote }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: "{{ .Values.app }}-pvc-model"
{{- end }}