first backup of charts

Ivan087
2025-09-23 10:01:17 +08:00
commit cbfc0104a6
170 changed files with 17788 additions and 0 deletions

vllm/vllm-app/.helmignore Normal file

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

vllm/vllm-app/Chart.yaml Normal file

@@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-app
description: A Helm chart for deploying vLLM with NFS storage
annotations:
"helm.sh/resource-policy": keep # 防止资源被意外删除
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
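
A minimal sketch of the packaging workflow these fields drive, assuming the chart lives at vllm/vllm-app; the bumped version below is illustrative:

    # Validate the chart, then package it; bump --version whenever the templates change.
    helm lint vllm/vllm-app
    helm package vllm/vllm-app --version 0.1.1 --app-version "1.16.0"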


@@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it only if it is missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: llama-leader
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
- name: USE_RAY
value: "1"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: llama-worker
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "echo $(LWS_LEADER_ADDRESS);
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
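
A hedged example of selecting this multi-node llama path at install time; the release name infer and the chart path are assumptions, and a LeaderWorkerSet controller must already be installed in the cluster:

    # workerSize > 1 together with app=llama renders this LeaderWorkerSet.
    helm install infer vllm/vllm-app \
      --set app=llama \
      --set workerSize=2 \
      --set model.huggingfaceName="Qwen/Qwen2.5-0.5B-Instruct"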


@@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it only if it is missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: lmdeploy-leader
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: lmdeploy-worker
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.lmdeploy.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
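
The --tp value above is computed from the GPU limit and the group size; with the default values (resources.gpuLimit=1, workerSize=2) it renders as $((1 * 2)), i.e. tensor parallelism of 2. A sketch of checking the rendered command locally, assuming the chart path:

    helm template infer vllm/vllm-app --set app=lmdeploy --set workerSize=2 \
      | grep -A 2 "lmdeploy serve"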


@@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it only if it is missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
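
Once the vLLM LeaderWorkerSet is applied, the leader and worker pods can be inspected via the same label the chart's Service selects on; a sketch, with an illustrative leader pod name following the usual <lws-name>-<group> pattern:

    kubectl get pods -l leaderworkerset.sigs.k8s.io/name=infer -o wide
    kubectl logs infer-0 -c vllm-leader --tail=50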


@@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ .Release.Name }}-download-model
annotations:
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
"helm.sh/hook-weight": "-10" # 优先执行
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: downloader
image: {{ .Values.model.download.image }}
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
if [ -d "$DEST_DIR" ]; then
echo "Model already exists at $DEST_DIR"
else
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
fi
volumeMounts:
- name: model-storage
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: model-storage
persistentVolumeClaim:
claimName: nfs-pvc-model # reuse the pre-existing PVC
{{- end }}
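
A sketch of enabling this pre-install hook, with the release name and chart path assumed. Helm runs pre-install hooks before the release's own manifests, so the nfs-pvc-model claim the job mounts has to exist beforehand:

    helm upgrade --install infer vllm/vllm-app \
      --set model.download.enabled=true \
      --set model.huggingfaceToken="$HF_TOKEN"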


@@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}


@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model
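
With the default app: "vllm", these templates render a vllm-pv-model volume bound to a vllm-pvc-model claim; a quick check that the binding succeeded:

    kubectl get pv vllm-pv-model
    kubectl get pvc vllm-pvc-model   # STATUS should be Bound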


@@ -0,0 +1,39 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Values.app }}-leader-nodeport
spec:
type: NodePort
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: infer
role: leader
{{- else }}
selector:
app: vllm-app
{{- end }}
ports:
- protocol: TCP
port: 8080
{{- if eq .Values.app "llama" }}
targetPort: 7860
{{- else }}
targetPort: 8080
{{- end }}
nodePort: 30080
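
With the NodePort fixed at 30080, the OpenAI-compatible endpoint exposed by the vLLM or lmdeploy leader can be probed from outside the cluster; a sketch assuming NODE_IP holds any node's address:

    curl http://$NODE_IP:30080/v1/models

For app=llama the service instead forwards to the webchat UI on port 7860, so the path above does not apply.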


@@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: vllm
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: vllm-app
template:
metadata:
labels:
app: vllm-app
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it only if it is missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
#securityContext:
# capabilities:
# add: [ "IPC_LOCK" ]
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
#- name: GLOO_SOCKET_IFNAME
# value: eth0
#- name: NCCL_SOCKET_IFNAME
# value: eth0
#- name: NCCL_IB_DISABLE
# value: "0"
#- name: NCCL_DEBUG
# value: INFO
#- name: NCCL_IB_HCA
# value: mlx5_0:1
#- name: NCCL_IB_GID_INDEX
# value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
#rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
#tcpSocket:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
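
Setting workerSize to 1 selects this plain Deployment instead of the LeaderWorkerSet; a hedged install sketch, reusing the assumed release name:

    helm install infer vllm/vllm-app --set workerSize=1
    # Readiness is gated on the vLLM /health endpoint, so wait for READY 1/1:
    kubectl get deploy vllm -w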

vllm/vllm-app/values.yaml Normal file

@@ -0,0 +1,58 @@
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally need to change
  localMountPath: "/Model" # fixed mount path of the model PVC
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false # enable the automatic download job
    image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli

# Backend selection
app: "vllm"

resources:
  gpuLimit: 1
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

# vLLM application configuration
vllm:
  image: "docker.io/vllm/vllm-openai:latest"
  #gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"
  # gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2

nodeSelector: {}
tolerations: []
affinity: {}
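
A typical override set for a first deployment; the release name, chart path, and the placeholder token and NFS values below are assumptions to be replaced with real ones:

    helm install infer vllm/vllm-app \
      --set model.huggingfaceName="Qwen/Qwen2.5-0.5B-Instruct" \
      --set model.huggingfaceToken="<your-hf-token>" \
      --set nfs.server="<nfs-server>" \
      --set nfs.path="<nfs-export-path>"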