first backup of charts

This commit is contained in:
Ivan087
2025-09-23 10:01:17 +08:00
commit cbfc0104a6
170 changed files with 17788 additions and 0 deletions

View File

@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
{{- if contains "NodePort" .Values.svc.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "vllm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "vllm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "vllm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "vllm-serve.labels" -}}
helm.sh/chart: {{ include "vllm-serve.chart" . }}
{{ include "vllm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "vllm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "vllm-serve.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,188 @@
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# 模型下载作为第一个 initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# 检查模型是否存在,不存在则下载
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Release.Name }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Release.Name }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Release.Name }}-pv-model

View File

@ -0,0 +1,35 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-svc
spec:
type: {{ .Values.svc.type | default "NodePort" }}
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
role: leader
{{- else }}
selector:
app: {{ .Release.Name }}
{{- end }}
ports:
- protocol: TCP
port: {{ .Values.svc.port | default 8080 }}
targetPort: {{ .Values.svc.port | default 8080 }}
nodePort: {{ .Values.svc.nodePort | default 30080 }}

View File

@ -0,0 +1,108 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: {{ .Release.Name }}
template:
metadata:
labels:
app: {{ .Release.Name }}
spec:
initContainers:
# 模型下载作为第一个 initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# 检查模型是否存在,不存在则下载
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-pod
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.vllm.huggingfaceToken }}
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
#tcpSocket:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}