Make the chart compatible with two clusters

mangomqy
2025-11-12 07:26:18 +00:00
parent a158e24d3f
commit 7ad1018f25
7 changed files with 411 additions and 35 deletions

View File

@@ -3,12 +3,12 @@
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
{{- else if contains "LoadBalancer" .Values.svc.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
echo http://$SERVICE_IP:{{ .Values.svc.port }}
{{- else if contains "ClusterIP" .Values.svc.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"

View File

@@ -28,7 +28,7 @@ spec:
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt update && apt upgrade -y
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
@@ -74,7 +75,7 @@ spec:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
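
Both the download init container and the leader command above now resolve the model path as <localMountPath>/Weight/<basename of huggingfaceName>. A sketch of the values this template reads; the model name and mount path below are hypothetical, not taken from the chart:

    # Illustrative values.yaml excerpt; model name and mount path are placeholders.
    model:
      huggingfaceName: Qwen/Qwen2.5-7B-Instruct  # placeholder HF repo id
      localMountPath: /models                    # volume mount point; Weight/<basename> is appended
    resources:
      gpuLimit: 1    # GPU limit, also passed to --tensor-parallel-size
    workerSize: 2    # Ray cluster size, passed to --pipeline_parallel_size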

View File

@@ -1,28 +1,40 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Release.Name }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}
#apiVersion: v1
#kind: PersistentVolume
#metadata:
# name: {{ .Release.Name }}-pv-model
#spec:
# storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }}
# capacity:
# storage: {{ .Values.nfs.pvSize }}
# accessModes:
# - ReadWriteMany
# persistentVolumeReclaimPolicy: Retain
# # nfs:
# # path: {{ .Values.nfs.path }}
# # server: {{ .Values.nfs.server }}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Release.Name }}-pvc-model
annotations:
# annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Release.Name }}-pv-model
# volumeName: {{ .Release.Name }}-pv-model
storageClassName: weight
#apiVersion: v1
#kind: PersistentVolumeClaim
#metadata:
# name: {{ .Release.Name }}-pvc-model
#spec:
# accessModes:
# - ReadWriteMany
# resources:
# requests:
# storage: 20Gi
# storageClassName: nas-dataset
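
With the inline PersistentVolume commented out, the claim now binds through a StorageClass named weight that the cluster is expected to provide. On a cluster without a dynamic provisioner for that class, a statically provisioned PV along these lines would satisfy the claim; the name, size, path, and server below are placeholders, only storageClassName and the access mode must match:

    # Hypothetical static PV for a cluster with no dynamic "weight" provisioner.
    apiVersion: v1
    kind: PersistentVolume
    metadata:
      name: weight-static-pv          # placeholder name
    spec:
      storageClassName: weight        # must match the PVC's storageClassName
      capacity:
        storage: 100Gi                # placeholder; at least nfs.pvcSize
      accessModes:
        - ReadWriteMany               # matches the PVC's access mode
      persistentVolumeReclaimPolicy: Retain
      nfs:
        path: /export/models          # placeholder NFS export
        server: 10.0.0.1              # placeholder NFS server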

View File

@@ -18,7 +18,7 @@ kind: Service
metadata:
name: {{ .Release.Name }}-svc
spec:
type: {{ .Values.svc.type | default "NodePort" }}
type: {{ .Values.svc.type | default "LoadBalancer" }}
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
@@ -29,7 +29,7 @@ spec:
{{- end }}
ports:
- protocol: TCP
port: {{ .Values.svc.port | default 8080 }}
targetPort: {{ .Values.svc.port | default 8080 }}
nodePort: {{ .Values.svc.nodePort | default 30080 }}
port: {{ .Values.svc.port | default 80 }}
targetPort: {{ .Values.svc.targetPort | default 8080 }}
# nodePort: {{ .Values.svc.nodePort | default 30080 }}
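
With these defaults the chart now fronts the pods with a LoadBalancer on port 80 forwarding to the container's 8080, and the fixed nodePort is dropped so Kubernetes allocates one automatically. A sketch of what the template would render with default values, assuming a hypothetical release named my-release and more than one worker:

    # Rendered-output sketch; release name and default values are assumptions.
    apiVersion: v1
    kind: Service
    metadata:
      name: my-release-svc
    spec:
      type: LoadBalancer
      selector:
        leaderworkerset.sigs.k8s.io/name: my-release
      ports:
        - protocol: TCP
          port: 80          # external service port
          targetPort: 8080  # vLLM API server port in the pod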

View File

@@ -28,7 +28,7 @@ spec:
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt update && apt upgrade -y
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
@@ -58,9 +59,25 @@ spec:
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
#args:
# {{- if .Values.command }}
# - {{ .Values.command | quote }}
# {{- else }}
# - |
# MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
# MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
# # Note: when variables are referenced here, the whole command block is no longer wrapped in quotes
# python3 -m vllm.entrypoints.openai.api_server \
# --port 8080 \
# --model $MODEL_PATH \
# --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
# --pipeline_parallel_size {{ .Values.workerSize }} \
# --trust_remote_code
# {{- end }}
- "
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
@@ -92,7 +109,11 @@ spec:
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
claimName: {{ .Release.Name }}-pvc-model
# - name: weight-volume
# nfs:
# path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
# server: "10.6.80.11"
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
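
The commented-out block keeps the second cluster's alternative on hand: mounting the weight directory straight from NFS instead of through the PVC. Swapping it in would replace the persistentVolumeClaim entry for weight-volume with a direct mount, roughly as below (path and server as recorded in the comments above; use one or the other, not both):

    # Alternative weight-volume entry, per the commented block above.
    - name: weight-volume
      nfs:
        path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
        server: "10.6.80.11"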