compatible with two clusters
This commit is contained in:
@@ -3,12 +3,12 @@
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
{{- else if contains "LoadBalancer" .Values.svc.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
echo http://$SERVICE_IP:{{ .Values.svc.port }}
|
||||
{{- else if contains "ClusterIP" .Values.svc.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
|
||||
@@ -28,7 +28,7 @@ spec:
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
@@ -37,6 +37,7 @@ spec:
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt update && apt upgrade
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
@@ -74,7 +75,7 @@ spec:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
|
||||
@@ -1,28 +1,40 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
#apiVersion: v1
|
||||
#kind: PersistentVolume
|
||||
#metadata:
|
||||
# name: {{ .Release.Name }}-pv-model
|
||||
#spec:
|
||||
# storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
# capacity:
|
||||
# storage: {{ .Values.nfs.pvSize }}
|
||||
# accessModes:
|
||||
# - ReadWriteMany
|
||||
# persistentVolumeReclaimPolicy: Retain
|
||||
# # nfs:
|
||||
# # path: {{ .Values.nfs.path }}
|
||||
# # server: {{ .Values.nfs.server }}
|
||||
---
|
||||
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pvc-model
|
||||
annotations:
|
||||
# annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Release.Name }}-pv-model
|
||||
# volumeName: {{ .Release.Name }}-pv-model
|
||||
storageClassName: weight
|
||||
|
||||
#apiVersion: v1
|
||||
#kind: PersistentVolumeClaim
|
||||
#metadata:
|
||||
# name: {{ .Release.Name }}-pvc-model
|
||||
#spec:
|
||||
# accessModes:
|
||||
# - ReadWriteMany
|
||||
# resources:
|
||||
# requests:
|
||||
# storage: 20Gi
|
||||
# storageClassName: nas-dataset
|
||||
|
||||
@@ -18,7 +18,7 @@ kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-svc
|
||||
spec:
|
||||
type: {{ .Values.svc.type | default "NodePort" }}
|
||||
type: {{ .Values.svc.type | default "LoadBalancer" }}
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||
@@ -29,7 +29,7 @@ spec:
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.svc.port | default 8080 }}
|
||||
targetPort: {{ .Values.svc.port | default 8080 }}
|
||||
nodePort: {{ .Values.svc.nodePort | default 30080 }}
|
||||
port: {{ .Values.svc.port | default 80 }}
|
||||
targetPort: {{ .Values.svc.targetPort | default 8080 }}
|
||||
# nodePort: {{ .Values.svc.nodePort | default 30080 }}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ spec:
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
@@ -37,6 +37,7 @@ spec:
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt update && apt upgrade
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
@@ -58,9 +59,25 @@ spec:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
#args:
|
||||
# {{- if .Values.command }}
|
||||
# - {{ .Values.command | quote }}
|
||||
# {{- else }}
|
||||
# - |
|
||||
# MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
|
||||
# MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
# # 注意:这里引用变量时不再使用引号包围整个命令块
|
||||
# python3 -m vllm.entrypoints.openai.api_server \
|
||||
# --port 8080 \
|
||||
# --model $MODEL_PATH \
|
||||
# --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
|
||||
# --pipeline_parallel_size {{ .Values.workerSize }} \
|
||||
# --trust_remote_code
|
||||
# {{- end }}
|
||||
- "
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
|
||||
MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
@@ -92,7 +109,11 @@ spec:
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
# - name: weight-volume
|
||||
# nfs:
|
||||
# path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
# server: "10.6.80.11"
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
|
||||
Reference in New Issue
Block a user