first backup of charts
vllm/vllm-app/templates/llama.yaml (new file, 165 lines)
@@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it if not
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: llama-leader
            image: {{ .Values.llama.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: USE_RAY
                value: "1"
              # - name: LMDEPLOY_EXECUTOR_BACKEND
              #   value: "ray"
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 7860
                name: http
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: llama-worker
            image: {{ .Values.llama.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "echo $(LWS_LEADER_ADDRESS);
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              # - name: LMDEPLOY_EXECUTOR_BACKEND
              #   value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
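Note: the llama leader launches LLaMA-Factory's web chat against a config file it expects to find on the shared volume at lws-config/qwen2_5_3B.yaml; that file is not part of this commit. A minimal sketch of what it might contain, assuming LLaMA-Factory's standard inference keys (every value below is illustrative, not taken from this repo):

    # lws-config/qwen2_5_3B.yaml (hypothetical, not included in this commit)
    model_name_or_path: /models/Qwen2.5-3B-Instruct   # assumed path under .Values.model.localMountPath
    template: qwen                                    # chat template name expected by LLaMA-Factory
    infer_backend: vllm                               # assumed backend; adjust to your setup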
vllm/vllm-app/templates/lmdeploy_lws.yaml (new file, 170 lines)
@@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it if not
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                #httpGet:
                #path: /health
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.lmdeploy.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
{{- end }}
vllm/vllm-app/templates/lws.yaml (new file, 166 lines)
@@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it if not
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                #httpGet:
                #path: /health
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0" # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
vllm/vllm-app/templates/model-download-job.yaml (new file, 44 lines)
@@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-download-model
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade   # run before install/upgrade
    "helm.sh/hook-weight": "-10"              # run ahead of other resources
    "helm.sh/hook-delete-policy": hook-succeeded
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: downloader
          image: {{ .Values.model.download.image }}
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              if [ -d "$DEST_DIR" ]; then
                echo "Model already exists at $DEST_DIR"
              else
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: nfs-pvc-model   # reuse the existing PVC
{{- end }}
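Note: the pre-install/pre-upgrade hook with weight -10 makes this Job run before the release's other resources are created, so the weights can already be on the PVC when the inference pods start. A hedged example of enabling it at install time (the release name and chart path are assumptions based on this repo's layout, not something defined in the commit):

    helm install infer ./vllm/vllm-app --set model.download.enabled=true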
vllm/vllm-app/templates/nfs-pv.yaml (new file, 14 lines)
@@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Values.app }}-pv-model
spec:
  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
  capacity:
    storage: {{ .Values.nfs.pvSize }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    path: {{ .Values.nfs.path }}
    server: {{ .Values.nfs.server }}
vllm/vllm-app/templates/nfs-pvc.yaml (new file, 12 lines)
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.app }}-pvc-model
  annotations:
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.nfs.pvcSize }}
  volumeName: {{ .Values.app }}-pv-model
vllm/vllm-app/templates/services.yaml (new file, 39 lines)
@@ -0,0 +1,39 @@
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  selector:
    leaderworkerset.sigs.k8s.io/name: infer
    role: leader
  {{- else }}
  selector:
    app: vllm-app
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      {{- if eq .Values.app "llama" }}
      targetPort: 7860
      {{- else }}
      targetPort: 8080
      {{- end }}
      nodePort: 30080
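Note: the NodePort fixes the external port at 30080 on every node. Assuming a node reachable at NODE_IP and app set to vllm or lmdeploy (both serve an OpenAI-compatible API on the target port), a quick smoke test could look like the line below; with app=llama the same NodePort fronts the Gradio web chat on 7860 instead, so it is browsed rather than curled:

    curl http://NODE_IP:30080/v1/models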
vllm/vllm-app/templates/single.yaml (new file, 114 lines)
@@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: vllm-app
  template:
    metadata:
      labels:
        app: vllm-app
    spec:
      initContainers:
        # Model download runs as the first initContainer
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model already exists; download it if not
              echo "DEST_DIR= $DEST_DIR"
              ls $DEST_DIR
              ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          #securityContext:
          #  capabilities:
          #    add: [ "IPC_LOCK" ]
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken }}
            #- name: GLOO_SOCKET_IFNAME
            #  value: eth0
            #- name: NCCL_SOCKET_IFNAME
            #  value: eth0
            #- name: NCCL_IB_DISABLE
            #  value: "0"
            #- name: NCCL_DEBUG
            #  value: INFO
            #- name: NCCL_IB_HCA
            #  value: mlx5_0:1
            #- name: NCCL_IB_GID_INDEX
            #  value: "0" # or "7", depending on your network configuration
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
              echo 'Using single node ------------------------------------------';
              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
              #rdma/rdma_shared_device_a: 10
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            #tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
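For reference, a sketch of the values.yaml keys these templates read. The key names are taken from the templates above; every concrete value below is an illustrative assumption and is not part of this commit:

    app: vllm                        # one of: vllm, lmdeploy, llama
    replicaCount: 1
    workerSize: 2                    # >1 selects the LeaderWorkerSet templates, 1 selects single.yaml
    vllm:
      image: vllm/vllm-openai:latest          # assumed image
      huggingfaceToken: ""
    lmdeploy:
      image: openmmlab/lmdeploy:latest        # assumed image
      huggingfaceToken: ""
    llama:
      image: hiyouga/llamafactory:latest      # assumed image
    model:
      huggingfaceName: Qwen/Qwen2.5-3B-Instruct   # example model id
      huggingfaceToken: ""
      localMountPath: /models
      download:
        enabled: false
        image: python:3.10-slim               # assumed downloader image
    resources:
      gpuLimit: 1
      cpuRequest: "4"
      memoryLimit: 64Gi
      shmSize: 16Gi
    nfs:
      storageClass: local-path
      server: 10.0.0.1                        # assumed NFS server
      path: /export/models
      pvSize: 200Gi
      pvcSize: 200Gi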