first backup of charts
vllm/metadata.yaml (Normal file, 53 lines)
@@ -0,0 +1,53 @@
application_name: &application_name vllm

distributed:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: infer-0

monolithic:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
  pod:
    name: vllm
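For orientation, a minimal sketch of what the `distributed` entry above amounts to if applied by hand, assuming the deployer that consumes metadata.yaml simply maps each `sets` key onto a Helm `--set` flag (that deployer is not part of this commit):

    # Hypothetical manual equivalent of the `distributed` entry
    helm install vllm ./vllm-app \
      --set app=vllm \
      --set model.huggingfaceName="Qwen/Qwen2.5-0.5B-Instruct" \
      --set resources.gpuLimit=1 \
      --set resources.cpuRequest=8 \
      --set resources.memoryLimit=16Gi \
      --set resources.shmSize=15Gi \
      --set workerSize=2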
vllm/vllm-app/.helmignore (Normal file, 23 lines)
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
vllm/vllm-app/Chart.yaml (Normal file, 25 lines)
@@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-app
description: A Helm chart for deploying vLLM with NFS storage
annotations:
  "helm.sh/resource-policy": keep  # prevent resources from being deleted accidentally
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
vllm/vllm-app/templates/llama.yaml (Normal file, 165 lines)
@@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it only if missing
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt-get update && apt-get install -y aria2
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: llama-leader
            image: {{ .Values.llama.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: USE_RAY
                value: "1"
              # - name: LMDEPLOY_EXECUTOR_BACKEND
              #   value: "ray"
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 7860
                name: http
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: llama-worker
            image: {{ .Values.llama.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "echo $(LWS_LEADER_ADDRESS);
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              # - name: LMDEPLOY_EXECUTOR_BACKEND
              #   value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
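A quick sanity check once this renders and deploys, assuming the LeaderWorkerSet controller applies its standard `leaderworkerset.sigs.k8s.io/name` pod label (the same label the Service selector in this chart relies on):

    # List the leader and worker pods of the "infer" group
    kubectl get pods -l leaderworkerset.sigs.k8s.io/name=infer -o wide
    # Tail the leader (metadata.yaml expects it to be named infer-0)
    kubectl logs -f infer-0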
vllm/vllm-app/templates/lmdeploy_lws.yaml (Normal file, 170 lines)
@@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it only if missing
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt-get update && apt-get install -y aria2
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                # httpGet:
                #   path: /health
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.lmdeploy.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
{{- end }}
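`lmdeploy serve api_server` exposes an OpenAI-compatible API on its server port, which the NodePort Service in this chart publishes on 30080. A hedged smoke test using the host address from metadata.yaml (the served model name on your deployment may differ from the guess below):

    curl http://10.6.14.123:30080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "Qwen2.5-0.5B-Instruct", "messages": [{"role": "user", "content": "hello"}]}'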
vllm/vllm-app/templates/lws.yaml (Normal file, 166 lines)
@@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it only if missing
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt-get update && apt-get install -y aria2
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                # httpGet:
                #   path: /health
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
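Note the parallelism split in the leader command: tensor parallelism spans the GPUs inside one pod and pipeline parallelism spans the pods of the group, so one replica of the group consumes gpuLimit × workerSize GPUs. With the defaults in this chart's values.yaml:

    # GPUs per LeaderWorkerSet replica
    # --tensor-parallel-size   = resources.gpuLimit = 1   (GPUs per pod)
    # --pipeline_parallel_size = workerSize         = 2   (pods per group)
    # total = 1 * 2 = 2 GPUs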
vllm/vllm-app/templates/model-download-job.yaml (Normal file, 44 lines)
@@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-download-model
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade  # run before install/upgrade
    "helm.sh/hook-weight": "-10"  # run ahead of other hooks
    "helm.sh/hook-delete-policy": hook-succeeded
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: downloader
          image: {{ .Values.model.download.image }}
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              if [ -d "$DEST_DIR" ]; then
                echo "Model already exists at $DEST_DIR"
              else
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: nfs-pvc-model  # reuse the existing PVC
{{- end }}
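The Job is gated on `model.download.enabled`, which defaults to false in values.yaml, so by default the initContainers above do the downloading at pod start. Enabling the hook pre-populates the volume before the release installs:

    # Pre-download the weights via the pre-install/pre-upgrade hook
    helm install vllm ./vllm-app --set model.download.enabled=true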
vllm/vllm-app/templates/nfs-pv.yaml (Normal file, 14 lines)
@@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Values.app }}-pv-model
spec:
  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
  capacity:
    storage: {{ .Values.nfs.pvSize }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    path: {{ .Values.nfs.path }}
    server: {{ .Values.nfs.server }}
vllm/vllm-app/templates/nfs-pvc.yaml (Normal file, 12 lines)
@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.app }}-pvc-model
  annotations:
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.nfs.pvcSize }}
  volumeName: {{ .Values.app }}-pv-model
vllm/vllm-app/templates/services.yaml (Normal file, 39 lines)
@@ -0,0 +1,39 @@
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  selector:
    leaderworkerset.sigs.k8s.io/name: infer
    role: leader
  {{- else }}
  selector:
    app: vllm-app
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      {{- if eq .Values.app "llama" }}
      targetPort: 7860
      {{- else }}
      targetPort: 8080
      {{- end }}
      nodePort: 30080
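The Service pins nodePort 30080 (matching `svc.port` in metadata.yaml) and forwards to 7860 for the llama web UI or 8080 otherwise. Both vLLM and lmdeploy answer the OpenAI-style model listing, so a hedged reachability check against the host from metadata.yaml is:

    curl http://10.6.14.123:30080/v1/models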
vllm/vllm-app/templates/single.yaml (Normal file, 114 lines)
@@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: vllm-app
  template:
    metadata:
      labels:
        app: vllm-app
    spec:
      initContainers:
        # Model download runs as the first initContainer
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model already exists; download it only if missing
              echo "DEST_DIR= $DEST_DIR"
              ls $DEST_DIR
              ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          # securityContext:
          #   capabilities:
          #     add: [ "IPC_LOCK" ]
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
            # - name: GLOO_SOCKET_IFNAME
            #   value: eth0
            # - name: NCCL_SOCKET_IFNAME
            #   value: eth0
            # - name: NCCL_IB_DISABLE
            #   value: "0"
            # - name: NCCL_DEBUG
            #   value: INFO
            # - name: NCCL_IB_HCA
            #   value: mlx5_0:1
            # - name: NCCL_IB_GID_INDEX
            #   value: "0"  # or "7", depending on your network configuration
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
              echo 'Using single node ------------------------------------------';
              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
              # rdma/rdma_shared_device_a: 10
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            # tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
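The single-node readiness probe uses vLLM's /health endpoint; the same endpoint is reachable from outside through the NodePort, which is handy when the probe keeps a pod out of rotation:

    curl -i http://10.6.14.123:30080/health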
vllm/vllm-app/values.yaml (Normal file, 58 lines)
@@ -0,0 +1,58 @@
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"  # the only value users need to set
  localMountPath: "/Model"  # fixed mount path of the PVC
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false  # enable automatic download
    image: "docker.io/vllm/vllm-openai:latest"  # image that ships huggingface-cli

# Application selection
app: "vllm"

resources:
  gpuLimit: 1
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

# vLLM application configuration
vllm:
  image: "docker.io/vllm/vllm-openai:latest"
  # gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"
  # gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
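These defaults are what metadata.yaml overrides per deployment mode, and they can be overridden directly as well. A sketch of a single-node install serving a different model (the 7B name here is only an illustrative choice):

    helm install vllm ./vllm-app \
      --set workerSize=1 \
      --set model.huggingfaceName="Qwen/Qwen2.5-7B-Instruct"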
vllm/vllm-serve/.helmignore (Normal file, 23 lines)
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
vllm/vllm-serve/Chart.yaml (Normal file, 25 lines)
@@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-serve
description: A Helm chart for deploying vLLM with NFS storage
annotations:
  "helm.sh/resource-policy": keep  # prevent resources from being deleted accidentally
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
vllm/vllm-serve/templates/NOTES.txt (Normal file, 16 lines)
@@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
{{- if contains "NodePort" .Values.svc.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.svc.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.svc.port }}
{{- else if contains "ClusterIP" .Values.svc.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
vllm/vllm-serve/templates/_helpers.tpl (Normal file, 62 lines)
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "vllm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "vllm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "vllm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "vllm-serve.labels" -}}
helm.sh/chart: {{ include "vllm-serve.chart" . }}
{{ include "vllm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "vllm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "vllm-serve.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
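These are the stock helpers generated by `helm create`; of them only `vllm-serve.fullname` is referenced so far (from NOTES.txt). To confirm everything renders before touching the cluster:

    # Render templates locally and lint the chart
    helm template my-vllm ./vllm-serve | head -n 40
    helm lint ./vllm-serve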
vllm/vllm-serve/templates/lws.yaml (Normal file, 188 lines)
@@ -0,0 +1,188 @@
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken }}
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it only if missing
                echo "DEST_DIR= $DEST_DIR"
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt-get update && apt-get install -y aria2
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                # httpGet:
                #   path: /health
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # - name: HUGGING_FACE_HUB_TOKEN
              #   value: {{ .Values.vllm.huggingfaceToken }}
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}
vllm/vllm-serve/templates/nfs-pvc.yaml (Normal file, 28 lines)
@@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Release.Name }}-pv-model
spec:
  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
  capacity:
    storage: {{ .Values.nfs.pvSize }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    path: {{ .Values.nfs.path }}
    server: {{ .Values.nfs.server }}
---

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-pvc-model
  annotations:
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.nfs.pvcSize }}
  volumeName: {{ .Release.Name }}-pv-model
vllm/vllm-serve/templates/services.yaml (Normal file, 35 lines)
@@ -0,0 +1,35 @@
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Release.Name }}-svc
spec:
  type: {{ .Values.svc.type | default "NodePort" }}
  {{- if gt (int .Values.workerSize) 1 }}
  selector:
    leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
    role: leader
  {{- else }}
  selector:
    app: {{ .Release.Name }}
  {{- end }}
  ports:
    - protocol: TCP
      port: {{ .Values.svc.port | default 8080 }}
      targetPort: {{ .Values.svc.targetPort | default 8080 }}
      nodePort: {{ .Values.svc.nodePort | default 30080 }}
vllm/vllm-serve/templates/single.yaml (Normal file, 108 lines)
@@ -0,0 +1,108 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
        # Model download runs as the first initContainer
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model already exists; download it only if missing
              echo "DEST_DIR= $DEST_DIR"
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-pod
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
              echo 'Using single node ------------------------------------------';
              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            # tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc-model
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
vllm/vllm-serve/values.yaml (Normal file, 75 lines)
@@ -0,0 +1,75 @@
# Default values for vllm-serve.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# This is for the secrets for pulling an image from a private repository. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
imagePullPolicy: IfNotPresent
# This is to override the chart name.
nameOverride: ""
fullnameOverride: ""

# This section builds out the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"  # the only value users need to set
  localMountPath: "/Model"  # fixed mount path of the PVC
  huggingfaceToken: "<your-hf-token>"
  download:  # automatic download
    image: "docker.io/vllm/vllm-openai:latest"  # image that ships huggingface-cli

# Application selection

resources:
  gpuLimit: 1
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

svc:
  type: NodePort
  port: 80
  targetPort: 8080
  nodePort: 30080

# vLLM application configuration
vllm:
  image: "docker.io/vllm/vllm-openai:latest"

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"

# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
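Unlike vllm-app, this chart keys its resources off the release name, so several releases can coexist. A sketch:

    helm install qwen ./vllm-serve --set workerSize=2
    kubectl get svc qwen-svc
    kubectl get pvc qwen-pvc-model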