chore(repo): init helm-charts repo with two charts and automated package/push

2025-11-17 16:50:28 +08:00
commit e8451c0675
28 changed files with 1905 additions and 0 deletions
--- a/vllm-serve/templates/NOTES.txt
+++ b/vllm-serve/templates/NOTES.txt
@ -0,0 +1,16 @@
+1. Get the application URL by running these commands:
+{{- /*
+The Service name and the type/port fallbacks below mirror templates/services.yaml
+(metadata.name "<release>-svc", type default "LoadBalancer", port default 80),
+so the printed commands address the Service this chart actually creates.
+*/}}
+{{- $svcName := printf "%s-svc" .Release.Name }}
+{{- $svcType := .Values.svc.type | default "LoadBalancer" }}
+{{- $svcPort := .Values.svc.port | default 80 }}
+{{- if contains "NodePort" $svcType }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ $svcName }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" $svcType }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ $svcName }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ $svcName }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ $svcPort }}
+{{- else if contains "ClusterIP" $svcType }}
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ $svcName }} 8080:{{ $svcPort }}
+{{- end }}
--- a/vllm-serve/templates/_helpers.tpl
+++ b/vllm-serve/templates/_helpers.tpl
@ -0,0 +1,62 @@
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
+The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "vllm-serve.name" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name -}}
+{{- $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, limited to 63 characters (some Kubernetes name
+fields are restricted to this by the DNS naming spec) with any trailing "-"
+removed. Resolution order:
+  1. .Values.fullnameOverride, when set;
+  2. the release name alone, when it already contains the chart name
+     (or .Values.nameOverride);
+  3. "<release name>-<chart name>" otherwise.
+*/}}
+{{- define "vllm-serve.fullname" -}}
+{{- $shortName := .Values.nameOverride | default .Chart.Name }}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else if contains $shortName .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $shortName | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+
+{{/*
+Value for the helm.sh/chart label: "<chart name>-<chart version>" with "+"
+replaced by "_", cut to 63 characters, trailing "-" removed.
+*/}}
+{{- define "vllm-serve.chart" -}}
+{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
+{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version (emitted
+only when .Chart.AppVersion is set) and the managing service.
+*/}}
+{{- define "vllm-serve.labels" -}}
+helm.sh/chart: {{ include "vllm-serve.chart" . }}
+{{ include "vllm-serve.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart's short name plus the release name.
+*/}}
+{{- define "vllm-serve.selectorLabels" -}}
+{{- $appName := include "vllm-serve.name" . -}}
+app.kubernetes.io/name: {{ $appName }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use: .Values.serviceAccount.name when set;
+otherwise the chart fullname if .Values.serviceAccount.create is truthy,
+or "default" if it is not.
+*/}}
+{{- define "vllm-serve.serviceAccountName" -}}
+{{- $fallback := "default" }}
+{{- if .Values.serviceAccount.create }}
+{{- $fallback = include "vllm-serve.fullname" . }}
+{{- end }}
+{{- default $fallback .Values.serviceAccount.name }}
+{{- end }}
--- a/vllm-serve/templates/lws.yaml
+++ b/vllm-serve/templates/lws.yaml
@ -0,0 +1,193 @@
+{{- if gt (int .Values.workerSize) 1 }}
+# Multi-node serving: this LeaderWorkerSet is rendered only when workerSize > 1
+# (templates/single.yaml renders a Deployment when workerSize == 1).
+apiVersion: leaderworkerset.x-k8s.io/v1
+kind: LeaderWorkerSet
+metadata:
+  name: {{ .Release.Name }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  leaderWorkerTemplate:
+    size: {{ .Values.workerSize }}
+    restartPolicy: RecreateGroupOnPodRestart
+    leaderTemplate:
+      metadata:
+        labels:
+          # Matched by the Service selector in templates/services.yaml.
+          role: leader
+      spec:
+        initContainers:
+        # Model download runs as the first initContainer.
+        - name: download-model
+          image: {{ .Values.model.download.image }}
+          imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
+          env:
+            - name: HF_ENDPOINT
+              value: https://hf-mirror.com
+            - name: HUGGING_FACE_HUB_TOKEN
+              # Quoted with an explicit fallback so an unset token renders ""
+              # instead of YAML null.
+              value: {{ .Values.model.huggingfaceToken | default "" | quote }}
+          command:
+            - sh
+            - -c
+            - |
+              # Stop at the first failing step so a broken download fails the
+              # initContainer instead of letting the server start without weights.
+              set -e
+              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
+              DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
+              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
+              # Download only when the model is not already present.
+              echo "DEST_DIR= $DEST_DIR"
+              if [ ! -f "$DEST_DIR/config.json" ]; then
+                ls -l {{ .Values.model.localMountPath }}
+                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
+                wget https://hf-mirror.com/hfd/hfd.sh
+                chmod a+x hfd.sh
+                # Install only aria2, non-interactively; no full image upgrade.
+                apt-get update
+                DEBIAN_FRONTEND=noninteractive apt-get install -y aria2
+                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+              else
+                echo "Model already exists at $DEST_DIR"
+              fi
+          volumeMounts:
+          - name: weight-volume
+            mountPath: {{ .Values.model.localMountPath }}
+        containers:
+          # Leader container: runs multi-node-serving.sh in "leader" mode, then
+          # either .Values.command or the default OpenAI-compatible API server.
+          - name: vllm-leader
+            image: {{ .Values.vllm.image }}
+            imagePullPolicy: IfNotPresent
+            securityContext:
+              capabilities:
+                add: [ "IPC_LOCK" ]
+            env:
+              # HUGGING_FACE_HUB_TOKEN (from .Values.vllm.huggingfaceToken) is
+              # disabled for this container. The expression is spelled without
+              # braces because Helm still renders template actions that sit
+              # inside YAML comments.
+              - name: GLOO_SOCKET_IFNAME
+                value: eth0
+              - name: NCCL_SOCKET_IFNAME
+                value: eth0
+              - name: NCCL_IB_DISABLE
+                value: "0"
+              - name: NCCL_DEBUG
+                value: INFO
+              - name: NCCL_IB_HCA
+                value: mlx5_0:1
+              - name: NCCL_IB_GID_INDEX
+                value: "0" # or "7", depending on your network configuration
+              - name: RAY_DEDUP_LOGS
+                value: "0"
+            command:
+              - sh
+              - -c
+              {{- if .Values.command }}
+              # printf + quote escapes any quotes/backslashes in the user command,
+              # so it cannot break out of the YAML string.
+              - {{ printf "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); %s" .Values.command | quote }}
+              {{- else }}
+              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
+                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+                python3 -m vllm.entrypoints.openai.api_server --port 8000 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+              {{- end }}
+            resources:
+              limits:
+                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+                memory: {{ .Values.resources.memoryLimit }}
+                ephemeral-storage: 10Gi
+                rdma/rdma_shared_device_a: 10
+              requests:
+                ephemeral-storage: 10Gi
+                cpu: {{ .Values.resources.cpuRequest }}
+            ports:
+              # Named port; the Service targets it as "http".
+              - containerPort: 8000
+                name: http
+            readinessProbe:
+              # TCP check on the API port (an httpGet on /health is the
+              # alternative used by templates/single.yaml).
+              tcpSocket:
+                port: 8000
+              initialDelaySeconds: 120
+              periodSeconds: 20
+              timeoutSeconds: 5
+            volumeMounts:
+              - mountPath: /dev/shm
+                name: dshm
+              - name: weight-volume
+                mountPath: {{ .Values.model.localMountPath }}
+        volumes:
+        # Memory-backed emptyDir mounted at /dev/shm, capped at resources.shmSize.
+        - name: dshm
+          emptyDir:
+            sizeLimit: {{ .Values.resources.shmSize }}
+            medium: Memory
+        # Model weights claim declared in templates/nfs-pvc.yaml.
+        - name: weight-volume
+          persistentVolumeClaim:
+            claimName: {{ printf "%s-pvc-model" .Release.Name }}
+        # Optional scheduling constraints, emitted only when set in values.
+        {{- if .Values.nodeSelector }}
+        nodeSelector:
+          {{- toYaml .Values.nodeSelector | nindent 10 }}
+        {{- end }}
+        {{- if .Values.affinity }}
+        affinity:
+          {{- toYaml .Values.affinity | nindent 10 }}
+        {{- end }}
+        {{- if .Values.tolerations }}
+        tolerations:
+          {{- toYaml .Values.tolerations | nindent 10 }}
+        {{- end }}
+    workerTemplate:
+      spec:
+        containers:
+          # Worker container: runs multi-node-serving.sh in "worker" mode against
+          # the address in LWS_LEADER_ADDRESS.
+          - name: vllm-worker
+            image: {{ .Values.vllm.image }}
+            imagePullPolicy: IfNotPresent
+            command:
+              - sh
+              - -c
+              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
+            env:
+              # HUGGING_FACE_HUB_TOKEN (from .Values.vllm.huggingfaceToken) is
+              # disabled for this container.
+              - name: GLOO_SOCKET_IFNAME
+                value: eth0
+              - name: NCCL_SOCKET_IFNAME
+                value: eth0
+              - name: NCCL_IB_DISABLE
+                value: "0"
+              - name: NCCL_DEBUG
+                value: INFO
+              - name: NCCL_IB_HCA
+                value: mlx5_0:1
+              - name: NCCL_IB_GID_INDEX
+                value: "0" # or "7", depending on your network configuration
+              - name: RAY_DEDUP_LOGS
+                value: "0"
+            securityContext:
+              capabilities:
+                add:
+                  - IPC_LOCK
+            resources:
+              requests:
+                cpu: {{ .Values.resources.cpuRequest }}
+                ephemeral-storage: 10Gi
+              limits:
+                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+                memory: {{ .Values.resources.memoryLimit }}
+                ephemeral-storage: 10Gi
+                rdma/rdma_shared_device_a: 10
+            volumeMounts:
+              - name: dshm
+                mountPath: /dev/shm
+              - name: weight-volume
+                mountPath: {{ .Values.model.localMountPath }}
+        volumes:
+        # Memory-backed emptyDir mounted at /dev/shm, capped at resources.shmSize.
+        - name: dshm
+          emptyDir:
+            sizeLimit: {{ .Values.resources.shmSize }}
+            medium: Memory
+        # Same model weights claim the leader mounts.
+        - name: weight-volume
+          persistentVolumeClaim:
+            claimName: {{ printf "%s-pvc-model" .Release.Name }}
+        # Optional scheduling constraints, emitted only when set in values.
+        {{- if .Values.nodeSelector }}
+        nodeSelector:
+          {{- toYaml .Values.nodeSelector | nindent 10 }}
+        {{- end }}
+        {{- if .Values.affinity }}
+        affinity:
+          {{- toYaml .Values.affinity | nindent 10 }}
+        {{- end }}
+        {{- if .Values.tolerations }}
+        tolerations:
+          {{- toYaml .Values.tolerations | nindent 10 }}
+        {{- end }}
+{{- end }}
--- a/vllm-serve/templates/nfs-pvc.yaml
+++ b/vllm-serve/templates/nfs-pvc.yaml
@ -0,0 +1,40 @@
+#apiVersion: v1
+#kind: PersistentVolume
+#metadata:
+#  name: {{ .Release.Name }}-pv-model
+#spec:
+#  storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }}
+#  capacity:
+#    storage: {{ .Values.nfs.pvSize }}
+#  accessModes:
+#    - ReadWriteMany
+#  persistentVolumeReclaimPolicy: Retain
+#  #  nfs:
+#  #    path: {{ .Values.nfs.path }}
+#  #    server: {{ .Values.nfs.server }}
+---
+# Claim for the model weights volume; mounted as "weight-volume" by the
+# workload templates. Size comes from .Values.nfs.pvcSize.
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ printf "%s-pvc-model" .Release.Name }}
+spec:
+  # Storage class is fixed to "weight"; the claim is not bound to a named PV.
+  storageClassName: weight
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: {{ .Values.nfs.pvcSize }}
+
+#apiVersion: v1
+#kind: PersistentVolumeClaim
+#metadata:
+#  name: {{ .Release.Name }}-pvc-model
+#spec:
+#  accessModes:
+#    - ReadWriteMany
+#  resources:
+#    requests:
+#      storage: 20Gi
+#  storageClassName: nas-dataset
--- a/vllm-serve/templates/services.yaml
+++ b/vllm-serve/templates/services.yaml
@ -0,0 +1,35 @@
+#apiVersion: v1
+#kind: Service
+#metadata:
+#  name: infer-leader-loadbalancer
+#spec:
+#  type: LoadBalancer
+#  selector:
+#    leaderworkerset.sigs.k8s.io/name: infer
+#    role: leader
+#  ports:
+#    - protocol: TCP
+#      port: 8080
+#      targetPort: 8080
+#
+---
+# Service in front of the vLLM API server.
+# - workerSize > 1: selects the pod labelled role=leader of the LeaderWorkerSet
+#   named after the release.
+# - otherwise: selects the Deployment pods labelled app=<release name>.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ printf "%s-svc" .Release.Name }}
+spec:
+  type: {{ default "LoadBalancer" .Values.svc.type }}
+  selector:
+    {{- if gt (int .Values.workerSize) 1 }}
+    leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
+    role: leader
+    {{- else }}
+    app: {{ .Release.Name }}
+    {{- end }}
+  ports:
+    - protocol: TCP
+      port: {{ default 80 .Values.svc.port }}
+      # Refers to the container port named "http" in the workload templates.
+      targetPort: http
+
--- a/vllm-serve/templates/single.yaml
+++ b/vllm-serve/templates/single.yaml
@ -0,0 +1,127 @@
+{{- if eq (int .Values.workerSize) 1 }}
+# Single-node serving: this Deployment is rendered only when workerSize == 1
+# (templates/lws.yaml renders a LeaderWorkerSet when workerSize > 1).
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      app: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        # Matched by the Service selector in templates/services.yaml.
+        app: {{ .Release.Name }}
+    spec:
+      initContainers:
+      # Model download runs as the first initContainer.
+      - name: download-model
+        image: {{ .Values.model.download.image }}
+        imagePullPolicy: IfNotPresent
+        env:
+          - name: HF_ENDPOINT
+            value: https://hf-mirror.com
+          - name: HUGGING_FACE_HUB_TOKEN
+            # Quoted with an explicit fallback so an unset token renders ""
+            # instead of YAML null.
+            value: {{ .Values.model.huggingfaceToken | default "" | quote }}
+        command:
+          - sh
+          - -c
+          - |
+            # Stop at the first failing step so a broken download fails the
+            # initContainer instead of letting the server start without weights.
+            set -e
+            MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
+            DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
+            # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
+            # Download only when the model is not already present.
+            echo "DEST_DIR= $DEST_DIR"
+            if [ ! -f "$DEST_DIR/config.json" ]; then
+              ls -l {{ .Values.model.localMountPath }}
+              echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
+              wget https://hf-mirror.com/hfd/hfd.sh
+              chmod a+x hfd.sh
+              # Install only aria2, non-interactively; no full image upgrade.
+              apt-get update
+              DEBIAN_FRONTEND=noninteractive apt-get install -y aria2
+              ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+              # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+            else
+              echo "Model already exists at $DEST_DIR"
+            fi
+        volumeMounts:
+        - name: weight-volume
+          mountPath: {{ .Values.model.localMountPath }}
+      containers:
+      # vLLM server container: runs .Values.command when provided, otherwise the
+      # default OpenAI-compatible API server on port 8000.
+      - name: vllm-pod
+        image: {{ .Values.vllm.image }}
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: HUGGING_FACE_HUB_TOKEN
+          # Quoted with an explicit fallback so an unset token renders ""
+          # instead of YAML null.
+          value: {{ .Values.vllm.huggingfaceToken | default "" | quote }}
+        - name: RAY_DEDUP_LOGS
+          value: "0"
+        command:
+          - sh
+          - -c
+          {{- if .Values.command }}
+          - {{ .Values.command | quote }}
+          {{- else }}
+          - |
+            MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+            MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+            python3 -m vllm.entrypoints.openai.api_server \
+              --port 8000 \
+              --model $MODEL_PATH \
+              --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+              --pipeline_parallel_size {{ .Values.workerSize }} \
+              --trust_remote_code
+          {{- end }}
+        resources:
+          limits:
+            nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+            memory: {{ .Values.resources.memoryLimit }}
+            ephemeral-storage: 10Gi
+          requests:
+            ephemeral-storage: 10Gi
+            cpu: {{ .Values.resources.cpuRequest }}
+        ports:
+        # Named port; the Service targets it as "http".
+        - containerPort: 8000
+          name: http
+        readinessProbe:
+          # HTTP check against /health on the API port.
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 120
+          periodSeconds: 20
+          timeoutSeconds: 5
+        volumeMounts:
+          - mountPath: /dev/shm
+            name: dshm
+          - name: weight-volume
+            mountPath: {{ .Values.model.localMountPath }}
+      volumes:
+      # Memory-backed emptyDir mounted at /dev/shm, capped at resources.shmSize.
+      - name: dshm
+        emptyDir:
+          sizeLimit: {{ .Values.resources.shmSize }}
+          medium: Memory
+      # Model weights claim declared in templates/nfs-pvc.yaml.
+      - name: weight-volume
+        persistentVolumeClaim:
+          claimName: {{ printf "%s-pvc-model" .Release.Name }}
+      # Alternative kept for reference: mount the weights directly over NFS.
+      # - name: weight-volume
+      #   nfs:
+      #     path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
+      #     server: "10.6.80.11"
+      # Optional scheduling constraints, emitted only when set in values.
+      {{- if .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- end }}
+      {{- if .Values.affinity }}
+      affinity:
+        {{- toYaml .Values.affinity | nindent 8 }}
+      {{- end }}
+      {{- if .Values.tolerations }}
+      tolerations:
+        {{- toYaml .Values.tolerations | nindent 8 }}
+      {{- end }}
+{{- end }}