first backup of charts

2025-09-23 10:01:17 +08:00
commit cbfc0104a6
170 changed files with 17788 additions and 0 deletions
--- a/vllm/vllm-app/templates/single.yaml
+++ b/vllm/vllm-app/templates/single.yaml
@ -0,0 +1,114 @@
+{{- if eq (.Values.workerSize | int) 1 }}
+# Single-node variant: this Deployment is rendered only when workerSize is 1.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm
+spec:
+  replicas: {{ .Values.replicaCount }}
+  # The selector must match the pod template labels below.
+  selector:
+    matchLabels:
+      app: vllm-app
+  template:
+    metadata:
+      labels:
+        app: vllm-app
+    spec:
+      initContainers:
+        # Model download runs as the first initContainer: it fills the shared
+        # weight volume before the serving container starts.
+        - name: download-model
+          image: {{ .Values.model.download.image }}
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: HF_ENDPOINT
+              value: https://hf-mirror.com
+            # Always rendered as a quoted string; an unset token becomes ""
+            # instead of a YAML null.
+            # NOTE(review): this container reads .Values.model.huggingfaceToken
+            # while the vllm-leader container reads .Values.vllm.huggingfaceToken;
+            # confirm that two separate keys are intended.
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: {{ .Values.model.huggingfaceToken | default "" | quote }}
+          command:
+            - sh
+            - -c
+            - |
+              # Stop at the first failing step so a broken download fails the
+              # init container instead of continuing with a partial setup.
+              set -e
+              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
+              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
+              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
+              echo "DEST_DIR= $DEST_DIR"
+              # Diagnostic listings only; a missing directory must not abort the script.
+              ls "$DEST_DIR" || true
+              ls -l "{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}" || true
+              # Check whether the model is already present (config.json exists);
+              # download it only when it is not.
+              if [ ! -f "$DEST_DIR/config.json" ]; then
+                ls -l "{{ .Values.model.localMountPath }}" || true
+                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
+                wget https://hf-mirror.com/hfd/hfd.sh
+                chmod a+x hfd.sh
+                # Install aria2 only when it is missing, and refresh the package
+                # lists first so the install does not fail on an empty apt cache.
+                if ! command -v aria2c >/dev/null 2>&1; then
+                  apt-get update
+                  apt-get install -y aria2
+                fi
+                ./hfd.sh "{{ .Values.model.huggingfaceName }}" --local-dir "$DEST_DIR"
+                # Alternative: huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+              else
+                echo "Model already exists at $DEST_DIR"
+              fi
+          volumeMounts:
+            - name: weight-volume
+              mountPath: {{ .Values.model.localMountPath }}
+      containers:
+        # vLLM OpenAI-compatible API server, serving the model found on the
+        # weight volume on port 8080.
+        - name: vllm-leader
+          image: {{ .Values.vllm.image }}
+          imagePullPolicy: IfNotPresent
+          # securityContext:
+          #   capabilities:
+          #     add: ["IPC_LOCK"]
+          env:
+            # Always rendered as a quoted string; an unset token becomes ""
+            # instead of a YAML null.
+            - name: HUGGING_FACE_HUB_TOKEN
+              value: {{ .Values.vllm.huggingfaceToken | default "" | quote }}
+            # Optional network / NCCL settings, currently disabled:
+            # - name: GLOO_SOCKET_IFNAME
+            #   value: eth0
+            # - name: NCCL_SOCKET_IFNAME
+            #   value: eth0
+            # - name: NCCL_IB_DISABLE
+            #   value: "0"
+            # - name: NCCL_DEBUG
+            #   value: INFO
+            # - name: NCCL_IB_HCA
+            #   value: mlx5_0:1
+            # - name: NCCL_IB_GID_INDEX
+            #   value: "0"  # or "7", depending on your network configuration
+            - name: RAY_DEDUP_LOGS
+              value: "0"
+          command:
+            - sh
+            - -c
+            - |
+              # The model directory is the mount path plus the last path
+              # component of the Hugging Face model name.
+              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}')
+              MODEL_PATH='{{ .Values.model.localMountPath }}'/"$MODEL_NAME"
+              echo 'Using single node ------------------------------------------'
+              # exec replaces the shell with the server process, so signals
+              # delivered to the container's main process reach the server
+              # directly instead of stopping at the wrapper shell.
+              exec python3 -m vllm.entrypoints.openai.api_server \
+                --port 8080 \
+                --model "$MODEL_PATH" \
+                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+                --pipeline-parallel-size {{ .Values.workerSize }} \
+                --trust-remote-code
+          resources:
+            limits:
+              # The same value is passed to --tensor-parallel-size above.
+              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+              memory: {{ .Values.resources.memoryLimit }}
+              ephemeral-storage: 10Gi
+              # rdma/rdma_shared_device_a: 10
+            requests:
+              ephemeral-storage: 10Gi
+              cpu: {{ .Values.resources.cpuRequest }}
+          ports:
+            - containerPort: 8080
+              name: http
+          # The pod is marked ready once GET /health on port 8080 succeeds;
+          # the first probe is delayed by 120 seconds.
+          readinessProbe:
+            # tcpSocket:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 120
+            periodSeconds: 20
+            timeoutSeconds: 5
+          volumeMounts:
+            - mountPath: /dev/shm
+              name: dshm
+            - name: weight-volume
+              mountPath: {{ .Values.model.localMountPath }}
+      volumes:
+        # Memory-backed emptyDir, mounted at /dev/shm in the vllm-leader container.
+        - name: dshm
+          emptyDir:
+            medium: Memory
+            sizeLimit: {{ .Values.resources.shmSize }}
+        # Model weights, backed by the "<app>-pvc-model" PersistentVolumeClaim.
+        - name: weight-volume
+          persistentVolumeClaim:
+            claimName: {{ .Values.app }}-pvc-model
+{{- end }}