first backup of charts

Ivan087
2025-09-23 10:01:17 +08:00
commit cbfc0104a6
170 changed files with 17788 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,25 @@
apiVersion: v2
name: llama-factory
description: A Helm chart for deploying LLaMA-Factory with NFS storage
annotations:
"helm.sh/resource-policy": keep # 防止资源被意外删除
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@@ -0,0 +1,159 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: llamafactory
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt-get update && apt-get install -y aria2
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: llama-leader
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
- name: USE_RAY
value: "1"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
command:
- sh
- -c
- "llamafactory-cli webui"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: llama-worker
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "echo $(LWS_LEADER_ADDRESS);
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
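
When workerSize is greater than 1, the leader pod above runs the LLaMA-Factory web UI on port 7860. A minimal sketch of inspecting the group and reaching the UI, assuming the chart has been installed and the LeaderWorkerSet renders with the name llamafactory as defined here (the leader pod name is illustrative):

# list the leader and worker pods created by the LeaderWorkerSet controller
kubectl get pods -l leaderworkerset.sigs.k8s.io/name=llamafactory

# forward the leader's web UI port to the local machine
kubectl port-forward pod/llamafactory-0 7860:7860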

View File

@@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}

View File

@@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model

View File

@@ -0,0 +1,33 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Values.app }}-leader-nodeport
spec:
type: NodePort
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: llamafactory
role: leader
{{- else }}
selector:
app: llama-factory
{{- end }}
ports:
- protocol: TCP
port: 8080
targetPort: 7860
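
No explicit nodePort is set, so Kubernetes assigns one from the NodePort range at install time. A quick sketch of looking it up, assuming app is left at its default of llama-factory so the Service is named llama-factory-leader-nodeport:

kubectl get svc llama-factory-leader-nodeport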

View File

@@ -0,0 +1,51 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: llama-factory
template:
metadata:
labels:
app: llama-factory
spec:
containers:
- name: llama-factory
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- "llamafactory-cli webui"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
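
The Deployment above and the LeaderWorkerSet template are mutually exclusive: this path renders only when workerSize equals 1, the multi-node path only when it is greater than 1 and app is llama-factory. A minimal sketch of previewing which objects a given value set produces, assuming the chart directory is the working directory:

# single-node path: expect a Deployment (plus PV, PVC and Service)
helm template . --set workerSize=1

# multi-node path: expect a LeaderWorkerSet instead of the Deployment
helm template . --set workerSize=2 --set app=llama-factory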

View File

@@ -0,0 +1,44 @@
# Default values for llama-factory.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Model configuration
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only field the user needs to set
localMountPath: "/Model" # fixed mount path for the PVC
huggingfaceToken: "<your-hf-token>"
download:
enabled: false # enable automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that includes huggingface-cli
# Application selection
app: "llama-factory"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "20Gi"
llama:
image: "docker.io/library/one-click:v1"
# NFS PV/PVC configuration
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2
nodeSelector: {}
tolerations: []
affinity: {}
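
Any of these defaults can be overridden at install time. A minimal sketch of an install with a custom values file; the file name and every value below are placeholders and must match your environment:

cat > my-values.yaml <<'EOF'
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
  huggingfaceToken: "<your-hf-token>"
nfs:
  server: "<nfs-server-address>"
  path: "<nfs-export-path>"
workerSize: 1
EOF

helm install llama-factory . -f my-values.yaml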

View File

@@ -0,0 +1,53 @@
application_name: &application_name llama-factory
distributed:
method: helm
release_name: *application_name
chart: llama-factory
sets:
app: llama-factory
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
workerSize: 2
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: llamafactory
monolithic:
method: helm
release_name: *application_name
chart: llama-factory
sets:
app: llama-factory
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
workerSize: 1
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: llama-factory
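
Both the distributed and monolithic entries declare method: helm with a chart name and a nested sets map. A hypothetical equivalent command for the distributed block, assuming the orchestrator flattens sets into dot-notation --set flags (the flag names follow the values.yaml keys of the chart above):

helm install llama-factory llama-factory \
  --set app=llama-factory \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct \
  --set resources.gpuLimit=1 \
  --set resources.cpuRequest=8 \
  --set resources.memoryLimit=16Gi \
  --set resources.shmSize=15Gi \
  --set llama.image=docker.io/library/one-click:v1 \
  --set workerSize=2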