{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
# LeaderWorkerSet for multi-node LMDeploy serving.
#
# Rendered only when .Values.workerSize is greater than 1 and .Values.app
# equals "lmdeploy".
#
# Leader pod:
#   1. initContainer "download-model" fetches the model into the shared
#      weight volume when its config.json is not already present.
#   2. container "lmdeploy-leader" runs multi-node-serving.sh in leader mode
#      and then starts the LMDeploy API server on port 8080.
# Worker pods run multi-node-serving.sh in worker mode, pointed at the leader.
#
# NOTE: Helm evaluates template actions even inside YAML comments, so the
# comments in this file deliberately never contain template braces.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted with an empty-string default so that an unset token
              # renders as "" instead of a null env value, and a token that
              # looks like a number/boolean stays a string.
              # NOTE(review): the token is rendered in plain text into the pod
              # spec; consider sourcing it from a Secret.
              - name: HUGGING_FACE_HUB_TOKEN
                value: {{ .Values.model.huggingfaceToken | default "" | quote }}
            # The script derives the destination directory from the last path
            # component of the model name and downloads only when
            # config.json is missing there.
            # NOTE(review): aria2 is installed via apt without a prior package
            # index update; confirm the download image supports this.
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model exists; download it if it does not.
                echo "DEST_DIR= $DEST_DIR"
                ls "$DEST_DIR"
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                  # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add:
                  - IPC_LOCK
            env:
              # Disabled: a HUGGING_FACE_HUB_TOKEN entry sourced from
              # .Values.lmdeploy.huggingfaceToken. Spelled without template
              # braces on purpose (see the note at the top of this file).
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            # Folded scalar: the lines below are joined with single spaces into
            # one shell command line. $(LWS_GROUP_SIZE) is not defined in this
            # template; it is expected to be provided to the container
            # environment by the LeaderWorkerSet controller (confirm against
            # the LWS version in use). The tensor-parallel degree is computed
            # by the shell as gpuLimit * workerSize.
            command:
              - sh
              - -c
              - >-
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader
                --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
                MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                lmdeploy serve api_server $MODEL_PATH
                --backend pytorch
                --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }}))
                --server-port 8080
                --cache-max-entry-count 0.9
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              # A TCP check on the serving port is used; the HTTP alternative
              # (httpGet with path /health) is left disabled.
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          # Memory-backed /dev/shm, sized from .Values.resources.shmSize.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add:
                  - IPC_LOCK
            # $(LWS_LEADER_ADDRESS) is not defined in this template; it is
            # expected to be provided by the LeaderWorkerSet controller
            # (confirm against the LWS version in use).
            command:
              - sh
              - -c
              - >-
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker
                --ray_address=$(LWS_LEADER_ADDRESS)
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              # Disabled: a HUGGING_FACE_HUB_TOKEN entry sourced from
              # .Values.lmdeploy.huggingfaceToken. Spelled without template
              # braces on purpose (see the note at the top of this file).
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          # Memory-backed /dev/shm, sized from .Values.resources.shmSize.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
{{- end }}