{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
# Multi-node LMDeploy inference, rendered only when more than one pod per
# group is requested (workerSize > 1) and the selected app is "lmdeploy".
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  # Number of leader/worker groups.
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    # A restart of any pod recreates its whole group.
    restartPolicy: RecreateGroupOnPodRestart
    # Pods per group.
    size: {{ .Values.workerSize }}
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
        # Model download runs as the first initContainer: it fetches the model
        # onto the shared weight volume unless it is already present there.
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: "https://hf-mirror.com"
            # Quoted with an empty-string fallback so that an unset token
            # renders as "" instead of a YAML null.
            # NOTE(review): confirm that hfd.sh actually reads this variable;
            # it may need the token passed as a command-line option instead.
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken | default "" | quote }}
          command:
            - sh
            - -c
            - |
              # Stop at the first failing step so the initContainer reports the
              # real cause instead of a later, less obvious error.
              set -e
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              echo "DEST_DIR= $DEST_DIR"
              # Diagnostic listings only; the paths may not exist yet, so a
              # failure here must not abort the script.
              ls "$DEST_DIR" || true
              ls -l "{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}" || true
              # Check whether the model exists (config.json is the marker);
              # download it only when it is missing.
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l "{{ .Values.model.localMountPath }}" || true
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                # Keep the helper script in /tmp so the download does not
                # depend on the working directory being writable.
                wget -O /tmp/hfd.sh https://hf-mirror.com/hfd/hfd.sh
                chmod a+x /tmp/hfd.sh
                # Install aria2 only when it is missing, and refresh the
                # package index first so the install can resolve the package.
                if ! command -v aria2c >/dev/null 2>&1; then
                  apt-get update
                  apt-get install -y aria2
                fi
                /tmp/hfd.sh "{{ .Values.model.huggingfaceName }}" --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
          - name: weight-volume
            mountPath: {{ .Values.model.localMountPath }}
        containers:
          # Leader container: runs multi-node-serving.sh in "leader" mode and
          # then starts the LMDeploy API server on port 8080.
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: ["IPC_LOCK"]
            env:
              # HUGGING_FACE_HUB_TOKEN is deliberately not set on this
              # container. Do not park a commented-out template expression
              # here: Helm evaluates template actions inside YAML comments
              # too, which can fail rendering or print the token into the
              # rendered manifest.
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              # $(LWS_GROUP_SIZE) is a Kubernetes env reference expanded before
              # the shell runs; the remaining $(...) / $((...)) forms are left
              # for the shell. Tensor parallelism = GPUs per pod * pods per
              # group.
              - |
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE)
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}')
                MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME
                lmdeploy serve api_server "$MODEL_PATH" --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              # Plain TCP check on the API port; an HTTP probe against /health
              # is the disabled alternative.
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          # Memory-backed emptyDir that the leader container mounts at /dev/shm.
          - name: dshm
            emptyDir:
              sizeLimit: {{ .Values.resources.shmSize }}
              medium: Memory
          # Model weights, backed by the existing claim "nfs-pvc-model".
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
    # Worker pods: run multi-node-serving.sh in "worker" mode, pointed at the
    # group's leader address, with the same GPU/RDMA resources and volumes as
    # the leader container.
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: ["IPC_LOCK"]
            env:
              # HUGGING_FACE_HUB_TOKEN is deliberately not set on this
              # container. Do not park a commented-out template expression
              # here: Helm evaluates template actions inside YAML comments
              # too, which would print the token into the rendered manifest.
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              # $(LWS_LEADER_ADDRESS) is a Kubernetes env reference expanded
              # before the shell runs.
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          # Model weights, backed by the existing claim "nfs-pvc-model".
          - name: weight-volume
            persistentVolumeClaim:
              claimName: nfs-pvc-model
{{- end }}