{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}
    spec:
      initContainers:
        # Model download runs as the first initContainer
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.model.huggingfaceToken }}
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              # Check whether the model already exists; download it if not
              echo "DEST_DIR=$DEST_DIR"
              ls "$DEST_DIR"
              ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt-get update && apt-get install -y aria2
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          #securityContext:
          #  capabilities:
          #    add: [ "IPC_LOCK" ]
          env:
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken }}
            #- name: GLOO_SOCKET_IFNAME
            #  value: eth0
            #- name: NCCL_SOCKET_IFNAME
            #  value: eth0
            #- name: NCCL_IB_DISABLE
            #  value: "0"
            #- name: NCCL_DEBUG
            #  value: INFO
            #- name: NCCL_IB_HCA
            #  value: mlx5_0:1
            #- name: NCCL_IB_GID_INDEX
            #  value: "0"  # or "7", depending on your network configuration
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}')
              MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME
              echo 'Using single node ------------------------------------------'
              python3 -m vllm.entrypoints.openai.api_server \
                --port 8080 \
                --model $MODEL_PATH \
                --tensor-parallel-size {{ .Values.vllm.gpuLimit }} \
                --pipeline-parallel-size {{ .Values.workerSize }}
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
              memory: {{ .Values.vllm.memoryLimit }}
              ephemeral-storage: 10Gi
              #rdma/rdma_shared_device_a: 10
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.vllm.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            #tcpSocket:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.vllm.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: nfs-pvc-model
{{- end }}
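{{- /*
A minimal values.yaml sketch for this template. The keys below are the ones the
template above references; every value shown is only an illustrative assumption,
not the chart's actual defaults.

workerSize: 1
replicaCount: 1
model:
  huggingfaceName: Qwen/Qwen2.5-7B-Instruct   # example model id (assumption)
  huggingfaceToken: ""
  localMountPath: /models
  download:
    image: python:3.10-slim                   # example downloader image (assumption)
vllm:
  image: vllm/vllm-openai:latest              # example vLLM image (assumption)
  huggingfaceToken: ""
  gpuLimit: 1
  memoryLimit: 64Gi
  cpuRequest: "8"
  shmSize: 16Gi
*/}}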