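{{/*
Single-worker Deployment for the LLaMA-Factory web UI, rendered only when
.Values.workerSize is 1. The container runs `llamafactory-cli webui` (served on
port 7860), mounts a memory-backed emptyDir at /dev/shm, and mounts a
PersistentVolumeClaim for model weights.
*/}}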
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: llama-factory
  template:
    metadata:
      labels:
        app: llama-factory
    spec:
      containers:
        - name: llama-factory
          image: {{ .Values.llama.image }}
          imagePullPolicy: IfNotPresent
          env:
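            # Hugging Face Hub token, e.g. for downloading gated model weights.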
            - name: HUGGING_FACE_HUB_TOKEN
              value: {{ .Values.vllm.huggingfaceToken }}
          command:
            - sh
            - -c
            - "llamafactory-cli webui"
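          # nvidia.com/gpu is set only under limits; Kubernetes defaults the request to the limit.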
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
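          # 7860 is the default port of the LLaMA-Factory (Gradio) web UI.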
          ports:
            - containerPort: 7860
              name: http
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
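        # Memory-backed emptyDir mounted at /dev/shm, giving the workload more shared memory than the container runtime default.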
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
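        # PVC holding the model weights mounted into the container.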
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
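{{/*
Illustrative sketch of the values this template consumes. All values below are
placeholders (not taken from the chart's actual values.yaml); adjust them to
your environment:

workerSize: 1
replicaCount: 1
app: llama-factory
llama:
  image: your-registry/llama-factory:latest   # placeholder image reference
vllm:
  huggingfaceToken: "hf_xxx"                  # placeholder token
model:
  localMountPath: /models
resources:
  gpuLimit: 1
  memoryLimit: 64Gi
  cpuRequest: "8"
  shmSize: 16Gi
*/}}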