# helm-charts/open-webui/charts/ollama/templates/knative/service.yaml

---
{{- if .Values.knative.enabled }}
# Knative Service wrapping the Ollama server.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  # Name, namespace and labels all come from the chart's shared helpers.
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- if .Values.knative.annotations }}
  # User-supplied annotations from .Values.knative.annotations.
  annotations:
    {{- toYaml .Values.knative.annotations | nindent 4 }}
  {{- end }}
spec:
  template:
    spec:
      # Concurrency and timeout settings are read from .Values.knative.
      containerConcurrency: {{ .Values.knative.containerConcurrency }}
      timeoutSeconds: {{ .Values.knative.timeoutSeconds }}
      responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }}
      idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }}
      {{- if .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml .Values.imagePullSecrets | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      {{- with .Values.runtimeClassName }}
      runtimeClassName: {{ . | quote }}
      {{- end }}
      {{- with .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ . }}
      {{- end }}
      {{- if .Values.initContainers }}
      # initContainers are passed through `tpl`, so values may themselves
      # contain template expressions evaluated against the root context.
      initContainers:
        {{- tpl (toYaml .Values.initContainers) . | nindent 8 }}
      {{- end }}
      containers:
        # Image tag: .Values.image.tag if set; otherwise the chart appVersion,
        # with a "-rocm" suffix when an AMD GPU is enabled.
        - image: "{{ .Values.image.repository }}:{{ .Values.image.tag |  default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          ports:
            - containerPort: {{ .Values.ollama.port }}
          env:
            # Bind the server on all interfaces at the configured port.
            - name: OLLAMA_HOST
              value: "0.0.0.0:{{ .Values.ollama.port }}"
            {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
            # NVIDIA (or unspecified GPU type): put the NVIDIA/CUDA binary
            # directories first on PATH.
            - name: PATH
              value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            {{- end }}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- with .Values.extraEnvFrom }}
          # Emitted only when set, so no bare `envFrom:` (null) is rendered.
          envFrom:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.extraArgs }}
          # Emitted only when set, so no bare `args:` (null) is rendered.
          args:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- if .Values.resources }}
          # Container resources: .Values.resources with the GPU limit merged
          # into `limits` when a GPU is enabled. Deep copies are used so the
          # shared .Values map is not mutated by merge/mergeOverwrite.
          resources:
            {{- $limits := deepCopy (default dict .Values.resources.limits) }}
            {{- if .Values.ollama.gpu.enabled }}
              {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
                {{- /* `merge` keeps keys already in $limits, so an explicit user limit wins. */}}
                {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
                {{- $limits = merge $limits $gpuLimit }}
              {{- end }}
              {{- if eq .Values.ollama.gpu.type "amd" }}
                {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
                {{- $limits = merge $limits $gpuLimit }}
              {{- end }}
            {{- end }}
            {{- $resources := mergeOverwrite (deepCopy .Values.resources) (dict "limits" $limits) }}
            {{- toYaml $resources | nindent 12 }}
          {{- end }}
          volumeMounts:
            # Model/data directory; backed by the `ollama-data` volume.
            - name: ollama-data
              mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
              {{- with .Values.persistentVolume.subPath }}
              subPath: {{ . }}
              {{- end }}
            {{- range .Values.ollama.models.create }}
            {{- if .configMapRef }}
            # One mount per configMap-backed model. Each file is mounted at its
            # own path (/models/<name>) via subPath, because Kubernetes requires
            # mountPath to be unique within a container; mounting every volume
            # at /models fails as soon as two such models are defined.
            # readOnly is explicit: configMap content is not writable anyway.
            # NOTE(review): Knative is understood to require readOnly on
            # configMap mounts; confirm against the Knative version in use.
            - name: {{ .name }}-config-model-volume
              mountPath: /models/{{ .name }}
              subPath: {{ .name }}
              readOnly: true
            {{- end }}
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- if .Values.livenessProbe.enabled }}
          # Probes address the port by number: the container port in this
          # manifest is declared without a name, so a named port such as
          # `http` has nothing to resolve to.
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path }}
              port: {{ .Values.ollama.port }}
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          {{- end }}
          {{- if .Values.readinessProbe.enabled }}
          # Same numeric-port rule as the liveness probe.
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path }}
              port: {{ .Values.ollama.port }}
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          {{- end }}
          {{- with .Values.lifecycle}}
          # A user-supplied lifecycle block replaces the generated one entirely.
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- else }}
          {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
          # Generated postStart hook: waits until `ollama ps` succeeds, then
          # pulls, creates and runs the models listed under
          # .Values.ollama.models, in that order.
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    while ! /bin/ollama ps > /dev/null 2>&1; do
                      sleep 5
                    done
                    {{- /* Pull each listed model; --insecure is added when .Values.ollama.insecure is true. */}}
                    {{- if .Values.ollama.models.pull }}
                    {{- range .Values.ollama.models.pull }}
                    /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
                    {{- end }}
                    {{- end }}

                    {{- /* Create models from an inline template (written to disk via heredoc) or from a mounted configMap file. */}}
                    {{- if .Values.ollama.models.create }}
                    {{- range .Values.ollama.models.create }}
                    {{- if .template }}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    {{- /* The helper must receive the root context ($): inside range, "." is the model entry. */}}
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}

                    {{- if .Values.ollama.models.run }}
                    {{- range .Values.ollama.models.run }}
                    /bin/ollama run {{ . }}
                    {{- end }}
                    {{- end }}
          {{- end }}
          {{- end }}
      volumes:
        # Data volume: an emptyDir unless persistence is enabled, in which
        # case an existing claim is used or one named after the release.
        - name: ollama-data
          {{- if not .Values.persistentVolume.enabled }}
          emptyDir: {}
          {{- else }}
          persistentVolumeClaim:
            claimName: {{ .Values.persistentVolume.existingClaim | default (include "ollama.fullname" .) }}
          {{- end }}
        {{- range $model := .Values.ollama.models.create }}
        {{- with $model.configMapRef }}
        # configMap-backed model definition: the referenced key is projected
        # as a file named after the model.
        - name: {{ $model.name }}-config-model-volume
          configMap:
            name: {{ . }}
            items:
              - key: {{ $model.configMapKeyRef }}
                path: {{ $model.name }}
        {{- end }}
        {{- end }}
        {{- if .Values.volumes }}
        {{- toYaml .Values.volumes | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- /* True when a GPU is enabled and its type is "nvidia" or unset. */}}
      {{- $nvidiaGpu := and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
      {{- if or $nvidiaGpu .Values.tolerations }}
      # Rendered only when there is at least one entry, so an AMD GPU with no
      # user tolerations no longer produces a bare `tolerations:` (null).
      tolerations:
        {{- if $nvidiaGpu }}
        # Tolerate NoSchedule taints keyed by the NVIDIA GPU resource name.
        - key: "{{ .Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu" }}"
          operator: Exists
          effect: NoSchedule
        {{- end }}
        {{- with .Values.tolerations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}