{{- /*
Knative Serving Service for Ollama.

Rendered only when .Values.knative.enabled is truthy. The revision template
runs a single Ollama container that:
  - listens on .Values.ollama.port (exported through OLLAMA_HOST),
  - optionally requests NVIDIA or AMD GPU resources (.Values.ollama.gpu.*),
  - mounts the "ollama-data" volume (PVC or emptyDir),
  - optionally pulls / creates / runs models from a postStart hook.

All comments in this file are template comments, so they never appear in the
rendered manifest.
*/ -}}
---
{{- if .Values.knative.enabled }}
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- with .Values.knative.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  template:
    spec:
      {{- /* Revision-level request handling settings, taken verbatim from .Values.knative. */}}
      containerConcurrency: {{ .Values.knative.containerConcurrency }}
      timeoutSeconds: {{ .Values.knative.timeoutSeconds }}
      responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }}
      idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      {{- if .Values.runtimeClassName }}
      runtimeClassName: {{ .Values.runtimeClassName | quote }}
      {{- end }}
      {{- if .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
      {{- end }}
      {{- with .Values.initContainers }}
      initContainers:
        {{- /* initContainers are passed through tpl, so values may themselves contain template expressions. */}}
        {{- tpl (toYaml . ) $ | nindent 8 }}
      {{- end }}
      containers:
        {{- /* Image tag: explicit .Values.image.tag wins; otherwise the chart appVersion, suffixed with "-rocm" when an AMD GPU is enabled. */}}
        - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          ports:
            - containerPort: {{ .Values.ollama.port }}
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0:{{ .Values.ollama.port }}"
            {{- /* NVIDIA is the default GPU type when gpu.type is unset; PATH is extended with the NVIDIA/CUDA bin directories. */}}
            {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
            - name: PATH
              value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            {{- end }}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- /* envFrom / args are emitted only when they have content, so no bare (null) keys are rendered. */}}
          {{- with .Values.extraEnvFrom }}
          envFrom:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.extraArgs }}
          args:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- if .Values.resources }}
          resources:
            {{- /*
            Build the effective limits: start from .Values.resources.limits and add
            the GPU resource when a GPU is enabled. Sprig "merge" gives precedence to
            its first argument, so a GPU limit already set by the user is kept.
            The result is then written over .Values.resources with mergeOverwrite
            (which mutates .Values.resources in place).
            */}}
            {{- $limits := default dict .Values.resources.limits }}
            {{- if .Values.ollama.gpu.enabled }}
            {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
            {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- if eq .Values.ollama.gpu.type "amd" }}
            {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- end }}
            {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }}
            {{- toYaml $ressources | nindent 12 }}
          {{- end }}
          volumeMounts:
            - name: ollama-data
              mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
              {{- if .Values.persistentVolume.subPath }}
              subPath: {{ .Values.persistentVolume.subPath }}
              {{- end }}
            {{- /* One mount per ConfigMap-backed model; each is mounted at /models. */}}
            {{- range .Values.ollama.models.create }}
            {{- if .configMapRef }}
            - name: {{ .name }}-config-model-volume
              mountPath: /models
            {{- end }}
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- /* NOTE(review): both probes target the named port "http" while the container port above is unnamed - confirm how Knative resolves this. */}}
          {{- if .Values.livenessProbe.enabled }}
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          {{- end }}
          {{- if .Values.readinessProbe.enabled }}
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          {{- end }}
          {{- /* A user-supplied lifecycle replaces the generated model-bootstrap hook entirely. */}}
          {{- with .Values.lifecycle }}
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- else }}
          {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    while ! /bin/ollama ps > /dev/null 2>&1; do
                      sleep 5
                    done
                    {{- /* The loop above blocks until "ollama ps" succeeds; only then are models pulled, created and run. */}}
                    {{- if .Values.ollama.models.pull }}
                    {{- range .Values.ollama.models.pull }}
                    /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.create }}
                    {{- range .Values.ollama.models.create }}
                    {{- if .template }}
                    {{- /*
                    Write the inline Modelfile with a heredoc, then create the model from it.
                    Inside this range "." is the model entry, so the helper must be given
                    the root context "$" in both places.
                    */}}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.run }}
                    {{- range .Values.ollama.models.run }}
                    /bin/ollama run {{ . }}
                    {{- end }}
                    {{- end }}
          {{- end }}
          {{- end }}
      volumes:
        {{- /* Model storage: a PVC (existing claim, or one named after the release fullname) when persistence is enabled, otherwise an emptyDir. */}}
        - name: ollama-data
          {{- if .Values.persistentVolume.enabled }}
          persistentVolumeClaim:
            claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
          {{- else }}
          emptyDir: {}
          {{- end }}
        {{- range .Values.ollama.models.create }}
        {{- if .configMapRef }}
        - name: {{ .name }}-config-model-volume
          configMap:
            name: {{ .configMapRef }}
            items:
              - key: {{ .configMapKeyRef }}
                path: {{ .name }}
        {{- end }}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
      tolerations:
        {{- /* With an NVIDIA GPU (the default type), tolerate a NoSchedule taint keyed by the GPU resource name. */}}
        {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
        - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
          operator: Exists
          effect: NoSchedule
        {{- end }}
        {{- with .Values.tolerations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}