{{- /* File-viewer header residue (not part of the manifest): 201 lines, 8.3 KiB, YAML */ -}}
---
{{- if .Values.knative.enabled }}
{{- /*
Knative Service for Ollama.

Rendered only when .Values.knative.enabled is set. The revision template below
carries the pod-level settings (image pull secrets, service account, init
containers, volumes, scheduling constraints) and a single container running
the Ollama image.
*/}}
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- with .Values.knative.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  template:
    spec:
      containerConcurrency: {{ .Values.knative.containerConcurrency }}
      timeoutSeconds: {{ .Values.knative.timeoutSeconds }}
      responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }}
      idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      {{- if .Values.runtimeClassName }}
      runtimeClassName: {{ .Values.runtimeClassName | quote }}
      {{- end }}
      {{- if .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
      {{- end }}
      {{- with .Values.initContainers }}
      initContainers:
        {{- tpl (toYaml . ) $ | nindent 8 }}
      {{- end }}
      containers:
        {{- /* The tag defaults to the chart appVersion, with a "-rocm" suffix when an AMD GPU is enabled. */}}
        - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          ports:
            - containerPort: {{ .Values.ollama.port }}
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0:{{ .Values.ollama.port }}"
            {{- /* An empty gpu.type is treated as NVIDIA. */}}
            {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
            - name: PATH
              value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            {{- end }}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- /* envFrom/args are emitted only when values exist, so no bare (null) keys are rendered. */}}
          {{- with .Values.extraEnvFrom }}
          envFrom:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.extraArgs }}
          args:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- /*
          Resources: start from a deep copy of .Values.resources so the chart
          values are never mutated, then add the GPU limit when GPU support is
          enabled. `merge` keeps keys already present in $limits, so an
          explicit user-provided GPU limit takes precedence over the default.
          The block is also rendered when only the GPU is enabled, so the GPU
          limit is not lost when .Values.resources is empty.
          */}}
          {{- if or .Values.resources .Values.ollama.gpu.enabled }}
          resources:
            {{- $resources := deepCopy (.Values.resources | default dict) }}
            {{- $limits := $resources.limits | default dict }}
            {{- if .Values.ollama.gpu.enabled }}
            {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
            {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- if eq .Values.ollama.gpu.type "amd" }}
            {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- end }}
            {{- $_ := set $resources "limits" $limits }}
            {{- toYaml $resources | nindent 12 }}
          {{- end }}
          volumeMounts:
            - name: ollama-data
              mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
              {{- if .Values.persistentVolume.subPath }}
              subPath: {{ .Values.persistentVolume.subPath }}
              {{- end }}
            {{- /* NOTE(review): every configMapRef model mounts at /models; with more than one such model the mountPath repeats. Confirm this is intended. */}}
            {{- range .Values.ollama.models.create }}
            {{- if .configMapRef }}
            - name: {{ .name }}-config-model-volume
              mountPath: /models
            {{- end }}
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- /* Probes target the numeric container port: the container port above is unnamed, so a named port could not be resolved. */}}
          {{- if .Values.livenessProbe.enabled }}
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path }}
              port: {{ .Values.ollama.port }}
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          {{- end }}
          {{- if .Values.readinessProbe.enabled }}
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path }}
              port: {{ .Values.ollama.port }}
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          {{- end }}
          {{- /*
          A user-supplied lifecycle wins. Otherwise, when any model is listed
          under pull/run/create, a postStart hook waits for `ollama ps` to
          succeed and then pulls, creates and runs the models in that order.
          */}}
          {{- with .Values.lifecycle }}
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- else }}
          {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    while ! /bin/ollama ps > /dev/null 2>&1; do
                      sleep 5
                    done
                    {{- if .Values.ollama.models.pull }}
                    {{- range .Values.ollama.models.pull }}
                    /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
                    {{- end }}
                    {{- end }}

                    {{- if .Values.ollama.models.create }}
                    {{- range .Values.ollama.models.create }}
                    {{- if .template }}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}

                    {{- if .Values.ollama.models.run }}
                    {{- range .Values.ollama.models.run }}
                    /bin/ollama run {{ . }}
                    {{- end }}
                    {{- end }}
          {{- end }}
          {{- end }}
      volumes:
        {{- /* Model storage: a PVC (existing claim, or one named after the release fullname) when persistence is enabled, otherwise an emptyDir. */}}
        - name: ollama-data
          {{- if .Values.persistentVolume.enabled }}
          persistentVolumeClaim:
            claimName: {{ .Values.persistentVolume.existingClaim | default (include "ollama.fullname" .) }}
          {{- else }}
          emptyDir: {}
          {{- end }}
        {{- range .Values.ollama.models.create }}
        {{- if .configMapRef }}
        - name: {{ .name }}-config-model-volume
          configMap:
            name: {{ .configMapRef }}
            items:
              - key: {{ .configMapKeyRef }}
                path: {{ .name }}
        {{- end }}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- /* With an NVIDIA GPU enabled, a toleration keyed on the GPU resource name is added ahead of any user-defined tolerations. */}}
      {{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
      tolerations:
        {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
        - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
          operator: Exists
          effect: NoSchedule
        {{- end }}
        {{- with .Values.tolerations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}