feat: support open-webui charts

This commit is contained in:
Ivan087
2025-11-21 10:45:23 +08:00
parent 30dd26120a
commit fc2a155ded
84 changed files with 13039 additions and 2 deletions

View File

@ -0,0 +1,30 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
.drone.yml
*.tmproj
.vscode/
#others
.github
kind-config.yml
ci/

View File

@ -0,0 +1,33 @@
# Helm chart metadata for the Ollama chart.
apiVersion: v2
name: ollama
description: Get up and running with large language models locally.
type: application

# Version of the chart itself.
version: 1.27.0

# Version of the packaged application; the Deployment template falls back to
# this value as the image tag when `image.tag` is empty.
appVersion: "0.11.4"

annotations:
  artifacthub.io/category: ai-machine-learning
  # The value below is a YAML document embedded as a literal block scalar.
  artifacthub.io/changes: |
    - kind: changed
      description: upgrade app version to 0.11.4
      links:
        - name: Ollama release v0.11.4
          url: https://github.com/ollama/ollama/releases/tag/v0.11.4

kubeVersion: "^1.16.0-0"
home: https://ollama.ai/
icon: https://ollama.ai/public/ollama.png
keywords:
  - ai
  - llm
  - llama
  - mistral
sources:
  - https://github.com/ollama/ollama
  - https://github.com/otwld/ollama-helm
maintainers:
  - name: OTWLD
    email: contact@otwld.com

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 OTWLD
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,331 @@
![otwld ollama helm chart banner](./banner.png)
![GitHub License](https://img.shields.io/github/license/otwld/ollama-helm)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/ollama-helm)](https://artifacthub.io/packages/helm/ollama-helm/ollama)
[![Helm Lint and Test](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
[![Discord](https://img.shields.io/badge/Discord-OTWLD-blue?logo=discord&logoColor=white)](https://discord.gg/U24mpqTynB)
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
## Requirements
- Kubernetes: `>= 1.16.0-0` for **CPU only**
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
*Not all GPUs are currently supported with ollama (especially with AMD)*
## Deploying Ollama chart
To install the `ollama` chart in the `ollama` namespace:
> [!IMPORTANT]
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
> registry https://helm.otwld.com/
> Please update your Helm registry accordingly.
```console
helm repo add otwld https://helm.otwld.com/
helm repo update
helm install ollama otwld/ollama --namespace ollama --create-namespace
```
## Upgrading Ollama chart
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
backwards incompatible changes.
Make adjustments to your values as needed, then run `helm upgrade`:
```console
# -- This pulls the latest version of the ollama chart from the repo.
helm repo update
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
```
## Uninstalling Ollama chart
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
```console
helm delete ollama --namespace ollama
```
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
parameters and flags.
## Interact with Ollama
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
- Interact with official client libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
## Examples
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
### Basic values.yaml example with GPU and two models pulled at startup
```yaml
ollama:
  gpu:
    # -- Enable GPU integration
    enabled: true
    # -- GPU type: 'nvidia' or 'amd'
    type: 'nvidia'
    # -- Specify the number of GPU to 1
    number: 1
  # -- List of models to pull at container startup
  models:
    pull:
      - mistral
      - llama2
```
---
### Basic values.yaml example with Ingress
```yaml
ollama:
  models:
    pull:
      - llama2
ingress:
  enabled: true
  hosts:
    - host: ollama.domain.lan
      paths:
        - path: /
          pathType: Prefix
```
- *API is now reachable at `ollama.domain.lan`*
---
### Create and run model from template
```yaml
ollama:
  models:
    create:
      - name: llama3.1-ctx32768
        template: |
          FROM llama3.1
          PARAMETER num_ctx 32768
    run:
      - llama3.1-ctx32768
```
## Upgrading from 0.X.X to 1.X.X
The version 1.X.X introduces the ability to load models in memory at startup, the values have been changed.
Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:
```yaml
ollama:
  models:
    - mistral
    - llama2
```
To:
```yaml
ollama:
  models:
    pull:
      - mistral
      - llama2
```
## Helm Values
- See [values.yaml](values.yaml) to see the Chart's default values.
| Key | Type | Default | Description |
|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| affinity | object | `{}` | Affinity for pod assignment |
| autoscaling.enabled | bool | `false` | Enable autoscaling |
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
| deployment.labels | object | `{}` | Labels to add to the deployment |
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
| extraEnv | list | `[]` | Additional environment variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like ConfigMap) |
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
| fullnameOverride | string | `""` | String to fully override template |
| hostIPC | bool | `false` | Use the host's IPC namespace. |
| hostNetwork | bool | `false` | Use the host's network namespace. |
| hostPID | bool | `false` | Use the host's PID namespace. |
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
| image.repository | string | `"ollama/ollama"` | Docker image registry |
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
| ingress.enabled | bool | `false` | Enable ingress controller resource |
| ingress.hosts[0].host | string | `"ollama.local"` | |
| ingress.hosts[0].paths[0].path | string | `"/"` | |
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
| initContainers | list | `[]` | Init containers to add to the pod |
| knative.annotations | object | `{}` | Knative service annotations |
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
| knative.enabled | bool | `false` | Enable Knative integration |
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
| namespaceOverride | string | `""` | String to fully override namespace |
| nodeSelector | object | `{}` | Node labels for pod assignment. |
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number |
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored |
| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored |
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice |
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled' is set, the default value is nvidia. If set to 'amd', the 'rocm' suffix is added to the image tag when 'image.tag' is not overridden, because AMD and CPU/CUDA use different images |
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral |
| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral |
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
| ollama.port | int | `11434` | |
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires persistentVolume.enabled: true |
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: <storageClass> If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) |
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty |
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: <volumeMode> If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
| podLabels | object | `{}` | Map of labels to add to the pods |
| podSecurityContext | object | `{}` | Pod Security Context |
| priorityClassName | string | `""` | Priority Class Name |
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
| replicaCount | int | `1` | Number of replicas |
| resources.limits | object | `{}` | Pod limit |
| resources.requests | object | `{}` | Pod requests |
| runtimeClassName | string | `""` | Specify runtime class |
| securityContext | object | `{}` | Container Security Context |
| service.annotations | object | `{}` | Annotations to add to the service |
| service.labels | object | `{}` | Labels to add to the service |
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
| service.port | int | `11434` | Service port |
| service.type | string | `"ClusterIP"` | Service type |
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
| tests.annotations | object | `{}` | Annotations to add to the tests |
| tests.enabled | bool | `true` | |
| tests.labels | object | `{}` | Labels to add to the tests |
| tolerations | list | `[]` | Tolerations for pod assignment |
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
----------------------------------------------
## Core team
<table>
<tr>
<td align="center">
<a href="https://github.com/jdetroyes"
><img
src="https://github.com/jdetroyes.png?size=200"
width="50"
style="margin-bottom: -4px; border-radius: 8px;"
alt="Jean Baptiste Detroyes"
/><br /><b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Jean Baptiste&nbsp;Detroyes&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</b></a
>
<div style="margin-top: 4px">
<a href="https://github.com/jdetroyes" title="Github"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
/></a>
<a
href="mailto:jdetroyes@otwld.com"
title="Email"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
/></a>
</div>
</td>
<td align="center">
<a href="https://github.com/ntrehout"
><img
src="https://github.com/ntrehout.png?size=200"
width="50"
style="margin-bottom: -4px; border-radius: 8px;"
alt="Nathan Tréhout"
/><br /><b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Nathan&nbsp;Tréhout&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</b></a
>
<div style="margin-top: 4px">
<a href="https://x.com/n_trehout" title="Twitter"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
/></a>
<a href="https://github.com/ntrehout" title="Github"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
/></a>
<a
href="mailto:ntrehout@otwld.com"
title="Email"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
/></a>
</div>
</td>
</tr>
</table>
## Support
- For questions, suggestions, and discussion about Ollama please refer to
the [Ollama issue page](https://github.com/ollama/ollama/issues)
- For questions, suggestions, and discussion about this chart please
visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
our [OTWLD Discord](https://discord.gg/U24mpqTynB)

View File

@ -0,0 +1,25 @@
{{- /*
Post-install notes: prints how to reach Ollama for the selected exposure mode
(Knative service, Ingress, NodePort, LoadBalancer or ClusterIP port-forward).
The kubectl commands use the "ollama.namespace" helper, the same namespace the
Deployment, HPA and Ingress templates put in metadata.namespace, so they keep
working when `namespaceOverride` is set.
*/ -}}
1. Get the application URL by running these commands:
{{- $namespace := include "ollama.namespace" . }}
{{- $fullName := include "ollama.fullname" . }}
{{- if .Values.knative.enabled }}
  export KSERVICE_URL=$(kubectl get ksvc --namespace {{ $namespace }} {{ $fullName }} -o jsonpath={.status.url})
  echo "Visit $KSERVICE_URL to use your application"
{{- else if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ $namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ $fullName }})
  export NODE_IP=$(kubectl get nodes --namespace {{ $namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ $namespace }} svc -w {{ $fullName }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ $namespace }} {{ $fullName }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ $namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ $namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ $namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,80 @@
{{/*
Namespace used for the chart's resources: `namespaceOverride` when it is set,
otherwise the release namespace. Lets combined charts deploy into several
namespaces.
*/}}
{{- define "ollama.namespace" -}}
{{- default .Release.Namespace .Values.namespaceOverride -}}
{{- end -}}
{{/*
Short chart name: `nameOverride` when provided, otherwise the chart name,
truncated to 63 characters with any trailing "-" removed.
*/}}
{{- define "ollama.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name.
Resolution order:
  1. `fullnameOverride`, when set;
  2. the release name alone, when it already contains the chart name (or `nameOverride`);
  3. "<release name>-<chart name or nameOverride>".
The result is cut to 63 characters because some Kubernetes name fields are
limited to this (by the DNS naming spec), and a trailing "-" is removed.
*/}}
{{- define "ollama.fullname" -}}
{{- with .Values.fullnameOverride }}
{{- . | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $base := $.Values.nameOverride | default $.Chart.Name }}
{{- $candidate := $.Release.Name }}
{{- if not (contains $base $candidate) }}
{{- $candidate = printf "%s-%s" $.Release.Name $base }}
{{- end }}
{{- $candidate | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" value for the helm.sh/chart label.
Every "+" is replaced with "_", then the result is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "ollama.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels: the chart label, the selector labels, the app version (only
when the chart declares one) and the managing service.
*/}}
{{- define "ollama.labels" -}}
helm.sh/chart: {{ include "ollama.chart" . }}
{{ include "ollama.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the name/instance pair used both as the Deployment selector
and as part of the common labels.
*/}}
{{- define "ollama.selectorLabels" -}}
{{- $appName := include "ollama.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the service account the pod runs as.
`serviceAccount.name` wins when set; otherwise the fallback is the chart
fullname when `serviceAccount.create` is true, or "default" when it is not.
*/}}
{{- define "ollama.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "ollama.fullname" . }}
{{- end }}
{{- default $fallback .Values.serviceAccount.name }}
{{- end }}
{{/*
Directory where model files generated from `ollama.models.create[].template`
are written: "<ollama.mountPath, or /root/.ollama when unset>/models".
The parenthesised lookups tolerate a missing `.Values` or `.Values.ollama`.
*/}}
{{- define "ollama.modelsMountPath" -}}
{{- $dataDir := ((.Values).ollama).mountPath | default "/root/.ollama" }}
{{- printf "%s/models" $dataDir }}
{{- end -}}

View File

@ -0,0 +1,293 @@
{{- /*
Ollama Deployment. Rendered only when Knative integration is disabled.

Notable behaviour:
  - the image tag defaults to the chart appVersion, with a "-rocm" suffix when
    the GPU is enabled and its type is "amd";
  - GPU limits are merged into `resources.limits` (NVIDIA resource, MIG slices
    or amd.com/gpu), or a DRA claim named "gpu" is referenced when
    `ollama.gpu.draEnabled` is set;
  - unless `lifecycle` is supplied, a postStart hook waits for the server and
    then pulls / creates / runs the models listed under `ollama.models`, and
    optionally removes every model that is not listed (`ollama.models.clean`).
*/ -}}
---
{{- if not .Values.knative.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
    {{- with .Values.deployment.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- /* replicas is left out when the HPA is enabled. */}}
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  {{- if or .Values.updateStrategy.type .Values.updateStrategy.rollingUpdate }}
  strategy: {{ .Values.updateStrategy | toYaml | nindent 4 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "ollama.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "ollama.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- if .Values.hostIPC }}
      hostIPC: {{ .Values.hostIPC }}
      {{- end }}
      {{- if .Values.hostPID }}
      hostPID: {{ .Values.hostPID }}
      {{- end }}
      {{- if .Values.hostNetwork }}
      hostNetwork: {{ .Values.hostNetwork }}
      {{- end }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      {{- if .Values.priorityClassName }}
      priorityClassName: {{ .Values.priorityClassName | quote }}
      {{- end }}
      {{- if .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- if .Values.runtimeClassName }}
      runtimeClassName: {{ .Values.runtimeClassName | quote }}
      {{- end }}
      {{- with .Values.initContainers }}
      initContainers:
        {{- tpl (toYaml . ) $ | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.ollama.port }}
              protocol: TCP
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0:{{ .Values.ollama.port }}"
            {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}}
            - name: PATH
              value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            {{- end}}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          envFrom:
            {{- with .Values.extraEnvFrom }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          args:
            {{- with .Values.extraArgs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- if .Values.resources }}
          resources:
            {{- $limits := default dict .Values.resources.limits }}
            {{- if .Values.ollama.gpu.enabled }}
            {{- if .Values.ollama.gpu.draEnabled}}
            claims:
              - name: gpu
            {{- else }}
            {{- /* If gpu is enabled, it can either be a NVIDIA card or a AMD card. */}}
            {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
            {{- /*
            NVIDIA is assumed by default if no value is set and GPU is enabled.
            NVIDIA cards can have mig enabled (i.e., the card is sliced into parts).
            Therefore, the first case is no migs enabled.
            */}}
            {{- if or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled ) }}
            {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- /* Second case is mig is enabled: one "nvidia.com/mig-<key>" limit per configured device. */}}
            {{- else if .Values.ollama.gpu.mig.enabled }}
            {{- $migDevices := dict -}}
            {{- range $key, $value := .Values.ollama.gpu.mig.devices }}
            {{- $migKey := printf "nvidia.com/mig-%s" $key -}}
            {{- $migDevices = merge $migDevices (dict $migKey $value) -}}
            {{- end }}
            {{- $limits = merge $limits $migDevices}}
            {{- end }}
            {{- end }}
            {{- if eq .Values.ollama.gpu.type "amd" }}
            {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- end }}
            {{- end }}
            {{- /* Overlay the computed limits onto the user-supplied resources before rendering. */}}
            {{- $resources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }}
            {{- toYaml $resources | nindent 12 }}
          {{- end}}
          volumeMounts:
            - name: ollama-data
              mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
              {{- if .Values.persistentVolume.subPath }}
              subPath: {{ .Values.persistentVolume.subPath }}
              {{- end }}
            {{- /* NOTE(review): every configMap-backed model is mounted at the same /models path; confirm more than one such entry is accepted by the API server. */}}
            {{- range .Values.ollama.models.create }}
            {{- if .configMapRef }}
            - name: {{ .name }}-config-model-volume
              mountPath: /models
            {{- end }}
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- if .Values.livenessProbe.enabled }}
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          {{- end }}
          {{- if .Values.readinessProbe.enabled }}
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          {{- end }}
          {{- /* A user-supplied lifecycle replaces the generated model bootstrap hook entirely. */}}
          {{- with .Values.lifecycle}}
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- else }}
          {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    while ! /bin/ollama ps > /dev/null 2>&1; do
                      sleep 5
                    done
                    {{- /* $allModels is the keep-list for the optional clean step; untagged names get ":latest" so they match `ollama list` output. */}}
                    {{- $allModels := list -}}
                    {{- if .Values.ollama.models.pull }}
                    {{- range .Values.ollama.models.pull }}
                    {{- if contains ":" . }}
                    {{- $allModels = append $allModels . }}
                    {{- else }}
                    {{- $allModels = append $allModels (printf "%s:latest" .) }}
                    {{- end }}
                    /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.create }}
                    {{- range .Values.ollama.models.create }}
                    {{- /* Created models are normalised like pulled/run ones; otherwise the clean step would remove them. */}}
                    {{- if contains ":" .name }}
                    {{- $allModels = append $allModels .name }}
                    {{- else }}
                    {{- $allModels = append $allModels (printf "%s:latest" .name) }}
                    {{- end }}
                    {{- if .template }}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.run }}
                    {{- range .Values.ollama.models.run }}
                    {{- if contains ":" . }}
                    {{- $allModels = append $allModels . }}
                    {{- else }}
                    {{- $allModels = append $allModels (printf "%s:latest" .) }}
                    {{- end }}
                    /bin/ollama run {{ . }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.clean }}
                    /bin/ollama list | awk 'NR>1 {print $1}' | while read model; do
                      echo "{{ $allModels | join " " }}" | tr ' ' '\n' | grep -Fqx "$model" || /bin/ollama rm "$model"
                    done
                    {{- end }}
          {{- end }}
          {{- end }}
      {{- if and .Values.ollama.gpu.enabled .Values.ollama.gpu.draEnabled }}
      resourceClaims:
        - name: gpu
          resourceClaimTemplateName: {{ .Values.ollama.gpu.draExistingClaimTemplate | default (printf "%s" (include "ollama.fullname" .)) }}
      {{- end }}
      volumes:
        - name: ollama-data
          {{- if .Values.persistentVolume.enabled }}
          persistentVolumeClaim:
            claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
          {{- else }}
          emptyDir: { }
          {{- end }}
        {{- range .Values.ollama.models.create }}
        {{- if .configMapRef }}
        - name: {{ .name }}-config-model-volume
          configMap:
            name: {{ .configMapRef }}
            items:
              - key: {{ .configMapKeyRef }}
                path: {{ .name }}
        {{- end }}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- /* GPU tolerations (NVIDIA resource or one per MIG device) are emitted before any user-supplied ones. */}}
      {{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
      tolerations:
        {{- if and .Values.ollama.gpu.enabled (and
              ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))
              ( or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled))
            ) }}
        - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
          operator: Exists
          effect: NoSchedule
        {{- else if and .Values.ollama.gpu.enabled (and
              ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))
              (( .Values.ollama.gpu.mig.enabled))
            ) }}
        {{- range $key, $value := .Values.ollama.gpu.mig.devices }}
        - key: nvidia.com/mig-{{ $key }}
          operator: Exists
          effect: NoSchedule
        {{- end }}
        {{- end }}
        {{- with .Values.tolerations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}

View File

@ -0,0 +1,4 @@
{{- /*
Renders every entry of .Values.extraObjects as its own YAML document.
Each entry is serialized with toYaml and passed through tpl with the root
context ($), so template expressions inside an entry are evaluated.
*/ -}}
{{ range $manifest := .Values.extraObjects }}
---
{{ tpl (toYaml $manifest) $ }}
{{ end }}

View File

@ -0,0 +1,34 @@
---
{{- /*
HorizontalPodAutoscaler, rendered only when autoscaling.enabled is set.
It targets the apps/v1 Deployment named after the chart fullname and adds one
Resource metric per utilization target (CPU and/or memory) that is set.
*/}}
{{- if .Values.autoscaling.enabled }}
{{- $target := include "ollama.fullname" . }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ $target }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ $target }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- with .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
    {{- with .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,63 @@
{{- if .Values.ingress.enabled -}}
{{- /*
Ingress for the chart's Service, rendered only when ingress.enabled is set.
The apiVersion and the backend/pathType/ingressClassName fields are chosen from
the cluster version reported by .Capabilities. On clusters older than 1.18 the
class name is injected as the "kubernetes.io/ingress.class" annotation unless
that annotation is already present.
*/ -}}
{{- $fullName := include "ollama.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- $kubeVersion := .Capabilities.KubeVersion.GitVersion -}}
{{- $atLeast118 := semverCompare ">=1.18-0" $kubeVersion -}}
{{- $atLeast119 := semverCompare ">=1.19-0" $kubeVersion -}}
{{- if and .Values.ingress.className (not $atLeast118) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className }}
  {{- end }}
{{- end }}
---
{{- if $atLeast119 }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" $kubeVersion }}
apiVersion: networking.k8s.io/v1beta1
{{- else }}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className $atLeast118 }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- with .Values.ingress.tls }}
  tls:
    {{- range . }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType $atLeast118 }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if $atLeast119 }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,200 @@
---
{{- /*
Knative Serving Service for Ollama, rendered only when knative.enabled is set.

The single container runs the Ollama image (tag defaults to the chart
appVersion, with a "-rocm" suffix when gpu.type is "amd"), listens on
ollama.port and mounts the "ollama-data" volume (PVC or emptyDir).
GPU limits are merged into .Values.resources.limits when gpu.enabled is set.

Unless .Values.lifecycle is provided, a postStart hook is generated that waits
for the server to answer `ollama ps` and then pulls, creates and runs the
models listed under ollama.models.
*/}}
{{- if .Values.knative.enabled }}
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- with .Values.knative.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  template:
    spec:
      containerConcurrency: {{ .Values.knative.containerConcurrency }}
      timeoutSeconds: {{ .Values.knative.timeoutSeconds }}
      responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }}
      idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }}
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      {{- if .Values.runtimeClassName }}
      runtimeClassName: {{ .Values.runtimeClassName | quote }}
      {{- end }}
      {{- if .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
      {{- end }}
      {{- with .Values.initContainers }}
      initContainers:
        {{- tpl (toYaml . ) $ | nindent 8 }}
      {{- end }}
      containers:
        - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          ports:
            - containerPort: {{ .Values.ollama.port }}
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0:{{ .Values.ollama.port }}"
            {{- /* NVIDIA is the default when gpu.type is empty. */}}
            {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}}
            - name: PATH
              value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            {{- end}}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          envFrom:
            {{- with .Values.extraEnvFrom }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          args:
            {{- with .Values.extraArgs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- if .Values.resources }}
          resources:
            {{- /* Add the GPU resource (count defaults to 1) to the configured limits. */}}
            {{- $limits := default dict .Values.resources.limits }}
            {{- if .Values.ollama.gpu.enabled }}
              {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
                {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
                {{- $limits = merge $limits $gpuLimit }}
              {{- end }}
              {{- if eq .Values.ollama.gpu.type "amd" }}
                {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
                {{- $limits = merge $limits $gpuLimit }}
              {{- end }}
            {{- end }}
            {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }}
            {{- toYaml $ressources | nindent 12 }}
          {{- end}}
          volumeMounts:
            - name: ollama-data
              mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
              {{- if .Values.persistentVolume.subPath }}
              subPath: {{ .Values.persistentVolume.subPath }}
              {{- end }}
            {{- range .Values.ollama.models.create }}
            {{- if .configMapRef }}
            - name: {{ .name }}-config-model-volume
              mountPath: /models
            {{- end }}
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- /* NOTE(review): both probes reference the port name "http", but the containerPort above is unnamed in this manifest; confirm the probe port resolves as intended under Knative. */}}
          {{- if .Values.livenessProbe.enabled }}
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          {{- end }}
          {{- if .Values.readinessProbe.enabled }}
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path }}
              port: http
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          {{- end }}
          {{- /* A user-supplied lifecycle replaces the generated postStart hook entirely. */}}
          {{- with .Values.lifecycle}}
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- else }}
          {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
          lifecycle:
            postStart:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    while ! /bin/ollama ps > /dev/null 2>&1; do
                      sleep 5
                    done
                    {{- if .Values.ollama.models.pull }}
                    {{- range .Values.ollama.models.pull }}
                    /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.create }}
                    {{- range .Values.ollama.models.create }}
                    {{- if .template }}
                    {{- /* Inside range "." is the model entry, so the helper must get the root context ($) for both the write and the read path. */}}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}
                    {{- if .Values.ollama.models.run }}
                    {{- range .Values.ollama.models.run }}
                    /bin/ollama run {{ . }}
                    {{- end }}
                    {{- end }}
          {{- end }}
          {{- end }}
      volumes:
        - name: ollama-data
          {{- if .Values.persistentVolume.enabled }}
          persistentVolumeClaim:
            claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
          {{- else }}
          emptyDir: { }
          {{- end }}
        {{- range .Values.ollama.models.create }}
        {{- if .configMapRef }}
        - name: {{ .name }}-config-model-volume
          configMap:
            name: {{ .configMapRef }}
            items:
              - key: {{ .configMapKeyRef }}
                path: {{ .name }}
        {{- end }}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
      tolerations:
        {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
        - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
          operator: Exists
          effect: NoSchedule
        {{- end }}
        {{- with .Values.tolerations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}

View File

@ -0,0 +1,33 @@
{{- if (and .Values.persistentVolume.enabled (not .Values.persistentVolume.existingClaim)) -}}
{{- /*
PersistentVolumeClaim for the Ollama data volume. Rendered only when
persistentVolume.enabled is set and no existingClaim is given.
A storageClass of "-" renders an empty storageClassName; an empty storageClass
omits the field. volumeMode and volumeName are emitted only when set.
*/ -}}
{{- $pv := .Values.persistentVolume -}}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  {{- with $pv.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
spec:
  accessModes:
    {{- toYaml $pv.accessModes | nindent 4 }}
  {{- with $pv.storageClass }}
  {{- if eq "-" . }}
  storageClassName: ""
  {{- else }}
  storageClassName: "{{ . }}"
  {{- end }}
  {{- end }}
  {{- with $pv.volumeMode }}
  volumeMode: "{{ . }}"
  {{- end }}
  {{- with $pv.volumeName }}
  volumeName: "{{ . }}"
  {{- end }}
  resources:
    requests:
      storage: "{{ $pv.size }}"
{{- end -}}

View File

@ -0,0 +1,19 @@
{{- if and .Values.ollama.gpu.enabled .Values.ollama.gpu.draEnabled (not .Values.ollama.gpu.draExistingClaimTemplate) -}}
{{- /*
DRA ResourceClaimTemplate requesting ollama.gpu.number devices (default 1) of
class ollama.gpu.draDriverClass. Rendered only when gpu.enabled and
gpu.draEnabled are set and no existing claim template is referenced.

The request layout depends on the API version: resource.k8s.io/v1 (selected
for Kubernetes >= 1.34) nests deviceClassName/count under "exactly", whereas
v1beta1 keeps them directly on the request.
*/ -}}
{{- $isV1 := semverCompare ">=1.34-0" .Capabilities.KubeVersion.GitVersion -}}
---
{{- if $isV1 }}
apiVersion: resource.k8s.io/v1
{{- else }}
apiVersion: resource.k8s.io/v1beta1
{{- end }}
kind: ResourceClaimTemplate
metadata:
  name: {{ template "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
spec:
  spec:
    devices:
      requests:
        - name: gpu
          {{- if $isV1 }}
          exactly:
            deviceClassName: {{ .Values.ollama.gpu.draDriverClass }}
            count: {{(.Values.ollama.gpu.number | default 1)}}
          {{- else }}
          deviceClassName: {{ .Values.ollama.gpu.draDriverClass }}
          count: {{(.Values.ollama.gpu.number | default 1)}}
          {{- end }}
{{- end -}}

View File

@ -0,0 +1,32 @@
---
{{- /*
Plain Kubernetes Service for Ollama, rendered only when Knative is disabled.
Exposes service.port and forwards to the port named "http". nodePort is emitted
when service.type contains "NodePort", and loadBalancerIP only when it is set.
*/}}
{{- if not .Values.knative.enabled }}
{{- $svc := .Values.service }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
    {{- with $svc.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- with $svc.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  type: {{ $svc.type }}
  ports:
    - port: {{ $svc.port }}
      targetPort: http
      protocol: TCP
      name: http
      {{- if contains "NodePort" $svc.type }}
      nodePort: {{ $svc.nodePort }}
      {{- end }}
  {{- with $svc.loadBalancerIP }}
  loadBalancerIP: {{ . | quote }}
  {{- end }}
  selector:
    {{- include "ollama.selectorLabels" . | nindent 4 }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- if .Values.serviceAccount.create -}}
{{- /*
ServiceAccount for the Ollama pods, rendered only when serviceAccount.create is
set. Token automounting follows serviceAccount.automount.
*/ -}}
{{- $sa := .Values.serviceAccount -}}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "ollama.serviceAccountName" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- with $sa.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ $sa.automount }}
{{- end }}

View File

@ -0,0 +1,25 @@
---
{{- /*
Helm test hook Pod, rendered only when tests.enabled is set.
Runs busybox `wget` against <fullname>:<service.port> and never restarts.
*/}}
{{- if .Values.tests.enabled }}
{{- $fullName := include "ollama.fullname" . }}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ $fullName }}-test-connection"
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
    {{- with .Values.tests.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  annotations:
    "helm.sh/hook": test
    {{- with .Values.tests.annotations }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  containers:
    - name: wget
      image: busybox
      command:
        - 'wget'
      args:
        - '{{ $fullName }}:{{ .Values.service.port }}'
  restartPolicy: Never
{{ end }}

View File

@ -0,0 +1,440 @@
# Default values for ollama-helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# -- Number of replicas
replicaCount: 1
# Knative configuration
knative:
# -- Enable Knative integration
enabled: false
# -- Knative service container concurrency
containerConcurrency: 0
# -- Knative service timeout seconds
timeoutSeconds: 300
# -- Knative service response start timeout seconds
responseStartTimeoutSeconds: 300
# -- Knative service idle timeout seconds
idleTimeoutSeconds: 300
# -- Knative service annotations
annotations: {}
# Docker image
image:
# -- Docker image repository
repository: ollama/ollama
# -- Docker pull policy
pullPolicy: IfNotPresent
# -- Docker image tag, overrides the image tag whose default is the chart appVersion.
tag: ""
# -- Docker registry secret names as an array
imagePullSecrets: []
# -- String to partially override template (will maintain the release name)
nameOverride: ""
# -- String to fully override template
fullnameOverride: ""
# -- String to fully override namespace
namespaceOverride: ""
# Ollama parameters
ollama:
# Port Ollama is listening on
port: 11434
gpu:
# -- Enable GPU integration
enabled: false
# -- Enable DRA GPU integration
# If enabled, DRA is used instead of the device plugin and a ResourceClaimTemplate is created (unless 'draExistingClaimTemplate' is set)
draEnabled: false
# -- DRA GPU DriverClass
draDriverClass: "gpu.nvidia.com"
# -- Existing DRA GPU ResourceClaim Template
draExistingClaimTemplate: ""
# -- GPU type: 'nvidia' or 'amd'
# If 'ollama.gpu.enabled', default value is nvidia
# If set to 'amd', the 'rocm' suffix is added to the image tag unless 'image.tag' is overridden
# This is because AMD and CPU/CUDA use different images
type: 'nvidia'
# -- Specify the number of GPU
# If you use MIG section below then this parameter is ignored
number: 1
# -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
nvidiaResource: "nvidia.com/gpu"
# nvidiaResource: "nvidia.com/mig-1g.10gb" # example
# If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
mig:
# -- Enable multiple mig devices
# If enabled you will have to specify the mig devices
# If enabled is set to false this section is ignored
enabled: false
# -- Specify the mig devices and the corresponding number
devices: {}
# 1g.10gb: 1
# 3g.40gb: 1
models:
# -- List of models to pull at container startup
# The more you add, the longer the container will take to start if models are not present
# pull:
# - llama2
# - mistral
pull: []
# -- List of models to load in memory at container startup
# run:
# - llama2
# - mistral
run: []
# -- List of models to create at container startup, there are two options
# 1. Create a raw model
# 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
# create:
# - name: llama3.1-ctx32768
# configMapRef: my-configmap
# configMapKeyRef: configmap-key
# - name: llama3.1-ctx32768
# template: |
# FROM llama3.1
# PARAMETER num_ctx 32768
create: []
# -- Automatically remove models present on the disk but not specified in the values file
clean: false
# -- Add insecure flag for pulling at container startup
insecure: false
# -- Override ollama-data volume mount path, default: "/root/.ollama"
mountPath: ""
# Service account
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
# -- Specifies whether a service account should be created
create: true
# -- Automatically mount a ServiceAccount's API credentials?
automount: true
# -- Annotations to add to the service account
annotations: {}
# -- The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
# -- Map of annotations to add to the pods
podAnnotations: {}
# -- Map of labels to add to the pods
podLabels: {}
# -- Pod Security Context
podSecurityContext: {}
# fsGroup: 2000
# -- Priority Class Name
priorityClassName: ""
# -- Container Security Context
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
# -- Specify runtime class
runtimeClassName: ""
# Configure Service
service:
  # -- Service type
  type: ClusterIP
  # -- Service port
  port: 11434
  # -- Service node port when service type is 'NodePort'
  nodePort: 31434
  # -- Load Balancer IP address (left empty, the field is not rendered)
  loadBalancerIP: ""
  # -- Annotations to add to the service
  annotations: {}
  # -- Labels to add to the service
  labels: {}
# Configure Deployment
deployment:
# -- Labels to add to the deployment
labels: {}
# Configure the ingress resource that allows you to access the Ollama service
ingress:
# -- Enable ingress controller resource
enabled: false
# -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
className: ""
# -- Additional annotations for the Ingress resource.
annotations: {}
# kubernetes.io/ingress.class: traefik
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
# The list of hostnames to be covered with this ingress record.
hosts:
- host: ollama.local
paths:
- path: /
pathType: Prefix
# -- The tls configuration for hostnames to be covered with this ingress record.
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
# Configure resource requests and limits
# ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources:
# -- Pod requests
requests: {}
# Memory request
# memory: 4096Mi
# CPU request
# cpu: 2000m
# -- Pod limit
limits: {}
# Memory limit
# memory: 8192Mi
# CPU limit
# cpu: 4000m
# Configure extra options for liveness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
livenessProbe:
# -- Enable livenessProbe
enabled: true
# -- Request path for livenessProbe
path: /
# -- Initial delay seconds for livenessProbe
initialDelaySeconds: 60
# -- Period seconds for livenessProbe
periodSeconds: 10
# -- Timeout seconds for livenessProbe
timeoutSeconds: 5
# -- Failure threshold for livenessProbe
failureThreshold: 6
# -- Success threshold for livenessProbe
successThreshold: 1
# Configure extra options for readiness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
readinessProbe:
# -- Enable readinessProbe
enabled: true
# -- Request path for readinessProbe
path: /
# -- Initial delay seconds for readinessProbe
initialDelaySeconds: 30
# -- Period seconds for readinessProbe
periodSeconds: 5
# -- Timeout seconds for readinessProbe
timeoutSeconds: 3
# -- Failure threshold for readinessProbe
failureThreshold: 6
# -- Success threshold for readinessProbe
successThreshold: 1
# Configure autoscaling
autoscaling:
# -- Enable autoscaling
enabled: false
# -- Number of minimum replicas
minReplicas: 1
# -- Number of maximum replicas
maxReplicas: 100
# -- CPU usage to target replica
targetCPUUtilizationPercentage: 80
# Memory usage to target replica (uncomment to enable)
# targetMemoryUtilizationPercentage: 80
# -- Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# -- Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
# -- Additional arguments on the output Deployment definition.
extraArgs: []
# -- Additional environments variables on the output Deployment definition.
# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
extraEnv: []
# - name: OLLAMA_DEBUG
# value: "1"
# -- Additional environment variables from external sources (like ConfigMap)
extraEnvFrom: []
# - configMapRef:
# name: my-env-configmap
# Enable persistence using Persistent Volume Claims
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
persistentVolume:
# -- Enable persistence using PVC
enabled: false
# -- Ollama server data Persistent Volume access modes
# Must match those of existing PV or dynamic provisioner
# Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
accessModes:
- ReadWriteOnce
# -- Ollama server data Persistent Volume annotations
annotations: {}
# -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
# created + ready PVC here. If set, this Chart will not create the default PVC.
# Requires persistentVolume.enabled: true
existingClaim: ""
# -- Ollama server data Persistent Volume size
size: 30Gi
# -- Ollama server data Persistent Volume Storage Class
# If defined, storageClassName: <storageClass>
# If set to "-", storageClassName: "", which disables dynamic provisioning
# If undefined (the default) or set to null, no storageClassName spec is
# set, choosing the default provisioner. (gp2 on AWS, standard on
# GKE, AWS & OpenStack)
storageClass: ""
# -- Ollama server data Persistent Volume mode
# If defined, volumeMode: <volumeMode>
# If empty (the default) or set to null, no volumeMode spec is
# set, choosing the default mode.
volumeMode: ""
# -- Subdirectory of Ollama server data Persistent Volume to mount
# Useful if the volume's root directory is not empty
subPath: ""
# -- Pre-existing PV to attach this claim to
# Useful if a CSI auto-provisions a PV for you and you want to always
# reference the PV moving forward
volumeName: ""
# -- Node labels for pod assignment.
nodeSelector: {}
# -- Tolerations for pod assignment
tolerations: []
# -- Affinity for pod assignment
affinity: {}
# -- Container lifecycle hooks (when set, replaces the postStart hook generated from ollama.models pull/run/create)
lifecycle: {}
# How to replace existing pods
updateStrategy:
# -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
type: "Recreate"
# -- Topology Spread Constraints for pod assignment (a list of constraint objects)
topologySpreadConstraints: []
# -- Wait for a grace period
terminationGracePeriodSeconds: 120
# -- Init containers to add to the pod
initContainers: []
# - name: startup-tool
# image: alpine:3
# command: [sh, -c]
# args:
# - echo init
# -- Use the host's IPC namespace.
hostIPC: false
# -- Use the host's PID namespace.
hostPID: false
# -- Use the host's network namespace.
hostNetwork: false
# -- Extra K8s manifests to deploy
extraObjects: []
# - apiVersion: v1
# kind: PersistentVolume
# metadata:
# name: aws-efs
# data:
# key: "value"
# - apiVersion: scheduling.k8s.io/v1
# kind: PriorityClass
# metadata:
# name: high-priority
# value: 1000000
# globalDefault: false
# description: "This priority class should be used for XYZ service pods only."
# Test connection pods
tests:
enabled: true
# -- Labels to add to the tests
labels: {}
# -- Annotations to add to the tests
annotations: {}