feat: support open-webui charts
This commit is contained in:
30
open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
30
open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
@ -0,0 +1,30 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
#others
|
||||
.github
|
||||
kind-config.yml
|
||||
ci/
|
||||
|
||||
33
open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
33
open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
apiVersion: v2
|
||||
name: ollama
|
||||
description: Get up and running with large language models locally.
|
||||
|
||||
# Chart type: declared as an application chart.
type: application
|
||||
|
||||
# Version of the chart itself; the "ollama.chart" helper combines the chart name and this version for the helm.sh/chart label.
version: 1.27.0
|
||||
|
||||
# Version of the Ollama application; the Deployment uses it as the default image tag when image.tag is empty.
appVersion: "0.11.4"
|
||||
|
||||
annotations:
|
||||
artifacthub.io/category: ai-machine-learning
|
||||
artifacthub.io/changes: |
|
||||
- kind: changed
|
||||
description: upgrade app version to 0.11.4
|
||||
links:
|
||||
- name: Ollama release v0.11.4
|
||||
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
|
||||
|
||||
kubeVersion: "^1.16.0-0"
|
||||
home: https://ollama.ai/
|
||||
icon: https://ollama.ai/public/ollama.png
|
||||
keywords:
|
||||
- ai
|
||||
- llm
|
||||
- llama
|
||||
- mistral
|
||||
sources:
|
||||
- https://github.com/ollama/ollama
|
||||
- https://github.com/otwld/ollama-helm
|
||||
maintainers:
|
||||
- name: OTWLD
|
||||
email: contact@otwld.com
|
||||
21
open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
21
open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 OTWLD
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
331
open-webui/charts/ollama/.ollama-helm/README.md
Normal file
331
open-webui/charts/ollama/.ollama-helm/README.md
Normal file
@ -0,0 +1,331 @@
|
||||

|
||||
|
||||

|
||||
[Artifact Hub](https://artifacthub.io/packages/helm/ollama-helm/ollama)
|
||||
[CI workflow](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
|
||||
[Discord](https://discord.gg/U24mpqTynB)
|
||||
|
||||
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
|
||||
|
||||
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
|
||||
|
||||
## Requirements
|
||||
|
||||
- Kubernetes: `>= 1.16.0-0` for **CPU only**
|
||||
|
||||
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
|
||||
|
||||
*Not all GPUs are currently supported with ollama (especially with AMD)*
|
||||
|
||||
## Deploying Ollama chart
|
||||
|
||||
To install the `ollama` chart in the `ollama` namespace:
|
||||
|
||||
> [!IMPORTANT]
|
||||
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
|
||||
> registry https://helm.otwld.com/
|
||||
> Please update your Helm registry accordingly.
|
||||
|
||||
```console
|
||||
helm repo add otwld https://helm.otwld.com/
|
||||
helm repo update
|
||||
helm install ollama otwld/ollama --namespace ollama --create-namespace
|
||||
```
|
||||
|
||||
## Upgrading Ollama chart
|
||||
|
||||
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
|
||||
backwards incompatible changes.
|
||||
|
||||
Make adjustments to your values as needed, then run `helm upgrade`:
|
||||
|
||||
```console
|
||||
# -- This pulls the latest version of the ollama chart from the repo.
|
||||
helm repo update
|
||||
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
|
||||
```
|
||||
|
||||
## Uninstalling Ollama chart
|
||||
|
||||
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
|
||||
|
||||
```console
|
||||
helm delete ollama --namespace ollama
|
||||
```
|
||||
|
||||
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
|
||||
parameters and flags.
|
||||
|
||||
## Interact with Ollama
|
||||
|
||||
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
|
||||
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||
- Interact with official client libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
|
||||
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
|
||||
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
|
||||
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
|
||||
|
||||
## Examples
|
||||
|
||||
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
|
||||
|
||||
### Basic values.yaml example with GPU and two models pulled at startup
|
||||
|
||||
```
|
||||
ollama:
|
||||
gpu:
|
||||
# -- Enable GPU integration
|
||||
enabled: true
|
||||
|
||||
# -- GPU type: 'nvidia' or 'amd'
|
||||
type: 'nvidia'
|
||||
|
||||
# -- Specify the number of GPU to 1
|
||||
number: 1
|
||||
|
||||
# -- List of models to pull at container startup
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Basic values.yaml example with Ingress
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- llama2
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: ollama.domain.lan
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
- *API is now reachable at `ollama.domain.lan`*
|
||||
|
||||
---
|
||||
|
||||
### Create and run model from template
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
create:
|
||||
- name: llama3.1-ctx32768
|
||||
template: |
|
||||
FROM llama3.1
|
||||
PARAMETER num_ctx 32768
|
||||
run:
|
||||
- llama3.1-ctx32768
|
||||
```
|
||||
|
||||
## Upgrading from 0.X.X to 1.X.X
|
||||
|
||||
Version 1.X.X introduces the ability to load models in memory at startup, so the structure of the values has changed.
|
||||
|
||||
Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
To:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
## Helm Values
|
||||
|
||||
- See [values.yaml](values.yaml) to see the Chart's default values.
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| affinity | object | `{}` | Affinity for pod assignment |
|
||||
| autoscaling.enabled | bool | `false` | Enable autoscaling |
|
||||
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
|
||||
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
|
||||
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
|
||||
| deployment.labels | object | `{}` | Labels to add to the deployment |
|
||||
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
|
||||
| extraEnv | list | `[]` | Additional environment variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
|
||||
| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like ConfigMap) |
|
||||
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
|
||||
| fullnameOverride | string | `""` | String to fully override template |
|
||||
| hostIPC | bool | `false` | Use the host’s ipc namespace. |
|
||||
| hostNetwork | bool | `false` | Use the host's network namespace. |
|
||||
| hostPID | bool | `false` | Use the host’s pid namespace |
|
||||
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
|
||||
| image.repository | string | `"ollama/ollama"` | Docker image registry |
|
||||
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
|
||||
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
|
||||
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
|
||||
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
|
||||
| ingress.enabled | bool | `false` | Enable ingress controller resource |
|
||||
| ingress.hosts[0].host | string | `"ollama.local"` | |
|
||||
| ingress.hosts[0].paths[0].path | string | `"/"` | |
|
||||
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
|
||||
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
|
||||
| initContainers | list | `[]` | Init containers to add to the pod |
|
||||
| knative.annotations | object | `{}` | Knative service annotations |
|
||||
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
|
||||
| knative.enabled | bool | `false` | Enable Knative integration |
|
||||
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
|
||||
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
|
||||
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
|
||||
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
|
||||
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
|
||||
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
|
||||
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
|
||||
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
|
||||
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
|
||||
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
|
||||
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
|
||||
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
|
||||
| namespaceOverride | string | `""` | String to fully override namespace |
|
||||
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
|
||||
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
|
||||
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
|
||||
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
|
||||
| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number |
|
||||
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored |
|
||||
| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored |
|
||||
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice |
|
||||
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled', the default value is nvidia. If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden. This is because AMD and CPU/CUDA use different images |
|
||||
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
|
||||
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
|
||||
| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
|
||||
| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral |
|
||||
| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral |
|
||||
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
|
||||
| ollama.port | int | `11434` | |
|
||||
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
|
||||
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
|
||||
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
|
||||
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires persistentVolume.enabled: true |
|
||||
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
|
||||
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: <storageClass> If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) |
|
||||
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty |
|
||||
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: <volumeMode> If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
|
||||
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
|
||||
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
|
||||
| podLabels | object | `{}` | Map of labels to add to the pods |
|
||||
| podSecurityContext | object | `{}` | Pod Security Context |
|
||||
| priorityClassName | string | `""` | Priority Class Name |
|
||||
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
|
||||
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
|
||||
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
|
||||
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
|
||||
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
|
||||
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
|
||||
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
|
||||
| replicaCount | int | `1` | Number of replicas |
|
||||
| resources.limits | object | `{}` | Pod limit |
|
||||
| resources.requests | object | `{}` | Pod requests |
|
||||
| runtimeClassName | string | `""` | Specify runtime class |
|
||||
| securityContext | object | `{}` | Container Security Context |
|
||||
| service.annotations | object | `{}` | Annotations to add to the service |
|
||||
| service.labels | object | `{}` | Labels to add to the service |
|
||||
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
|
||||
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
|
||||
| service.port | int | `11434` | Service port |
|
||||
| service.type | string | `"ClusterIP"` | Service type |
|
||||
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
|
||||
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
|
||||
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
|
||||
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
|
||||
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
|
||||
| tests.annotations | object | `{}` | Annotations to add to the tests |
|
||||
| tests.enabled | bool | `true` | |
|
||||
| tests.labels | object | `{}` | Labels to add to the tests |
|
||||
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
|
||||
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
|
||||
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
|
||||
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
## Core team
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center">
|
||||
<a href="https://github.com/jdetroyes"
|
||||
><img
|
||||
src="https://github.com/jdetroyes.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Jean Baptiste Detroyes"
|
||||
/><br /><b> Jean Baptiste Detroyes </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://github.com/jdetroyes" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:jdetroyes@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
<td align="center">
|
||||
<a href="https://github.com/ntrehout"
|
||||
><img
|
||||
src="https://github.com/ntrehout.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Nathan Tréhout"
|
||||
/><br /><b> Nathan Tréhout </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://x.com/n_trehout" title="Twitter"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
|
||||
/></a>
|
||||
<a href="https://github.com/ntrehout" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:ntrehout@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Support
|
||||
|
||||
- For questions, suggestions, and discussion about Ollama please refer to
|
||||
the [Ollama issue page](https://github.com/ollama/ollama/issues)
|
||||
- For questions, suggestions, and discussion about this chart please
|
||||
visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
|
||||
our [OTWLD Discord](https://discord.gg/U24mpqTynB)
|
||||
25
open-webui/charts/ollama/.ollama-helm/templates/NOTES.txt
Normal file
25
open-webui/charts/ollama/.ollama-helm/templates/NOTES.txt
Normal file
@ -0,0 +1,25 @@
|
||||
{{- /* Post-install notes: prints the commands needed to reach Ollama, chosen by Knative, Ingress or Service type. Namespaces are resolved through the "ollama.namespace" helper so the hints honor .Values.namespaceOverride, matching the namespace the Deployment is created in. */ -}}
1. Get the application URL by running these commands:
|
||||
{{- if .Values.knative.enabled }}
|
||||
export KSERVICE_URL=$(kubectl get ksvc --namespace {{ include "ollama.namespace" . }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
|
||||
echo "Visit $KSERVICE_URL to use your application"
|
||||
{{- else if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ include "ollama.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ include "ollama.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ include "ollama.namespace" . }} svc -w {{ include "ollama.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ include "ollama.namespace" . }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ include "ollama.namespace" . }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ include "ollama.namespace" . }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ include "ollama.namespace" . }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
80
open-webui/charts/ollama/.ollama-helm/templates/_helpers.tpl
Normal file
80
open-webui/charts/ollama/.ollama-helm/templates/_helpers.tpl
Normal file
@ -0,0 +1,80 @@
|
||||
{{/*
|
||||
Resolve the namespace for chart resources: .Values.namespaceOverride when it is set, otherwise .Release.Namespace. This allows the release namespace to be overridden for multi-namespace deployments in combined charts.
|
||||
*/}}
|
||||
{{- define "ollama.namespace" -}}
|
||||
{{- if .Values.namespaceOverride -}}
|
||||
{{- .Values.namespaceOverride -}}
|
||||
{{- else -}}
|
||||
{{- .Release.Namespace -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Expand the name of the chart: .Values.nameOverride when set, otherwise .Chart.Name, truncated to 63 characters with a trailing "-" removed.
|
||||
*/}}
|
||||
{{- define "ollama.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name. When .Values.fullnameOverride is set it takes precedence over everything else.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec); a trailing "-" left by truncation is removed.
|
||||
If the release name contains the chart name (or .Values.nameOverride), the release name alone is used as the full name; otherwise the result is "<release name>-<name>".
|
||||
*/}}
|
||||
{{- define "ollama.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label: "<chart name>-<chart version>" with every "+" replaced by "_", truncated to 63 characters with a trailing "-" removed.
|
||||
*/}}
|
||||
{{- define "ollama.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels: helm.sh/chart, the selector labels, app.kubernetes.io/version (emitted only when the chart defines an appVersion) and app.kubernetes.io/managed-by.
|
||||
*/}}
|
||||
{{- define "ollama.labels" -}}
|
||||
helm.sh/chart: {{ include "ollama.chart" . }}
|
||||
{{ include "ollama.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels: app.kubernetes.io/name (the chart name helper) and app.kubernetes.io/instance (the release name). The Deployment uses them for selector.matchLabels, and they are also included in the common labels.
|
||||
*/}}
|
||||
{{- define "ollama.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "ollama.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use. When serviceAccount.create is true: serviceAccount.name, falling back to the fullname helper. Otherwise: serviceAccount.name, falling back to "default".
|
||||
*/}}
|
||||
{{- define "ollama.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Models mount path: "<mount path>/models", where the mount path is .Values.ollama.mountPath, or "/root/.ollama" when that value is unset or empty. The parenthesized lookup tolerates a missing .Values or .Values.ollama.
|
||||
*/}}
|
||||
{{- define "ollama.modelsMountPath" -}}
|
||||
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
|
||||
{{- end -}}
|
||||
293
open-webui/charts/ollama/.ollama-helm/templates/deployment.yaml
Normal file
293
open-webui/charts/ollama/.ollama-helm/templates/deployment.yaml
Normal file
@ -0,0 +1,293 @@
|
||||
---
|
||||
{{- if not .Values.knative.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "ollama.fullname" . }}
|
||||
namespace: {{ include "ollama.namespace" . }}
|
||||
labels:
|
||||
{{- include "ollama.labels" . | nindent 4 }}
|
||||
{{- with .Values.deployment.labels }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if not .Values.autoscaling.enabled }}
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- end }}
|
||||
{{- if or .Values.updateStrategy.type .Values.updateStrategy.rollingUpdate }}
|
||||
strategy: {{ .Values.updateStrategy | toYaml | nindent 4 }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "ollama.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "ollama.labels" . | nindent 8 }}
|
||||
{{- with .Values.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.hostIPC }}
|
||||
hostIPC: {{ .Values.hostIPC }}
|
||||
{{- end }}
|
||||
{{- if .Values.hostPID }}
|
||||
hostPID: {{ .Values.hostPID }}
|
||||
{{- end }}
|
||||
{{- if .Values.hostNetwork }}
|
||||
hostNetwork: {{ .Values.hostNetwork }}
|
||||
{{- end }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "ollama.serviceAccountName" . }}
|
||||
{{- if .Values.priorityClassName }}
|
||||
priorityClassName: {{ .Values.priorityClassName | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.terminationGracePeriodSeconds }}
|
||||
terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
|
||||
{{- end }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||
{{- if .Values.runtimeClassName }}
|
||||
runtimeClassName: {{ .Values.runtimeClassName | quote }}
|
||||
{{- end }}
|
||||
{{- with .Values.initContainers }}
|
||||
initContainers:
|
||||
{{- tpl (toYaml . ) $ | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.ollama.port }}
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: OLLAMA_HOST
|
||||
value: "0.0.0.0:{{ .Values.ollama.port }}"
|
||||
{{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}}
|
||||
- name: PATH
|
||||
value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
{{- end}}
|
||||
{{- with .Values.extraEnv }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
envFrom:
|
||||
{{- with .Values.extraEnvFrom }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
args:
|
||||
{{- with .Values.extraArgs }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.resources }}
          {{- /*
            Container resources.
            Starts from .Values.resources.limits and, when GPU support is
            enabled, adds the matching GPU limit before rendering the final
            resources map. `merge` keeps keys already present in $limits, so a
            limit set explicitly in values is not overridden by the computed one.
            Notes are template comments so they do not appear in the rendered
            manifest.
          */}}
          resources:
            {{- $limits := default dict .Values.resources.limits }}
            {{- if .Values.ollama.gpu.enabled }}
            {{- if .Values.ollama.gpu.draEnabled }}
            {{- /* DRA: reference the pod-level "gpu" resource claim instead of adding a GPU limit. */}}
            claims:
              - name: gpu
            {{- else }}
            {{- /* Without DRA the GPU is either an NVIDIA card or an AMD card. */}}
            {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
            {{- /* NVIDIA is assumed when GPU is enabled and no type is set. */}}
            {{- if or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled) }}
            {{- /* MIG disabled: request whole GPUs. */}}
            {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- else }}
            {{- /* MIG enabled (the card is sliced into parts): one nvidia.com/mig-<key> limit per configured device. */}}
            {{- $migDevices := dict }}
            {{- range $profile, $count := .Values.ollama.gpu.mig.devices }}
            {{- $migDevices = merge $migDevices (dict (printf "nvidia.com/mig-%s" $profile) $count) }}
            {{- end }}
            {{- $limits = merge $limits $migDevices }}
            {{- end }}
            {{- end }}
            {{- if eq .Values.ollama.gpu.type "amd" }}
            {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
            {{- $limits = merge $limits $gpuLimit }}
            {{- end }}
            {{- end }}
            {{- end }}
            {{- $resources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }}
            {{- toYaml $resources | nindent 12 }}
          {{- end }}
volumeMounts:
|
||||
- name: ollama-data
|
||||
mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
|
||||
{{- if .Values.persistentVolume.subPath }}
|
||||
subPath: {{ .Values.persistentVolume.subPath }}
|
||||
{{- end }}
|
||||
{{- range .Values.ollama.models.create }}
|
||||
{{- if .configMapRef }}
|
||||
- name: {{ .name }}-config-model-volume
|
||||
mountPath: /models
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.livenessProbe.enabled }}
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.livenessProbe.path }}
|
||||
port: http
|
||||
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
|
||||
successThreshold: {{ .Values.livenessProbe.successThreshold }}
|
||||
failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
|
||||
{{- end }}
|
||||
{{- if .Values.readinessProbe.enabled }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.readinessProbe.path }}
|
||||
port: http
|
||||
initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
|
||||
successThreshold: {{ .Values.readinessProbe.successThreshold }}
|
||||
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
|
||||
{{- end }}
|
||||
{{- with .Values.lifecycle}}
|
||||
lifecycle:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- else }}
|
||||
{{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
|
||||
lifecycle:
|
||||
postStart:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
while ! /bin/ollama ps > /dev/null 2>&1; do
|
||||
sleep 5
|
||||
done
|
||||
|
||||
{{- $allModels := list -}}
|
||||
|
||||
{{- if .Values.ollama.models.pull }}
|
||||
{{- range .Values.ollama.models.pull }}
|
||||
|
||||
{{- if contains ":" . }}
|
||||
{{- $allModels = append $allModels . }}
|
||||
{{- else }}
|
||||
{{- $allModels = append $allModels (printf "%s:latest" .) }}
|
||||
{{- end }}
|
||||
|
||||
/bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.ollama.models.create }}
|
||||
{{- range .Values.ollama.models.create }}
|
||||
{{- $allModels = append $allModels .name }}
|
||||
{{- if .template }}
|
||||
cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
|
||||
{{- .template | nindent 20 }}
|
||||
EOF
|
||||
/bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
|
||||
{{- end }}
|
||||
{{- if .configMapRef }}
|
||||
/bin/ollama create {{ .name }} -f /models/{{ .name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.ollama.models.run }}
|
||||
{{- range .Values.ollama.models.run }}
|
||||
|
||||
{{- if contains ":" . }}
|
||||
{{- $allModels = append $allModels . }}
|
||||
{{- else }}
|
||||
{{- $allModels = append $allModels (printf "%s:latest" .) }}
|
||||
{{- end }}
|
||||
|
||||
/bin/ollama run {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.ollama.models.clean }}
|
||||
/bin/ollama list | awk 'NR>1 {print $1}' | while read model; do
|
||||
echo "{{ $allModels | join " " }}" | tr ' ' '\n' | grep -Fqx "$model" || /bin/ollama rm "$model"
|
||||
done
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if and .Values.ollama.gpu.enabled .Values.ollama.gpu.draEnabled }}
|
||||
resourceClaims:
|
||||
- name: gpu
|
||||
resourceClaimTemplateName: {{ .Values.ollama.gpu.draExistingClaimTemplate | default (printf "%s" (include "ollama.fullname" .)) }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: ollama-data
|
||||
{{- if .Values.persistentVolume.enabled }}
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
|
||||
{{- else }}
|
||||
emptyDir: { }
|
||||
{{- end }}
|
||||
{{- range .Values.ollama.models.create }}
|
||||
{{- if .configMapRef }}
|
||||
- name: {{ .name }}-config-model-volume
|
||||
configMap:
|
||||
name: {{ .configMapRef }}
|
||||
items:
|
||||
- key: {{ .configMapKeyRef }}
|
||||
path: {{ .name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.topologySpreadConstraints }}
|
||||
topologySpreadConstraints:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- if and .Values.ollama.gpu.enabled (and
|
||||
( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))
|
||||
( or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled))
|
||||
) }}
|
||||
- key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
{{- else if and .Values.ollama.gpu.enabled (and
|
||||
( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))
|
||||
(( .Values.ollama.gpu.mig.enabled))
|
||||
) }}
|
||||
{{- range $key, $value := .Values.ollama.gpu.mig.devices }}
|
||||
- key: nvidia.com/mig-{{ $key }}
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@ -0,0 +1,4 @@
|
||||
{{- /*
Emit each entry of .Values.extraObjects as its own YAML document. Entries are
serialised with toYaml and passed through tpl against the root context, so
they may contain template expressions.
*/ -}}
{{ range $extraObject := .Values.extraObjects }}
---
{{ tpl (toYaml $extraObject) $ }}
{{ end }}
34
open-webui/charts/ollama/.ollama-helm/templates/hpa.yaml
Normal file
34
open-webui/charts/ollama/.ollama-helm/templates/hpa.yaml
Normal file
@ -0,0 +1,34 @@
|
||||
---
{{- if .Values.autoscaling.enabled }}
{{- /*
HorizontalPodAutoscaler targeting the Deployment named after the chart
fullname. A CPU and/or memory utilization metric is emitted only when the
corresponding target percentage is set in .Values.autoscaling.
*/}}
{{- $scaling := .Values.autoscaling }}
{{- $targetName := include "ollama.fullname" . }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ $targetName }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ $targetName }}
  minReplicas: {{ $scaling.minReplicas }}
  maxReplicas: {{ $scaling.maxReplicas }}
  metrics:
    {{- with $scaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
    {{- with $scaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
{{- end }}
63
open-webui/charts/ollama/.ollama-helm/templates/ingress.yaml
Normal file
63
open-webui/charts/ollama/.ollama-helm/templates/ingress.yaml
Normal file
@ -0,0 +1,63 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "ollama.fullname" . -}}
|
||||
{{- $svcPort := .Values.service.port -}}
|
||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
||||
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else }}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
namespace: {{ include "ollama.namespace" . }}
|
||||
labels:
|
||||
{{- include "ollama.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
||||
pathType: {{ .pathType }}
|
||||
{{- end }}
|
||||
backend:
|
||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
||||
service:
|
||||
name: {{ $fullName }}
|
||||
port:
|
||||
number: {{ $svcPort }}
|
||||
{{- else }}
|
||||
serviceName: {{ $fullName }}
|
||||
servicePort: {{ $svcPort }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@ -0,0 +1,200 @@
|
||||
---
|
||||
{{- if .Values.knative.enabled }}
|
||||
apiVersion: serving.knative.dev/v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "ollama.fullname" . }}
|
||||
namespace: {{ include "ollama.namespace" . }}
|
||||
labels:
|
||||
{{- include "ollama.labels" . | nindent 4 }}
|
||||
{{- with .Values.knative.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containerConcurrency: {{ .Values.knative.containerConcurrency }}
|
||||
timeoutSeconds: {{ .Values.knative.timeoutSeconds }}
|
||||
responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }}
|
||||
idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "ollama.serviceAccountName" . }}
|
||||
{{- if .Values.runtimeClassName }}
|
||||
runtimeClassName: {{ .Values.runtimeClassName | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.terminationGracePeriodSeconds }}
|
||||
terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
|
||||
{{- end }}
|
||||
{{- with .Values.initContainers }}
|
||||
initContainers:
|
||||
{{- tpl (toYaml . ) $ | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
||||
ports:
|
||||
- containerPort: {{ .Values.ollama.port }}
|
||||
env:
|
||||
- name: OLLAMA_HOST
|
||||
value: "0.0.0.0:{{ .Values.ollama.port }}"
|
||||
{{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}}
|
||||
- name: PATH
|
||||
value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
{{- end}}
|
||||
{{- with .Values.extraEnv }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
envFrom:
|
||||
{{- with .Values.extraEnvFrom }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
args:
|
||||
{{- with .Values.extraArgs }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.resources }}
|
||||
resources:
|
||||
{{- $limits := default dict .Values.resources.limits }}
|
||||
{{- if .Values.ollama.gpu.enabled }}
|
||||
{{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }}
|
||||
{{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }}
|
||||
{{- $limits = merge $limits $gpuLimit }}
|
||||
{{- end }}
|
||||
{{- if eq .Values.ollama.gpu.type "amd" }}
|
||||
{{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }}
|
||||
{{- $limits = merge $limits $gpuLimit }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }}
|
||||
{{- toYaml $ressources | nindent 12 }}
|
||||
{{- end}}
|
||||
volumeMounts:
|
||||
- name: ollama-data
|
||||
mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }}
|
||||
{{- if .Values.persistentVolume.subPath }}
|
||||
subPath: {{ .Values.persistentVolume.subPath }}
|
||||
{{- end }}
|
||||
{{- range .Values.ollama.models.create }}
|
||||
{{- if .configMapRef }}
|
||||
- name: {{ .name }}-config-model-volume
|
||||
mountPath: /models
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.livenessProbe.enabled }}
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.livenessProbe.path }}
|
||||
port: http
|
||||
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
|
||||
successThreshold: {{ .Values.livenessProbe.successThreshold }}
|
||||
failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
|
||||
{{- end }}
|
||||
{{- if .Values.readinessProbe.enabled }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.readinessProbe.path }}
|
||||
port: http
|
||||
initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
|
||||
timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
|
||||
successThreshold: {{ .Values.readinessProbe.successThreshold }}
|
||||
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
|
||||
{{- end }}
|
||||
{{- with .Values.lifecycle}}
|
||||
lifecycle:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- else }}
|
||||
{{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }}
|
||||
lifecycle:
|
||||
postStart:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
while ! /bin/ollama ps > /dev/null 2>&1; do
|
||||
sleep 5
|
||||
done
|
||||
{{- if .Values.ollama.models.pull }}
|
||||
{{- range .Values.ollama.models.pull }}
|
||||
/bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.ollama.models.create }}
                    {{- /*
                      Create custom models in the postStart script.
                      Entries with an inline `template` have their Modelfile written
                      under the models mount path and are created from that file;
                      entries with a `configMapRef` are created from /models/<name>.
                      Inside `range` the dot is the model entry, so the
                      "ollama.modelsMountPath" helper must receive the root context
                      ($) for both the write and the read. Passing `.` makes the
                      helper miss .Values.ollama.mountPath, so `ollama create` would
                      look in a different directory than the one just written to.
                    */}}
                    {{- range .Values.ollama.models.create }}
                    {{- if .template }}
                    cat <<EOF > {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- .template | nindent 20 }}
                    EOF
                    /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" $ }}/{{ .name }}
                    {{- end }}
                    {{- if .configMapRef }}
                    /bin/ollama create {{ .name }} -f /models/{{ .name }}
                    {{- end }}
                    {{- end }}
                    {{- end }}
|
||||
{{- if .Values.ollama.models.run }}
|
||||
{{- range .Values.ollama.models.run }}
|
||||
/bin/ollama run {{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: ollama-data
|
||||
{{- if .Values.persistentVolume.enabled }}
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
|
||||
{{- else }}
|
||||
emptyDir: { }
|
||||
{{- end }}
|
||||
{{- range .Values.ollama.models.create }}
|
||||
{{- if .configMapRef }}
|
||||
- name: {{ .name }}-config-model-volume
|
||||
configMap:
|
||||
name: {{ .configMapRef }}
|
||||
items:
|
||||
- key: {{ .configMapKeyRef }}
|
||||
path: {{ .name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.topologySpreadConstraints }}
|
||||
topologySpreadConstraints:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }}
|
||||
- key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}"
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
33
open-webui/charts/ollama/.ollama-helm/templates/pvc.yaml
Normal file
33
open-webui/charts/ollama/.ollama-helm/templates/pvc.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
{{- /*
PersistentVolumeClaim for the Ollama data volume. Rendered only when
persistence is enabled and no existing claim is supplied.
A storageClass of "-" renders an empty storageClassName; an unset
storageClass omits the field.
*/ -}}
{{- $pv := .Values.persistentVolume -}}
{{- if and $pv.enabled (not $pv.existingClaim) -}}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  {{- with $pv.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
spec:
  accessModes:
    {{- toYaml $pv.accessModes | nindent 4 }}
  {{- with $pv.storageClass }}
  storageClassName: "{{ ternary "" . (eq "-" .) }}"
  {{- end }}
  {{- with $pv.volumeMode }}
  volumeMode: "{{ . }}"
  {{- end }}
  {{- with $pv.volumeName }}
  volumeName: "{{ . }}"
  {{- end }}
  resources:
    requests:
      storage: "{{ $pv.size }}"
{{- end -}}
@ -0,0 +1,19 @@
|
||||
{{- if and .Values.ollama.gpu.enabled .Values.ollama.gpu.draEnabled (not .Values.ollama.gpu.draExistingClaimTemplate) -}}
{{- /*
ResourceClaimTemplate for DRA-based GPU allocation. Rendered only when GPU
and DRA are enabled and no existing claim template is supplied.
The request shape depends on the API version: resource.k8s.io/v1beta1 puts
deviceClassName/count directly on the request, while resource.k8s.io/v1
(selected for Kubernetes >= 1.34) nests them under `exactly`. Emitting the
v1beta1 shape with the v1 apiVersion is rejected by the API server.
*/ -}}
{{- $useV1 := semverCompare ">=1.34-0" .Capabilities.KubeVersion.GitVersion -}}
---
apiVersion: resource.k8s.io/{{ ternary "v1" "v1beta1" $useV1 }}
kind: ResourceClaimTemplate
metadata:
  name: {{ template "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
spec:
  spec:
    devices:
      requests:
        - name: gpu
          {{- if $useV1 }}
          exactly:
            deviceClassName: {{ .Values.ollama.gpu.draDriverClass }}
            count: {{ .Values.ollama.gpu.number | default 1 }}
          {{- else }}
          deviceClassName: {{ .Values.ollama.gpu.draDriverClass }}
          count: {{ .Values.ollama.gpu.number | default 1 }}
          {{- end }}
{{- end -}}
32
open-webui/charts/ollama/.ollama-helm/templates/service.yaml
Normal file
32
open-webui/charts/ollama/.ollama-helm/templates/service.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
---
{{- if not .Values.knative.enabled }}
{{- /*
Service exposing the "http" container port. Skipped when Knative is enabled.
nodePort is emitted only when the service type contains "NodePort", and
loadBalancerIP only when one is set.
*/}}
{{- $svc := .Values.service }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
    {{- if $svc.labels }}
    {{- toYaml $svc.labels | nindent 4 }}
    {{- end }}
  {{- if $svc.annotations }}
  annotations:
    {{- toYaml $svc.annotations | nindent 4 }}
  {{- end }}
spec:
  type: {{ $svc.type }}
  ports:
    - port: {{ $svc.port }}
      targetPort: http
      protocol: TCP
      name: http
      {{- if contains "NodePort" $svc.type }}
      nodePort: {{ $svc.nodePort }}
      {{- end }}
  {{- with $svc.loadBalancerIP }}
  loadBalancerIP: {{ . | quote }}
  {{- end }}
  selector:
    {{- include "ollama.selectorLabels" . | nindent 4 }}
{{- end }}
@ -0,0 +1,15 @@
|
||||
{{- /*
ServiceAccount for the release. Rendered only when
.Values.serviceAccount.create is set; the name comes from the
"ollama.serviceAccountName" helper.
*/ -}}
{{- $account := .Values.serviceAccount -}}
{{- if $account.create -}}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "ollama.serviceAccountName" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
  {{- if $account.annotations }}
  annotations:
    {{- toYaml $account.annotations | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ $account.automount }}
{{- end }}
@ -0,0 +1,25 @@
|
||||
---
{{- if .Values.tests.enabled }}
{{- /*
Helm test hook: a busybox pod that runs wget against
<fullname>:<service port>.
*/}}
{{- $fullName := include "ollama.fullname" . }}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ $fullName }}-test-connection"
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
    {{- if .Values.tests.labels }}
    {{- toYaml .Values.tests.labels | nindent 4 }}
    {{- end }}
  annotations:
    "helm.sh/hook": test
    {{- if .Values.tests.annotations }}
    {{- toYaml .Values.tests.annotations | nindent 4 }}
    {{- end }}
spec:
  containers:
    - name: wget
      image: busybox
      command:
        - 'wget'
      args:
        - '{{ $fullName }}:{{ .Values.service.port }}'
  restartPolicy: Never
{{ end }}
440
open-webui/charts/ollama/.ollama-helm/values.yaml
Normal file
440
open-webui/charts/ollama/.ollama-helm/values.yaml
Normal file
@ -0,0 +1,440 @@
|
||||
# Default values for ollama-helm.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# -- Number of replicas
|
||||
replicaCount: 1
|
||||
|
||||
# Knative configuration
|
||||
knative:
|
||||
# -- Enable Knative integration
|
||||
enabled: false
|
||||
# -- Knative service container concurrency
|
||||
containerConcurrency: 0
|
||||
# -- Knative service timeout seconds
|
||||
timeoutSeconds: 300
|
||||
# -- Knative service response start timeout seconds
|
||||
responseStartTimeoutSeconds: 300
|
||||
# -- Knative service idle timeout seconds
|
||||
idleTimeoutSeconds: 300
|
||||
# -- Knative service annotations
|
||||
annotations: {}
|
||||
|
||||
# Docker image
|
||||
image:
|
||||
# -- Docker image repository
|
||||
repository: ollama/ollama
|
||||
|
||||
# -- Docker pull policy
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# -- Docker image tag, overrides the image tag whose default is the chart appVersion.
|
||||
tag: ""
|
||||
|
||||
# -- Docker registry secret names as an array
|
||||
imagePullSecrets: []
|
||||
|
||||
# -- String to partially override template (will maintain the release name)
|
||||
nameOverride: ""
|
||||
|
||||
# -- String to fully override template
|
||||
fullnameOverride: ""
|
||||
|
||||
# -- String to fully override namespace
|
||||
namespaceOverride: ""
|
||||
|
||||
# Ollama parameters
|
||||
ollama:
|
||||
# Port Ollama is listening on
|
||||
port: 11434
|
||||
|
||||
gpu:
|
||||
# -- Enable GPU integration
|
||||
enabled: false
|
||||
|
||||
# -- Enable DRA GPU integration
|
||||
# If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters
|
||||
draEnabled: false
|
||||
|
||||
# -- DRA GPU DriverClass
|
||||
draDriverClass: "gpu.nvidia.com"
|
||||
|
||||
# -- Existing DRA GPU ResourceClaim Template
|
||||
draExistingClaimTemplate: ""
|
||||
|
||||
# -- GPU type: 'nvidia' or 'amd'
|
||||
# If 'ollama.gpu.enabled', default value is nvidia
|
||||
# If set to 'amd', the 'rocm' suffix is added to the image tag unless 'image.tag' is overridden
|
||||
# This is because AMD and CPU/CUDA use different images
|
||||
type: 'nvidia'
|
||||
|
||||
# -- Specify the number of GPU
|
||||
# If you use MIG section below then this parameter is ignored
|
||||
number: 1
|
||||
|
||||
# -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
|
||||
nvidiaResource: "nvidia.com/gpu"
|
||||
# nvidiaResource: "nvidia.com/mig-1g.10gb" # example
|
||||
# If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
|
||||
|
||||
mig:
|
||||
# -- Enable multiple mig devices
|
||||
# If enabled you will have to specify the mig devices
|
||||
# If enabled is set to false this section is ignored
|
||||
enabled: false
|
||||
|
||||
# -- Specify the mig devices and the corresponding number
|
||||
devices: {}
|
||||
# 1g.10gb: 1
|
||||
# 3g.40gb: 1
|
||||
|
||||
models:
|
||||
# -- List of models to pull at container startup
|
||||
# The more you add, the longer the container will take to start if models are not present
|
||||
# pull:
|
||||
# - llama2
|
||||
# - mistral
|
||||
pull: []
|
||||
|
||||
# -- List of models to load in memory at container startup
|
||||
# run:
|
||||
# - llama2
|
||||
# - mistral
|
||||
run: []
|
||||
|
||||
# -- List of models to create at container startup, there are two options
|
||||
# 1. Create a raw model
|
||||
# 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
|
||||
# create:
|
||||
# - name: llama3.1-ctx32768
|
||||
# configMapRef: my-configmap
|
||||
# configMapKeyRef: configmap-key
|
||||
# - name: llama3.1-ctx32768
|
||||
# template: |
|
||||
# FROM llama3.1
|
||||
# PARAMETER num_ctx 32768
|
||||
create: []
|
||||
|
||||
# -- Automatically remove models present on the disk but not specified in the values file
|
||||
clean: false
|
||||
|
||||
# -- Add insecure flag for pulling at container startup
|
||||
insecure: false
|
||||
|
||||
# -- Override ollama-data volume mount path, default: "/root/.ollama"
|
||||
mountPath: ""
|
||||
|
||||
# Service account
|
||||
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
|
||||
serviceAccount:
|
||||
# -- Specifies whether a service account should be created
|
||||
create: true
|
||||
|
||||
# -- Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
|
||||
# -- Annotations to add to the service account
|
||||
annotations: {}
|
||||
|
||||
# -- The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
# -- Map of annotations to add to the pods
|
||||
podAnnotations: {}
|
||||
|
||||
# -- Map of labels to add to the pods
|
||||
podLabels: {}
|
||||
|
||||
# -- Pod Security Context
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
# -- Priority Class Name
|
||||
priorityClassName: ""
|
||||
|
||||
# -- Container Security Context
|
||||
securityContext: {}
|
||||
# capabilities:
|
||||
# drop:
|
||||
# - ALL
|
||||
# readOnlyRootFilesystem: true
|
||||
# runAsNonRoot: true
|
||||
# runAsUser: 1000
|
||||
|
||||
# -- Specify runtime class
|
||||
runtimeClassName: ""
|
||||
|
||||
# Configure Service
|
||||
service:
|
||||
|
||||
# -- Service type
|
||||
type: ClusterIP
|
||||
|
||||
# -- Service port
|
||||
port: 11434
|
||||
|
||||
# -- Service node port when service type is 'NodePort'
|
||||
nodePort: 31434
|
||||
|
||||
# -- Load Balancer IP address
|
||||
loadBalancerIP:
|
||||
|
||||
# -- Annotations to add to the service
|
||||
annotations: {}
|
||||
|
||||
# -- Labels to add to the service
|
||||
labels: {}
|
||||
|
||||
# Configure Deployment
|
||||
deployment:
|
||||
|
||||
# -- Labels to add to the deployment
|
||||
labels: {}
|
||||
|
||||
# Configure the ingress resource that allows you to access the Ollama service
|
||||
ingress:
|
||||
# -- Enable ingress controller resource
|
||||
enabled: false
|
||||
|
||||
# -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
|
||||
className: ""
|
||||
|
||||
# -- Additional annotations for the Ingress resource.
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: traefik
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
|
||||
# The list of hostnames to be covered with this ingress record.
|
||||
hosts:
|
||||
- host: ollama.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
# -- The tls configuration for hostnames to be covered with this ingress record.
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# Configure resource requests and limits
|
||||
# ref: http://kubernetes.io/docs/user-guide/compute-resources/
|
||||
resources:
|
||||
# -- Pod requests
|
||||
requests: {}
|
||||
# Memory request
|
||||
# memory: 4096Mi
|
||||
|
||||
# CPU request
|
||||
# cpu: 2000m
|
||||
|
||||
# -- Pod limit
|
||||
limits: {}
|
||||
# Memory limit
|
||||
# memory: 8192Mi
|
||||
|
||||
# CPU limit
|
||||
# cpu: 4000m
|
||||
|
||||
# Configure extra options for liveness probe
|
||||
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
|
||||
livenessProbe:
|
||||
# -- Enable livenessProbe
|
||||
enabled: true
|
||||
|
||||
# -- Request path for livenessProbe
|
||||
path: /
|
||||
|
||||
# -- Initial delay seconds for livenessProbe
|
||||
initialDelaySeconds: 60
|
||||
|
||||
# -- Period seconds for livenessProbe
|
||||
periodSeconds: 10
|
||||
|
||||
# -- Timeout seconds for livenessProbe
|
||||
timeoutSeconds: 5
|
||||
|
||||
# -- Failure threshold for livenessProbe
|
||||
failureThreshold: 6
|
||||
|
||||
# -- Success threshold for livenessProbe
|
||||
successThreshold: 1
|
||||
|
||||
# Configure extra options for readiness probe
|
||||
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
|
||||
readinessProbe:
|
||||
# -- Enable readinessProbe
|
||||
enabled: true
|
||||
|
||||
# -- Request path for readinessProbe
|
||||
path: /
|
||||
|
||||
# -- Initial delay seconds for readinessProbe
|
||||
initialDelaySeconds: 30
|
||||
|
||||
# -- Period seconds for readinessProbe
|
||||
periodSeconds: 5
|
||||
|
||||
# -- Timeout seconds for readinessProbe
|
||||
timeoutSeconds: 3
|
||||
|
||||
# -- Failure threshold for readinessProbe
|
||||
failureThreshold: 6
|
||||
|
||||
# -- Success threshold for readinessProbe
|
||||
successThreshold: 1
|
||||
|
||||
# Configure autoscaling
|
||||
autoscaling:
|
||||
# -- Enable autoscaling
|
||||
enabled: false
|
||||
|
||||
# -- Number of minimum replicas
|
||||
minReplicas: 1
|
||||
|
||||
# -- Number of maximum replicas
|
||||
maxReplicas: 100
|
||||
|
||||
# -- CPU usage to target replica
|
||||
targetCPUUtilizationPercentage: 80
|
||||
|
||||
# -- targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# -- Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# -- - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# -- Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# -- - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
# -- Additional arguments on the output Deployment definition.
|
||||
extraArgs: []
|
||||
|
||||
# -- Additional environment variables on the output Deployment definition.
|
||||
# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
|
||||
extraEnv: []
|
||||
# - name: OLLAMA_DEBUG
|
||||
# value: "1"
|
||||
|
||||
# -- Additional environment variables from external sources (like ConfigMap)
|
||||
extraEnvFrom: []
|
||||
# - configMapRef:
|
||||
# name: my-env-configmap
|
||||
|
||||
# Enable persistence using Persistent Volume Claims
|
||||
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
|
||||
persistentVolume:
|
||||
# -- Enable persistence using PVC
|
||||
enabled: false
|
||||
|
||||
# -- Ollama server data Persistent Volume access modes
|
||||
# Must match those of existing PV or dynamic provisioner
|
||||
# Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
|
||||
# -- Ollama server data Persistent Volume annotations
|
||||
annotations: {}
|
||||
|
||||
# -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
|
||||
# created + ready PVC here. If set, this Chart will not create the default PVC.
|
||||
# Requires persistentVolume.enabled: true
|
||||
existingClaim: ""
|
||||
|
||||
# -- Ollama server data Persistent Volume size
|
||||
size: 30Gi
|
||||
|
||||
# -- Ollama server data Persistent Volume Storage Class
|
||||
# If defined, storageClassName: <storageClass>
|
||||
# If set to "-", storageClassName: "", which disables dynamic provisioning
|
||||
# If undefined (the default) or set to null, no storageClassName spec is
|
||||
# set, choosing the default provisioner. (gp2 on AWS, standard on
|
||||
# GKE, AWS & OpenStack)
|
||||
storageClass: ""
|
||||
|
||||
# -- Ollama server data Persistent Volume Mode
|
||||
# If defined, volumeMode: <volumeMode>
|
||||
# If empty (the default) or set to null, no volumeMode spec is
|
||||
# set, choosing the default mode.
|
||||
volumeMode: ""
|
||||
|
||||
# -- Subdirectory of Ollama server data Persistent Volume to mount
|
||||
# Useful if the volume's root directory is not empty
|
||||
subPath: ""
|
||||
|
||||
# -- Pre-existing PV to attach this claim to
|
||||
# Useful if a CSI auto-provisions a PV for you and you want to always
|
||||
# reference the PV moving forward
|
||||
volumeName: ""
|
||||
|
||||
# -- Node labels for pod assignment.
|
||||
nodeSelector: {}
|
||||
|
||||
# -- Tolerations for pod assignment
|
||||
tolerations: []
|
||||
|
||||
# -- Affinity for pod assignment
|
||||
affinity: {}
|
||||
|
||||
# -- Lifecycle for pod assignment (override ollama.models startup pull/run)
|
||||
lifecycle: {}
|
||||
|
||||
# How to replace existing pods
|
||||
updateStrategy:
|
||||
# -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
|
||||
type: "Recreate"
|
||||
|
||||
# -- Topology Spread Constraints for pod assignment
|
||||
topologySpreadConstraints: []  # list of constraint objects, as in the Kubernetes PodSpec
|
||||
|
||||
# -- Wait for a grace period
|
||||
terminationGracePeriodSeconds: 120
|
||||
|
||||
# -- Init containers to add to the pod
|
||||
initContainers: []
|
||||
# - name: startup-tool
|
||||
# image: alpine:3
|
||||
# command: [sh, -c]
|
||||
# args:
|
||||
# - echo init
|
||||
|
||||
# -- Use the host’s ipc namespace.
|
||||
hostIPC: false
|
||||
|
||||
# -- Use the host’s pid namespace
|
||||
hostPID: false
|
||||
|
||||
# -- Use the host's network namespace.
|
||||
hostNetwork: false
|
||||
|
||||
# -- Extra K8s manifests to deploy
|
||||
extraObjects: []
|
||||
# - apiVersion: v1
|
||||
# kind: PersistentVolume
|
||||
# metadata:
|
||||
# name: aws-efs
|
||||
# data:
|
||||
# key: "value"
|
||||
# - apiVersion: scheduling.k8s.io/v1
|
||||
# kind: PriorityClass
|
||||
# metadata:
|
||||
# name: high-priority
|
||||
# value: 1000000
|
||||
# globalDefault: false
|
||||
# description: "This priority class should be used for XYZ service pods only."
|
||||
|
||||
# Test connection pods
|
||||
tests:
|
||||
enabled: true
|
||||
# -- Labels to add to the tests
|
||||
labels: {}
|
||||
# -- Annotations to add to the tests
|
||||
annotations: {}
|
||||
Reference in New Issue
Block a user