# Default values for ollama-helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Number of replicas
replicaCount: 1

# Knative configuration
knative:
  # -- Enable Knative integration
  enabled: false
  # -- Knative service container concurrency
  containerConcurrency: 0
  # -- Knative service timeout seconds
  timeoutSeconds: 300
  # -- Knative service response start timeout seconds
  responseStartTimeoutSeconds: 300
  # -- Knative service idle timeout seconds
  idleTimeoutSeconds: 300
  # -- Knative service annotations
  annotations: {}
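  # Example (illustrative only, not a chart default): typical Knative autoscaling annotations.
  # Where the chart applies these annotations depends on its Knative template, so verify before relying on them.
  # annotations:
  #   autoscaling.knative.dev/min-scale: "1"
  #   autoscaling.knative.dev/max-scale: "3"
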
# Docker image
image:
  # -- Docker image registry
  repository: ollama/ollama

  # -- Docker pull policy
  pullPolicy: IfNotPresent

  # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
  tag: ""

# -- Docker registry secret names as an array
imagePullSecrets: []
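# Example (illustrative; the secret name is hypothetical and must already exist in the release namespace):
# imagePullSecrets:
#   - name: my-registry-secret
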
# -- String to partially override template (will maintain the release name)
nameOverride: ""

# -- String to fully override template
fullnameOverride: ""

# -- String to fully override namespace
namespaceOverride: ""

# Ollama parameters
ollama:
  # Port Ollama is listening on
  port: 11434

  gpu:
    # -- Enable GPU integration
    enabled: false

    # -- Enable DRA GPU integration
    # If enabled, DRA is used instead of the device plugin, and a ResourceClaim and GpuClaimParameters are created
    draEnabled: false

    # -- DRA GPU DriverClass
    draDriverClass: "gpu.nvidia.com"

    # -- Existing DRA GPU ResourceClaim Template
    draExistingClaimTemplate: ""
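    # Example (illustrative; the name is hypothetical): reference a ResourceClaimTemplate you created yourself
    # instead of letting the chart generate one:
    # draExistingClaimTemplate: "my-gpu-claim-template"
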
    # -- GPU type: 'nvidia' or 'amd'
    # If 'ollama.gpu.enabled' is true, the default value is 'nvidia'
    # If set to 'amd', a 'rocm' suffix is appended to the image tag unless 'image.tag' is overridden
    # This is because AMD (ROCm) and CPU/CUDA use different images
    type: 'nvidia'

    # -- Specify the number of GPUs
    # If you use the MIG section below, this parameter is ignored
    number: 1

    # -- Only for NVIDIA cards; change to e.g. 'nvidia.com/mig-1g.10gb' to use a MIG slice
    nvidiaResource: "nvidia.com/gpu"
    # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
    # To use more than one NVIDIA MIG slice, use the MIG section below (nvidiaResource is then ignored and only the MIG configuration applies)

    mig:
      # -- Enable multiple MIG devices
      # If enabled, you must specify the MIG devices below
      # If set to false, this section is ignored
      enabled: false

      # -- Specify the MIG devices and the number of each
      devices: {}
      # 1g.10gb: 1
      # 3g.40gb: 1

  models:
    # -- List of models to pull at container startup
    # The more you add, the longer the container will take to start if models are not present
    # pull:
    #   - llama2
    #   - mistral
    pull: []

    # -- List of models to load in memory at container startup
    # run:
    #   - llama2
    #   - mistral
    run: []

    # -- List of models to create at container startup; there are two options:
    # 1. Create a raw model from an inline template
    # 2. Load a model from ConfigMaps; the ConfigMaps must be created beforehand and are mounted as a volume under the "/models" directory
    # create:
    #   - name: llama3.1-ctx32768
    #     configMapRef: my-configmap
    #     configMapKeyRef: configmap-key
    #   - name: llama3.1-ctx32768
    #     template: |
    #       FROM llama3.1
    #       PARAMETER num_ctx 32768
    create: []

    # -- Automatically remove models present on the disk but not specified in the values file
    clean: false

    # -- Add the insecure flag when pulling models at container startup
    insecure: false

  # -- Override the ollama-data volume mount path (default: "/root/.ollama")
  mountPath: ""

# Service account
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true

  # -- Automatically mount a ServiceAccount's API credentials?
  automount: true

  # -- Annotations to add to the service account
  annotations: {}

  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# -- Map of annotations to add to the pods
podAnnotations: {}

# -- Map of labels to add to the pods
podLabels: {}

# -- Pod Security Context
podSecurityContext: {}
  # fsGroup: 2000

# -- Priority Class Name
priorityClassName: ""

# -- Container Security Context
securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# -- Specify runtime class
runtimeClassName: ""
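# Example (illustrative; assumes your GPU nodes expose the NVIDIA container runtime through a RuntimeClass named "nvidia"):
# runtimeClassName: nvidia
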
# Configure Service
service:

  # -- Service type
  type: ClusterIP

  # -- Service port
  port: 11434

  # -- Service node port when service type is 'NodePort'
  nodePort: 31434

  # -- Load Balancer IP address
  loadBalancerIP:

  # -- Annotations to add to the service
  annotations: {}

  # -- Labels to add to the service
  labels: {}
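  # Example (illustrative; the address is hypothetical and must be assignable by your cloud or MetalLB pool):
  # type: LoadBalancer
  # loadBalancerIP: 192.0.2.10
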
# Configure Deployment
deployment:

  # -- Labels to add to the deployment
  labels: {}

# Configure the ingress resource that allows you to access the Ollama installation
ingress:
  # -- Enable ingress controller resource
  enabled: false

  # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
  className: ""

  # -- Additional annotations for the Ingress resource.
  annotations: {}
    # kubernetes.io/ingress.class: traefik
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"

  # The list of hostnames to be covered with this ingress record.
  hosts:
    - host: ollama.local
      paths:
        - path: /
          pathType: Prefix

  # -- The tls configuration for hostnames to be covered with this ingress record.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Configure resource requests and limits
# ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources:
  # -- Pod requests
  requests: {}
    # Memory request
    # memory: 4096Mi

    # CPU request
    # cpu: 2000m

  # -- Pod limit
  limits: {}
    # Memory limit
    # memory: 8192Mi

    # CPU limit
    # cpu: 4000m

# Configure extra options for liveness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
livenessProbe:
  # -- Enable livenessProbe
  enabled: true

  # -- Request path for livenessProbe
  path: /

  # -- Initial delay seconds for livenessProbe
  initialDelaySeconds: 60

  # -- Period seconds for livenessProbe
  periodSeconds: 10

  # -- Timeout seconds for livenessProbe
  timeoutSeconds: 5

  # -- Failure threshold for livenessProbe
  failureThreshold: 6

  # -- Success threshold for livenessProbe
  successThreshold: 1

# Configure extra options for readiness probe
# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
readinessProbe:
  # -- Enable readinessProbe
  enabled: true

  # -- Request path for readinessProbe
  path: /

  # -- Initial delay seconds for readinessProbe
  initialDelaySeconds: 30

  # -- Period seconds for readinessProbe
  periodSeconds: 5

  # -- Timeout seconds for readinessProbe
  timeoutSeconds: 3

  # -- Failure threshold for readinessProbe
  failureThreshold: 6

  # -- Success threshold for readinessProbe
  successThreshold: 1

# Configure autoscaling
autoscaling:
  # -- Enable autoscaling
  enabled: false

  # -- Number of minimum replicas
  minReplicas: 1

  # -- Number of maximum replicas
  maxReplicas: 100

  # -- Target CPU utilization percentage used to scale replicas
  targetCPUUtilizationPercentage: 80

  # -- targetMemoryUtilizationPercentage: 80

# -- Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# -- Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

# -- Additional arguments on the output Deployment definition.
extraArgs: []

# -- Additional environment variables on the output Deployment definition.
# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
extraEnv: []
# - name: OLLAMA_DEBUG
#   value: "1"

# -- Additional environment variables from external sources (like ConfigMap)
extraEnvFrom: []
# - configMapRef:
#     name: my-env-configmap

# Enable persistence using Persistent Volume Claims
# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
persistentVolume:
  # -- Enable persistence using PVC
  enabled: false

  # -- Ollama server data Persistent Volume access modes
  # Must match those of existing PV or dynamic provisioner
  # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
  accessModes:
    - ReadWriteOnce

  # -- Ollama server data Persistent Volume annotations
  annotations: {}

  # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
  # created + ready PVC here. If set, this chart will not create the default PVC.
  # Requires persistentVolume.enabled: true
  existingClaim: ""

  # -- Ollama server data Persistent Volume size
  size: 30Gi

  # -- Ollama server data Persistent Volume Storage Class
  # If defined, storageClassName: <storageClass>
  # If set to "-", storageClassName: "", which disables dynamic provisioning
  # If undefined (the default) or set to null, no storageClassName spec is
  # set, choosing the default provisioner (gp2 on AWS, standard on
  # GKE, AWS & OpenStack)
  storageClass: ""

  # -- Ollama server data Persistent Volume Binding Mode
  # If defined, volumeMode: <volumeMode>
  # If empty (the default) or set to null, no volumeBindingMode spec is
  # set, choosing the default mode.
  volumeMode: ""

  # -- Subdirectory of Ollama server data Persistent Volume to mount
  # Useful if the volume's root directory is not empty
  subPath: ""

  # -- Pre-existing PV to attach this claim to
  # Useful if a CSI auto-provisions a PV for you and you want to always
  # reference the PV moving forward
  volumeName: ""
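  # Example (illustrative; the claim name is hypothetical and the PVC must already exist and be bound):
  # enabled: true
  # existingClaim: "my-ollama-models"
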
# -- Node labels for pod assignment.
nodeSelector: {}

# -- Tolerations for pod assignment
tolerations: []

# -- Affinity for pod assignment
affinity: {}
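# Example (illustrative): pin the pod to GPU nodes and tolerate the taint that the NVIDIA GPU Operator
# typically applies; node labels and taints vary by cluster, so adjust these to yours.
# nodeSelector:
#   nvidia.com/gpu.present: "true"
# tolerations:
#   - key: nvidia.com/gpu
#     operator: Exists
#     effect: NoSchedule
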
# -- Lifecycle hooks for the Ollama container (overrides the ollama.models startup pull/run behaviour)
lifecycle: {}
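# Example (illustrative; the model name is arbitrary): a postStart hook that pulls a model yourself
# instead of relying on the generated ollama.models startup commands:
# lifecycle:
#   postStart:
#     exec:
#       command: ["/bin/sh", "-c", "ollama pull llama3.1"]
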
# How to replace existing pods
updateStrategy:
  # -- Deployment strategy, can be "Recreate" or "RollingUpdate". Default is "Recreate"
  type: "Recreate"
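  # Example (illustrative): switch to rolling updates. Note that with a ReadWriteOnce PVC the default
  # "Recreate" strategy avoids two pods contending for the same volume during a rollout.
  # type: "RollingUpdate"
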
# -- Topology Spread Constraints for pod assignment
topologySpreadConstraints: {}

# -- Termination grace period: seconds to wait before the pod is forcefully killed on shutdown
terminationGracePeriodSeconds: 120

# -- Init containers to add to the pod
initContainers: []
# - name: startup-tool
#   image: alpine:3
#   command: [sh, -c]
#   args:
#     - echo init

# -- Use the host's IPC namespace.
hostIPC: false

# -- Use the host's PID namespace
hostPID: false

# -- Use the host's network namespace.
hostNetwork: false

# -- Extra K8s manifests to deploy
extraObjects: []
# - apiVersion: v1
#   kind: ConfigMap
#   metadata:
#     name: aws-efs
#   data:
#     key: "value"
# - apiVersion: scheduling.k8s.io/v1
#   kind: PriorityClass
#   metadata:
#     name: high-priority
#   value: 1000000
#   globalDefault: false
#   description: "This priority class should be used for XYZ service pods only."

# Test connection pods
tests:
  # -- Enable test connection pods
  enabled: true
  # -- Labels to add to the tests
  labels: {}
  # -- Annotations to add to the tests
  annotations: {}