first backup of charts

Commit cbfc0104a6 by Ivan087, 2025-09-23 10:01:17 +08:00
170 changed files with 17788 additions and 0 deletions


Submodule code/codeserver added at b59a4f7366

code/metadata.yaml

@ -0,0 +1,55 @@
application_name: &application_name code
distributed:
method: helm
release_name: *application_name
chart: codeserver/ci/helm-chart
sets:
image:
repository: codercom/code-server
tag: '4.103.2'
pullPolicy: IfNotPresent
resources:
limits:
nvidia.com/gpu: 0
nodeSelector:
resource-group: gpu_5880
service:
type: NodePort
port: 8080
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30083
url: ~
pod:
name: *application_name
monolithic:
method: helm
release_name: *application_name
chart: codeserver/ci/helm-chart
sets:
image:
repository: codercom/code-server
tag: '4.103.2'
pullPolicy: IfNotPresent
resources:
limits:
nvidia.com/gpu: 1
nodeSelector:
resource-group: gpu_5880
service:
type: NodePort
port: 8080
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30083
url: ~
pod:
name: *application_name

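Every metadata.yaml in this backup follows the same shape: an anchored application_name plus distributed and monolithic profiles, each naming a Helm chart, a block of value overrides under sets, and the svc/pod details used to probe the release afterwards. As a rough sketch, assuming the deployment tooling maps each nested key under sets to a --set flag, the distributed profile above corresponds to:

# Hypothetical manual equivalent of the "distributed" profile; the actual
# tooling presumably generates these flags from metadata.yaml.
helm install code codeserver/ci/helm-chart \
  --set image.repository=codercom/code-server \
  --set image.tag=4.103.2 \
  --set image.pullPolicy=IfNotPresent \
  --set service.type=NodePort \
  --set service.port=8080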
edgetts/metadata.yaml

@ -0,0 +1,47 @@
application_name: &application_name edgetts
distributed:
method: helm
release_name: *application_name
chart: test-tts
sets:
image:
repository: travisvn/openai-edge-tts
tag: "latest"
pullPolicy: IfNotPresent
service:
type: NodePort
port: 5050
nodePort: 30250
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30250
url: ~
pod:
name: *application_name
monolithic:
method: helm
release_name: *application_name
chart: test-tts
sets:
image:
repository: travisvn/openai-edge-tts
tag: "latest"
pullPolicy: IfNotPresent
service:
type: NodePort
port: 5050
nodePort: 30250
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30250
url: ~
pod:
name: *application_name


@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@ -0,0 +1,24 @@
apiVersion: v2
name: test-tts
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test-tts.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test-tts.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test-tts.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test-tts.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}


@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "test-tts.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "test-tts.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "test-tts.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "test-tts.labels" -}}
helm.sh/chart: {{ include "test-tts.chart" . }}
{{ include "test-tts.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "test-tts.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test-tts.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "test-tts.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test-tts.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

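These helpers determine every resource name in the chart. One subtlety worth noting: with the default values.yaml below, nameOverride is "edgetts", so for a release also named edgetts the release name contains the (overridden) chart name and test-tts.fullname collapses to just edgetts. A dry render makes this easy to verify (chart path assumed):

# Inspect the generated names without installing anything
helm template edgetts ./test-tts --show-only templates/service.yaml
# metadata.name should render as "edgetts", not "edgetts-test-tts"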

@ -0,0 +1,78 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "test-tts.fullname" . }}
labels:
{{- include "test-tts.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "test-tts.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "test-tts.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "test-tts.serviceAccountName" . }}
{{- with .Values.podSecurityContext }}
securityContext:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
{{- with .Values.securityContext }}
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
{{- with .Values.livenessProbe }}
livenessProbe:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.readinessProbe }}
readinessProbe:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}


@ -0,0 +1,32 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "test-tts.fullname" . }}
labels:
{{- include "test-tts.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "test-tts.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}


@ -0,0 +1,43 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "test-tts.fullname" . }}
labels:
{{- include "test-tts.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- with .Values.ingress.className }}
ingressClassName: {{ . }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- with .pathType }}
pathType: {{ . }}
{{- end }}
backend:
service:
name: {{ include "test-tts.fullname" $ }}
port:
number: {{ $.Values.service.port }}
{{- end }}
{{- end }}
{{- end }}


@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "test-tts.fullname" . }}
labels:
{{- include "test-tts.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: http
protocol: TCP
name: http
      {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }}
      nodePort: {{ .Values.service.nodePort }}
      {{- end }}
selector:
{{- include "test-tts.selectorLabels" . | nindent 4 }}


@ -0,0 +1,13 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "test-tts.serviceAccountName" . }}
labels:
{{- include "test-tts.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}


@ -0,0 +1,15 @@
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "test-tts.fullname" . }}-test-connection"
labels:
{{- include "test-tts.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "test-tts.fullname" . }}:{{ .Values.service.port }}']
restartPolicy: Never

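The "helm.sh/hook": test annotation registers this pod as a chart test rather than a regular resource; after an install it runs on demand:

# Spins up the wget test pod against <fullname>:5050 and reports success or failure
helm test edgetts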

@ -0,0 +1,124 @@
# Default values for test-tts.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1
# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
image:
repository: travisvn/openai-edge-tts
# This sets the pull policy for images.
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# This is to override the chart name.
nameOverride: "edgetts"
fullnameOverride: ""
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
# Specifies whether a service account should be created
create: true
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
# This is for setting Kubernetes Annotations to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# This is for setting Kubernetes Labels to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
type: NodePort
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
port: 5050
nodePort: 30250
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
#livenessProbe:
# httpGet:
# path: /
# port: http
#readinessProbe:
# httpGet:
# path: /
# port: http
# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}

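With these defaults the service is pinned to NodePort 30250, matching the svc block in edgetts/metadata.yaml, so a release can be smoke-tested from outside the cluster. A sketch, using the hostname from the metadata and a path that is only illustrative (assuming openai-edge-tts exposes the usual OpenAI-compatible routes):

curl -s http://10.6.14.123:30250/v1/models  # hypothetical endpoint path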
jarvis/jarvis/.helmignore

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

jarvis/jarvis/Chart.yaml

@ -0,0 +1,24 @@
apiVersion: v2
name: jarvis
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}


@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "jarvis.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "jarvis.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "jarvis.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "jarvis.labels" -}}
helm.sh/chart: {{ include "jarvis.chart" . }}
{{ include "jarvis.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "jarvis.selectorLabels" -}}
app.kubernetes.io/name: {{ include "jarvis.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "jarvis.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}


@ -0,0 +1,17 @@
# pv.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Release.Name }}-pv # name of the PV; can be customized
spec:
  storageClassName: local-path # must match the StorageClass name referenced by the PVC
  capacity:
    storage: 500Gi # PV capacity; adjust to the actual size or expected usage of the NFS share
  accessModes:
    - ReadWriteMany # access mode
  persistentVolumeReclaimPolicy: Retain
  nfs:
    path: /volume1/Dataset/PVStore/lab-data-dataset-pvc-ec4aba12-c683-4168-b335-7b1a8819581a/Private/cache-images # path shared on the NFS server
    server: 10.6.80.11 # IP address or hostname of the NFS server


@ -0,0 +1,14 @@
# pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-pvc # name of the PVC; Pods reference this name
  #namespace: default # namespace of the PVC, usually default or a custom one
spec:
  storageClassName: local-path # must match the StorageClass name used by the PV
  accessModes:
    - ReadWriteMany # access mode; must match the PV's accessModes
  resources:
    requests:
      storage: 50Gi # storage requested by the PVC; must be less than or equal to the PV capacity
  volumeName: {{ .Release.Name }}-pv # explicitly name the PV to bind; this is what makes the binding manual

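Because the PVC names its PV explicitly through volumeName, binding is manual and one-to-one rather than left to the scheduler. A quick post-install check, assuming the release is named jarvis:

kubectl get pv jarvis-pv
kubectl get pvc jarvis-pvc  # STATUS should read Bound once sizes and access modes line up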

@ -0,0 +1,68 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: jarvis-adapter-deployment
# namespace: jarvis-models
labels:
app: jarvis-adapter
spec:
replicas: 1
selector:
matchLabels:
app: jarvis-adapter
template:
metadata:
labels:
app: jarvis-adapter
spec:
#hostNetwork: true
# --- START: Add this section for image pull secrets ---
imagePullSecrets:
- name: regcred # This MUST match the name of the secret you just created
# --- END: Add this section ---
containers:
- name: jarvis-adapter
image: {{ .Values.jarvis_adapter.image }}
        imagePullPolicy: IfNotPresent # pull only when the image is not already present on the node
env:
- name: INFERENCE_ENDPOINT
value: {{ .Values.jarvis_adapter.endpoint }}
ports:
- containerPort: 5000 # The port your application listens on inside the container
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
protocol: TCP
        resources: # Add this section
          requests:
            cpu: 100m # 100 millicores (0.1 CPU)
            memory: 256Mi # 256 mebibytes
          limits:
            cpu: 500m # limit to 500 millicores (0.5 CPU)
            memory: 512Mi # limit to 512 mebibytes
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: llm-blackbox
# namespace: jarvis-models
labels:
app: jarvis-adapter
spec:
selector:
app: jarvis-adapter
ports:
- protocol: TCP
port: 80
targetPort: 5000
type: NodePort

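The Service is deliberately given the fixed name llm-blackbox so that other components can reach the adapter at http://llm-blackbox, which is exactly the URL wired into the ConfigMap below; the NodePort itself is left for Kubernetes to allocate:

kubectl get svc llm-blackbox  # PORT(S) column shows 80:<allocated-nodeport>/TCP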

@ -0,0 +1,85 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: jarvis-api-deployment
# namespace: jarvis-models
labels:
app: jarvis-api
spec:
replicas: 1
selector:
matchLabels:
app: jarvis-api
template:
metadata:
labels:
app: jarvis-api
spec:
#hostNetwork: true
# --- START: Add this section for image pull secrets ---
imagePullSecrets:
- name: regcred # This MUST match the name of the secret you just created
# --- END: Add this section ---
containers:
- name: jarvis-api
image: {{ .Values.jarvis_api.image }}
        imagePullPolicy: IfNotPresent # pull only when the image is not already present on the node
ports:
- containerPort: 8080 # The port your application listens on inside the container
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
protocol: TCP
        resources: # Add this section
          requests:
            cpu: 100m # 100 millicores (0.1 CPU)
            memory: 256Mi # 256 mebibytes
          limits:
            cpu: 500m # limit to 500 millicores (0.5 CPU)
            memory: 512Mi # limit to 512 mebibytes
volumeMounts:
- name: env-config-volume
mountPath: /.env.yml
subPath: .env.yml
readOnly: true
- name: images-data
mountPath: /images
volumes:
- name: env-config-volume
#hostPath:
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
# type: FileOrCreate
configMap:
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
items:
- key: .api.env.yml # This is the key defined in the ConfigMap's data section
path: .env.yml # This is the filename inside the mountPath (e.g., /.env.yml)
- name: images-data
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-api-service
# namespace: jarvis-models
labels:
app: jarvis-api
spec:
selector:
app: jarvis-api
ports:
- protocol: TCP
port: 8080
targetPort: 8080
type: NodePort


@ -0,0 +1,183 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Release.Name }}-cm # Name of your ConfigMap
# namespace: jarvis-models # Ensure this matches your Deployment's namespace
data:
.api.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
database:
host: 10.6.14.130
port: 3306
username: jarvis
password: boardwarejarvis
database: jarvis
jwt:
secret: secretkey
users:
- username: jarvis
password: boardwarejarvis
- username: user
password: boardwareuser
- username: g2e
password: g2e
- username: vera
password: vera
- username: ivan
password: ivan
blackbox:
mode: 0 # 0: /?blackbox=models, 1: /models-blackbox
url: http://jarvis-model-service
port: 8080
vad:
url: http://vad-blackbox
asr:
url: http://asr-blackbox
llm:
url: http://llm-blackbox
vlm:
url: http://vlm-blackbox
tts:
url: http://tts-blackbox
chatllama:
url: http://chatllama-blackbox
chroma:
upsert_url: http://chroma-blackbox/upsert
env: dev
authentik:
redirectUri: http://10.6.14.130:4200
baseUrl: https://authentik.universalmacro.com
clientId: xxx
clientSecret: xxx
server:
port: 8080
# log:
# path: "/Workspace/Logging/logtime.out"
log:
loki:
# url: "https://103.192.46.20:27002/laas/1868865592451137536/loki/api/v1/push"
url: "https://loki.bwgdi.com/loki/api/v1/push"
x-odin-auth: "log_m7uxtqtru2318hbaoonf9wgjy8chcnebhwhl0wncsvfctu2ppn9m53q6p3i3"
labels:
app: jarvis
env: dev
location: "k3s_gdi"
model:
tts:
url: http://10.6.14.130:8000/?blackbox_name=tts
tts_model_name: melotts
tts_stream: false
streaming:
url: http://10.6.14.130:8000/?blackbox_name=chat
vlms_url: http://10.6.14.130:8000/?blackbox_name=vlms
  .models.env.yml: | # this key is mounted into the model pod as /jarvis-models/.env.yaml
env:
version: 0.0.1
host: 0.0.0.0
port: 8000
log:
level: debug
time_format: "%Y-%m-%d %H:%M:%S"
filename: "./jarvis-models.log"
loki:
url: "https://loki.bwgdi.com/loki/api/v1/push"
labels:
app: jarvis
env: dev
location: "k3s_gdi"
layer: models
melotts:
mode: local # or docker
url: http://10.6.44.141:18080/convert/tts
speed: 0.9
device: 'cuda:0'
language: 'ZH'
speaker: 'ZH'
cosyvoicetts:
mode: local # or docker
url: http://10.6.44.141:18080/convert/tts
speed: 0.9
device: 'cuda:0'
language: '粤语女'
speaker: 'ZH'
sovitstts:
mode: docker
url: http://10.6.80.90:9880/tts
speed: 0.9
device: 'cuda:0'
language: 'ZH'
speaker: 'ZH'
text_lang: "yue"
ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
prompt_lang: "yue"
prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
text_split_method: "cut5"
batch_size: 1
media_type: "wav"
streaming_mode: True
sensevoiceasr:
mode: local # or docker
url: http://10.6.44.141:18080/convert/tts
speed: 0.9
device: 'cuda:0'
language: '粤语女'
speaker: 'ZH'
tesou:
url: http://120.196.116.194:48891/chat/
TokenIDConverter:
token_path: src/asr/resources/models/token_list.pkl
unk_symbol: <unk>
CharTokenizer:
symbol_value:
space_symbol: <space>
remove_non_linguistic_symbols: false
WavFrontend:
cmvn_file: src/asr/resources/models/am.mvn
frontend_conf:
fs: 16000
window: hamming
n_mels: 80
frame_length: 25
frame_shift: 10
lfr_m: 7
lfr_n: 6
filter_length_max: -.inf
dither: 0.0
Model:
model_path: src/asr/resources/models/model.onnx
use_cuda: false
CUDAExecutionProvider:
device_id: 0
arena_extend_strategy: kNextPowerOfTwo
cudnn_conv_algo_search: EXHAUSTIVE
do_copy_in_default_stream: true
batch_size: 3
blackbox:
lazyloading: true
vlms:
urls:
qwen_vl: http://vl-svc
vlm: http://vl-svc:8080
path:
chroma_rerank_embedding_model: /Model/BAAI
cosyvoice_path: /Voice/CosyVoice
cosyvoice_model_path: /Voice/CosyVoice/pretrained_models
sensevoice_model_path: /Voice/SenseVoice/SenseVoiceSmall

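Both keys of this ConfigMap are projected as single files via subPath mounts (.api.env.yml into the API pod, .models.env.yml into the model pod), so configuration problems are easiest to debug in place:

# Confirm the API pod sees the projected file at the expected path
kubectl exec deploy/jarvis-api-deployment -- cat /.env.yml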

@ -0,0 +1,96 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: jarvis-model-deployment
# namespace: jarvis-models
labels:
app: jarvis-model
spec:
replicas: 1
selector:
matchLabels:
app: jarvis-model
template:
metadata:
labels:
app: jarvis-model
spec:
#hostNetwork: true
# --- START: Add this section for image pull secrets ---
imagePullSecrets:
- name: regcred # This MUST match the name of the secret you just created
# --- END: Add this section ---
runtimeClassName: nvidia
containers:
- name: jarvis-model
image: {{ .Values.jarvis_model.image }}
        imagePullPolicy: IfNotPresent # pull only when the image is not already present on the node
# command: ["sleep", "infinity"]
ports:
- containerPort: 8000 # The port your application listens on inside the container
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
protocol: TCP
        resources: # Add this section
          requests:
            cpu: 1 # one full CPU core
            memory: 512Mi # 512 mebibytes
          limits:
            cpu: 2 # limit to two CPU cores
            memory: 1Gi # limit to 1 gibibyte
            nvidia.com/gpu: 1
volumeMounts:
- name: env-config-volume
mountPath: /jarvis-models/.env.yaml
subPath: .env.yaml
readOnly: true
- name: nfs-volume
subPath: Weight
mountPath: /Model
- name: nfs-volume
subPath: Voice
mountPath: /Voice
volumes:
- name: env-config-volume
#hostPath:
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
# type: FileOrCreate
configMap:
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
items:
- key: .models.env.yml # This is the key defined in the ConfigMap's data section
path: .env.yaml # This is the filename inside the mountPath (e.g., /.env.yml)
- name: nfs-volume
        nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
#{{- range .Values.volumes }}
#- {{ . | toYaml | nindent 10 | trim }}
#{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: jarvis-model-service
# namespace: jarvis-models
labels:
app: jarvis-model
spec:
selector:
app: jarvis-model
ports:
- protocol: TCP
port: 8080
targetPort: 8000
type: NodePort

jarvis/jarvis/values.yaml

@ -0,0 +1,87 @@
# Default values for jarvis.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1
jarvis_api:
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
jarvis_model:
image: harbor.bwgdi.com/library/jarvis-models:0.0.1
jarvis_adapter:
image: harbor.bwgdi.com/library/adapter-test:0.0.1
endpoint: "http://vllm-leader-nodeport:8080"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
type: ClusterIP
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
port: 80
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
# Additional volumes on the output Deployment definition.
volumes:
- name: nfs-volume
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}

jarvis/metadata.yaml

@ -0,0 +1,43 @@
application_name: &application_name jarvis
distributed:
method: helm
release_name: *application_name
chart: jarvis
sets:
jarvis_api:
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
jarvis_adapter:
image: harbor.bwgdi.com/library/adapter-test:0.0.1
endpoint: "http://vllm-leader-nodeport:8080"
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: jarvis-api-service
port: 30083
url: ~
pod:
name: jarvis-
monolithic:
method: helm
release_name: *application_name
chart: jarvis
sets:
jarvis_api:
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
jarvis_adapter:
image: harbor.bwgdi.com/library/adapter-test:0.0.1
endpoint: "http://vllm-leader-nodeport:8080"
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: jarvis-api-service
port: 30083
url: ~
pod:
name: jarvis-


@ -0,0 +1,17 @@
apiVersion: v1
appVersion: 6.0.3
description: Helm for jupyter single server with pyspark support
home: https://jupyter.org
icon: https://jupyter.org/assets/main-logo.svg
keywords:
- jupyter
- notebook
- spark
maintainers:
- email: cgiraldo@gradiant.org
name: cgiraldo
name: jupyter
sources:
- https://github.com/gradiant/charts
- https://github.com/astrobounce/helm-jupyter
version: 0.1.6

jupyter/jupyter/README.md

@ -0,0 +1,34 @@
jupyter
=======
Helm chart for a jupyter single server with pyspark support.
For a jupyterhub chart, see [zero-to-jupyterhub](https://zero-to-jupyterhub.readthedocs.io/en/latest/).
Current chart version is `0.1.6`.
Source code can be found [here](https://github.com/gradiant/charts/charts/jupyter).
## Chart Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"gradiant/jupyter"` | |
| image.tag | string | `"6.0.3"` | |
| ingress.annotations | object | `{}` | |
| ingress.enabled | bool | `false` | |
| ingress.hosts[0] | string | `"jupyter.127-0-0-1.nip.io"` | |
| ingress.path | string | `"/"` | |
| ingress.tls | list | `[]` | |
| lab | bool | `true` | |
| nodeSelector | object | `{}` | |
| persistence.accessMode | string | `"ReadWriteOnce"` | |
| persistence.enabled | bool | `true` | |
| persistence.size | string | `"50Gi"` | |
| persistence.storageClass | string | `nil` | |
| resources | object | `{}` | |
| service.externalPort | int | `8888` | |
| service.nodePort.http | string | `nil` | |
| service.type | string | `"ClusterIP"` | |
| tolerations | list | `[]` | |


@ -0,0 +1,23 @@
1. Get access token from jupyter server log:
kubectl logs -f -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }}
2. Create a port-forward to the jupyter service:
kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }} 8888:{{ .Values.service.externalPort }}
Then open the UI in your browser and use the access token:
open http://localhost:8888
If you set your own password, remember to restart the jupyter server to apply the configuration.
File -> Shut Down
{{- if .Values.ingress.enabled }}
Ingress is enabled:
{{- range .Values.ingress.tls }}
{{- range .hosts }}
open https://{{ . }}
{{- end }}
{{- end }}
{{- range .Values.ingress.hosts }}
open http://{{ . }}
{{- end }}
{{- end }}


@ -0,0 +1,32 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "jupyter.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "jupyter.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{/*
Standard Labels from Helm documentation https://helm.sh/docs/chart_best_practices/#labels-and-annotations
*/}}
{{- define "jupyter.labels" -}}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/part-of: {{ .Chart.Name }}
{{- end -}}


@ -0,0 +1,36 @@
{{- if .Values.gitNotebooks }}
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "jupyter.fullname" . }}-git-notebooks
labels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
{{- include "jupyter.labels" . | nindent 4 }}
data:
git-notebooks.sh: |-
#!/bin/sh
set -x
cd /home/jovyan
{{- if .Values.gitNotebooks.secretName }}
cp -r /tmp/.ssh /root/
chmod 600 /root/.ssh/*
{{- else }}
mkdir /root/.ssh
{{- end }}
echo "Loading notebooks from git repo"
{{- range .Values.gitNotebooks.repos }}
if [ ! -d "/home/jovyan/{{ .name }}" ]
then
echo "Cloning {{ .name }} notebook repository"
{{- if or (hasPrefix "git" .repo) (hasPrefix "ssh" .repo) }}
ssh-keyscan {{ .repo | regexFind "@([a-zA-Z0-9.]*)" | replace "@" "" }} >> ~/.ssh/known_hosts
{{- end }}
git clone {{ .repo }} {{ .name }}
else
echo "{{ .name }} notebook repository already cloned"
fi
{{- end }}
# exit code 0 to continue deployment even if git clone fails
exit 0
{{- end }}

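The script only renders when .Values.gitNotebooks is set, and that block is commented out in values.yaml, so enabling it is an install-time override. A minimal sketch, with the repo list borrowed from the commented example in values.yaml and a hypothetical override file name:

cat > notebooks-values.yaml <<'EOF'
gitNotebooks:
  repos:
    - name: gradiant
      repo: https://github.com/Gradiant/notebooks.git
EOF
helm upgrade --install jupyter ./jupyter -f notebooks-values.yaml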

@ -0,0 +1,39 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "jupyter.fullname" . -}}
{{- $ingressPath := .Values.ingress.path -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
{{- include "jupyter.labels" . | nindent 4 }}
{{- if .Values.ingress.labels }}
{{ toYaml .Values.ingress.labels | indent 4 }}
{{- end }}
{{- with .Values.ingress.annotations }}
annotations:
{{ toYaml . | indent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ . }}
http:
      paths:
      - path: {{ $ingressPath }}
        pathType: ImplementationSpecific
        backend:
          service:
            name: {{ $fullName }}
            port:
              name: web
{{- end }}
{{- end }}


@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "jupyter.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
{{- include "jupyter.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
selector:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
ports:
- name: web
protocol: TCP
port: {{ .Values.service.externalPort | default 8888 }}
{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort.http))) }}
nodePort: {{ .Values.service.nodePort.http }}
{{- end }}
targetPort: 8888

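The nodePort only renders when the type is NodePort and a value is actually set, which is what jupyter/metadata.yaml relies on to pin port 30888. The equivalent manual override, assuming the chart directory is ./jupyter:

helm upgrade --install jupyter ./jupyter \
  --set service.type=NodePort \
  --set service.nodePort.http=30888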

@ -0,0 +1,118 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ include "jupyter.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
{{- include "jupyter.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
serviceName: {{ include "jupyter.fullname" . }}
replicas: 1
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "jupyter.name" . }}
{{- include "jupyter.labels" . | nindent 8}}
spec:
{{- if .Values.gitNotebooks }}
initContainers:
- name: git-notebooks
image: alpine/git
command:
        - /bin/sh
- /git-notebooks.sh
volumeMounts:
- name: git-notebooks
mountPath: /git-notebooks.sh
subPath: git-notebooks.sh
- name: jupyter
mountPath: /home/jovyan
{{- if .Values.gitNotebooks.secretName }}
- name: git-secret
mountPath: "/tmp/.ssh"
{{- end }}
{{- end }}
containers:
- name: jupyter
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
env:
- name: JUPYTER_ENABLE_LAB
value: "{{ .Values.lab }}"
- name: JPY_USER
value: "jovyan"
args:
- start-notebook.sh
- --ip=0.0.0.0
- --user="jovyan"
ports:
- name: web
containerPort: 8888
protocol: TCP
resources:
{{ toYaml .Values.resources | indent 10 }}
readinessProbe:
httpGet:
path: /
port: 8888
initialDelaySeconds: 60
timeoutSeconds: 15
livenessProbe:
httpGet:
path: /
port: 8888
initialDelaySeconds: 60
timeoutSeconds: 15
volumeMounts:
- name: jupyter
mountPath: /home/jovyan
volumes:
{{- if .Values.gitNotebooks }}
- name: git-notebooks
configMap:
name: {{ include "jupyter.fullname" . }}-git-notebooks
{{- if .Values.gitNotebooks.secretName }}
- name: git-secret
secret:
secretName: {{ .Values.gitNotebooks.secretName }}
{{- end }}
{{- end }}
{{- if not .Values.persistence.enabled }}
- name: jupyter
emptyDir: {}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.persistence.enabled }}
volumeClaimTemplates:
- metadata:
name: jupyter
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: "{{ .Values.persistence.size }}"
{{- if .Values.persistence.storageClass }}
{{- if (eq "-" .Values.persistence.storageClass) }}
storageClassName: ""
{{- else }}
storageClassName: "{{ .Values.persistence.storageClass }}"
{{- end }}
{{- end }}
{{- end }}


@ -0,0 +1,64 @@
# Default values for jupyter.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
image:
repository: gradiant/jupyter
tag: 6.0.3
pullPolicy: IfNotPresent
lab: true
#gitNotebooks:
# secretName: the name of the secret with ssh keys
# repos:
# - name: gradiant
# repo: https://github.com/Gradiant/notebooks.git
# - name: grad-git
# repo: git@github.com:Gradiant/notebooks.git
service:
type: ClusterIP
externalPort: 8888
nodePort:
http:
persistence:
enabled: true
storageClass:
accessMode: ReadWriteOnce
size: 50Gi
## Ingress configuration
## Ref: https://kubernetes.io/docs/concepts/services-networking/ingress/
##
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
path: /
hosts:
- jupyter.127-0-0-1.nip.io
tls: []
# - secretName: jupyter-tls
# hosts:
# - jupyter.local
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
nodeSelector: {}
tolerations: []
affinity: {}

jupyter/metadata.yaml

@ -0,0 +1,55 @@
application_name: &application_name jupyter
distributed:
method: helm
release_name: *application_name
chart: jupyter
sets:
image:
repository: gradiant/jupyter
tag: 6.0.3
pullPolicy: IfNotPresent
resources:
limits:
nvidia.com/gpu: 0
nodeSelector:
resource-group: gpu_5880
service:
type: NodePort
nodePort:
http: 30888
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30888
url: ~
pod:
name: jupyter-
monolithic:
method: helm
release_name: *application_name
chart: jupyter
sets:
image:
repository: gradiant/jupyter
tag: 6.0.3
pullPolicy: IfNotPresent
resources:
limits:
nvidia.com/gpu: 0
nodeSelector:
resource-group: gpu_5880
service:
type: NodePort
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30888
url: ~
pod:
name: jupyter-


@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@ -0,0 +1,25 @@
apiVersion: v2
name: llama-factory
description: A Helm chart for deploying LLaMA-Factory with NFS storage
annotations:
  "helm.sh/resource-policy": keep # prevent the release's resources from being accidentally deleted
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,159 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: llamafactory
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
        # Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
                # Check whether the model already exists; download it only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: llama-leader
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
              - name: USE_RAY
value: "1"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
command:
- sh
- -c
- "llamafactory-cli webui"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: llama-worker
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "echo $(LWS_LEADER_ADDRESS);
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}

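Since this LeaderWorkerSet renders only when workerSize > 1, while the single-node Deployment later in the chart renders only when workerSize == 1, a dry render is a quick way to confirm which path a given values file selects (chart directory assumed):

helm template llama-factory ./llama-factory --set workerSize=2 | grep '^kind:'
helm template llama-factory ./llama-factory --set workerSize=1 | grep '^kind:'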

@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}


@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model


@ -0,0 +1,33 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Values.app }}-leader-nodeport
spec:
type: NodePort
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: llamafactory
role: leader
{{- else }}
selector:
app: llama-factory
{{- end }}
ports:
- protocol: TCP
port: 8080
targetPort: 7860


@ -0,0 +1,51 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: llama-factory
template:
metadata:
labels:
app: llama-factory
spec:
containers:
- name: llama-factory
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
env:
- name: HUGGING_FACE_HUB_TOKEN
          value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- "llamafactory-cli webui"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
          claimName: {{ .Values.app }}-pvc-model
{{- end }}


@ -0,0 +1,44 @@
# Default values for llama-factory.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value the user needs to change
  localMountPath: "/Model" # fixed mount path for the model PVC
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false # enable automatic model download
    image: "docker.io/vllm/vllm-openai:latest" # image that ships with huggingface-cli
# Application selection
app: "llama-factory"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "20Gi"
llama:
image: "docker.io/library/one-click:v1"
# NFS PV/PVC configuration
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2
nodeSelector: {}
tolerations: []
affinity: {}


@ -0,0 +1,53 @@
application_name: &application_name llama-factory
distributed:
method: helm
release_name: *application_name
chart: llama-factory
sets:
app: llama-factory
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
workerSize: 2
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: llamafactory
monolithic:
method: helm
release_name: *application_name
chart: llama-factory
sets:
app: llama-factory
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
workerSize: 1
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: llama-factory


@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@ -0,0 +1,24 @@
apiVersion: v2
name: jarvis
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-service)
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ .Release.Name }}-service'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ .Release.Name }}-service --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}


@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "jarvis.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "jarvis.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "jarvis.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "jarvis.labels" -}}
helm.sh/chart: {{ include "jarvis.chart" . }}
{{ include "jarvis.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "jarvis.selectorLabels" -}}
app.kubernetes.io/name: {{ include "jarvis.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "jarvis.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
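As a worked example of jarvis.fullname: with no fullnameOverride, a release literally named jarvis already contains the chart name and resolves to plain jarvis, while a release named prod resolves to prod-jarvis. One way to inspect the resolved names (a sketch; substitute the actual chart directory):

helm template prod <chart-dir> --show-only templates/NOTES.txt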


@ -0,0 +1,81 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}-dp
# namespace: jarvis-models
labels:
app: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: {{ .Release.Name }}
template:
metadata:
labels:
app: {{ .Release.Name }}
spec:
#hostNetwork: true
# --- START: Add this section for image pull secrets ---
runtimeClassName: nvidia
imagePullSecrets:
- name: regcred # This MUST match the name of the secret you just created
# --- END: Add this section ---
containers:
- name: melo
image: {{ .Values.melotts.image }}
imagePullPolicy: IfNotPresent # pull only when the image is not already present on the node
#command:
# - /bin/bash
# - -c
# - "bash && sleep infinity"
ports:
- containerPort: 5000 # The port your application listens on inside the container
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
protocol: TCP
resources:
requests:
cpu: 2 # request 2 full CPU cores
memory: 4Gi # request 4 GiB of memory
limits:
cpu: 2 # limit to 2 full CPU cores
memory: 6Gi # limit to 6 GiB of memory
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
volumeMounts:
- name: weight-volume
mountPath: /models
- name: weight-volume
mountPath: /usr/local/nltk_data
subPath: nltk_data
volumes:
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-service
# namespace: jarvis-models
labels:
app: {{ .Release.Name }}
spec:
selector:
app: {{ .Release.Name }}
ports:
- protocol: TCP
port: 8080
targetPort: 5000
type: NodePort
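The Service maps port 8080 to the container's 5000 on a cluster-assigned NodePort. A sketch for probing it from outside (release name melotts assumed, so the Service is melotts-service; the TTS API path is not specified here):

NODE_PORT=$(kubectl get svc melotts-service -o jsonpath='{.spec.ports[0].nodePort}')
NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[0].address}')
curl -v http://$NODE_IP:$NODE_PORT/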


@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}


@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model
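Binding here is static: the PVC pins volumeName and both sides share the storage class, so no dynamic provisioning is involved. A quick check that the pair bound (default app: melotts assumed):

kubectl get pv melotts-pv-model
kubectl get pvc melotts-pvc-model
# STATUS should read Bound on both lines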


@ -0,0 +1,89 @@
# Default values for jarvis.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1
app: "melotts"
melotts:
image: harbor.bwgdi.com/library/melotts:0.0.2
jarvis_adapter:
image: harbor.bwgdi.com/library/adapter-test:0.0.1
endpoint: "http://vllm-leader-nodeport:8080"
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Voice/MeloTTS"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
resources:
gpuLimit: 1
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
type: ClusterIP
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
port: 80
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector:
resource-group: gpu_5880
tolerations: []
affinity: {}

35
melotts/metadata.yaml Normal file

@ -0,0 +1,35 @@
application_name: &application_name melotts
distributed:
method: helm
release_name: *application_name
chart: melotts
sets:
jarvis_api:
image: harbor.bwgdi.com/library/melotts:0.0.2
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: melo-service
port: 32147
url: ~
pod:
name: *application_name
monolithic:
method: helm
release_name: *application_name
chart: melotts
sets:
jarvis_api:
image: harbor.bwgdi.com/library/melotts:0.0.2
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: melo-service
port: 32147
url: ~
pod:
name: *application_name

53
vllm/metadata.yaml Normal file

@ -0,0 +1,53 @@
application_name: &application_name vllm
distributed:
method: helm
release_name: *application_name
chart: vllm-app
sets:
app: vllm
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
workerSize: 2
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
paths:
docs_path: /docs
redoc_path: /redoc
pod:
name: infer-0
monolithic:
method: helm
release_name: *application_name
chart: vllm-app
sets:
app: vllm
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "16Gi"
shmSize: "15Gi"
workerSize: 1
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: vllm

23
vllm/vllm-app/.helmignore Normal file

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

25
vllm/vllm-app/Chart.yaml Normal file

@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-app
description: A Helm chart for deploying vLLM with NFS storage
annotations:
"helm.sh/resource-policy": keep # prevent accidental deletion of these resources
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: llama-leader
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
- name: USE_RAY
value: "1"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: llama-worker
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "echo $(LWS_LEADER_ADDRESS);
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}


@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: lmdeploy-leader
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: lmdeploy-worker
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.lmdeploy.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
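The --tp handed to lmdeploy above is gpuLimit multiplied by workerSize, i.e. one tensor-parallel rank per GPU across the whole Ray group. Worked through with the chart defaults:

# gpuLimit=1 GPU per pod, workerSize=2 pods (leader + 1 worker)
echo $((1 * 2)) # => 2, the tensor-parallel degree lmdeploy serves with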


@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
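vLLM here combines tensor parallelism inside each pod (gpuLimit ranks) with pipeline parallelism across pods (workerSize stages), so each replica consumes gpuLimit × workerSize GPUs in total. Worked through with the defaults:

# --tensor-parallel-size 1 per pod, --pipeline_parallel_size 2 pods
echo $((1 * 2)) # => 2 GPUs per LeaderWorkerSet replica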


@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ .Release.Name }}-download-model
annotations:
"helm.sh/hook": pre-install,pre-upgrade # run before install/upgrade
"helm.sh/hook-weight": "-10" # run ahead of other hooks
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: downloader
image: {{ .Values.model.download.image }}
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
if [ -d "$DEST_DIR" ]; then
echo "Model already exists at $DEST_DIR"
else
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
fi
volumeMounts:
- name: model-storage
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: model-storage
persistentVolumeClaim:
claimName: nfs-pvc-model # reuse the existing PVC
{{- end }}
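Because of the pre-install/pre-upgrade hook, this Job runs and must succeed before the release's own resources are created, which is presumably why it mounts an already-existing PVC instead of the one this chart creates. A sketch of enabling it at install time:

helm install vllm ./vllm-app \
--set model.download.enabled=true \
--set model.huggingfaceToken=<your-hf-token>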


@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}


@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model


@ -0,0 +1,39 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Values.app }}-leader-nodeport
spec:
type: NodePort
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: infer
role: leader
{{- else }}
selector:
app: vllm-app
{{- end }}
ports:
- protocol: TCP
port: 8080
{{- if eq .Values.app "llama" }}
targetPort: 7860
{{- else }}
targetPort: 8080
{{- end }}
nodePort: 30080
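The Service pins NodePort 30080 and retargets 7860 only for the llama case, since the llama-factory web UI listens on 7860 while vllm and lmdeploy serve on 8080. A smoke test for the vllm case (node address taken from the metadata files):

curl http://10.6.14.123:30080/v1/models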


@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: vllm
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: vllm-app
template:
metadata:
labels:
app: vllm-app
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
#securityContext:
# capabilities:
# add: [ "IPC_LOCK" ]
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.vllm.huggingfaceToken }}
#- name: GLOO_SOCKET_IFNAME
# value: eth0
#- name: NCCL_SOCKET_IFNAME
# value: eth0
#- name: NCCL_IB_DISABLE
# value: "0"
#- name: NCCL_DEBUG
# value: INFO
#- name: NCCL_IB_HCA
# value: mlx5_0:1
#- name: NCCL_IB_GID_INDEX
# value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
#rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
#tcpSocket:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
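Once the /health readiness probe passes, the pod serves vLLM's OpenAI-compatible API; because the model is passed as a local path, that path is also the model name clients must send. A minimal smoke test with the default values (sketch):

curl http://10.6.14.123:30080/v1/completions \
-H 'Content-Type: application/json' \
-d '{"model": "/Model/Qwen2.5-0.5B-Instruct", "prompt": "Hello", "max_tokens": 16}'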

58
vllm/vllm-app/values.yaml Normal file

@ -0,0 +1,58 @@
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Model configuration
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally need to change
localMountPath: "/Model" # fixed mount path for the PVC
huggingfaceToken: "<your-hf-token>"
download:
enabled: false # set to true to enable automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that bundles huggingface-cli
# Application selection
app: "vllm"
resources:
gpuLimit: 1
cpuRequest: 12
memoryLimit: "16Gi"
shmSize: "20Gi"
# vLLM application configuration
vllm:
image: "docker.io/vllm/vllm-openai:latest"
#gpuLimit: 2
# cpuRequest: 12
# memoryLimit: "12Gi"
# shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
# lmdeploy application configuration
lmdeploy:
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
# gpuLimit: 2
# cpuRequest: 12
# memoryLimit: "12Gi"
# shmSize: "15Gi"
# NFS PV/PVC configuration
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2
nodeSelector: {}
tolerations: []
affinity: {}


@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-serve
description: A Helm chart for deploying vLLM with NFS storage
annotations:
"helm.sh/resource-policy": keep # prevent accidental deletion of these resources
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
{{- if contains "NodePort" .Values.svc.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-svc)
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.svc.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ .Release.Name }}-svc'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ .Release.Name }}-svc --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.svc.port }}
{{- else if contains "ClusterIP" .Values.svc.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}


@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "vllm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "vllm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "vllm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "vllm-serve.labels" -}}
helm.sh/chart: {{ include "vllm-serve.chart" . }}
{{ include "vllm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "vllm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "vllm-serve.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}


@ -0,0 +1,188 @@
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- end }}


@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Release.Name }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Release.Name }}-pvc-model
annotations:
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Release.Name }}-pv-model


@ -0,0 +1,35 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-svc
spec:
type: {{ .Values.svc.type | default "NodePort" }}
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
role: leader
{{- else }}
selector:
app: {{ .Release.Name }}
{{- end }}
ports:
- protocol: TCP
port: {{ .Values.svc.port | default 8080 }}
targetPort: {{ .Values.svc.targetPort | default 8080 }}
nodePort: {{ .Values.svc.nodePort | default 30080 }}


@ -0,0 +1,108 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: {{ .Release.Name }}
template:
metadata:
labels:
app: {{ .Release.Name }}
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-pod
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.vllm.huggingfaceToken }}
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
#tcpSocket:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Release.Name }}-pvc-model
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}


@ -0,0 +1,75 @@
# Default values for vllm-serve.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
imagePullPolicy: IfNotPresent
# This is to override the chart name.
nameOverride: ""
fullnameOverride: ""
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
# Specifies whether a service account should be created
create: true
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
# Model configuration
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally need to change
localMountPath: "/Model" # fixed mount path for the PVC
huggingfaceToken: "<your-hf-token>"
download: # automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that bundles huggingface-cli
# Application selection
resources:
gpuLimit: 1
cpuRequest: 12
memoryLimit: "16Gi"
shmSize: "20Gi"
svc:
type: NodePort
port: 80
targetPort: 8080
nodePort: 30080
# vLLM application configuration
vllm:
image: "docker.io/vllm/vllm-openai:latest"
llama:
image: "docker.io/library/one-click:v1"
# lmdeploy application configuration
lmdeploy:
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
# NFS PV/PVC configuration
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2
nodeSelector: {}
tolerations: []
affinity: {}

53
webchat/metadata.yaml Normal file

@ -0,0 +1,53 @@
application_name: &application_name webchat
distributed:
method: helm
release_name: *application_name
chart: vllm-app
sets:
app: llama
model:
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 8
memoryLimit: "8Gi"
shmSize: "15Gi"
workerSize: 2
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30081
url: ~
paths:
docs_path: /docs
redoc_path: /redoc
pod:
name: *application_name
monolithic:
method: helm
release_name: *application_name
chart: vllm-app
sets:
app: vllm
model:
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
resources:
gpuLimit: 1
cpuRequest: 12
memoryLimit: "8Gi"
shmSize: "15Gi"
workerSize: 1
nodeSelector: {}
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
port: 30080
url: ~
pod:
name: *application_name


@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/


@ -0,0 +1,25 @@
apiVersion: v2
name: vllm-app
description: A Helm chart for deploying vLLM with NFS storage
annotations:
"helm.sh/resource-policy": keep # prevent accidental deletion of these resources
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"


@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: llama-leader
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
- name: USE_RAY
value: "1"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 7860
name: http
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: llama-worker
image: {{ .Values.llama.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "echo $(LWS_LEADER_ADDRESS);
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
# - name : LMDEPLOY_EXECUTOR_BACKEND
# value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}


@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# check whether the model already exists; download only if missing
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: lmdeploy-leader
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # or "7", depending on your network configuration
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: lmdeploy-worker
image: {{ .Values.lmdeploy.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.lmdeploy.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
- name: LMDEPLOY_EXECUTOR_BACKEND
value: "ray"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
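The lmdeploy leader derives its tensor-parallel degree from `resources.gpuLimit * workerSize`, so `--tp` spans every GPU in the pod group. A hedged way to verify the rendered value without deploying, assuming the chart lives in the current directory:

```shell
# Render the template locally and inspect the computed --tp argument.
helm template infer . \
  --set app=lmdeploy \
  --set workerSize=2 \
  --set resources.gpuLimit=4 \
  | grep -- '--tp'
# With gpuLimit=4 and workerSize=2 the leader runs with --tp $((4 * 2)) = 8.
```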

View File

@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
name: infer
spec:
replicas: {{ .Values.replicaCount }}
leaderWorkerTemplate:
size: {{ .Values.workerSize }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
labels:
role: leader
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
tcpSocket:
#httpGet:
#path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
workerTemplate:
spec:
containers:
- name: vllm-worker
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add: [ "IPC_LOCK" ]
command:
- sh
- -c
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
env:
# - name: HUGGING_FACE_HUB_TOKEN
# value: {{ .Values.vllm.huggingfaceToken }}
- name: GLOO_SOCKET_IFNAME
value: eth0
- name: NCCL_SOCKET_IFNAME
value: eth0
- name: NCCL_IB_DISABLE
value: "0"
- name: NCCL_DEBUG
value: INFO
- name: NCCL_IB_HCA
value: mlx5_0:1
- name: NCCL_IB_GID_INDEX
value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
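The vLLM leader splits the same GPU budget along two axes: `--tensor-parallel-size` covers the GPUs within each pod, while `--pipeline_parallel_size` spans the pods in the group, so the total GPU count is their product. A rendering check under the same assumptions as above:

```shell
# gpuLimit GPUs per pod x workerSize pods = total GPUs serving one replica.
helm template infer . \
  --set app=vllm \
  --set workerSize=2 \
  --set resources.gpuLimit=4 \
  | grep -E 'tensor-parallel-size|pipeline_parallel_size'
```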

View File

@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ .Release.Name }}-download-model
annotations:
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
"helm.sh/hook-weight": "-10" # 优先执行
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
spec:
restartPolicy: OnFailure
containers:
- name: downloader
image: {{ .Values.model.download.image }}
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
if [ -d "$DEST_DIR" ]; then
echo "Model already exists at $DEST_DIR"
else
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
fi
volumeMounts:
- name: model-storage
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: model-storage
persistentVolumeClaim:
claimName: nfs-pvc-model # reuse a pre-existing model PVC
{{- end }}
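Because the hook runs at pre-install time, none of the release's own resources exist yet, which is why the Job mounts a pre-existing PVC rather than the chart-managed one. A sketch of an install that exercises the hook (release name and model are illustrative; the token placeholder must be replaced):

```shell
helm upgrade --install infer . \
  --set model.download.enabled=true \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct \
  --set model.huggingfaceToken=<your-hf-token>
# The hook Job is named <release>-download-model and is deleted once it succeeds.
kubectl logs -f job/infer-download-model
```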

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ .Values.app }}-pv-model
spec:
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
capacity:
storage: {{ .Values.nfs.pvSize }}
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: {{ .Values.nfs.path }}
server: {{ .Values.nfs.server }}

View File

@ -0,0 +1,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ .Values.app }}-pvc-model
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ .Values.nfs.pvcSize }}
volumeName: {{ .Values.app }}-pv-model
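Setting `volumeName` pins the claim to the PV defined above, so binding is static rather than dynamically provisioned. After install, both objects should report `Bound` (names shown assume `app=vllm`):

```shell
kubectl get pv vllm-pv-model
kubectl get pvc vllm-pvc-model
```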

View File

@ -0,0 +1,39 @@
#apiVersion: v1
#kind: Service
#metadata:
# name: infer-leader-loadbalancer
#spec:
# type: LoadBalancer
# selector:
# leaderworkerset.sigs.k8s.io/name: infer
# role: leader
# ports:
# - protocol: TCP
# port: 8080
# targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Values.app }}-leader-nodeport
spec:
type: NodePort
{{- if gt (int .Values.workerSize) 1 }}
selector:
leaderworkerset.sigs.k8s.io/name: infer # must match the LeaderWorkerSet's metadata.name
role: leader
{{- else }}
selector:
app: {{ .Release.Name }}
{{- end }}
ports:
- protocol: TCP
port: 8080
{{- if eq .Values.app "llama" }}
targetPort: 7860
{{- else }}
targetPort: 8080
{{- end }}
nodePort: 30081
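With the fixed `nodePort`, the leader's API is reachable on any node IP. A smoke test, assuming `app=vllm` or `app=lmdeploy` (so traffic lands on port 8080) and using the node address from the metadata files:

```shell
# Lists the model(s) the OpenAI-compatible server is serving.
curl http://10.6.14.123:30081/v1/models
```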

View File

@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
app: {{ .Release.Name }}
template:
metadata:
labels:
app: {{ .Release.Name }}
spec:
initContainers:
# Model download runs as the first initContainer
- name: download-model
image: {{ .Values.model.download.image }}
imagePullPolicy: IfNotPresent
env:
- name: HF_ENDPOINT
value: https://hf-mirror.com
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
command:
- sh
- -c
- |
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
# Check whether the model already exists; download it if not
echo "DEST_DIR= $DEST_DIR"
ls $DEST_DIR
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
if [ ! -f "$DEST_DIR/config.json" ]; then
ls -l {{ .Values.model.localMountPath }}
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
wget https://hf-mirror.com/hfd/hfd.sh
chmod a+x hfd.sh
apt install aria2 -y
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
else
echo "Model already exists at $DEST_DIR"
fi
volumeMounts:
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
containers:
- name: vllm-leader
image: {{ .Values.vllm.image }}
imagePullPolicy: IfNotPresent
#securityContext:
# capabilities:
# add: [ "IPC_LOCK" ]
env:
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.model.huggingfaceToken }}
#- name: GLOO_SOCKET_IFNAME
# value: eth0
#- name: NCCL_SOCKET_IFNAME
# value: eth0
#- name: NCCL_IB_DISABLE
# value: "0"
#- name: NCCL_DEBUG
# value: INFO
#- name: NCCL_IB_HCA
# value: mlx5_0:1
#- name: NCCL_IB_GID_INDEX
# value: "0" # 或 "7",根据你的网络配置而定
- name: RAY_DEDUP_LOGS
value: "0"
command:
- sh
- -c
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
echo 'Using single node ------------------------------------------';
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
resources:
limits:
nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
memory: {{ .Values.vllm.memoryLimit }}
ephemeral-storage: 10Gi
#rdma/rdma_shared_device_a: 10
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
ports:
- containerPort: 8080
name: http
readinessProbe:
#tcpSocket:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 120
periodSeconds: 20
timeoutSeconds: 5
volumeMounts:
- mountPath: /dev/shm
name: dshm
- name: weight-volume
mountPath: {{ .Values.model.localMountPath }}
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: {{ .Values.resources.shmSize }}
- name: weight-volume
persistentVolumeClaim:
claimName: {{ .Values.app }}-pvc-model
{{- end }}
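Once the readiness probe passes, the single-node server answers OpenAI-compatible requests. A hedged example against the NodePort service above, assuming the default Qwen model (vLLM reports the model under its on-disk path):

```shell
curl http://10.6.14.123:30081/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "/Model/Qwen2.5-0.5B-Instruct",
        "messages": [{"role": "user", "content": "Hello"}]
      }'
```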

View File

@ -0,0 +1,58 @@
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Model configuration
model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally need to change
localMountPath: "/Model" # fixed PVC mount path
huggingfaceToken: "<your-hf-token>"
download:
enabled: false # enable automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that includes huggingface-cli
# Application selection: vllm, lmdeploy, or llama
app: "vllm"
resources:
gpuLimit: 1
cpuRequest: 12
memoryLimit: "16Gi"
shmSize: "20Gi"
# vLLM application configuration
vllm:
image: "docker.io/vllm/vllm-openai:latest"
#gpuLimit: 2
# cpuRequest: 12
# memoryLimit: "12Gi"
# shmSize: "15Gi"
llama:
image: "docker.io/library/one-click:v1"
# lmdeploy application configuration
lmdeploy:
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
# gpuLimit: 2
# cpuRequest: 12
# memoryLimit: "12Gi"
# shmSize: "15Gi"
# NFS PV/PVC configuration
nfs:
server: "10.6.80.11"
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
storageClass: "local-path"
pvSize: "500Gi"
pvcSize: "50Gi"
# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2
nodeSelector: {}
tolerations: []
affinity: {}

51
webui/metadata.yaml Normal file
View File

@ -0,0 +1,51 @@
application_name: &application_name webui
distributed:
method: helm
release_name: *application_name
chart: open-webui
sets:
image:
repository: ghcr.io/open-webui/open-webui
tag: main
pullPolicy: "IfNotPresent"
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080/v1"
ollama:
enabled: false
service:
type: NodePort
nodePort: 30679
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30679
url: ~
pod:
name: open-webui-
monolithic:
method: helm
release_name: *application_name
chart: open-webui
sets:
image:
repository: ghcr.io/open-webui/open-webui
tag: main
pullPolicy: "IfNotPresent"
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080/v1"
ollama:
enabled: false
service:
type: NodePort
nodePort: 30679
svc:
svc_type: NodePort
protocol: http
hostname: 10.6.14.123
servicename: ~
port: 30679
url: ~
pod:
name: open-webui-
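The `openaiBaseApiUrls` value wires Open WebUI to the vLLM leader service created by the inference chart. Connectivity can be checked from inside the webui pod, assuming curl is available in the image (pod name illustrative):

```shell
kubectl exec -it open-webui-0 -- curl http://vllm-leader-nodeport:8080/v1/models
```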

View File

@ -0,0 +1,25 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
.drone.yml
*.tmproj
.vscode/
values-minikube.yaml

View File

@ -0,0 +1,12 @@
dependencies:
- name: ollama
repository: https://otwld.github.io/ollama-helm/
version: 1.27.0
- name: pipelines
repository: https://helm.openwebui.com
version: 0.7.0
- name: tika
repository: https://apache.jfrog.io/artifactory/tika
version: 3.2.2
digest: sha256:1c6e5d6a38dc8ebb4e15b1945fb222fa57b10e8882d5c79ba430648f3c5af372
generated: "2025-08-22T15:22:03.150693+02:00"

View File

@ -0,0 +1,38 @@
annotations:
licenses: MIT
apiVersion: v2
appVersion: 0.6.26
dependencies:
- condition: ollama.enabled
import-values:
- child: service
parent: ollama.service
name: ollama
repository: https://otwld.github.io/ollama-helm/
version: '>=0.24.0'
- condition: pipelines.enabled
import-values:
- child: service
parent: pipelines.service
name: pipelines
repository: https://helm.openwebui.com
version: '>=0.0.1'
- condition: tika.enabled
name: tika
repository: https://apache.jfrog.io/artifactory/tika
version: '>=2.9.0'
description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions \U0001F44B"
home: https://www.openwebui.com/
icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png
keywords:
- llm
- chat
- web-ui
- open-webui
name: open-webui
sources:
- https://github.com/open-webui/helm-charts
- https://github.com/open-webui/open-webui/pkgs/container/open-webui
- https://github.com/otwld/ollama-helm/
- https://hub.docker.com/r/ollama/ollama
version: 7.7.0

270
webui/open-webui/README.md Normal file
View File

@ -0,0 +1,270 @@
# open-webui
![Version: 7.7.0](https://img.shields.io/badge/Version-7.7.0-informational?style=flat-square) ![AppVersion: 0.6.26](https://img.shields.io/badge/AppVersion-0.6.26-informational?style=flat-square)
Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋
**Homepage:** <https://www.openwebui.com/>
## Source Code
* <https://github.com/open-webui/helm-charts>
* <https://github.com/open-webui/open-webui/pkgs/container/open-webui>
* <https://github.com/otwld/ollama-helm/>
* <https://hub.docker.com/r/ollama/ollama>
## Installing
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
```shell
helm repo add open-webui https://helm.openwebui.com/
helm repo update
```
Now you can install the chart:
```shell
helm upgrade --install open-webui open-webui/open-webui
```
## Requirements
| Repository | Name | Version |
|------------|------|---------|
| https://apache.jfrog.io/artifactory/tika | tika | >=2.9.0 |
| https://helm.openwebui.com | pipelines | >=0.0.1 |
| https://otwld.github.io/ollama-helm/ | ollama | >=0.24.0 |
## Values
### Logging configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| logging.components.audio | string | `""` | Set the log level for the Audio processing component |
| logging.components.comfyui | string | `""` | Set the log level for the ComfyUI Integration component |
| logging.components.config | string | `""` | Set the log level for the Configuration Management component |
| logging.components.db | string | `""` | Set the log level for the Database Operations (Peewee) component |
| logging.components.images | string | `""` | Set the log level for the Image Generation component |
| logging.components.main | string | `""` | Set the log level for the Main Application Execution component |
| logging.components.models | string | `""` | Set the log level for the Model Management component |
| logging.components.ollama | string | `""` | Set the log level for the Ollama Backend Integration component |
| logging.components.openai | string | `""` | Set the log level for the OpenAI API Integration component |
| logging.components.rag | string | `""` | Set the log level for the Retrieval-Augmented Generation (RAG) component |
| logging.components.webhook | string | `""` | Set the log level for the Authentication Webhook component |
| logging.level | string | `""` | Set the global log level ["notset", "debug", "info" (default), "warning", "error", "critical"] |
### Azure Storage configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| persistence.azure.container | string | `""` | Sets the container name for Azure Storage |
| persistence.azure.endpointUrl | string | `""` | Sets the endpoint URL for Azure Storage |
| persistence.azure.key | string | `""` | Set the access key for Azure Storage (ignored if keyExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Managed Identity if run in Azure services |
| persistence.azure.keyExistingSecret | string | `""` | Set the access key for Azure Storage from existing secret |
| persistence.azure.keyExistingSecretKey | string | `""` | Set the access key for Azure Storage from existing secret key |
### Google Cloud Storage configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| persistence.gcs.appCredentialsJson | string | `""` | Contents of Google Application Credentials JSON file (ignored if appCredentialsJsonExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Google Metadata server if run on a Google Compute Engine. File can be generated for a service account following this guide: https://developers.google.com/workspace/guides/create-credentials#service-account |
| persistence.gcs.appCredentialsJsonExistingSecret | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret |
| persistence.gcs.appCredentialsJsonExistingSecretKey | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret key |
| persistence.gcs.bucket | string | `""` | Sets the bucket name for Google Cloud Storage. Bucket must already exist |
### Amazon S3 Storage configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| persistence.s3.accessKey | string | `""` | Sets the access key ID for S3 storage |
| persistence.s3.accessKeyExistingAccessKey | string | `""` | Set the access key ID for S3 storage from existing k8s secret key |
| persistence.s3.accessKeyExistingSecret | string | `""` | Set the access key ID for S3 storage from existing k8s secret |
| persistence.s3.bucket | string | `""` | Sets the bucket name for S3 storage |
| persistence.s3.endpointUrl | string | `""` | Sets the endpoint url for S3 storage |
| persistence.s3.keyPrefix | string | `""` | Sets the key prefix for a S3 object |
| persistence.s3.region | string | `""` | Sets the region name for S3 storage |
| persistence.s3.secretKey | string | `""` | Sets the secret access key for S3 storage (ignored if secretKeyExistingSecret is set) |
| persistence.s3.secretKeyExistingSecret | string | `""` | Set the secret key for S3 storage from existing k8s secret |
| persistence.s3.secretKeyExistingSecretKey | string | `""` | Set the secret key for S3 storage from existing k8s secret key |
### SSO Configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.enableGroupManagement | bool | `false` | Enable OAuth group management through access token groups claim |
| sso.enableRoleManagement | bool | `false` | Enable OAuth role management through access token roles claim |
| sso.enableSignup | bool | `false` | Enable account creation when logging in with OAuth (distinct from regular signup) |
| sso.enabled | bool | `false` | **Enable SSO authentication globally** must enable to use SSO authentication |
| sso.groupManagement.groupsClaim | string | `"groups"` | The claim that contains the groups (can be nested, e.g., user.memberOf) |
| sso.mergeAccountsByEmail | bool | `false` | Allow logging into accounts that match email from OAuth provider (considered insecure) |
### GitHub OAuth configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.github.clientExistingSecret | string | `""` | GitHub OAuth client secret from existing secret |
| sso.github.clientExistingSecretKey | string | `""` | GitHub OAuth client secret key from existing secret |
| sso.github.clientId | string | `""` | GitHub OAuth client ID |
| sso.github.clientSecret | string | `""` | GitHub OAuth client secret (ignored if clientExistingSecret is set) |
| sso.github.enabled | bool | `false` | Enable GitHub OAuth |
### Google OAuth configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.google.clientExistingSecret | string | `""` | Google OAuth client secret from existing secret |
| sso.google.clientExistingSecretKey | string | `""` | Google OAuth client secret key from existing secret |
| sso.google.clientId | string | `""` | Google OAuth client ID |
| sso.google.clientSecret | string | `""` | Google OAuth client secret (ignored if clientExistingSecret is set) |
| sso.google.enabled | bool | `false` | Enable Google OAuth |
### Microsoft OAuth configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.microsoft.clientExistingSecret | string | `""` | Microsoft OAuth client secret from existing secret |
| sso.microsoft.clientExistingSecretKey | string | `""` | Microsoft OAuth client secret key from existing secret |
| sso.microsoft.clientId | string | `""` | Microsoft OAuth client ID |
| sso.microsoft.clientSecret | string | `""` | Microsoft OAuth client secret (ignored if clientExistingSecret is set) |
| sso.microsoft.enabled | bool | `false` | Enable Microsoft OAuth |
| sso.microsoft.tenantId | string | `""` | Microsoft tenant ID - use 9188040d-6c67-4c5b-b112-36a304b66dad for personal accounts |
### OIDC configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.oidc.clientExistingSecret | string | `""` | OIDC client secret from existing secret |
| sso.oidc.clientExistingSecretKey | string | `""` | OIDC client secret key from existing secret |
| sso.oidc.clientId | string | `""` | OIDC client ID |
| sso.oidc.clientSecret | string | `""` | OIDC client secret (ignored if clientExistingSecret is set) |
| sso.oidc.enabled | bool | `false` | Enable OIDC authentication |
| sso.oidc.providerName | string | `"SSO"` | Name of the provider to show on the UI |
| sso.oidc.providerUrl | string | `""` | OIDC provider well known URL |
| sso.oidc.scopes | string | `"openid email profile"` | Scopes to request (space-separated). |
### Role management configuration
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.roleManagement.adminRoles | string | `""` | Comma-separated list of roles allowed to log in as admin (receive open webui role admin) |
| sso.roleManagement.allowedRoles | string | `""` | Comma-separated list of roles allowed to log in (receive open webui role user) |
| sso.roleManagement.rolesClaim | string | `"roles"` | The claim that contains the roles (can be nested, e.g., user.roles) |
### SSO trusted header authentication
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| sso.trustedHeader.emailHeader | string | `""` | Header containing the user's email address |
| sso.trustedHeader.enabled | bool | `false` | Enable trusted header authentication |
| sso.trustedHeader.nameHeader | string | `""` | Header containing the user's name (optional, used for new user creation) |
### Other Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity for pod assignment |
| annotations | object | `{}` | |
| args | list | `[]` | Open WebUI container arguments (overrides default) |
| clusterDomain | string | `"cluster.local"` | Value of cluster domain |
| command | list | `[]` | Open WebUI container command (overrides default entrypoint) |
| commonEnvVars | list | `[]` | Env vars added to the Open WebUI deployment, common across environments. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: environment variables defined in both `extraEnvVars` and `commonEnvVars` will result in a conflict. Avoid duplicates) |
| containerSecurityContext | object | `{}` | Configure container security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
| copyAppData.args | list | `[]` | Open WebUI copy-app-data init container arguments (overrides default) |
| copyAppData.command | list | `[]` | Open WebUI copy-app-data init container command (overrides default) |
| copyAppData.resources | object | `{}` | |
| databaseUrl | string | `""` | Configure database URL, needed to work with Postgres (example: `postgresql://<user>:<password>@<service>:<port>/<database>`), leave empty to use the default sqlite database |
| enableOpenaiApi | bool | `true` | Enables the use of OpenAI APIs |
| extraEnvFrom | list | `[]` | Env vars added from configmap or secret to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: `extraEnvVars` will take precedence over the value from `extraEnvFrom`) |
| extraEnvVars | list | `[{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}]` | Env vars added to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ |
| extraEnvVars[0] | object | `{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}` | Default API key value for Pipelines. Should be updated in a production deployment, or be changed to the required API key if not using Pipelines |
| extraInitContainers | list | `[]` | Additional init containers to add to the deployment/statefulset ref: <https://kubernetes.io/docs/concepts/workloads/pods/init-containers/> |
| extraResources | list | `[]` | Extra resources to deploy with Open WebUI |
| hostAliases | list | `[]` | HostAliases to be added to hosts-file of each container |
| image | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/open-webui/open-webui","tag":""}` | Open WebUI image tags can be found here: https://github.com/open-webui/open-webui |
| imagePullSecrets | list | `[]` | Configure imagePullSecrets to use private registry ref: <https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry> |
| ingress.additionalHosts | list | `[]` | |
| ingress.annotations | object | `{}` | Use appropriate annotations for your Ingress controller, e.g., for NGINX: |
| ingress.class | string | `""` | |
| ingress.enabled | bool | `false` | |
| ingress.existingSecret | string | `""` | |
| ingress.extraLabels | object | `{}` | Additional custom labels to add to the Ingress metadata Useful for tagging, selecting, or applying policies to the Ingress via labels. |
| ingress.host | string | `"chat.example.com"` | |
| ingress.tls | bool | `false` | |
| livenessProbe | object | `{}` | Probe for liveness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
| managedCertificate.domains[0] | string | `"chat.example.com"` | |
| managedCertificate.enabled | bool | `false` | |
| managedCertificate.name | string | `"mydomain-chat-cert"` | |
| nameOverride | string | `""` | |
| namespaceOverride | string | `""` | |
| nodeSelector | object | `{}` | Node labels for pod assignment. |
| ollama.enabled | bool | `true` | Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure |
| ollama.fullnameOverride | string | `"open-webui-ollama"` | If enabling embedded Ollama, update fullnameOverride to your desired Ollama name value, or else it will use the default ollama.name value from the Ollama chart |
| ollamaUrls | list | `[]` | A list of Ollama API endpoints. These can be added in lieu of automatically installing the Ollama Helm chart, or in addition to it. |
| ollamaUrlsFromExtraEnv | bool | `false` | Disables taking Ollama Urls from `ollamaUrls` list |
| openaiBaseApiUrl | string | `"https://api.openai.com/v1"` | OpenAI base API URL to use. Defaults to the Pipelines service endpoint when Pipelines are enabled, and "https://api.openai.com/v1" if Pipelines are not enabled and this value is blank |
| openaiBaseApiUrls | list | `[]` | OpenAI base API URLs to use. Overwrites the value in openaiBaseApiUrl if set |
| persistence.accessModes | list | `["ReadWriteOnce"]` | If using multiple replicas, you must update accessModes to ReadWriteMany |
| persistence.annotations | object | `{}` | |
| persistence.enabled | bool | `true` | |
| persistence.existingClaim | string | `""` | Use existingClaim if you want to re-use an existing Open WebUI PVC instead of creating a new one |
| persistence.provider | string | `"local"` | Sets the storage provider, availables values are `local`, `s3`, `gcs` or `azure` |
| persistence.selector | object | `{}` | |
| persistence.size | string | `"2Gi"` | |
| persistence.storageClass | string | `""` | |
| persistence.subPath | string | `""` | Subdirectory of Open WebUI PVC to mount. Useful if root directory is not empty. |
| pipelines.enabled | bool | `true` | Automatically install Pipelines chart to extend Open WebUI functionality using Pipelines: https://github.com/open-webui/pipelines |
| pipelines.extraEnvVars | list | `[]` | This section can be used to pass required environment variables to your pipelines (e.g. Langfuse hostname) |
| podAnnotations | object | `{}` | |
| podLabels | object | `{}` | |
| podSecurityContext | object | `{}` | Configure pod security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
| priorityClassName | string | `""` | Priority class name for the Open WebUI pods |
| readinessProbe | object | `{}` | Probe for readiness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
| replicaCount | int | `1` | |
| resources | object | `{}` | |
| revisionHistoryLimit | int | `10` | Revision history limit for the workload manager (deployment). |
| runtimeClassName | string | `""` | Configure runtime class ref: <https://kubernetes.io/docs/concepts/containers/runtime-class/> |
| service | object | `{"annotations":{},"containerPort":8080,"labels":{},"loadBalancerClass":"","nodePort":"","port":80,"type":"ClusterIP"}` | Service values to expose Open WebUI pods to cluster |
| serviceAccount.annotations | object | `{}` | |
| serviceAccount.automountServiceAccountToken | bool | `false` | |
| serviceAccount.enable | bool | `true` | |
| serviceAccount.name | string | `""` | |
| startupProbe | object | `{}` | Probe for startup of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
| strategy | object | `{}` | Strategy for updating the workload manager: deployment or statefulset |
| tika.enabled | bool | `false` | Automatically install Apache Tika to extend Open WebUI |
| tolerations | list | `[]` | Tolerations for pod assignment |
| topologySpreadConstraints | list | `[]` | Topology Spread Constraints for pod assignment |
| volumeMounts | object | `{"container":[],"initContainer":[]}` | Configure container volume mounts ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
| volumes | list | `[]` | Configure pod volumes ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
| websocket.enabled | bool | `false` | Enables websocket support in Open WebUI with env `ENABLE_WEBSOCKET_SUPPORT` |
| websocket.manager | string | `"redis"` | Specifies the websocket manager to use with env `WEBSOCKET_MANAGER`: redis (default) |
| websocket.nodeSelector | object | `{}` | Node selector for websocket pods |
| websocket.redis | object | `{"affinity":{},"annotations":{},"args":[],"command":[],"enabled":true,"image":{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"},"labels":{},"name":"open-webui-redis","pods":{"annotations":{},"labels":{}},"resources":{},"securityContext":{},"service":{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"},"tolerations":[]}` | Deploys a Redis instance |
| websocket.redis.affinity | object | `{}` | Redis affinity for pod assignment |
| websocket.redis.annotations | object | `{}` | Redis annotations |
| websocket.redis.args | list | `[]` | Redis arguments (overrides default) |
| websocket.redis.command | list | `[]` | Redis command (overrides default) |
| websocket.redis.enabled | bool | `true` | Enable redis installation |
| websocket.redis.image | object | `{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"}` | Redis image |
| websocket.redis.labels | object | `{}` | Redis labels |
| websocket.redis.name | string | `"open-webui-redis"` | Redis name |
| websocket.redis.pods | object | `{"annotations":{},"labels":{}}` | Redis pod |
| websocket.redis.pods.annotations | object | `{}` | Redis pod annotations |
| websocket.redis.pods.labels | object | `{}` | Redis pod labels |
| websocket.redis.resources | object | `{}` | Redis resources |
| websocket.redis.securityContext | object | `{}` | Redis security context |
| websocket.redis.service | object | `{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"}` | Redis service |
| websocket.redis.service.annotations | object | `{}` | Redis service annotations |
| websocket.redis.service.containerPort | int | `6379` | Redis container/target port |
| websocket.redis.service.labels | object | `{}` | Redis service labels |
| websocket.redis.service.nodePort | string | `""` | Redis service node port. Valid only when type is `NodePort` |
| websocket.redis.service.port | int | `6379` | Redis service port |
| websocket.redis.service.portName | string | `"http"` | Redis service port name. Istio needs this to be something like `tcp-redis` |
| websocket.redis.service.type | string | `"ClusterIP"` | Redis service type |
| websocket.redis.tolerations | list | `[]` | Redis tolerations for pod assignment |
| websocket.url | string | `"redis://open-webui-redis:6379/0"` | Specifies the URL of the Redis instance for websocket communication. Template with `redis://[:<password>@]<hostname>:<port>/<db>` |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
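For the deployment described in webui/metadata.yaml, a roughly equivalent install using plain Helm flags might look like this (the backend URL assumes the vLLM leader service from the inference chart):

```shell
helm upgrade --install webui open-webui/open-webui \
  --set ollama.enabled=false \
  --set "openaiBaseApiUrls[0]=http://vllm-leader-nodeport:8080/v1" \
  --set service.type=NodePort \
  --set service.nodePort=30679
```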

View File

@ -0,0 +1,36 @@
{{ template "chart.header" . }}
{{ template "chart.deprecationWarning" . }}
{{ template "chart.badgesSection" . }}
{{ template "chart.description" . }}
{{ template "chart.homepageLine" . }}
{{ template "chart.maintainersSection" . }}
{{ template "chart.sourcesSection" . }}
## Installing
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
```shell
helm repo add open-webui https://helm.openwebui.com/
helm repo update
```
Now you can install the chart:
```shell
helm upgrade --install open-webui open-webui/open-webui
```
{{ template "chart.requirementsSection" . }}
{{ template "chart.valuesSection" . }}
----------------------------------------------
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).

View File

@ -0,0 +1,30 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
.drone.yml
*.tmproj
.vscode/
#others
.github
kind-config.yml
ci/

View File

@ -0,0 +1,30 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
.drone.yml
*.tmproj
.vscode/
#others
.github
kind-config.yml
ci/

View File

@ -0,0 +1,33 @@
apiVersion: v2
name: ollama
description: Get up and running with large language models locally.
type: application
version: 1.27.0
appVersion: "0.11.4"
annotations:
artifacthub.io/category: ai-machine-learning
artifacthub.io/changes: |
- kind: changed
description: upgrade app version to 0.11.4
links:
- name: Ollama release v0.11.4
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
kubeVersion: "^1.16.0-0"
home: https://ollama.ai/
icon: https://ollama.ai/public/ollama.png
keywords:
- ai
- llm
- llama
- mistral
sources:
- https://github.com/ollama/ollama
- https://github.com/otwld/ollama-helm
maintainers:
- name: OTWLD
email: contact@otwld.com

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 OTWLD
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,331 @@
![otwld ollama helm chart banner](./banner.png)
![GitHub License](https://img.shields.io/github/license/otwld/ollama-helm)
[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/ollama-helm)](https://artifacthub.io/packages/helm/ollama-helm/ollama)
[![Helm Lint and Test](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
[![Discord](https://img.shields.io/badge/Discord-OTWLD-blue?logo=discord&logoColor=white)](https://discord.gg/U24mpqTynB)
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
## Requirements
- Kubernetes: `>= 1.16.0-0` for **CPU only**
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
*Not all GPUs are currently supported with ollama (especially with AMD)*
## Deploying Ollama chart
To install the `ollama` chart in the `ollama` namespace:
> [!IMPORTANT]
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
> registry https://helm.otwld.com/
> Please update your Helm registry accordingly.
```console
helm repo add otwld https://helm.otwld.com/
helm repo update
helm install ollama otwld/ollama --namespace ollama --create-namespace
```
## Upgrading Ollama chart
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
backwards incompatible changes.
Make adjustments to your values as needed, then run `helm upgrade`:
```console
# -- This pulls the latest version of the ollama chart from the repo.
helm repo update
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
```
## Uninstalling Ollama chart
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
```console
helm delete ollama --namespace ollama
```
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
parameters and flags.
## Interact with Ollama
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
- Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
## Examples
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
### Basic values.yaml example with GPU and two models pulled at startup
```
ollama:
gpu:
# -- Enable GPU integration
enabled: true
# -- GPU type: 'nvidia' or 'amd'
type: 'nvidia'
# -- Specify the number of GPU to 1
number: 1
# -- List of models to pull at container startup
models:
pull:
- mistral
- llama2
```
---
### Basic values.yaml example with Ingress
```
ollama:
models:
pull:
- llama2
ingress:
enabled: true
hosts:
- host: ollama.domain.lan
paths:
- path: /
pathType: Prefix
```
- *API is now reachable at `ollama.domain.lan`*
---
### Create and run model from template
```
ollama:
models:
create:
- name: llama3.1-ctx32768
template: |
FROM llama3.1
PARAMETER num_ctx 32768
run:
- llama3.1-ctx32768
```
## Upgrading from 0.X.X to 1.X.X
The version 1.X.X introduces the ability to load models in memory at startup, the values have been changed.
Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:
```yaml
ollama:
models:
- mistral
- llama2
```
To:
```yaml
ollama:
models:
pull:
- mistral
- llama2
```
## Helm Values
- See [values.yaml](values.yaml) to see the Chart's default values.
| Key | Type | Default | Description |
|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| affinity | object | `{}` | Affinity for pod assignment |
| autoscaling.enabled | bool | `false` | Enable autoscaling |
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
| deployment.labels | object | `{}` | Labels to add to the deployment |
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like a ConfigMap) |
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
| fullnameOverride | string | `""` | String to fully override template |
| hostIPC | bool | `false` | Use the host's IPC namespace. |
| hostNetwork | bool | `false` | Use the host's network namespace. |
| hostPID | bool | `false` | Use the host's PID namespace. |
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
| image.repository | string | `"ollama/ollama"` | Docker image registry |
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
| ingress.enabled | bool | `false` | Enable ingress controller resource |
| ingress.hosts[0].host | string | `"ollama.local"` | |
| ingress.hosts[0].paths[0].path | string | `"/"` | |
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
| initContainers | list | `[]` | Init containers to add to the pod |
| knative.annotations | object | `{}` | Knative service annotations |
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
| knative.enabled | bool | `false` | Enable Knative integration |
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
| namespaceOverride | string | `""` | String to fully override namespace |
| nodeSelector | object | `{}` | Node labels for pod assignment. |
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number |
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored |
| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored |
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice |
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled' is set, the default value is nvidia. If set to 'amd', this adds the 'rocm' suffix to the image tag when 'image.tag' is not overridden, because AMD and CPU/CUDA use different images |
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral |
| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral |
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
| ollama.port | int | `11434` | |
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true |
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: <storageClass> If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) |
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty |
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: <volumeMode> If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
| podLabels | object | `{}` | Map of labels to add to the pods |
| podSecurityContext | object | `{}` | Pod Security Context |
| priorityClassName | string | `""` | Priority Class Name |
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
| replicaCount | int | `1` | Number of replicas |
| resources.limits | object | `{}` | Pod limit |
| resources.requests | object | `{}` | Pod requests |
| runtimeClassName | string | `""` | Specify runtime class |
| securityContext | object | `{}` | Container Security Context |
| service.annotations | object | `{}` | Annotations to add to the service |
| service.labels | object | `{}` | Labels to add to the service |
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
| service.port | int | `11434` | Service port |
| service.type | string | `"ClusterIP"` | Service type |
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
| tests.annotations | object | `{}` | Annotations to add to the tests |
| tests.enabled | bool | `true` | |
| tests.labels | object | `{}` | Labels to add to the tests |
| tolerations | list | `[]` | Tolerations for pod assignment |
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
----------------------------------------------
## Core team
<table>
<tr>
<td align="center">
<a href="https://github.com/jdetroyes"
><img
src="https://github.com/jdetroyes.png?size=200"
width="50"
style="margin-bottom: -4px; border-radius: 8px;"
alt="Jean Baptiste Detroyes"
/><br /><b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Jean Baptiste&nbsp;Detroyes&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</b></a
>
<div style="margin-top: 4px">
<a href="https://github.com/jdetroyes" title="Github"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
/></a>
<a
href="mailto:jdetroyes@otwld.com"
title="Email"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
/></a>
</div>
</td>
<td align="center">
<a href="https://github.com/ntrehout"
><img
src="https://github.com/ntrehout.png?size=200"
width="50"
style="margin-bottom: -4px; border-radius: 8px;"
alt="Jean Baptiste Detroyes"
/><br /><b>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Nathan&nbsp;Tréhout&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</b></a
>
<div style="margin-top: 4px">
<a href="https://x.com/n_trehout" title="Twitter"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
/></a>
<a href="https://github.com/ntrehout" title="Github"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
/></a>
<a
href="mailto:ntrehout@otwld.com"
title="Email"
><img
width="16"
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
/></a>
</div>
</td>
</tr>
</table>
## Support
- For questions, suggestions, and discussion about Ollama, please refer to
  the [Ollama issue page](https://github.com/ollama/ollama/issues)
- For questions, suggestions, and discussion about this chart, please
  visit the [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
  our [OTWLD Discord](https://discord.gg/U24mpqTynB)
View File
@ -0,0 +1,25 @@
1. Get the application URL by running these commands:
{{- if .Values.knative.enabled }}
export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
echo "Visit $KSERVICE_URL to use your application"
{{- else if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status of it by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
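The NOTES template above branches on a handful of values; as a hedged reference, these are the fragments that select each branch (the key names are taken from the template's conditionals, the sample values are illustrative only):

```yaml
# Hypothetical values fragments steering the NOTES output above
knative:
  enabled: false   # true -> prints the Knative service URL
ingress:
  enabled: false   # true -> prints one URL per configured host/path
service:
  type: NodePort   # "NodePort", "LoadBalancer", or "ClusterIP" pick the remaining branches
  port: 11434
```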
View File
@ -0,0 +1,80 @@
{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
*/}}
{{- define "ollama.namespace" -}}
{{- if .Values.namespaceOverride -}}
{{- .Values.namespaceOverride -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
Expand the name of the chart.
*/}}
{{- define "ollama.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "ollama.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "ollama.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "ollama.labels" -}}
helm.sh/chart: {{ include "ollama.chart" . }}
{{ include "ollama.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "ollama.selectorLabels" -}}
app.kubernetes.io/name: {{ include "ollama.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "ollama.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
{{/*
Models mount path
*/}}
{{- define "ollama.modelsMountPath" -}}
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
{{- end -}}
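To show how the helpers above compose in practice, here is a minimal, hypothetical template fragment; it is a sketch for illustration, not a file from this chart, and the image and volume settings are placeholders.

```yaml
# Hypothetical deployment fragment (illustration only) consuming the helpers above
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "ollama.fullname" . }}
  namespace: {{ include "ollama.namespace" . }}
  labels:
    {{- include "ollama.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "ollama.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "ollama.selectorLabels" . | nindent 8 }}
    spec:
      serviceAccountName: {{ include "ollama.serviceAccountName" . }}
      containers:
        - name: {{ .Chart.Name }}
          image: "ollama/ollama"   # image reference is a placeholder
          volumeMounts:
            - name: models
              mountPath: {{ include "ollama.modelsMountPath" . }}
      volumes:
        - name: models
          emptyDir: {}             # storage backend is a placeholder
```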
Some files were not shown because too many files have changed in this diff.