first backup of charts
This commit is contained in:
1
code/codeserver
Submodule
1
code/codeserver
Submodule
Submodule code/codeserver added at b59a4f7366
55
code/metadata.yaml
Normal file
55
code/metadata.yaml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
application_name: &application_name code
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: codeserver/ci/helm-chart
|
||||
sets:
|
||||
image:
|
||||
repository: codercom/code-server
|
||||
tag: '4.103.2'
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
port: 8080
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: codeserver/ci/helm-chart
|
||||
sets:
|
||||
image:
|
||||
repository: codercom/code-server
|
||||
tag: '4.103.2'
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
port: 8080
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
47
edgetts/metadata.yaml
Normal file
47
edgetts/metadata.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
application_name: &application_name edgetts
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: test-tts
|
||||
sets:
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
tag: "latest"
|
||||
pullPolicy: IfNotPresent
|
||||
service:
|
||||
type: NodePort
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30250
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: test-tts
|
||||
sets:
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
tag: "latest"
|
||||
pullPolicy: IfNotPresent
|
||||
service:
|
||||
type: NodePort
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30250
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
23
edgetts/test-tts/.helmignore
Normal file
23
edgetts/test-tts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
edgetts/test-tts/Chart.yaml
Normal file
24
edgetts/test-tts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: test-tts
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
edgetts/test-tts/templates/NOTES.txt
Normal file
22
edgetts/test-tts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test-tts.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test-tts.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test-tts.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test-tts.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
edgetts/test-tts/templates/_helpers.tpl
Normal file
62
edgetts/test-tts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "test-tts.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "test-tts.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "test-tts.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "test-tts.labels" -}}
|
||||
helm.sh/chart: {{ include "test-tts.chart" . }}
|
||||
{{ include "test-tts.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "test-tts.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "test-tts.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "test-tts.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "test-tts.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
78
edgetts/test-tts/templates/deployment.yaml
Normal file
78
edgetts/test-tts/templates/deployment.yaml
Normal file
@ -0,0 +1,78 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
{{- if not .Values.autoscaling.enabled }}
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "test-tts.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 8 }}
|
||||
{{- with .Values.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "test-tts.serviceAccountName" . }}
|
||||
{{- with .Values.podSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
{{- with .Values.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.service.port }}
|
||||
protocol: TCP
|
||||
{{- with .Values.livenessProbe }}
|
||||
livenessProbe:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.readinessProbe }}
|
||||
readinessProbe:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.resources }}
|
||||
resources:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
volumes:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
32
edgetts/test-tts/templates/hpa.yaml
Normal file
32
edgetts/test-tts/templates/hpa.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
{{- if .Values.autoscaling.enabled }}
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
||||
metrics:
|
||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
43
edgetts/test-tts/templates/ingress.yaml
Normal file
43
edgetts/test-tts/templates/ingress.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.ingress.className }}
|
||||
ingressClassName: {{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- with .pathType }}
|
||||
pathType: {{ . }}
|
||||
{{- end }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "test-tts.fullname" $ }}
|
||||
port:
|
||||
number: {{ $.Values.service.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
16
edgetts/test-tts/templates/service.yaml
Normal file
16
edgetts/test-tts/templates/service.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
nodePort: {{ .Values.service.nodePort }}
|
||||
selector:
|
||||
{{- include "test-tts.selectorLabels" . | nindent 4 }}
|
||||
13
edgetts/test-tts/templates/serviceaccount.yaml
Normal file
13
edgetts/test-tts/templates/serviceaccount.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "test-tts.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
||||
{{- end }}
|
||||
15
edgetts/test-tts/templates/tests/test-connection.yaml
Normal file
15
edgetts/test-tts/templates/tests/test-connection.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: "{{ include "test-tts.fullname" . }}-test-connection"
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
annotations:
|
||||
"helm.sh/hook": test
|
||||
spec:
|
||||
containers:
|
||||
- name: wget
|
||||
image: busybox
|
||||
command: ['wget']
|
||||
args: ['{{ include "test-tts.fullname" . }}:{{ .Values.service.port }}']
|
||||
restartPolicy: Never
|
||||
124
edgetts/test-tts/values.yaml
Normal file
124
edgetts/test-tts/values.yaml
Normal file
@ -0,0 +1,124 @@
|
||||
# Default values for test-tts.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
# This sets the pull policy for images.
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
tag: "latest"
|
||||
|
||||
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||
imagePullSecrets: []
|
||||
# This is to override the chart name.
|
||||
nameOverride: "edgetts"
|
||||
fullnameOverride: ""
|
||||
|
||||
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
# This is for setting Kubernetes Annotations to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
|
||||
podAnnotations: {}
|
||||
# This is for setting Kubernetes Labels to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
securityContext: {}
|
||||
# capabilities:
|
||||
# drop:
|
||||
# - ALL
|
||||
# readOnlyRootFilesystem: true
|
||||
# runAsNonRoot: true
|
||||
# runAsUser: 1000
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: NodePort
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
#livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
#readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
|
||||
# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
|
||||
autoscaling:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 100
|
||||
targetCPUUtilizationPercentage: 80
|
||||
# targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
23
jarvis/jarvis/.helmignore
Normal file
23
jarvis/jarvis/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
jarvis/jarvis/Chart.yaml
Normal file
24
jarvis/jarvis/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: jarvis
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
jarvis/jarvis/templates/NOTES.txt
Normal file
22
jarvis/jarvis/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
jarvis/jarvis/templates/_helpers.tpl
Normal file
62
jarvis/jarvis/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jarvis.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "jarvis.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "jarvis.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "jarvis.labels" -}}
|
||||
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||
{{ include "jarvis.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "jarvis.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "jarvis.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
17
jarvis/jarvis/templates/images-pv.yaml
Normal file
17
jarvis/jarvis/templates/images-pv.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
# pv.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pv # PV 的名称,可以自定义
|
||||
spec:
|
||||
storageClassName: local-path # 添加这一行,与上面StorageClass的名称一致
|
||||
capacity:
|
||||
storage: 500Gi # PV 的容量,可以根据 NFS 共享的实际大小或预期使用量调整
|
||||
accessModes:
|
||||
- ReadWriteMany # 访问模式:
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
|
||||
nfs:
|
||||
path: /volume1/Dataset/PVStore/lab-data-dataset-pvc-ec4aba12-c683-4168-b335-7b1a8819581a/Private/cache-images # NFS 服务器上共享的路径
|
||||
server: 10.6.80.11 # NFS 服务器的 IP 地址或主机名
|
||||
|
||||
14
jarvis/jarvis/templates/images-pvc.yaml
Normal file
14
jarvis/jarvis/templates/images-pvc.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# pvc.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pvc # PVC 的名称,Pod 会引用这个名称
|
||||
#namespace: default # PVC 所在的命名空间,通常是 default 或你自定义的命名空间
|
||||
spec:
|
||||
storageClassName: local-path # 添加这一行,与PV和StorageClass的名称一致
|
||||
accessModes:
|
||||
- ReadWriteMany # 访问模式,必须与 PV 的 accessModes 匹配或更宽松
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi # PVC 请求的存储容量,必须小于或等于 PV 的容量
|
||||
volumeName: {{ .Release.Name }}-pv # 明确指定要绑定的 PV 的名称,这是手动绑定 PV 的关键
|
||||
68
jarvis/jarvis/templates/jarvis-adapter.yaml
Normal file
68
jarvis/jarvis/templates/jarvis-adapter.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-adapter-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-adapter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: jarvis-adapter
|
||||
image: {{ .Values.jarvis_adapter.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
env:
|
||||
- name: INFERENCE_ENDPOINT
|
||||
value: {{ .Values.jarvis_adapter.endpoint }}
|
||||
ports:
|
||||
- containerPort: 5000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 100m # Example: 100 millicores (0.1 CPU)
|
||||
memory: 256Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 500m # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 512Mi # Example: Limit to 512 mebibytes
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-blackbox
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-adapter
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 80
|
||||
targetPort: 5000
|
||||
type: NodePort
|
||||
85
jarvis/jarvis/templates/jarvis-api.yaml
Normal file
85
jarvis/jarvis/templates/jarvis-api.yaml
Normal file
@ -0,0 +1,85 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-api-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: jarvis-api
|
||||
image: {{ .Values.jarvis_api.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
ports:
|
||||
- containerPort: 8080 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 100m # Example: 100 millicores (0.1 CPU)
|
||||
memory: 256Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 500m # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 512Mi # Example: Limit to 512 mebibytes
|
||||
volumeMounts:
|
||||
- name: env-config-volume
|
||||
mountPath: /.env.yml
|
||||
subPath: .env.yml
|
||||
readOnly: true
|
||||
- name: images-data
|
||||
mountPath: /images
|
||||
volumes:
|
||||
- name: env-config-volume
|
||||
#hostPath:
|
||||
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
|
||||
# type: FileOrCreate
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
|
||||
items:
|
||||
- key: .api.env.yml # This is the key defined in the ConfigMap's data section
|
||||
path: .env.yml # This is the filename inside the mountPath (e.g., /.env.yml)
|
||||
- name: images-data
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-api-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-api
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
type: NodePort
|
||||
183
jarvis/jarvis/templates/jarvis-configmap.yaml
Normal file
183
jarvis/jarvis/templates/jarvis-configmap.yaml
Normal file
@ -0,0 +1,183 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-cm # Name of your ConfigMap
|
||||
# namespace: jarvis-models # Ensure this matches your Deployment's namespace
|
||||
data:
|
||||
.api.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
|
||||
database:
|
||||
host: 10.6.14.130
|
||||
port: 3306
|
||||
username: jarvis
|
||||
password: boardwarejarvis
|
||||
database: jarvis
|
||||
jwt:
|
||||
secret: secretkey
|
||||
users:
|
||||
- username: jarvis
|
||||
password: boardwarejarvis
|
||||
- username: user
|
||||
password: boardwareuser
|
||||
- username: g2e
|
||||
password: g2e
|
||||
- username: vera
|
||||
password: vera
|
||||
- username: ivan
|
||||
password: ivan
|
||||
blackbox:
|
||||
mode: 0 # 0: /?blackbox=models, 1: /models-blackbox
|
||||
url: http://jarvis-model-service
|
||||
port: 8080
|
||||
|
||||
vad:
|
||||
url: http://vad-blackbox
|
||||
asr:
|
||||
url: http://asr-blackbox
|
||||
llm:
|
||||
url: http://llm-blackbox
|
||||
vlm:
|
||||
url: http://vlm-blackbox
|
||||
tts:
|
||||
url: http://tts-blackbox
|
||||
chatllama:
|
||||
url: http://chatllama-blackbox
|
||||
chroma:
|
||||
upsert_url: http://chroma-blackbox/upsert
|
||||
|
||||
|
||||
env: dev
|
||||
authentik:
|
||||
redirectUri: http://10.6.14.130:4200
|
||||
baseUrl: https://authentik.universalmacro.com
|
||||
clientId: xxx
|
||||
clientSecret: xxx
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
# log:
|
||||
# path: "/Workspace/Logging/logtime.out"
|
||||
|
||||
log:
|
||||
loki:
|
||||
# url: "https://103.192.46.20:27002/laas/1868865592451137536/loki/api/v1/push"
|
||||
url: "https://loki.bwgdi.com/loki/api/v1/push"
|
||||
x-odin-auth: "log_m7uxtqtru2318hbaoonf9wgjy8chcnebhwhl0wncsvfctu2ppn9m53q6p3i3"
|
||||
labels:
|
||||
app: jarvis
|
||||
env: dev
|
||||
location: "k3s_gdi"
|
||||
|
||||
model:
|
||||
tts:
|
||||
url: http://10.6.14.130:8000/?blackbox_name=tts
|
||||
tts_model_name: melotts
|
||||
tts_stream: false
|
||||
streaming:
|
||||
url: http://10.6.14.130:8000/?blackbox_name=chat
|
||||
vlms_url: http://10.6.14.130:8000/?blackbox_name=vlms
|
||||
.models.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
|
||||
env:
|
||||
version: 0.0.1
|
||||
host: 0.0.0.0
|
||||
port: 8000
|
||||
|
||||
log:
|
||||
level: debug
|
||||
time_format: "%Y-%m-%d %H:%M:%S"
|
||||
filename: "./jarvis-models.log"
|
||||
|
||||
loki:
|
||||
url: "https://loki.bwgdi.com/loki/api/v1/push"
|
||||
labels:
|
||||
app: jarvis
|
||||
env: dev
|
||||
location: "k3s_gdi"
|
||||
layer: models
|
||||
|
||||
melotts:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: 'ZH'
|
||||
speaker: 'ZH'
|
||||
|
||||
cosyvoicetts:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: '粤语女'
|
||||
speaker: 'ZH'
|
||||
|
||||
sovitstts:
|
||||
mode: docker
|
||||
url: http://10.6.80.90:9880/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: 'ZH'
|
||||
speaker: 'ZH'
|
||||
text_lang: "yue"
|
||||
ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
|
||||
prompt_lang: "yue"
|
||||
prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
|
||||
text_split_method: "cut5"
|
||||
batch_size: 1
|
||||
media_type: "wav"
|
||||
streaming_mode: True
|
||||
|
||||
sensevoiceasr:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: '粤语女'
|
||||
speaker: 'ZH'
|
||||
|
||||
tesou:
|
||||
url: http://120.196.116.194:48891/chat/
|
||||
|
||||
TokenIDConverter:
|
||||
token_path: src/asr/resources/models/token_list.pkl
|
||||
unk_symbol: <unk>
|
||||
|
||||
CharTokenizer:
|
||||
symbol_value:
|
||||
space_symbol: <space>
|
||||
remove_non_linguistic_symbols: false
|
||||
|
||||
WavFrontend:
|
||||
cmvn_file: src/asr/resources/models/am.mvn
|
||||
frontend_conf:
|
||||
fs: 16000
|
||||
window: hamming
|
||||
n_mels: 80
|
||||
frame_length: 25
|
||||
frame_shift: 10
|
||||
lfr_m: 7
|
||||
lfr_n: 6
|
||||
filter_length_max: -.inf
|
||||
dither: 0.0
|
||||
|
||||
Model:
|
||||
model_path: src/asr/resources/models/model.onnx
|
||||
use_cuda: false
|
||||
CUDAExecutionProvider:
|
||||
device_id: 0
|
||||
arena_extend_strategy: kNextPowerOfTwo
|
||||
cudnn_conv_algo_search: EXHAUSTIVE
|
||||
do_copy_in_default_stream: true
|
||||
batch_size: 3
|
||||
blackbox:
|
||||
lazyloading: true
|
||||
|
||||
vlms:
|
||||
urls:
|
||||
qwen_vl: http://vl-svc
|
||||
vlm: http://vl-svc:8080
|
||||
|
||||
path:
|
||||
chroma_rerank_embedding_model: /Model/BAAI
|
||||
cosyvoice_path: /Voice/CosyVoice
|
||||
cosyvoice_model_path: /Voice/CosyVoice/pretrained_models
|
||||
sensevoice_model_path: /Voice/SenseVoice/SenseVoiceSmall
|
||||
96
jarvis/jarvis/templates/jarvis-models.yaml
Normal file
96
jarvis/jarvis/templates/jarvis-models.yaml
Normal file
@ -0,0 +1,96 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-model-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-model
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
runtimeClassName: nvidia
|
||||
containers:
|
||||
- name: jarvis-model
|
||||
image: {{ .Values.jarvis_model.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
# command: ["sleep", "infinity"]
|
||||
ports:
|
||||
- containerPort: 8000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 1 # Example: 100 millicores (0.1 CPU)
|
||||
memory: 512Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 1Gi # Example: Limit to 512 mebibytes
|
||||
nvidia.com/gpu: 1
|
||||
volumeMounts:
|
||||
- name: env-config-volume
|
||||
mountPath: /jarvis-models/.env.yaml
|
||||
subPath: .env.yaml
|
||||
readOnly: true
|
||||
- name: nfs-volume
|
||||
subPath: Weight
|
||||
mountPath: /Model
|
||||
- name: nfs-volume
|
||||
subPath: Voice
|
||||
mountPath: /Voice
|
||||
volumes:
|
||||
- name: env-config-volume
|
||||
#hostPath:
|
||||
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
|
||||
# type: FileOrCreate
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
|
||||
items:
|
||||
- key: .models.env.yml # This is the key defined in the ConfigMap's data section
|
||||
path: .env.yaml # This is the filename inside the mountPath (e.g., /.env.yml)
|
||||
- name: nfs-volume
|
||||
nfs :
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
|
||||
#{{- range .Values.volumes }}
|
||||
#- {{ . | toYaml | nindent 10 | trim }}
|
||||
#{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: jarvis-model-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-model
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8000
|
||||
type: NodePort
|
||||
87
jarvis/jarvis/values.yaml
Normal file
87
jarvis/jarvis/values.yaml
Normal file
@ -0,0 +1,87 @@
|
||||
# Default values for jarvis.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
|
||||
jarvis_model:
|
||||
image: harbor.bwgdi.com/library/jarvis-models:0.0.1
|
||||
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: ClusterIP
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 80
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes:
|
||||
- name: nfs-volume
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
|
||||
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
43
jarvis/metadata.yaml
Normal file
43
jarvis/metadata.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
|
||||
application_name: &application_name jarvis
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jarvis
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: jarvis-api-service
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: jarvis-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jarvis
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: jarvis-api-service
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: jarvis-
|
||||
17
jupyter/jupyter/Chart.yaml
Normal file
17
jupyter/jupyter/Chart.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
appVersion: 6.0.3
|
||||
description: Helm for jupyter single server with pyspark support
|
||||
home: https://jupyter.org
|
||||
icon: https://jupyter.org/assets/main-logo.svg
|
||||
keywords:
|
||||
- jupyter
|
||||
- notebook
|
||||
- spark
|
||||
maintainers:
|
||||
- email: cgiraldo@gradiant.org
|
||||
name: cgiraldo
|
||||
name: jupyter
|
||||
sources:
|
||||
- https://github.com/gradiant/charts
|
||||
- https://github.com/astrobounce/helm-jupyter
|
||||
version: 0.1.6
|
||||
34
jupyter/jupyter/README.md
Normal file
34
jupyter/jupyter/README.md
Normal file
@ -0,0 +1,34 @@
|
||||
jupyter
|
||||
=======
|
||||
Helm for jupyter single server with pyspark support.
|
||||
For jupyterhub chart see [zero-to-jupyterhub](https://zero-to-jupyterhub.readthedocs.io/en/latest/).
|
||||
|
||||
Current chart version is `0.1.0`
|
||||
|
||||
Source code can be found [here]((https://github.com/gradiant/charts/charts/jupyter)
|
||||
|
||||
|
||||
## Chart Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| affinity | object | `{}` | |
|
||||
| image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| image.repository | string | `"gradiant/jupyter"` | |
|
||||
| image.tag | string | `"6.0.1"` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.hosts[0] | string | `"jupyter.127-0-0-1.nip"` | |
|
||||
| ingress.path | string | `"/"` | |
|
||||
| ingress.tls | list | `[]` | |
|
||||
| lab | bool | `true` | |
|
||||
| nodeSelector | object | `{}` | |
|
||||
| persistence.accessMode | string | `"ReadWriteOnce"` | |
|
||||
| persistence.enabled | bool | `true` | |
|
||||
| persistence.size | string | `"50Gi"` | |
|
||||
| persistence.storageClass | string | `nil` | |
|
||||
| resources | object | `{}` | |
|
||||
| service.externalPort | int | `8888` | |
|
||||
| service.nodePort.http | string | `nil` | |
|
||||
| service.type | string | `"ClusterIP"` | |
|
||||
| tolerations | list | `[]` | |
|
||||
23
jupyter/jupyter/templates/NOTES.txt
Normal file
23
jupyter/jupyter/templates/NOTES.txt
Normal file
@ -0,0 +1,23 @@
|
||||
1. Get access token from jupyter server log:
|
||||
kubectl logs -f -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }}
|
||||
|
||||
1. Create a port-forward to the jupyter:
|
||||
kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }} 8888:{{ .Values.service.externalPort }}
|
||||
|
||||
Then open the ui in your browser and use the access token:
|
||||
open http://localhost:88888
|
||||
|
||||
If you set up your own password, remember to restart jupyter server to update the configuration.
|
||||
File -> Shut Down
|
||||
|
||||
{{- if .Values.ingress.enabled }}
|
||||
Ingress is enabled:
|
||||
{{- range .Values.ingress.tls }}
|
||||
{{- range .hosts }}
|
||||
open https://{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- range .Values.ingress.hosts }}
|
||||
open http://{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
32
jupyter/jupyter/templates/_helpers.tpl
Normal file
32
jupyter/jupyter/templates/_helpers.tpl
Normal file
@ -0,0 +1,32 @@
|
||||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jupyter.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
*/}}
|
||||
{{- define "jupyter.fullname" -}}
|
||||
{{- if .Values.fullnameOverride -}}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride -}}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Standard Labels from Helm documentation https://helm.sh/docs/chart_best_practices/#labels-and-annotations
|
||||
*/}}
|
||||
|
||||
{{- define "jupyter.labels" -}}
|
||||
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
app.kubernetes.io/part-of: {{ .Chart.Name }}
|
||||
{{- end -}}
|
||||
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
@ -0,0 +1,36 @@
|
||||
{{- if .Values.gitNotebooks }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
data:
|
||||
git-notebooks.sh: |-
|
||||
#!/bin/sh
|
||||
set -x
|
||||
cd /home/jovyan
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
cp -r /tmp/.ssh /root/
|
||||
chmod 600 /root/.ssh/*
|
||||
{{- else }}
|
||||
mkdir /root/.ssh
|
||||
{{- end }}
|
||||
echo "Loading notebooks from git repo"
|
||||
{{- range .Values.gitNotebooks.repos }}
|
||||
if [ ! -d "/home/jovyan/{{ .name }}" ]
|
||||
then
|
||||
echo "Cloning {{ .name }} notebook repository"
|
||||
{{- if or (hasPrefix "git" .repo) (hasPrefix "ssh" .repo) }}
|
||||
ssh-keyscan {{ .repo | regexFind "@([a-zA-Z0-9.]*)" | replace "@" "" }} >> ~/.ssh/known_hosts
|
||||
{{- end }}
|
||||
git clone {{ .repo }} {{ .name }}
|
||||
else
|
||||
echo "{{ .name }} notebook repository already cloned"
|
||||
fi
|
||||
{{- end }}
|
||||
# exit code 0 to continue deployment even if git clone fails
|
||||
exit 0
|
||||
|
||||
{{- end }}
|
||||
39
jupyter/jupyter/templates/ingress.yaml
Normal file
39
jupyter/jupyter/templates/ingress.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "jupyter.fullname" . -}}
|
||||
{{- $ingressPath := .Values.ingress.path -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
{{- if .Values.ingress.labels }}
|
||||
{{ toYaml .Values.ingress.labels | indent 4 }}
|
||||
{{- end }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{ toYaml . | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ . }}
|
||||
http:
|
||||
paths:
|
||||
- path: {{ $ingressPath }}
|
||||
backend:
|
||||
serviceName: {{ $fullName }}
|
||||
servicePort: web
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
20
jupyter/jupyter/templates/service.yaml
Normal file
20
jupyter/jupyter/templates/service.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
ports:
|
||||
- name: web
|
||||
protocol: TCP
|
||||
port: {{ .Values.service.externalPort | default 8888 }}
|
||||
{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort.http))) }}
|
||||
nodePort: {{ .Values.service.nodePort.http }}
|
||||
{{- end }}
|
||||
targetPort: 8888
|
||||
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
@ -0,0 +1,118 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||
serviceName: {{ include "jupyter.fullname" . }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 8}}
|
||||
spec:
|
||||
{{- if .Values.gitNotebooks }}
|
||||
initContainers:
|
||||
- name: git-notebooks
|
||||
image: alpine/git
|
||||
command:
|
||||
- /bin/bash
|
||||
- /git-notebooks.sh
|
||||
volumeMounts:
|
||||
- name: git-notebooks
|
||||
mountPath: /git-notebooks.sh
|
||||
subPath: git-notebooks.sh
|
||||
- name: jupyter
|
||||
mountPath: /home/jovyan
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
- name: git-secret
|
||||
mountPath: "/tmp/.ssh"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: jupyter
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
|
||||
env:
|
||||
- name: JUPYTER_ENABLE_LAB
|
||||
value: "{{ .Values.lab }}"
|
||||
- name: JPY_USER
|
||||
value: "jovyan"
|
||||
args:
|
||||
- start-notebook.sh
|
||||
- --ip=0.0.0.0
|
||||
- --user="jovyan"
|
||||
ports:
|
||||
- name: web
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
{{ toYaml .Values.resources | indent 10 }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8888
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 15
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8888
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 15
|
||||
volumeMounts:
|
||||
- name: jupyter
|
||||
mountPath: /home/jovyan
|
||||
volumes:
|
||||
{{- if .Values.gitNotebooks }}
|
||||
- name: git-notebooks
|
||||
configMap:
|
||||
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
- name: git-secret
|
||||
secret:
|
||||
secretName: {{ .Values.gitNotebooks.secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if not .Values.persistence.enabled }}
|
||||
- name: jupyter
|
||||
emptyDir: {}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.persistence.enabled }}
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: jupyter
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: "{{ .Values.persistence.size }}"
|
||||
{{- if .Values.persistence.storageClass }}
|
||||
{{- if (eq "-" .Values.persistence.storageClass) }}
|
||||
storageClassName: ""
|
||||
{{- else }}
|
||||
storageClassName: "{{ .Values.persistence.storageClass }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
64
jupyter/jupyter/values.yaml
Normal file
64
jupyter/jupyter/values.yaml
Normal file
@ -0,0 +1,64 @@
|
||||
# Default values for jupyter.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
lab: true
|
||||
|
||||
#gitNotebooks:
|
||||
# secretName: the name of the secret with ssh keys
|
||||
# repos:
|
||||
# - name: gradiant
|
||||
# repo: https://github.com/Gradiant/notebooks.git
|
||||
# - name: grad-git
|
||||
# repo: git@github.com:Gradiant/notebooks.git
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
externalPort: 8888
|
||||
nodePort:
|
||||
http:
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass:
|
||||
accessMode: ReadWriteOnce
|
||||
size: 50Gi
|
||||
|
||||
## Ingress configuration
|
||||
## Ref: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
##
|
||||
ingress:
|
||||
enabled: false
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
path: /
|
||||
hosts:
|
||||
- jupyter.127-0-0-1.nip.io
|
||||
tls: []
|
||||
# - secretName: jupyter-tls
|
||||
# hosts:
|
||||
# - jupyter.local
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
55
jupyter/metadata.yaml
Normal file
55
jupyter/metadata.yaml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
application_name: &application_name jupyter
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jupyter
|
||||
sets:
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort:
|
||||
http: 30888
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30888
|
||||
url: ~
|
||||
pod:
|
||||
name: jupyter-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jupyter
|
||||
sets:
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30888
|
||||
url: ~
|
||||
pod:
|
||||
name: jupyter-
|
||||
23
llama-factory/llama-factory/.helmignore
Normal file
23
llama-factory/llama-factory/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
llama-factory/llama-factory/Chart.yaml
Normal file
25
llama-factory/llama-factory/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: Llama-factory
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
@ -0,0 +1,159 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: llamafactory
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "llamafactory-cli webui"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
33
llama-factory/llama-factory/templates/services.yaml
Normal file
33
llama-factory/llama-factory/templates/services.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Values.app }}-leader-nodeport
|
||||
spec:
|
||||
type: NodePort
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: llamafactory
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: llama-factory
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 7860
|
||||
51
llama-factory/llama-factory/templates/single.yaml
Normal file
51
llama-factory/llama-factory/templates/single.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llama-factory
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: llama-factory
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-factory
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "llamafactory-cli webui"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app}}-pvc-model
|
||||
{{- end }}
|
||||
44
llama-factory/llama-factory/values.yaml
Normal file
44
llama-factory/llama-factory/values.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download:
|
||||
enabled: false # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
app: "llama-factory"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
53
llama-factory/metadata.yaml
Normal file
53
llama-factory/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name llama-factory
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: llama-factory
|
||||
sets:
|
||||
app: llama-factory
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: llamafactory
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: llama-factory
|
||||
sets:
|
||||
app: llama-factory
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: llama-factory
|
||||
23
melotts/melotts/.helmignore
Normal file
23
melotts/melotts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
melotts/melotts/Chart.yaml
Normal file
24
melotts/melotts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: jarvis
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
melotts/melotts/templates/NOTES.txt
Normal file
22
melotts/melotts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
melotts/melotts/templates/_helpers.tpl
Normal file
62
melotts/melotts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jarvis.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "jarvis.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "jarvis.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "jarvis.labels" -}}
|
||||
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||
{{ include "jarvis.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "jarvis.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "jarvis.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
81
melotts/melotts/templates/melotts.yaml
Normal file
81
melotts/melotts/templates/melotts.yaml
Normal file
@ -0,0 +1,81 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-dp
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
runtimeClassName: nvidia
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: melo
|
||||
image: {{ .Values.melotts.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
#command:
|
||||
# - /bin/bash
|
||||
# - -c
|
||||
# - "bash && sleep infinity"
|
||||
ports:
|
||||
- containerPort: 5000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 2 # Example: 100 millicores (0.1 CPU)
|
||||
memory: 4Gi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 6Gi # Example: Limit to 512 mebibytes
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: /models
|
||||
- name: weight-volume
|
||||
mountPath: /usr/local/nltk_data
|
||||
subPath: nltk_data
|
||||
volumes:
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 5000
|
||||
type: NodePort
|
||||
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
89
melotts/melotts/values.yaml
Normal file
89
melotts/melotts/values.yaml
Normal file
@ -0,0 +1,89 @@
|
||||
# Default values for jarvis.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
app: "melotts"
|
||||
|
||||
melotts:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Voice/MeloTTS"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: ClusterIP
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 80
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
35
melotts/metadata.yaml
Normal file
35
melotts/metadata.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
application_name: &application_name melotts
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: melotts
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: melo-service
|
||||
port: 32147
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: melotts
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: melo-service
|
||||
port: 32147
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
53
vllm/metadata.yaml
Normal file
53
vllm/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name vllm
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
paths:
|
||||
docs_path: /docs
|
||||
redoc_path: /redoc
|
||||
pod:
|
||||
name: infer-0
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: vllm
|
||||
|
||||
23
vllm/vllm-app/.helmignore
Normal file
23
vllm/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
vllm/vllm-app/Chart.yaml
Normal file
25
vllm/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-app
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
165
vllm/vllm-app/templates/llama.yaml
Normal file
165
vllm/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
||||
{{- /*
LeaderWorkerSet for multi-node lmdeploy serving.
Rendered only when workerSize > 1 AND .Values.app == "lmdeploy";
the single-node case is handled by single.yaml.
*/}}
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              # Start the Ray head, then serve with tp spanning every GPU in
              # the group (gpus-per-node * group size).
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              # Chart-managed PVC created by nfs-pvc.yaml; the previous
              # hard-coded "nfs-pvc-model" is not created by this chart.
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "echo $(LWS_LEADER_ADDRESS);
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              # Same chart-managed PVC as the leader (was "nfs-pvc-model").
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
166
vllm/vllm-app/templates/lws.yaml
Normal file
166
vllm/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
||||
{{- /*
LeaderWorkerSet for multi-node vLLM serving.
Rendered only when workerSize > 1 AND .Values.app == "vllm";
the single-node case is handled by single.yaml.
*/}}
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              # Start the Ray head, then launch the OpenAI-compatible server
              # with TP within a node and PP across the worker group.
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
{{- /*
Optional pre-install/pre-upgrade Job that downloads the model weights onto
the chart's model PVC before the release is installed.
*/}}
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-download-model
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade  # run before install/upgrade
    "helm.sh/hook-weight": "-10"             # run early among hooks
    "helm.sh/hook-delete-policy": hook-succeeded
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: downloader
          image: {{ .Values.model.download.image }}
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            # Quoted: an unset token would otherwise render a bare null
            # value, which the API server rejects for env vars.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
          command:
            - sh
            - -c
            - |
              DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              if [ -d "$DEST_DIR" ]; then
                echo "Model already exists at $DEST_DIR"
              else
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            # Chart-managed PVC from nfs-pvc.yaml; the previous hard-coded
            # "nfs-pvc-model" is not created by this chart.
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# PersistentVolume backed by the NFS share that stores model weights.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Values.app }}-pv-model
spec:
  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
  capacity:
    storage: {{ .Values.nfs.pvSize }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    # Quoted so paths/hosts with YAML-special characters render safely.
    path: "{{ .Values.nfs.path }}"
    server: "{{ .Values.nfs.server }}"
|
||||
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
# PersistentVolumeClaim bound explicitly (via volumeName) to the model PV.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.app }}-pvc-model
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.nfs.pvcSize }}
  volumeName: {{ .Values.app }}-pv-model
|
||||
39
vllm/vllm-app/templates/services.yaml
Normal file
39
vllm/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
# Commented-out LoadBalancer variant kept for reference.
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
# NodePort Service that exposes the inference endpoint.
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  # Multi-node: route traffic to the LeaderWorkerSet leader pod only.
  selector:
    leaderworkerset.sigs.k8s.io/name: infer
    role: leader
  {{- else }}
  # Single-node: route to the plain Deployment pods (see single.yaml labels).
  selector:
    app: vllm-app
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      {{- if eq .Values.app "llama" }}
      # The llama image serves on 7860 — TODO confirm against that image.
      targetPort: 7860
      {{- else }}
      targetPort: 8080
      {{- end }}
      nodePort: 30080
|
||||
|
||||
114
vllm/vllm-app/templates/single.yaml
Normal file
114
vllm/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
||||
{{- /*
Single-node vLLM Deployment, rendered when workerSize == 1.
NOTE(review): this path always deploys the vLLM image regardless of
.Values.app — confirm single-node lmdeploy/llama is intentionally unsupported.
*/}}
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: vllm-app
  template:
    metadata:
      labels:
        app: vllm-app
    spec:
      initContainers:
        # Model download runs first so the weights are on the PVC before the
        # serving container starts.
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            # Quoted so an unset token renders as "" instead of null.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # Download only if the model is not already present.
              echo "DEST_DIR= $DEST_DIR"
              ls $DEST_DIR
              ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          env:
            # Fixed: the previous reference to .Values.vllm.huggingfaceToken
            # is not defined in values.yaml (the token lives under
            # .Values.model) and rendered an invalid null env value.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
              echo 'Using single node ------------------------------------------';
              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
58
vllm/vllm-app/values.yaml
Normal file
58
vllm/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
||||
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally set
  localMountPath: "/Model" # fixed mount path of the model PVC inside pods
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false # enable the pre-install model-download Job
    image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli

# Backend selection: templates render based on this ("vllm", "lmdeploy", "llama")
app: "vllm"

resources:
  gpuLimit: 1
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

# vLLM application configuration
vllm:
  image: "docker.io/vllm/vllm-openai:latest"
  #gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"
  # gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
# workerSize > 1 renders the LWS templates; workerSize == 1 renders single.yaml
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
|
||||
23
vllm/vllm-serve/.helmignore
Normal file
23
vllm/vllm-serve/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
vllm/vllm-serve/Chart.yaml
Normal file
25
vllm/vllm-serve/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-serve
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
@ -0,0 +1,16 @@
|
||||
1. Get the application URL by running these commands:
{{- /* Fixed: this chart defines .Values.svc (see services.yaml), not
.Values.service — the old mixed references made the LoadBalancer/ClusterIP
branches dereference a nil value. */}}
{{- if contains "NodePort" .Values.svc.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.svc.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.svc.port }}
{{- else if contains "ClusterIP" .Values.svc.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
|
||||
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
Standard Helm naming/label helpers (chart scaffold).
*/}}
{{/*
Expand the name of the chart.
*/}}
{{- define "vllm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "vllm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "vllm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "vllm-serve.labels" -}}
helm.sh/chart: {{ include "vllm-serve.chart" . }}
{{ include "vllm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "vllm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use.
NOTE(review): this references .Values.serviceAccount, which does not appear
in the visible chart values — confirm the helper is unused or add the key.
*/}}
{{- define "vllm-serve.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
|
||||
188
vllm/vllm-serve/templates/lws.yaml
Normal file
188
vllm/vllm-serve/templates/lws.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
{{- /*
Multi-node vLLM serving via LeaderWorkerSet (rendered when workerSize > 1).
*/}}
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              # Start the Ray head, then launch the OpenAI-compatible server
              # with TP within a node and PP across the worker group.
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}
|
||||
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
---
|
||||
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Release.Name }}-pv-model
|
||||
35
vllm/vllm-serve/templates/services.yaml
Normal file
35
vllm/vllm-serve/templates/services.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-svc
|
||||
spec:
|
||||
type: {{ .Values.svc.type | default "NodePort" }}
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.svc.port | default 8080 }}
|
||||
targetPort: {{ .Values.svc.port | default 8080 }}
|
||||
nodePort: {{ .Values.svc.nodePort | default 30080 }}
|
||||
|
||||
108
vllm/vllm-serve/templates/single.yaml
Normal file
108
vllm/vllm-serve/templates/single.yaml
Normal file
@ -0,0 +1,108 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-pod
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
#tcpSocket:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
75
vllm/vllm-serve/values.yaml
Normal file
75
vllm/vllm-serve/values.yaml
Normal file
@ -0,0 +1,75 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||
imagePullSecrets: []
|
||||
imagePullPolicy: IfNotPresent
|
||||
# This is to override the chart name.
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download: # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
svc:
|
||||
type: NodePort
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
nodePort: 30080
|
||||
# vLLM 应用配置
|
||||
vllm:
|
||||
image: "docker.io/vllm/vllm-openai:latest"
|
||||
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
# lmdeploy 应用配置
|
||||
lmdeploy:
|
||||
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
53
webchat/metadata.yaml
Normal file
53
webchat/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name webchat
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: llama
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30081
|
||||
url: ~
|
||||
paths:
|
||||
docs_path: /docs
|
||||
redoc_path: /redoc
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
|
||||
23
webchat/vllm-app/.helmignore
Normal file
23
webchat/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
webchat/vllm-app/Chart.yaml
Normal file
25
webchat/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-app
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
165
webchat/vllm-app/templates/llama.yaml
Normal file
165
webchat/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: lmdeploy-leader
|
||||
image: {{ .Values.lmdeploy.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
#httpGet:
|
||||
#path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: lmdeploy-worker
|
||||
image: {{ .Values.lmdeploy.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
{{- end }}
|
||||
166
webchat/vllm-app/templates/lws.yaml
Normal file
166
webchat/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-leader
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
#httpGet:
|
||||
#path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: vllm-worker
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
{{- if .Values.model.download.enabled }}
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-download-model
|
||||
annotations:
|
||||
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||
"helm.sh/hook-weight": "-10" # 优先执行
|
||||
"helm.sh/hook-delete-policy": hook-succeeded
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: downloader
|
||||
image: {{ .Values.model.download.image }}
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
if [ -d "$DEST_DIR" ]; then
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
else
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: model-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model # 复用之前的 PVC
|
||||
{{- end }}
|
||||
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
39
webchat/vllm-app/templates/services.yaml
Normal file
39
webchat/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Values.app }}-leader-nodeport
|
||||
spec:
|
||||
type: NodePort
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
{{- if eq .Values.app "llama" }}
|
||||
targetPort: 7860
|
||||
{{- else }}
|
||||
targetPort: 8080
|
||||
{{- end }}
|
||||
nodePort: 30081
|
||||
|
||||
114
webchat/vllm-app/templates/single.yaml
Normal file
114
webchat/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-leader
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
#securityContext:
|
||||
# capabilities:
|
||||
# add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
#- name: GLOO_SOCKET_IFNAME
|
||||
# value: eth0
|
||||
#- name: NCCL_SOCKET_IFNAME
|
||||
# value: eth0
|
||||
#- name: NCCL_IB_DISABLE
|
||||
# value: "0"
|
||||
#- name: NCCL_DEBUG
|
||||
# value: INFO
|
||||
#- name: NCCL_IB_HCA
|
||||
# value: mlx5_0:1
|
||||
#- name: NCCL_IB_GID_INDEX
|
||||
# value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.vllm.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
|
||||
memory: {{ .Values.vllm.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
#rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.vllm.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
#tcpSocket:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.vllm.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
{{- end }}
|
||||
58
webchat/vllm-app/values.yaml
Normal file
58
webchat/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download:
|
||||
enabled: false # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
app: "vllm"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
# vLLM 应用配置
|
||||
vllm:
|
||||
image: "docker.io/vllm/vllm-openai:latest"
|
||||
#gpuLimit: 2
|
||||
# cpuRequest: 12
|
||||
# memoryLimit: "12Gi"
|
||||
# shmSize: "15Gi"
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
# lmdeploy 应用配置
|
||||
lmdeploy:
|
||||
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||
# gpuLimit: 2
|
||||
# cpuRequest: 12
|
||||
# memoryLimit: "12Gi"
|
||||
# shmSize: "15Gi"
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
51
webui/metadata.yaml
Normal file
51
webui/metadata.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
application_name: &application_name webui
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: open-webui
|
||||
sets:
|
||||
image:
|
||||
repository: ghcr.io/open-webui/open-webui
|
||||
tag: main
|
||||
pullPolicy: "IfNotPresent"
|
||||
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||
ollama:
|
||||
enabled: false
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort: 30679
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30679
|
||||
url: ~
|
||||
pod:
|
||||
name: open-webui-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: open-webui
|
||||
sets:
|
||||
image:
|
||||
repository: ghcr.io/open-webui/open-webui
|
||||
tag: main
|
||||
pullPolicy: "IfNotPresent"
|
||||
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||
ollama:
|
||||
enabled: false
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort: 30679
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30679
|
||||
url: ~
|
||||
pod:
|
||||
name: open-webui-
|
||||
25
webui/open-webui/.helmignore
Normal file
25
webui/open-webui/.helmignore
Normal file
@ -0,0 +1,25 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
values-minikube.yaml
|
||||
12
webui/open-webui/Chart.lock
Normal file
12
webui/open-webui/Chart.lock
Normal file
@ -0,0 +1,12 @@
|
||||
dependencies:
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm/
|
||||
version: 1.27.0
|
||||
- name: pipelines
|
||||
repository: https://helm.openwebui.com
|
||||
version: 0.7.0
|
||||
- name: tika
|
||||
repository: https://apache.jfrog.io/artifactory/tika
|
||||
version: 3.2.2
|
||||
digest: sha256:1c6e5d6a38dc8ebb4e15b1945fb222fa57b10e8882d5c79ba430648f3c5af372
|
||||
generated: "2025-08-22T15:22:03.150693+02:00"
|
||||
38
webui/open-webui/Chart.yaml
Normal file
38
webui/open-webui/Chart.yaml
Normal file
@ -0,0 +1,38 @@
|
||||
annotations:
|
||||
licenses: MIT
|
||||
apiVersion: v2
|
||||
appVersion: 0.6.26
|
||||
dependencies:
|
||||
- condition: ollama.enabled
|
||||
import-values:
|
||||
- child: service
|
||||
parent: ollama.service
|
||||
name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm/
|
||||
version: '>=0.24.0'
|
||||
- condition: pipelines.enabled
|
||||
import-values:
|
||||
- child: service
|
||||
parent: pipelines.service
|
||||
name: pipelines
|
||||
repository: https://helm.openwebui.com
|
||||
version: '>=0.0.1'
|
||||
- condition: tika.enabled
|
||||
name: tika
|
||||
repository: https://apache.jfrog.io/artifactory/tika
|
||||
version: '>=2.9.0'
|
||||
description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions \U0001F44B"
|
||||
home: https://www.openwebui.com/
|
||||
icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png
|
||||
keywords:
|
||||
- llm
|
||||
- chat
|
||||
- web-ui
|
||||
- open-webui
|
||||
name: open-webui
|
||||
sources:
|
||||
- https://github.com/open-webui/helm-charts
|
||||
- https://github.com/open-webui/open-webui/pkgs/container/open-webui
|
||||
- https://github.com/otwld/ollama-helm/
|
||||
- https://hub.docker.com/r/ollama/ollama
|
||||
version: 7.7.0
|
||||
270
webui/open-webui/README.md
Normal file
270
webui/open-webui/README.md
Normal file
@ -0,0 +1,270 @@
|
||||
# open-webui
|
||||
|
||||
 
|
||||
|
||||
Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋
|
||||
|
||||
**Homepage:** <https://www.openwebui.com/>
|
||||
|
||||
## Source Code
|
||||
|
||||
* <https://github.com/open-webui/helm-charts>
|
||||
* <https://github.com/open-webui/open-webui/pkgs/container/open-webui>
|
||||
* <https://github.com/otwld/ollama-helm/>
|
||||
* <https://hub.docker.com/r/ollama/ollama>
|
||||
|
||||
## Installing
|
||||
|
||||
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||
|
||||
```shell
|
||||
helm repo add open-webui https://helm.openwebui.com/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Now you can install the chart:
|
||||
|
||||
```shell
|
||||
helm upgrade --install open-webui open-webui/open-webui
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://apache.jfrog.io/artifactory/tika | tika | >=2.9.0 |
|
||||
| https://helm.openwebui.com | pipelines | >=0.0.1 |
|
||||
| https://otwld.github.io/ollama-helm/ | ollama | >=0.24.0 |
|
||||
|
||||
## Values
|
||||
|
||||
### Logging configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| logging.components.audio | string | `""` | Set the log level for the Audio processing component |
|
||||
| logging.components.comfyui | string | `""` | Set the log level for the ComfyUI Integration component |
|
||||
| logging.components.config | string | `""` | Set the log level for the Configuration Management component |
|
||||
| logging.components.db | string | `""` | Set the log level for the Database Operations (Peewee) component |
|
||||
| logging.components.images | string | `""` | Set the log level for the Image Generation component |
|
||||
| logging.components.main | string | `""` | Set the log level for the Main Application Execution component |
|
||||
| logging.components.models | string | `""` | Set the log level for the Model Management component |
|
||||
| logging.components.ollama | string | `""` | Set the log level for the Ollama Backend Integration component |
|
||||
| logging.components.openai | string | `""` | Set the log level for the OpenAI API Integration component |
|
||||
| logging.components.rag | string | `""` | Set the log level for the Retrieval-Augmented Generation (RAG) component |
|
||||
| logging.components.webhook | string | `""` | Set the log level for the Authentication Webhook component |
|
||||
| logging.level | string | `""` | Set the global log level ["notset", "debug", "info" (default), "warning", "error", "critical"] |
|
||||
|
||||
### Azure Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.azure.container | string | `""` | Sets the container name for Azure Storage |
|
||||
| persistence.azure.endpointUrl | string | `""` | Sets the endpoint URL for Azure Storage |
|
||||
| persistence.azure.key | string | `""` | Set the access key for Azure Storage (ignored if keyExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Managed Identity if run in Azure services |
|
||||
| persistence.azure.keyExistingSecret | string | `""` | Set the access key for Azure Storage from existing secret |
|
||||
| persistence.azure.keyExistingSecretKey | string | `""` | Set the access key for Azure Storage from existing secret key |
|
||||
|
||||
### Google Cloud Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.gcs.appCredentialsJson | string | `""` | Contents of Google Application Credentials JSON file (ignored if appCredentialsJsonExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Google Metadata server if run on a Google Compute Engine. File can be generated for a service account following this guide: https://developers.google.com/workspace/guides/create-credentials#service-account |
|
||||
| persistence.gcs.appCredentialsJsonExistingSecret | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret |
|
||||
| persistence.gcs.appCredentialsJsonExistingSecretKey | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret key |
|
||||
| persistence.gcs.bucket | string | `""` | Sets the bucket name for Google Cloud Storage. Bucket must already exist |
|
||||
|
||||
### Amazon S3 Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.s3.accessKey | string | `""` | Sets the access key ID for S3 storage |
|
||||
| persistence.s3.accessKeyExistingAccessKey | string | `""` | Set the secret access key for S3 storage from existing k8s secret key |
|
||||
| persistence.s3.accessKeyExistingSecret | string | `""` | Set the secret access key for S3 storage from existing k8s secret |
|
||||
| persistence.s3.bucket | string | `""` | Sets the bucket name for S3 storage |
|
||||
| persistence.s3.endpointUrl | string | `""` | Sets the endpoint url for S3 storage |
|
||||
| persistence.s3.keyPrefix | string | `""` | Sets the key prefix for a S3 object |
|
||||
| persistence.s3.region | string | `""` | Sets the region name for S3 storage |
|
||||
| persistence.s3.secretKey | string | `""` | Sets the secret access key for S3 storage (ignored if secretKeyExistingSecret is set) |
|
||||
| persistence.s3.secretKeyExistingSecret | string | `""` | Set the secret key for S3 storage from existing k8s secret |
|
||||
| persistence.s3.secretKeyExistingSecretKey | string | `""` | Set the secret key for S3 storage from existing k8s secret key |
|
||||
|
||||
### SSO Configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.enableGroupManagement | bool | `false` | Enable OAuth group management through access token groups claim |
|
||||
| sso.enableRoleManagement | bool | `false` | Enable OAuth role management through access token roles claim |
|
||||
| sso.enableSignup | bool | `false` | Enable account creation when logging in with OAuth (distinct from regular signup) |
|
||||
| sso.enabled | bool | `false` | **Enable SSO authentication globally** must enable to use SSO authentication |
|
||||
| sso.groupManagement.groupsClaim | string | `"groups"` | The claim that contains the groups (can be nested, e.g., user.memberOf) |
|
||||
| sso.mergeAccountsByEmail | bool | `false` | Allow logging into accounts that match email from OAuth provider (considered insecure) |
|
||||
|
||||
### GitHub OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.github.clientExistingSecret | string | `""` | GitHub OAuth client secret from existing secret |
|
||||
| sso.github.clientExistingSecretKey | string | `""` | GitHub OAuth client secret key from existing secret |
|
||||
| sso.github.clientId | string | `""` | GitHub OAuth client ID |
|
||||
| sso.github.clientSecret | string | `""` | GitHub OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.github.enabled | bool | `false` | Enable GitHub OAuth |
|
||||
|
||||
### Google OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.google.clientExistingSecret | string | `""` | Google OAuth client secret from existing secret |
|
||||
| sso.google.clientExistingSecretKey | string | `""` | Google OAuth client secret key from existing secret |
|
||||
| sso.google.clientId | string | `""` | Google OAuth client ID |
|
||||
| sso.google.clientSecret | string | `""` | Google OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.google.enabled | bool | `false` | Enable Google OAuth |
|
||||
|
||||
### Microsoft OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.microsoft.clientExistingSecret | string | `""` | Microsoft OAuth client secret from existing secret |
|
||||
| sso.microsoft.clientExistingSecretKey | string | `""` | Microsoft OAuth client secret key from existing secret |
|
||||
| sso.microsoft.clientId | string | `""` | Microsoft OAuth client ID |
|
||||
| sso.microsoft.clientSecret | string | `""` | Microsoft OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.microsoft.enabled | bool | `false` | Enable Microsoft OAuth |
|
||||
| sso.microsoft.tenantId | string | `""` | Microsoft tenant ID - use 9188040d-6c67-4c5b-b112-36a304b66dad for personal accounts |
|
||||
|
||||
### OIDC configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.oidc.clientExistingSecret | string | `""` | OICD client secret from existing secret |
|
||||
| sso.oidc.clientExistingSecretKey | string | `""` | OIDC client secret key from existing secret |
|
||||
| sso.oidc.clientId | string | `""` | OIDC client ID |
|
||||
| sso.oidc.clientSecret | string | `""` | OIDC client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.oidc.enabled | bool | `false` | Enable OIDC authentication |
|
||||
| sso.oidc.providerName | string | `"SSO"` | Name of the provider to show on the UI |
|
||||
| sso.oidc.providerUrl | string | `""` | OIDC provider well known URL |
|
||||
| sso.oidc.scopes | string | `"openid email profile"` | Scopes to request (space-separated). |
|
||||
|
||||
### Role management configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.roleManagement.adminRoles | string | `""` | Comma-separated list of roles allowed to log in as admin (receive open webui role admin) |
|
||||
| sso.roleManagement.allowedRoles | string | `""` | Comma-separated list of roles allowed to log in (receive open webui role user) |
|
||||
| sso.roleManagement.rolesClaim | string | `"roles"` | The claim that contains the roles (can be nested, e.g., user.roles) |
|
||||
|
||||
### SSO trusted header authentication
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.trustedHeader.emailHeader | string | `""` | Header containing the user's email address |
|
||||
| sso.trustedHeader.enabled | bool | `false` | Enable trusted header authentication |
|
||||
| sso.trustedHeader.nameHeader | string | `""` | Header containing the user's name (optional, used for new user creation) |
|
||||
|
||||
### Other Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| affinity | object | `{}` | Affinity for pod assignment |
|
||||
| annotations | object | `{}` | |
|
||||
| args | list | `[]` | Open WebUI container arguments (overrides default) |
|
||||
| clusterDomain | string | `"cluster.local"` | Value of cluster domain |
|
||||
| command | list | `[]` | Open WebUI container command (overrides default entrypoint) |
|
||||
| commonEnvVars | list | `[]` | Env vars added to the Open WebUI deployment, common across environments. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: environment variables defined in both `extraEnvVars` and `commonEnvVars` will result in a conflict. Avoid duplicates) |
|
||||
| containerSecurityContext | object | `{}` | Configure container security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-containe> |
|
||||
| copyAppData.args | list | `[]` | Open WebUI copy-app-data init container arguments (overrides default) |
|
||||
| copyAppData.command | list | `[]` | Open WebUI copy-app-data init container command (overrides default) |
|
||||
| copyAppData.resources | object | `{}` | |
|
||||
| databaseUrl | string | `""` | Configure database URL, needed to work with Postgres (example: `postgresql://<user>:<password>@<service>:<port>/<database>`), leave empty to use the default sqlite database |
|
||||
| enableOpenaiApi | bool | `true` | Enables the use of OpenAI APIs |
|
||||
| extraEnvFrom | list | `[]` | Env vars added from configmap or secret to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: `extraEnvVars` will take precedence over the value from `extraEnvFrom`) |
|
||||
| extraEnvVars | list | `[{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}]` | Env vars added to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ |
|
||||
| extraEnvVars[0] | object | `{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}` | Default API key value for Pipelines. Should be updated in a production deployment, or be changed to the required API key if not using Pipelines |
|
||||
| extraInitContainers | list | `[]` | Additional init containers to add to the deployment/statefulset ref: <https://kubernetes.io/docs/concepts/workloads/pods/init-containers/> |
|
||||
| extraResources | list | `[]` | Extra resources to deploy with Open WebUI |
|
||||
| hostAliases | list | `[]` | HostAliases to be added to hosts-file of each container |
|
||||
| image | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/open-webui/open-webui","tag":""}` | Open WebUI image tags can be found here: https://github.com/open-webui/open-webui |
|
||||
| imagePullSecrets | list | `[]` | Configure imagePullSecrets to use private registry ref: <https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry> |
|
||||
| ingress.additionalHosts | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | Use appropriate annotations for your Ingress controller, e.g., for NGINX: |
|
||||
| ingress.class | string | `""` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.existingSecret | string | `""` | |
|
||||
| ingress.extraLabels | object | `{}` | Additional custom labels to add to the Ingress metadata Useful for tagging, selecting, or applying policies to the Ingress via labels. |
|
||||
| ingress.host | string | `"chat.example.com"` | |
|
||||
| ingress.tls | bool | `false` | |
|
||||
| livenessProbe | object | `{}` | Probe for liveness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| managedCertificate.domains[0] | string | `"chat.example.com"` | |
|
||||
| managedCertificate.enabled | bool | `false` | |
|
||||
| managedCertificate.name | string | `"mydomain-chat-cert"` | |
|
||||
| nameOverride | string | `""` | |
|
||||
| namespaceOverride | string | `""` | |
|
||||
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||
| ollama.enabled | bool | `true` | Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure |
|
||||
| ollama.fullnameOverride | string | `"open-webui-ollama"` | If enabling embedded Ollama, update fullnameOverride to your desired Ollama name value, or else it will use the default ollama.name value from the Ollama chart |
|
||||
| ollamaUrls | list | `[]` | A list of Ollama API endpoints. These can be added in lieu of automatically installing the Ollama Helm chart, or in addition to it. |
|
||||
| ollamaUrlsFromExtraEnv | bool | `false` | Disables taking Ollama Urls from `ollamaUrls` list |
|
||||
| openaiBaseApiUrl | string | `"https://api.openai.com/v1"` | OpenAI base API URL to use. Defaults to the Pipelines service endpoint when Pipelines are enabled, and "https://api.openai.com/v1" if Pipelines are not enabled and this value is blank |
|
||||
| openaiBaseApiUrls | list | `[]` | OpenAI base API URLs to use. Overwrites the value in openaiBaseApiUrl if set |
|
||||
| persistence.accessModes | list | `["ReadWriteOnce"]` | If using multiple replicas, you must update accessModes to ReadWriteMany |
|
||||
| persistence.annotations | object | `{}` | |
|
||||
| persistence.enabled | bool | `true` | |
|
||||
| persistence.existingClaim | string | `""` | Use existingClaim if you want to re-use an existing Open WebUI PVC instead of creating a new one |
|
||||
| persistence.provider | string | `"local"` | Sets the storage provider, availables values are `local`, `s3`, `gcs` or `azure` |
|
||||
| persistence.selector | object | `{}` | |
|
||||
| persistence.size | string | `"2Gi"` | |
|
||||
| persistence.storageClass | string | `""` | |
|
||||
| persistence.subPath | string | `""` | Subdirectory of Open WebUI PVC to mount. Useful if root directory is not empty. |
|
||||
| pipelines.enabled | bool | `true` | Automatically install Pipelines chart to extend Open WebUI functionality using Pipelines: https://github.com/open-webui/pipelines |
|
||||
| pipelines.extraEnvVars | list | `[]` | This section can be used to pass required environment variables to your pipelines (e.g. Langfuse hostname) |
|
||||
| podAnnotations | object | `{}` | |
|
||||
| podLabels | object | `{}` | |
|
||||
| podSecurityContext | object | `{}` | Configure pod security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
|
||||
| priorityClassName | string | `""` | Priority class name for the Open WebUI pods |
|
||||
| readinessProbe | object | `{}` | Probe for readiness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| replicaCount | int | `1` | |
|
||||
| resources | object | `{}` | |
|
||||
| revisionHistoryLimit | int | `10` | Revision history limit for the workload manager (deployment). |
|
||||
| runtimeClassName | string | `""` | Configure runtime class ref: <https://kubernetes.io/docs/concepts/containers/runtime-class/> |
|
||||
| service | object | `{"annotations":{},"containerPort":8080,"labels":{},"loadBalancerClass":"","nodePort":"","port":80,"type":"ClusterIP"}` | Service values to expose Open WebUI pods to cluster |
|
||||
| serviceAccount.annotations | object | `{}` | |
|
||||
| serviceAccount.automountServiceAccountToken | bool | `false` | |
|
||||
| serviceAccount.enable | bool | `true` | |
|
||||
| serviceAccount.name | string | `""` | |
|
||||
| startupProbe | object | `{}` | Probe for startup of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| strategy | object | `{}` | Strategy for updating the workload manager: deployment or statefulset |
|
||||
| tika.enabled | bool | `false` | Automatically install Apache Tika to extend Open WebUI |
|
||||
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||
| topologySpreadConstraints | list | `[]` | Topology Spread Constraints for pod assignment |
|
||||
| volumeMounts | object | `{"container":[],"initContainer":[]}` | Configure container volume mounts ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||
| volumes | list | `[]` | Configure pod volumes ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||
| websocket.enabled | bool | `false` | Enables websocket support in Open WebUI with env `ENABLE_WEBSOCKET_SUPPORT` |
|
||||
| websocket.manager | string | `"redis"` | Specifies the websocket manager to use with env `WEBSOCKET_MANAGER`: redis (default) |
|
||||
| websocket.nodeSelector | object | `{}` | Node selector for websocket pods |
|
||||
| websocket.redis | object | `{"affinity":{},"annotations":{},"args":[],"command":[],"enabled":true,"image":{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"},"labels":{},"name":"open-webui-redis","pods":{"annotations":{},"labels":{}},"resources":{},"securityContext":{},"service":{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"},"tolerations":[]}` | Deploys a redis |
|
||||
| websocket.redis.affinity | object | `{}` | Redis affinity for pod assignment |
|
||||
| websocket.redis.annotations | object | `{}` | Redis annotations |
|
||||
| websocket.redis.args | list | `[]` | Redis arguments (overrides default) |
|
||||
| websocket.redis.command | list | `[]` | Redis command (overrides default) |
|
||||
| websocket.redis.enabled | bool | `true` | Enable redis installation |
|
||||
| websocket.redis.image | object | `{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"}` | Redis image |
|
||||
| websocket.redis.labels | object | `{}` | Redis labels |
|
||||
| websocket.redis.name | string | `"open-webui-redis"` | Redis name |
|
||||
| websocket.redis.pods | object | `{"annotations":{},"labels":{}}` | Redis pod |
|
||||
| websocket.redis.pods.annotations | object | `{}` | Redis pod annotations |
|
||||
| websocket.redis.pods.labels | object | `{}` | Redis pod labels |
|
||||
| websocket.redis.resources | object | `{}` | Redis resources |
|
||||
| websocket.redis.securityContext | object | `{}` | Redis security context |
|
||||
| websocket.redis.service | object | `{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"}` | Redis service |
|
||||
| websocket.redis.service.annotations | object | `{}` | Redis service annotations |
|
||||
| websocket.redis.service.containerPort | int | `6379` | Redis container/target port |
|
||||
| websocket.redis.service.labels | object | `{}` | Redis service labels |
|
||||
| websocket.redis.service.nodePort | string | `""` | Redis service node port. Valid only when type is `NodePort` |
|
||||
| websocket.redis.service.port | int | `6379` | Redis service port |
|
||||
| websocket.redis.service.portName | string | `"http"` | Redis service port name. Istio needs this to be something like `tcp-redis` |
|
||||
| websocket.redis.service.type | string | `"ClusterIP"` | Redis service type |
|
||||
| websocket.redis.tolerations | list | `[]` | Redis tolerations for pod assignment |
|
||||
| websocket.url | string | `"redis://open-webui-redis:6379/0"` | Specifies the URL of the Redis instance for websocket communication. Template with `redis://[:<password>@]<hostname>:<port>/<db>` |
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||
36
webui/open-webui/README.md.gotmpl
Normal file
36
webui/open-webui/README.md.gotmpl
Normal file
@ -0,0 +1,36 @@
|
||||
{{ template "chart.header" . }}
|
||||
|
||||
{{ template "chart.deprecationWarning" . }}
|
||||
|
||||
{{ template "chart.badgesSection" . }}
|
||||
|
||||
{{ template "chart.description" . }}
|
||||
|
||||
{{ template "chart.homepageLine" . }}
|
||||
|
||||
{{ template "chart.maintainersSection" . }}
|
||||
|
||||
{{ template "chart.sourcesSection" . }}
|
||||
|
||||
## Installing
|
||||
|
||||
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||
|
||||
```shell
|
||||
helm repo add open-webui https://helm.openwebui.com/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Now you can install the chart:
|
||||
|
||||
```shell
|
||||
helm upgrade --install open-webui open-webui/open-webui
|
||||
```
|
||||
|
||||
{{ template "chart.requirementsSection" . }}
|
||||
|
||||
{{ template "chart.valuesSection" . }}
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||
30
webui/open-webui/charts/ollama/.helmignore
Normal file
30
webui/open-webui/charts/ollama/.helmignore
Normal file
@ -0,0 +1,30 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
#others
|
||||
.github
|
||||
kind-config.yml
|
||||
ci/
|
||||
|
||||
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
@ -0,0 +1,30 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
#others
|
||||
.github
|
||||
kind-config.yml
|
||||
ci/
|
||||
|
||||
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
apiVersion: v2
|
||||
name: ollama
|
||||
description: Get up and running with large language models locally.
|
||||
|
||||
type: application
|
||||
|
||||
version: 1.27.0
|
||||
|
||||
appVersion: "0.11.4"
|
||||
|
||||
annotations:
|
||||
artifacthub.io/category: ai-machine-learning
|
||||
artifacthub.io/changes: |
|
||||
- kind: changed
|
||||
description: upgrade app version to 0.11.4
|
||||
links:
|
||||
- name: Ollama release v0.11.4
|
||||
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
|
||||
|
||||
kubeVersion: "^1.16.0-0"
|
||||
home: https://ollama.ai/
|
||||
icon: https://ollama.ai/public/ollama.png
|
||||
keywords:
|
||||
- ai
|
||||
- llm
|
||||
- llama
|
||||
- mistral
|
||||
sources:
|
||||
- https://github.com/ollama/ollama
|
||||
- https://github.com/otwld/ollama-helm
|
||||
maintainers:
|
||||
- name: OTWLD
|
||||
email: contact@otwld.com
|
||||
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 OTWLD
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
@ -0,0 +1,331 @@
|
||||

|
||||
|
||||

|
||||
[](https://artifacthub.io/packages/helm/ollama-helm/ollama)
|
||||
[](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
|
||||
[](https://discord.gg/U24mpqTynB)
|
||||
|
||||
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
|
||||
|
||||
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
|
||||
|
||||
## Requirements
|
||||
|
||||
- Kubernetes: `>= 1.16.0-0` for **CPU only**
|
||||
|
||||
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
|
||||
|
||||
*Not all GPUs are currently supported with ollama (especially with AMD)*
|
||||
|
||||
## Deploying Ollama chart
|
||||
|
||||
To install the `ollama` chart in the `ollama` namespace:
|
||||
|
||||
> [!IMPORTANT]
|
||||
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
|
||||
> registry https://helm.otwld.com/
|
||||
> Please update your Helm registry accordingly.
|
||||
|
||||
```console
|
||||
helm repo add otwld https://helm.otwld.com/
|
||||
helm repo update
|
||||
helm install ollama otwld/ollama --namespace ollama --create-namespace
|
||||
```
|
||||
|
||||
## Upgrading Ollama chart
|
||||
|
||||
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
|
||||
backwards incompatible changes.
|
||||
|
||||
Make adjustments to your values as needed, then run `helm upgrade`:
|
||||
|
||||
```console
|
||||
# -- This pulls the latest version of the ollama chart from the repo.
|
||||
helm repo update
|
||||
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
|
||||
```
|
||||
|
||||
## Uninstalling Ollama chart
|
||||
|
||||
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
|
||||
|
||||
```console
|
||||
helm delete ollama --namespace ollama
|
||||
```
|
||||
|
||||
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
|
||||
parameters and flags.
|
||||
|
||||
## Interact with Ollama
|
||||
|
||||
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
|
||||
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||
- Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
|
||||
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
|
||||
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
|
||||
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
|
||||
|
||||
## Examples
|
||||
|
||||
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
|
||||
|
||||
### Basic values.yaml example with GPU and two models pulled at startup
|
||||
|
||||
```
|
||||
ollama:
|
||||
gpu:
|
||||
# -- Enable GPU integration
|
||||
enabled: true
|
||||
|
||||
# -- GPU type: 'nvidia' or 'amd'
|
||||
type: 'nvidia'
|
||||
|
||||
# -- Specify the number of GPU to 1
|
||||
number: 1
|
||||
|
||||
# -- List of models to pull at container startup
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Basic values.yaml example with Ingress
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- llama2
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: ollama.domain.lan
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
- *API is now reachable at `ollama.domain.lan`*
|
||||
|
||||
---
|
||||
|
||||
### Create and run model from template
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
create:
|
||||
- name: llama3.1-ctx32768
|
||||
template: |
|
||||
FROM llama3.1
|
||||
PARAMETER num_ctx 32768
|
||||
run:
|
||||
- llama3.1-ctx32768
|
||||
```
|
||||
|
||||
## Upgrading from 0.X.X to 1.X.X
|
||||
|
||||
The version 1.X.X introduces the ability to load models in memory at startup, the values have been changed.
|
||||
|
||||
Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
To:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
## Helm Values
|
||||
|
||||
- See [values.yaml](values.yaml) to see the Chart's default values.
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| affinity | object | `{}` | Affinity for pod assignment |
|
||||
| autoscaling.enabled | bool | `false` | Enable autoscaling |
|
||||
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
|
||||
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
|
||||
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
|
||||
| deployment.labels | object | `{}` | Labels to add to the deployment |
|
||||
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
|
||||
| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
|
||||
| extraEnvFrom | list | `[]` | Additionl environment variables from external sources (like ConfigMap) |
|
||||
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
|
||||
| fullnameOverride | string | `""` | String to fully override template |
|
||||
| hostIPC | bool | `false` | Use the host’s ipc namespace. |
|
||||
| hostNetwork | bool | `false` | Use the host's network namespace. |
|
||||
| hostPID | bool | `false` | Use the host’s pid namespace |
|
||||
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
|
||||
| image.repository | string | `"ollama/ollama"` | Docker image registry |
|
||||
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
|
||||
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
|
||||
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
|
||||
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
|
||||
| ingress.enabled | bool | `false` | Enable ingress controller resource |
|
||||
| ingress.hosts[0].host | string | `"ollama.local"` | |
|
||||
| ingress.hosts[0].paths[0].path | string | `"/"` | |
|
||||
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
|
||||
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
|
||||
| initContainers | list | `[]` | Init containers to add to the pod |
|
||||
| knative.annotations | object | `{}` | Knative service annotations |
|
||||
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
|
||||
| knative.enabled | bool | `false` | Enable Knative integration |
|
||||
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
|
||||
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
|
||||
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
|
||||
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
|
||||
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
|
||||
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
|
||||
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
|
||||
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
|
||||
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
|
||||
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
|
||||
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
|
||||
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
|
||||
| namespaceOverride | string | `""` | String to fully override namespace |
|
||||
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
|
||||
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
|
||||
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
|
||||
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
|
||||
| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number |
|
||||
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored |
|
||||
| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored |
|
||||
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice |
|
||||
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images |
|
||||
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
|
||||
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
|
||||
| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
|
||||
| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral |
|
||||
| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral |
|
||||
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
|
||||
| ollama.port | int | `11434` | |
|
||||
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
|
||||
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
|
||||
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
|
||||
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true |
|
||||
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
|
||||
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: <storageClass> If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) |
|
||||
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty |
|
||||
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: <volumeMode> If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
|
||||
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
|
||||
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
|
||||
| podLabels | object | `{}` | Map of labels to add to the pods |
|
||||
| podSecurityContext | object | `{}` | Pod Security Context |
|
||||
| priorityClassName | string | `""` | Priority Class Name |
|
||||
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
|
||||
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
|
||||
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
|
||||
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
|
||||
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
|
||||
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
|
||||
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
|
||||
| replicaCount | int | `1` | Number of replicas |
|
||||
| resources.limits | object | `{}` | Pod limit |
|
||||
| resources.requests | object | `{}` | Pod requests |
|
||||
| runtimeClassName | string | `""` | Specify runtime class |
|
||||
| securityContext | object | `{}` | Container Security Context |
|
||||
| service.annotations | object | `{}` | Annotations to add to the service |
|
||||
| service.labels | object | `{}` | Labels to add to the service |
|
||||
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
|
||||
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
|
||||
| service.port | int | `11434` | Service port |
|
||||
| service.type | string | `"ClusterIP"` | Service type |
|
||||
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
|
||||
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
|
||||
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
|
||||
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
|
||||
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
|
||||
| tests.annotations | object | `{}` | Annotations to add to the tests |
|
||||
| tests.enabled | bool | `true` | |
|
||||
| tests.labels | object | `{}` | Labels to add to the tests |
|
||||
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
|
||||
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
|
||||
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
|
||||
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
## Core team
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center">
|
||||
<a href="https://github.com/jdetroyes"
|
||||
><img
|
||||
src="https://github.com/jdetroyes.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Jean Baptiste Detroyes"
|
||||
/><br /><b> Jean Baptiste Detroyes </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://github.com/jdetroyes" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:jdetroyes@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
<td align="center">
|
||||
<a href="https://github.com/ntrehout"
|
||||
><img
|
||||
src="https://github.com/ntrehout.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Jean Baptiste Detroyes"
|
||||
/><br /><b> Nathan Tréhout </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://x.com/n_trehout" title="Twitter"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
|
||||
/></a>
|
||||
<a href="https://github.com/ntrehout" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:ntrehout@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Support
|
||||
|
||||
- For questions, suggestions, and discussion about Ollama please refer to
|
||||
the [Ollama issue page](https://github.com/ollama/ollama/issues)
|
||||
- For questions, suggestions, and discussion about this chart please
|
||||
visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
|
||||
our [OTWLD Discord](https://discord.gg/U24mpqTynB)
|
||||
@ -0,0 +1,25 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.knative.enabled }}
|
||||
export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
|
||||
echo "Visit $KSERVICE_URL to use your application"
|
||||
{{- else if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
@ -0,0 +1,80 @@
|
||||
{{/*
|
||||
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
|
||||
*/}}
|
||||
{{- define "ollama.namespace" -}}
|
||||
{{- if .Values.namespaceOverride -}}
|
||||
{{- .Values.namespaceOverride -}}
|
||||
{{- else -}}
|
||||
{{- .Release.Namespace -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "ollama.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "ollama.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "ollama.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "ollama.labels" -}}
|
||||
helm.sh/chart: {{ include "ollama.chart" . }}
|
||||
{{ include "ollama.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "ollama.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "ollama.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "ollama.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Models mount path
|
||||
*/}}
|
||||
{{- define "ollama.modelsMountPath" -}}
|
||||
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
|
||||
{{- end -}}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user