first backup of charts
1 code/codeserver Submodule
Submodule code/codeserver added at b59a4f7366
55 code/metadata.yaml Normal file
@@ -0,0 +1,55 @@

application_name: &application_name code

distributed:
  method: helm
  release_name: *application_name
  chart: codeserver/ci/helm-chart
  sets:
    image:
      repository: codercom/code-server
      tag: '4.103.2'
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      port: 8080
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30083
    url: ~
  pod:
    name: *application_name
monolithic:
  method: helm
  release_name: *application_name
  chart: codeserver/ci/helm-chart
  sets:
    image:
      repository: codercom/code-server
      tag: '4.103.2'
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 1
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      port: 8080
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30083
    url: ~
  pod:
    name: *application_name
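For reference, each `sets:` block above reads like a set of Helm value overrides. A minimal sketch of the equivalent manual command for the `distributed:` profile, assuming the deployment tooling (not included in this commit) maps each `sets:` entry onto a `--set` flag:

  helm upgrade --install code codeserver/ci/helm-chart \
    --set image.repository=codercom/code-server \
    --set image.tag=4.103.2 \
    --set 'resources.limits.nvidia\.com/gpu=0' \
    --set nodeSelector.resource-group=gpu_5880 \
    --set service.type=NodePort \
    --set service.port=8080

The `svc:` and `pod:` blocks look like post-deploy connection metadata rather than chart values, so they are left out of the sketch.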
47 edgetts/metadata.yaml Normal file
@@ -0,0 +1,47 @@

application_name: &application_name edgetts

distributed:
  method: helm
  release_name: *application_name
  chart: test-tts
  sets:
    image:
      repository: travisvn/openai-edge-tts
      tag: "latest"
      pullPolicy: IfNotPresent
    service:
      type: NodePort
      port: 5050
      nodePort: 30250
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30250
    url: ~
  pod:
    name: *application_name
monolithic:
  method: helm
  release_name: *application_name
  chart: test-tts
  sets:
    image:
      repository: travisvn/openai-edge-tts
      tag: "latest"
      pullPolicy: IfNotPresent
    service:
      type: NodePort
      port: 5050
      nodePort: 30250
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30250
    url: ~
  pod:
    name: *application_name
23 edgetts/test-tts/.helmignore Normal file
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 edgetts/test-tts/Chart.yaml Normal file
@@ -0,0 +1,24 @@
apiVersion: v2
name: test-tts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
22 edgetts/test-tts/templates/NOTES.txt Normal file
@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test-tts.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
        You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test-tts.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test-tts.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test-tts.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 edgetts/test-tts/templates/_helpers.tpl Normal file
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "test-tts.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "test-tts.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "test-tts.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "test-tts.labels" -}}
helm.sh/chart: {{ include "test-tts.chart" . }}
{{ include "test-tts.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "test-tts.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test-tts.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "test-tts.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test-tts.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
78 edgetts/test-tts/templates/deployment.yaml Normal file
@@ -0,0 +1,78 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "test-tts.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "test-tts.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "test-tts.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          {{- with .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
32 edgetts/test-tts/templates/hpa.yaml Normal file
@@ -0,0 +1,32 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "test-tts.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}
43 edgetts/test-tts/templates/ingress.yaml Normal file
@@ -0,0 +1,43 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- with .pathType }}
            pathType: {{ . }}
            {{- end }}
            backend:
              service:
                name: {{ include "test-tts.fullname" $ }}
                port:
                  number: {{ $.Values.service.port }}
          {{- end }}
    {{- end }}
{{- end }}
16 edgetts/test-tts/templates/service.yaml Normal file
@@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
      nodePort: {{ .Values.service.nodePort }}
  selector:
    {{- include "test-tts.selectorLabels" . | nindent 4 }}
13 edgetts/test-tts/templates/serviceaccount.yaml Normal file
@@ -0,0 +1,13 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "test-tts.serviceAccountName" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}
15 edgetts/test-tts/templates/tests/test-connection.yaml Normal file
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "test-tts.fullname" . }}-test-connection"
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "test-tts.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never
124 edgetts/test-tts/values.yaml Normal file
@@ -0,0 +1,124 @@
# Default values for test-tts.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1

# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
image:
  repository: travisvn/openai-edge-tts
  # This sets the pull policy for images.
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "latest"

# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# This is to override the chart name.
nameOverride: "edgetts"
fullnameOverride: ""

# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# This is for setting Kubernetes Annotations to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# This is for setting Kubernetes Labels to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: NodePort
  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5050
  nodePort: 30250

# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
#livenessProbe:
#  httpGet:
#    path: /
#    port: http
#readinessProbe:
#  httpGet:
#    path: /
#    port: http

# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}
23 jarvis/jarvis/.helmignore Normal file
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 jarvis/jarvis/Chart.yaml Normal file
@@ -0,0 +1,24 @@
apiVersion: v2
name: jarvis
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
22 jarvis/jarvis/templates/NOTES.txt Normal file
@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
        You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 jarvis/jarvis/templates/_helpers.tpl Normal file
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "jarvis.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "jarvis.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "jarvis.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "jarvis.labels" -}}
helm.sh/chart: {{ include "jarvis.chart" . }}
{{ include "jarvis.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "jarvis.selectorLabels" -}}
app.kubernetes.io/name: {{ include "jarvis.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "jarvis.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
17 jarvis/jarvis/templates/images-pv.yaml Normal file
@@ -0,0 +1,17 @@
# pv.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Release.Name }}-pv # name of the PV; can be customized
spec:
  storageClassName: local-path # add this line; it must match the StorageClass name above
  capacity:
    storage: 500Gi # capacity of the PV; adjust to the actual size of the NFS share or the expected usage
  accessModes:
    - ReadWriteMany # access mode
  persistentVolumeReclaimPolicy: Retain

  nfs:
    path: /volume1/Dataset/PVStore/lab-data-dataset-pvc-ec4aba12-c683-4168-b335-7b1a8819581a/Private/cache-images # path shared on the NFS server
    server: 10.6.80.11 # IP address or hostname of the NFS server

14 jarvis/jarvis/templates/images-pvc.yaml Normal file
@@ -0,0 +1,14 @@
# pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-pvc # name of the PVC; Pods reference this name
  #namespace: default # namespace of the PVC, usually default or a custom namespace
spec:
  storageClassName: local-path # add this line; it must match the PV and StorageClass name
  accessModes:
    - ReadWriteMany # access mode; must match the PV's accessModes or be compatible with them
  resources:
    requests:
      storage: 50Gi # storage requested by the PVC; must be less than or equal to the PV capacity
  volumeName: {{ .Release.Name }}-pv # explicitly name the PV to bind to; this is the key to manually binding a PV
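The claim above is bound to the volume manually through `volumeName`, so the pair only works when both objects come from the same release. A quick post-install check, with the release name as a placeholder:

  kubectl get pv <release>-pv
  kubectl get pvc <release>-pvc

Both should report STATUS Bound once the claim has matched the volume.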
68 jarvis/jarvis/templates/jarvis-adapter.yaml Normal file
@@ -0,0 +1,68 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-adapter-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-adapter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-adapter
  template:
    metadata:
      labels:
        app: jarvis-adapter
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      containers:
        - name: jarvis-adapter
          image: {{ .Values.jarvis_adapter.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          env:
            - name: INFERENCE_ENDPOINT
              value: {{ .Values.jarvis_adapter.endpoint }}
          ports:
            - containerPort: 5000 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 100m # 100 millicores (0.1 CPU)
              memory: 256Mi # 256 mebibytes
            limits:
              cpu: 500m # limit to 500 millicores (0.5 CPU)
              memory: 512Mi # limit to 512 mebibytes
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: llm-blackbox
  # namespace: jarvis-models
  labels:
    app: jarvis-adapter
spec:
  selector:
    app: jarvis-adapter
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: NodePort
85 jarvis/jarvis/templates/jarvis-api.yaml Normal file
@@ -0,0 +1,85 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-api-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-api
  template:
    metadata:
      labels:
        app: jarvis-api
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      containers:
        - name: jarvis-api
          image: {{ .Values.jarvis_api.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          ports:
            - containerPort: 8080 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 100m # 100 millicores (0.1 CPU)
              memory: 256Mi # 256 mebibytes
            limits:
              cpu: 500m # limit to 500 millicores (0.5 CPU)
              memory: 512Mi # limit to 512 mebibytes
          volumeMounts:
            - name: env-config-volume
              mountPath: /.env.yml
              subPath: .env.yml
              readOnly: true
            - name: images-data
              mountPath: /images
      volumes:
        - name: env-config-volume
          #hostPath:
          #  path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
          #  type: FileOrCreate
          configMap:
            name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap created by this chart
            items:
              - key: .api.env.yml # This is the key defined in the ConfigMap's data section
                path: .env.yml # This is the filename inside the mountPath (e.g., /.env.yml)
        - name: images-data
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Release.Name }}-api-service
  # namespace: jarvis-models
  labels:
    app: jarvis-api
spec:
  selector:
    app: jarvis-api
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
  type: NodePort
183 jarvis/jarvis/templates/jarvis-configmap.yaml Normal file
@@ -0,0 +1,183 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-cm # Name of the ConfigMap
  # namespace: jarvis-models # Ensure this matches the Deployment's namespace
data:
  .api.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
    database:
      host: 10.6.14.130
      port: 3306
      username: jarvis
      password: boardwarejarvis
      database: jarvis
    jwt:
      secret: secretkey
    users:
      - username: jarvis
        password: boardwarejarvis
      - username: user
        password: boardwareuser
      - username: g2e
        password: g2e
      - username: vera
        password: vera
      - username: ivan
        password: ivan
    blackbox:
      mode: 0 # 0: /?blackbox=models, 1: /models-blackbox
      url: http://jarvis-model-service
      port: 8080

    vad:
      url: http://vad-blackbox
    asr:
      url: http://asr-blackbox
    llm:
      url: http://llm-blackbox
    vlm:
      url: http://vlm-blackbox
    tts:
      url: http://tts-blackbox
    chatllama:
      url: http://chatllama-blackbox
    chroma:
      upsert_url: http://chroma-blackbox/upsert


    env: dev
    authentik:
      redirectUri: http://10.6.14.130:4200
      baseUrl: https://authentik.universalmacro.com
      clientId: xxx
      clientSecret: xxx
    server:
      port: 8080

    # log:
    #   path: "/Workspace/Logging/logtime.out"

    log:
      loki:
        # url: "https://103.192.46.20:27002/laas/1868865592451137536/loki/api/v1/push"
        url: "https://loki.bwgdi.com/loki/api/v1/push"
        x-odin-auth: "log_m7uxtqtru2318hbaoonf9wgjy8chcnebhwhl0wncsvfctu2ppn9m53q6p3i3"
        labels:
          app: jarvis
          env: dev
          location: "k3s_gdi"

    model:
      tts:
        url: http://10.6.14.130:8000/?blackbox_name=tts
        tts_model_name: melotts
        tts_stream: false
      streaming:
        url: http://10.6.14.130:8000/?blackbox_name=chat
      vlms_url: http://10.6.14.130:8000/?blackbox_name=vlms
  .models.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
    env:
      version: 0.0.1
      host: 0.0.0.0
      port: 8000

    log:
      level: debug
      time_format: "%Y-%m-%d %H:%M:%S"
      filename: "./jarvis-models.log"

      loki:
        url: "https://loki.bwgdi.com/loki/api/v1/push"
        labels:
          app: jarvis
          env: dev
          location: "k3s_gdi"
          layer: models

    melotts:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: 'ZH'
      speaker: 'ZH'

    cosyvoicetts:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: '粤语女'
      speaker: 'ZH'

    sovitstts:
      mode: docker
      url: http://10.6.80.90:9880/tts
      speed: 0.9
      device: 'cuda:0'
      language: 'ZH'
      speaker: 'ZH'
      text_lang: "yue"
      ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
      prompt_lang: "yue"
      prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
      text_split_method: "cut5"
      batch_size: 1
      media_type: "wav"
      streaming_mode: True

    sensevoiceasr:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: '粤语女'
      speaker: 'ZH'

    tesou:
      url: http://120.196.116.194:48891/chat/

    TokenIDConverter:
      token_path: src/asr/resources/models/token_list.pkl
      unk_symbol: <unk>

    CharTokenizer:
      symbol_value:
      space_symbol: <space>
      remove_non_linguistic_symbols: false

    WavFrontend:
      cmvn_file: src/asr/resources/models/am.mvn
      frontend_conf:
        fs: 16000
        window: hamming
        n_mels: 80
        frame_length: 25
        frame_shift: 10
        lfr_m: 7
        lfr_n: 6
        filter_length_max: -.inf
        dither: 0.0

    Model:
      model_path: src/asr/resources/models/model.onnx
      use_cuda: false
      CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true
      batch_size: 3
    blackbox:
      lazyloading: true

    vlms:
      urls:
        qwen_vl: http://vl-svc
        vlm: http://vl-svc:8080

    path:
      chroma_rerank_embedding_model: /Model/BAAI
      cosyvoice_path: /Voice/CosyVoice
      cosyvoice_model_path: /Voice/CosyVoice/pretrained_models
      sensevoice_model_path: /Voice/SenseVoice/SenseVoiceSmall
96 jarvis/jarvis/templates/jarvis-models.yaml Normal file
@@ -0,0 +1,96 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-model-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-model
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-model
  template:
    metadata:
      labels:
        app: jarvis-model
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      runtimeClassName: nvidia
      containers:
        - name: jarvis-model
          image: {{ .Values.jarvis_model.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          # command: ["sleep", "infinity"]
          ports:
            - containerPort: 8000 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 1 # 1 CPU
              memory: 512Mi # 512 mebibytes
            limits:
              cpu: 2 # limit to 2 CPUs
              memory: 1Gi # limit to 1 gibibyte
              nvidia.com/gpu: 1
          volumeMounts:
            - name: env-config-volume
              mountPath: /jarvis-models/.env.yaml
              subPath: .env.yaml
              readOnly: true
            - name: nfs-volume
              subPath: Weight
              mountPath: /Model
            - name: nfs-volume
              subPath: Voice
              mountPath: /Voice
      volumes:
        - name: env-config-volume
          #hostPath:
          #  path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
          #  type: FileOrCreate
          configMap:
            name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap created by this chart
            items:
              - key: .models.env.yml # This is the key defined in the ConfigMap's data section
                path: .env.yaml # This is the filename inside the mountPath (e.g., /.env.yml)
        - name: nfs-volume
          nfs:
            server: "10.6.80.11"
            path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
        #{{- range .Values.volumes }}
        #- {{ . | toYaml | nindent 10 | trim }}
        #{{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: jarvis-model-service
  # namespace: jarvis-models
  labels:
    app: jarvis-model
spec:
  selector:
    app: jarvis-model
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8000
  type: NodePort
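This Deployment requests one nvidia.com/gpu and sets runtimeClassName: nvidia, so it only schedules on nodes where the NVIDIA runtime class and device plugin are installed. A pre-flight check (the node name is a placeholder, not taken from this commit):

  kubectl get runtimeclass nvidia
  kubectl describe node <gpu-node> | grep nvidia.com/gpu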
87 jarvis/jarvis/values.yaml Normal file
@@ -0,0 +1,87 @@
# Default values for jarvis.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1


jarvis_api:
  image: harbor.bwgdi.com/library/jarvis-api:1.0.9

jarvis_model:
  image: harbor.bwgdi.com/library/jarvis-models:0.0.1

jarvis_adapter:
  image: harbor.bwgdi.com/library/adapter-test:0.0.1
  endpoint: "http://vllm-leader-nodeport:8080"

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
  httpGet:
    path: /
    port: http
readinessProbe:
  httpGet:
    path: /
    port: http

# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 80

# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Additional volumes on the output Deployment definition.
volumes:
  - name: nfs-volume
    nfs:
      server: "10.6.80.11"
      path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"

# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}
43 jarvis/metadata.yaml Normal file
@@ -0,0 +1,43 @@

application_name: &application_name jarvis

distributed:
  method: helm
  release_name: *application_name
  chart: jarvis
  sets:
    jarvis_api:
      image: harbor.bwgdi.com/library/jarvis-api:1.0.9
    jarvis_adapter:
      image: harbor.bwgdi.com/library/adapter-test:0.0.1
      endpoint: "http://vllm-leader-nodeport:8080"
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: jarvis-api-service
    port: 30083
    url: ~
  pod:
    name: jarvis-
monolithic:
  method: helm
  release_name: *application_name
  chart: jarvis
  sets:
    jarvis_api:
      image: harbor.bwgdi.com/library/jarvis-api:1.0.9
    jarvis_adapter:
      image: harbor.bwgdi.com/library/adapter-test:0.0.1
      endpoint: "http://vllm-leader-nodeport:8080"
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: jarvis-api-service
    port: 30083
    url: ~
  pod:
    name: jarvis-
17 jupyter/jupyter/Chart.yaml Normal file
@@ -0,0 +1,17 @@
apiVersion: v1
appVersion: 6.0.3
description: Helm for jupyter single server with pyspark support
home: https://jupyter.org
icon: https://jupyter.org/assets/main-logo.svg
keywords:
  - jupyter
  - notebook
  - spark
maintainers:
  - email: cgiraldo@gradiant.org
    name: cgiraldo
name: jupyter
sources:
  - https://github.com/gradiant/charts
  - https://github.com/astrobounce/helm-jupyter
version: 0.1.6
34 jupyter/jupyter/README.md Normal file
@@ -0,0 +1,34 @@
jupyter
=======
Helm for jupyter single server with pyspark support.
For the jupyterhub chart see [zero-to-jupyterhub](https://zero-to-jupyterhub.readthedocs.io/en/latest/).

Current chart version is `0.1.0`

Source code can be found [here](https://github.com/gradiant/charts/charts/jupyter).


## Chart Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"gradiant/jupyter"` | |
| image.tag | string | `"6.0.1"` | |
| ingress.annotations | object | `{}` | |
| ingress.enabled | bool | `false` | |
| ingress.hosts[0] | string | `"jupyter.127-0-0-1.nip"` | |
| ingress.path | string | `"/"` | |
| ingress.tls | list | `[]` | |
| lab | bool | `true` | |
| nodeSelector | object | `{}` | |
| persistence.accessMode | string | `"ReadWriteOnce"` | |
| persistence.enabled | bool | `true` | |
| persistence.size | string | `"50Gi"` | |
| persistence.storageClass | string | `nil` | |
| resources | object | `{}` | |
| service.externalPort | int | `8888` | |
| service.nodePort.http | string | `nil` | |
| service.type | string | `"ClusterIP"` | |
| tolerations | list | `[]` | |
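A usage sketch based on the values table above; the release name and chart path are illustrative and not taken from the README:

  helm install jupyter ./jupyter \
    --set service.type=NodePort \
    --set persistence.size=50Gi \
    --set lab=true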
23 jupyter/jupyter/templates/NOTES.txt Normal file
@@ -0,0 +1,23 @@
1. Get the access token from the jupyter server log:
   kubectl logs -f -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }}

2. Create a port-forward to the jupyter service:
   kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }} 8888:{{ .Values.service.externalPort }}

Then open the UI in your browser and use the access token:
open http://localhost:8888

If you set up your own password, remember to restart the jupyter server to update the configuration:
File -> Shut Down

{{- if .Values.ingress.enabled }}
Ingress is enabled:
{{- range .Values.ingress.tls }}
{{- range .hosts }}
open https://{{ . }}
{{- end }}
{{- end }}
{{- range .Values.ingress.hosts }}
open http://{{ . }}
{{- end }}
{{- end }}
32 jupyter/jupyter/templates/_helpers.tpl Normal file
@@ -0,0 +1,32 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "jupyter.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "jupyter.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}

{{/*
Standard Labels from Helm documentation https://helm.sh/docs/chart_best_practices/#labels-and-annotations
*/}}

{{- define "jupyter.labels" -}}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/part-of: {{ .Chart.Name }}
{{- end -}}
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
@ -0,0 +1,36 @@
{{- if .Values.gitNotebooks }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "jupyter.fullname" . }}-git-notebooks
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
data:
  git-notebooks.sh: |-
    #!/bin/sh
    set -x
    cd /home/jovyan
    {{- if .Values.gitNotebooks.secretName }}
    cp -r /tmp/.ssh /root/
    chmod 600 /root/.ssh/*
    {{- else }}
    mkdir /root/.ssh
    {{- end }}
    echo "Loading notebooks from git repo"
    {{- range .Values.gitNotebooks.repos }}
    if [ ! -d "/home/jovyan/{{ .name }}" ]
    then
      echo "Cloning {{ .name }} notebook repository"
      {{- if or (hasPrefix "git" .repo) (hasPrefix "ssh" .repo) }}
      ssh-keyscan {{ .repo | regexFind "@([a-zA-Z0-9.]*)" | replace "@" "" }} >> ~/.ssh/known_hosts
      {{- end }}
      git clone {{ .repo }} {{ .name }}
    else
      echo "{{ .name }} notebook repository already cloned"
    fi
    {{- end }}
    # exit code 0 to continue deployment even if git clone fails
    exit 0
{{- end }}
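gitNotebooks is commented out in values.yaml further down; a sketch of rendering just this ConfigMap with one public repo enabled (the repo name and URL are taken from the values.yaml comments, the release name is an assumption):

    helm template jupyter ./jupyter/jupyter \
      --show-only templates/git-notebooks-configmap.yaml \
      --set gitNotebooks.repos[0].name=gradiant \
      --set gitNotebooks.repos[0].repo=https://github.com/Gradiant/notebooks.git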
39
jupyter/jupyter/templates/ingress.yaml
Normal file
@ -0,0 +1,39 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "jupyter.fullname" . -}}
{{- $ingressPath := .Values.ingress.path -}}
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
    {{- if .Values.ingress.labels }}
{{ toYaml .Values.ingress.labels | indent 4 }}
    {{- end }}
  {{- with .Values.ingress.annotations }}
  annotations:
{{ toYaml . | indent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.tls }}
  tls:
  {{- range .Values.ingress.tls }}
    - hosts:
      {{- range .hosts }}
        - {{ . }}
      {{- end }}
      secretName: {{ .secretName }}
  {{- end }}
  {{- end }}
  rules:
  {{- range .Values.ingress.hosts }}
    - host: {{ . }}
      http:
        paths:
          - path: {{ $ingressPath }}
            backend:
              serviceName: {{ $fullName }}
              servicePort: web
  {{- end }}
{{- end }}
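This template still renders the extensions/v1beta1 Ingress API, which was removed in Kubernetes 1.22, so enabling the ingress only works against older clusters. A quick check plus an enable sketch (release name and host value assumed; the host matches the chart default):

    kubectl api-versions | grep -E 'extensions/v1beta1|networking.k8s.io'
    helm upgrade --install jupyter ./jupyter/jupyter \
      --set ingress.enabled=true \
      --set ingress.hosts[0]=jupyter.127-0-0-1.nip.io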
20
jupyter/jupyter/templates/service.yaml
Normal file
@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "jupyter.fullname" . }}
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
  ports:
    - name: web
      protocol: TCP
      port: {{ .Values.service.externalPort | default 8888 }}
      {{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort.http))) }}
      nodePort: {{ .Values.service.nodePort.http }}
      {{- end }}
      targetPort: 8888
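With service.type set to NodePort, the assigned node port can be read back like this (release name `jupyter` and namespace are assumptions):

    NODE_PORT=$(kubectl get svc -n jupyter jupyter -o jsonpath='{.spec.ports[0].nodePort}')
    echo "http://<node-ip>:$NODE_PORT"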
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: {{ include "jupyter.fullname" . }}
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
{{- include "jupyter.labels" . | nindent 4 }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||||
|
serviceName: {{ include "jupyter.fullname" . }}
|
||||||
|
replicas: 1
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
{{- include "jupyter.labels" . | nindent 8}}
|
||||||
|
spec:
|
||||||
|
{{- if .Values.gitNotebooks }}
|
||||||
|
initContainers:
|
||||||
|
- name: git-notebooks
|
||||||
|
image: alpine/git
|
||||||
|
command:
|
||||||
|
- /bin/bash
|
||||||
|
- /git-notebooks.sh
|
||||||
|
volumeMounts:
|
||||||
|
- name: git-notebooks
|
||||||
|
mountPath: /git-notebooks.sh
|
||||||
|
subPath: git-notebooks.sh
|
||||||
|
- name: jupyter
|
||||||
|
mountPath: /home/jovyan
|
||||||
|
{{- if .Values.gitNotebooks.secretName }}
|
||||||
|
- name: git-secret
|
||||||
|
mountPath: "/tmp/.ssh"
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
containers:
|
||||||
|
- name: jupyter
|
||||||
|
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||||
|
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
|
||||||
|
env:
|
||||||
|
- name: JUPYTER_ENABLE_LAB
|
||||||
|
value: "{{ .Values.lab }}"
|
||||||
|
- name: JPY_USER
|
||||||
|
value: "jovyan"
|
||||||
|
args:
|
||||||
|
- start-notebook.sh
|
||||||
|
- --ip=0.0.0.0
|
||||||
|
- --user="jovyan"
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
containerPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
{{ toYaml .Values.resources | indent 10 }}
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 8888
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
timeoutSeconds: 15
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 8888
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
timeoutSeconds: 15
|
||||||
|
volumeMounts:
|
||||||
|
- name: jupyter
|
||||||
|
mountPath: /home/jovyan
|
||||||
|
volumes:
|
||||||
|
{{- if .Values.gitNotebooks }}
|
||||||
|
- name: git-notebooks
|
||||||
|
configMap:
|
||||||
|
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||||
|
{{- if .Values.gitNotebooks.secretName }}
|
||||||
|
- name: git-secret
|
||||||
|
secret:
|
||||||
|
secretName: {{ .Values.gitNotebooks.secretName }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if not .Values.persistence.enabled }}
|
||||||
|
- name: jupyter
|
||||||
|
emptyDir: {}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Values.persistence.enabled }}
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: jupyter
|
||||||
|
spec:
|
||||||
|
accessModes: [ "ReadWriteOnce" ]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: "{{ .Values.persistence.size }}"
|
||||||
|
{{- if .Values.persistence.storageClass }}
|
||||||
|
{{- if (eq "-" .Values.persistence.storageClass) }}
|
||||||
|
storageClassName: ""
|
||||||
|
{{- else }}
|
||||||
|
storageClassName: "{{ .Values.persistence.storageClass }}"
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
|
||||||
64
jupyter/jupyter/values.yaml
Normal file
@ -0,0 +1,64 @@
# Default values for jupyter.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

image:
  repository: gradiant/jupyter
  tag: 6.0.3
  pullPolicy: IfNotPresent

lab: true

#gitNotebooks:
#  secretName: the name of the secret with ssh keys
#  repos:
#  - name: gradiant
#    repo: https://github.com/Gradiant/notebooks.git
#  - name: grad-git
#    repo: git@github.com:Gradiant/notebooks.git

service:
  type: ClusterIP
  externalPort: 8888
  nodePort:
    http:

persistence:
  enabled: true
  storageClass:
  accessMode: ReadWriteOnce
  size: 50Gi

## Ingress configuration
## Ref: https://kubernetes.io/docs/concepts/services-networking/ingress/
##
ingress:
  enabled: false
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  path: /
  hosts:
    - jupyter.127-0-0-1.nip.io
  tls: []
  #  - secretName: jupyter-tls
  #    hosts:
  #      - jupyter.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

nodeSelector: {}

tolerations: []

affinity: {}
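The statefulset template above treats persistence.storageClass set to "-" as an explicit empty storageClassName (no dynamic provisioning), while leaving it unset falls back to the cluster default class; a sketch of forcing the empty class (release name assumed):

    helm upgrade --install jupyter ./jupyter/jupyter \
      --set persistence.storageClass=- \
      --set persistence.size=100Gi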
55
jupyter/metadata.yaml
Normal file
@ -0,0 +1,55 @@

application_name: &application_name jupyter

distributed:
  method: helm
  release_name: *application_name
  chart: jupyter
  sets:
    image:
      repository: gradiant/jupyter
      tag: 6.0.3
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      nodePort:
        http: 30888
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30888
    url: ~
  pod:
    name: jupyter-
monolithic:
  method: helm
  release_name: *application_name
  chart: jupyter
  sets:
    image:
      repository: gradiant/jupyter
      tag: 6.0.3
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30888
    url: ~
  pod:
    name: jupyter-
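The tooling that consumes these metadata files is not part of this backup. Under the assumption that the `sets:` block is flattened into helm --set flags, the distributed profile would correspond roughly to the following (note the backslash-escaped dot that helm requires in the nvidia.com/gpu key):

    helm upgrade --install jupyter jupyter \
      --set image.repository=gradiant/jupyter \
      --set image.tag=6.0.3 \
      --set service.type=NodePort \
      --set service.nodePort.http=30888 \
      --set 'resources.limits.nvidia\.com/gpu=0' \
      --set nodeSelector.resource-group=gpu_5880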
23
llama-factory/llama-factory/.helmignore
Normal file
23
llama-factory/llama-factory/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
25
llama-factory/llama-factory/Chart.yaml
Normal file
25
llama-factory/llama-factory/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: Llama-factory
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: llamafactory
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "llamafactory-cli webui"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
|
||||||
33
llama-factory/llama-factory/templates/services.yaml
Normal file
@ -0,0 +1,33 @@
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  selector:
    leaderworkerset.sigs.k8s.io/name: llamafactory
    role: leader
  {{- else }}
  selector:
    app: llama-factory
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 7860
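This service forwards port 8080 to the webui's containerPort 7860. Once a node port has been assigned, the UI can be reached along these lines; the node IP comes from the metadata below, the namespace and the rendered service name (app defaults to llama-factory) are assumptions:

    NODE_PORT=$(kubectl get svc llama-factory-leader-nodeport -o jsonpath='{.spec.ports[0].nodePort}')
    curl -I "http://10.6.14.123:$NODE_PORT"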
51
llama-factory/llama-factory/templates/single.yaml
Normal file
51
llama-factory/llama-factory/templates/single.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llama-factory
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: llama-factory
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-factory
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "llamafactory-cli webui"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app}}-pvc-model
|
||||||
|
{{- end }}
|
||||||
44
llama-factory/llama-factory/values.yaml
Normal file
@ -0,0 +1,44 @@
# Default values for llama-factory.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"  # the only value the user needs to set
  localMountPath: "/Model"                       # fixed PVC mount path
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false                               # enable automatic download
    image: "docker.io/vllm/vllm-openai:latest"   # image that ships huggingface-cli

# Application selection
app: "llama-factory"

resources:
  gpuLimit: 1
  cpuRequest: 8
  memoryLimit: "16Gi"
  shmSize: "20Gi"


llama:
  image: "docker.io/library/one-click:v1"


# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
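workerSize is the switch between the two deployment shapes in this chart: single.yaml only renders when it equals 1 (a plain Deployment) and llama.yaml only when it is greater than 1 (a LeaderWorkerSet). A sketch of both installs, with the release name assumed:

    # single node: plain Deployment behind the NodePort service
    helm upgrade --install llama-factory ./llama-factory/llama-factory \
      --set workerSize=1 \
      --set resources.gpuLimit=1

    # multi node: LeaderWorkerSet with one leader and one worker
    helm upgrade --install llama-factory ./llama-factory/llama-factory \
      --set workerSize=2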
53
llama-factory/metadata.yaml
Normal file
53
llama-factory/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
|
||||||
|
application_name: &application_name llama-factory
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: llama-factory
|
||||||
|
sets:
|
||||||
|
app: llama-factory
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
workerSize: 2
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: llamafactory
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: llama-factory
|
||||||
|
sets:
|
||||||
|
app: llama-factory
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
workerSize: 1
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: llama-factory
|
||||||
23
melotts/melotts/.helmignore
Normal file
23
melotts/melotts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
24
melotts/melotts/Chart.yaml
Normal file
24
melotts/melotts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: jarvis
|
||||||
|
description: A Helm chart for Kubernetes
|
||||||
|
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
22
melotts/melotts/templates/NOTES.txt
Normal file
22
melotts/melotts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
1. Get the application URL by running these commands:
|
||||||
|
{{- if .Values.ingress.enabled }}
|
||||||
|
{{- range $host := .Values.ingress.hosts }}
|
||||||
|
{{- range .paths }}
|
||||||
|
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- else if contains "NodePort" .Values.service.type }}
|
||||||
|
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||||
|
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||||
|
echo http://$NODE_IP:$NODE_PORT
|
||||||
|
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||||
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
|
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||||
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
|
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||||
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
|
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||||
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||||
|
{{- end }}
|
||||||
62
melotts/melotts/templates/_helpers.tpl
Normal file
62
melotts/melotts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{{/*
|
||||||
|
Expand the name of the chart.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.name" -}}
|
||||||
|
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create a default fully qualified app name.
|
||||||
|
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||||
|
If release name contains chart name it will be used as a full name.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.fullname" -}}
|
||||||
|
{{- if .Values.fullnameOverride }}
|
||||||
|
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||||
|
{{- if contains $name .Release.Name }}
|
||||||
|
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create chart name and version as used by the chart label.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.chart" -}}
|
||||||
|
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Common labels
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.labels" -}}
|
||||||
|
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||||
|
{{ include "jarvis.selectorLabels" . }}
|
||||||
|
{{- if .Chart.AppVersion }}
|
||||||
|
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
|
{{- end }}
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Selector labels
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.selectorLabels" -}}
|
||||||
|
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create the name of the service account to use
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.serviceAccountName" -}}
|
||||||
|
{{- if .Values.serviceAccount.create }}
|
||||||
|
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||||
|
{{- else }}
|
||||||
|
{{- default "default" .Values.serviceAccount.name }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
81
melotts/melotts/templates/melotts.yaml
Normal file
81
melotts/melotts/templates/melotts.yaml
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-dp
|
||||||
|
# namespace: jarvis-models
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
#hostNetwork: true
|
||||||
|
# --- START: Add this section for image pull secrets ---
|
||||||
|
runtimeClassName: nvidia
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: regcred # This MUST match the name of the secret you just created
|
||||||
|
# --- END: Add this section ---
|
||||||
|
containers:
|
||||||
|
- name: melo
|
||||||
|
image: {{ .Values.melotts.image }}
|
||||||
|
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||||
|
#command:
|
||||||
|
# - /bin/bash
|
||||||
|
# - -c
|
||||||
|
# - "bash && sleep infinity"
|
||||||
|
ports:
|
||||||
|
- containerPort: 5000 # The port your application listens on inside the container
|
||||||
|
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||||
|
protocol: TCP
|
||||||
|
resources: # Add this section
|
||||||
|
requests:
|
||||||
|
cpu: 2 # Example: 100 millicores (0.1 CPU)
|
||||||
|
memory: 4Gi # Example: 128 mebibytes
|
||||||
|
limits:
|
||||||
|
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||||
|
memory: 6Gi # Example: Limit to 512 mebibytes
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: /models
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: /usr/local/nltk_data
|
||||||
|
subPath: nltk_data
|
||||||
|
volumes:
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-service
|
||||||
|
# namespace: jarvis-models
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
targetPort: 5000
|
||||||
|
type: NodePort
|
||||||
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
|
||||||
89
melotts/melotts/values.yaml
Normal file
89
melotts/melotts/values.yaml
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# Default values for jarvis.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||||
|
replicaCount: 1
|
||||||
|
|
||||||
|
app: "melotts"
|
||||||
|
|
||||||
|
melotts:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
|
||||||
|
jarvis_adapter:
|
||||||
|
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||||
|
endpoint: "http://vllm-leader-nodeport:8080"
|
||||||
|
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Voice/MeloTTS"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||||
|
# choice for the user. This also increases chances charts run on environments with little
|
||||||
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||||
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||||
|
# limits:
|
||||||
|
# cpu: 100m
|
||||||
|
# memory: 128Mi
|
||||||
|
# requests:
|
||||||
|
# cpu: 100m
|
||||||
|
# memory: 128Mi
|
||||||
|
|
||||||
|
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: http
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: http
|
||||||
|
|
||||||
|
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||||
|
service:
|
||||||
|
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||||
|
type: ClusterIP
|
||||||
|
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||||
|
port: 80
|
||||||
|
|
||||||
|
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||||
|
ingress:
|
||||||
|
enabled: false
|
||||||
|
className: ""
|
||||||
|
annotations: {}
|
||||||
|
# kubernetes.io/ingress.class: nginx
|
||||||
|
# kubernetes.io/tls-acme: "true"
|
||||||
|
hosts:
|
||||||
|
- host: chart-example.local
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: ImplementationSpecific
|
||||||
|
tls: []
|
||||||
|
# - secretName: chart-example-tls
|
||||||
|
# hosts:
|
||||||
|
# - chart-example.local
|
||||||
|
|
||||||
|
# Additional volumes on the output Deployment definition.
|
||||||
|
volumes: []
|
||||||
|
# - name: foo
|
||||||
|
# secret:
|
||||||
|
# secretName: mysecret
|
||||||
|
# optional: false
|
||||||
|
|
||||||
|
# Additional volumeMounts on the output Deployment definition.
|
||||||
|
volumeMounts: []
|
||||||
|
# - name: foo
|
||||||
|
# mountPath: "/etc/foo"
|
||||||
|
# readOnly: true
|
||||||
|
|
||||||
|
nodeSelector:
|
||||||
|
resource-group: gpu_5880
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
|
||||||
35
melotts/metadata.yaml
Normal file
35
melotts/metadata.yaml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
application_name: &application_name melotts
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: melotts
|
||||||
|
sets:
|
||||||
|
jarvis_api:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: melo-service
|
||||||
|
port: 32147
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: melotts
|
||||||
|
sets:
|
||||||
|
jarvis_api:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: melo-service
|
||||||
|
port: 32147
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
53
vllm/metadata.yaml
Normal file
@ -0,0 +1,53 @@

application_name: &application_name vllm

distributed:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: infer-0
monolithic:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
  pod:
    name: vllm
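The docs_path and redoc_path entries above point at the FastAPI documentation pages of an OpenAI-compatible server. Assuming the vLLM leader really is published on node port 30080 at the listed host, a quick smoke test would be:

    curl -s -o /dev/null -w '%{http_code}\n' http://10.6.14.123:30080/docs
    curl -s http://10.6.14.123:30080/v1/models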
23
vllm/vllm-app/.helmignore
Normal file
23
vllm/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
25
vllm/vllm-app/Chart.yaml
Normal file
25
vllm/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: vllm-app
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
165
vllm/vllm-app/templates/llama.yaml
Normal file
165
vllm/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-leader
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-worker
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
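For reference, a minimal sketch of what the lmdeploy leader command above expands to once Helm substitutes the default values (gpuLimit=1, workerSize=2, localMountPath=/Model, huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct); the concrete values are assumptions taken from values.yaml, not the only supported configuration:

# hypothetical rendered leader command, assuming the defaults listed above
bash /Model/multi-node-serving.sh leader --ray_cluster_size=$LWS_GROUP_SIZE
MODEL_NAME=$(basename 'Qwen/Qwen2.5-0.5B-Instruct')   # -> Qwen2.5-0.5B-Instruct
lmdeploy serve api_server "/Model/$MODEL_NAME" --backend pytorch \
  --tp $((1 * 2)) --server-port 8080 --cache-max-entry-count 0.9   # tp = gpuLimit * workerSize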
166
vllm/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
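Likewise, a sketch of the rendered vLLM leader command under the same assumed defaults, showing how tensor parallelism maps to GPUs per pod and pipeline parallelism to the LeaderWorkerSet group size; the model path and sizes are illustrative:

# hypothetical rendered vLLM leader command (gpuLimit=1, workerSize=2 assumed)
bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$LWS_GROUP_SIZE
MODEL_PATH="/Model/$(basename 'Qwen/Qwen2.5-0.5B-Instruct')"
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model "$MODEL_PATH" \
  --tensor-parallel-size 1 --pipeline_parallel_size 2 --trust_remote_code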
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-download-model
|
||||||
|
annotations:
|
||||||
|
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||||
|
"helm.sh/hook-weight": "-10" # 优先执行
|
||||||
|
"helm.sh/hook-delete-policy": hook-succeeded
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
|
containers:
|
||||||
|
- name: downloader
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
if [ -d "$DEST_DIR" ]; then
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: model-storage
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: model-storage
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model # reuse the existing PVC
|
||||||
|
{{- end }}
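The pre-install Job above only renders when model.download.enabled is true; a minimal sketch of switching it on at install time (release name and chart path are placeholders):

# hypothetical install command; adjust the release name and chart path to your layout
helm install vllm ./vllm/vllm-app \
  --set model.download.enabled=true \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct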
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
39
vllm/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-leader-nodeport
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: vllm-app
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
{{- if eq .Values.app "llama" }}
|
||||||
|
targetPort: 7860
|
||||||
|
{{- else }}
|
||||||
|
targetPort: 8080
|
||||||
|
{{- end }}
|
||||||
|
nodePort: 30080
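Once the leader (or single-node) pod reports ready, the OpenAI-compatible API should be reachable through this NodePort; a small smoke test, assuming a node IP of 10.6.14.123 as used in the metadata files, the default vLLM backend on port 8080, and the default model path (all assumptions):

# hypothetical smoke test against the NodePort service
curl http://10.6.14.123:30080/v1/models
curl http://10.6.14.123:30080/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "/Model/Qwen2.5-0.5B-Instruct", "prompt": "Hello", "max_tokens": 16}'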
114
vllm/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vllm
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vllm-app
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vllm-app
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
#securityContext:
|
||||||
|
# capabilities:
|
||||||
|
# add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
#- name: GLOO_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_IB_DISABLE
|
||||||
|
# value: "0"
|
||||||
|
#- name: NCCL_DEBUG
|
||||||
|
# value: INFO
|
||||||
|
#- name: NCCL_IB_HCA
|
||||||
|
# value: mlx5_0:1
|
||||||
|
#- name: NCCL_IB_GID_INDEX
|
||||||
|
# value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
#rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
58
vllm/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download:
|
||||||
|
enabled: false # enable automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
app: "vllm"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
#gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
# gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
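These defaults are meant to be overridden per deployment; a minimal sketch of installing the chart with a different model and GPU count (release name, chart path, and the override values are placeholders):

# hypothetical install with overrides; any value above can be replaced via --set
helm install infer ./vllm/vllm-app \
  --set app=vllm \
  --set model.huggingfaceName=Qwen/Qwen2.5-32B-Instruct \
  --set resources.gpuLimit=2 \
  --set workerSize=1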
23
vllm/vllm-serve/.helmignore
Normal file
@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
25
vllm/vllm-serve/Chart.yaml
Normal file
@ -0,0 +1,25 @@
apiVersion: v2
|
||||||
|
name: vllm-serve
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
|
||||||
|
{{- if contains "NodePort" .Values.svc.type }}
|
||||||
|
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-svc)
|
||||||
|
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||||
|
echo http://$NODE_IP:$NODE_PORT
|
||||||
|
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||||
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
|
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
|
||||||
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
|
echo http://$SERVICE_IP:{{ .Values.svc.port }}
|
||||||
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
|
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||||
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||||
|
{{- end }}
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
{{/*
|
||||||
|
Expand the name of the chart.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.name" -}}
|
||||||
|
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create a default fully qualified app name.
|
||||||
|
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||||
|
If release name contains chart name it will be used as a full name.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.fullname" -}}
|
||||||
|
{{- if .Values.fullnameOverride }}
|
||||||
|
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||||
|
{{- if contains $name .Release.Name }}
|
||||||
|
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create chart name and version as used by the chart label.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.chart" -}}
|
||||||
|
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Common labels
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.labels" -}}
|
||||||
|
helm.sh/chart: {{ include "vllm-serve.chart" . }}
|
||||||
|
{{ include "vllm-serve.selectorLabels" . }}
|
||||||
|
{{- if .Chart.AppVersion }}
|
||||||
|
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
|
{{- end }}
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Selector labels
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.selectorLabels" -}}
|
||||||
|
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create the name of the service account to use
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.serviceAccountName" -}}
|
||||||
|
{{- if .Values.serviceAccount.create }}
|
||||||
|
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
|
||||||
|
{{- else }}
|
||||||
|
{{- default "default" .Values.serviceAccount.name }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
188
vllm/vllm-serve/templates/lws.yaml
Normal file
@ -0,0 +1,188 @@
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,28 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
|
---
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Release.Name }}-pv-model
35
vllm/vllm-serve/templates/services.yaml
Normal file
@ -0,0 +1,35 @@
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-svc
|
||||||
|
spec:
|
||||||
|
type: {{ .Values.svc.type | default "NodePort" }}
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: {{ .Values.svc.port | default 8080 }}
|
||||||
|
targetPort: {{ .Values.svc.port | default 8080 }}
|
||||||
|
nodePort: {{ .Values.svc.nodePort | default 30080 }}
108
vllm/vllm-serve/templates/single.yaml
Normal file
@ -0,0 +1,108 @@
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-pod
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
75
vllm/vllm-serve/values.yaml
Normal file
@ -0,0 +1,75 @@
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||||
|
imagePullSecrets: []
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
# This is to override the chart name.
|
||||||
|
nameOverride: ""
|
||||||
|
fullnameOverride: ""
|
||||||
|
|
||||||
|
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||||
|
serviceAccount:
|
||||||
|
# Specifies whether a service account should be created
|
||||||
|
create: true
|
||||||
|
# Automatically mount a ServiceAccount's API credentials?
|
||||||
|
automount: true
|
||||||
|
# Annotations to add to the service account
|
||||||
|
annotations: {}
|
||||||
|
# The name of the service account to use.
|
||||||
|
# If not set and create is true, a name is generated using the fullname template
|
||||||
|
name: ""
|
||||||
|
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download: # automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
svc:
|
||||||
|
type: NodePort
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
nodePort: 30080
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
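As with vllm-app, a sketch of installing the vllm-serve chart and remapping the NodePort; values not shown keep the defaults above, and the release name, chart path, and port are placeholders:

# hypothetical install of the vllm-serve chart with a custom NodePort
helm install qwen-serve ./vllm/vllm-serve \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct \
  --set svc.nodePort=30081 \
  --set workerSize=2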
53
webchat/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||||
|
application_name: &application_name webchat
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: vllm-app
|
||||||
|
sets:
|
||||||
|
app: llama
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "8Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
workerSize: 2
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30081
|
||||||
|
url: ~
|
||||||
|
paths:
|
||||||
|
docs_path: /docs
|
||||||
|
redoc_path: /redoc
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: vllm-app
|
||||||
|
sets:
|
||||||
|
app: vllm
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "8Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
workerSize: 1
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
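The sets blocks in this metadata file mirror the chart's values; assuming the deployment tooling translates them into --set flags, the distributed profile would roughly correspond to the following invocation (release name and chart path are assumptions):

# hypothetical expansion of the 'distributed' profile into a Helm install
helm install webchat ./webchat/vllm-app \
  --set app=llama \
  --set model.huggingfaceName=Qwen/Qwen2-VL-2B-Instruct \
  --set resources.gpuLimit=1 --set resources.cpuRequest=8 \
  --set resources.memoryLimit=8Gi --set resources.shmSize=15Gi \
  --set workerSize=2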
23
webchat/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
25
webchat/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
apiVersion: v2
|
||||||
|
name: vllm-app
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
165
webchat/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-leader
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-worker
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
|
||||||
166
webchat/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
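# The config.json check above decides whether a (re)download is needed; hfd.sh is the
# hf-mirror.com downloader and relies on aria2, which is installed just before the call.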
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
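# GPU math: tensor-parallel-size (GPUs per pod) * pipeline_parallel_size (pods in the
# group) must equal the GPUs provisioned for the group. As a sketch, with the defaults in
# values.yaml (resources.gpuLimit: 1, workerSize: 2) this renders roughly as:
#   python3 -m vllm.entrypoints.openai.api_server --port 8080 --model /Model/<MODEL_NAME> --tensor-parallel-size 1 --pipeline_parallel_size 2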
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
{{- if .Values.model.download.enabled }}
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-download-model
|
||||||
|
annotations:
|
||||||
|
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||||
|
"helm.sh/hook-weight": "-10" # 优先执行
|
||||||
|
"helm.sh/hook-delete-policy": hook-succeeded
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
|
containers:
|
||||||
|
- name: downloader
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
if [ -d "$DEST_DIR" ]; then
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: model-storage
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: model-storage
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model # reuse the existing PVC
|
||||||
|
{{- end }}
|
||||||
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
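# volumeName statically binds this claim to the PV declared in nfs-pv.yaml, so the
# requested pvcSize must fit within nfs.pvSize.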
|
||||||
39
webchat/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-leader-nodeport
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
{{- if eq .Values.app "llama" }}
|
||||||
|
targetPort: 7860
|
||||||
|
{{- else }}
|
||||||
|
targetPort: 8080
|
||||||
|
{{- end }}
|
||||||
|
nodePort: 30081
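# Routing summary: with workerSize > 1 the NodePort targets the LeaderWorkerSet leader
# pod (role=leader); otherwise it targets the single-node Deployment. The llama app
# serves on 7860 internally, everything else on 8080, and the service is reachable on
# every node at port 30081.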
|
||||||
|
|
||||||
114
webchat/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
#securityContext:
|
||||||
|
# capabilities:
|
||||||
|
# add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
#- name: GLOO_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_IB_DISABLE
|
||||||
|
# value: "0"
|
||||||
|
#- name: NCCL_DEBUG
|
||||||
|
# value: INFO
|
||||||
|
#- name: NCCL_IB_HCA
|
||||||
|
# value: mlx5_0:1
|
||||||
|
#- name: NCCL_IB_GID_INDEX
|
||||||
|
# value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.vllm.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
|
||||||
|
memory: {{ .Values.vllm.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
#rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.vllm.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.vllm.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
|
||||||
58
webchat/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download:
|
||||||
|
enabled: false # enable automatic download
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
app: "vllm"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
#gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
# gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
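# Example override at install time (a sketch; chart path and flags are placeholders based
# on this repo's layout). The release name "infer" matches the hard-coded LeaderWorkerSet
# name in lws.yaml, which the Service selector relies on:
#   helm upgrade --install infer ./webchat/vllm-app \
#     --set app=vllm \
#     --set workerSize=2 \
#     --set resources.gpuLimit=1 \
#     --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct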
|
||||||
51
webui/metadata.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
application_name: &application_name webui
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: open-webui
|
||||||
|
sets:
|
||||||
|
image:
|
||||||
|
repository: ghcr.io/open-webui/open-webui
|
||||||
|
tag: main
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||||
|
ollama:
|
||||||
|
enabled: false
|
||||||
|
service:
|
||||||
|
type: NodePort
|
||||||
|
nodePort: 30679
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: ~
|
||||||
|
port: 30679
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: open-webui-
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: open-webui
|
||||||
|
sets:
|
||||||
|
image:
|
||||||
|
repository: ghcr.io/open-webui/open-webui
|
||||||
|
tag: main
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||||
|
ollama:
|
||||||
|
enabled: false
|
||||||
|
service:
|
||||||
|
type: NodePort
|
||||||
|
nodePort: 30679
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: ~
|
||||||
|
port: 30679
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: open-webui-
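# The openaiBaseApiUrls value above targets the "<app>-leader-nodeport" Service created
# by webchat/vllm-app/templates/services.yaml (port 8080), assuming Open WebUI and the
# vLLM release run in the same namespace.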
|
||||||
25
webui/open-webui/.helmignore
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
values-minikube.yaml
|
||||||
12
webui/open-webui/Chart.lock
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
dependencies:
|
||||||
|
- name: ollama
|
||||||
|
repository: https://otwld.github.io/ollama-helm/
|
||||||
|
version: 1.27.0
|
||||||
|
- name: pipelines
|
||||||
|
repository: https://helm.openwebui.com
|
||||||
|
version: 0.7.0
|
||||||
|
- name: tika
|
||||||
|
repository: https://apache.jfrog.io/artifactory/tika
|
||||||
|
version: 3.2.2
|
||||||
|
digest: sha256:1c6e5d6a38dc8ebb4e15b1945fb222fa57b10e8882d5c79ba430648f3c5af372
|
||||||
|
generated: "2025-08-22T15:22:03.150693+02:00"
|
||||||
38
webui/open-webui/Chart.yaml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
annotations:
|
||||||
|
licenses: MIT
|
||||||
|
apiVersion: v2
|
||||||
|
appVersion: 0.6.26
|
||||||
|
dependencies:
|
||||||
|
- condition: ollama.enabled
|
||||||
|
import-values:
|
||||||
|
- child: service
|
||||||
|
parent: ollama.service
|
||||||
|
name: ollama
|
||||||
|
repository: https://otwld.github.io/ollama-helm/
|
||||||
|
version: '>=0.24.0'
|
||||||
|
- condition: pipelines.enabled
|
||||||
|
import-values:
|
||||||
|
- child: service
|
||||||
|
parent: pipelines.service
|
||||||
|
name: pipelines
|
||||||
|
repository: https://helm.openwebui.com
|
||||||
|
version: '>=0.0.1'
|
||||||
|
- condition: tika.enabled
|
||||||
|
name: tika
|
||||||
|
repository: https://apache.jfrog.io/artifactory/tika
|
||||||
|
version: '>=2.9.0'
|
||||||
|
description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions \U0001F44B"
|
||||||
|
home: https://www.openwebui.com/
|
||||||
|
icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png
|
||||||
|
keywords:
|
||||||
|
- llm
|
||||||
|
- chat
|
||||||
|
- web-ui
|
||||||
|
- open-webui
|
||||||
|
name: open-webui
|
||||||
|
sources:
|
||||||
|
- https://github.com/open-webui/helm-charts
|
||||||
|
- https://github.com/open-webui/open-webui/pkgs/container/open-webui
|
||||||
|
- https://github.com/otwld/ollama-helm/
|
||||||
|
- https://hub.docker.com/r/ollama/ollama
|
||||||
|
version: 7.7.0
|
||||||
270
webui/open-webui/README.md
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
# open-webui
|
||||||
|
|
||||||
|
 
|
||||||
|
|
||||||
|
Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋
|
||||||
|
|
||||||
|
**Homepage:** <https://www.openwebui.com/>
|
||||||
|
|
||||||
|
## Source Code
|
||||||
|
|
||||||
|
* <https://github.com/open-webui/helm-charts>
|
||||||
|
* <https://github.com/open-webui/open-webui/pkgs/container/open-webui>
|
||||||
|
* <https://github.com/otwld/ollama-helm/>
|
||||||
|
* <https://hub.docker.com/r/ollama/ollama>
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm repo add open-webui https://helm.openwebui.com/
|
||||||
|
helm repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can install the chart:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm upgrade --install open-webui open-webui/open-webui
|
||||||
|
```
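
For the NodePort/vLLM setup tracked in this repository, the same install can point Open WebUI straight at the vLLM OpenAI-compatible endpoint and skip the bundled Ollama; the flags below are an illustrative sketch based on this repo's `webui/metadata.yaml`, not part of the upstream chart documentation:

```shell
helm upgrade --install open-webui open-webui/open-webui \
  --set ollama.enabled=false \
  --set "openaiBaseApiUrls[0]=http://vllm-leader-nodeport:8080/v1" \
  --set service.type=NodePort \
  --set service.nodePort=30679
```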
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| Repository | Name | Version |
|
||||||
|
|------------|------|---------|
|
||||||
|
| https://apache.jfrog.io/artifactory/tika | tika | >=2.9.0 |
|
||||||
|
| https://helm.openwebui.com | pipelines | >=0.0.1 |
|
||||||
|
| https://otwld.github.io/ollama-helm/ | ollama | >=0.24.0 |
|
||||||
|
|
||||||
|
## Values
|
||||||
|
|
||||||
|
### Logging configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| logging.components.audio | string | `""` | Set the log level for the Audio processing component |
|
||||||
|
| logging.components.comfyui | string | `""` | Set the log level for the ComfyUI Integration component |
|
||||||
|
| logging.components.config | string | `""` | Set the log level for the Configuration Management component |
|
||||||
|
| logging.components.db | string | `""` | Set the log level for the Database Operations (Peewee) component |
|
||||||
|
| logging.components.images | string | `""` | Set the log level for the Image Generation component |
|
||||||
|
| logging.components.main | string | `""` | Set the log level for the Main Application Execution component |
|
||||||
|
| logging.components.models | string | `""` | Set the log level for the Model Management component |
|
||||||
|
| logging.components.ollama | string | `""` | Set the log level for the Ollama Backend Integration component |
|
||||||
|
| logging.components.openai | string | `""` | Set the log level for the OpenAI API Integration component |
|
||||||
|
| logging.components.rag | string | `""` | Set the log level for the Retrieval-Augmented Generation (RAG) component |
|
||||||
|
| logging.components.webhook | string | `""` | Set the log level for the Authentication Webhook component |
|
||||||
|
| logging.level | string | `""` | Set the global log level ["notset", "debug", "info" (default), "warning", "error", "critical"] |
|
||||||
|
|
||||||
|
### Azure Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.azure.container | string | `""` | Sets the container name for Azure Storage |
|
||||||
|
| persistence.azure.endpointUrl | string | `""` | Sets the endpoint URL for Azure Storage |
|
||||||
|
| persistence.azure.key | string | `""` | Set the access key for Azure Storage (ignored if keyExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Managed Identity if run in Azure services |
|
||||||
|
| persistence.azure.keyExistingSecret | string | `""` | Set the access key for Azure Storage from existing secret |
|
||||||
|
| persistence.azure.keyExistingSecretKey | string | `""` | Set the access key for Azure Storage from existing secret key |
|
||||||
|
|
||||||
|
### Google Cloud Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.gcs.appCredentialsJson | string | `""` | Contents of Google Application Credentials JSON file (ignored if appCredentialsJsonExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Google Metadata server if run on a Google Compute Engine. File can be generated for a service account following this guide: https://developers.google.com/workspace/guides/create-credentials#service-account |
|
||||||
|
| persistence.gcs.appCredentialsJsonExistingSecret | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret |
|
||||||
|
| persistence.gcs.appCredentialsJsonExistingSecretKey | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret key |
|
||||||
|
| persistence.gcs.bucket | string | `""` | Sets the bucket name for Google Cloud Storage. Bucket must already exist |
|
||||||
|
|
||||||
|
### Amazon S3 Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.s3.accessKey | string | `""` | Sets the access key ID for S3 storage |
|
||||||
|
| persistence.s3.accessKeyExistingAccessKey | string | `""` | Set the secret access key for S3 storage from existing k8s secret key |
|
||||||
|
| persistence.s3.accessKeyExistingSecret | string | `""` | Set the secret access key for S3 storage from existing k8s secret |
|
||||||
|
| persistence.s3.bucket | string | `""` | Sets the bucket name for S3 storage |
|
||||||
|
| persistence.s3.endpointUrl | string | `""` | Sets the endpoint url for S3 storage |
|
||||||
|
| persistence.s3.keyPrefix | string | `""` | Sets the key prefix for a S3 object |
|
||||||
|
| persistence.s3.region | string | `""` | Sets the region name for S3 storage |
|
||||||
|
| persistence.s3.secretKey | string | `""` | Sets the secret access key for S3 storage (ignored if secretKeyExistingSecret is set) |
|
||||||
|
| persistence.s3.secretKeyExistingSecret | string | `""` | Set the secret key for S3 storage from existing k8s secret |
|
||||||
|
| persistence.s3.secretKeyExistingSecretKey | string | `""` | Set the secret key for S3 storage from existing k8s secret key |
|
||||||
|
|
||||||
|
### SSO Configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.enableGroupManagement | bool | `false` | Enable OAuth group management through access token groups claim |
|
||||||
|
| sso.enableRoleManagement | bool | `false` | Enable OAuth role management through access token roles claim |
|
||||||
|
| sso.enableSignup | bool | `false` | Enable account creation when logging in with OAuth (distinct from regular signup) |
|
||||||
|
| sso.enabled | bool | `false` | **Enable SSO authentication globally** must enable to use SSO authentication |
|
||||||
|
| sso.groupManagement.groupsClaim | string | `"groups"` | The claim that contains the groups (can be nested, e.g., user.memberOf) |
|
||||||
|
| sso.mergeAccountsByEmail | bool | `false` | Allow logging into accounts that match email from OAuth provider (considered insecure) |
|
||||||
|
|
||||||
|
### GitHub OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.github.clientExistingSecret | string | `""` | GitHub OAuth client secret from existing secret |
|
||||||
|
| sso.github.clientExistingSecretKey | string | `""` | GitHub OAuth client secret key from existing secret |
|
||||||
|
| sso.github.clientId | string | `""` | GitHub OAuth client ID |
|
||||||
|
| sso.github.clientSecret | string | `""` | GitHub OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.github.enabled | bool | `false` | Enable GitHub OAuth |
|
||||||
|
|
||||||
|
### Google OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.google.clientExistingSecret | string | `""` | Google OAuth client secret from existing secret |
|
||||||
|
| sso.google.clientExistingSecretKey | string | `""` | Google OAuth client secret key from existing secret |
|
||||||
|
| sso.google.clientId | string | `""` | Google OAuth client ID |
|
||||||
|
| sso.google.clientSecret | string | `""` | Google OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.google.enabled | bool | `false` | Enable Google OAuth |
|
||||||
|
|
||||||
|
### Microsoft OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.microsoft.clientExistingSecret | string | `""` | Microsoft OAuth client secret from existing secret |
|
||||||
|
| sso.microsoft.clientExistingSecretKey | string | `""` | Microsoft OAuth client secret key from existing secret |
|
||||||
|
| sso.microsoft.clientId | string | `""` | Microsoft OAuth client ID |
|
||||||
|
| sso.microsoft.clientSecret | string | `""` | Microsoft OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.microsoft.enabled | bool | `false` | Enable Microsoft OAuth |
|
||||||
|
| sso.microsoft.tenantId | string | `""` | Microsoft tenant ID - use 9188040d-6c67-4c5b-b112-36a304b66dad for personal accounts |
|
||||||
|
|
||||||
|
### OIDC configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.oidc.clientExistingSecret | string | `""` | OIDC client secret from existing secret |
|
||||||
|
| sso.oidc.clientExistingSecretKey | string | `""` | OIDC client secret key from existing secret |
|
||||||
|
| sso.oidc.clientId | string | `""` | OIDC client ID |
|
||||||
|
| sso.oidc.clientSecret | string | `""` | OIDC client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.oidc.enabled | bool | `false` | Enable OIDC authentication |
|
||||||
|
| sso.oidc.providerName | string | `"SSO"` | Name of the provider to show on the UI |
|
||||||
|
| sso.oidc.providerUrl | string | `""` | OIDC provider well known URL |
|
||||||
|
| sso.oidc.scopes | string | `"openid email profile"` | Scopes to request (space-separated). |
|
||||||
|
|
||||||
|
### Role management configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.roleManagement.adminRoles | string | `""` | Comma-separated list of roles allowed to log in as admin (receive open webui role admin) |
|
||||||
|
| sso.roleManagement.allowedRoles | string | `""` | Comma-separated list of roles allowed to log in (receive open webui role user) |
|
||||||
|
| sso.roleManagement.rolesClaim | string | `"roles"` | The claim that contains the roles (can be nested, e.g., user.roles) |
|
||||||
|
|
||||||
|
### SSO trusted header authentication
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.trustedHeader.emailHeader | string | `""` | Header containing the user's email address |
|
||||||
|
| sso.trustedHeader.enabled | bool | `false` | Enable trusted header authentication |
|
||||||
|
| sso.trustedHeader.nameHeader | string | `""` | Header containing the user's name (optional, used for new user creation) |
|
||||||
|
|
||||||
|
### Other Values
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| affinity | object | `{}` | Affinity for pod assignment |
|
||||||
|
| annotations | object | `{}` | |
|
||||||
|
| args | list | `[]` | Open WebUI container arguments (overrides default) |
|
||||||
|
| clusterDomain | string | `"cluster.local"` | Value of cluster domain |
|
||||||
|
| command | list | `[]` | Open WebUI container command (overrides default entrypoint) |
|
||||||
|
| commonEnvVars | list | `[]` | Env vars added to the Open WebUI deployment, common across environments. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: environment variables defined in both `extraEnvVars` and `commonEnvVars` will result in a conflict. Avoid duplicates) |
|
||||||
|
| containerSecurityContext | object | `{}` | Configure container security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-containe> |
|
||||||
|
| copyAppData.args | list | `[]` | Open WebUI copy-app-data init container arguments (overrides default) |
|
||||||
|
| copyAppData.command | list | `[]` | Open WebUI copy-app-data init container command (overrides default) |
|
||||||
|
| copyAppData.resources | object | `{}` | |
|
||||||
|
| databaseUrl | string | `""` | Configure database URL, needed to work with Postgres (example: `postgresql://<user>:<password>@<service>:<port>/<database>`), leave empty to use the default sqlite database |
|
||||||
|
| enableOpenaiApi | bool | `true` | Enables the use of OpenAI APIs |
|
||||||
|
| extraEnvFrom | list | `[]` | Env vars added from configmap or secret to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: `extraEnvVars` will take precedence over the value from `extraEnvFrom`) |
|
||||||
|
| extraEnvVars | list | `[{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}]` | Env vars added to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ |
|
||||||
|
| extraEnvVars[0] | object | `{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}` | Default API key value for Pipelines. Should be updated in a production deployment, or be changed to the required API key if not using Pipelines |
|
||||||
|
| extraInitContainers | list | `[]` | Additional init containers to add to the deployment/statefulset ref: <https://kubernetes.io/docs/concepts/workloads/pods/init-containers/> |
|
||||||
|
| extraResources | list | `[]` | Extra resources to deploy with Open WebUI |
|
||||||
|
| hostAliases | list | `[]` | HostAliases to be added to hosts-file of each container |
|
||||||
|
| image | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/open-webui/open-webui","tag":""}` | Open WebUI image tags can be found here: https://github.com/open-webui/open-webui |
|
||||||
|
| imagePullSecrets | list | `[]` | Configure imagePullSecrets to use private registry ref: <https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry> |
|
||||||
|
| ingress.additionalHosts | list | `[]` | |
|
||||||
|
| ingress.annotations | object | `{}` | Use appropriate annotations for your Ingress controller, e.g., for NGINX: |
|
||||||
|
| ingress.class | string | `""` | |
|
||||||
|
| ingress.enabled | bool | `false` | |
|
||||||
|
| ingress.existingSecret | string | `""` | |
|
||||||
|
| ingress.extraLabels | object | `{}` | Additional custom labels to add to the Ingress metadata Useful for tagging, selecting, or applying policies to the Ingress via labels. |
|
||||||
|
| ingress.host | string | `"chat.example.com"` | |
|
||||||
|
| ingress.tls | bool | `false` | |
|
||||||
|
| livenessProbe | object | `{}` | Probe for liveness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| managedCertificate.domains[0] | string | `"chat.example.com"` | |
|
||||||
|
| managedCertificate.enabled | bool | `false` | |
|
||||||
|
| managedCertificate.name | string | `"mydomain-chat-cert"` | |
|
||||||
|
| nameOverride | string | `""` | |
|
||||||
|
| namespaceOverride | string | `""` | |
|
||||||
|
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||||
|
| ollama.enabled | bool | `true` | Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure |
|
||||||
|
| ollama.fullnameOverride | string | `"open-webui-ollama"` | If enabling embedded Ollama, update fullnameOverride to your desired Ollama name value, or else it will use the default ollama.name value from the Ollama chart |
|
||||||
|
| ollamaUrls | list | `[]` | A list of Ollama API endpoints. These can be added in lieu of automatically installing the Ollama Helm chart, or in addition to it. |
|
||||||
|
| ollamaUrlsFromExtraEnv | bool | `false` | Disables taking Ollama Urls from `ollamaUrls` list |
|
||||||
|
| openaiBaseApiUrl | string | `"https://api.openai.com/v1"` | OpenAI base API URL to use. Defaults to the Pipelines service endpoint when Pipelines are enabled, and "https://api.openai.com/v1" if Pipelines are not enabled and this value is blank |
|
||||||
|
| openaiBaseApiUrls | list | `[]` | OpenAI base API URLs to use. Overwrites the value in openaiBaseApiUrl if set |
|
||||||
|
| persistence.accessModes | list | `["ReadWriteOnce"]` | If using multiple replicas, you must update accessModes to ReadWriteMany |
|
||||||
|
| persistence.annotations | object | `{}` | |
|
||||||
|
| persistence.enabled | bool | `true` | |
|
||||||
|
| persistence.existingClaim | string | `""` | Use existingClaim if you want to re-use an existing Open WebUI PVC instead of creating a new one |
|
||||||
|
| persistence.provider | string | `"local"` | Sets the storage provider, available values are `local`, `s3`, `gcs` or `azure` |
|
||||||
|
| persistence.selector | object | `{}` | |
|
||||||
|
| persistence.size | string | `"2Gi"` | |
|
||||||
|
| persistence.storageClass | string | `""` | |
|
||||||
|
| persistence.subPath | string | `""` | Subdirectory of Open WebUI PVC to mount. Useful if root directory is not empty. |
|
||||||
|
| pipelines.enabled | bool | `true` | Automatically install Pipelines chart to extend Open WebUI functionality using Pipelines: https://github.com/open-webui/pipelines |
|
||||||
|
| pipelines.extraEnvVars | list | `[]` | This section can be used to pass required environment variables to your pipelines (e.g. Langfuse hostname) |
|
||||||
|
| podAnnotations | object | `{}` | |
|
||||||
|
| podLabels | object | `{}` | |
|
||||||
|
| podSecurityContext | object | `{}` | Configure pod security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
|
||||||
|
| priorityClassName | string | `""` | Priority class name for the Open WebUI pods |
|
||||||
|
| readinessProbe | object | `{}` | Probe for readiness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| replicaCount | int | `1` | |
|
||||||
|
| resources | object | `{}` | |
|
||||||
|
| revisionHistoryLimit | int | `10` | Revision history limit for the workload manager (deployment). |
|
||||||
|
| runtimeClassName | string | `""` | Configure runtime class ref: <https://kubernetes.io/docs/concepts/containers/runtime-class/> |
|
||||||
|
| service | object | `{"annotations":{},"containerPort":8080,"labels":{},"loadBalancerClass":"","nodePort":"","port":80,"type":"ClusterIP"}` | Service values to expose Open WebUI pods to cluster |
|
||||||
|
| serviceAccount.annotations | object | `{}` | |
|
||||||
|
| serviceAccount.automountServiceAccountToken | bool | `false` | |
|
||||||
|
| serviceAccount.enable | bool | `true` | |
|
||||||
|
| serviceAccount.name | string | `""` | |
|
||||||
|
| startupProbe | object | `{}` | Probe for startup of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| strategy | object | `{}` | Strategy for updating the workload manager: deployment or statefulset |
|
||||||
|
| tika.enabled | bool | `false` | Automatically install Apache Tika to extend Open WebUI |
|
||||||
|
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||||
|
| topologySpreadConstraints | list | `[]` | Topology Spread Constraints for pod assignment |
|
||||||
|
| volumeMounts | object | `{"container":[],"initContainer":[]}` | Configure container volume mounts ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||||
|
| volumes | list | `[]` | Configure pod volumes ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||||
|
| websocket.enabled | bool | `false` | Enables websocket support in Open WebUI with env `ENABLE_WEBSOCKET_SUPPORT` |
|
||||||
|
| websocket.manager | string | `"redis"` | Specifies the websocket manager to use with env `WEBSOCKET_MANAGER`: redis (default) |
|
||||||
|
| websocket.nodeSelector | object | `{}` | Node selector for websocket pods |
|
||||||
|
| websocket.redis | object | `{"affinity":{},"annotations":{},"args":[],"command":[],"enabled":true,"image":{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"},"labels":{},"name":"open-webui-redis","pods":{"annotations":{},"labels":{}},"resources":{},"securityContext":{},"service":{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"},"tolerations":[]}` | Deploys a redis |
|
||||||
|
| websocket.redis.affinity | object | `{}` | Redis affinity for pod assignment |
|
||||||
|
| websocket.redis.annotations | object | `{}` | Redis annotations |
|
||||||
|
| websocket.redis.args | list | `[]` | Redis arguments (overrides default) |
|
||||||
|
| websocket.redis.command | list | `[]` | Redis command (overrides default) |
|
||||||
|
| websocket.redis.enabled | bool | `true` | Enable redis installation |
|
||||||
|
| websocket.redis.image | object | `{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"}` | Redis image |
|
||||||
|
| websocket.redis.labels | object | `{}` | Redis labels |
|
||||||
|
| websocket.redis.name | string | `"open-webui-redis"` | Redis name |
|
||||||
|
| websocket.redis.pods | object | `{"annotations":{},"labels":{}}` | Redis pod |
|
||||||
|
| websocket.redis.pods.annotations | object | `{}` | Redis pod annotations |
|
||||||
|
| websocket.redis.pods.labels | object | `{}` | Redis pod labels |
|
||||||
|
| websocket.redis.resources | object | `{}` | Redis resources |
|
||||||
|
| websocket.redis.securityContext | object | `{}` | Redis security context |
|
||||||
|
| websocket.redis.service | object | `{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"}` | Redis service |
|
||||||
|
| websocket.redis.service.annotations | object | `{}` | Redis service annotations |
|
||||||
|
| websocket.redis.service.containerPort | int | `6379` | Redis container/target port |
|
||||||
|
| websocket.redis.service.labels | object | `{}` | Redis service labels |
|
||||||
|
| websocket.redis.service.nodePort | string | `""` | Redis service node port. Valid only when type is `NodePort` |
|
||||||
|
| websocket.redis.service.port | int | `6379` | Redis service port |
|
||||||
|
| websocket.redis.service.portName | string | `"http"` | Redis service port name. Istio needs this to be something like `tcp-redis` |
|
||||||
|
| websocket.redis.service.type | string | `"ClusterIP"` | Redis service type |
|
||||||
|
| websocket.redis.tolerations | list | `[]` | Redis tolerations for pod assignment |
|
||||||
|
| websocket.url | string | `"redis://open-webui-redis:6379/0"` | Specifies the URL of the Redis instance for websocket communication. Template with `redis://[:<password>@]<hostname>:<port>/<db>` |
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||||
36
webui/open-webui/README.md.gotmpl
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
{{ template "chart.header" . }}
|
||||||
|
|
||||||
|
{{ template "chart.deprecationWarning" . }}
|
||||||
|
|
||||||
|
{{ template "chart.badgesSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.description" . }}
|
||||||
|
|
||||||
|
{{ template "chart.homepageLine" . }}
|
||||||
|
|
||||||
|
{{ template "chart.maintainersSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.sourcesSection" . }}
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm repo add open-webui https://helm.openwebui.com/
|
||||||
|
helm repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can install the chart:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm upgrade --install open-webui open-webui/open-webui
|
||||||
|
```
|
||||||
|
|
||||||
|
{{ template "chart.requirementsSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.valuesSection" . }}
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||||
30
webui/open-webui/charts/ollama/.helmignore
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
#others
|
||||||
|
.github
|
||||||
|
kind-config.yml
|
||||||
|
ci/
|
||||||
|
|
||||||
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
#others
|
||||||
|
.github
|
||||||
|
kind-config.yml
|
||||||
|
ci/
|
||||||
|
|
||||||
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: ollama
|
||||||
|
description: Get up and running with large language models locally.
|
||||||
|
|
||||||
|
type: application
|
||||||
|
|
||||||
|
version: 1.27.0
|
||||||
|
|
||||||
|
appVersion: "0.11.4"
|
||||||
|
|
||||||
|
annotations:
|
||||||
|
artifacthub.io/category: ai-machine-learning
|
||||||
|
artifacthub.io/changes: |
|
||||||
|
- kind: changed
|
||||||
|
description: upgrade app version to 0.11.4
|
||||||
|
links:
|
||||||
|
- name: Ollama release v0.11.4
|
||||||
|
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
|
||||||
|
|
||||||
|
kubeVersion: "^1.16.0-0"
|
||||||
|
home: https://ollama.ai/
|
||||||
|
icon: https://ollama.ai/public/ollama.png
|
||||||
|
keywords:
|
||||||
|
- ai
|
||||||
|
- llm
|
||||||
|
- llama
|
||||||
|
- mistral
|
||||||
|
sources:
|
||||||
|
- https://github.com/ollama/ollama
|
||||||
|
- https://github.com/otwld/ollama-helm
|
||||||
|
maintainers:
|
||||||
|
- name: OTWLD
|
||||||
|
email: contact@otwld.com
|
||||||
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 OTWLD
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
@ -0,0 +1,331 @@
|
|||||||
|

|
||||||
|
|
||||||
|

|
||||||
|
[](https://artifacthub.io/packages/helm/ollama-helm/ollama)
|
||||||
|
[](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
|
||||||
|
[](https://discord.gg/U24mpqTynB)
|
||||||
|
|
||||||
|
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
|
||||||
|
|
||||||
|
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Kubernetes: `>= 1.16.0-0` for **CPU only**
|
||||||
|
|
||||||
|
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
|
||||||
|
|
||||||
|
*Not all GPUs are currently supported with ollama (especially with AMD)*
|
||||||
|
|
||||||
|
## Deploying Ollama chart
|
||||||
|
|
||||||
|
To install the `ollama` chart in the `ollama` namespace:
|
||||||
|
|
||||||
|
> [!IMPORTANT]
|
||||||
|
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
|
||||||
|
> registry https://helm.otwld.com/
|
||||||
|
> Please update your Helm registry accordingly.
|
||||||
|
|
||||||
|
```console
|
||||||
|
helm repo add otwld https://helm.otwld.com/
|
||||||
|
helm repo update
|
||||||
|
helm install ollama otwld/ollama --namespace ollama --create-namespace
|
||||||
|
```
|
||||||
|
|
||||||
|
## Upgrading Ollama chart
|
||||||
|
|
||||||
|
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
|
||||||
|
backwards incompatible changes.
|
||||||
|
|
||||||
|
Make adjustments to your values as needed, then run `helm upgrade`:
|
||||||
|
|
||||||
|
```console
|
||||||
|
# -- This pulls the latest version of the ollama chart from the repo.
|
||||||
|
helm repo update
|
||||||
|
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Uninstalling Ollama chart
|
||||||
|
|
||||||
|
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
|
||||||
|
|
||||||
|
```console
|
||||||
|
helm delete ollama --namespace ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
|
||||||
|
parameters and flags.
|
||||||
|
|
||||||
|
## Interact with Ollama
|
||||||
|
|
||||||
|
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
|
||||||
|
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||||
|
- Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
|
||||||
|
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
|
||||||
|
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
|
||||||
|
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
|
||||||
|
|
||||||
|
### Basic values.yaml example with GPU and two models pulled at startup
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
gpu:
|
||||||
|
# -- Enable GPU integration
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# -- GPU type: 'nvidia' or 'amd'
|
||||||
|
type: 'nvidia'
|
||||||
|
|
||||||
|
# -- Specify the number of GPU to 1
|
||||||
|
number: 1
|
||||||
|
|
||||||
|
# -- List of models to pull at container startup
|
||||||
|
models:
|
||||||
|
pull:
|
||||||
|
- mistral
|
||||||
|
- llama2
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Basic values.yaml example with Ingress
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
models:
|
||||||
|
pull:
|
||||||
|
- llama2
|
||||||
|
|
||||||
|
ingress:
|
||||||
|
enabled: true
|
||||||
|
hosts:
|
||||||
|
- host: ollama.domain.lan
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
```
|
||||||
|
|
||||||
|
- *API is now reachable at `ollama.domain.lan`*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Create and run model from template
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
models:
|
||||||
|
create:
|
||||||
|
- name: llama3.1-ctx32768
|
||||||
|
template: |
|
||||||
|
FROM llama3.1
|
||||||
|
PARAMETER num_ctx 32768
|
||||||
|
run:
|
||||||
|
- llama3.1-ctx32768
|
||||||
|
```

## Upgrading from 0.X.X to 1.X.X

Version 1.X.X introduces the ability to load models in memory at startup, and the values structure has changed.

Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:

```yaml
ollama:
  models:
    - mistral
    - llama2
```

To:

```yaml
ollama:
  models:
    pull:
      - mistral
      - llama2
```
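
Once your values file uses the new structure, the upgrade itself follows the usual pattern, assuming the same release, repo, and namespace names as earlier in this document:

```console
helm repo update
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
```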

## Helm Values

- See [values.yaml](values.yaml) to see the Chart's default values.

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity for pod assignment |
| autoscaling.enabled | bool | `false` | Enable autoscaling |
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
| deployment.labels | object | `{}` | Labels to add to the deployment |
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
| extraEnv | list | `[]` | Additional environment variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like ConfigMap) |
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
| fullnameOverride | string | `""` | String to fully override template |
| hostIPC | bool | `false` | Use the host's IPC namespace. |
| hostNetwork | bool | `false` | Use the host's network namespace. |
| hostPID | bool | `false` | Use the host's PID namespace |
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
| image.repository | string | `"ollama/ollama"` | Docker image registry |
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
| ingress.enabled | bool | `false` | Enable ingress controller resource |
| ingress.hosts[0].host | string | `"ollama.local"` | |
| ingress.hosts[0].paths[0].path | string | `"/"` | |
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
| initContainers | list | `[]` | Init containers to add to the pod |
| knative.annotations | object | `{}` | Knative service annotations |
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
| knative.enabled | bool | `false` | Enable Knative integration |
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
| namespaceOverride | string | `""` | String to fully override namespace |
| nodeSelector | object | `{}` | Node labels for pod assignment. |
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration. If enabled, it will use DRA instead of the Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
| ollama.gpu.mig.devices | object | `{}` | Specify the MIG devices and the corresponding number |
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple MIG devices. If enabled, you will have to specify the MIG devices. If set to false, this section is ignored |
| ollama.gpu.number | int | `1` | Specify the number of GPUs. If you use the MIG section below, this parameter is ignored |
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | Only for nvidia cards; change to (for example) 'nvidia.com/mig-1g.10gb' to use a MIG slice |
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled', the default value is nvidia. If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden. This is because AMD and CPU/CUDA are different images |
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
| ollama.models.create | list | `[]` | List of models to create at container startup; there are two options: 1. Create a raw model 2. Load a model from ConfigMaps (ConfigMaps must be created beforehand and are loaded as a volume in the "/models" directory). create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
| ollama.models.pull | list | `[]` | List of models to pull at container startup. The more you add, the longer the container will take to start if models are not present. pull: - llama2 - mistral |
| ollama.models.run | list | `[]` | List of models to load in memory at container startup. run: - llama2 - mistral |
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
| ollama.port | int | `11434` | |
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes. Must match those of existing PV or dynamic provisioner. Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true |
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class. If defined, storageClassName: <storageClass>. If set to "-", storageClassName: "", which disables dynamic provisioning. If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS & OpenStack) |
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount. Useful if the volume's root directory is not empty |
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode. If defined, volumeMode: <volumeMode>. If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to. Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
| podLabels | object | `{}` | Map of labels to add to the pods |
| podSecurityContext | object | `{}` | Pod Security Context |
| priorityClassName | string | `""` | Priority Class Name |
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
| replicaCount | int | `1` | Number of replicas |
| resources.limits | object | `{}` | Pod limit |
| resources.requests | object | `{}` | Pod requests |
| runtimeClassName | string | `""` | Specify runtime class |
| securityContext | object | `{}` | Container Security Context |
| service.annotations | object | `{}` | Annotations to add to the service |
| service.labels | object | `{}` | Labels to add to the service |
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
| service.port | int | `11434` | Service port |
| service.type | string | `"ClusterIP"` | Service type |
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
| tests.annotations | object | `{}` | Annotations to add to the tests |
| tests.enabled | bool | `true` | |
| tests.labels | object | `{}` | Labels to add to the tests |
| tolerations | list | `[]` | Tolerations for pod assignment |
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
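
Any of the keys above can also be overridden on the command line instead of through a values file. A sketch with an illustrative combination of flags (not a recommendation), using the same repo alias and namespace as the earlier examples:

```console
helm install ollama otwld/ollama \
  --namespace ollama --create-namespace \
  --set replicaCount=1 \
  --set service.type=NodePort \
  --set service.nodePort=31434 \
  --set persistentVolume.enabled=true \
  --set persistentVolume.size=50Gi
```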

----------------------------------------------

## Core team

<table>
  <tr>
    <td align="center">
      <a href="https://github.com/jdetroyes"
        ><img
          src="https://github.com/jdetroyes.png?size=200"
          width="50"
          style="margin-bottom: -4px; border-radius: 8px;"
          alt="Jean Baptiste Detroyes"
        /><br /><b> Jean Baptiste Detroyes </b></a
      >
      <div style="margin-top: 4px">
        <a href="https://github.com/jdetroyes" title="Github"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
        /></a>
        <a
          href="mailto:jdetroyes@otwld.com"
          title="Email"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
        /></a>
      </div>
    </td>
    <td align="center">
      <a href="https://github.com/ntrehout"
        ><img
          src="https://github.com/ntrehout.png?size=200"
          width="50"
          style="margin-bottom: -4px; border-radius: 8px;"
          alt="Nathan Tréhout"
        /><br /><b> Nathan Tréhout </b></a
      >
      <div style="margin-top: 4px">
        <a href="https://x.com/n_trehout" title="Twitter"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
        /></a>
        <a href="https://github.com/ntrehout" title="Github"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
        /></a>
        <a
          href="mailto:ntrehout@otwld.com"
          title="Email"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
        /></a>
      </div>
    </td>
  </tr>
</table>

## Support

- For questions, suggestions, and discussion about Ollama please refer to
  the [Ollama issue page](https://github.com/ollama/ollama/issues)
- For questions, suggestions, and discussion about this chart please
  visit the [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
  our [OTWLD Discord](https://discord.gg/U24mpqTynB)
@ -0,0 +1,25 @@
1. Get the application URL by running these commands:
{{- if .Values.knative.enabled }}
  export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
  echo "Visit $KSERVICE_URL to use your application"
{{- else if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
@ -0,0 +1,80 @@
{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
*/}}
{{- define "ollama.namespace" -}}
{{- if .Values.namespaceOverride -}}
{{- .Values.namespaceOverride -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}

{{/*
Expand the name of the chart.
*/}}
{{- define "ollama.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "ollama.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "ollama.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "ollama.labels" -}}
helm.sh/chart: {{ include "ollama.chart" . }}
{{ include "ollama.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "ollama.selectorLabels" -}}
app.kubernetes.io/name: {{ include "ollama.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "ollama.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

{{/*
Models mount path
*/}}
{{- define "ollama.modelsMountPath" -}}
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
{{- end -}}
Some files were not shown because too many files have changed in this diff