first backup of charts
This commit is contained in:
1
code/codeserver
Submodule
1
code/codeserver
Submodule
Submodule code/codeserver added at b59a4f7366
55
code/metadata.yaml
Normal file
55
code/metadata.yaml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
application_name: &application_name code
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: codeserver/ci/helm-chart
|
||||
sets:
|
||||
image:
|
||||
repository: codercom/code-server
|
||||
tag: '4.103.2'
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
port: 8080
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: codeserver/ci/helm-chart
|
||||
sets:
|
||||
image:
|
||||
repository: codercom/code-server
|
||||
tag: '4.103.2'
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
port: 8080
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
47
edgetts/metadata.yaml
Normal file
47
edgetts/metadata.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
application_name: &application_name edgetts
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: test-tts
|
||||
sets:
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
tag: "latest"
|
||||
pullPolicy: IfNotPresent
|
||||
service:
|
||||
type: NodePort
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30250
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: test-tts
|
||||
sets:
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
tag: "latest"
|
||||
pullPolicy: IfNotPresent
|
||||
service:
|
||||
type: NodePort
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30250
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
23
edgetts/test-tts/.helmignore
Normal file
23
edgetts/test-tts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
edgetts/test-tts/Chart.yaml
Normal file
24
edgetts/test-tts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: test-tts
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
edgetts/test-tts/templates/NOTES.txt
Normal file
22
edgetts/test-tts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test-tts.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test-tts.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test-tts.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test-tts.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
edgetts/test-tts/templates/_helpers.tpl
Normal file
62
edgetts/test-tts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "test-tts.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "test-tts.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "test-tts.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "test-tts.labels" -}}
|
||||
helm.sh/chart: {{ include "test-tts.chart" . }}
|
||||
{{ include "test-tts.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "test-tts.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "test-tts.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "test-tts.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "test-tts.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
78
edgetts/test-tts/templates/deployment.yaml
Normal file
78
edgetts/test-tts/templates/deployment.yaml
Normal file
@ -0,0 +1,78 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
{{- if not .Values.autoscaling.enabled }}
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "test-tts.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 8 }}
|
||||
{{- with .Values.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "test-tts.serviceAccountName" . }}
|
||||
{{- with .Values.podSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
{{- with .Values.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.service.port }}
|
||||
protocol: TCP
|
||||
{{- with .Values.livenessProbe }}
|
||||
livenessProbe:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.readinessProbe }}
|
||||
readinessProbe:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.resources }}
|
||||
resources:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
volumes:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
32
edgetts/test-tts/templates/hpa.yaml
Normal file
32
edgetts/test-tts/templates/hpa.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
{{- if .Values.autoscaling.enabled }}
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
||||
metrics:
|
||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
43
edgetts/test-tts/templates/ingress.yaml
Normal file
43
edgetts/test-tts/templates/ingress.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.ingress.className }}
|
||||
ingressClassName: {{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- with .pathType }}
|
||||
pathType: {{ . }}
|
||||
{{- end }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "test-tts.fullname" $ }}
|
||||
port:
|
||||
number: {{ $.Values.service.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
16
edgetts/test-tts/templates/service.yaml
Normal file
16
edgetts/test-tts/templates/service.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "test-tts.fullname" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
nodePort: {{ .Values.service.nodePort }}
|
||||
selector:
|
||||
{{- include "test-tts.selectorLabels" . | nindent 4 }}
|
||||
13
edgetts/test-tts/templates/serviceaccount.yaml
Normal file
13
edgetts/test-tts/templates/serviceaccount.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "test-tts.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
||||
{{- end }}
|
||||
15
edgetts/test-tts/templates/tests/test-connection.yaml
Normal file
15
edgetts/test-tts/templates/tests/test-connection.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: "{{ include "test-tts.fullname" . }}-test-connection"
|
||||
labels:
|
||||
{{- include "test-tts.labels" . | nindent 4 }}
|
||||
annotations:
|
||||
"helm.sh/hook": test
|
||||
spec:
|
||||
containers:
|
||||
- name: wget
|
||||
image: busybox
|
||||
command: ['wget']
|
||||
args: ['{{ include "test-tts.fullname" . }}:{{ .Values.service.port }}']
|
||||
restartPolicy: Never
|
||||
124
edgetts/test-tts/values.yaml
Normal file
124
edgetts/test-tts/values.yaml
Normal file
@ -0,0 +1,124 @@
|
||||
# Default values for test-tts.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
|
||||
image:
|
||||
repository: travisvn/openai-edge-tts
|
||||
# This sets the pull policy for images.
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
tag: "latest"
|
||||
|
||||
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||
imagePullSecrets: []
|
||||
# This is to override the chart name.
|
||||
nameOverride: "edgetts"
|
||||
fullnameOverride: ""
|
||||
|
||||
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
# This is for setting Kubernetes Annotations to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
|
||||
podAnnotations: {}
|
||||
# This is for setting Kubernetes Labels to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
securityContext: {}
|
||||
# capabilities:
|
||||
# drop:
|
||||
# - ALL
|
||||
# readOnlyRootFilesystem: true
|
||||
# runAsNonRoot: true
|
||||
# runAsUser: 1000
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: NodePort
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 5050
|
||||
nodePort: 30250
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
#livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
#readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
|
||||
# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
|
||||
autoscaling:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 100
|
||||
targetCPUUtilizationPercentage: 80
|
||||
# targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
23
jarvis/jarvis/.helmignore
Normal file
23
jarvis/jarvis/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
jarvis/jarvis/Chart.yaml
Normal file
24
jarvis/jarvis/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: jarvis
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
jarvis/jarvis/templates/NOTES.txt
Normal file
22
jarvis/jarvis/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
jarvis/jarvis/templates/_helpers.tpl
Normal file
62
jarvis/jarvis/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jarvis.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "jarvis.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "jarvis.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "jarvis.labels" -}}
|
||||
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||
{{ include "jarvis.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "jarvis.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "jarvis.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
17
jarvis/jarvis/templates/images-pv.yaml
Normal file
17
jarvis/jarvis/templates/images-pv.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
# pv.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pv # PV 的名称,可以自定义
|
||||
spec:
|
||||
storageClassName: local-path # 添加这一行,与上面StorageClass的名称一致
|
||||
capacity:
|
||||
storage: 500Gi # PV 的容量,可以根据 NFS 共享的实际大小或预期使用量调整
|
||||
accessModes:
|
||||
- ReadWriteMany # 访问模式:
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
|
||||
nfs:
|
||||
path: /volume1/Dataset/PVStore/lab-data-dataset-pvc-ec4aba12-c683-4168-b335-7b1a8819581a/Private/cache-images # NFS 服务器上共享的路径
|
||||
server: 10.6.80.11 # NFS 服务器的 IP 地址或主机名
|
||||
|
||||
14
jarvis/jarvis/templates/images-pvc.yaml
Normal file
14
jarvis/jarvis/templates/images-pvc.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# pvc.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pvc # PVC 的名称,Pod 会引用这个名称
|
||||
#namespace: default # PVC 所在的命名空间,通常是 default 或你自定义的命名空间
|
||||
spec:
|
||||
storageClassName: local-path # 添加这一行,与PV和StorageClass的名称一致
|
||||
accessModes:
|
||||
- ReadWriteMany # 访问模式,必须与 PV 的 accessModes 匹配或更宽松
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi # PVC 请求的存储容量,必须小于或等于 PV 的容量
|
||||
volumeName: {{ .Release.Name }}-pv # 明确指定要绑定的 PV 的名称,这是手动绑定 PV 的关键
|
||||
68
jarvis/jarvis/templates/jarvis-adapter.yaml
Normal file
68
jarvis/jarvis/templates/jarvis-adapter.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-adapter-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-adapter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: jarvis-adapter
|
||||
image: {{ .Values.jarvis_adapter.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
env:
|
||||
- name: INFERENCE_ENDPOINT
|
||||
value: {{ .Values.jarvis_adapter.endpoint }}
|
||||
ports:
|
||||
- containerPort: 5000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 100m # Example: 100 millicores (0.1 CPU)
|
||||
memory: 256Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 500m # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 512Mi # Example: Limit to 512 mebibytes
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llm-blackbox
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-adapter
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-adapter
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 80
|
||||
targetPort: 5000
|
||||
type: NodePort
|
||||
85
jarvis/jarvis/templates/jarvis-api.yaml
Normal file
85
jarvis/jarvis/templates/jarvis-api.yaml
Normal file
@ -0,0 +1,85 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-api-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: jarvis-api
|
||||
image: {{ .Values.jarvis_api.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
ports:
|
||||
- containerPort: 8080 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 100m # Example: 100 millicores (0.1 CPU)
|
||||
memory: 256Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 500m # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 512Mi # Example: Limit to 512 mebibytes
|
||||
volumeMounts:
|
||||
- name: env-config-volume
|
||||
mountPath: /.env.yml
|
||||
subPath: .env.yml
|
||||
readOnly: true
|
||||
- name: images-data
|
||||
mountPath: /images
|
||||
volumes:
|
||||
- name: env-config-volume
|
||||
#hostPath:
|
||||
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
|
||||
# type: FileOrCreate
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
|
||||
items:
|
||||
- key: .api.env.yml # This is the key defined in the ConfigMap's data section
|
||||
path: .env.yml # This is the filename inside the mountPath (e.g., /.env.yml)
|
||||
- name: images-data
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-api-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-api
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-api
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
type: NodePort
|
||||
183
jarvis/jarvis/templates/jarvis-configmap.yaml
Normal file
183
jarvis/jarvis/templates/jarvis-configmap.yaml
Normal file
@ -0,0 +1,183 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-cm # Name of your ConfigMap
|
||||
# namespace: jarvis-models # Ensure this matches your Deployment's namespace
|
||||
data:
|
||||
.api.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
|
||||
database:
|
||||
host: 10.6.14.130
|
||||
port: 3306
|
||||
username: jarvis
|
||||
password: boardwarejarvis
|
||||
database: jarvis
|
||||
jwt:
|
||||
secret: secretkey
|
||||
users:
|
||||
- username: jarvis
|
||||
password: boardwarejarvis
|
||||
- username: user
|
||||
password: boardwareuser
|
||||
- username: g2e
|
||||
password: g2e
|
||||
- username: vera
|
||||
password: vera
|
||||
- username: ivan
|
||||
password: ivan
|
||||
blackbox:
|
||||
mode: 0 # 0: /?blackbox=models, 1: /models-blackbox
|
||||
url: http://jarvis-model-service
|
||||
port: 8080
|
||||
|
||||
vad:
|
||||
url: http://vad-blackbox
|
||||
asr:
|
||||
url: http://asr-blackbox
|
||||
llm:
|
||||
url: http://llm-blackbox
|
||||
vlm:
|
||||
url: http://vlm-blackbox
|
||||
tts:
|
||||
url: http://tts-blackbox
|
||||
chatllama:
|
||||
url: http://chatllama-blackbox
|
||||
chroma:
|
||||
upsert_url: http://chroma-blackbox/upsert
|
||||
|
||||
|
||||
env: dev
|
||||
authentik:
|
||||
redirectUri: http://10.6.14.130:4200
|
||||
baseUrl: https://authentik.universalmacro.com
|
||||
clientId: xxx
|
||||
clientSecret: xxx
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
# log:
|
||||
# path: "/Workspace/Logging/logtime.out"
|
||||
|
||||
log:
|
||||
loki:
|
||||
# url: "https://103.192.46.20:27002/laas/1868865592451137536/loki/api/v1/push"
|
||||
url: "https://loki.bwgdi.com/loki/api/v1/push"
|
||||
x-odin-auth: "log_m7uxtqtru2318hbaoonf9wgjy8chcnebhwhl0wncsvfctu2ppn9m53q6p3i3"
|
||||
labels:
|
||||
app: jarvis
|
||||
env: dev
|
||||
location: "k3s_gdi"
|
||||
|
||||
model:
|
||||
tts:
|
||||
url: http://10.6.14.130:8000/?blackbox_name=tts
|
||||
tts_model_name: melotts
|
||||
tts_stream: false
|
||||
streaming:
|
||||
url: http://10.6.14.130:8000/?blackbox_name=chat
|
||||
vlms_url: http://10.6.14.130:8000/?blackbox_name=vlms
|
||||
.models.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
|
||||
env:
|
||||
version: 0.0.1
|
||||
host: 0.0.0.0
|
||||
port: 8000
|
||||
|
||||
log:
|
||||
level: debug
|
||||
time_format: "%Y-%m-%d %H:%M:%S"
|
||||
filename: "./jarvis-models.log"
|
||||
|
||||
loki:
|
||||
url: "https://loki.bwgdi.com/loki/api/v1/push"
|
||||
labels:
|
||||
app: jarvis
|
||||
env: dev
|
||||
location: "k3s_gdi"
|
||||
layer: models
|
||||
|
||||
melotts:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: 'ZH'
|
||||
speaker: 'ZH'
|
||||
|
||||
cosyvoicetts:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: '粤语女'
|
||||
speaker: 'ZH'
|
||||
|
||||
sovitstts:
|
||||
mode: docker
|
||||
url: http://10.6.80.90:9880/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: 'ZH'
|
||||
speaker: 'ZH'
|
||||
text_lang: "yue"
|
||||
ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
|
||||
prompt_lang: "yue"
|
||||
prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
|
||||
text_split_method: "cut5"
|
||||
batch_size: 1
|
||||
media_type: "wav"
|
||||
streaming_mode: True
|
||||
|
||||
sensevoiceasr:
|
||||
mode: local # or docker
|
||||
url: http://10.6.44.141:18080/convert/tts
|
||||
speed: 0.9
|
||||
device: 'cuda:0'
|
||||
language: '粤语女'
|
||||
speaker: 'ZH'
|
||||
|
||||
tesou:
|
||||
url: http://120.196.116.194:48891/chat/
|
||||
|
||||
TokenIDConverter:
|
||||
token_path: src/asr/resources/models/token_list.pkl
|
||||
unk_symbol: <unk>
|
||||
|
||||
CharTokenizer:
|
||||
symbol_value:
|
||||
space_symbol: <space>
|
||||
remove_non_linguistic_symbols: false
|
||||
|
||||
WavFrontend:
|
||||
cmvn_file: src/asr/resources/models/am.mvn
|
||||
frontend_conf:
|
||||
fs: 16000
|
||||
window: hamming
|
||||
n_mels: 80
|
||||
frame_length: 25
|
||||
frame_shift: 10
|
||||
lfr_m: 7
|
||||
lfr_n: 6
|
||||
filter_length_max: -.inf
|
||||
dither: 0.0
|
||||
|
||||
Model:
|
||||
model_path: src/asr/resources/models/model.onnx
|
||||
use_cuda: false
|
||||
CUDAExecutionProvider:
|
||||
device_id: 0
|
||||
arena_extend_strategy: kNextPowerOfTwo
|
||||
cudnn_conv_algo_search: EXHAUSTIVE
|
||||
do_copy_in_default_stream: true
|
||||
batch_size: 3
|
||||
blackbox:
|
||||
lazyloading: true
|
||||
|
||||
vlms:
|
||||
urls:
|
||||
qwen_vl: http://vl-svc
|
||||
vlm: http://vl-svc:8080
|
||||
|
||||
path:
|
||||
chroma_rerank_embedding_model: /Model/BAAI
|
||||
cosyvoice_path: /Voice/CosyVoice
|
||||
cosyvoice_model_path: /Voice/CosyVoice/pretrained_models
|
||||
sensevoice_model_path: /Voice/SenseVoice/SenseVoiceSmall
|
||||
96
jarvis/jarvis/templates/jarvis-models.yaml
Normal file
96
jarvis/jarvis/templates/jarvis-models.yaml
Normal file
@ -0,0 +1,96 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jarvis-model-deployment
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jarvis-model
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
runtimeClassName: nvidia
|
||||
containers:
|
||||
- name: jarvis-model
|
||||
image: {{ .Values.jarvis_model.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
# command: ["sleep", "infinity"]
|
||||
ports:
|
||||
- containerPort: 8000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 1 # Example: 100 millicores (0.1 CPU)
|
||||
memory: 512Mi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 1Gi # Example: Limit to 512 mebibytes
|
||||
nvidia.com/gpu: 1
|
||||
volumeMounts:
|
||||
- name: env-config-volume
|
||||
mountPath: /jarvis-models/.env.yaml
|
||||
subPath: .env.yaml
|
||||
readOnly: true
|
||||
- name: nfs-volume
|
||||
subPath: Weight
|
||||
mountPath: /Model
|
||||
- name: nfs-volume
|
||||
subPath: Voice
|
||||
mountPath: /Voice
|
||||
volumes:
|
||||
- name: env-config-volume
|
||||
#hostPath:
|
||||
# path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
|
||||
# type: FileOrCreate
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap you created
|
||||
items:
|
||||
- key: .models.env.yml # This is the key defined in the ConfigMap's data section
|
||||
path: .env.yaml # This is the filename inside the mountPath (e.g., /.env.yml)
|
||||
- name: nfs-volume
|
||||
nfs :
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
|
||||
#{{- range .Values.volumes }}
|
||||
#- {{ . | toYaml | nindent 10 | trim }}
|
||||
#{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: jarvis-model-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: jarvis-model
|
||||
spec:
|
||||
selector:
|
||||
app: jarvis-model
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8000
|
||||
type: NodePort
|
||||
87
jarvis/jarvis/values.yaml
Normal file
87
jarvis/jarvis/values.yaml
Normal file
@ -0,0 +1,87 @@
|
||||
# Default values for jarvis.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
|
||||
jarvis_model:
|
||||
image: harbor.bwgdi.com/library/jarvis-models:0.0.1
|
||||
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: ClusterIP
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 80
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes:
|
||||
- name: nfs-volume
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
|
||||
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
43
jarvis/metadata.yaml
Normal file
43
jarvis/metadata.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
|
||||
application_name: &application_name jarvis
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jarvis
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: jarvis-api-service
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: jarvis-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jarvis
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/jarvis-api:1.0.9
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: jarvis-api-service
|
||||
port: 30083
|
||||
url: ~
|
||||
pod:
|
||||
name: jarvis-
|
||||
17
jupyter/jupyter/Chart.yaml
Normal file
17
jupyter/jupyter/Chart.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
appVersion: 6.0.3
|
||||
description: Helm for jupyter single server with pyspark support
|
||||
home: https://jupyter.org
|
||||
icon: https://jupyter.org/assets/main-logo.svg
|
||||
keywords:
|
||||
- jupyter
|
||||
- notebook
|
||||
- spark
|
||||
maintainers:
|
||||
- email: cgiraldo@gradiant.org
|
||||
name: cgiraldo
|
||||
name: jupyter
|
||||
sources:
|
||||
- https://github.com/gradiant/charts
|
||||
- https://github.com/astrobounce/helm-jupyter
|
||||
version: 0.1.6
|
||||
34
jupyter/jupyter/README.md
Normal file
34
jupyter/jupyter/README.md
Normal file
@ -0,0 +1,34 @@
|
||||
jupyter
|
||||
=======
|
||||
Helm for jupyter single server with pyspark support.
|
||||
For jupyterhub chart see [zero-to-jupyterhub](https://zero-to-jupyterhub.readthedocs.io/en/latest/).
|
||||
|
||||
Current chart version is `0.1.0`
|
||||
|
||||
Source code can be found [here]((https://github.com/gradiant/charts/charts/jupyter)
|
||||
|
||||
|
||||
## Chart Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| affinity | object | `{}` | |
|
||||
| image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| image.repository | string | `"gradiant/jupyter"` | |
|
||||
| image.tag | string | `"6.0.1"` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.hosts[0] | string | `"jupyter.127-0-0-1.nip"` | |
|
||||
| ingress.path | string | `"/"` | |
|
||||
| ingress.tls | list | `[]` | |
|
||||
| lab | bool | `true` | |
|
||||
| nodeSelector | object | `{}` | |
|
||||
| persistence.accessMode | string | `"ReadWriteOnce"` | |
|
||||
| persistence.enabled | bool | `true` | |
|
||||
| persistence.size | string | `"50Gi"` | |
|
||||
| persistence.storageClass | string | `nil` | |
|
||||
| resources | object | `{}` | |
|
||||
| service.externalPort | int | `8888` | |
|
||||
| service.nodePort.http | string | `nil` | |
|
||||
| service.type | string | `"ClusterIP"` | |
|
||||
| tolerations | list | `[]` | |
|
||||
23
jupyter/jupyter/templates/NOTES.txt
Normal file
23
jupyter/jupyter/templates/NOTES.txt
Normal file
@ -0,0 +1,23 @@
|
||||
1. Get access token from jupyter server log:
|
||||
kubectl logs -f -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }}
|
||||
|
||||
1. Create a port-forward to the jupyter:
|
||||
kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }} 8888:{{ .Values.service.externalPort }}
|
||||
|
||||
Then open the ui in your browser and use the access token:
|
||||
open http://localhost:88888
|
||||
|
||||
If you set up your own password, remember to restart jupyter server to update the configuration.
|
||||
File -> Shut Down
|
||||
|
||||
{{- if .Values.ingress.enabled }}
|
||||
Ingress is enabled:
|
||||
{{- range .Values.ingress.tls }}
|
||||
{{- range .hosts }}
|
||||
open https://{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- range .Values.ingress.hosts }}
|
||||
open http://{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
32
jupyter/jupyter/templates/_helpers.tpl
Normal file
32
jupyter/jupyter/templates/_helpers.tpl
Normal file
@ -0,0 +1,32 @@
|
||||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jupyter.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
*/}}
|
||||
{{- define "jupyter.fullname" -}}
|
||||
{{- if .Values.fullnameOverride -}}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride -}}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Standard Labels from Helm documentation https://helm.sh/docs/chart_best_practices/#labels-and-annotations
|
||||
*/}}
|
||||
|
||||
{{- define "jupyter.labels" -}}
|
||||
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
app.kubernetes.io/part-of: {{ .Chart.Name }}
|
||||
{{- end -}}
|
||||
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
@ -0,0 +1,36 @@
|
||||
{{- if .Values.gitNotebooks }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
data:
|
||||
git-notebooks.sh: |-
|
||||
#!/bin/sh
|
||||
set -x
|
||||
cd /home/jovyan
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
cp -r /tmp/.ssh /root/
|
||||
chmod 600 /root/.ssh/*
|
||||
{{- else }}
|
||||
mkdir /root/.ssh
|
||||
{{- end }}
|
||||
echo "Loading notebooks from git repo"
|
||||
{{- range .Values.gitNotebooks.repos }}
|
||||
if [ ! -d "/home/jovyan/{{ .name }}" ]
|
||||
then
|
||||
echo "Cloning {{ .name }} notebook repository"
|
||||
{{- if or (hasPrefix "git" .repo) (hasPrefix "ssh" .repo) }}
|
||||
ssh-keyscan {{ .repo | regexFind "@([a-zA-Z0-9.]*)" | replace "@" "" }} >> ~/.ssh/known_hosts
|
||||
{{- end }}
|
||||
git clone {{ .repo }} {{ .name }}
|
||||
else
|
||||
echo "{{ .name }} notebook repository already cloned"
|
||||
fi
|
||||
{{- end }}
|
||||
# exit code 0 to continue deployment even if git clone fails
|
||||
exit 0
|
||||
|
||||
{{- end }}
|
||||
39
jupyter/jupyter/templates/ingress.yaml
Normal file
39
jupyter/jupyter/templates/ingress.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "jupyter.fullname" . -}}
|
||||
{{- $ingressPath := .Values.ingress.path -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
{{- if .Values.ingress.labels }}
|
||||
{{ toYaml .Values.ingress.labels | indent 4 }}
|
||||
{{- end }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{ toYaml . | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ . }}
|
||||
http:
|
||||
paths:
|
||||
- path: {{ $ingressPath }}
|
||||
backend:
|
||||
serviceName: {{ $fullName }}
|
||||
servicePort: web
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
20
jupyter/jupyter/templates/service.yaml
Normal file
20
jupyter/jupyter/templates/service.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
ports:
|
||||
- name: web
|
||||
protocol: TCP
|
||||
port: {{ .Values.service.externalPort | default 8888 }}
|
||||
{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort.http))) }}
|
||||
nodePort: {{ .Values.service.nodePort.http }}
|
||||
{{- end }}
|
||||
targetPort: 8888
|
||||
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
@ -0,0 +1,118 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: {{ include "jupyter.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||
serviceName: {{ include "jupyter.fullname" . }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||
{{- include "jupyter.labels" . | nindent 8}}
|
||||
spec:
|
||||
{{- if .Values.gitNotebooks }}
|
||||
initContainers:
|
||||
- name: git-notebooks
|
||||
image: alpine/git
|
||||
command:
|
||||
- /bin/bash
|
||||
- /git-notebooks.sh
|
||||
volumeMounts:
|
||||
- name: git-notebooks
|
||||
mountPath: /git-notebooks.sh
|
||||
subPath: git-notebooks.sh
|
||||
- name: jupyter
|
||||
mountPath: /home/jovyan
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
- name: git-secret
|
||||
mountPath: "/tmp/.ssh"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: jupyter
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
|
||||
env:
|
||||
- name: JUPYTER_ENABLE_LAB
|
||||
value: "{{ .Values.lab }}"
|
||||
- name: JPY_USER
|
||||
value: "jovyan"
|
||||
args:
|
||||
- start-notebook.sh
|
||||
- --ip=0.0.0.0
|
||||
- --user="jovyan"
|
||||
ports:
|
||||
- name: web
|
||||
containerPort: 8888
|
||||
protocol: TCP
|
||||
resources:
|
||||
{{ toYaml .Values.resources | indent 10 }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8888
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 15
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8888
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 15
|
||||
volumeMounts:
|
||||
- name: jupyter
|
||||
mountPath: /home/jovyan
|
||||
volumes:
|
||||
{{- if .Values.gitNotebooks }}
|
||||
- name: git-notebooks
|
||||
configMap:
|
||||
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||
{{- if .Values.gitNotebooks.secretName }}
|
||||
- name: git-secret
|
||||
secret:
|
||||
secretName: {{ .Values.gitNotebooks.secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if not .Values.persistence.enabled }}
|
||||
- name: jupyter
|
||||
emptyDir: {}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.persistence.enabled }}
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: jupyter
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: "{{ .Values.persistence.size }}"
|
||||
{{- if .Values.persistence.storageClass }}
|
||||
{{- if (eq "-" .Values.persistence.storageClass) }}
|
||||
storageClassName: ""
|
||||
{{- else }}
|
||||
storageClassName: "{{ .Values.persistence.storageClass }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
64
jupyter/jupyter/values.yaml
Normal file
64
jupyter/jupyter/values.yaml
Normal file
@ -0,0 +1,64 @@
|
||||
# Default values for jupyter.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
lab: true
|
||||
|
||||
#gitNotebooks:
|
||||
# secretName: the name of the secret with ssh keys
|
||||
# repos:
|
||||
# - name: gradiant
|
||||
# repo: https://github.com/Gradiant/notebooks.git
|
||||
# - name: grad-git
|
||||
# repo: git@github.com:Gradiant/notebooks.git
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
externalPort: 8888
|
||||
nodePort:
|
||||
http:
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass:
|
||||
accessMode: ReadWriteOnce
|
||||
size: 50Gi
|
||||
|
||||
## Ingress configuration
|
||||
## Ref: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
##
|
||||
ingress:
|
||||
enabled: false
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
path: /
|
||||
hosts:
|
||||
- jupyter.127-0-0-1.nip.io
|
||||
tls: []
|
||||
# - secretName: jupyter-tls
|
||||
# hosts:
|
||||
# - jupyter.local
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
55
jupyter/metadata.yaml
Normal file
55
jupyter/metadata.yaml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
application_name: &application_name jupyter
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jupyter
|
||||
sets:
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort:
|
||||
http: 30888
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30888
|
||||
url: ~
|
||||
pod:
|
||||
name: jupyter-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: jupyter
|
||||
sets:
|
||||
image:
|
||||
repository: gradiant/jupyter
|
||||
tag: 6.0.3
|
||||
pullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 0
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
service:
|
||||
type: NodePort
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30888
|
||||
url: ~
|
||||
pod:
|
||||
name: jupyter-
|
||||
23
llama-factory/llama-factory/.helmignore
Normal file
23
llama-factory/llama-factory/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
llama-factory/llama-factory/Chart.yaml
Normal file
25
llama-factory/llama-factory/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: Llama-factory
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
@ -0,0 +1,159 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: llamafactory
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "llamafactory-cli webui"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
33
llama-factory/llama-factory/templates/services.yaml
Normal file
33
llama-factory/llama-factory/templates/services.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Values.app }}-leader-nodeport
|
||||
spec:
|
||||
type: NodePort
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: llamafactory
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: llama-factory
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 7860
|
||||
51
llama-factory/llama-factory/templates/single.yaml
Normal file
51
llama-factory/llama-factory/templates/single.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llama-factory
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: llama-factory
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-factory
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "llamafactory-cli webui"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app}}-pvc-model
|
||||
{{- end }}
|
||||
44
llama-factory/llama-factory/values.yaml
Normal file
44
llama-factory/llama-factory/values.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download:
|
||||
enabled: false # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
app: "llama-factory"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
53
llama-factory/metadata.yaml
Normal file
53
llama-factory/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name llama-factory
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: llama-factory
|
||||
sets:
|
||||
app: llama-factory
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: llamafactory
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: llama-factory
|
||||
sets:
|
||||
app: llama-factory
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: llama-factory
|
||||
23
melotts/melotts/.helmignore
Normal file
23
melotts/melotts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
24
melotts/melotts/Chart.yaml
Normal file
24
melotts/melotts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
apiVersion: v2
|
||||
name: jarvis
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
22
melotts/melotts/templates/NOTES.txt
Normal file
22
melotts/melotts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
62
melotts/melotts/templates/_helpers.tpl
Normal file
62
melotts/melotts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "jarvis.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "jarvis.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "jarvis.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "jarvis.labels" -}}
|
||||
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||
{{ include "jarvis.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "jarvis.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "jarvis.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
81
melotts/melotts/templates/melotts.yaml
Normal file
81
melotts/melotts/templates/melotts.yaml
Normal file
@ -0,0 +1,81 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-dp
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
#hostNetwork: true
|
||||
# --- START: Add this section for image pull secrets ---
|
||||
runtimeClassName: nvidia
|
||||
imagePullSecrets:
|
||||
- name: regcred # This MUST match the name of the secret you just created
|
||||
# --- END: Add this section ---
|
||||
containers:
|
||||
- name: melo
|
||||
image: {{ .Values.melotts.image }}
|
||||
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||
#command:
|
||||
# - /bin/bash
|
||||
# - -c
|
||||
# - "bash && sleep infinity"
|
||||
ports:
|
||||
- containerPort: 5000 # The port your application listens on inside the container
|
||||
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||
protocol: TCP
|
||||
resources: # Add this section
|
||||
requests:
|
||||
cpu: 2 # Example: 100 millicores (0.1 CPU)
|
||||
memory: 4Gi # Example: 128 mebibytes
|
||||
limits:
|
||||
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||
memory: 6Gi # Example: Limit to 512 mebibytes
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: /models
|
||||
- name: weight-volume
|
||||
mountPath: /usr/local/nltk_data
|
||||
subPath: nltk_data
|
||||
volumes:
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-service
|
||||
# namespace: jarvis-models
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 5000
|
||||
type: NodePort
|
||||
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
89
melotts/melotts/values.yaml
Normal file
89
melotts/melotts/values.yaml
Normal file
@ -0,0 +1,89 @@
|
||||
# Default values for jarvis.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||
replicaCount: 1
|
||||
|
||||
app: "melotts"
|
||||
|
||||
melotts:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
|
||||
jarvis_adapter:
|
||||
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||
endpoint: "http://vllm-leader-nodeport:8080"
|
||||
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Voice/MeloTTS"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
|
||||
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: ClusterIP
|
||||
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 80
|
||||
|
||||
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector:
|
||||
resource-group: gpu_5880
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
35
melotts/metadata.yaml
Normal file
35
melotts/metadata.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
application_name: &application_name melotts
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: melotts
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: melo-service
|
||||
port: 32147
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: melotts
|
||||
sets:
|
||||
jarvis_api:
|
||||
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: melo-service
|
||||
port: 32147
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
53
vllm/metadata.yaml
Normal file
53
vllm/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name vllm
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
paths:
|
||||
docs_path: /docs
|
||||
redoc_path: /redoc
|
||||
pod:
|
||||
name: infer-0
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: vllm
|
||||
|
||||
23
vllm/vllm-app/.helmignore
Normal file
23
vllm/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
vllm/vllm-app/Chart.yaml
Normal file
25
vllm/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-app
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
165
vllm/vllm-app/templates/llama.yaml
Normal file
165
vllm/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
||||
{{- /*
LeaderWorkerSet for multi-node lmdeploy serving.
Rendered only when workerSize > 1 AND .Values.app == "lmdeploy";
the single-node case is handled by single.yaml.
*/}}
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: lmdeploy-leader
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            command:
              - sh
              - -c
              # Start the Ray head, then serve with tp spanning every GPU in
              # the group (gpus-per-node * group size).
              - "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              # Chart-managed PVC created by nfs-pvc.yaml; the previous
              # hard-coded "nfs-pvc-model" is not created by this chart.
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: lmdeploy-worker
            image: {{ .Values.lmdeploy.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "echo $(LWS_LEADER_ADDRESS);
                bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
              - name: LMDEPLOY_EXECUTOR_BACKEND
                value: "ray"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              # Same chart-managed PVC as the leader (was "nfs-pvc-model").
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
166
vllm/vllm-app/templates/lws.yaml
Normal file
166
vllm/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
||||
{{- /*
LeaderWorkerSet for multi-node vLLM serving.
Rendered only when workerSize > 1 AND .Values.app == "vllm";
the single-node case is handled by single.yaml.
*/}}
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: infer
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: IfNotPresent
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                ls $DEST_DIR
                ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              # Start the Ray head, then launch the OpenAI-compatible server
              # with TP within a node and PP across the worker group.
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
{{- /*
Optional pre-install/pre-upgrade Job that downloads the model weights onto
the chart's model PVC before the release is installed.
*/}}
{{- if .Values.model.download.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-download-model
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade  # run before install/upgrade
    "helm.sh/hook-weight": "-10"             # run early among hooks
    "helm.sh/hook-delete-policy": hook-succeeded
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: downloader
          image: {{ .Values.model.download.image }}
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            # Quoted: an unset token would otherwise render a bare null
            # value, which the API server rejects for env vars.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
          command:
            - sh
            - -c
            - |
              DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
              if [ -d "$DEST_DIR" ]; then
                echo "Model already exists at $DEST_DIR"
              else
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            # Chart-managed PVC from nfs-pvc.yaml; the previous hard-coded
            # "nfs-pvc-model" is not created by this chart.
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# PersistentVolume backed by the NFS share that stores model weights.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Values.app }}-pv-model
spec:
  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
  capacity:
    storage: {{ .Values.nfs.pvSize }}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    # Quoted so paths/hosts with YAML-special characters render safely.
    path: "{{ .Values.nfs.path }}"
    server: "{{ .Values.nfs.server }}"
|
||||
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
# PersistentVolumeClaim bound explicitly (via volumeName) to the model PV.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.app }}-pvc-model
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.nfs.pvcSize }}
  volumeName: {{ .Values.app }}-pv-model
|
||||
39
vllm/vllm-app/templates/services.yaml
Normal file
39
vllm/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
# Commented-out LoadBalancer variant kept for reference.
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
# NodePort Service that exposes the inference endpoint.
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  # Multi-node: route traffic to the LeaderWorkerSet leader pod only.
  selector:
    leaderworkerset.sigs.k8s.io/name: infer
    role: leader
  {{- else }}
  # Single-node: route to the plain Deployment pods (see single.yaml labels).
  selector:
    app: vllm-app
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      {{- if eq .Values.app "llama" }}
      # The llama image serves on 7860 — TODO confirm against that image.
      targetPort: 7860
      {{- else }}
      targetPort: 8080
      {{- end }}
      nodePort: 30080
|
||||
|
||||
114
vllm/vllm-app/templates/single.yaml
Normal file
114
vllm/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
||||
{{- /*
Single-node vLLM Deployment, rendered when workerSize == 1.
NOTE(review): this path always deploys the vLLM image regardless of
.Values.app — confirm single-node lmdeploy/llama is intentionally unsupported.
*/}}
{{- if eq (int .Values.workerSize) 1 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: vllm-app
  template:
    metadata:
      labels:
        app: vllm-app
    spec:
      initContainers:
        # Model download runs first so the weights are on the PVC before the
        # serving container starts.
        - name: download-model
          image: {{ .Values.model.download.image }}
          imagePullPolicy: IfNotPresent
          env:
            - name: HF_ENDPOINT
              value: https://hf-mirror.com
            # Quoted so an unset token renders as "" instead of null.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
          command:
            - sh
            - -c
            - |
              MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
              DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
              # Download only if the model is not already present.
              echo "DEST_DIR= $DEST_DIR"
              ls $DEST_DIR
              ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
              if [ ! -f "$DEST_DIR/config.json" ]; then
                ls -l {{ .Values.model.localMountPath }}
                echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                wget https://hf-mirror.com/hfd/hfd.sh
                chmod a+x hfd.sh
                apt install aria2 -y
                ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
              else
                echo "Model already exists at $DEST_DIR"
              fi
          volumeMounts:
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      containers:
        - name: vllm-leader
          image: {{ .Values.vllm.image }}
          imagePullPolicy: IfNotPresent
          env:
            # Fixed: the previous reference to .Values.vllm.huggingfaceToken
            # is not defined in values.yaml (the token lives under
            # .Values.model) and rendered an invalid null env value.
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ .Values.model.huggingfaceToken }}"
            - name: RAY_DEDUP_LOGS
              value: "0"
          command:
            - sh
            - -c
            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
              echo 'Using single node ------------------------------------------';
              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
          resources:
            limits:
              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
              memory: {{ .Values.resources.memoryLimit }}
              ephemeral-storage: 10Gi
            requests:
              ephemeral-storage: 10Gi
              cpu: {{ .Values.resources.cpuRequest }}
          ports:
            - containerPort: 8080
              name: http
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 120
            periodSeconds: 20
            timeoutSeconds: 5
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - name: weight-volume
              mountPath: {{ .Values.model.localMountPath }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.resources.shmSize }}
        - name: weight-volume
          persistentVolumeClaim:
            claimName: {{ .Values.app }}-pvc-model
{{- end }}
|
||||
58
vllm/vllm-app/values.yaml
Normal file
58
vllm/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
||||
# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # the only value users normally set
  localMountPath: "/Model" # fixed mount path of the model PVC inside pods
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false # enable the pre-install model-download Job
    image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli

# Backend selection: templates render based on this ("vllm", "lmdeploy", "llama")
app: "vllm"

resources:
  gpuLimit: 1
  cpuRequest: 12
  memoryLimit: "16Gi"
  shmSize: "20Gi"

# vLLM application configuration
vllm:
  image: "docker.io/vllm/vllm-openai:latest"
  #gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

llama:
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"
  # gpuLimit: 2
  # cpuRequest: 12
  # memoryLimit: "12Gi"
  # shmSize: "15Gi"

# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
# workerSize > 1 renders the LWS templates; workerSize == 1 renders single.yaml
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
|
||||
23
vllm/vllm-serve/.helmignore
Normal file
23
vllm/vllm-serve/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
vllm/vllm-serve/Chart.yaml
Normal file
25
vllm/vllm-serve/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-serve
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
@ -0,0 +1,16 @@
|
||||
1. Get the application URL by running these commands:
{{- /* Fixed: this chart defines .Values.svc (see services.yaml), not
.Values.service — the old mixed references made the LoadBalancer/ClusterIP
branches dereference a nil value. */}}
{{- if contains "NodePort" .Values.svc.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.svc.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.svc.port }}
{{- else if contains "ClusterIP" .Values.svc.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
|
||||
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
||||
{{/*
Standard Helm naming/label helpers (chart scaffold).
*/}}
{{/*
Expand the name of the chart.
*/}}
{{- define "vllm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "vllm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "vllm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "vllm-serve.labels" -}}
helm.sh/chart: {{ include "vllm-serve.chart" . }}
{{ include "vllm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "vllm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use.
NOTE(review): this references .Values.serviceAccount, which does not appear
in the visible chart values — confirm the helper is unused or add the key.
*/}}
{{- define "vllm-serve.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
|
||||
188
vllm/vllm-serve/templates/lws.yaml
Normal file
188
vllm/vllm-serve/templates/lws.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
{{- /*
Multi-node vLLM serving via LeaderWorkerSet (rendered when workerSize > 1).
*/}}
{{- if gt (int .Values.workerSize) 1 }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  leaderWorkerTemplate:
    size: {{ .Values.workerSize }}
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        initContainers:
          # Model download runs as the first initContainer so the weights are
          # on the shared volume before the serving container starts.
          - name: download-model
            image: {{ .Values.model.download.image }}
            imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
            env:
              - name: HF_ENDPOINT
                value: https://hf-mirror.com
              # Quoted: an unset token would otherwise render a bare null
              # value, which the API server rejects for env vars.
              - name: HUGGING_FACE_HUB_TOKEN
                value: "{{ .Values.model.huggingfaceToken }}"
            command:
              - sh
              - -c
              - |
                MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
                DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
                # Download only if the model is not already present.
                echo "DEST_DIR= $DEST_DIR"
                if [ ! -f "$DEST_DIR/config.json" ]; then
                  ls -l {{ .Values.model.localMountPath }}
                  echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
                  wget https://hf-mirror.com/hfd/hfd.sh
                  chmod a+x hfd.sh
                  apt install aria2 -y
                  ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
                else
                  echo "Model already exists at $DEST_DIR"
                fi
            volumeMounts:
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        containers:
          - name: vllm-leader
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            command:
              - sh
              - -c
              # Start the Ray head, then launch the OpenAI-compatible server
              # with TP within a node and PP across the worker group.
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
                MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
                python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            ports:
              - containerPort: 8080
                name: http
            readinessProbe:
              tcpSocket:
                port: 8080
              initialDelaySeconds: 120
              periodSeconds: 20
              timeoutSeconds: 5
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: {{ .Values.vllm.image }}
            imagePullPolicy: IfNotPresent
            securityContext:
              capabilities:
                add: [ "IPC_LOCK" ]
            command:
              - sh
              - -c
              - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
            resources:
              limits:
                nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
                memory: {{ .Values.resources.memoryLimit }}
                ephemeral-storage: 10Gi
                rdma/rdma_shared_device_a: 10
              requests:
                ephemeral-storage: 10Gi
                cpu: {{ .Values.resources.cpuRequest }}
            env:
              - name: GLOO_SOCKET_IFNAME
                value: eth0
              - name: NCCL_SOCKET_IFNAME
                value: eth0
              - name: NCCL_IB_DISABLE
                value: "0"
              - name: NCCL_DEBUG
                value: INFO
              - name: NCCL_IB_HCA
                value: mlx5_0:1
              - name: NCCL_IB_GID_INDEX
                value: "0"  # or "7", depending on your network configuration
              - name: RAY_DEDUP_LOGS
                value: "0"
            volumeMounts:
              - mountPath: /dev/shm
                name: dshm
              - name: weight-volume
                mountPath: {{ .Values.model.localMountPath }}
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: {{ .Values.resources.shmSize }}
          - name: weight-volume
            persistentVolumeClaim:
              claimName: {{ .Release.Name }}-pvc-model
        {{- with .Values.nodeSelector }}
        nodeSelector:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.affinity }}
        affinity:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.tolerations }}
        tolerations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
{{- end }}
|
||||
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
---
|
||||
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Release.Name }}-pv-model
|
||||
35
vllm/vllm-serve/templates/services.yaml
Normal file
35
vllm/vllm-serve/templates/services.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-svc
|
||||
spec:
|
||||
type: {{ .Values.svc.type | default "NodePort" }}
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.svc.port | default 8080 }}
|
||||
targetPort: {{ .Values.svc.port | default 8080 }}
|
||||
nodePort: {{ .Values.svc.nodePort | default 30080 }}
|
||||
|
||||
108
vllm/vllm-serve/templates/single.yaml
Normal file
108
vllm/vllm-serve/templates/single.yaml
Normal file
@ -0,0 +1,108 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-pod
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
#tcpSocket:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-pvc-model
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
75
vllm/vllm-serve/values.yaml
Normal file
75
vllm/vllm-serve/values.yaml
Normal file
@ -0,0 +1,75 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||
imagePullSecrets: []
|
||||
imagePullPolicy: IfNotPresent
|
||||
# This is to override the chart name.
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download: # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
svc:
|
||||
type: NodePort
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
nodePort: 30080
|
||||
# vLLM 应用配置
|
||||
vllm:
|
||||
image: "docker.io/vllm/vllm-openai:latest"
|
||||
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
# lmdeploy 应用配置
|
||||
lmdeploy:
|
||||
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
53
webchat/metadata.yaml
Normal file
53
webchat/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name webchat
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: llama
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30081
|
||||
url: ~
|
||||
paths:
|
||||
docs_path: /docs
|
||||
redoc_path: /redoc
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
|
||||
23
webchat/vllm-app/.helmignore
Normal file
23
webchat/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
25
webchat/vllm-app/Chart.yaml
Normal file
25
webchat/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
apiVersion: v2
|
||||
name: vllm-app
|
||||
description: A Helm chart for deploying vLLM with NFS storage
|
||||
annotations:
|
||||
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
165
webchat/vllm-app/templates/llama.yaml
Normal file
165
webchat/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: llama-leader
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : USE_RAY
|
||||
value: "1"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 7860
|
||||
name: http
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-worker
|
||||
image: {{ .Values.llama.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "echo $(LWS_LEADER_ADDRESS);
|
||||
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
# value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: lmdeploy-leader
|
||||
image: {{ .Values.lmdeploy.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
value: "ray"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
#httpGet:
|
||||
#path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: lmdeploy-worker
|
||||
image: {{ .Values.lmdeploy.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||
value: "ray"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
{{- end }}
|
||||
166
webchat/vllm-app/templates/lws.yaml
Normal file
166
webchat/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
||||
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: infer
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ .Values.workerSize }}
|
||||
restartPolicy: RecreateGroupOnPodRestart
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-leader
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
#httpGet:
|
||||
#path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
workerTemplate:
|
||||
spec:
|
||||
containers:
|
||||
- name: vllm-worker
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: [ "IPC_LOCK" ]
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||
memory: {{ .Values.resources.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.resources.cpuRequest }}
|
||||
env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||
- name: GLOO_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_SOCKET_IFNAME
|
||||
value: eth0
|
||||
- name: NCCL_IB_DISABLE
|
||||
value: "0"
|
||||
- name: NCCL_DEBUG
|
||||
value: INFO
|
||||
- name: NCCL_IB_HCA
|
||||
value: mlx5_0:1
|
||||
- name: NCCL_IB_GID_INDEX
|
||||
value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.resources.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Values.app }}-pvc-model
|
||||
{{- end }}
|
||||
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
{{- if .Values.model.download.enabled }}
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-download-model
|
||||
annotations:
|
||||
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||
"helm.sh/hook-weight": "-10" # 优先执行
|
||||
"helm.sh/hook-delete-policy": hook-succeeded
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: downloader
|
||||
image: {{ .Values.model.download.image }}
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
if [ -d "$DEST_DIR" ]; then
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
else
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: model-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model # 复用之前的 PVC
|
||||
{{- end }}
|
||||
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pv-model
|
||||
spec:
|
||||
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||
capacity:
|
||||
storage: {{ .Values.nfs.pvSize }}
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
nfs:
|
||||
path: {{ .Values.nfs.path }}
|
||||
server: {{ .Values.nfs.server }}
|
||||
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ .Values.app }}-pvc-model
|
||||
annotations:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.nfs.pvcSize }}
|
||||
volumeName: {{ .Values.app }}-pv-model
|
||||
39
webchat/vllm-app/templates/services.yaml
Normal file
39
webchat/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
#apiVersion: v1
|
||||
#kind: Service
|
||||
#metadata:
|
||||
# name: infer-leader-loadbalancer
|
||||
#spec:
|
||||
# type: LoadBalancer
|
||||
# selector:
|
||||
# leaderworkerset.sigs.k8s.io/name: infer
|
||||
# role: leader
|
||||
# ports:
|
||||
# - protocol: TCP
|
||||
# port: 8080
|
||||
# targetPort: 8080
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ .Values.app }}-leader-nodeport
|
||||
spec:
|
||||
type: NodePort
|
||||
{{- if gt (int .Values.workerSize) 1 }}
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||
role: leader
|
||||
{{- else }}
|
||||
selector:
|
||||
app: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
{{- if eq .Values.app "llama" }}
|
||||
targetPort: 7860
|
||||
{{- else }}
|
||||
targetPort: 8080
|
||||
{{- end }}
|
||||
nodePort: 30081
|
||||
|
||||
114
webchat/vllm-app/templates/single.yaml
Normal file
114
webchat/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
||||
{{- if eq (int .Values.workerSize) 1 }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ .Release.Name }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ .Release.Name }}
|
||||
spec:
|
||||
initContainers:
|
||||
# 模型下载作为第一个 initContainer
|
||||
- name: download-model
|
||||
image: {{ .Values.model.download.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: HF_ENDPOINT
|
||||
value: https://hf-mirror.com
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.model.huggingfaceToken }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||
# 检查模型是否存在,不存在则下载
|
||||
echo "DEST_DIR= $DEST_DIR"
|
||||
ls $DEST_DIR
|
||||
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||
ls -l {{ .Values.model.localMountPath }}
|
||||
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||
wget https://hf-mirror.com/hfd/hfd.sh
|
||||
chmod a+x hfd.sh
|
||||
apt install aria2 -y
|
||||
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||
else
|
||||
echo "Model already exists at $DEST_DIR"
|
||||
fi
|
||||
volumeMounts:
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
containers:
|
||||
- name: vllm-leader
|
||||
image: {{ .Values.vllm.image }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
#securityContext:
|
||||
# capabilities:
|
||||
# add: [ "IPC_LOCK" ]
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
value: {{ .Values.vllm.huggingfaceToken }}
|
||||
#- name: GLOO_SOCKET_IFNAME
|
||||
# value: eth0
|
||||
#- name: NCCL_SOCKET_IFNAME
|
||||
# value: eth0
|
||||
#- name: NCCL_IB_DISABLE
|
||||
# value: "0"
|
||||
#- name: NCCL_DEBUG
|
||||
# value: INFO
|
||||
#- name: NCCL_IB_HCA
|
||||
# value: mlx5_0:1
|
||||
#- name: NCCL_IB_GID_INDEX
|
||||
# value: "0" # 或 "7",根据你的网络配置而定
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||
echo 'Using single node ------------------------------------------';
|
||||
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.vllm.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
|
||||
memory: {{ .Values.vllm.memoryLimit }}
|
||||
ephemeral-storage: 10Gi
|
||||
#rdma/rdma_shared_device_a: 10
|
||||
requests:
|
||||
ephemeral-storage: 10Gi
|
||||
cpu: {{ .Values.vllm.cpuRequest }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
readinessProbe:
|
||||
#tcpSocket:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 20
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
- name: weight-volume
|
||||
mountPath: {{ .Values.model.localMountPath }}
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: {{ .Values.vllm.shmSize }}
|
||||
- name: weight-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: nfs-pvc-model
|
||||
{{- end }}
|
||||
58
webchat/vllm-app/values.yaml
Normal file
58
webchat/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
||||
# Default values for vllm-app.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# 模型配置
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||
localMountPath: "/Model" # PVC 固定挂载路径
|
||||
huggingfaceToken: "<your-hf-token>"
|
||||
download:
|
||||
enabled: false # 启用自动下载
|
||||
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||
|
||||
# 功能选择
|
||||
app: "vllm"
|
||||
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "16Gi"
|
||||
shmSize: "20Gi"
|
||||
|
||||
# vLLM 应用配置
|
||||
vllm:
|
||||
image: "docker.io/vllm/vllm-openai:latest"
|
||||
#gpuLimit: 2
|
||||
# cpuRequest: 12
|
||||
# memoryLimit: "12Gi"
|
||||
# shmSize: "15Gi"
|
||||
|
||||
llama:
|
||||
image: "docker.io/library/one-click:v1"
|
||||
|
||||
# lmdeploy 应用配置
|
||||
lmdeploy:
|
||||
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||
# gpuLimit: 2
|
||||
# cpuRequest: 12
|
||||
# memoryLimit: "12Gi"
|
||||
# shmSize: "15Gi"
|
||||
|
||||
# NFS PV/PVC 配置
|
||||
nfs:
|
||||
server: "10.6.80.11"
|
||||
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||
storageClass: "local-path"
|
||||
pvSize: "500Gi"
|
||||
pvcSize: "50Gi"
|
||||
|
||||
# LeaderWorkerSet 配置
|
||||
replicaCount: 1
|
||||
workerSize: 2
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
51
webui/metadata.yaml
Normal file
51
webui/metadata.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
application_name: &application_name webui
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: open-webui
|
||||
sets:
|
||||
image:
|
||||
repository: ghcr.io/open-webui/open-webui
|
||||
tag: main
|
||||
pullPolicy: "IfNotPresent"
|
||||
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||
ollama:
|
||||
enabled: false
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort: 30679
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30679
|
||||
url: ~
|
||||
pod:
|
||||
name: open-webui-
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: open-webui
|
||||
sets:
|
||||
image:
|
||||
repository: ghcr.io/open-webui/open-webui
|
||||
tag: main
|
||||
pullPolicy: "IfNotPresent"
|
||||
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||
ollama:
|
||||
enabled: false
|
||||
service:
|
||||
type: NodePort
|
||||
nodePort: 30679
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
servicename: ~
|
||||
port: 30679
|
||||
url: ~
|
||||
pod:
|
||||
name: open-webui-
|
||||
25
webui/open-webui/.helmignore
Normal file
25
webui/open-webui/.helmignore
Normal file
@ -0,0 +1,25 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
values-minikube.yaml
|
||||
12
webui/open-webui/Chart.lock
Normal file
12
webui/open-webui/Chart.lock
Normal file
@ -0,0 +1,12 @@
|
||||
dependencies:
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm/
|
||||
version: 1.27.0
|
||||
- name: pipelines
|
||||
repository: https://helm.openwebui.com
|
||||
version: 0.7.0
|
||||
- name: tika
|
||||
repository: https://apache.jfrog.io/artifactory/tika
|
||||
version: 3.2.2
|
||||
digest: sha256:1c6e5d6a38dc8ebb4e15b1945fb222fa57b10e8882d5c79ba430648f3c5af372
|
||||
generated: "2025-08-22T15:22:03.150693+02:00"
|
||||
38
webui/open-webui/Chart.yaml
Normal file
38
webui/open-webui/Chart.yaml
Normal file
@ -0,0 +1,38 @@
|
||||
annotations:
|
||||
licenses: MIT
|
||||
apiVersion: v2
|
||||
appVersion: 0.6.26
|
||||
dependencies:
|
||||
- condition: ollama.enabled
|
||||
import-values:
|
||||
- child: service
|
||||
parent: ollama.service
|
||||
name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm/
|
||||
version: '>=0.24.0'
|
||||
- condition: pipelines.enabled
|
||||
import-values:
|
||||
- child: service
|
||||
parent: pipelines.service
|
||||
name: pipelines
|
||||
repository: https://helm.openwebui.com
|
||||
version: '>=0.0.1'
|
||||
- condition: tika.enabled
|
||||
name: tika
|
||||
repository: https://apache.jfrog.io/artifactory/tika
|
||||
version: '>=2.9.0'
|
||||
description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions \U0001F44B"
|
||||
home: https://www.openwebui.com/
|
||||
icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png
|
||||
keywords:
|
||||
- llm
|
||||
- chat
|
||||
- web-ui
|
||||
- open-webui
|
||||
name: open-webui
|
||||
sources:
|
||||
- https://github.com/open-webui/helm-charts
|
||||
- https://github.com/open-webui/open-webui/pkgs/container/open-webui
|
||||
- https://github.com/otwld/ollama-helm/
|
||||
- https://hub.docker.com/r/ollama/ollama
|
||||
version: 7.7.0
|
||||
270
webui/open-webui/README.md
Normal file
270
webui/open-webui/README.md
Normal file
@ -0,0 +1,270 @@
|
||||
# open-webui
|
||||
|
||||
 
|
||||
|
||||
Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋
|
||||
|
||||
**Homepage:** <https://www.openwebui.com/>
|
||||
|
||||
## Source Code
|
||||
|
||||
* <https://github.com/open-webui/helm-charts>
|
||||
* <https://github.com/open-webui/open-webui/pkgs/container/open-webui>
|
||||
* <https://github.com/otwld/ollama-helm/>
|
||||
* <https://hub.docker.com/r/ollama/ollama>
|
||||
|
||||
## Installing
|
||||
|
||||
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||
|
||||
```shell
|
||||
helm repo add open-webui https://helm.openwebui.com/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Now you can install the chart:
|
||||
|
||||
```shell
|
||||
helm upgrade --install open-webui open-webui/open-webui
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://apache.jfrog.io/artifactory/tika | tika | >=2.9.0 |
|
||||
| https://helm.openwebui.com | pipelines | >=0.0.1 |
|
||||
| https://otwld.github.io/ollama-helm/ | ollama | >=0.24.0 |
|
||||
|
||||
## Values
|
||||
|
||||
### Logging configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| logging.components.audio | string | `""` | Set the log level for the Audio processing component |
|
||||
| logging.components.comfyui | string | `""` | Set the log level for the ComfyUI Integration component |
|
||||
| logging.components.config | string | `""` | Set the log level for the Configuration Management component |
|
||||
| logging.components.db | string | `""` | Set the log level for the Database Operations (Peewee) component |
|
||||
| logging.components.images | string | `""` | Set the log level for the Image Generation component |
|
||||
| logging.components.main | string | `""` | Set the log level for the Main Application Execution component |
|
||||
| logging.components.models | string | `""` | Set the log level for the Model Management component |
|
||||
| logging.components.ollama | string | `""` | Set the log level for the Ollama Backend Integration component |
|
||||
| logging.components.openai | string | `""` | Set the log level for the OpenAI API Integration component |
|
||||
| logging.components.rag | string | `""` | Set the log level for the Retrieval-Augmented Generation (RAG) component |
|
||||
| logging.components.webhook | string | `""` | Set the log level for the Authentication Webhook component |
|
||||
| logging.level | string | `""` | Set the global log level ["notset", "debug", "info" (default), "warning", "error", "critical"] |
|
||||
|
||||
### Azure Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.azure.container | string | `""` | Sets the container name for Azure Storage |
|
||||
| persistence.azure.endpointUrl | string | `""` | Sets the endpoint URL for Azure Storage |
|
||||
| persistence.azure.key | string | `""` | Set the access key for Azure Storage (ignored if keyExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Managed Identity if run in Azure services |
|
||||
| persistence.azure.keyExistingSecret | string | `""` | Set the access key for Azure Storage from existing secret |
|
||||
| persistence.azure.keyExistingSecretKey | string | `""` | Set the access key for Azure Storage from existing secret key |
|
||||
|
||||
### Google Cloud Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.gcs.appCredentialsJson | string | `""` | Contents of Google Application Credentials JSON file (ignored if appCredentialsJsonExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Google Metadata server if run on a Google Compute Engine. File can be generated for a service account following this guide: https://developers.google.com/workspace/guides/create-credentials#service-account |
|
||||
| persistence.gcs.appCredentialsJsonExistingSecret | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret |
|
||||
| persistence.gcs.appCredentialsJsonExistingSecretKey | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret key |
|
||||
| persistence.gcs.bucket | string | `""` | Sets the bucket name for Google Cloud Storage. Bucket must already exist |
|
||||
|
||||
### Amazon S3 Storage configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| persistence.s3.accessKey | string | `""` | Sets the access key ID for S3 storage |
|
||||
| persistence.s3.accessKeyExistingAccessKey | string | `""` | Set the secret access key for S3 storage from existing k8s secret key |
|
||||
| persistence.s3.accessKeyExistingSecret | string | `""` | Set the secret access key for S3 storage from existing k8s secret |
|
||||
| persistence.s3.bucket | string | `""` | Sets the bucket name for S3 storage |
|
||||
| persistence.s3.endpointUrl | string | `""` | Sets the endpoint url for S3 storage |
|
||||
| persistence.s3.keyPrefix | string | `""` | Sets the key prefix for a S3 object |
|
||||
| persistence.s3.region | string | `""` | Sets the region name for S3 storage |
|
||||
| persistence.s3.secretKey | string | `""` | Sets the secret access key for S3 storage (ignored if secretKeyExistingSecret is set) |
|
||||
| persistence.s3.secretKeyExistingSecret | string | `""` | Set the secret key for S3 storage from existing k8s secret |
|
||||
| persistence.s3.secretKeyExistingSecretKey | string | `""` | Set the secret key for S3 storage from existing k8s secret key |
|
||||
|
||||
### SSO Configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.enableGroupManagement | bool | `false` | Enable OAuth group management through access token groups claim |
|
||||
| sso.enableRoleManagement | bool | `false` | Enable OAuth role management through access token roles claim |
|
||||
| sso.enableSignup | bool | `false` | Enable account creation when logging in with OAuth (distinct from regular signup) |
|
||||
| sso.enabled | bool | `false` | **Enable SSO authentication globally** must enable to use SSO authentication |
|
||||
| sso.groupManagement.groupsClaim | string | `"groups"` | The claim that contains the groups (can be nested, e.g., user.memberOf) |
|
||||
| sso.mergeAccountsByEmail | bool | `false` | Allow logging into accounts that match email from OAuth provider (considered insecure) |
|
||||
|
||||
### GitHub OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.github.clientExistingSecret | string | `""` | GitHub OAuth client secret from existing secret |
|
||||
| sso.github.clientExistingSecretKey | string | `""` | GitHub OAuth client secret key from existing secret |
|
||||
| sso.github.clientId | string | `""` | GitHub OAuth client ID |
|
||||
| sso.github.clientSecret | string | `""` | GitHub OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.github.enabled | bool | `false` | Enable GitHub OAuth |
|
||||
|
||||
### Google OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.google.clientExistingSecret | string | `""` | Google OAuth client secret from existing secret |
|
||||
| sso.google.clientExistingSecretKey | string | `""` | Google OAuth client secret key from existing secret |
|
||||
| sso.google.clientId | string | `""` | Google OAuth client ID |
|
||||
| sso.google.clientSecret | string | `""` | Google OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.google.enabled | bool | `false` | Enable Google OAuth |
|
||||
|
||||
### Microsoft OAuth configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.microsoft.clientExistingSecret | string | `""` | Microsoft OAuth client secret from existing secret |
|
||||
| sso.microsoft.clientExistingSecretKey | string | `""` | Microsoft OAuth client secret key from existing secret |
|
||||
| sso.microsoft.clientId | string | `""` | Microsoft OAuth client ID |
|
||||
| sso.microsoft.clientSecret | string | `""` | Microsoft OAuth client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.microsoft.enabled | bool | `false` | Enable Microsoft OAuth |
|
||||
| sso.microsoft.tenantId | string | `""` | Microsoft tenant ID - use 9188040d-6c67-4c5b-b112-36a304b66dad for personal accounts |
|
||||
|
||||
### OIDC configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.oidc.clientExistingSecret | string | `""` | OICD client secret from existing secret |
|
||||
| sso.oidc.clientExistingSecretKey | string | `""` | OIDC client secret key from existing secret |
|
||||
| sso.oidc.clientId | string | `""` | OIDC client ID |
|
||||
| sso.oidc.clientSecret | string | `""` | OIDC client secret (ignored if clientExistingSecret is set) |
|
||||
| sso.oidc.enabled | bool | `false` | Enable OIDC authentication |
|
||||
| sso.oidc.providerName | string | `"SSO"` | Name of the provider to show on the UI |
|
||||
| sso.oidc.providerUrl | string | `""` | OIDC provider well known URL |
|
||||
| sso.oidc.scopes | string | `"openid email profile"` | Scopes to request (space-separated). |
|
||||
|
||||
### Role management configuration
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.roleManagement.adminRoles | string | `""` | Comma-separated list of roles allowed to log in as admin (receive open webui role admin) |
|
||||
| sso.roleManagement.allowedRoles | string | `""` | Comma-separated list of roles allowed to log in (receive open webui role user) |
|
||||
| sso.roleManagement.rolesClaim | string | `"roles"` | The claim that contains the roles (can be nested, e.g., user.roles) |
|
||||
|
||||
### SSO trusted header authentication
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| sso.trustedHeader.emailHeader | string | `""` | Header containing the user's email address |
|
||||
| sso.trustedHeader.enabled | bool | `false` | Enable trusted header authentication |
|
||||
| sso.trustedHeader.nameHeader | string | `""` | Header containing the user's name (optional, used for new user creation) |
|
||||
|
||||
### Other Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| affinity | object | `{}` | Affinity for pod assignment |
|
||||
| annotations | object | `{}` | |
|
||||
| args | list | `[]` | Open WebUI container arguments (overrides default) |
|
||||
| clusterDomain | string | `"cluster.local"` | Value of cluster domain |
|
||||
| command | list | `[]` | Open WebUI container command (overrides default entrypoint) |
|
||||
| commonEnvVars | list | `[]` | Env vars added to the Open WebUI deployment, common across environments. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: environment variables defined in both `extraEnvVars` and `commonEnvVars` will result in a conflict. Avoid duplicates) |
|
||||
| containerSecurityContext | object | `{}` | Configure container security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-containe> |
|
||||
| copyAppData.args | list | `[]` | Open WebUI copy-app-data init container arguments (overrides default) |
|
||||
| copyAppData.command | list | `[]` | Open WebUI copy-app-data init container command (overrides default) |
|
||||
| copyAppData.resources | object | `{}` | |
|
||||
| databaseUrl | string | `""` | Configure database URL, needed to work with Postgres (example: `postgresql://<user>:<password>@<service>:<port>/<database>`), leave empty to use the default sqlite database |
|
||||
| enableOpenaiApi | bool | `true` | Enables the use of OpenAI APIs |
|
||||
| extraEnvFrom | list | `[]` | Env vars added from configmap or secret to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: `extraEnvVars` will take precedence over the value from `extraEnvFrom`) |
|
||||
| extraEnvVars | list | `[{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}]` | Env vars added to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ |
|
||||
| extraEnvVars[0] | object | `{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}` | Default API key value for Pipelines. Should be updated in a production deployment, or be changed to the required API key if not using Pipelines |
|
||||
| extraInitContainers | list | `[]` | Additional init containers to add to the deployment/statefulset ref: <https://kubernetes.io/docs/concepts/workloads/pods/init-containers/> |
|
||||
| extraResources | list | `[]` | Extra resources to deploy with Open WebUI |
|
||||
| hostAliases | list | `[]` | HostAliases to be added to hosts-file of each container |
|
||||
| image | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/open-webui/open-webui","tag":""}` | Open WebUI image tags can be found here: https://github.com/open-webui/open-webui |
|
||||
| imagePullSecrets | list | `[]` | Configure imagePullSecrets to use private registry ref: <https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry> |
|
||||
| ingress.additionalHosts | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | Use appropriate annotations for your Ingress controller, e.g., for NGINX: |
|
||||
| ingress.class | string | `""` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.existingSecret | string | `""` | |
|
||||
| ingress.extraLabels | object | `{}` | Additional custom labels to add to the Ingress metadata Useful for tagging, selecting, or applying policies to the Ingress via labels. |
|
||||
| ingress.host | string | `"chat.example.com"` | |
|
||||
| ingress.tls | bool | `false` | |
|
||||
| livenessProbe | object | `{}` | Probe for liveness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| managedCertificate.domains[0] | string | `"chat.example.com"` | |
|
||||
| managedCertificate.enabled | bool | `false` | |
|
||||
| managedCertificate.name | string | `"mydomain-chat-cert"` | |
|
||||
| nameOverride | string | `""` | |
|
||||
| namespaceOverride | string | `""` | |
|
||||
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||
| ollama.enabled | bool | `true` | Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure |
|
||||
| ollama.fullnameOverride | string | `"open-webui-ollama"` | If enabling embedded Ollama, update fullnameOverride to your desired Ollama name value, or else it will use the default ollama.name value from the Ollama chart |
|
||||
| ollamaUrls | list | `[]` | A list of Ollama API endpoints. These can be added in lieu of automatically installing the Ollama Helm chart, or in addition to it. |
|
||||
| ollamaUrlsFromExtraEnv | bool | `false` | Disables taking Ollama Urls from `ollamaUrls` list |
|
||||
| openaiBaseApiUrl | string | `"https://api.openai.com/v1"` | OpenAI base API URL to use. Defaults to the Pipelines service endpoint when Pipelines are enabled, and "https://api.openai.com/v1" if Pipelines are not enabled and this value is blank |
|
||||
| openaiBaseApiUrls | list | `[]` | OpenAI base API URLs to use. Overwrites the value in openaiBaseApiUrl if set |
|
||||
| persistence.accessModes | list | `["ReadWriteOnce"]` | If using multiple replicas, you must update accessModes to ReadWriteMany |
|
||||
| persistence.annotations | object | `{}` | |
|
||||
| persistence.enabled | bool | `true` | |
|
||||
| persistence.existingClaim | string | `""` | Use existingClaim if you want to re-use an existing Open WebUI PVC instead of creating a new one |
|
||||
| persistence.provider | string | `"local"` | Sets the storage provider, availables values are `local`, `s3`, `gcs` or `azure` |
|
||||
| persistence.selector | object | `{}` | |
|
||||
| persistence.size | string | `"2Gi"` | |
|
||||
| persistence.storageClass | string | `""` | |
|
||||
| persistence.subPath | string | `""` | Subdirectory of Open WebUI PVC to mount. Useful if root directory is not empty. |
|
||||
| pipelines.enabled | bool | `true` | Automatically install Pipelines chart to extend Open WebUI functionality using Pipelines: https://github.com/open-webui/pipelines |
|
||||
| pipelines.extraEnvVars | list | `[]` | This section can be used to pass required environment variables to your pipelines (e.g. Langfuse hostname) |
|
||||
| podAnnotations | object | `{}` | |
|
||||
| podLabels | object | `{}` | |
|
||||
| podSecurityContext | object | `{}` | Configure pod security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
|
||||
| priorityClassName | string | `""` | Priority class name for the Open WebUI pods |
|
||||
| readinessProbe | object | `{}` | Probe for readiness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| replicaCount | int | `1` | |
|
||||
| resources | object | `{}` | |
|
||||
| revisionHistoryLimit | int | `10` | Revision history limit for the workload manager (deployment). |
|
||||
| runtimeClassName | string | `""` | Configure runtime class ref: <https://kubernetes.io/docs/concepts/containers/runtime-class/> |
|
||||
| service | object | `{"annotations":{},"containerPort":8080,"labels":{},"loadBalancerClass":"","nodePort":"","port":80,"type":"ClusterIP"}` | Service values to expose Open WebUI pods to cluster |
|
||||
| serviceAccount.annotations | object | `{}` | |
|
||||
| serviceAccount.automountServiceAccountToken | bool | `false` | |
|
||||
| serviceAccount.enable | bool | `true` | |
|
||||
| serviceAccount.name | string | `""` | |
|
||||
| startupProbe | object | `{}` | Probe for startup of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||
| strategy | object | `{}` | Strategy for updating the workload manager: deployment or statefulset |
|
||||
| tika.enabled | bool | `false` | Automatically install Apache Tika to extend Open WebUI |
|
||||
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||
| topologySpreadConstraints | list | `[]` | Topology Spread Constraints for pod assignment |
|
||||
| volumeMounts | object | `{"container":[],"initContainer":[]}` | Configure container volume mounts ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||
| volumes | list | `[]` | Configure pod volumes ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||
| websocket.enabled | bool | `false` | Enables websocket support in Open WebUI with env `ENABLE_WEBSOCKET_SUPPORT` |
|
||||
| websocket.manager | string | `"redis"` | Specifies the websocket manager to use with env `WEBSOCKET_MANAGER`: redis (default) |
|
||||
| websocket.nodeSelector | object | `{}` | Node selector for websocket pods |
|
||||
| websocket.redis | object | `{"affinity":{},"annotations":{},"args":[],"command":[],"enabled":true,"image":{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"},"labels":{},"name":"open-webui-redis","pods":{"annotations":{},"labels":{}},"resources":{},"securityContext":{},"service":{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"},"tolerations":[]}` | Deploys a redis |
|
||||
| websocket.redis.affinity | object | `{}` | Redis affinity for pod assignment |
|
||||
| websocket.redis.annotations | object | `{}` | Redis annotations |
|
||||
| websocket.redis.args | list | `[]` | Redis arguments (overrides default) |
|
||||
| websocket.redis.command | list | `[]` | Redis command (overrides default) |
|
||||
| websocket.redis.enabled | bool | `true` | Enable redis installation |
|
||||
| websocket.redis.image | object | `{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"}` | Redis image |
|
||||
| websocket.redis.labels | object | `{}` | Redis labels |
|
||||
| websocket.redis.name | string | `"open-webui-redis"` | Redis name |
|
||||
| websocket.redis.pods | object | `{"annotations":{},"labels":{}}` | Redis pod |
|
||||
| websocket.redis.pods.annotations | object | `{}` | Redis pod annotations |
|
||||
| websocket.redis.pods.labels | object | `{}` | Redis pod labels |
|
||||
| websocket.redis.resources | object | `{}` | Redis resources |
|
||||
| websocket.redis.securityContext | object | `{}` | Redis security context |
|
||||
| websocket.redis.service | object | `{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"}` | Redis service |
|
||||
| websocket.redis.service.annotations | object | `{}` | Redis service annotations |
|
||||
| websocket.redis.service.containerPort | int | `6379` | Redis container/target port |
|
||||
| websocket.redis.service.labels | object | `{}` | Redis service labels |
|
||||
| websocket.redis.service.nodePort | string | `""` | Redis service node port. Valid only when type is `NodePort` |
|
||||
| websocket.redis.service.port | int | `6379` | Redis service port |
|
||||
| websocket.redis.service.portName | string | `"http"` | Redis service port name. Istio needs this to be something like `tcp-redis` |
|
||||
| websocket.redis.service.type | string | `"ClusterIP"` | Redis service type |
|
||||
| websocket.redis.tolerations | list | `[]` | Redis tolerations for pod assignment |
|
||||
| websocket.url | string | `"redis://open-webui-redis:6379/0"` | Specifies the URL of the Redis instance for websocket communication. Template with `redis://[:<password>@]<hostname>:<port>/<db>` |
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||
36
webui/open-webui/README.md.gotmpl
Normal file
36
webui/open-webui/README.md.gotmpl
Normal file
@ -0,0 +1,36 @@
|
||||
{{ template "chart.header" . }}
|
||||
|
||||
{{ template "chart.deprecationWarning" . }}
|
||||
|
||||
{{ template "chart.badgesSection" . }}
|
||||
|
||||
{{ template "chart.description" . }}
|
||||
|
||||
{{ template "chart.homepageLine" . }}
|
||||
|
||||
{{ template "chart.maintainersSection" . }}
|
||||
|
||||
{{ template "chart.sourcesSection" . }}
|
||||
|
||||
## Installing
|
||||
|
||||
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||
|
||||
```shell
|
||||
helm repo add open-webui https://helm.openwebui.com/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Now you can install the chart:
|
||||
|
||||
```shell
|
||||
helm upgrade --install open-webui open-webui/open-webui
|
||||
```
|
||||
|
||||
{{ template "chart.requirementsSection" . }}
|
||||
|
||||
{{ template "chart.valuesSection" . }}
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||
30
webui/open-webui/charts/ollama/.helmignore
Normal file
30
webui/open-webui/charts/ollama/.helmignore
Normal file
@ -0,0 +1,30 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
#others
|
||||
.github
|
||||
kind-config.yml
|
||||
ci/
|
||||
|
||||
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
@ -0,0 +1,30 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
.drone.yml
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
#others
|
||||
.github
|
||||
kind-config.yml
|
||||
ci/
|
||||
|
||||
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
apiVersion: v2
|
||||
name: ollama
|
||||
description: Get up and running with large language models locally.
|
||||
|
||||
type: application
|
||||
|
||||
version: 1.27.0
|
||||
|
||||
appVersion: "0.11.4"
|
||||
|
||||
annotations:
|
||||
artifacthub.io/category: ai-machine-learning
|
||||
artifacthub.io/changes: |
|
||||
- kind: changed
|
||||
description: upgrade app version to 0.11.4
|
||||
links:
|
||||
- name: Ollama release v0.11.4
|
||||
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
|
||||
|
||||
kubeVersion: "^1.16.0-0"
|
||||
home: https://ollama.ai/
|
||||
icon: https://ollama.ai/public/ollama.png
|
||||
keywords:
|
||||
- ai
|
||||
- llm
|
||||
- llama
|
||||
- mistral
|
||||
sources:
|
||||
- https://github.com/ollama/ollama
|
||||
- https://github.com/otwld/ollama-helm
|
||||
maintainers:
|
||||
- name: OTWLD
|
||||
email: contact@otwld.com
|
||||
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 OTWLD
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
@ -0,0 +1,331 @@
|
||||

|
||||
|
||||

|
||||
[](https://artifacthub.io/packages/helm/ollama-helm/ollama)
|
||||
[](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
|
||||
[](https://discord.gg/U24mpqTynB)
|
||||
|
||||
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
|
||||
|
||||
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
|
||||
|
||||
## Requirements
|
||||
|
||||
- Kubernetes: `>= 1.16.0-0` for **CPU only**
|
||||
|
||||
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
|
||||
|
||||
*Not all GPUs are currently supported with ollama (especially with AMD)*
|
||||
|
||||
## Deploying Ollama chart
|
||||
|
||||
To install the `ollama` chart in the `ollama` namespace:
|
||||
|
||||
> [!IMPORTANT]
|
||||
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
|
||||
> registry https://helm.otwld.com/
|
||||
> Please update your Helm registry accordingly.
|
||||
|
||||
```console
|
||||
helm repo add otwld https://helm.otwld.com/
|
||||
helm repo update
|
||||
helm install ollama otwld/ollama --namespace ollama --create-namespace
|
||||
```
|
||||
|
||||
## Upgrading Ollama chart
|
||||
|
||||
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
|
||||
backwards incompatible changes.
|
||||
|
||||
Make adjustments to your values as needed, then run `helm upgrade`:
|
||||
|
||||
```console
|
||||
# -- This pulls the latest version of the ollama chart from the repo.
|
||||
helm repo update
|
||||
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
|
||||
```
|
||||
|
||||
## Uninstalling Ollama chart
|
||||
|
||||
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
|
||||
|
||||
```console
|
||||
helm delete ollama --namespace ollama
|
||||
```
|
||||
|
||||
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
|
||||
parameters and flags.
|
||||
|
||||
## Interact with Ollama
|
||||
|
||||
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
|
||||
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||
- Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
|
||||
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
|
||||
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
|
||||
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
|
||||
|
||||
## Examples
|
||||
|
||||
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
|
||||
|
||||
### Basic values.yaml example with GPU and two models pulled at startup
|
||||
|
||||
```
|
||||
ollama:
|
||||
gpu:
|
||||
# -- Enable GPU integration
|
||||
enabled: true
|
||||
|
||||
# -- GPU type: 'nvidia' or 'amd'
|
||||
type: 'nvidia'
|
||||
|
||||
# -- Specify the number of GPU to 1
|
||||
number: 1
|
||||
|
||||
# -- List of models to pull at container startup
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Basic values.yaml example with Ingress
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- llama2
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: ollama.domain.lan
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
- *API is now reachable at `ollama.domain.lan`*
|
||||
|
||||
---
|
||||
|
||||
### Create and run model from template
|
||||
|
||||
```
|
||||
ollama:
|
||||
models:
|
||||
create:
|
||||
- name: llama3.1-ctx32768
|
||||
template: |
|
||||
FROM llama3.1
|
||||
PARAMETER num_ctx 32768
|
||||
run:
|
||||
- llama3.1-ctx32768
|
||||
```
|
||||
|
||||
## Upgrading from 0.X.X to 1.X.X
|
||||
|
||||
The version 1.X.X introduces the ability to load models in memory at startup, the values have been changed.
|
||||
|
||||
Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
To:
|
||||
|
||||
```yaml
|
||||
ollama:
|
||||
models:
|
||||
pull:
|
||||
- mistral
|
||||
- llama2
|
||||
```
|
||||
|
||||
## Helm Values
|
||||
|
||||
- See [values.yaml](values.yaml) to see the Chart's default values.
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| affinity | object | `{}` | Affinity for pod assignment |
|
||||
| autoscaling.enabled | bool | `false` | Enable autoscaling |
|
||||
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
|
||||
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
|
||||
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
|
||||
| deployment.labels | object | `{}` | Labels to add to the deployment |
|
||||
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
|
||||
| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
|
||||
| extraEnvFrom | list | `[]` | Additionl environment variables from external sources (like ConfigMap) |
|
||||
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
|
||||
| fullnameOverride | string | `""` | String to fully override template |
|
||||
| hostIPC | bool | `false` | Use the host’s ipc namespace. |
|
||||
| hostNetwork | bool | `false` | Use the host's network namespace. |
|
||||
| hostPID | bool | `false` | Use the host’s pid namespace |
|
||||
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
|
||||
| image.repository | string | `"ollama/ollama"` | Docker image registry |
|
||||
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
|
||||
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
|
||||
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
|
||||
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
|
||||
| ingress.enabled | bool | `false` | Enable ingress controller resource |
|
||||
| ingress.hosts[0].host | string | `"ollama.local"` | |
|
||||
| ingress.hosts[0].paths[0].path | string | `"/"` | |
|
||||
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
|
||||
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
|
||||
| initContainers | list | `[]` | Init containers to add to the pod |
|
||||
| knative.annotations | object | `{}` | Knative service annotations |
|
||||
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
|
||||
| knative.enabled | bool | `false` | Enable Knative integration |
|
||||
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
|
||||
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
|
||||
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
|
||||
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
|
||||
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
|
||||
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
|
||||
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
|
||||
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
|
||||
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
|
||||
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
|
||||
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
|
||||
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
|
||||
| namespaceOverride | string | `""` | String to fully override namespace |
|
||||
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
|
||||
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
|
||||
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
|
||||
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
|
||||
| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number |
|
||||
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored |
|
||||
| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored |
|
||||
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice |
|
||||
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images |
|
||||
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
|
||||
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
|
||||
| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
|
||||
| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral |
|
||||
| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral |
|
||||
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
|
||||
| ollama.port | int | `11434` | |
|
||||
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
|
||||
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
|
||||
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
|
||||
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true |
|
||||
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
|
||||
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: <storageClass> If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) |
|
||||
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty |
|
||||
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: <volumeMode> If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
|
||||
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
|
||||
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
|
||||
| podLabels | object | `{}` | Map of labels to add to the pods |
|
||||
| podSecurityContext | object | `{}` | Pod Security Context |
|
||||
| priorityClassName | string | `""` | Priority Class Name |
|
||||
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
|
||||
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
|
||||
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
|
||||
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
|
||||
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
|
||||
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
|
||||
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
|
||||
| replicaCount | int | `1` | Number of replicas |
|
||||
| resources.limits | object | `{}` | Pod limit |
|
||||
| resources.requests | object | `{}` | Pod requests |
|
||||
| runtimeClassName | string | `""` | Specify runtime class |
|
||||
| securityContext | object | `{}` | Container Security Context |
|
||||
| service.annotations | object | `{}` | Annotations to add to the service |
|
||||
| service.labels | object | `{}` | Labels to add to the service |
|
||||
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
|
||||
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
|
||||
| service.port | int | `11434` | Service port |
|
||||
| service.type | string | `"ClusterIP"` | Service type |
|
||||
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
|
||||
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
|
||||
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
|
||||
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
|
||||
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
|
||||
| tests.annotations | object | `{}` | Annotations to add to the tests |
|
||||
| tests.enabled | bool | `true` | |
|
||||
| tests.labels | object | `{}` | Labels to add to the tests |
|
||||
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
|
||||
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
|
||||
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
|
||||
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
## Core team
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center">
|
||||
<a href="https://github.com/jdetroyes"
|
||||
><img
|
||||
src="https://github.com/jdetroyes.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Jean Baptiste Detroyes"
|
||||
/><br /><b> Jean Baptiste Detroyes </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://github.com/jdetroyes" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:jdetroyes@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
<td align="center">
|
||||
<a href="https://github.com/ntrehout"
|
||||
><img
|
||||
src="https://github.com/ntrehout.png?size=200"
|
||||
width="50"
|
||||
style="margin-bottom: -4px; border-radius: 8px;"
|
||||
alt="Jean Baptiste Detroyes"
|
||||
/><br /><b> Nathan Tréhout </b></a
|
||||
>
|
||||
<div style="margin-top: 4px">
|
||||
<a href="https://x.com/n_trehout" title="Twitter"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
|
||||
/></a>
|
||||
<a href="https://github.com/ntrehout" title="Github"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
|
||||
/></a>
|
||||
<a
|
||||
href="mailto:ntrehout@otwld.com"
|
||||
title="Email"
|
||||
><img
|
||||
width="16"
|
||||
src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
|
||||
/></a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Support
|
||||
|
||||
- For questions, suggestions, and discussion about Ollama please refer to
|
||||
the [Ollama issue page](https://github.com/ollama/ollama/issues)
|
||||
- For questions, suggestions, and discussion about this chart please
|
||||
visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
|
||||
our [OTWLD Discord](https://discord.gg/U24mpqTynB)
|
||||
@ -0,0 +1,25 @@
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.knative.enabled }}
|
||||
export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
|
||||
echo "Visit $KSERVICE_URL to use your application"
|
||||
{{- else if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
@ -0,0 +1,80 @@
|
||||
{{/*
|
||||
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
|
||||
*/}}
|
||||
{{- define "ollama.namespace" -}}
|
||||
{{- if .Values.namespaceOverride -}}
|
||||
{{- .Values.namespaceOverride -}}
|
||||
{{- else -}}
|
||||
{{- .Release.Namespace -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "ollama.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "ollama.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "ollama.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "ollama.labels" -}}
|
||||
helm.sh/chart: {{ include "ollama.chart" . }}
|
||||
{{ include "ollama.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "ollama.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "ollama.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "ollama.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Models mount path
|
||||
*/}}
|
||||
{{- define "ollama.modelsMountPath" -}}
|
||||
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
|
||||
{{- end -}}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user